// SPDX-License-Identifier: GPL-2.0

/*
 * Copyright 2016-2020 HabanaLabs, Ltd.
 * All Rights Reserved.
 */

#include "gaudiP.h"
#include "include/hw_ip/mmu/mmu_general.h"
#include "include/hw_ip/mmu/mmu_v1_1.h"
#include "include/gaudi/gaudi_masks.h"
#include "include/gaudi/gaudi_fw_if.h"
#include "include/gaudi/gaudi_reg_map.h"
#include "include/gaudi/gaudi_async_ids_map_extended.h"

#include <linux/module.h>
#include <linux/pci.h>
#include <linux/firmware.h>
#include <linux/hwmon.h>
#include <linux/genalloc.h>
#include <linux/io-64-nonatomic-lo-hi.h>
#include <linux/iommu.h>
#include <linux/seq_file.h>

/*
 * Gaudi security scheme:
 *
 * 1. Host is protected by:
 *        - Range registers
 *        - MMU
 *
 * 2. DDR is protected by:
 *        - Range registers (protect the first 512MB)
 *
 * 3. Configuration is protected by:
 *        - Range registers
 *        - Protection bits
 *
 * MMU is always enabled.
 *
 * QMAN DMA channels 0,1,5 (PCI DMA):
 *     - DMA is not secured.
 *     - PQ and CQ are secured.
 *     - CP is secured: The driver needs to parse CB but WREG should be allowed
 *                      because of TDMA (tensor DMA). Hence, WREG is never
 *                      secured.
 *
 * When the driver needs to use DMA it will check that Gaudi is idle, set DMA
 * channel 0 to be secured, execute the DMA and change it back to non-secured.
 * Currently, the driver doesn't use the DMA while there are compute jobs
 * running.
 *
 * The current use cases for the driver to use the DMA are:
 *     - Clear SRAM on context switch (happens on context switch when device is
 *       idle)
 *     - MMU page tables area clear (happens on init)
 *
 * QMAN DMA 2-4,6,7, TPC, MME, NIC:
 * PQ is secured and is located on the Host (HBM CON TPC3 bug)
 * CQ, CP and the engine are not secured
 *
 */

#define GAUDI_BOOT_FIT_FILE	"habanalabs/gaudi/gaudi-boot-fit.itb"
#define GAUDI_LINUX_FW_FILE	"habanalabs/gaudi/gaudi-fit.itb"
#define GAUDI_TPC_FW_FILE	"habanalabs/gaudi/gaudi_tpc.bin"

#define GAUDI_DMA_POOL_BLK_SIZE		0x100 /* 256 bytes */

#define GAUDI_RESET_TIMEOUT_MSEC	1000		/* 1000ms */
#define GAUDI_RESET_WAIT_MSEC		1		/* 1ms */
#define GAUDI_CPU_RESET_WAIT_MSEC	200		/* 200ms */
#define GAUDI_TEST_QUEUE_WAIT_USEC	100000		/* 100ms */

#define GAUDI_PLDM_RESET_WAIT_MSEC	1000		/* 1s */
#define GAUDI_PLDM_HRESET_TIMEOUT_MSEC	20000		/* 20s */
#define GAUDI_PLDM_SRESET_TIMEOUT_MSEC	14000		/* 14s */
#define GAUDI_PLDM_TEST_QUEUE_WAIT_USEC	1000000		/* 1s */
#define GAUDI_PLDM_MMU_TIMEOUT_USEC	(MMU_CONFIG_TIMEOUT_USEC * 100)
#define GAUDI_PLDM_QMAN0_TIMEOUT_USEC	(HL_DEVICE_TIMEOUT_USEC * 30)
#define GAUDI_PLDM_TPC_KERNEL_WAIT_USEC	(HL_DEVICE_TIMEOUT_USEC * 30)
#define GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC	1000000		/* 1s */

#define GAUDI_QMAN0_FENCE_VAL	0x72E91AB9

#define GAUDI_MAX_STRING_LEN	20

#define GAUDI_CB_POOL_CB_CNT	512
#define GAUDI_CB_POOL_CB_SIZE	0x20000 /* 128KB */

#define GAUDI_ALLOC_CPU_MEM_RETRY_CNT	3

#define GAUDI_NUM_OF_TPC_INTR_CAUSE	20

#define GAUDI_NUM_OF_QM_ERR_CAUSE	16

#define GAUDI_NUM_OF_QM_ARB_ERR_CAUSE	3

#define GAUDI_ARB_WDT_TIMEOUT	0x400000

101static const char gaudi_irq_name[GAUDI_MSI_ENTRIES][GAUDI_MAX_STRING_LEN] = {
102 "gaudi cq 0_0", "gaudi cq 0_1", "gaudi cq 0_2", "gaudi cq 0_3",
103 "gaudi cq 1_0", "gaudi cq 1_1", "gaudi cq 1_2", "gaudi cq 1_3",
104 "gaudi cq 5_0", "gaudi cq 5_1", "gaudi cq 5_2", "gaudi cq 5_3",
105 "gaudi cpu eq"
106};
107
108static const u8 gaudi_dma_assignment[GAUDI_DMA_MAX] = {
109 [GAUDI_PCI_DMA_1] = 0,
110 [GAUDI_PCI_DMA_2] = 1,
111 [GAUDI_PCI_DMA_3] = 5,
112 [GAUDI_HBM_DMA_1] = 2,
113 [GAUDI_HBM_DMA_2] = 3,
114 [GAUDI_HBM_DMA_3] = 4,
115 [GAUDI_HBM_DMA_4] = 6,
116 [GAUDI_HBM_DMA_5] = 7
117};
118
119static const u8 gaudi_cq_assignment[NUMBER_OF_CMPLT_QUEUES] = {
120 [0] = GAUDI_QUEUE_ID_DMA_0_0,
121 [1] = GAUDI_QUEUE_ID_DMA_0_1,
122 [2] = GAUDI_QUEUE_ID_DMA_0_2,
123 [3] = GAUDI_QUEUE_ID_DMA_0_3,
124 [4] = GAUDI_QUEUE_ID_DMA_1_0,
125 [5] = GAUDI_QUEUE_ID_DMA_1_1,
126 [6] = GAUDI_QUEUE_ID_DMA_1_2,
127 [7] = GAUDI_QUEUE_ID_DMA_1_3,
128 [8] = GAUDI_QUEUE_ID_DMA_5_0,
129 [9] = GAUDI_QUEUE_ID_DMA_5_1,
130 [10] = GAUDI_QUEUE_ID_DMA_5_2,
131 [11] = GAUDI_QUEUE_ID_DMA_5_3
132};
133
134static const u16 gaudi_packet_sizes[MAX_PACKET_ID] = {
135 [PACKET_WREG_32] = sizeof(struct packet_wreg32),
136 [PACKET_WREG_BULK] = sizeof(struct packet_wreg_bulk),
137 [PACKET_MSG_LONG] = sizeof(struct packet_msg_long),
138 [PACKET_MSG_SHORT] = sizeof(struct packet_msg_short),
139 [PACKET_CP_DMA] = sizeof(struct packet_cp_dma),
140 [PACKET_REPEAT] = sizeof(struct packet_repeat),
141 [PACKET_MSG_PROT] = sizeof(struct packet_msg_prot),
142 [PACKET_FENCE] = sizeof(struct packet_fence),
143 [PACKET_LIN_DMA] = sizeof(struct packet_lin_dma),
144 [PACKET_NOP] = sizeof(struct packet_nop),
145 [PACKET_STOP] = sizeof(struct packet_stop),
146 [PACKET_ARB_POINT] = sizeof(struct packet_arb_point),
147 [PACKET_WAIT] = sizeof(struct packet_wait),
148 [PACKET_LOAD_AND_EXE] = sizeof(struct packet_load_and_exe)
149};
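
/*
 * Illustrative sketch (not part of the original driver): a command-buffer
 * parser could use the size table above to advance packet-by-packet. The
 * opcode field location relies on GAUDI_PKT_CTL_OPCODE_SHIFT (used later in
 * this file); the 0x1F field mask is an assumption for illustration only.
 */
static inline u32 gaudi_example_packet_size(u32 ctl_word)
{
	u32 opcode = (ctl_word >> GAUDI_PKT_CTL_OPCODE_SHIFT) & 0x1F;

	/* Unknown opcodes get size 0 so the caller can stop parsing */
	return (opcode < MAX_PACKET_ID) ? gaudi_packet_sizes[opcode] : 0;
}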
150
static const char * const
gaudi_tpc_interrupts_cause[GAUDI_NUM_OF_TPC_INTR_CAUSE] = {
153 "tpc_address_exceed_slm",
154 "tpc_div_by_0",
155 "tpc_spu_mac_overflow",
156 "tpc_spu_addsub_overflow",
157 "tpc_spu_abs_overflow",
158 "tpc_spu_fp_dst_nan_inf",
159 "tpc_spu_fp_dst_denorm",
160 "tpc_vpu_mac_overflow",
161 "tpc_vpu_addsub_overflow",
162 "tpc_vpu_abs_overflow",
163 "tpc_vpu_fp_dst_nan_inf",
164 "tpc_vpu_fp_dst_denorm",
165 "tpc_assertions",
166 "tpc_illegal_instruction",
167 "tpc_pc_wrap_around",
168 "tpc_qm_sw_err",
169 "tpc_hbw_rresp_err",
170 "tpc_hbw_bresp_err",
171 "tpc_lbw_rresp_err",
172 "tpc_lbw_bresp_err"
173};
174
175static const char * const
176gaudi_qman_error_cause[GAUDI_NUM_OF_QM_ERR_CAUSE] = {
177 "PQ AXI HBW error",
178 "CQ AXI HBW error",
179 "CP AXI HBW error",
180 "CP error due to undefined OPCODE",
181 "CP encountered STOP OPCODE",
182 "CP AXI LBW error",
183 "CP WRREG32 or WRBULK returned error",
184 "N/A",
185 "FENCE 0 inc over max value and clipped",
186 "FENCE 1 inc over max value and clipped",
187 "FENCE 2 inc over max value and clipped",
188 "FENCE 3 inc over max value and clipped",
189 "FENCE 0 dec under min value and clipped",
190 "FENCE 1 dec under min value and clipped",
191 "FENCE 2 dec under min value and clipped",
192 "FENCE 3 dec under min value and clipped"
193};
194
195static const char * const
196gaudi_qman_arb_error_cause[GAUDI_NUM_OF_QM_ARB_ERR_CAUSE] = {
197 "Choice push while full error",
198 "Choice Q watchdog error",
199 "MSG AXI LBW returned with error"
200};
201
202static enum hl_queue_type gaudi_queue_type[GAUDI_QUEUE_ID_SIZE] = {
203 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_0 */
204 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_1 */
205 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_2 */
206 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_3 */
207 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_0 */
208 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_1 */
209 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_2 */
210 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_3 */
211 QUEUE_TYPE_CPU, /* GAUDI_QUEUE_ID_CPU_PQ */
212 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_0 */
213 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_1 */
214 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_2 */
215 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_3 */
216 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_0 */
217 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_1 */
218 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_2 */
219 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_3 */
220 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_0 */
221 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_1 */
222 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_2 */
223 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_3 */
224 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_5_0 */
225 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_5_1 */
226 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_5_2 */
227 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_5_3 */
228 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_0 */
229 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_1 */
230 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_2 */
231 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_3 */
232 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_0 */
233 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_1 */
234 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_2 */
235 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_3 */
236 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_0 */
237 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_1 */
238 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_2 */
239 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_3 */
240 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_0 */
241 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_1 */
242 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_2 */
243 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_3 */
244 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_0 */
245 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_1 */
246 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_2 */
247 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_3 */
248 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_0 */
249 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_1 */
250 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_2 */
251 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_3 */
252 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_0 */
253 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_1 */
254 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_2 */
255 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_3 */
256 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_0 */
257 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_1 */
258 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_2 */
259 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_3 */
260 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_0 */
261 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_1 */
262 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_2 */
263 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_3 */
264 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_0 */
265 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_1 */
266 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_2 */
267 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_3 */
268 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_0 */
269 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_1 */
270 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_2 */
271 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_3 */
272 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_0 */
273 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_1 */
274 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_2 */
275 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_3 */
276 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_0_0 */
277 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_0_1 */
278 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_0_2 */
279 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_0_3 */
280 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_1_0 */
281 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_1_1 */
282 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_1_2 */
283 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_1_3 */
284 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_2_0 */
285 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_2_1 */
286 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_2_2 */
287 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_2_3 */
288 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_3_0 */
289 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_3_1 */
290 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_3_2 */
291 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_3_3 */
292 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_4_0 */
293 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_4_1 */
294 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_4_2 */
295 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_4_3 */
296 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_5_0 */
297 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_5_1 */
298 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_5_2 */
299 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_5_3 */
300 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_6_0 */
301 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_6_1 */
302 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_6_2 */
303 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_6_3 */
304 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_7_0 */
305 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_7_1 */
306 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_7_2 */
307 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_7_3 */
308 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_8_0 */
309 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_8_1 */
310 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_8_2 */
311 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_8_3 */
312 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_9_0 */
313 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_9_1 */
314 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_9_2 */
315 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_9_3 */
316};
317
318static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid,
319 u64 phys_addr);
320static int gaudi_send_job_on_qman0(struct hl_device *hdev,
321 struct hl_cs_job *job);
322static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
323 u32 size, u64 val);
324static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
325 u32 tpc_id);
326static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev);
327static int gaudi_armcp_info_get(struct hl_device *hdev);
328static void gaudi_disable_clock_gating(struct hl_device *hdev);
329static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid);
330
331static int gaudi_get_fixed_properties(struct hl_device *hdev)
332{
333 struct asic_fixed_properties *prop = &hdev->asic_prop;
334 int i;
335
336 if (GAUDI_QUEUE_ID_SIZE >= HL_MAX_QUEUES) {
337 dev_err(hdev->dev,
338 "Number of H/W queues must be smaller than %d\n",
339 HL_MAX_QUEUES);
340 return -EFAULT;
341 }
342
343 for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
344 if (gaudi_queue_type[i] == QUEUE_TYPE_EXT) {
345 prop->hw_queues_props[i].type = QUEUE_TYPE_EXT;
346 prop->hw_queues_props[i].driver_only = 0;
347 prop->hw_queues_props[i].requires_kernel_cb = 1;
348 } else if (gaudi_queue_type[i] == QUEUE_TYPE_CPU) {
349 prop->hw_queues_props[i].type = QUEUE_TYPE_CPU;
350 prop->hw_queues_props[i].driver_only = 1;
351 prop->hw_queues_props[i].requires_kernel_cb = 0;
352 } else if (gaudi_queue_type[i] == QUEUE_TYPE_INT) {
353 prop->hw_queues_props[i].type = QUEUE_TYPE_INT;
354 prop->hw_queues_props[i].driver_only = 0;
355 prop->hw_queues_props[i].requires_kernel_cb = 0;
356 } else if (gaudi_queue_type[i] == QUEUE_TYPE_NA) {
357 prop->hw_queues_props[i].type = QUEUE_TYPE_NA;
358 prop->hw_queues_props[i].driver_only = 0;
359 prop->hw_queues_props[i].requires_kernel_cb = 0;
360 }
361 }
362
363 for (; i < HL_MAX_QUEUES; i++)
364 prop->hw_queues_props[i].type = QUEUE_TYPE_NA;
365
366 prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES;
367
368 prop->dram_base_address = DRAM_PHYS_BASE;
369 prop->dram_size = GAUDI_HBM_SIZE_32GB;
370 prop->dram_end_address = prop->dram_base_address +
371 prop->dram_size;
372 prop->dram_user_base_address = DRAM_BASE_ADDR_USER;
373
374 prop->sram_base_address = SRAM_BASE_ADDR;
375 prop->sram_size = SRAM_SIZE;
376 prop->sram_end_address = prop->sram_base_address +
377 prop->sram_size;
378 prop->sram_user_base_address = prop->sram_base_address +
379 SRAM_USER_BASE_OFFSET;
380
381 prop->mmu_pgt_addr = MMU_PAGE_TABLES_ADDR;
382 if (hdev->pldm)
383 prop->mmu_pgt_size = 0x800000; /* 8MB */
384 else
385 prop->mmu_pgt_size = MMU_PAGE_TABLES_SIZE;
386 prop->mmu_pte_size = HL_PTE_SIZE;
387 prop->mmu_hop_table_size = HOP_TABLE_SIZE;
388 prop->mmu_hop0_tables_total_size = HOP0_TABLES_TOTAL_SIZE;
389 prop->dram_page_size = PAGE_SIZE_2MB;
390
391 prop->pmmu.hop0_shift = HOP0_SHIFT;
392 prop->pmmu.hop1_shift = HOP1_SHIFT;
393 prop->pmmu.hop2_shift = HOP2_SHIFT;
394 prop->pmmu.hop3_shift = HOP3_SHIFT;
395 prop->pmmu.hop4_shift = HOP4_SHIFT;
396 prop->pmmu.hop0_mask = HOP0_MASK;
397 prop->pmmu.hop1_mask = HOP1_MASK;
398 prop->pmmu.hop2_mask = HOP2_MASK;
399 prop->pmmu.hop3_mask = HOP3_MASK;
400 prop->pmmu.hop4_mask = HOP4_MASK;
401 prop->pmmu.start_addr = VA_HOST_SPACE_START;
402 prop->pmmu.end_addr =
403 (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2) - 1;
404 prop->pmmu.page_size = PAGE_SIZE_4KB;
405
	/* PMMU and HPMMU are the same except for the page size */
407 memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
408 prop->pmmu_huge.page_size = PAGE_SIZE_2MB;
409
410 /* shifts and masks are the same in PMMU and DMMU */
411 memcpy(&prop->dmmu, &prop->pmmu, sizeof(prop->pmmu));
412 prop->dmmu.start_addr = (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2);
413 prop->dmmu.end_addr = VA_HOST_SPACE_END;
414 prop->dmmu.page_size = PAGE_SIZE_2MB;
415
416 prop->cfg_size = CFG_SIZE;
417 prop->max_asid = MAX_ASID;
418 prop->num_of_events = GAUDI_EVENT_SIZE;
419 prop->tpc_enabled_mask = TPC_ENABLED_MASK;
420
421 prop->max_power_default = MAX_POWER_DEFAULT;
422
423 prop->cb_pool_cb_cnt = GAUDI_CB_POOL_CB_CNT;
424 prop->cb_pool_cb_size = GAUDI_CB_POOL_CB_SIZE;
425
426 prop->pcie_dbi_base_address = mmPCIE_DBI_BASE;
427 prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;
428
429 strncpy(prop->armcp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
430 CARD_NAME_MAX_LEN);
431
432 return 0;
433}
434
435static int gaudi_pci_bars_map(struct hl_device *hdev)
436{
437 static const char * const name[] = {"SRAM", "CFG", "HBM"};
438 bool is_wc[3] = {false, false, true};
439 int rc;
440
441 rc = hl_pci_bars_map(hdev, name, is_wc);
442 if (rc)
443 return rc;
444
445 hdev->rmmio = hdev->pcie_bar[CFG_BAR_ID] +
446 (CFG_BASE - SPI_FLASH_BASE_ADDR);
447
448 return 0;
449}
450
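/*
 * Descriptive note (inferred from the code below): re-point the HBM BAR
 * (inbound iATU region 2, BAR 4) at @addr and return the previous window
 * base address, or U64_MAX if the iATU programming failed.
 */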
451static u64 gaudi_set_hbm_bar_base(struct hl_device *hdev, u64 addr)
452{
453 struct gaudi_device *gaudi = hdev->asic_specific;
454 u64 old_addr = addr;
455 int rc;
456
457 if ((gaudi) && (gaudi->hbm_bar_cur_addr == addr))
458 return old_addr;
459
460 /* Inbound Region 2 - Bar 4 - Point to HBM */
461 rc = hl_pci_set_dram_bar_base(hdev, 2, 4, addr);
462 if (rc)
463 return U64_MAX;
464
465 if (gaudi) {
466 old_addr = gaudi->hbm_bar_cur_addr;
467 gaudi->hbm_bar_cur_addr = addr;
468 }
469
470 return old_addr;
471}
472
473static int gaudi_init_iatu(struct hl_device *hdev)
474{
475 int rc = 0;
476
477 /* Inbound Region 1 - Bar 2 - Point to SPI FLASH */
478 rc = hl_pci_iatu_write(hdev, 0x314,
479 lower_32_bits(SPI_FLASH_BASE_ADDR));
480 rc |= hl_pci_iatu_write(hdev, 0x318,
481 upper_32_bits(SPI_FLASH_BASE_ADDR));
482 rc |= hl_pci_iatu_write(hdev, 0x300, 0);
483 /* Enable + Bar match + match enable */
484 rc |= hl_pci_iatu_write(hdev, 0x304, 0xC0080200);
485
486 if (rc)
487 return -EIO;
488
489 return hl_pci_init_iatu(hdev, SRAM_BASE_ADDR, DRAM_PHYS_BASE,
490 HOST_PHYS_BASE, HOST_PHYS_SIZE);
491}
492
493static int gaudi_early_init(struct hl_device *hdev)
494{
495 struct asic_fixed_properties *prop = &hdev->asic_prop;
496 struct pci_dev *pdev = hdev->pdev;
497 int rc;
498
499 rc = gaudi_get_fixed_properties(hdev);
500 if (rc) {
501 dev_err(hdev->dev, "Failed to get fixed properties\n");
502 return rc;
503 }
504
505 /* Check BAR sizes */
506 if (pci_resource_len(pdev, SRAM_BAR_ID) != SRAM_BAR_SIZE) {
507 dev_err(hdev->dev,
508 "Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
509 SRAM_BAR_ID,
510 (unsigned long long) pci_resource_len(pdev,
511 SRAM_BAR_ID),
512 SRAM_BAR_SIZE);
513 return -ENODEV;
514 }
515
516 if (pci_resource_len(pdev, CFG_BAR_ID) != CFG_BAR_SIZE) {
517 dev_err(hdev->dev,
518 "Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
519 CFG_BAR_ID,
520 (unsigned long long) pci_resource_len(pdev,
521 CFG_BAR_ID),
522 CFG_BAR_SIZE);
523 return -ENODEV;
524 }
525
526 prop->dram_pci_bar_size = pci_resource_len(pdev, HBM_BAR_ID);
527
528 rc = hl_pci_init(hdev);
529 if (rc)
530 return rc;
531
532 return 0;
533}
534
535static int gaudi_early_fini(struct hl_device *hdev)
536{
537 hl_pci_fini(hdev);
538
539 return 0;
540}
541
542/**
543 * gaudi_fetch_psoc_frequency - Fetch PSOC frequency values
544 *
545 * @hdev: pointer to hl_device structure
546 *
547 */
548static void gaudi_fetch_psoc_frequency(struct hl_device *hdev)
549{
550 struct asic_fixed_properties *prop = &hdev->asic_prop;
551
552 prop->psoc_pci_pll_nr = RREG32(mmPSOC_PCI_PLL_NR);
553 prop->psoc_pci_pll_nf = RREG32(mmPSOC_PCI_PLL_NF);
554 prop->psoc_pci_pll_od = RREG32(mmPSOC_PCI_PLL_OD);
555 prop->psoc_pci_pll_div_factor = RREG32(mmPSOC_PCI_PLL_DIV_FACTOR_1);
556}
557
558static int _gaudi_init_tpc_mem(struct hl_device *hdev,
559 dma_addr_t tpc_kernel_src_addr, u32 tpc_kernel_size)
560{
561 struct asic_fixed_properties *prop = &hdev->asic_prop;
562 struct packet_lin_dma *init_tpc_mem_pkt;
563 struct hl_cs_job *job;
564 struct hl_cb *cb;
565 u64 dst_addr;
566 u32 cb_size, ctl;
567 u8 tpc_id;
568 int rc;
569
570 cb = hl_cb_kernel_create(hdev, PAGE_SIZE);
571 if (!cb)
572 return -EFAULT;
573
574 init_tpc_mem_pkt = (struct packet_lin_dma *) (uintptr_t)
575 cb->kernel_address;
576 cb_size = sizeof(*init_tpc_mem_pkt);
577 memset(init_tpc_mem_pkt, 0, cb_size);
578
579 init_tpc_mem_pkt->tsize = cpu_to_le32(tpc_kernel_size);
580
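	/*
	 * Build the LIN_DMA ctl word: LIN_DMA opcode, linear (non-strided)
	 * transfer, and the RB/MB barrier bits so the copy is ordered with
	 * respect to subsequent packets (bit meanings inferred from the
	 * shift names, hedged).
	 */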
581 ctl = ((PACKET_LIN_DMA << GAUDI_PKT_CTL_OPCODE_SHIFT) |
582 (1 << GAUDI_PKT_LIN_DMA_CTL_LIN_SHIFT) |
583 (1 << GAUDI_PKT_CTL_RB_SHIFT) |
584 (1 << GAUDI_PKT_CTL_MB_SHIFT));
585
586 init_tpc_mem_pkt->ctl = cpu_to_le32(ctl);
587
588 init_tpc_mem_pkt->src_addr = cpu_to_le64(tpc_kernel_src_addr);
589 dst_addr = (prop->sram_user_base_address &
590 GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
591 GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
592 init_tpc_mem_pkt->dst_addr |= cpu_to_le64(dst_addr);
593
594 job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
595 if (!job) {
596 dev_err(hdev->dev, "Failed to allocate a new job\n");
597 rc = -ENOMEM;
598 goto release_cb;
599 }
600
601 job->id = 0;
602 job->user_cb = cb;
603 job->user_cb->cs_cnt++;
604 job->user_cb_size = cb_size;
605 job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
606 job->patched_cb = job->user_cb;
607 job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);
608
609 hl_debugfs_add_job(hdev, job);
610
611 rc = gaudi_send_job_on_qman0(hdev, job);
612
613 if (rc)
614 goto free_job;
615
616 for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
617 rc = gaudi_run_tpc_kernel(hdev, dst_addr, tpc_id);
618 if (rc)
619 break;
620 }
621
622free_job:
623 hl_userptr_delete_list(hdev, &job->userptr_list);
624 hl_debugfs_remove_job(hdev, job);
625 kfree(job);
626 cb->cs_cnt--;
627
628release_cb:
629 hl_cb_put(cb);
630 hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
631
632 return rc;
633}
634
635/*
636 * gaudi_init_tpc_mem() - Initialize TPC memories.
637 * @hdev: Pointer to hl_device structure.
638 *
639 * Copy TPC kernel fw from firmware file and run it to initialize TPC memories.
640 *
641 * Return: 0 for success, negative value for error.
642 */
643static int gaudi_init_tpc_mem(struct hl_device *hdev)
644{
645 const struct firmware *fw;
646 size_t fw_size;
647 void *cpu_addr;
648 dma_addr_t dma_handle;
649 int rc;
650
651 rc = request_firmware(&fw, GAUDI_TPC_FW_FILE, hdev->dev);
652 if (rc) {
653 dev_err(hdev->dev, "Firmware file %s is not found!\n",
654 GAUDI_TPC_FW_FILE);
655 goto out;
656 }
657
658 fw_size = fw->size;
659 cpu_addr = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, fw_size,
660 &dma_handle, GFP_KERNEL | __GFP_ZERO);
661 if (!cpu_addr) {
662 dev_err(hdev->dev,
663 "Failed to allocate %zu of dma memory for TPC kernel\n",
664 fw_size);
665 rc = -ENOMEM;
666 goto out;
667 }
668
669 memcpy(cpu_addr, fw->data, fw_size);
670
671 rc = _gaudi_init_tpc_mem(hdev, dma_handle, fw_size);
672
673 hdev->asic_funcs->asic_dma_free_coherent(hdev, fw->size, cpu_addr,
674 dma_handle);
675
676out:
677 release_firmware(fw);
678 return rc;
679}
680
681static int gaudi_late_init(struct hl_device *hdev)
682{
683 struct gaudi_device *gaudi = hdev->asic_specific;
684 int rc;
685
686 rc = gaudi->armcp_info_get(hdev);
687 if (rc) {
688 dev_err(hdev->dev, "Failed to get armcp info\n");
689 return rc;
690 }
691
692 rc = hl_fw_send_pci_access_msg(hdev, ARMCP_PACKET_ENABLE_PCI_ACCESS);
693 if (rc) {
694 dev_err(hdev->dev, "Failed to enable PCI access from CPU\n");
695 return rc;
696 }
697
698 WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR, GAUDI_EVENT_INTS_REGISTER);
699
700 gaudi_fetch_psoc_frequency(hdev);
701
702 rc = gaudi_mmu_clear_pgt_range(hdev);
703 if (rc) {
704 dev_err(hdev->dev, "Failed to clear MMU page tables range\n");
705 goto disable_pci_access;
706 }
707
708 rc = gaudi_init_tpc_mem(hdev);
709 if (rc) {
710 dev_err(hdev->dev, "Failed to initialize TPC memories\n");
711 goto disable_pci_access;
712 }
713
714 return 0;
715
716disable_pci_access:
717 hl_fw_send_pci_access_msg(hdev, ARMCP_PACKET_DISABLE_PCI_ACCESS);
718
719 return rc;
720}
721
722static void gaudi_late_fini(struct hl_device *hdev)
723{
724 const struct hwmon_channel_info **channel_info_arr;
725 int i = 0;
726
727 if (!hdev->hl_chip_info->info)
728 return;
729
730 channel_info_arr = hdev->hl_chip_info->info;
731
732 while (channel_info_arr[i]) {
733 kfree(channel_info_arr[i]->config);
734 kfree(channel_info_arr[i]);
735 i++;
736 }
737
738 kfree(channel_info_arr);
739
740 hdev->hl_chip_info->info = NULL;
741}
742
743static int gaudi_alloc_cpu_accessible_dma_mem(struct hl_device *hdev)
744{
745 dma_addr_t dma_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {}, end_addr;
746 void *virt_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {};
747 int i, j, rc = 0;
748
	/*
	 * The device CPU works with 40-bit addresses, and bit 39 must be set
	 * to '1' when accessing the host.
	 * Bits 49:39 of the full host address are saved for a later
	 * configuration of the H/W that extends the address back to 50 bits.
	 * Because there is a single H/W register that holds the extension
	 * bits, these bits must be identical across the entire allocated
	 * range.
	 */
757
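	/*
	 * Illustration (hypothetical addresses): 0x80_0000_0000 and
	 * 0x80_0010_0000 share bits 49:39, so a single extension value covers
	 * both; 0x7f_ffff_f000 and 0x80_0000_0000 straddle bit 39 and do not.
	 */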
758 for (i = 0 ; i < GAUDI_ALLOC_CPU_MEM_RETRY_CNT ; i++) {
759 virt_addr_arr[i] =
760 hdev->asic_funcs->asic_dma_alloc_coherent(hdev,
761 HL_CPU_ACCESSIBLE_MEM_SIZE,
762 &dma_addr_arr[i],
763 GFP_KERNEL | __GFP_ZERO);
764 if (!virt_addr_arr[i]) {
765 rc = -ENOMEM;
766 goto free_dma_mem_arr;
767 }
768
769 end_addr = dma_addr_arr[i] + HL_CPU_ACCESSIBLE_MEM_SIZE - 1;
770 if (GAUDI_CPU_PCI_MSB_ADDR(dma_addr_arr[i]) ==
771 GAUDI_CPU_PCI_MSB_ADDR(end_addr))
772 break;
773 }
774
775 if (i == GAUDI_ALLOC_CPU_MEM_RETRY_CNT) {
776 dev_err(hdev->dev,
			"MSBs of CPU accessible DMA memory are not identical across the allocated range\n");
778 rc = -EFAULT;
779 goto free_dma_mem_arr;
780 }
781
782 hdev->cpu_accessible_dma_mem = virt_addr_arr[i];
783 hdev->cpu_accessible_dma_address = dma_addr_arr[i];
784 hdev->cpu_pci_msb_addr =
785 GAUDI_CPU_PCI_MSB_ADDR(hdev->cpu_accessible_dma_address);
786
787 GAUDI_PCI_TO_CPU_ADDR(hdev->cpu_accessible_dma_address);
788
789free_dma_mem_arr:
790 for (j = 0 ; j < i ; j++)
791 hdev->asic_funcs->asic_dma_free_coherent(hdev,
792 HL_CPU_ACCESSIBLE_MEM_SIZE,
793 virt_addr_arr[j],
794 dma_addr_arr[j]);
795
796 return rc;
797}
798
799static void gaudi_free_internal_qmans_pq_mem(struct hl_device *hdev)
800{
801 struct gaudi_device *gaudi = hdev->asic_specific;
802 struct gaudi_internal_qman_info *q;
803 u32 i;
804
805 for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
806 q = &gaudi->internal_qmans[i];
807 if (!q->pq_kernel_addr)
808 continue;
809 hdev->asic_funcs->asic_dma_free_coherent(hdev, q->pq_size,
810 q->pq_kernel_addr,
811 q->pq_dma_addr);
812 }
813}
814
815static int gaudi_alloc_internal_qmans_pq_mem(struct hl_device *hdev)
816{
817 struct gaudi_device *gaudi = hdev->asic_specific;
818 struct gaudi_internal_qman_info *q;
819 int rc, i;
820
821 for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
822 if (gaudi_queue_type[i] != QUEUE_TYPE_INT)
823 continue;
824
825 q = &gaudi->internal_qmans[i];
826
827 switch (i) {
828 case GAUDI_QUEUE_ID_DMA_2_0 ... GAUDI_QUEUE_ID_DMA_4_3:
829 case GAUDI_QUEUE_ID_DMA_6_0 ... GAUDI_QUEUE_ID_DMA_7_3:
830 q->pq_size = HBM_DMA_QMAN_SIZE_IN_BYTES;
831 break;
832 case GAUDI_QUEUE_ID_MME_0_0 ... GAUDI_QUEUE_ID_MME_1_3:
833 q->pq_size = MME_QMAN_SIZE_IN_BYTES;
834 break;
835 case GAUDI_QUEUE_ID_TPC_0_0 ... GAUDI_QUEUE_ID_TPC_7_3:
836 q->pq_size = TPC_QMAN_SIZE_IN_BYTES;
837 break;
838 default:
839 dev_err(hdev->dev, "Bad internal queue index %d", i);
840 rc = -EINVAL;
841 goto free_internal_qmans_pq_mem;
842 }
843
844 q->pq_kernel_addr = hdev->asic_funcs->asic_dma_alloc_coherent(
845 hdev, q->pq_size,
846 &q->pq_dma_addr,
847 GFP_KERNEL | __GFP_ZERO);
848 if (!q->pq_kernel_addr) {
849 rc = -ENOMEM;
850 goto free_internal_qmans_pq_mem;
851 }
852 }
853
854 return 0;
855
856free_internal_qmans_pq_mem:
857 gaudi_free_internal_qmans_pq_mem(hdev);
858 return rc;
859}
860
861static int gaudi_sw_init(struct hl_device *hdev)
862{
863 struct gaudi_device *gaudi;
	u32 i, event_id = 0;
	int rc;
866
867 /* Allocate device structure */
868 gaudi = kzalloc(sizeof(*gaudi), GFP_KERNEL);
869 if (!gaudi)
870 return -ENOMEM;
871
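	/*
	 * Build a compact events array from the async event map: copy the
	 * fc_id of every valid entry, and fail if the map holds more valid
	 * entries than GAUDI_EVENT_SIZE.
	 */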
	for (i = 0 ; i < ARRAY_SIZE(gaudi_irq_map_table) ; i++) {
873 if (gaudi_irq_map_table[i].valid) {
874 if (event_id == GAUDI_EVENT_SIZE) {
875 dev_err(hdev->dev,
876 "Event array exceeds the limit of %u events\n",
877 GAUDI_EVENT_SIZE);
878 rc = -EINVAL;
879 goto free_gaudi_device;
880 }
881
882 gaudi->events[event_id++] =
883 gaudi_irq_map_table[i].fc_id;
884 }
885 }
886
	gaudi->armcp_info_get = gaudi_armcp_info_get;
888
889 gaudi->max_freq_value = GAUDI_MAX_CLK_FREQ;
890
891 hdev->asic_specific = gaudi;
892
893 /* Create DMA pool for small allocations */
894 hdev->dma_pool = dma_pool_create(dev_name(hdev->dev),
895 &hdev->pdev->dev, GAUDI_DMA_POOL_BLK_SIZE, 8, 0);
896 if (!hdev->dma_pool) {
897 dev_err(hdev->dev, "failed to create DMA pool\n");
898 rc = -ENOMEM;
899 goto free_gaudi_device;
900 }
901
902 rc = gaudi_alloc_cpu_accessible_dma_mem(hdev);
903 if (rc)
904 goto free_dma_pool;
905
906 hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1);
907 if (!hdev->cpu_accessible_dma_pool) {
908 dev_err(hdev->dev,
909 "Failed to create CPU accessible DMA pool\n");
910 rc = -ENOMEM;
911 goto free_cpu_dma_mem;
912 }
913
914 rc = gen_pool_add(hdev->cpu_accessible_dma_pool,
915 (uintptr_t) hdev->cpu_accessible_dma_mem,
916 HL_CPU_ACCESSIBLE_MEM_SIZE, -1);
917 if (rc) {
918 dev_err(hdev->dev,
919 "Failed to add memory to CPU accessible DMA pool\n");
920 rc = -EFAULT;
921 goto free_cpu_accessible_dma_pool;
922 }
923
924 rc = gaudi_alloc_internal_qmans_pq_mem(hdev);
925 if (rc)
926 goto free_cpu_accessible_dma_pool;
927
928 spin_lock_init(&gaudi->hw_queues_lock);
929 mutex_init(&gaudi->clk_gate_mutex);
930
931 hdev->supports_sync_stream = true;
932 hdev->supports_coresight = true;
933
934 return 0;
935
936free_cpu_accessible_dma_pool:
937 gen_pool_destroy(hdev->cpu_accessible_dma_pool);
938free_cpu_dma_mem:
939 GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
940 hdev->cpu_pci_msb_addr);
941 hdev->asic_funcs->asic_dma_free_coherent(hdev,
942 HL_CPU_ACCESSIBLE_MEM_SIZE,
943 hdev->cpu_accessible_dma_mem,
944 hdev->cpu_accessible_dma_address);
945free_dma_pool:
946 dma_pool_destroy(hdev->dma_pool);
947free_gaudi_device:
948 kfree(gaudi);
949 return rc;
950}
951
952static int gaudi_sw_fini(struct hl_device *hdev)
953{
954 struct gaudi_device *gaudi = hdev->asic_specific;
955
956 gaudi_free_internal_qmans_pq_mem(hdev);
957
958 gen_pool_destroy(hdev->cpu_accessible_dma_pool);
959
960 GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
961 hdev->cpu_pci_msb_addr);
962 hdev->asic_funcs->asic_dma_free_coherent(hdev,
963 HL_CPU_ACCESSIBLE_MEM_SIZE,
964 hdev->cpu_accessible_dma_mem,
965 hdev->cpu_accessible_dma_address);
966
967 dma_pool_destroy(hdev->dma_pool);
968
969 mutex_destroy(&gaudi->clk_gate_mutex);
970
971 kfree(gaudi);
972
973 return 0;
974}
975
976static irqreturn_t gaudi_irq_handler_single(int irq, void *arg)
977{
978 struct hl_device *hdev = arg;
979 int i;
980
981 if (hdev->disabled)
982 return IRQ_HANDLED;
983
984 for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
985 hl_irq_handler_cq(irq, &hdev->completion_queue[i]);
986
987 hl_irq_handler_eq(irq, &hdev->event_queue);
988
989 return IRQ_HANDLED;
990}
991
992/*
993 * For backward compatibility, new MSI interrupts should be set after the
994 * existing CPU and NIC interrupts.
995 */
996static int gaudi_pci_irq_vector(struct hl_device *hdev, unsigned int nr,
997 bool cpu_eq)
998{
999 int msi_vec;
1000
1001 if ((nr != GAUDI_EVENT_QUEUE_MSI_IDX) && (cpu_eq))
1002 dev_crit(hdev->dev, "CPU EQ must use IRQ %d\n",
1003 GAUDI_EVENT_QUEUE_MSI_IDX);
1004
1005 msi_vec = ((nr < GAUDI_EVENT_QUEUE_MSI_IDX) || (cpu_eq)) ? nr :
1006 (nr + NIC_NUMBER_OF_ENGINES + 1);
1007
1008 return pci_irq_vector(hdev->pdev, msi_vec);
1009}
1010
1011static int gaudi_enable_msi_single(struct hl_device *hdev)
1012{
1013 int rc, irq;
1014
1015 dev_info(hdev->dev, "Working in single MSI IRQ mode\n");
1016
1017 irq = gaudi_pci_irq_vector(hdev, 0, false);
1018 rc = request_irq(irq, gaudi_irq_handler_single, 0,
1019 "gaudi single msi", hdev);
1020 if (rc)
1021 dev_err(hdev->dev,
1022 "Failed to request single MSI IRQ\n");
1023
1024 return rc;
1025}
1026
1027static int gaudi_enable_msi_multi(struct hl_device *hdev)
1028{
1029 int cq_cnt = hdev->asic_prop.completion_queues_count;
1030 int rc, i, irq_cnt_init, irq;
1031
1032 for (i = 0, irq_cnt_init = 0 ; i < cq_cnt ; i++, irq_cnt_init++) {
1033 irq = gaudi_pci_irq_vector(hdev, i, false);
1034 rc = request_irq(irq, hl_irq_handler_cq, 0, gaudi_irq_name[i],
1035 &hdev->completion_queue[i]);
1036 if (rc) {
1037 dev_err(hdev->dev, "Failed to request IRQ %d", irq);
1038 goto free_irqs;
1039 }
1040 }
1041
1042 irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX, true);
1043 rc = request_irq(irq, hl_irq_handler_eq, 0, gaudi_irq_name[cq_cnt],
1044 &hdev->event_queue);
1045 if (rc) {
1046 dev_err(hdev->dev, "Failed to request IRQ %d", irq);
1047 goto free_irqs;
1048 }
1049
1050 return 0;
1051
1052free_irqs:
1053 for (i = 0 ; i < irq_cnt_init ; i++)
1054 free_irq(gaudi_pci_irq_vector(hdev, i, false),
1055 &hdev->completion_queue[i]);
1056 return rc;
1057}
1058
1059static int gaudi_enable_msi(struct hl_device *hdev)
1060{
1061 struct gaudi_device *gaudi = hdev->asic_specific;
1062 int rc;
1063
1064 if (gaudi->hw_cap_initialized & HW_CAP_MSI)
1065 return 0;
1066
1067 rc = pci_alloc_irq_vectors(hdev->pdev, 1, GAUDI_MSI_ENTRIES,
1068 PCI_IRQ_MSI);
1069 if (rc < 0) {
1070 dev_err(hdev->dev, "MSI: Failed to enable support %d\n", rc);
1071 return rc;
1072 }
1073
1074 if (rc < NUMBER_OF_INTERRUPTS) {
1075 gaudi->multi_msi_mode = false;
1076 rc = gaudi_enable_msi_single(hdev);
1077 } else {
1078 gaudi->multi_msi_mode = true;
1079 rc = gaudi_enable_msi_multi(hdev);
1080 }
1081
1082 if (rc)
1083 goto free_pci_irq_vectors;
1084
1085 gaudi->hw_cap_initialized |= HW_CAP_MSI;
1086
1087 return 0;
1088
1089free_pci_irq_vectors:
1090 pci_free_irq_vectors(hdev->pdev);
1091 return rc;
1092}
1093
1094static void gaudi_sync_irqs(struct hl_device *hdev)
1095{
1096 struct gaudi_device *gaudi = hdev->asic_specific;
1097 int i, cq_cnt = hdev->asic_prop.completion_queues_count;
1098
1099 if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
1100 return;
1101
1102 /* Wait for all pending IRQs to be finished */
1103 if (gaudi->multi_msi_mode) {
1104 for (i = 0 ; i < cq_cnt ; i++)
1105 synchronize_irq(gaudi_pci_irq_vector(hdev, i, false));
1106
1107 synchronize_irq(gaudi_pci_irq_vector(hdev,
1108 GAUDI_EVENT_QUEUE_MSI_IDX,
1109 true));
1110 } else {
1111 synchronize_irq(gaudi_pci_irq_vector(hdev, 0, false));
1112 }
1113}
1114
1115static void gaudi_disable_msi(struct hl_device *hdev)
1116{
1117 struct gaudi_device *gaudi = hdev->asic_specific;
1118 int i, irq, cq_cnt = hdev->asic_prop.completion_queues_count;
1119
1120 if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
1121 return;
1122
1123 gaudi_sync_irqs(hdev);
1124
1125 if (gaudi->multi_msi_mode) {
1126 irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX,
1127 true);
1128 free_irq(irq, &hdev->event_queue);
1129
1130 for (i = 0 ; i < cq_cnt ; i++) {
1131 irq = gaudi_pci_irq_vector(hdev, i, false);
1132 free_irq(irq, &hdev->completion_queue[i]);
1133 }
1134 } else {
1135 free_irq(gaudi_pci_irq_vector(hdev, 0, false), hdev);
1136 }
1137
1138 pci_free_irq_vectors(hdev->pdev);
1139
1140 gaudi->hw_cap_initialized &= ~HW_CAP_MSI;
1141}
1142
1143static void gaudi_init_scrambler_sram(struct hl_device *hdev)
1144{
1145 struct gaudi_device *gaudi = hdev->asic_specific;
1146
1147 if (gaudi->hw_cap_initialized & HW_CAP_SRAM_SCRAMBLER)
1148 return;
1149
1150 if (!hdev->sram_scrambler_enable)
1151 return;
1152
1153 WREG32(mmNIF_RTR_CTRL_0_SCRAM_SRAM_EN,
1154 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1155 WREG32(mmNIF_RTR_CTRL_1_SCRAM_SRAM_EN,
1156 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1157 WREG32(mmNIF_RTR_CTRL_2_SCRAM_SRAM_EN,
1158 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1159 WREG32(mmNIF_RTR_CTRL_3_SCRAM_SRAM_EN,
1160 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1161 WREG32(mmNIF_RTR_CTRL_4_SCRAM_SRAM_EN,
1162 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1163 WREG32(mmNIF_RTR_CTRL_5_SCRAM_SRAM_EN,
1164 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1165 WREG32(mmNIF_RTR_CTRL_6_SCRAM_SRAM_EN,
1166 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1167 WREG32(mmNIF_RTR_CTRL_7_SCRAM_SRAM_EN,
1168 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1169
1170 WREG32(mmSIF_RTR_CTRL_0_SCRAM_SRAM_EN,
1171 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1172 WREG32(mmSIF_RTR_CTRL_1_SCRAM_SRAM_EN,
1173 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1174 WREG32(mmSIF_RTR_CTRL_2_SCRAM_SRAM_EN,
1175 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1176 WREG32(mmSIF_RTR_CTRL_3_SCRAM_SRAM_EN,
1177 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1178 WREG32(mmSIF_RTR_CTRL_4_SCRAM_SRAM_EN,
1179 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1180 WREG32(mmSIF_RTR_CTRL_5_SCRAM_SRAM_EN,
1181 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1182 WREG32(mmSIF_RTR_CTRL_6_SCRAM_SRAM_EN,
1183 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1184 WREG32(mmSIF_RTR_CTRL_7_SCRAM_SRAM_EN,
1185 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1186
1187 WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_SRAM_EN,
1188 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1189 WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_SRAM_EN,
1190 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1191 WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_SRAM_EN,
1192 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1193 WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_SRAM_EN,
1194 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1195 WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_SRAM_EN,
1196 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1197 WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_SRAM_EN,
1198 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1199 WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_SRAM_EN,
1200 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1201 WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_SRAM_EN,
1202 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1203
1204 gaudi->hw_cap_initialized |= HW_CAP_SRAM_SCRAMBLER;
1205}
1206
1207static void gaudi_init_scrambler_hbm(struct hl_device *hdev)
1208{
1209 struct gaudi_device *gaudi = hdev->asic_specific;
1210
1211 if (gaudi->hw_cap_initialized & HW_CAP_HBM_SCRAMBLER)
1212 return;
1213
1214 if (!hdev->dram_scrambler_enable)
1215 return;
1216
1217 WREG32(mmNIF_RTR_CTRL_0_SCRAM_HBM_EN,
1218 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1219 WREG32(mmNIF_RTR_CTRL_1_SCRAM_HBM_EN,
1220 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1221 WREG32(mmNIF_RTR_CTRL_2_SCRAM_HBM_EN,
1222 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1223 WREG32(mmNIF_RTR_CTRL_3_SCRAM_HBM_EN,
1224 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1225 WREG32(mmNIF_RTR_CTRL_4_SCRAM_HBM_EN,
1226 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1227 WREG32(mmNIF_RTR_CTRL_5_SCRAM_HBM_EN,
1228 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1229 WREG32(mmNIF_RTR_CTRL_6_SCRAM_HBM_EN,
1230 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1231 WREG32(mmNIF_RTR_CTRL_7_SCRAM_HBM_EN,
1232 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1233
1234 WREG32(mmSIF_RTR_CTRL_0_SCRAM_HBM_EN,
1235 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1236 WREG32(mmSIF_RTR_CTRL_1_SCRAM_HBM_EN,
1237 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1238 WREG32(mmSIF_RTR_CTRL_2_SCRAM_HBM_EN,
1239 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1240 WREG32(mmSIF_RTR_CTRL_3_SCRAM_HBM_EN,
1241 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1242 WREG32(mmSIF_RTR_CTRL_4_SCRAM_HBM_EN,
1243 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1244 WREG32(mmSIF_RTR_CTRL_5_SCRAM_HBM_EN,
1245 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1246 WREG32(mmSIF_RTR_CTRL_6_SCRAM_HBM_EN,
1247 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1248 WREG32(mmSIF_RTR_CTRL_7_SCRAM_HBM_EN,
1249 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1250
1251 WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_HBM_EN,
1252 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1253 WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_HBM_EN,
1254 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1255 WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_HBM_EN,
1256 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1257 WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_HBM_EN,
1258 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1259 WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_HBM_EN,
1260 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1261 WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_HBM_EN,
1262 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1263 WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_HBM_EN,
1264 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1265 WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_HBM_EN,
1266 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1267
1268 gaudi->hw_cap_initialized |= HW_CAP_HBM_SCRAMBLER;
1269}
1270
1271static void gaudi_init_e2e(struct hl_device *hdev)
1272{
1273 WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 247 >> 3);
1274 WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 785 >> 3);
1275 WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 49);
1276 WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 101);
1277
1278 WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
1279 WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
1280 WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
1281 WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);
1282
1283 WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
1284 WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
1285 WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
1286 WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);
1287
1288 WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
1289 WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
1290 WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
1291 WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);
1292
1293 WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
1294 WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
1295 WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
1296 WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);
1297
1298 WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
1299 WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
1300 WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
1301 WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);
1302
1303 WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
1304 WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
1305 WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
1306 WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);
1307
1308 WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 297 >> 3);
1309 WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 908 >> 3);
1310 WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 19);
1311 WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 19);
1312
1313 WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 318 >> 3);
1314 WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 956 >> 3);
1315 WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 79);
1316 WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 163);
1317
1318 WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
1319 WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
1320 WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
1321 WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);
1322
1323 WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
1324 WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
1325 WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
1326 WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);
1327
1328 WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
1329 WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
1330 WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
1331 WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);
1332
1333 WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
1334 WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
1335 WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
1336 WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);
1337
1338 WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
1339 WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
1340 WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
1341 WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);
1342
1343 WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
1344 WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
1345 WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
1346 WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);
1347
1348 WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 318 >> 3);
1349 WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 956 >> 3);
1350 WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 79);
1351 WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 79);
1352
1353 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
1354 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
1355 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
1356 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
1357
1358 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
1359 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
1360 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
1361 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
1362
1363 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
1364 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
1365 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
1366 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
1367
1368 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
1369 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
1370 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
1371 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
1372
1373 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
1374 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
1375 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
1376 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
1377
1378 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
1379 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
1380 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
1381 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
1382
1383 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
1384 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
1385 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
1386 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
1387
1388 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
1389 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
1390 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
1391 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
1392
1393 if (!hdev->dram_scrambler_enable) {
1394 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_SEL_0, 0x21);
1395 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_SEL_1, 0x22);
1396 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_OFFSET_18, 0x1F);
1397 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_PC_SEL_3, 0x20);
1398
1399 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_SEL_0, 0x21);
1400 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_SEL_1, 0x22);
1401 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_OFFSET_18, 0x1F);
1402 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_PC_SEL_3, 0x20);
1403
1404 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_SEL_0, 0x21);
1405 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_SEL_1, 0x22);
1406 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_OFFSET_18, 0x1F);
1407 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_PC_SEL_3, 0x20);
1408
1409 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_SEL_0, 0x21);
1410 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_SEL_1, 0x22);
1411 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_OFFSET_18, 0x1F);
1412 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_PC_SEL_3, 0x20);
1413
1414 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_SEL_0, 0x21);
1415 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_SEL_1, 0x22);
1416 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_OFFSET_18, 0x1F);
1417 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_PC_SEL_3, 0x20);
1418
1419 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_SEL_0, 0x21);
1420 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_SEL_1, 0x22);
1421 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_OFFSET_18, 0x1F);
1422 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_PC_SEL_3, 0x20);
1423
1424 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_SEL_0, 0x21);
1425 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_SEL_1, 0x22);
1426 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_OFFSET_18, 0x1F);
1427 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_PC_SEL_3, 0x20);
1428
1429 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_SEL_0, 0x21);
1430 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_SEL_1, 0x22);
1431 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_OFFSET_18, 0x1F);
1432 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_PC_SEL_3, 0x20);
1433
1434 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_SEL_0, 0x21);
1435 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_SEL_1, 0x22);
1436 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_OFFSET_18, 0x1F);
1437 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_PC_SEL_3, 0x20);
1438
1439 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_SEL_0, 0x21);
1440 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_SEL_1, 0x22);
1441 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_OFFSET_18, 0x1F);
1442 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_PC_SEL_3, 0x20);
1443
1444 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_SEL_0, 0x21);
1445 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_SEL_1, 0x22);
1446 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_OFFSET_18, 0x1F);
1447 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_PC_SEL_3, 0x20);
1448
1449 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_SEL_0, 0x21);
1450 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_SEL_1, 0x22);
1451 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_OFFSET_18, 0x1F);
1452 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_PC_SEL_3, 0x20);
1453
1454 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_SEL_0, 0x21);
1455 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_SEL_1, 0x22);
1456 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_OFFSET_18, 0x1F);
1457 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_PC_SEL_3, 0x20);
1458
1459 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_SEL_0, 0x21);
1460 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_SEL_1, 0x22);
1461 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_OFFSET_18, 0x1F);
1462 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_PC_SEL_3, 0x20);
1463
1464 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_SEL_0, 0x21);
1465 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_SEL_1, 0x22);
1466 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_OFFSET_18, 0x1F);
1467 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_PC_SEL_3, 0x20);
1468
1469 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_SEL_0, 0x21);
1470 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_SEL_1, 0x22);
1471 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_OFFSET_18, 0x1F);
1472 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_PC_SEL_3, 0x20);
1473
1474 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_SEL_0, 0x21);
1475 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_SEL_1, 0x22);
1476 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
1477 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
1478
1479 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_SEL_0, 0x21);
1480 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_SEL_1, 0x22);
1481 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
1482 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
1483
1484 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_SEL_0, 0x21);
1485 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_SEL_1, 0x22);
1486 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
1487 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
1488
1489 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_SEL_0, 0x21);
1490 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_SEL_1, 0x22);
1491 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
1492 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
1493
1494 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_SEL_0, 0x21);
1495 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_SEL_1, 0x22);
1496 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
1497 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
1498
1499 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_SEL_0, 0x21);
1500 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_SEL_1, 0x22);
1501 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
1502 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
1503
1504 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_SEL_0, 0x21);
1505 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_SEL_1, 0x22);
1506 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
1507 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
1508
1509 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_SEL_0, 0x21);
1510 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_SEL_1, 0x22);
1511 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
1512 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
1513 }
1514
1515 WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_EN,
1516 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1517 WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_EN,
1518 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1519
1520 WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_EN,
1521 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1522 WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_EN,
1523 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1524
1525 WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_EN,
1526 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1527 WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_EN,
1528 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1529
1530 WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_EN,
1531 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1532 WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_EN,
1533 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1534
1535 WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_EN,
1536 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1537 WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_EN,
1538 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1539
1540 WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_EN,
1541 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1542 WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_EN,
1543 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1544
1545 WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_EN,
1546 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1547 WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_EN,
1548 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1549
1550 WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_EN,
1551 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1552 WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_EN,
1553 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1554
1555 WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_EN,
1556 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1557 WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_EN,
1558 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1559
1560 WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_EN,
1561 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1562 WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_EN,
1563 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1564
1565 WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_EN,
1566 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1567 WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_EN,
1568 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1569
1570 WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_EN,
1571 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1572 WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_EN,
1573 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1574
1575 WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_EN,
1576 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1577 WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_EN,
1578 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1579
1580 WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_EN,
1581 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1582 WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_EN,
1583 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1584
1585 WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_EN,
1586 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1587 WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_EN,
1588 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1589
1590 WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_EN,
1591 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1592 WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_EN,
1593 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1594
1595 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_EN,
1596 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
1597 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_EN,
1598 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
1599
1600 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_EN,
1601 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
1602 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_EN,
1603 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
1604
1605 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_EN,
1606 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
1607 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_EN,
1608 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
1609
1610 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_EN,
1611 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
1612 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_EN,
1613 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
1614
1615 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_EN,
1616 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
1617 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_EN,
1618 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
1619
1620 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_EN,
1621 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
1622 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_EN,
1623 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
1624
1625 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_EN,
1626 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
1627 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_EN,
1628 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
1629
1630 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_EN,
1631 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
1632 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_EN,
1633 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
1634}
1635
1636static void gaudi_init_hbm_cred(struct hl_device *hdev)
1637{
	u32 hbm0_wr, hbm1_wr, hbm0_rd, hbm1_rd;
1639
1640 hbm0_wr = 0x33333333;
1641 hbm1_wr = 0x33333333;
1642 hbm0_rd = 0x77777777;
1643 hbm1_rd = 0xDDDDDDDD;
1644
1645 WREG32(mmDMA_IF_E_N_HBM0_WR_CRED_CNT, hbm0_wr);
1646 WREG32(mmDMA_IF_E_N_HBM1_WR_CRED_CNT, hbm1_wr);
1647 WREG32(mmDMA_IF_E_N_HBM0_RD_CRED_CNT, hbm0_rd);
1648 WREG32(mmDMA_IF_E_N_HBM1_RD_CRED_CNT, hbm1_rd);
1649
1650 WREG32(mmDMA_IF_E_S_HBM0_WR_CRED_CNT, hbm0_wr);
1651 WREG32(mmDMA_IF_E_S_HBM1_WR_CRED_CNT, hbm1_wr);
1652 WREG32(mmDMA_IF_E_S_HBM0_RD_CRED_CNT, hbm0_rd);
1653 WREG32(mmDMA_IF_E_S_HBM1_RD_CRED_CNT, hbm1_rd);
1654
1655 WREG32(mmDMA_IF_W_N_HBM0_WR_CRED_CNT, hbm0_wr);
1656 WREG32(mmDMA_IF_W_N_HBM1_WR_CRED_CNT, hbm1_wr);
1657 WREG32(mmDMA_IF_W_N_HBM0_RD_CRED_CNT, hbm0_rd);
1658 WREG32(mmDMA_IF_W_N_HBM1_RD_CRED_CNT, hbm1_rd);
1659
1660 WREG32(mmDMA_IF_W_S_HBM0_WR_CRED_CNT, hbm0_wr);
1661 WREG32(mmDMA_IF_W_S_HBM1_WR_CRED_CNT, hbm1_wr);
1662 WREG32(mmDMA_IF_W_S_HBM0_RD_CRED_CNT, hbm0_rd);
1663 WREG32(mmDMA_IF_W_S_HBM1_RD_CRED_CNT, hbm1_rd);
1664
1665 WREG32(mmDMA_IF_E_N_HBM_CRED_EN_0,
1666 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
1667 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
1668 WREG32(mmDMA_IF_E_S_HBM_CRED_EN_0,
1669 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
1670 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
1671 WREG32(mmDMA_IF_W_N_HBM_CRED_EN_0,
1672 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
1673 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
1674 WREG32(mmDMA_IF_W_S_HBM_CRED_EN_0,
1675 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
1676 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
1677
1678 WREG32(mmDMA_IF_E_N_HBM_CRED_EN_1,
1679 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
1680 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
1681 WREG32(mmDMA_IF_E_S_HBM_CRED_EN_1,
1682 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
1683 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
1684 WREG32(mmDMA_IF_W_N_HBM_CRED_EN_1,
1685 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
1686 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
1687 WREG32(mmDMA_IF_W_S_HBM_CRED_EN_1,
1688 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
1689 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
1690}
1691
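/*
 * Derive the HBM frequency from the HBM PLL dividers and, for the known
 * frequencies, program the DMA_IF response weights and (when enabled)
 * the per-channel HBM/SRAM rate limiters: saturation, reset and timeout.
 */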
1692static void gaudi_init_rate_limiter(struct hl_device *hdev)
1693{
1694 u32 nr, nf, od, sat, rst, timeout;
1695 u64 freq;
1696
1697 nr = RREG32(mmPSOC_HBM_PLL_NR);
1698 nf = RREG32(mmPSOC_HBM_PLL_NF);
1699 od = RREG32(mmPSOC_HBM_PLL_OD);
1700 freq = (50 * (nf + 1)) / ((nr + 1) * (od + 1));
1701
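	/*
	 * The constant 50 is assumed to be the PLL reference clock in MHz.
	 * For example, nf = 15, nr = 0, od = 0 would give
	 * (50 * 16) / (1 * 1) = 800 MHz.
	 */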
1702 dev_dbg(hdev->dev, "HBM frequency is %lluMHz\n", freq);
1703
1704	/* The configuration below is for 5 DDMA channels */
1705 if (freq == 800) {
1706 sat = 4;
1707 rst = 11;
1708 timeout = 15;
1709 } else if (freq == 900) {
1710 sat = 4;
1711 rst = 15;
1712 timeout = 16;
1713 } else if (freq == 950) {
1714 sat = 4;
1715 rst = 15;
1716 timeout = 15;
1717 } else {
1718 dev_warn(hdev->dev,
1719 "unsupported HBM frequency %lluMHz, no rate-limiters\n",
1720 freq);
1721 return;
1722 }
1723
1724 WREG32(mmDMA_IF_W_S_DOWN_RSP_MID_WGHT_0, 0x111);
1725 WREG32(mmDMA_IF_W_S_DOWN_RSP_MID_WGHT_1, 0x111);
1726 WREG32(mmDMA_IF_E_S_DOWN_RSP_MID_WGHT_0, 0x111);
1727 WREG32(mmDMA_IF_E_S_DOWN_RSP_MID_WGHT_1, 0x111);
1728 WREG32(mmDMA_IF_W_N_DOWN_RSP_MID_WGHT_0, 0x111);
1729 WREG32(mmDMA_IF_W_N_DOWN_RSP_MID_WGHT_1, 0x111);
1730 WREG32(mmDMA_IF_E_N_DOWN_RSP_MID_WGHT_0, 0x111);
1731 WREG32(mmDMA_IF_E_N_DOWN_RSP_MID_WGHT_1, 0x111);
1732
1733 if (!hdev->rl_enable) {
1734 dev_info(hdev->dev, "Rate limiters disabled\n");
1735 return;
1736 }
1737
1738 WREG32(mmDMA_IF_W_S_DOWN_CH0_RL_HBM_SAT, sat);
1739 WREG32(mmDMA_IF_W_S_DOWN_CH1_RL_HBM_SAT, sat);
1740 WREG32(mmDMA_IF_E_S_DOWN_CH0_RL_HBM_SAT, sat);
1741 WREG32(mmDMA_IF_E_S_DOWN_CH1_RL_HBM_SAT, sat);
1742 WREG32(mmDMA_IF_W_N_DOWN_CH0_RL_HBM_SAT, sat);
1743 WREG32(mmDMA_IF_W_N_DOWN_CH1_RL_HBM_SAT, sat);
1744 WREG32(mmDMA_IF_E_N_DOWN_CH0_RL_HBM_SAT, sat);
1745 WREG32(mmDMA_IF_E_N_DOWN_CH1_RL_HBM_SAT, sat);
1746
1747 WREG32(mmDMA_IF_W_S_DOWN_CH0_RL_HBM_RST, rst);
1748 WREG32(mmDMA_IF_W_S_DOWN_CH1_RL_HBM_RST, rst);
1749 WREG32(mmDMA_IF_E_S_DOWN_CH0_RL_HBM_RST, rst);
1750 WREG32(mmDMA_IF_E_S_DOWN_CH1_RL_HBM_RST, rst);
1751 WREG32(mmDMA_IF_W_N_DOWN_CH0_RL_HBM_RST, rst);
1752 WREG32(mmDMA_IF_W_N_DOWN_CH1_RL_HBM_RST, rst);
1753 WREG32(mmDMA_IF_E_N_DOWN_CH0_RL_HBM_RST, rst);
1754 WREG32(mmDMA_IF_E_N_DOWN_CH1_RL_HBM_RST, rst);
1755
1756 WREG32(mmDMA_IF_W_S_DOWN_CH0_RL_HBM_TIMEOUT, timeout);
1757 WREG32(mmDMA_IF_W_S_DOWN_CH1_RL_HBM_TIMEOUT, timeout);
1758 WREG32(mmDMA_IF_E_S_DOWN_CH0_RL_HBM_TIMEOUT, timeout);
1759 WREG32(mmDMA_IF_E_S_DOWN_CH1_RL_HBM_TIMEOUT, timeout);
1760 WREG32(mmDMA_IF_W_N_DOWN_CH0_RL_HBM_TIMEOUT, timeout);
1761 WREG32(mmDMA_IF_W_N_DOWN_CH1_RL_HBM_TIMEOUT, timeout);
1762 WREG32(mmDMA_IF_E_N_DOWN_CH0_RL_HBM_TIMEOUT, timeout);
1763 WREG32(mmDMA_IF_E_N_DOWN_CH1_RL_HBM_TIMEOUT, timeout);
1764
1765 WREG32(mmDMA_IF_W_S_DOWN_CH0_RL_HBM_EN, 1);
1766 WREG32(mmDMA_IF_W_S_DOWN_CH1_RL_HBM_EN, 1);
1767 WREG32(mmDMA_IF_E_S_DOWN_CH0_RL_HBM_EN, 1);
1768 WREG32(mmDMA_IF_E_S_DOWN_CH1_RL_HBM_EN, 1);
1769 WREG32(mmDMA_IF_W_N_DOWN_CH0_RL_HBM_EN, 1);
1770 WREG32(mmDMA_IF_W_N_DOWN_CH1_RL_HBM_EN, 1);
1771 WREG32(mmDMA_IF_E_N_DOWN_CH0_RL_HBM_EN, 1);
1772 WREG32(mmDMA_IF_E_N_DOWN_CH1_RL_HBM_EN, 1);
1773
1774 WREG32(mmDMA_IF_W_S_DOWN_CH0_RL_SRAM_SAT, sat);
1775 WREG32(mmDMA_IF_W_S_DOWN_CH1_RL_SRAM_SAT, sat);
1776 WREG32(mmDMA_IF_E_S_DOWN_CH0_RL_SRAM_SAT, sat);
1777 WREG32(mmDMA_IF_E_S_DOWN_CH1_RL_SRAM_SAT, sat);
1778 WREG32(mmDMA_IF_W_N_DOWN_CH0_RL_SRAM_SAT, sat);
1779 WREG32(mmDMA_IF_W_N_DOWN_CH1_RL_SRAM_SAT, sat);
1780 WREG32(mmDMA_IF_E_N_DOWN_CH0_RL_SRAM_SAT, sat);
1781 WREG32(mmDMA_IF_E_N_DOWN_CH1_RL_SRAM_SAT, sat);
1782
1783 WREG32(mmDMA_IF_W_S_DOWN_CH0_RL_SRAM_RST, rst);
1784 WREG32(mmDMA_IF_W_S_DOWN_CH1_RL_SRAM_RST, rst);
1785 WREG32(mmDMA_IF_E_S_DOWN_CH0_RL_SRAM_RST, rst);
1786 WREG32(mmDMA_IF_E_S_DOWN_CH1_RL_SRAM_RST, rst);
1787 WREG32(mmDMA_IF_W_N_DOWN_CH0_RL_SRAM_RST, rst);
1788 WREG32(mmDMA_IF_W_N_DOWN_CH1_RL_SRAM_RST, rst);
1789 WREG32(mmDMA_IF_E_N_DOWN_CH0_RL_SRAM_RST, rst);
1790 WREG32(mmDMA_IF_E_N_DOWN_CH1_RL_SRAM_RST, rst);
1791
1792 WREG32(mmDMA_IF_W_S_DOWN_CH0_RL_SRAM_TIMEOUT, timeout);
1793 WREG32(mmDMA_IF_W_S_DOWN_CH1_RL_SRAM_TIMEOUT, timeout);
1794 WREG32(mmDMA_IF_E_S_DOWN_CH0_RL_SRAM_TIMEOUT, timeout);
1795 WREG32(mmDMA_IF_E_S_DOWN_CH1_RL_SRAM_TIMEOUT, timeout);
1796 WREG32(mmDMA_IF_W_N_DOWN_CH0_RL_SRAM_TIMEOUT, timeout);
1797 WREG32(mmDMA_IF_W_N_DOWN_CH1_RL_SRAM_TIMEOUT, timeout);
1798 WREG32(mmDMA_IF_E_N_DOWN_CH0_RL_SRAM_TIMEOUT, timeout);
1799 WREG32(mmDMA_IF_E_N_DOWN_CH1_RL_SRAM_TIMEOUT, timeout);
1800
1801 WREG32(mmDMA_IF_W_S_DOWN_CH0_RL_SRAM_EN, 1);
1802 WREG32(mmDMA_IF_W_S_DOWN_CH1_RL_SRAM_EN, 1);
1803 WREG32(mmDMA_IF_E_S_DOWN_CH0_RL_SRAM_EN, 1);
1804 WREG32(mmDMA_IF_E_S_DOWN_CH1_RL_SRAM_EN, 1);
1805 WREG32(mmDMA_IF_W_N_DOWN_CH0_RL_SRAM_EN, 1);
1806 WREG32(mmDMA_IF_W_N_DOWN_CH1_RL_SRAM_EN, 1);
1807 WREG32(mmDMA_IF_E_N_DOWN_CH0_RL_SRAM_EN, 1);
1808 WREG32(mmDMA_IF_E_N_DOWN_CH1_RL_SRAM_EN, 1);
1809}
1810
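/*
 * One-time "golden" register setup: E2E credits, HBM credits, rate
 * limiters, clock-gating disable, TPC interrupt masking and I-cache fetch
 * configuration, clearing the first SRAM bytes for TDMA, MME rollup
 * counters and the H3-2081 PCIe outstanding-requests workaround.
 */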
1811static void gaudi_init_golden_registers(struct hl_device *hdev)
1812{
1813 u32 tpc_offset;
1814 int tpc_id, i;
1815
1816 gaudi_init_e2e(hdev);
1817
1818 gaudi_init_hbm_cred(hdev);
1819
1820 gaudi_init_rate_limiter(hdev);
1821
1822 gaudi_disable_clock_gating(hdev);
1823
1824 for (tpc_id = 0, tpc_offset = 0;
1825 tpc_id < TPC_NUMBER_OF_ENGINES;
1826 tpc_id++, tpc_offset += TPC_CFG_OFFSET) {
1827 /* Mask all arithmetic interrupts from TPC */
1828 WREG32(mmTPC0_CFG_TPC_INTR_MASK + tpc_offset, 0x8FFF);
1829 /* Set 16 cache lines */
1830 WREG32_FIELD(TPC0_CFG_MSS_CONFIG, tpc_offset,
1831 ICACHE_FETCH_LINE_NUM, 2);
1832 }
1833
1834 /* Make sure 1st 128 bytes in SRAM are 0 for Tensor DMA */
1835 for (i = 0 ; i < 128 ; i += 8)
1836 writeq(0, hdev->pcie_bar[SRAM_BAR_ID] + i);
1837
1838 WREG32(mmMME0_CTRL_EUS_ROLLUP_CNT_ADD, 3);
1839 WREG32(mmMME1_CTRL_EUS_ROLLUP_CNT_ADD, 3);
1840 WREG32(mmMME2_CTRL_EUS_ROLLUP_CNT_ADD, 3);
1841 WREG32(mmMME3_CTRL_EUS_ROLLUP_CNT_ADD, 3);
1842
1843 /* WA for H3-2081 */
1844 WREG32(mmPCIE_WRAP_MAX_OUTSTAND, 0x10ff);
1845}
1846
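/*
 * Configure a single stream of a PCI DMA QMAN: PQ base/size/pointers,
 * LDMA offsets and the four sync-manager message base addresses. The
 * RAZWI error reporting, arbitration error message, ARB watchdog and
 * GLBL_PROT settings are programmed only once per QMAN (qman_id == 0).
 */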
1847static void gaudi_init_pci_dma_qman(struct hl_device *hdev, int dma_id,
1848 int qman_id, dma_addr_t qman_pq_addr)
1849{
1850 u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
1851 u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
1852 u32 q_off, dma_qm_offset;
1853 u32 dma_qm_err_cfg;
1854
1855 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
1856
1857 mtr_base_en_lo = lower_32_bits(CFG_BASE +
1858 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
1859 mtr_base_en_hi = upper_32_bits(CFG_BASE +
1860 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
1861 so_base_en_lo = lower_32_bits(CFG_BASE +
1862 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
1863 so_base_en_hi = upper_32_bits(CFG_BASE +
1864 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
1865 mtr_base_ws_lo = lower_32_bits(CFG_BASE +
1866 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
1867 mtr_base_ws_hi = upper_32_bits(CFG_BASE +
1868 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
1869 so_base_ws_lo = lower_32_bits(CFG_BASE +
1870 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
1871 so_base_ws_hi = upper_32_bits(CFG_BASE +
1872 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
1873
1874 q_off = dma_qm_offset + qman_id * 4;
1875
1876 WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_pq_addr));
1877 WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_pq_addr));
1878
1879 WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HL_QUEUE_LENGTH));
1880 WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
1881 WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
1882
1883 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, 0x74);
1884 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 0x14);
1885 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 0x1C);
1886
1887 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
1888 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
1889 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
1890 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
1891 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
1892 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
1893 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
1894 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);
1895
1896 /* The following configuration is needed only once per QMAN */
1897 if (qman_id == 0) {
1898 /* Configure RAZWI IRQ */
1899 dma_qm_err_cfg = PCI_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
1900 if (hdev->stop_on_err) {
1901 dma_qm_err_cfg |=
1902 PCI_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
1903 }
1904
1905 WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
1906 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
1907 lower_32_bits(CFG_BASE +
1908 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
1909 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
1910 upper_32_bits(CFG_BASE +
1911 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
1912 WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
1913 gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
1914 dma_id);
1915
1916 WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
1917 QM_ARB_ERR_MSG_EN_MASK);
1918
1919 /* Increase ARB WDT to support streams architecture */
1920 WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset,
1921 GAUDI_ARB_WDT_TIMEOUT);
1922
1923 WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
1924 QMAN_EXTERNAL_MAKE_TRUSTED);
1925
1926 WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
1927 }
1928}
1929
1930static void gaudi_init_dma_core(struct hl_device *hdev, int dma_id)
1931{
1932 u32 dma_offset = dma_id * DMA_CORE_OFFSET;
1933 u32 dma_err_cfg = 1 << DMA0_CORE_ERR_CFG_ERR_MSG_EN_SHIFT;
1934
1935 /* Set to maximum possible according to physical size */
1936 WREG32(mmDMA0_CORE_RD_MAX_OUTSTAND + dma_offset, 0);
1937 WREG32(mmDMA0_CORE_RD_MAX_SIZE + dma_offset, 0);
1938
1939	/* The STOP_ON bit means the operation gets no completion in case of RAZWI */
1940 if (hdev->stop_on_err)
1941 dma_err_cfg |= 1 << DMA0_CORE_ERR_CFG_STOP_ON_ERR_SHIFT;
1942
1943 WREG32(mmDMA0_CORE_ERR_CFG + dma_offset, dma_err_cfg);
1944 WREG32(mmDMA0_CORE_ERRMSG_ADDR_LO + dma_offset,
1945 lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
1946 WREG32(mmDMA0_CORE_ERRMSG_ADDR_HI + dma_offset,
1947 upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
1948 WREG32(mmDMA0_CORE_ERRMSG_WDATA + dma_offset,
1949 gaudi_irq_map_table[GAUDI_EVENT_DMA0_CORE].cpu_id + dma_id);
1950 WREG32(mmDMA0_CORE_PROT + dma_offset,
1951 1 << DMA0_CORE_PROT_ERR_VAL_SHIFT);
1952 /* If the channel is secured, it should be in MMU bypass mode */
1953 WREG32(mmDMA0_CORE_SECURE_PROPS + dma_offset,
1954 1 << DMA0_CORE_SECURE_PROPS_MMBP_SHIFT);
1955 WREG32(mmDMA0_CORE_CFG_0 + dma_offset, 1 << DMA0_CORE_CFG_0_EN_SHIFT);
1956}
1957
1958static void gaudi_enable_qman(struct hl_device *hdev, int dma_id,
1959 u32 enable_mask)
1960{
1961 u32 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
1962
1963 WREG32(mmDMA0_QM_GLBL_CFG0 + dma_qm_offset, enable_mask);
1964}
1965
1966static void gaudi_init_pci_dma_qmans(struct hl_device *hdev)
1967{
1968 struct gaudi_device *gaudi = hdev->asic_specific;
1969 struct hl_hw_queue *q;
1970 int i, j, dma_id, cpu_skip, nic_skip, cq_id = 0, q_idx, msi_vec = 0;
1971
1972 if (gaudi->hw_cap_initialized & HW_CAP_PCI_DMA)
1973 return;
1974
1975 for (i = 0 ; i < PCI_DMA_NUMBER_OF_CHNLS ; i++) {
1976 dma_id = gaudi_dma_assignment[i];
1977 /*
1978		 * For queues after the CPU Q, add 1 to get the correct queue
1979		 * index. In addition, the CPU EQ and NIC IRQs must be counted
1980		 * in order to get the correct MSI vector.
1981 */
1982 if (dma_id > 1) {
1983 cpu_skip = 1;
1984 nic_skip = NIC_NUMBER_OF_ENGINES;
1985 } else {
1986 cpu_skip = 0;
1987 nic_skip = 0;
1988 }
1989
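		/*
		 * Illustrative example: for a PCI DMA engine with dma_id 5,
		 * stream j maps to kernel queue 4 * 5 + j + 1, since the CPU
		 * queue has to be skipped.
		 */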
1990 for (j = 0 ; j < QMAN_STREAMS ; j++) {
1991 q_idx = 4 * dma_id + j + cpu_skip;
1992 q = &hdev->kernel_queues[q_idx];
1993 q->cq_id = cq_id++;
1994 q->msi_vec = nic_skip + cpu_skip + msi_vec++;
1995 gaudi_init_pci_dma_qman(hdev, dma_id, j,
1996 q->bus_address);
1997 }
1998
1999 gaudi_init_dma_core(hdev, dma_id);
2000
2001 gaudi_enable_qman(hdev, dma_id, PCI_DMA_QMAN_ENABLE);
2002 }
2003
2004 gaudi->hw_cap_initialized |= HW_CAP_PCI_DMA;
2005}
2006
2007static void gaudi_init_hbm_dma_qman(struct hl_device *hdev, int dma_id,
2008 int qman_id, u64 qman_base_addr)
2009{
2010 u32 mtr_base_lo, mtr_base_hi;
2011 u32 so_base_lo, so_base_hi;
2012 u32 q_off, dma_qm_offset;
2013 u32 dma_qm_err_cfg;
2014
2015 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2016
2017 mtr_base_lo = lower_32_bits(CFG_BASE +
2018 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2019 mtr_base_hi = upper_32_bits(CFG_BASE +
2020 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2021 so_base_lo = lower_32_bits(CFG_BASE +
2022 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2023 so_base_hi = upper_32_bits(CFG_BASE +
2024 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2025
2026 q_off = dma_qm_offset + qman_id * 4;
2027
2028 if (qman_id < 4) {
2029 WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off,
2030 lower_32_bits(qman_base_addr));
2031 WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off,
2032 upper_32_bits(qman_base_addr));
2033
2034 WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HBM_DMA_QMAN_LENGTH));
2035 WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
2036 WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
2037
2038 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, 0x81BC);
2039 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 0x81B4);
2040 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 0x1C);
2041 } else {
2042 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, 0x74);
2043 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 0x14);
2044 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 0x1C);
2045
2046 /* Configure RAZWI IRQ */
2047 dma_qm_err_cfg = HBM_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2048 if (hdev->stop_on_err) {
2049 dma_qm_err_cfg |=
2050 HBM_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2051 }
2052 WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
2053
2054 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
2055 lower_32_bits(CFG_BASE +
2056 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2057 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
2058 upper_32_bits(CFG_BASE +
2059 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2060 WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
2061 gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
2062 dma_id);
2063
2064 WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
2065 QM_ARB_ERR_MSG_EN_MASK);
2066
2067 /* Increase ARB WDT to support streams architecture */
2068 WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset,
2069 GAUDI_ARB_WDT_TIMEOUT);
2070
2071 WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
2072 WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
2073 QMAN_INTERNAL_MAKE_TRUSTED);
2074 }
2075
2076 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_lo);
2077 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_hi);
2078 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_lo);
2079 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_hi);
2080}
2081
2082static void gaudi_init_hbm_dma_qmans(struct hl_device *hdev)
2083{
2084 struct gaudi_device *gaudi = hdev->asic_specific;
2085 struct gaudi_internal_qman_info *q;
2086 u64 qman_base_addr;
2087 int i, j, dma_id, internal_q_index;
2088
2089 if (gaudi->hw_cap_initialized & HW_CAP_HBM_DMA)
2090 return;
2091
2092 for (i = 0 ; i < HBM_DMA_NUMBER_OF_CHNLS ; i++) {
2093 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1 + i];
2094
2095 for (j = 0 ; j < QMAN_STREAMS ; j++) {
2096 /*
2097 * Add the CPU queue in order to get the correct queue
2098			 * number, as all internal queues are placed after it
2099 */
2100 internal_q_index = dma_id * QMAN_STREAMS + j + 1;
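			/* e.g. with QMAN_STREAMS == 4: dma_id 2, stream 0 -> index 2 * 4 + 0 + 1 = 9 */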
2101
2102 q = &gaudi->internal_qmans[internal_q_index];
2103 qman_base_addr = (u64) q->pq_dma_addr;
2104 gaudi_init_hbm_dma_qman(hdev, dma_id, j,
2105 qman_base_addr);
2106 }
2107
2108 /* Initializing lower CP for HBM DMA QMAN */
2109 gaudi_init_hbm_dma_qman(hdev, dma_id, 4, 0);
2110
2111 gaudi_init_dma_core(hdev, dma_id);
2112
2113 gaudi_enable_qman(hdev, dma_id, HBM_DMA_QMAN_ENABLE);
2114 }
2115
2116 gaudi->hw_cap_initialized |= HW_CAP_HBM_DMA;
2117}
2118
2119static void gaudi_init_mme_qman(struct hl_device *hdev, u32 mme_offset,
2120 int qman_id, u64 qman_base_addr)
2121{
2122 u32 mtr_base_lo, mtr_base_hi;
2123 u32 so_base_lo, so_base_hi;
2124 u32 q_off, mme_id;
2125 u32 mme_qm_err_cfg;
2126
2127 mtr_base_lo = lower_32_bits(CFG_BASE +
2128 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2129 mtr_base_hi = upper_32_bits(CFG_BASE +
2130 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2131 so_base_lo = lower_32_bits(CFG_BASE +
2132 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2133 so_base_hi = upper_32_bits(CFG_BASE +
2134 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2135
2136 q_off = mme_offset + qman_id * 4;
2137
2138 if (qman_id < 4) {
2139 WREG32(mmMME0_QM_PQ_BASE_LO_0 + q_off,
2140 lower_32_bits(qman_base_addr));
2141 WREG32(mmMME0_QM_PQ_BASE_HI_0 + q_off,
2142 upper_32_bits(qman_base_addr));
2143
2144 WREG32(mmMME0_QM_PQ_SIZE_0 + q_off, ilog2(MME_QMAN_LENGTH));
2145 WREG32(mmMME0_QM_PQ_PI_0 + q_off, 0);
2146 WREG32(mmMME0_QM_PQ_CI_0 + q_off, 0);
2147
2148 WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, 0x81BC);
2149 WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 0x81B4);
2150 WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 0x1C);
2151 } else {
2152 WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, 0x74);
2153 WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 0x14);
2154 WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 0x1C);
2155
2156 /* Configure RAZWI IRQ */
2157 mme_id = mme_offset /
2158 (mmMME1_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0);
2159
2160 mme_qm_err_cfg = MME_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2161 if (hdev->stop_on_err) {
2162 mme_qm_err_cfg |=
2163 MME_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2164 }
2165 WREG32(mmMME0_QM_GLBL_ERR_CFG + mme_offset, mme_qm_err_cfg);
2166 WREG32(mmMME0_QM_GLBL_ERR_ADDR_LO + mme_offset,
2167 lower_32_bits(CFG_BASE +
2168 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2169 WREG32(mmMME0_QM_GLBL_ERR_ADDR_HI + mme_offset,
2170 upper_32_bits(CFG_BASE +
2171 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2172 WREG32(mmMME0_QM_GLBL_ERR_WDATA + mme_offset,
2173 gaudi_irq_map_table[GAUDI_EVENT_MME0_QM].cpu_id +
2174 mme_id);
2175
2176 WREG32(mmMME0_QM_ARB_ERR_MSG_EN + mme_offset,
2177 QM_ARB_ERR_MSG_EN_MASK);
2178
2179 /* Increase ARB WDT to support streams architecture */
2180 WREG32(mmMME0_QM_ARB_SLV_CHOISE_WDT + mme_offset,
2181 GAUDI_ARB_WDT_TIMEOUT);
2182
2183 WREG32(mmMME0_QM_GLBL_CFG1 + mme_offset, 0);
2184 WREG32(mmMME0_QM_GLBL_PROT + mme_offset,
2185 QMAN_INTERNAL_MAKE_TRUSTED);
2186 }
2187
2188 WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_lo);
2189 WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_hi);
2190 WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_lo);
2191 WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_hi);
2192}
2193
2194static void gaudi_init_mme_qmans(struct hl_device *hdev)
2195{
2196 struct gaudi_device *gaudi = hdev->asic_specific;
2197 struct gaudi_internal_qman_info *q;
2198 u64 qman_base_addr;
2199 u32 mme_offset;
2200 int i, internal_q_index;
2201
2202 if (gaudi->hw_cap_initialized & HW_CAP_MME)
2203 return;
2204
2205 /*
2206 * map GAUDI_QUEUE_ID_MME_0_X to the N_W_MME (mmMME2_QM_BASE)
2207 * and GAUDI_QUEUE_ID_MME_1_X to the S_W_MME (mmMME0_QM_BASE)
2208 */
2209
2210 mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
2211
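	/*
	 * The first four streams (i = 0..3) are configured on MME2; after
	 * i == 3 the offset drops to 0 so the remaining streams are
	 * configured on MME0, matching the queue-ID mapping described above.
	 */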
2212 for (i = 0 ; i < MME_NUMBER_OF_QMANS ; i++) {
2213 internal_q_index = GAUDI_QUEUE_ID_MME_0_0 + i;
2214 q = &gaudi->internal_qmans[internal_q_index];
2215 qman_base_addr = (u64) q->pq_dma_addr;
2216 gaudi_init_mme_qman(hdev, mme_offset, (i & 0x3),
2217 qman_base_addr);
2218 if (i == 3)
2219 mme_offset = 0;
2220 }
2221
2222 /* Initializing lower CP for MME QMANs */
2223 mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
2224 gaudi_init_mme_qman(hdev, mme_offset, 4, 0);
2225 gaudi_init_mme_qman(hdev, 0, 4, 0);
2226
2227 WREG32(mmMME2_QM_GLBL_CFG0, QMAN_MME_ENABLE);
2228 WREG32(mmMME0_QM_GLBL_CFG0, QMAN_MME_ENABLE);
2229
2230 gaudi->hw_cap_initialized |= HW_CAP_MME;
2231}
2232
2233static void gaudi_init_tpc_qman(struct hl_device *hdev, u32 tpc_offset,
2234 int qman_id, u64 qman_base_addr)
2235{
2236 u32 mtr_base_lo, mtr_base_hi;
2237 u32 so_base_lo, so_base_hi;
2238 u32 q_off, tpc_id;
2239 u32 tpc_qm_err_cfg;
2240
2241 mtr_base_lo = lower_32_bits(CFG_BASE +
2242 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2243 mtr_base_hi = upper_32_bits(CFG_BASE +
2244 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2245 so_base_lo = lower_32_bits(CFG_BASE +
2246 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2247 so_base_hi = upper_32_bits(CFG_BASE +
2248 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2249
2250 q_off = tpc_offset + qman_id * 4;
2251
2252 if (qman_id < 4) {
2253 WREG32(mmTPC0_QM_PQ_BASE_LO_0 + q_off,
2254 lower_32_bits(qman_base_addr));
2255 WREG32(mmTPC0_QM_PQ_BASE_HI_0 + q_off,
2256 upper_32_bits(qman_base_addr));
2257
2258 WREG32(mmTPC0_QM_PQ_SIZE_0 + q_off, ilog2(TPC_QMAN_LENGTH));
2259 WREG32(mmTPC0_QM_PQ_PI_0 + q_off, 0);
2260 WREG32(mmTPC0_QM_PQ_CI_0 + q_off, 0);
2261
2262 WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, 0x81BC);
2263 WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 0x81B4);
2264 WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 0x1C);
2265 } else {
2266 WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, 0x74);
2267 WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 0x14);
2268 WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 0x1C);
2269
2270 /* Configure RAZWI IRQ */
2271 tpc_id = tpc_offset /
2272 (mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0);
2273
2274 tpc_qm_err_cfg = TPC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2275 if (hdev->stop_on_err) {
2276 tpc_qm_err_cfg |=
2277 TPC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2278 }
2279
2280 WREG32(mmTPC0_QM_GLBL_ERR_CFG + tpc_offset, tpc_qm_err_cfg);
2281 WREG32(mmTPC0_QM_GLBL_ERR_ADDR_LO + tpc_offset,
2282 lower_32_bits(CFG_BASE +
2283 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2284 WREG32(mmTPC0_QM_GLBL_ERR_ADDR_HI + tpc_offset,
2285 upper_32_bits(CFG_BASE +
2286 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2287 WREG32(mmTPC0_QM_GLBL_ERR_WDATA + tpc_offset,
2288 gaudi_irq_map_table[GAUDI_EVENT_TPC0_QM].cpu_id +
2289 tpc_id);
2290
2291 WREG32(mmTPC0_QM_ARB_ERR_MSG_EN + tpc_offset,
2292 QM_ARB_ERR_MSG_EN_MASK);
2293
2294 /* Increase ARB WDT to support streams architecture */
2295 WREG32(mmTPC0_QM_ARB_SLV_CHOISE_WDT + tpc_offset,
2296 GAUDI_ARB_WDT_TIMEOUT);
2297
2298 WREG32(mmTPC0_QM_GLBL_CFG1 + tpc_offset, 0);
2299 WREG32(mmTPC0_QM_GLBL_PROT + tpc_offset,
2300 QMAN_INTERNAL_MAKE_TRUSTED);
2301 }
2302
2303 WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_lo);
2304 WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_hi);
2305 WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_lo);
2306 WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_hi);
2307}
2308
2309static void gaudi_init_tpc_qmans(struct hl_device *hdev)
2310{
2311 struct gaudi_device *gaudi = hdev->asic_specific;
2312 struct gaudi_internal_qman_info *q;
2313 u64 qman_base_addr;
2314 u32 so_base_hi, tpc_offset = 0;
2315 u32 tpc_delta = mmTPC1_CFG_SM_BASE_ADDRESS_HIGH -
2316 mmTPC0_CFG_SM_BASE_ADDRESS_HIGH;
2317 int i, tpc_id, internal_q_index;
2318
2319 if (gaudi->hw_cap_initialized & HW_CAP_TPC_MASK)
2320 return;
2321
2322 so_base_hi = upper_32_bits(CFG_BASE +
2323 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2324
2325 for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
2326 for (i = 0 ; i < QMAN_STREAMS ; i++) {
2327 internal_q_index = GAUDI_QUEUE_ID_TPC_0_0 +
2328 tpc_id * QMAN_STREAMS + i;
2329 q = &gaudi->internal_qmans[internal_q_index];
2330 qman_base_addr = (u64) q->pq_dma_addr;
2331 gaudi_init_tpc_qman(hdev, tpc_offset, i,
2332 qman_base_addr);
2333
2334 if (i == 3) {
2335 /* Initializing lower CP for TPC QMAN */
2336 gaudi_init_tpc_qman(hdev, tpc_offset, 4, 0);
2337
2338 /* Enable the QMAN and TPC channel */
2339 WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset,
2340 QMAN_TPC_ENABLE);
2341 }
2342 }
2343
2344 WREG32(mmTPC0_CFG_SM_BASE_ADDRESS_HIGH + tpc_id * tpc_delta,
2345 so_base_hi);
2346
2347 tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
2348
2349 gaudi->hw_cap_initialized |= 1 << (HW_CAP_TPC_SHIFT + tpc_id);
2350 }
2351}
2352
2353static void gaudi_disable_pci_dma_qmans(struct hl_device *hdev)
2354{
2355 struct gaudi_device *gaudi = hdev->asic_specific;
2356
2357 if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
2358 return;
2359
2360 WREG32(mmDMA0_QM_GLBL_CFG0, 0);
2361 WREG32(mmDMA1_QM_GLBL_CFG0, 0);
2362 WREG32(mmDMA5_QM_GLBL_CFG0, 0);
2363}
2364
2365static void gaudi_disable_hbm_dma_qmans(struct hl_device *hdev)
2366{
2367 struct gaudi_device *gaudi = hdev->asic_specific;
2368
2369 if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
2370 return;
2371
2372 WREG32(mmDMA2_QM_GLBL_CFG0, 0);
2373 WREG32(mmDMA3_QM_GLBL_CFG0, 0);
2374 WREG32(mmDMA4_QM_GLBL_CFG0, 0);
2375 WREG32(mmDMA6_QM_GLBL_CFG0, 0);
2376 WREG32(mmDMA7_QM_GLBL_CFG0, 0);
2377}
2378
2379static void gaudi_disable_mme_qmans(struct hl_device *hdev)
2380{
2381 struct gaudi_device *gaudi = hdev->asic_specific;
2382
2383 if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
2384 return;
2385
2386 WREG32(mmMME2_QM_GLBL_CFG0, 0);
2387 WREG32(mmMME0_QM_GLBL_CFG0, 0);
2388}
2389
2390static void gaudi_disable_tpc_qmans(struct hl_device *hdev)
2391{
2392 struct gaudi_device *gaudi = hdev->asic_specific;
2393 u32 tpc_offset = 0;
2394 int tpc_id;
2395
2396 if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
2397 return;
2398
2399 for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
2400 WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset, 0);
2401 tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
2402 }
2403}
2404
2405static void gaudi_stop_pci_dma_qmans(struct hl_device *hdev)
2406{
2407 struct gaudi_device *gaudi = hdev->asic_specific;
2408
2409 if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
2410 return;
2411
2412 /* Stop upper CPs of QMANs 0.0 to 1.3 and 5.0 to 5.3 */
2413 WREG32(mmDMA0_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2414 WREG32(mmDMA1_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2415 WREG32(mmDMA5_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2416}
2417
2418static void gaudi_stop_hbm_dma_qmans(struct hl_device *hdev)
2419{
2420 struct gaudi_device *gaudi = hdev->asic_specific;
2421
2422 if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
2423 return;
2424
2425 /* Stop CPs of HBM DMA QMANs */
2426
2427 WREG32(mmDMA2_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2428 WREG32(mmDMA3_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2429 WREG32(mmDMA4_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2430 WREG32(mmDMA6_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2431 WREG32(mmDMA7_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2432}
2433
2434static void gaudi_stop_mme_qmans(struct hl_device *hdev)
2435{
2436 struct gaudi_device *gaudi = hdev->asic_specific;
2437
2438 if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
2439 return;
2440
2441 /* Stop CPs of MME QMANs */
2442 WREG32(mmMME2_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2443 WREG32(mmMME0_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2444}
2445
2446static void gaudi_stop_tpc_qmans(struct hl_device *hdev)
2447{
2448 struct gaudi_device *gaudi = hdev->asic_specific;
2449
2450 if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
2451 return;
2452
2453 WREG32(mmTPC0_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2454 WREG32(mmTPC1_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2455 WREG32(mmTPC2_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2456 WREG32(mmTPC3_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2457 WREG32(mmTPC4_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2458 WREG32(mmTPC5_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2459 WREG32(mmTPC6_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2460 WREG32(mmTPC7_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2461}
2462
2463static void gaudi_pci_dma_stall(struct hl_device *hdev)
2464{
2465 struct gaudi_device *gaudi = hdev->asic_specific;
2466
2467 if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
2468 return;
2469
2470 WREG32(mmDMA0_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
2471 WREG32(mmDMA1_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
2472 WREG32(mmDMA5_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
2473}
2474
2475static void gaudi_hbm_dma_stall(struct hl_device *hdev)
2476{
2477 struct gaudi_device *gaudi = hdev->asic_specific;
2478
2479 if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
2480 return;
2481
2482 WREG32(mmDMA2_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
2483 WREG32(mmDMA3_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
2484 WREG32(mmDMA4_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
2485 WREG32(mmDMA6_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
2486 WREG32(mmDMA7_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
2487}
2488
2489static void gaudi_mme_stall(struct hl_device *hdev)
2490{
2491 struct gaudi_device *gaudi = hdev->asic_specific;
2492
2493 if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
2494 return;
2495
2496 /* WA for H3-1800 bug: do ACC and SBAB writes twice */
2497 WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
2498 WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
2499 WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
2500 WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
2501 WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
2502 WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
2503 WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
2504 WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
2505 WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
2506 WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
2507 WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
2508 WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
2509 WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
2510 WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
2511 WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
2512 WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
2513}
2514
2515static void gaudi_tpc_stall(struct hl_device *hdev)
2516{
2517 struct gaudi_device *gaudi = hdev->asic_specific;
2518
2519 if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
2520 return;
2521
2522 WREG32(mmTPC0_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2523 WREG32(mmTPC1_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2524 WREG32(mmTPC2_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2525 WREG32(mmTPC3_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2526 WREG32(mmTPC4_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2527 WREG32(mmTPC5_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2528 WREG32(mmTPC6_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2529 WREG32(mmTPC7_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2530}
2531
2532static void gaudi_enable_clock_gating(struct hl_device *hdev)
2533{
2534 struct gaudi_device *gaudi = hdev->asic_specific;
2535 u32 qman_offset;
2536 int i;
2537
2538 if (!hdev->clock_gating)
2539 return;
2540
2541 if (gaudi->hw_cap_initialized & HW_CAP_CLK_GATE)
2542 return;
2543
2544	/* If we are in a debug session, don't enable clock gating as it
2545	 * may interfere with debugging
2546 */
2547 if (hdev->in_debug)
2548 return;
2549
2550 for (i = 0, qman_offset = 0 ; i < PCI_DMA_NUMBER_OF_CHNLS ; i++) {
2551 qman_offset = gaudi_dma_assignment[i] * DMA_QMAN_OFFSET;
2552 WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset, QMAN_CGM1_PWR_GATE_EN);
2553 WREG32(mmDMA0_QM_CGM_CFG + qman_offset,
2554 QMAN_UPPER_CP_CGM_PWR_GATE_EN);
2555 }
2556
2557 for (; i < HBM_DMA_NUMBER_OF_CHNLS ; i++) {
2558 qman_offset = gaudi_dma_assignment[i] * DMA_QMAN_OFFSET;
2559 WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset, QMAN_CGM1_PWR_GATE_EN);
2560 WREG32(mmDMA0_QM_CGM_CFG + qman_offset,
2561 QMAN_COMMON_CP_CGM_PWR_GATE_EN);
2562 }
2563
2564 WREG32(mmMME0_QM_CGM_CFG1, QMAN_CGM1_PWR_GATE_EN);
2565 WREG32(mmMME0_QM_CGM_CFG,
2566 QMAN_COMMON_CP_CGM_PWR_GATE_EN);
2567 WREG32(mmMME2_QM_CGM_CFG1, QMAN_CGM1_PWR_GATE_EN);
2568 WREG32(mmMME2_QM_CGM_CFG,
2569 QMAN_COMMON_CP_CGM_PWR_GATE_EN);
2570
2571 for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
2572 WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset,
2573 QMAN_CGM1_PWR_GATE_EN);
2574 WREG32(mmTPC0_QM_CGM_CFG + qman_offset,
2575 QMAN_COMMON_CP_CGM_PWR_GATE_EN);
2576
2577 qman_offset += TPC_QMAN_OFFSET;
2578 }
2579
2580 gaudi->hw_cap_initialized |= HW_CAP_CLK_GATE;
2581}
2582
2583static void gaudi_disable_clock_gating(struct hl_device *hdev)
2584{
2585 struct gaudi_device *gaudi = hdev->asic_specific;
2586 u32 qman_offset;
2587 int i;
2588
2589 if (!(gaudi->hw_cap_initialized & HW_CAP_CLK_GATE))
2590 return;
2591
2592 for (i = 0, qman_offset = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
2593 WREG32(mmDMA0_QM_CGM_CFG + qman_offset, 0);
2594 WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset, 0);
2595
2596 qman_offset += (mmDMA1_QM_CGM_CFG - mmDMA0_QM_CGM_CFG);
2597 }
2598
2599 WREG32(mmMME0_QM_CGM_CFG, 0);
2600 WREG32(mmMME0_QM_CGM_CFG1, 0);
2601 WREG32(mmMME2_QM_CGM_CFG, 0);
2602 WREG32(mmMME2_QM_CGM_CFG1, 0);
2603
2604 for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
2605 WREG32(mmTPC0_QM_CGM_CFG + qman_offset, 0);
2606 WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset, 0);
2607
2608 qman_offset += (mmTPC1_QM_CGM_CFG - mmTPC0_QM_CGM_CFG);
2609 }
2610
2611 gaudi->hw_cap_initialized &= ~(HW_CAP_CLK_GATE);
2612}
2613
2614static void gaudi_enable_timestamp(struct hl_device *hdev)
2615{
2616 /* Disable the timestamp counter */
2617 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
2618
2619 /* Zero the lower/upper parts of the 64-bit counter */
2620 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0xC, 0);
2621 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0x8, 0);
2622
2623 /* Enable the counter */
2624 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 1);
2625}
2626
2627static void gaudi_disable_timestamp(struct hl_device *hdev)
2628{
2629 /* Disable the timestamp counter */
2630 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
2631}
2632
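/*
 * Orderly shutdown of the compute engines: on hard reset, first send the
 * device CPU to WFE. Then stop all QMAN CPs, disable clock gating, stall
 * the DMA/TPC/MME engines, disable the QMANs and the timestamp counter,
 * and finally disable MSI (hard reset) or sync outstanding IRQs.
 */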
2633static void gaudi_halt_engines(struct hl_device *hdev, bool hard_reset)
2634{
2635 u32 wait_timeout_ms, cpu_timeout_ms;
2636
2637 dev_info(hdev->dev,
2638 "Halting compute engines and disabling interrupts\n");
2639
2640 if (hdev->pldm) {
2641 wait_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
2642 cpu_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
2643 } else {
2644 wait_timeout_ms = GAUDI_RESET_WAIT_MSEC;
2645 cpu_timeout_ms = GAUDI_CPU_RESET_WAIT_MSEC;
2646 }
2647
2648 if (hard_reset) {
2649 /*
2650		 * We don't know what the state of the CPU is, so make sure it
2651		 * is stopped by any means necessary
2652 */
2653 WREG32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU, KMD_MSG_GOTO_WFE);
2654 WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
2655						GAUDI_EVENT_HALT_MACHINE);
2656		msleep(cpu_timeout_ms);
2657 }
2658
2659 gaudi_stop_mme_qmans(hdev);
2660 gaudi_stop_tpc_qmans(hdev);
2661 gaudi_stop_hbm_dma_qmans(hdev);
2662 gaudi_stop_pci_dma_qmans(hdev);
2663
2664 gaudi_disable_clock_gating(hdev);
2665
2666 msleep(wait_timeout_ms);
2667
2668 gaudi_pci_dma_stall(hdev);
2669 gaudi_hbm_dma_stall(hdev);
2670 gaudi_tpc_stall(hdev);
2671 gaudi_mme_stall(hdev);
2672
2673 msleep(wait_timeout_ms);
2674
2675 gaudi_disable_mme_qmans(hdev);
2676 gaudi_disable_tpc_qmans(hdev);
2677 gaudi_disable_hbm_dma_qmans(hdev);
2678 gaudi_disable_pci_dma_qmans(hdev);
2679
2680 gaudi_disable_timestamp(hdev);
2681
2682 if (hard_reset)
2683 gaudi_disable_msi(hdev);
2684 else
2685 gaudi_sync_irqs(hdev);
2686}
2687
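/*
 * Program the hop0 page-table address of every ASID, configure the MMU
 * cache management area, invalidate the MMU cache and enable the MMU.
 * Does nothing if the MMU is disabled or was already initialized.
 */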
2688static int gaudi_mmu_init(struct hl_device *hdev)
2689{
2690 struct asic_fixed_properties *prop = &hdev->asic_prop;
2691 struct gaudi_device *gaudi = hdev->asic_specific;
2692 u64 hop0_addr;
2693 int rc, i;
2694
2695 if (!hdev->mmu_enable)
2696 return 0;
2697
2698 if (gaudi->hw_cap_initialized & HW_CAP_MMU)
2699 return 0;
2700
2701 hdev->dram_supports_virtual_memory = false;
2702
2703 for (i = 0 ; i < prop->max_asid ; i++) {
2704 hop0_addr = prop->mmu_pgt_addr +
2705 (i * prop->mmu_hop_table_size);
2706
2707 rc = gaudi_mmu_update_asid_hop0_addr(hdev, i, hop0_addr);
2708 if (rc) {
2709 dev_err(hdev->dev,
2710 "failed to set hop0 addr for asid %d\n", i);
2711 goto err;
2712 }
2713 }
2714
2715 /* init MMU cache manage page */
2716 WREG32(mmSTLB_CACHE_INV_BASE_39_8, MMU_CACHE_MNG_ADDR >> 8);
2717 WREG32(mmSTLB_CACHE_INV_BASE_49_40, MMU_CACHE_MNG_ADDR >> 40);
2718
2719 hdev->asic_funcs->mmu_invalidate_cache(hdev, true,
2720 VM_TYPE_USERPTR | VM_TYPE_PHYS_PACK);
2721
2722 WREG32(mmMMU_UP_MMU_ENABLE, 1);
2723 WREG32(mmMMU_UP_SPI_MASK, 0xF);
2724
2725 WREG32(mmSTLB_HOP_CONFIGURATION,
2726 hdev->mmu_huge_page_opt ? 0x30440 : 0x40440);
2727
2728 gaudi->hw_cap_initialized |= HW_CAP_MMU;
2729
2730 return 0;
2731
2732err:
2733 return rc;
2734}
2735
2736static int gaudi_load_firmware_to_device(struct hl_device *hdev)
2737{
2738 void __iomem *dst;
2739
2740 /* HBM scrambler must be initialized before pushing F/W to HBM */
2741 gaudi_init_scrambler_hbm(hdev);
2742
2743 dst = hdev->pcie_bar[HBM_BAR_ID] + LINUX_FW_OFFSET;
2744
2745 return hl_fw_load_fw_to_device(hdev, GAUDI_LINUX_FW_FILE, dst);
2746}
2747
2748static int gaudi_load_boot_fit_to_device(struct hl_device *hdev)
2749{
2750 void __iomem *dst;
2751
2752 dst = hdev->pcie_bar[SRAM_BAR_ID] + BOOT_FIT_SRAM_OFFSET;
2753
2754 return hl_fw_load_fw_to_device(hdev, GAUDI_BOOT_FIT_FILE, dst);
2755}
2756
2757static void gaudi_read_device_fw_version(struct hl_device *hdev,
2758 enum hl_fw_component fwc)
2759{
2760 const char *name;
2761 u32 ver_off;
2762 char *dest;
2763
2764 switch (fwc) {
2765 case FW_COMP_UBOOT:
2766 ver_off = RREG32(mmUBOOT_VER_OFFSET);
2767 dest = hdev->asic_prop.uboot_ver;
2768 name = "U-Boot";
2769 break;
2770 case FW_COMP_PREBOOT:
2771 ver_off = RREG32(mmPREBOOT_VER_OFFSET);
2772 dest = hdev->asic_prop.preboot_ver;
2773 name = "Preboot";
2774 break;
2775 default:
2776 dev_warn(hdev->dev, "Undefined FW component: %d\n", fwc);
2777 return;
2778 }
2779
2780 ver_off &= ~((u32)SRAM_BASE_ADDR);
2781
2782 if (ver_off < SRAM_SIZE - VERSION_MAX_LEN) {
2783 memcpy_fromio(dest, hdev->pcie_bar[SRAM_BAR_ID] + ver_off,
2784 VERSION_MAX_LEN);
2785 } else {
2786 dev_err(hdev->dev, "%s version offset (0x%x) is above SRAM\n",
2787 name, ver_off);
2788 strcpy(dest, "unavailable");
2789 }
2790}
2791
2792static int gaudi_init_cpu(struct hl_device *hdev)
2793{
2794 struct gaudi_device *gaudi = hdev->asic_specific;
2795 int rc;
2796
2797 if (!hdev->cpu_enable)
2798 return 0;
2799
2800 if (gaudi->hw_cap_initialized & HW_CAP_CPU)
2801 return 0;
2802
2803 /*
2804	 * The device CPU works with 40-bit addresses.
2805 * This register sets the extension to 50 bits.
2806 */
2807 WREG32(mmCPU_IF_CPU_MSB_ADDR, hdev->cpu_pci_msb_addr);
2808
2809 rc = hl_fw_init_cpu(hdev, mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS,
2810 mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU,
2811 mmCPU_CMD_STATUS_TO_HOST,
2812 mmCPU_BOOT_ERR0,
2813 !hdev->bmc_enable, GAUDI_CPU_TIMEOUT_USEC,
2814 GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC);
2815
2816 if (rc)
2817 return rc;
2818
2819 gaudi->hw_cap_initialized |= HW_CAP_CPU;
2820
2821 return 0;
2822}
2823
2824static int gaudi_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout)
2825{
2826 struct gaudi_device *gaudi = hdev->asic_specific;
2827 struct hl_eq *eq;
2828 u32 status;
2829 struct hl_hw_queue *cpu_pq =
2830 &hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];
2831 int err;
2832
2833 if (!hdev->cpu_queues_enable)
2834 return 0;
2835
2836 if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
2837 return 0;
2838
2839 eq = &hdev->event_queue;
2840
2841 WREG32(mmCPU_IF_PQ_BASE_ADDR_LOW, lower_32_bits(cpu_pq->bus_address));
2842 WREG32(mmCPU_IF_PQ_BASE_ADDR_HIGH, upper_32_bits(cpu_pq->bus_address));
2843
2844 WREG32(mmCPU_IF_EQ_BASE_ADDR_LOW, lower_32_bits(eq->bus_address));
2845 WREG32(mmCPU_IF_EQ_BASE_ADDR_HIGH, upper_32_bits(eq->bus_address));
2846
2847 WREG32(mmCPU_IF_CQ_BASE_ADDR_LOW,
2848 lower_32_bits(hdev->cpu_accessible_dma_address));
2849 WREG32(mmCPU_IF_CQ_BASE_ADDR_HIGH,
2850 upper_32_bits(hdev->cpu_accessible_dma_address));
2851
2852 WREG32(mmCPU_IF_PQ_LENGTH, HL_QUEUE_SIZE_IN_BYTES);
2853 WREG32(mmCPU_IF_EQ_LENGTH, HL_EQ_SIZE_IN_BYTES);
2854 WREG32(mmCPU_IF_CQ_LENGTH, HL_CPU_ACCESSIBLE_MEM_SIZE);
2855
2856 /* Used for EQ CI */
2857 WREG32(mmCPU_IF_EQ_RD_OFFS, 0);
2858
2859 WREG32(mmCPU_IF_PF_PQ_PI, 0);
2860
2861 if (gaudi->multi_msi_mode)
2862 WREG32(mmCPU_IF_QUEUE_INIT, PQ_INIT_STATUS_READY_FOR_CP);
2863 else
2864 WREG32(mmCPU_IF_QUEUE_INIT,
2865 PQ_INIT_STATUS_READY_FOR_CP_SINGLE_MSI);
2866
2867 WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR, GAUDI_EVENT_PI_UPDATE);
2868
2869 err = hl_poll_timeout(
2870 hdev,
2871 mmCPU_IF_QUEUE_INIT,
2872 status,
2873 (status == PQ_INIT_STATUS_READY_FOR_HOST),
2874 1000,
2875 cpu_timeout);
2876
2877 if (err) {
2878 dev_err(hdev->dev,
2879 "Failed to communicate with ARM CPU (ArmCP timeout)\n");
2880 return -EIO;
2881 }
2882
2883 gaudi->hw_cap_initialized |= HW_CAP_CPU_Q;
2884 return 0;
2885}
2886
2887static void gaudi_pre_hw_init(struct hl_device *hdev)
2888{
2889 /* Perform read from the device to make sure device is up */
2890 RREG32(mmPCIE_DBI_DEVICE_ID_VENDOR_ID_REG);
2891
2892 /*
2893 * Let's mark in the H/W that we have reached this point. We check
2894 * this value in the reset_before_init function to understand whether
2895 * we need to reset the chip before doing H/W init. This register is
2896 * cleared by the H/W upon H/W reset
2897 */
2898 WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY);
2899
2900 /* Set the access through PCI bars (Linux driver only) as secured */
2901 WREG32(mmPCIE_WRAP_LBW_PROT_OVR, (PCIE_WRAP_LBW_PROT_OVR_RD_EN_MASK |
2902 PCIE_WRAP_LBW_PROT_OVR_WR_EN_MASK));
2903
2904 /* Perform read to flush the waiting writes to ensure configuration
2905 * was set in the device
2906 */
2907 RREG32(mmPCIE_WRAP_LBW_PROT_OVR);
2908
2909 if (hdev->axi_drain) {
2910 WREG32(mmPCIE_WRAP_LBW_DRAIN_CFG,
2911 1 << PCIE_WRAP_LBW_DRAIN_CFG_EN_SHIFT);
2912 WREG32(mmPCIE_WRAP_HBW_DRAIN_CFG,
2913 1 << PCIE_WRAP_HBW_DRAIN_CFG_EN_SHIFT);
2914
2915 /* Perform read to flush the DRAIN cfg */
2916 RREG32(mmPCIE_WRAP_HBW_DRAIN_CFG);
2917 } else {
2918 WREG32(mmPCIE_WRAP_LBW_DRAIN_CFG, 0);
2919 WREG32(mmPCIE_WRAP_HBW_DRAIN_CFG, 0);
2920
2921 /* Perform read to flush the DRAIN cfg */
2922 RREG32(mmPCIE_WRAP_HBW_DRAIN_CFG);
2923 }
2924
2925 /* Configure the reset registers. Must be done as early as possible
2926 * in case we fail during H/W initialization
2927 */
2928 WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_H,
2929 (CFG_RST_H_DMA_MASK |
2930 CFG_RST_H_MME_MASK |
2931 CFG_RST_H_SM_MASK |
2932 CFG_RST_H_TPC_MASK));
2933
2934 WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_L, CFG_RST_L_TPC_MASK);
2935
2936 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_H,
2937 (CFG_RST_H_HBM_MASK |
2938 CFG_RST_H_TPC_MASK |
2939 CFG_RST_H_NIC_MASK |
2940 CFG_RST_H_SM_MASK |
2941 CFG_RST_H_DMA_MASK |
2942 CFG_RST_H_MME_MASK |
2943 CFG_RST_H_CPU_MASK |
2944 CFG_RST_H_MMU_MASK));
2945
2946 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_L,
2947 (CFG_RST_L_IF_MASK |
2948 CFG_RST_L_PSOC_MASK |
2949 CFG_RST_L_TPC_MASK));
2950}
2951
2952static int gaudi_hw_init(struct hl_device *hdev)
2953{
2954 int rc;
2955
2956 dev_info(hdev->dev, "Starting initialization of H/W\n");
2957
2958 gaudi_pre_hw_init(hdev);
2959
2960 gaudi_init_pci_dma_qmans(hdev);
2961
2962 gaudi_init_hbm_dma_qmans(hdev);
2963
2964 /*
2965	 * Before pushing u-boot/Linux to the device, the HBM BAR must be set
2966	 * to the DRAM base address
2967 */
2968 if (gaudi_set_hbm_bar_base(hdev, DRAM_PHYS_BASE) == U64_MAX) {
2969 dev_err(hdev->dev,
2970 "failed to map HBM bar to DRAM base address\n");
2971 return -EIO;
2972 }
2973
2974 rc = gaudi_init_cpu(hdev);
2975 if (rc) {
2976 dev_err(hdev->dev, "failed to initialize CPU\n");
2977 return rc;
2978 }
2979
2980 /* SRAM scrambler must be initialized after CPU is running from HBM */
2981 gaudi_init_scrambler_sram(hdev);
2982
2983 /* This is here just in case we are working without CPU */
2984 gaudi_init_scrambler_hbm(hdev);
2985
2986 gaudi_init_golden_registers(hdev);
2987
2988 rc = gaudi_mmu_init(hdev);
2989 if (rc)
2990 return rc;
2991
2992	gaudi_init_security(hdev);
2993
2994	gaudi_init_mme_qmans(hdev);
2995
2996 gaudi_init_tpc_qmans(hdev);
2997
2998 gaudi_enable_clock_gating(hdev);
2999
3000 gaudi_enable_timestamp(hdev);
3001
3002 /* MSI must be enabled before CPU queues are initialized */
3003 rc = gaudi_enable_msi(hdev);
3004 if (rc)
3005 goto disable_queues;
3006
3007 /* must be called after MSI was enabled */
3008 rc = gaudi_init_cpu_queues(hdev, GAUDI_CPU_TIMEOUT_USEC);
3009 if (rc) {
3010 dev_err(hdev->dev, "failed to initialize CPU H/W queues %d\n",
3011 rc);
3012 goto disable_msi;
3013 }
3014
3015 /* Perform read from the device to flush all configuration */
3016 RREG32(mmPCIE_DBI_DEVICE_ID_VENDOR_ID_REG);
3017
3018 return 0;
3019
3020disable_msi:
3021 gaudi_disable_msi(hdev);
3022disable_queues:
3023 gaudi_disable_mme_qmans(hdev);
3024 gaudi_disable_pci_dma_qmans(hdev);
3025
3026 return rc;
3027}
3028
3029static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset)
3030{
3031 struct gaudi_device *gaudi = hdev->asic_specific;
3032 u32 status, reset_timeout_ms, boot_strap = 0;
3033
3034 if (hdev->pldm) {
3035 if (hard_reset)
3036 reset_timeout_ms = GAUDI_PLDM_HRESET_TIMEOUT_MSEC;
3037 else
3038 reset_timeout_ms = GAUDI_PLDM_SRESET_TIMEOUT_MSEC;
3039 } else {
3040 reset_timeout_ms = GAUDI_RESET_TIMEOUT_MSEC;
3041 }
3042
3043 if (hard_reset) {
3044 /* Tell ASIC not to re-initialize PCIe */
3045 WREG32(mmPREBOOT_PCIE_EN, LKD_HARD_RESET_MAGIC);
3046
3047 boot_strap = RREG32(mmPSOC_GLOBAL_CONF_BOOT_STRAP_PINS);
3048 /* H/W bug WA:
3049 * rdata[31:0] = strap_read_val;
3050 * wdata[31:0] = rdata[30:21],1'b0,rdata[20:0]
3051 */
3052 boot_strap = (((boot_strap & 0x7FE00000) << 1) |
3053 (boot_strap & 0x001FFFFF));
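		/*
		 * i.e. keep bits [20:0], move bits [30:21] up to [31:22] and
		 * leave bit 21 cleared, as the WA formula above describes.
		 */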
3054 WREG32(mmPSOC_GLOBAL_CONF_BOOT_STRAP_PINS, boot_strap & ~0x2);
3055
3056 /* Restart BTL/BLR upon hard-reset */
3057 WREG32(mmPSOC_GLOBAL_CONF_BOOT_SEQ_RE_START, 1);
3058
3059 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST,
3060 1 << PSOC_GLOBAL_CONF_SW_ALL_RST_IND_SHIFT);
3061 dev_info(hdev->dev,
3062 "Issued HARD reset command, going to wait %dms\n",
3063 reset_timeout_ms);
3064 } else {
3065 /* Don't restart BTL/BLR upon soft-reset */
3066 WREG32(mmPSOC_GLOBAL_CONF_BOOT_SEQ_RE_START, 0);
3067
3068 WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST,
3069 1 << PSOC_GLOBAL_CONF_SOFT_RST_IND_SHIFT);
3070 dev_info(hdev->dev,
3071 "Issued SOFT reset command, going to wait %dms\n",
3072 reset_timeout_ms);
3073 }
3074
3075 /*
3076 * After hard reset, we can't poll the BTM_FSM register because the PSOC
3077 * itself is in reset. Need to wait until the reset is deasserted
3078 */
3079 msleep(reset_timeout_ms);
3080
3081 status = RREG32(mmPSOC_GLOBAL_CONF_BTM_FSM);
3082 if (status & PSOC_GLOBAL_CONF_BTM_FSM_STATE_MASK)
3083 dev_err(hdev->dev,
3084 "Timeout while waiting for device to reset 0x%x\n",
3085 status);
3086
3087 if (!hard_reset) {
3088 gaudi->hw_cap_initialized &= ~(HW_CAP_PCI_DMA | HW_CAP_MME |
3089 HW_CAP_TPC_MASK |
3090 HW_CAP_HBM_DMA);
3091
3092 WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
3093 GAUDI_EVENT_SOFT_RESET);
3094 return;
3095 }
3096
3097 /* We continue here only for hard-reset */
3098
3099 WREG32(mmPSOC_GLOBAL_CONF_BOOT_STRAP_PINS, boot_strap);
3100
3101 gaudi->hw_cap_initialized &= ~(HW_CAP_CPU | HW_CAP_CPU_Q |
3102 HW_CAP_HBM | HW_CAP_PCI_DMA |
3103 HW_CAP_MME | HW_CAP_TPC_MASK |
3104 HW_CAP_HBM_DMA | HW_CAP_PLL |
3105 HW_CAP_MMU |
3106 HW_CAP_SRAM_SCRAMBLER |
3107 HW_CAP_HBM_SCRAMBLER);
3108 memset(gaudi->events_stat, 0, sizeof(gaudi->events_stat));
3109}
3110
3111static int gaudi_suspend(struct hl_device *hdev)
3112{
3113 int rc;
3114
3115 rc = hl_fw_send_pci_access_msg(hdev, ARMCP_PACKET_DISABLE_PCI_ACCESS);
3116 if (rc)
3117 dev_err(hdev->dev, "Failed to disable PCI access from CPU\n");
3118
3119 return rc;
3120}
3121
3122static int gaudi_resume(struct hl_device *hdev)
3123{
3124 return gaudi_init_iatu(hdev);
3125}
3126
3127static int gaudi_cb_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
3128 u64 kaddress, phys_addr_t paddress, u32 size)
3129{
3130 int rc;
3131
3132 vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
3133 VM_DONTCOPY | VM_NORESERVE;
3134
3135 rc = remap_pfn_range(vma, vma->vm_start, paddress >> PAGE_SHIFT,
3136 size, vma->vm_page_prot);
3137 if (rc)
3138		dev_err(hdev->dev, "remap_pfn_range error %d\n", rc);
3139
3140 return rc;
3141}
3142
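/*
 * Translate the driver's hw_queue_id to the matching QMAN PQ_PI (or CPU-IF
 * PI) register and write the new producer index to it. For the CPU queue,
 * the PI_UPDATE event is also raised towards the device CPU.
 */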
3143static void gaudi_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
3144{
3145 struct gaudi_device *gaudi = hdev->asic_specific;
3146 u32 db_reg_offset, db_value, dma_qm_offset, q_off;
3147 int dma_id;
3148 bool invalid_queue = false;
3149
3150 switch (hw_queue_id) {
3151 case GAUDI_QUEUE_ID_DMA_0_0...GAUDI_QUEUE_ID_DMA_0_3:
3152 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];
3153 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3154 q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
3155 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3156 break;
3157
3158 case GAUDI_QUEUE_ID_DMA_1_0...GAUDI_QUEUE_ID_DMA_1_3:
3159 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];
3160 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3161 q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
3162 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3163 break;
3164
3165 case GAUDI_QUEUE_ID_DMA_2_0...GAUDI_QUEUE_ID_DMA_2_3:
3166 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1];
3167 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3168 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
3169 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3170 break;
3171
3172 case GAUDI_QUEUE_ID_DMA_3_0...GAUDI_QUEUE_ID_DMA_3_3:
3173 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_2];
3174 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3175 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
3176 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3177 break;
3178
3179 case GAUDI_QUEUE_ID_DMA_4_0...GAUDI_QUEUE_ID_DMA_4_3:
3180 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_3];
3181 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3182 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
3183 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3184 break;
3185
3186 case GAUDI_QUEUE_ID_DMA_5_0...GAUDI_QUEUE_ID_DMA_5_3:
3187 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_3];
3188 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3189 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
3190 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3191 break;
3192
3193 case GAUDI_QUEUE_ID_DMA_6_0...GAUDI_QUEUE_ID_DMA_6_3:
3194 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_4];
3195 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3196 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
3197 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3198 break;
3199
3200 case GAUDI_QUEUE_ID_DMA_7_0...GAUDI_QUEUE_ID_DMA_7_3:
3201 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_5];
3202 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3203 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
3204 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3205 break;
3206
3207 case GAUDI_QUEUE_ID_CPU_PQ:
3208 if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
3209 db_reg_offset = mmCPU_IF_PF_PQ_PI;
3210 else
3211 invalid_queue = true;
3212 break;
3213
3214 case GAUDI_QUEUE_ID_MME_0_0:
3215 db_reg_offset = mmMME2_QM_PQ_PI_0;
3216 break;
3217
3218 case GAUDI_QUEUE_ID_MME_0_1:
3219 db_reg_offset = mmMME2_QM_PQ_PI_1;
3220 break;
3221
3222 case GAUDI_QUEUE_ID_MME_0_2:
3223 db_reg_offset = mmMME2_QM_PQ_PI_2;
3224 break;
3225
3226 case GAUDI_QUEUE_ID_MME_0_3:
3227 db_reg_offset = mmMME2_QM_PQ_PI_3;
3228 break;
3229
3230 case GAUDI_QUEUE_ID_MME_1_0:
3231 db_reg_offset = mmMME0_QM_PQ_PI_0;
3232 break;
3233
3234 case GAUDI_QUEUE_ID_MME_1_1:
3235 db_reg_offset = mmMME0_QM_PQ_PI_1;
3236 break;
3237
3238 case GAUDI_QUEUE_ID_MME_1_2:
3239 db_reg_offset = mmMME0_QM_PQ_PI_2;
3240 break;
3241
3242 case GAUDI_QUEUE_ID_MME_1_3:
3243 db_reg_offset = mmMME0_QM_PQ_PI_3;
3244 break;
3245
3246 case GAUDI_QUEUE_ID_TPC_0_0:
3247 db_reg_offset = mmTPC0_QM_PQ_PI_0;
3248 break;
3249
3250 case GAUDI_QUEUE_ID_TPC_0_1:
3251 db_reg_offset = mmTPC0_QM_PQ_PI_1;
3252 break;
3253
3254 case GAUDI_QUEUE_ID_TPC_0_2:
3255 db_reg_offset = mmTPC0_QM_PQ_PI_2;
3256 break;
3257
3258 case GAUDI_QUEUE_ID_TPC_0_3:
3259 db_reg_offset = mmTPC0_QM_PQ_PI_3;
3260 break;
3261
3262 case GAUDI_QUEUE_ID_TPC_1_0:
3263 db_reg_offset = mmTPC1_QM_PQ_PI_0;
3264 break;
3265
3266 case GAUDI_QUEUE_ID_TPC_1_1:
3267 db_reg_offset = mmTPC1_QM_PQ_PI_1;
3268 break;
3269
3270 case GAUDI_QUEUE_ID_TPC_1_2:
3271 db_reg_offset = mmTPC1_QM_PQ_PI_2;
3272 break;
3273
3274 case GAUDI_QUEUE_ID_TPC_1_3:
3275 db_reg_offset = mmTPC1_QM_PQ_PI_3;
3276 break;
3277
3278 case GAUDI_QUEUE_ID_TPC_2_0:
3279 db_reg_offset = mmTPC2_QM_PQ_PI_0;
3280 break;
3281
3282 case GAUDI_QUEUE_ID_TPC_2_1:
3283 db_reg_offset = mmTPC2_QM_PQ_PI_1;
3284 break;
3285
3286 case GAUDI_QUEUE_ID_TPC_2_2:
3287 db_reg_offset = mmTPC2_QM_PQ_PI_2;
3288 break;
3289
3290 case GAUDI_QUEUE_ID_TPC_2_3:
3291 db_reg_offset = mmTPC2_QM_PQ_PI_3;
3292 break;
3293
3294 case GAUDI_QUEUE_ID_TPC_3_0:
3295 db_reg_offset = mmTPC3_QM_PQ_PI_0;
3296 break;
3297
3298 case GAUDI_QUEUE_ID_TPC_3_1:
3299 db_reg_offset = mmTPC3_QM_PQ_PI_1;
3300 break;
3301
3302 case GAUDI_QUEUE_ID_TPC_3_2:
3303 db_reg_offset = mmTPC3_QM_PQ_PI_2;
3304 break;
3305
3306 case GAUDI_QUEUE_ID_TPC_3_3:
3307 db_reg_offset = mmTPC3_QM_PQ_PI_3;
3308 break;
3309
3310 case GAUDI_QUEUE_ID_TPC_4_0:
3311 db_reg_offset = mmTPC4_QM_PQ_PI_0;
3312 break;
3313
3314 case GAUDI_QUEUE_ID_TPC_4_1:
3315 db_reg_offset = mmTPC4_QM_PQ_PI_1;
3316 break;
3317
3318 case GAUDI_QUEUE_ID_TPC_4_2:
3319 db_reg_offset = mmTPC4_QM_PQ_PI_2;
3320 break;
3321
3322 case GAUDI_QUEUE_ID_TPC_4_3:
3323 db_reg_offset = mmTPC4_QM_PQ_PI_3;
3324 break;
3325
3326 case GAUDI_QUEUE_ID_TPC_5_0:
3327 db_reg_offset = mmTPC5_QM_PQ_PI_0;
3328 break;
3329
3330 case GAUDI_QUEUE_ID_TPC_5_1:
3331 db_reg_offset = mmTPC5_QM_PQ_PI_1;
3332 break;
3333
3334 case GAUDI_QUEUE_ID_TPC_5_2:
3335 db_reg_offset = mmTPC5_QM_PQ_PI_2;
3336 break;
3337
3338 case GAUDI_QUEUE_ID_TPC_5_3:
3339 db_reg_offset = mmTPC5_QM_PQ_PI_3;
3340 break;
3341
3342 case GAUDI_QUEUE_ID_TPC_6_0:
3343 db_reg_offset = mmTPC6_QM_PQ_PI_0;
3344 break;
3345
3346 case GAUDI_QUEUE_ID_TPC_6_1:
3347 db_reg_offset = mmTPC6_QM_PQ_PI_1;
3348 break;
3349
3350 case GAUDI_QUEUE_ID_TPC_6_2:
3351 db_reg_offset = mmTPC6_QM_PQ_PI_2;
3352 break;
3353
3354 case GAUDI_QUEUE_ID_TPC_6_3:
3355 db_reg_offset = mmTPC6_QM_PQ_PI_3;
3356 break;
3357
3358 case GAUDI_QUEUE_ID_TPC_7_0:
3359 db_reg_offset = mmTPC7_QM_PQ_PI_0;
3360 break;
3361
3362 case GAUDI_QUEUE_ID_TPC_7_1:
3363 db_reg_offset = mmTPC7_QM_PQ_PI_1;
3364 break;
3365
3366 case GAUDI_QUEUE_ID_TPC_7_2:
3367 db_reg_offset = mmTPC7_QM_PQ_PI_2;
3368 break;
3369
3370 case GAUDI_QUEUE_ID_TPC_7_3:
3371 db_reg_offset = mmTPC7_QM_PQ_PI_3;
3372 break;
3373
3374 default:
3375 invalid_queue = true;
3376 }
3377
3378 if (invalid_queue) {
3379 /* Should never get here */
3380 dev_err(hdev->dev, "h/w queue %d is invalid. Can't set pi\n",
3381 hw_queue_id);
3382 return;
3383 }
3384
3385 db_value = pi;
3386
3387 /* ring the doorbell */
3388 WREG32(db_reg_offset, db_value);
3389
3390 if (hw_queue_id == GAUDI_QUEUE_ID_CPU_PQ)
3391 WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
3392 GAUDI_EVENT_PI_UPDATE);
3393}
3394
3395static void gaudi_pqe_write(struct hl_device *hdev, __le64 *pqe,
3396 struct hl_bd *bd)
3397{
3398 __le64 *pbd = (__le64 *) bd;
3399
3400	/* The QMANs are on host memory so a simple copy suffices */
3401 pqe[0] = pbd[0];
3402 pqe[1] = pbd[1];
3403}
3404
3405static void *gaudi_dma_alloc_coherent(struct hl_device *hdev, size_t size,
3406 dma_addr_t *dma_handle, gfp_t flags)
3407{
3408 void *kernel_addr = dma_alloc_coherent(&hdev->pdev->dev, size,
3409 dma_handle, flags);
3410
3411 /* Shift to the device's base physical address of host memory */
3412 if (kernel_addr)
3413 *dma_handle += HOST_PHYS_BASE;
3414
3415 return kernel_addr;
3416}
3417
3418static void gaudi_dma_free_coherent(struct hl_device *hdev, size_t size,
3419 void *cpu_addr, dma_addr_t dma_handle)
3420{
3421 /* Cancel the device's base physical address of host memory */
3422 dma_addr_t fixed_dma_handle = dma_handle - HOST_PHYS_BASE;
3423
3424 dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, fixed_dma_handle);
3425}
3426
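/*
 * Return the driver-allocated PQ buffer of an internal (engine) queue: its
 * kernel virtual address, its DMA address and its length in PQ entries.
 * Only queues of type QUEUE_TYPE_INT have such a buffer, so any other
 * queue id is rejected.
 */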
3427static void *gaudi_get_int_queue_base(struct hl_device *hdev,
3428 u32 queue_id, dma_addr_t *dma_handle,
3429 u16 *queue_len)
3430{
3431 struct gaudi_device *gaudi = hdev->asic_specific;
3432 struct gaudi_internal_qman_info *q;
3433
3434 if (queue_id >= GAUDI_QUEUE_ID_SIZE ||
3435 gaudi_queue_type[queue_id] != QUEUE_TYPE_INT) {
3436 dev_err(hdev->dev, "Got invalid queue id %d\n", queue_id);
3437 return NULL;
3438 }
3439
3440 q = &gaudi->internal_qmans[queue_id];
3441 *dma_handle = q->pq_dma_addr;
3442 *queue_len = q->pq_size / QMAN_PQ_ENTRY_SIZE;
3443
3444 return q->pq_kernel_addr;
3445}
3446
3447static int gaudi_send_cpu_message(struct hl_device *hdev, u32 *msg,
3448 u16 len, u32 timeout, long *result)
3449{
3450 struct gaudi_device *gaudi = hdev->asic_specific;
3451
3452 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) {
3453 if (result)
3454 *result = 0;
3455 return 0;
3456 }
3457
3458 return hl_fw_send_cpu_message(hdev, GAUDI_QUEUE_ID_CPU_PQ, msg, len,
3459 timeout, result);
3460}
3461
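/*
 * Sanity-test an external H/W queue: allocate a 4-byte fence buffer and a
 * MSG_PROT packet from the DMA pool, send the packet on the queue without a
 * completion entry and poll the fence buffer until the engine writes back the
 * expected value or the timeout (longer on PLDM) expires. A timeout is
 * reported as -EIO.
 */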
3462static int gaudi_test_queue(struct hl_device *hdev, u32 hw_queue_id)
3463{
3464 struct packet_msg_prot *fence_pkt;
3465 dma_addr_t pkt_dma_addr;
3466 u32 fence_val, tmp, timeout_usec;
3467 dma_addr_t fence_dma_addr;
3468 u32 *fence_ptr;
3469 int rc;
3470
3471 if (hdev->pldm)
3472 timeout_usec = GAUDI_PLDM_TEST_QUEUE_WAIT_USEC;
3473 else
3474 timeout_usec = GAUDI_TEST_QUEUE_WAIT_USEC;
3475
3476 fence_val = GAUDI_QMAN0_FENCE_VAL;
3477
3478 fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL,
3479 &fence_dma_addr);
3480 if (!fence_ptr) {
3481 dev_err(hdev->dev,
3482 "Failed to allocate memory for queue testing\n");
3483 return -ENOMEM;
3484 }
3485
3486 *fence_ptr = 0;
3487
3488 fence_pkt = hdev->asic_funcs->asic_dma_pool_zalloc(hdev,
3489 sizeof(struct packet_msg_prot),
3490 GFP_KERNEL, &pkt_dma_addr);
3491 if (!fence_pkt) {
3492 dev_err(hdev->dev,
3493 "Failed to allocate packet for queue testing\n");
3494 rc = -ENOMEM;
3495 goto free_fence_ptr;
3496 }
3497
3498 tmp = (PACKET_MSG_PROT << GAUDI_PKT_CTL_OPCODE_SHIFT) |
3499 (1 << GAUDI_PKT_CTL_EB_SHIFT) |
3500 (1 << GAUDI_PKT_CTL_MB_SHIFT);
3501 fence_pkt->ctl = cpu_to_le32(tmp);
3502 fence_pkt->value = cpu_to_le32(fence_val);
3503 fence_pkt->addr = cpu_to_le64(fence_dma_addr);
3504
3505 rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id,
3506 sizeof(struct packet_msg_prot),
3507 pkt_dma_addr);
3508 if (rc) {
3509 dev_err(hdev->dev,
3510 "Failed to send fence packet\n");
3511 goto free_pkt;
3512 }
3513
3514 rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp, (tmp == fence_val),
3515 1000, timeout_usec, true);
3516
3517 hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id);
3518
3519 if (rc == -ETIMEDOUT) {
3520 dev_err(hdev->dev,
3521 "H/W queue %d test failed (scratch(0x%08llX) == 0x%08X)\n",
3522 hw_queue_id, (unsigned long long) fence_dma_addr, tmp);
3523 rc = -EIO;
3524 }
3525
3526free_pkt:
3527 hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_pkt,
3528 pkt_dma_addr);
3529free_fence_ptr:
3530 hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
3531 fence_dma_addr);
3532 return rc;
3533}
3534
3535static int gaudi_test_cpu_queue(struct hl_device *hdev)
3536{
3537 struct gaudi_device *gaudi = hdev->asic_specific;
3538
3539 /*
3540	 * Check the capability here because send_cpu_message() won't update
3541	 * the result value if the CPU queue capability is not initialized
3542 */
3543 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
3544 return 0;
3545
3546 return hl_fw_test_cpu_queue(hdev);
3547}
3548
3549static int gaudi_test_queues(struct hl_device *hdev)
3550{
3551 int i, rc, ret_val = 0;
3552
3553 for (i = 0 ; i < HL_MAX_QUEUES ; i++) {
3554 if (hdev->asic_prop.hw_queues_props[i].type == QUEUE_TYPE_EXT) {
3555 rc = gaudi_test_queue(hdev, i);
3556 if (rc)
3557 ret_val = -EINVAL;
3558 }
3559 }
3560
3561 rc = gaudi_test_cpu_queue(hdev);
3562 if (rc)
3563 ret_val = -EINVAL;
3564
3565 return ret_val;
3566}
3567
3568static void *gaudi_dma_pool_zalloc(struct hl_device *hdev, size_t size,
3569 gfp_t mem_flags, dma_addr_t *dma_handle)
3570{
3571 void *kernel_addr;
3572
3573 if (size > GAUDI_DMA_POOL_BLK_SIZE)
3574 return NULL;
3575
3576 kernel_addr = dma_pool_zalloc(hdev->dma_pool, mem_flags, dma_handle);
3577
3578 /* Shift to the device's base physical address of host memory */
3579 if (kernel_addr)
3580 *dma_handle += HOST_PHYS_BASE;
3581
3582 return kernel_addr;
3583}
3584
3585static void gaudi_dma_pool_free(struct hl_device *hdev, void *vaddr,
3586 dma_addr_t dma_addr)
3587{
3588 /* Cancel the device's base physical address of host memory */
3589 dma_addr_t fixed_dma_addr = dma_addr - HOST_PHYS_BASE;
3590
3591 dma_pool_free(hdev->dma_pool, vaddr, fixed_dma_addr);
3592}
3593
3594static void *gaudi_cpu_accessible_dma_pool_alloc(struct hl_device *hdev,
3595 size_t size, dma_addr_t *dma_handle)
3596{
3597 return hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle);
3598}
3599
3600static void gaudi_cpu_accessible_dma_pool_free(struct hl_device *hdev,
3601 size_t size, void *vaddr)
3602{
3603 hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr);
3604}
3605
3606static int gaudi_dma_map_sg(struct hl_device *hdev, struct scatterlist *sgl,
3607 int nents, enum dma_data_direction dir)
3608{
3609 struct scatterlist *sg;
3610 int i;
3611
3612 if (!dma_map_sg(&hdev->pdev->dev, sgl, nents, dir))
3613 return -ENOMEM;
3614
3615 /* Shift to the device's base physical address of host memory */
3616 for_each_sg(sgl, sg, nents, i)
3617 sg->dma_address += HOST_PHYS_BASE;
3618
3619 return 0;
3620}
3621
3622static void gaudi_dma_unmap_sg(struct hl_device *hdev, struct scatterlist *sgl,
3623 int nents, enum dma_data_direction dir)
3624{
3625 struct scatterlist *sg;
3626 int i;
3627
3628 /* Cancel the device's base physical address of host memory */
3629 for_each_sg(sgl, sg, nents, i)
3630 sg->dma_address -= HOST_PHYS_BASE;
3631
3632 dma_unmap_sg(&hdev->pdev->dev, sgl, nents, dir);
3633}
3634
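/*
 * Compute how many bytes of LIN_DMA packets are needed to cover a DMA-mapped
 * SG table. Consecutive SG entries that are physically contiguous are merged
 * as long as the combined length does not exceed DMA_MAX_TRANSFER_SIZE, and
 * each resulting descriptor costs one struct packet_lin_dma in the patched CB.
 */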
3635static u32 gaudi_get_dma_desc_list_size(struct hl_device *hdev,
3636 struct sg_table *sgt)
3637{
3638 struct scatterlist *sg, *sg_next_iter;
3639 u32 count, dma_desc_cnt;
3640 u64 len, len_next;
3641 dma_addr_t addr, addr_next;
3642
3643 dma_desc_cnt = 0;
3644
3645 for_each_sg(sgt->sgl, sg, sgt->nents, count) {
3646
3647 len = sg_dma_len(sg);
3648 addr = sg_dma_address(sg);
3649
3650 if (len == 0)
3651 break;
3652
3653 while ((count + 1) < sgt->nents) {
3654 sg_next_iter = sg_next(sg);
3655 len_next = sg_dma_len(sg_next_iter);
3656 addr_next = sg_dma_address(sg_next_iter);
3657
3658 if (len_next == 0)
3659 break;
3660
3661 if ((addr + len == addr_next) &&
3662 (len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
3663 len += len_next;
3664 count++;
3665 sg = sg_next_iter;
3666 } else {
3667 break;
3668 }
3669 }
3670
3671 dma_desc_cnt++;
3672 }
3673
3674 return dma_desc_cnt * sizeof(struct packet_lin_dma);
3675}
3676
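/*
 * Pin the host memory range referenced by a user LIN_DMA packet (unless the
 * same range was already pinned for this job), DMA-map it, add it to the
 * job's userptr list and grow the patched CB size by the number of LIN_DMA
 * packets needed to describe the mapped SG table.
 */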
3677static int gaudi_pin_memory_before_cs(struct hl_device *hdev,
3678 struct hl_cs_parser *parser,
3679 struct packet_lin_dma *user_dma_pkt,
3680 u64 addr, enum dma_data_direction dir)
3681{
3682 struct hl_userptr *userptr;
3683 int rc;
3684
3685 if (hl_userptr_is_pinned(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
3686 parser->job_userptr_list, &userptr))
3687 goto already_pinned;
3688
3689 userptr = kzalloc(sizeof(*userptr), GFP_ATOMIC);
3690 if (!userptr)
3691 return -ENOMEM;
3692
3693 rc = hl_pin_host_memory(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
3694 userptr);
3695 if (rc)
3696 goto free_userptr;
3697
3698 list_add_tail(&userptr->job_node, parser->job_userptr_list);
3699
3700 rc = hdev->asic_funcs->asic_dma_map_sg(hdev, userptr->sgt->sgl,
3701 userptr->sgt->nents, dir);
3702 if (rc) {
3703 dev_err(hdev->dev, "failed to map sgt with DMA region\n");
3704 goto unpin_memory;
3705 }
3706
3707 userptr->dma_mapped = true;
3708 userptr->dir = dir;
3709
3710already_pinned:
3711 parser->patched_cb_size +=
3712 gaudi_get_dma_desc_list_size(hdev, userptr->sgt);
3713
3714 return 0;
3715
3716unpin_memory:
3717 hl_unpin_host_memory(hdev, userptr);
3718free_userptr:
3719 kfree(userptr);
3720 return rc;
3721}
3722
3723static int gaudi_validate_dma_pkt_host(struct hl_device *hdev,
3724 struct hl_cs_parser *parser,
3725 struct packet_lin_dma *user_dma_pkt,
3726 bool src_in_host)
3727{
3728 enum dma_data_direction dir;
3729 bool skip_host_mem_pin = false, user_memset;
3730 u64 addr;
3731 int rc = 0;
3732
3733 user_memset = (le32_to_cpu(user_dma_pkt->ctl) &
3734 GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
3735 GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
3736
3737 if (src_in_host) {
3738 if (user_memset)
3739 skip_host_mem_pin = true;
3740
3741 dev_dbg(hdev->dev, "DMA direction is HOST --> DEVICE\n");
3742 dir = DMA_TO_DEVICE;
3743 addr = le64_to_cpu(user_dma_pkt->src_addr);
3744 } else {
3745 dev_dbg(hdev->dev, "DMA direction is DEVICE --> HOST\n");
3746 dir = DMA_FROM_DEVICE;
3747 addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
3748 GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
3749 GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
3750 }
3751
3752 if (skip_host_mem_pin)
3753 parser->patched_cb_size += sizeof(*user_dma_pkt);
3754 else
3755 rc = gaudi_pin_memory_before_cs(hdev, parser, user_dma_pkt,
3756 addr, dir);
3757
3758 return rc;
3759}
3760
3761static int gaudi_validate_dma_pkt_no_mmu(struct hl_device *hdev,
3762 struct hl_cs_parser *parser,
3763 struct packet_lin_dma *user_dma_pkt)
3764{
3765 bool src_in_host = false;
3766 u64 dst_addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
3767 GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
3768 GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
3769
3770 dev_dbg(hdev->dev, "DMA packet details:\n");
3771 dev_dbg(hdev->dev, "source == 0x%llx\n",
3772 le64_to_cpu(user_dma_pkt->src_addr));
3773 dev_dbg(hdev->dev, "destination == 0x%llx\n", dst_addr);
3774 dev_dbg(hdev->dev, "size == %u\n", le32_to_cpu(user_dma_pkt->tsize));
3775
3776 /*
3777 * Special handling for DMA with size 0. Bypass all validations
3778 * because no transactions will be done except for WR_COMP, which
3779 * is not a security issue
3780 */
3781 if (!le32_to_cpu(user_dma_pkt->tsize)) {
3782 parser->patched_cb_size += sizeof(*user_dma_pkt);
3783 return 0;
3784 }
3785
3786 if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
3787 src_in_host = true;
3788
3789 return gaudi_validate_dma_pkt_host(hdev, parser, user_dma_pkt,
3790 src_in_host);
3791}
3792
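/*
 * First pass over the user CB: reject packets the user is not allowed to
 * submit (MSG_PROT, CP_DMA, STOP), validate LIN_DMA packets (which also pins
 * host memory in the non-MMU path) and accumulate the size of the patched CB,
 * including room for the two MSG_PROT packets the driver appends at the end.
 */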
3793static int gaudi_validate_cb(struct hl_device *hdev,
3794 struct hl_cs_parser *parser, bool is_mmu)
3795{
3796 u32 cb_parsed_length = 0;
3797 int rc = 0;
3798
3799 parser->patched_cb_size = 0;
3800
3801	/* user_cb_size is more than 0 so the loop will always be executed */
3802 while (cb_parsed_length < parser->user_cb_size) {
3803 enum packet_id pkt_id;
3804 u16 pkt_size;
3805 struct gaudi_packet *user_pkt;
3806
3807 user_pkt = (struct gaudi_packet *) (uintptr_t)
3808 (parser->user_cb->kernel_address + cb_parsed_length);
3809
3810 pkt_id = (enum packet_id) (
3811 (le64_to_cpu(user_pkt->header) &
3812 PACKET_HEADER_PACKET_ID_MASK) >>
3813 PACKET_HEADER_PACKET_ID_SHIFT);
3814
3815 pkt_size = gaudi_packet_sizes[pkt_id];
3816 cb_parsed_length += pkt_size;
3817 if (cb_parsed_length > parser->user_cb_size) {
3818 dev_err(hdev->dev,
3819 "packet 0x%x is out of CB boundary\n", pkt_id);
3820 rc = -EINVAL;
3821 break;
3822 }
3823
3824 switch (pkt_id) {
3825 case PACKET_MSG_PROT:
3826 dev_err(hdev->dev,
3827 "User not allowed to use MSG_PROT\n");
3828 rc = -EPERM;
3829 break;
3830
3831 case PACKET_CP_DMA:
3832 dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
3833 rc = -EPERM;
3834 break;
3835
3836 case PACKET_STOP:
3837 dev_err(hdev->dev, "User not allowed to use STOP\n");
3838 rc = -EPERM;
3839 break;
3840
3841 case PACKET_LIN_DMA:
3842 parser->contains_dma_pkt = true;
3843 if (is_mmu)
3844 parser->patched_cb_size += pkt_size;
3845 else
3846 rc = gaudi_validate_dma_pkt_no_mmu(hdev, parser,
3847 (struct packet_lin_dma *) user_pkt);
3848 break;
3849
3850 case PACKET_WREG_32:
3851 case PACKET_WREG_BULK:
3852 case PACKET_MSG_LONG:
3853 case PACKET_MSG_SHORT:
3854 case PACKET_REPEAT:
3855 case PACKET_FENCE:
3856 case PACKET_NOP:
3857 case PACKET_ARB_POINT:
3858 case PACKET_LOAD_AND_EXE:
3859 parser->patched_cb_size += pkt_size;
3860 break;
3861
3862 default:
3863 dev_err(hdev->dev, "Invalid packet header 0x%x\n",
3864 pkt_id);
3865 rc = -EINVAL;
3866 break;
3867 }
3868
3869 if (rc)
3870 break;
3871 }
3872
3873 /*
3874 * The new CB should have space at the end for two MSG_PROT packets:
3875 * 1. A packet that will act as a completion packet
3876	 * 2. A packet that will generate an MSI interrupt
3877 */
3878 parser->patched_cb_size += sizeof(struct packet_msg_prot) * 2;
3879
3880 return rc;
3881}
3882
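/*
 * Expand a single user LIN_DMA packet into one LIN_DMA packet per merged SG
 * entry of the pinned host memory. The engine barrier (EB) bit is kept only
 * on the first generated packet, and the user's WR_COMP setting is re-applied
 * only on the last one so a write-completion fires once for the whole
 * transfer. A memset packet (host --> device) is copied through unchanged.
 */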
3883static int gaudi_patch_dma_packet(struct hl_device *hdev,
3884 struct hl_cs_parser *parser,
3885 struct packet_lin_dma *user_dma_pkt,
3886 struct packet_lin_dma *new_dma_pkt,
3887 u32 *new_dma_pkt_size)
3888{
3889 struct hl_userptr *userptr;
3890 struct scatterlist *sg, *sg_next_iter;
3891 u32 count, dma_desc_cnt, user_wrcomp_en_mask, ctl;
3892 u64 len, len_next;
3893 dma_addr_t dma_addr, dma_addr_next;
3894 u64 device_memory_addr, addr;
3895 enum dma_data_direction dir;
3896 struct sg_table *sgt;
3897 bool src_in_host = false;
3898 bool skip_host_mem_pin = false;
3899 bool user_memset;
3900
3901 ctl = le32_to_cpu(user_dma_pkt->ctl);
3902
3903 if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
3904 src_in_host = true;
3905
3906 user_memset = (ctl & GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
3907 GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
3908
3909 if (src_in_host) {
3910 addr = le64_to_cpu(user_dma_pkt->src_addr);
3911 device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
3912 dir = DMA_TO_DEVICE;
3913 if (user_memset)
3914 skip_host_mem_pin = true;
3915 } else {
3916 addr = le64_to_cpu(user_dma_pkt->dst_addr);
3917 device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
3918 dir = DMA_FROM_DEVICE;
3919 }
3920
3921 if ((!skip_host_mem_pin) &&
3922 (!hl_userptr_is_pinned(hdev, addr,
3923 le32_to_cpu(user_dma_pkt->tsize),
3924 parser->job_userptr_list, &userptr))) {
3925 dev_err(hdev->dev, "Userptr 0x%llx + 0x%x NOT mapped\n",
3926			addr, le32_to_cpu(user_dma_pkt->tsize));
3927 return -EFAULT;
3928 }
3929
3930 if ((user_memset) && (dir == DMA_TO_DEVICE)) {
3931 memcpy(new_dma_pkt, user_dma_pkt, sizeof(*user_dma_pkt));
3932 *new_dma_pkt_size = sizeof(*user_dma_pkt);
3933 return 0;
3934 }
3935
3936 user_wrcomp_en_mask = ctl & GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
3937
3938 sgt = userptr->sgt;
3939 dma_desc_cnt = 0;
3940
3941 for_each_sg(sgt->sgl, sg, sgt->nents, count) {
3942 len = sg_dma_len(sg);
3943 dma_addr = sg_dma_address(sg);
3944
3945 if (len == 0)
3946 break;
3947
3948 while ((count + 1) < sgt->nents) {
3949 sg_next_iter = sg_next(sg);
3950 len_next = sg_dma_len(sg_next_iter);
3951 dma_addr_next = sg_dma_address(sg_next_iter);
3952
3953 if (len_next == 0)
3954 break;
3955
3956 if ((dma_addr + len == dma_addr_next) &&
3957 (len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
3958 len += len_next;
3959 count++;
3960 sg = sg_next_iter;
3961 } else {
3962 break;
3963 }
3964 }
3965
3966 new_dma_pkt->ctl = user_dma_pkt->ctl;
3967
3968 ctl = le32_to_cpu(user_dma_pkt->ctl);
3969 if (likely(dma_desc_cnt))
3970 ctl &= ~GAUDI_PKT_CTL_EB_MASK;
3971 ctl &= ~GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
3972 new_dma_pkt->ctl = cpu_to_le32(ctl);
3973 new_dma_pkt->tsize = cpu_to_le32(len);
3974
3975 if (dir == DMA_TO_DEVICE) {
3976 new_dma_pkt->src_addr = cpu_to_le64(dma_addr);
3977 new_dma_pkt->dst_addr = cpu_to_le64(device_memory_addr);
3978 } else {
3979 new_dma_pkt->src_addr = cpu_to_le64(device_memory_addr);
3980 new_dma_pkt->dst_addr = cpu_to_le64(dma_addr);
3981 }
3982
3983 if (!user_memset)
3984 device_memory_addr += len;
3985 dma_desc_cnt++;
3986 new_dma_pkt++;
3987 }
3988
3989 if (!dma_desc_cnt) {
3990 dev_err(hdev->dev,
3991 "Error of 0 SG entries when patching DMA packet\n");
3992 return -EFAULT;
3993 }
3994
3995 /* Fix the last dma packet - wrcomp must be as user set it */
3996 new_dma_pkt--;
3997 new_dma_pkt->ctl |= cpu_to_le32(user_wrcomp_en_mask);
3998
3999 *new_dma_pkt_size = dma_desc_cnt * sizeof(struct packet_lin_dma);
4000
4001 return 0;
4002}
4003
4004static int gaudi_patch_cb(struct hl_device *hdev,
4005 struct hl_cs_parser *parser)
4006{
4007 u32 cb_parsed_length = 0;
4008 u32 cb_patched_cur_length = 0;
4009 int rc = 0;
4010
4011	/* user_cb_size is more than 0 so the loop will always be executed */
4012 while (cb_parsed_length < parser->user_cb_size) {
4013 enum packet_id pkt_id;
4014 u16 pkt_size;
4015 u32 new_pkt_size = 0;
4016 struct gaudi_packet *user_pkt, *kernel_pkt;
4017
4018 user_pkt = (struct gaudi_packet *) (uintptr_t)
4019 (parser->user_cb->kernel_address + cb_parsed_length);
4020 kernel_pkt = (struct gaudi_packet *) (uintptr_t)
4021 (parser->patched_cb->kernel_address +
4022 cb_patched_cur_length);
4023
4024 pkt_id = (enum packet_id) (
4025 (le64_to_cpu(user_pkt->header) &
4026 PACKET_HEADER_PACKET_ID_MASK) >>
4027 PACKET_HEADER_PACKET_ID_SHIFT);
4028
4029 pkt_size = gaudi_packet_sizes[pkt_id];
4030 cb_parsed_length += pkt_size;
4031 if (cb_parsed_length > parser->user_cb_size) {
4032 dev_err(hdev->dev,
4033 "packet 0x%x is out of CB boundary\n", pkt_id);
4034 rc = -EINVAL;
4035 break;
4036 }
4037
4038 switch (pkt_id) {
4039 case PACKET_LIN_DMA:
4040 rc = gaudi_patch_dma_packet(hdev, parser,
4041 (struct packet_lin_dma *) user_pkt,
4042 (struct packet_lin_dma *) kernel_pkt,
4043 &new_pkt_size);
4044 cb_patched_cur_length += new_pkt_size;
4045 break;
4046
4047 case PACKET_MSG_PROT:
4048 dev_err(hdev->dev,
4049 "User not allowed to use MSG_PROT\n");
4050 rc = -EPERM;
4051 break;
4052
4053 case PACKET_CP_DMA:
4054 dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
4055 rc = -EPERM;
4056 break;
4057
4058 case PACKET_STOP:
4059 dev_err(hdev->dev, "User not allowed to use STOP\n");
4060 rc = -EPERM;
4061 break;
4062
4063 case PACKET_WREG_32:
4064 case PACKET_WREG_BULK:
4065 case PACKET_MSG_LONG:
4066 case PACKET_MSG_SHORT:
4067 case PACKET_REPEAT:
4068 case PACKET_FENCE:
4069 case PACKET_NOP:
4070 case PACKET_ARB_POINT:
4071 case PACKET_LOAD_AND_EXE:
4072 memcpy(kernel_pkt, user_pkt, pkt_size);
4073 cb_patched_cur_length += pkt_size;
4074 break;
4075
4076 default:
4077 dev_err(hdev->dev, "Invalid packet header 0x%x\n",
4078 pkt_id);
4079 rc = -EINVAL;
4080 break;
4081 }
4082
4083 if (rc)
4084 break;
4085 }
4086
4087 return rc;
4088}
4089
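/*
 * CS parsing when the device MMU is enabled: user addresses are translated by
 * the MMU, so the user CB is copied verbatim into a kernel-owned patched CB
 * (sized with room for the driver's two MSG_PROT packets) and then only
 * validated; no per-packet address patching is required.
 */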
4090static int gaudi_parse_cb_mmu(struct hl_device *hdev,
4091 struct hl_cs_parser *parser)
4092{
4093 u64 patched_cb_handle;
4094 u32 patched_cb_size;
4095 struct hl_cb *user_cb;
4096 int rc;
4097
4098 /*
4099 * The new CB should have space at the end for two MSG_PROT pkt:
4100	 * The new CB should have space at the end for two MSG_PROT packets:
4101	 * 1. A packet that will act as a completion packet
4102	 * 2. A packet that will generate an MSI interrupt
4103 parser->patched_cb_size = parser->user_cb_size +
4104 sizeof(struct packet_msg_prot) * 2;
4105
4106 rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr,
4107 parser->patched_cb_size,
4108 &patched_cb_handle, HL_KERNEL_ASID_ID);
4109
4110 if (rc) {
4111 dev_err(hdev->dev,
4112 "Failed to allocate patched CB for DMA CS %d\n",
4113 rc);
4114 return rc;
4115 }
4116
4117 patched_cb_handle >>= PAGE_SHIFT;
4118 parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
4119 (u32) patched_cb_handle);
4120 /* hl_cb_get should never fail here so use kernel WARN */
4121 WARN(!parser->patched_cb, "DMA CB handle invalid 0x%x\n",
4122 (u32) patched_cb_handle);
4123 if (!parser->patched_cb) {
4124 rc = -EFAULT;
4125 goto out;
4126 }
4127
4128 /*
4129 * The check that parser->user_cb_size <= parser->user_cb->size was done
4130 * in validate_queue_index().
4131 */
4132 memcpy((void *) (uintptr_t) parser->patched_cb->kernel_address,
4133 (void *) (uintptr_t) parser->user_cb->kernel_address,
4134 parser->user_cb_size);
4135
4136 patched_cb_size = parser->patched_cb_size;
4137
4138 /* Validate patched CB instead of user CB */
4139 user_cb = parser->user_cb;
4140 parser->user_cb = parser->patched_cb;
4141 rc = gaudi_validate_cb(hdev, parser, true);
4142 parser->user_cb = user_cb;
4143
4144 if (rc) {
4145 hl_cb_put(parser->patched_cb);
4146 goto out;
4147 }
4148
4149 if (patched_cb_size != parser->patched_cb_size) {
4150 dev_err(hdev->dev, "user CB size mismatch\n");
4151 hl_cb_put(parser->patched_cb);
4152 rc = -EINVAL;
4153 goto out;
4154 }
4155
4156out:
4157 /*
4158 * Always call cb destroy here because we still have 1 reference
4159	 * to it by calling cb_get earlier. After the job is completed,
4160 * cb_put will release it, but here we want to remove it from the
4161 * idr
4162 */
4163 hl_cb_destroy(hdev, &hdev->kernel_cb_mgr,
4164 patched_cb_handle << PAGE_SHIFT);
4165
4166 return rc;
4167}
4168
4169static int gaudi_parse_cb_no_mmu(struct hl_device *hdev,
4170 struct hl_cs_parser *parser)
4171{
4172 u64 patched_cb_handle;
4173 int rc;
4174
4175 rc = gaudi_validate_cb(hdev, parser, false);
4176
4177 if (rc)
4178 goto free_userptr;
4179
4180 rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr,
4181 parser->patched_cb_size,
4182 &patched_cb_handle, HL_KERNEL_ASID_ID);
4183 if (rc) {
4184 dev_err(hdev->dev,
4185 "Failed to allocate patched CB for DMA CS %d\n", rc);
4186 goto free_userptr;
4187 }
4188
4189 patched_cb_handle >>= PAGE_SHIFT;
4190 parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
4191 (u32) patched_cb_handle);
4192 /* hl_cb_get should never fail here so use kernel WARN */
4193 WARN(!parser->patched_cb, "DMA CB handle invalid 0x%x\n",
4194 (u32) patched_cb_handle);
4195 if (!parser->patched_cb) {
4196 rc = -EFAULT;
4197 goto out;
4198 }
4199
4200 rc = gaudi_patch_cb(hdev, parser);
4201
4202 if (rc)
4203 hl_cb_put(parser->patched_cb);
4204
4205out:
4206 /*
4207 * Always call cb destroy here because we still have 1 reference
4208	 * to it by calling cb_get earlier. After the job is completed,
4209 * cb_put will release it, but here we want to remove it from the
4210 * idr
4211 */
4212 hl_cb_destroy(hdev, &hdev->kernel_cb_mgr,
4213 patched_cb_handle << PAGE_SHIFT);
4214
4215free_userptr:
4216 if (rc)
4217 hl_userptr_delete_list(hdev, parser->job_userptr_list);
4218 return rc;
4219}
4220
4221static int gaudi_parse_cb_no_ext_queue(struct hl_device *hdev,
4222 struct hl_cs_parser *parser)
4223{
4224 struct asic_fixed_properties *asic_prop = &hdev->asic_prop;
4225
4226 /* For internal queue jobs just check if CB address is valid */
4227 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
4228 parser->user_cb_size,
4229 asic_prop->sram_user_base_address,
4230 asic_prop->sram_end_address))
4231 return 0;
4232
4233 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
4234 parser->user_cb_size,
4235 asic_prop->dram_user_base_address,
4236 asic_prop->dram_end_address))
4237 return 0;
4238
4239 /* PMMU and HPMMU addresses are equal, check only one of them */
4240 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
4241 parser->user_cb_size,
4242 asic_prop->pmmu.start_addr,
4243 asic_prop->pmmu.end_addr))
4244 return 0;
4245
4246 dev_err(hdev->dev,
4247 "CB address 0x%px + 0x%x for internal QMAN is not valid\n",
4248 parser->user_cb, parser->user_cb_size);
4249
4250 return -EFAULT;
4251}
4252
4253static int gaudi_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser)
4254{
4255 struct gaudi_device *gaudi = hdev->asic_specific;
4256
4257 if (parser->queue_type == QUEUE_TYPE_INT)
4258 return gaudi_parse_cb_no_ext_queue(hdev, parser);
4259
4260 if (gaudi->hw_cap_initialized & HW_CAP_MMU)
4261 return gaudi_parse_cb_mmu(hdev, parser);
4262 else
4263 return gaudi_parse_cb_no_mmu(hdev, parser);
4264}
4265
4266static void gaudi_add_end_of_cb_packets(struct hl_device *hdev,
4267 u64 kernel_address, u32 len,
4268 u64 cq_addr, u32 cq_val, u32 msi_vec,
4269 bool eb)
4270{
4271 struct gaudi_device *gaudi = hdev->asic_specific;
4272 struct packet_msg_prot *cq_pkt;
4273 u32 tmp;
4274
4275 cq_pkt = (struct packet_msg_prot *) (uintptr_t)
4276 (kernel_address + len - (sizeof(struct packet_msg_prot) * 2));
4277
4278 tmp = (PACKET_MSG_PROT << GAUDI_PKT_CTL_OPCODE_SHIFT) |
4279 (1 << GAUDI_PKT_CTL_MB_SHIFT);
4280
4281 if (eb)
4282 tmp |= (1 << GAUDI_PKT_CTL_EB_SHIFT);
4283
4284 cq_pkt->ctl = cpu_to_le32(tmp);
4285 cq_pkt->value = cpu_to_le32(cq_val);
4286 cq_pkt->addr = cpu_to_le64(cq_addr);
4287
4288 cq_pkt++;
4289
4290 tmp = (PACKET_MSG_PROT << GAUDI_PKT_CTL_OPCODE_SHIFT) |
4291 (1 << GAUDI_PKT_CTL_MB_SHIFT);
4292 cq_pkt->ctl = cpu_to_le32(tmp);
4293 cq_pkt->value = cpu_to_le32(1);
4294
4295 if (!gaudi->multi_msi_mode)
4296 msi_vec = 0;
4297
4298 cq_pkt->addr = cpu_to_le64(CFG_BASE + mmPCIE_MSI_INTR_0 + msi_vec * 4);
4299}
4300
4301static void gaudi_update_eq_ci(struct hl_device *hdev, u32 val)
4302{
4303 WREG32(mmCPU_IF_EQ_RD_OFFS, val);
4304}
4305
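/*
 * Fill a device memory range with a 64-bit value by building a kernel CB that
 * holds a single LIN_DMA packet in memset mode and submitting it as a driver
 * job on the PCI DMA queue (QMAN0). Used by gaudi_context_switch() and
 * gaudi_mmu_clear_pgt_range() below.
 */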
4306static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
4307 u32 size, u64 val)
4308{
4309 struct packet_lin_dma *lin_dma_pkt;
4310 struct hl_cs_job *job;
4311 u32 cb_size, ctl;
4312 struct hl_cb *cb;
4313 int rc;
4314
4315 cb = hl_cb_kernel_create(hdev, PAGE_SIZE);
4316 if (!cb)
4317 return -EFAULT;
4318
4319 lin_dma_pkt = (struct packet_lin_dma *) (uintptr_t) cb->kernel_address;
4320 memset(lin_dma_pkt, 0, sizeof(*lin_dma_pkt));
4321 cb_size = sizeof(*lin_dma_pkt);
4322
4323 ctl = ((PACKET_LIN_DMA << GAUDI_PKT_CTL_OPCODE_SHIFT) |
4324 (1 << GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT) |
4325 (1 << GAUDI_PKT_LIN_DMA_CTL_LIN_SHIFT) |
4326 (1 << GAUDI_PKT_CTL_RB_SHIFT) |
4327 (1 << GAUDI_PKT_CTL_MB_SHIFT));
4328 lin_dma_pkt->ctl = cpu_to_le32(ctl);
4329 lin_dma_pkt->src_addr = cpu_to_le64(val);
4330 lin_dma_pkt->dst_addr |= cpu_to_le64(addr);
4331 lin_dma_pkt->tsize = cpu_to_le32(size);
4332
4333 job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
4334 if (!job) {
4335 dev_err(hdev->dev, "Failed to allocate a new job\n");
4336 rc = -ENOMEM;
4337 goto release_cb;
4338 }
4339
4340 job->id = 0;
4341 job->user_cb = cb;
4342 job->user_cb->cs_cnt++;
4343 job->user_cb_size = cb_size;
4344 job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
4345 job->patched_cb = job->user_cb;
4346 job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);
4347
4348 hl_debugfs_add_job(hdev, job);
4349
4350 rc = gaudi_send_job_on_qman0(hdev, job);
4351
4352 hl_debugfs_remove_job(hdev, job);
4353 kfree(job);
4354 cb->cs_cnt--;
4355
4356release_cb:
4357 hl_cb_put(cb);
4358 hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
4359
4360 return rc;
4361}
4362
4363static void gaudi_restore_sm_registers(struct hl_device *hdev)
4364{
4365 int i;
4366
4367 for (i = 0 ; i < NUM_OF_SOB_IN_BLOCK << 2 ; i += 4) {
4368 WREG32(mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0 + i, 0);
4369 WREG32(mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + i, 0);
4370 WREG32(mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_SOB_OBJ_0 + i, 0);
4371 }
4372
4373 for (i = 0 ; i < NUM_OF_MONITORS_IN_BLOCK << 2 ; i += 4) {
4374 WREG32(mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0 + i, 0);
4375 WREG32(mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_MON_STATUS_0 + i, 0);
4376 WREG32(mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_MON_STATUS_0 + i, 0);
4377 }
4378
4379 i = GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT * 4;
4380
4381 for (; i < NUM_OF_SOB_IN_BLOCK << 2 ; i += 4)
4382 WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + i, 0);
4383
4384 i = GAUDI_FIRST_AVAILABLE_W_S_MONITOR * 4;
4385
4386 for (; i < NUM_OF_MONITORS_IN_BLOCK << 2 ; i += 4)
4387 WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0 + i, 0);
4388}
4389
4390static void gaudi_restore_dma_registers(struct hl_device *hdev)
4391{
4392 u32 sob_delta = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_1 -
4393 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
4394 int i;
4395
4396 for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
4397 u64 sob_addr = CFG_BASE +
4398 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0 +
4399 (i * sob_delta);
4400 u32 dma_offset = i * DMA_CORE_OFFSET;
4401
4402 WREG32(mmDMA0_CORE_WR_COMP_ADDR_LO + dma_offset,
4403 lower_32_bits(sob_addr));
4404 WREG32(mmDMA0_CORE_WR_COMP_ADDR_HI + dma_offset,
4405 upper_32_bits(sob_addr));
4406 WREG32(mmDMA0_CORE_WR_COMP_WDATA + dma_offset, 0x80000001);
4407
4408 /* For DMAs 2-7, need to restore WR_AWUSER_31_11 as it can be
4409 * modified by the user for SRAM reduction
4410 */
4411 if (i > 1)
4412 WREG32(mmDMA0_CORE_WR_AWUSER_31_11 + dma_offset,
4413 0x00000001);
4414 }
4415}
4416
4417static void gaudi_restore_qm_registers(struct hl_device *hdev)
4418{
4419 u32 qman_offset;
4420 int i;
4421
4422 for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
4423 qman_offset = i * DMA_QMAN_OFFSET;
4424 WREG32(mmDMA0_QM_ARB_CFG_0 + qman_offset, 0);
4425 }
4426
4427 for (i = 0 ; i < MME_NUMBER_OF_MASTER_ENGINES ; i++) {
4428 qman_offset = i * (mmMME2_QM_BASE - mmMME0_QM_BASE);
4429 WREG32(mmMME0_QM_ARB_CFG_0 + qman_offset, 0);
4430 }
4431
4432 for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
4433 qman_offset = i * TPC_QMAN_OFFSET;
4434 WREG32(mmTPC0_QM_ARB_CFG_0 + qman_offset, 0);
4435 }
4436}
4437
4438static void gaudi_restore_user_registers(struct hl_device *hdev)
4439{
4440 gaudi_restore_sm_registers(hdev);
4441 gaudi_restore_dma_registers(hdev);
4442 gaudi_restore_qm_registers(hdev);
4443}
4444
4445static int gaudi_context_switch(struct hl_device *hdev, u32 asid)
4446{
4447 struct asic_fixed_properties *prop = &hdev->asic_prop;
4448 u64 addr = prop->sram_user_base_address;
4449 u32 size = hdev->pldm ? 0x10000 :
4450 (prop->sram_size - SRAM_USER_BASE_OFFSET);
4451 u64 val = 0x7777777777777777ull;
4452 int rc;
4453
4454 rc = gaudi_memset_device_memory(hdev, addr, size, val);
4455 if (rc) {
4456 dev_err(hdev->dev, "Failed to clear SRAM in context switch\n");
4457 return rc;
4458 }
4459
4460 gaudi_mmu_prepare(hdev, asid);
4461
4462 gaudi_restore_user_registers(hdev);
4463
4464 return 0;
4465}
4466
4467static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev)
4468{
4469 struct asic_fixed_properties *prop = &hdev->asic_prop;
4470 struct gaudi_device *gaudi = hdev->asic_specific;
4471 u64 addr = prop->mmu_pgt_addr;
4472 u32 size = prop->mmu_pgt_size + MMU_CACHE_MNG_SIZE;
4473
4474 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
4475 return 0;
4476
4477 return gaudi_memset_device_memory(hdev, addr, size, 0);
4478}
4479
4480static void gaudi_restore_phase_topology(struct hl_device *hdev)
4481{
4482
4483}
4484
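/*
 * The debugfs access helpers below dispatch on the target address range:
 * configuration space goes through register I/O (refused while clock gating
 * is enabled), SRAM goes through its PCI BAR, HBM is reached by temporarily
 * sliding the HBM BAR window over the target address and then restoring it,
 * and host physical memory is touched directly only when no IOMMU is present.
 */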
4485static int gaudi_debugfs_read32(struct hl_device *hdev, u64 addr, u32 *val)
4486{
4487 struct asic_fixed_properties *prop = &hdev->asic_prop;
4488 struct gaudi_device *gaudi = hdev->asic_specific;
4489 u64 hbm_bar_addr;
4490 int rc = 0;
4491
4492 if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
4493 if (gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) {
4494 dev_err_ratelimited(hdev->dev,
4495 "Can't read register - clock gating is enabled!\n");
4496 rc = -EFAULT;
4497 } else {
4498 *val = RREG32(addr - CFG_BASE);
4499 }
4500 } else if ((addr >= SRAM_BASE_ADDR) &&
4501 (addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) {
4502 *val = readl(hdev->pcie_bar[SRAM_BAR_ID] +
4503 (addr - SRAM_BASE_ADDR));
4504 } else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) {
4505 u64 bar_base_addr = DRAM_PHYS_BASE +
4506 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
4507
4508 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
4509 if (hbm_bar_addr != U64_MAX) {
4510 *val = readl(hdev->pcie_bar[HBM_BAR_ID] +
4511 (addr - bar_base_addr));
4512
4513 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
4514 hbm_bar_addr);
4515 }
4516 if (hbm_bar_addr == U64_MAX)
4517 rc = -EIO;
4518 } else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
4519 *val = *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE);
4520 } else {
4521 rc = -EFAULT;
4522 }
4523
4524 return rc;
4525}
4526
4527static int gaudi_debugfs_write32(struct hl_device *hdev, u64 addr, u32 val)
4528{
4529 struct asic_fixed_properties *prop = &hdev->asic_prop;
4530 struct gaudi_device *gaudi = hdev->asic_specific;
4531 u64 hbm_bar_addr;
4532 int rc = 0;
4533
4534 if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
4535 if (gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) {
4536 dev_err_ratelimited(hdev->dev,
4537 "Can't write register - clock gating is enabled!\n");
4538 rc = -EFAULT;
4539 } else {
4540 WREG32(addr - CFG_BASE, val);
4541 }
4542 } else if ((addr >= SRAM_BASE_ADDR) &&
4543 (addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) {
4544 writel(val, hdev->pcie_bar[SRAM_BAR_ID] +
4545 (addr - SRAM_BASE_ADDR));
4546 } else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) {
4547 u64 bar_base_addr = DRAM_PHYS_BASE +
4548 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
4549
4550 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
4551 if (hbm_bar_addr != U64_MAX) {
4552 writel(val, hdev->pcie_bar[HBM_BAR_ID] +
4553 (addr - bar_base_addr));
4554
4555 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
4556 hbm_bar_addr);
4557 }
4558 if (hbm_bar_addr == U64_MAX)
4559 rc = -EIO;
4560 } else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
4561 *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;
4562 } else {
4563 rc = -EFAULT;
4564 }
4565
4566 return rc;
4567}
4568
4569static int gaudi_debugfs_read64(struct hl_device *hdev, u64 addr, u64 *val)
4570{
4571 struct asic_fixed_properties *prop = &hdev->asic_prop;
4572 struct gaudi_device *gaudi = hdev->asic_specific;
4573 u64 hbm_bar_addr;
4574 int rc = 0;
4575
4576 if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
4577 if (gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) {
4578 dev_err_ratelimited(hdev->dev,
4579 "Can't read register - clock gating is enabled!\n");
4580 rc = -EFAULT;
4581 } else {
4582 u32 val_l = RREG32(addr - CFG_BASE);
4583 u32 val_h = RREG32(addr + sizeof(u32) - CFG_BASE);
4584
4585 *val = (((u64) val_h) << 32) | val_l;
4586 }
4587 } else if ((addr >= SRAM_BASE_ADDR) &&
4588 (addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) {
4589 *val = readq(hdev->pcie_bar[SRAM_BAR_ID] +
4590 (addr - SRAM_BASE_ADDR));
4591 } else if (addr <=
4592 DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) {
4593 u64 bar_base_addr = DRAM_PHYS_BASE +
4594 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
4595
4596 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
4597 if (hbm_bar_addr != U64_MAX) {
4598 *val = readq(hdev->pcie_bar[HBM_BAR_ID] +
4599 (addr - bar_base_addr));
4600
4601 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
4602 hbm_bar_addr);
4603 }
4604 if (hbm_bar_addr == U64_MAX)
4605 rc = -EIO;
4606 } else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
4607 *val = *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE);
4608 } else {
4609 rc = -EFAULT;
4610 }
4611
4612 return rc;
4613}
4614
4615static int gaudi_debugfs_write64(struct hl_device *hdev, u64 addr, u64 val)
4616{
4617 struct asic_fixed_properties *prop = &hdev->asic_prop;
4618 struct gaudi_device *gaudi = hdev->asic_specific;
4619 u64 hbm_bar_addr;
4620 int rc = 0;
4621
4622 if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
4623 if (gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) {
4624 dev_err_ratelimited(hdev->dev,
4625 "Can't write register - clock gating is enabled!\n");
4626 rc = -EFAULT;
4627 } else {
4628 WREG32(addr - CFG_BASE, lower_32_bits(val));
4629 WREG32(addr + sizeof(u32) - CFG_BASE,
4630 upper_32_bits(val));
4631 }
4632 } else if ((addr >= SRAM_BASE_ADDR) &&
4633 (addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) {
4634 writeq(val, hdev->pcie_bar[SRAM_BAR_ID] +
4635 (addr - SRAM_BASE_ADDR));
4636 } else if (addr <=
4637 DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) {
4638 u64 bar_base_addr = DRAM_PHYS_BASE +
4639 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
4640
4641 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
4642 if (hbm_bar_addr != U64_MAX) {
4643 writeq(val, hdev->pcie_bar[HBM_BAR_ID] +
4644 (addr - bar_base_addr));
4645
4646 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
4647 hbm_bar_addr);
4648 }
4649 if (hbm_bar_addr == U64_MAX)
4650 rc = -EIO;
4651 } else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
4652 *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;
4653 } else {
4654 rc = -EFAULT;
4655 }
4656
4657 return rc;
4658}
4659
4660static u64 gaudi_read_pte(struct hl_device *hdev, u64 addr)
4661{
4662 struct gaudi_device *gaudi = hdev->asic_specific;
4663
4664 if (hdev->hard_reset_pending)
4665 return U64_MAX;
4666
4667 return readq(hdev->pcie_bar[HBM_BAR_ID] +
4668 (addr - gaudi->hbm_bar_cur_addr));
4669}
4670
4671static void gaudi_write_pte(struct hl_device *hdev, u64 addr, u64 val)
4672{
4673 struct gaudi_device *gaudi = hdev->asic_specific;
4674
4675 if (hdev->hard_reset_pending)
4676 return;
4677
4678 writeq(val, hdev->pcie_bar[HBM_BAR_ID] +
4679 (addr - gaudi->hbm_bar_cur_addr));
4680}
4681
4682static void gaudi_mmu_prepare_reg(struct hl_device *hdev, u64 reg, u32 asid)
4683{
4684 /* mask to zero the MMBP and ASID bits */
4685 WREG32_AND(reg, ~0x7FF);
4686 WREG32_OR(reg, asid);
4687}
4688
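/*
 * Program the given ASID (with the MMBP bypass bit cleared) into the
 * NON_SECURE_PROPS / ARUSER / AWUSER registers of every DMA, TPC and MME
 * engine, as well as the PSOC trace unit. Clock gating is disabled around the
 * register writes (under clk_gate_mutex) so the gated engine blocks can be
 * accessed.
 */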
4689static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid)
4690{
4691 struct gaudi_device *gaudi = hdev->asic_specific;
4692
4693 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
4694 return;
4695
4696 if (asid & ~DMA0_QM_GLBL_NON_SECURE_PROPS_0_ASID_MASK) {
4697 WARN(1, "asid %u is too big\n", asid);
4698 return;
4699 }
4700
4701 mutex_lock(&gaudi->clk_gate_mutex);
4702
4703 hdev->asic_funcs->disable_clock_gating(hdev);
4704
4705 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_0, asid);
4706 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_1, asid);
4707 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_2, asid);
4708 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_3, asid);
4709 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_4, asid);
4710
4711 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_0, asid);
4712 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_1, asid);
4713 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_2, asid);
4714 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_3, asid);
4715 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_4, asid);
4716
4717 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_0, asid);
4718 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_1, asid);
4719 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_2, asid);
4720 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_3, asid);
4721 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_4, asid);
4722
4723 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_0, asid);
4724 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_1, asid);
4725 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_2, asid);
4726 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_3, asid);
4727 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_4, asid);
4728
4729 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_0, asid);
4730 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_1, asid);
4731 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_2, asid);
4732 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_3, asid);
4733 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_4, asid);
4734
4735 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_0, asid);
4736 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_1, asid);
4737 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_2, asid);
4738 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_3, asid);
4739 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_4, asid);
4740
4741 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_0, asid);
4742 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_1, asid);
4743 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_2, asid);
4744 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_3, asid);
4745 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_4, asid);
4746
4747 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_0, asid);
4748 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_1, asid);
4749 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_2, asid);
4750 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_3, asid);
4751 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_4, asid);
4752
4753 gaudi_mmu_prepare_reg(hdev, mmDMA0_CORE_NON_SECURE_PROPS, asid);
4754 gaudi_mmu_prepare_reg(hdev, mmDMA1_CORE_NON_SECURE_PROPS, asid);
4755 gaudi_mmu_prepare_reg(hdev, mmDMA2_CORE_NON_SECURE_PROPS, asid);
4756 gaudi_mmu_prepare_reg(hdev, mmDMA3_CORE_NON_SECURE_PROPS, asid);
4757 gaudi_mmu_prepare_reg(hdev, mmDMA4_CORE_NON_SECURE_PROPS, asid);
4758 gaudi_mmu_prepare_reg(hdev, mmDMA5_CORE_NON_SECURE_PROPS, asid);
4759 gaudi_mmu_prepare_reg(hdev, mmDMA6_CORE_NON_SECURE_PROPS, asid);
4760 gaudi_mmu_prepare_reg(hdev, mmDMA7_CORE_NON_SECURE_PROPS, asid);
4761
4762 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_0, asid);
4763 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_1, asid);
4764 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_2, asid);
4765 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_3, asid);
4766 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_4, asid);
4767 gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_ARUSER_LO, asid);
4768 gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_AWUSER_LO, asid);
4769
4770 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_0, asid);
4771 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_1, asid);
4772 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_2, asid);
4773 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_3, asid);
4774 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_4, asid);
4775 gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_ARUSER_LO, asid);
4776 gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_AWUSER_LO, asid);
4777
4778 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_0, asid);
4779 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_1, asid);
4780 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_2, asid);
4781 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_3, asid);
4782 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_4, asid);
4783 gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_ARUSER_LO, asid);
4784 gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_AWUSER_LO, asid);
4785
4786 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_0, asid);
4787 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_1, asid);
4788 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_2, asid);
4789 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_3, asid);
4790 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_4, asid);
4791 gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_ARUSER_LO, asid);
4792 gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_AWUSER_LO, asid);
4793
4794 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_0, asid);
4795 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_1, asid);
4796 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_2, asid);
4797 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_3, asid);
4798 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_4, asid);
4799 gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_ARUSER_LO, asid);
4800 gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_AWUSER_LO, asid);
4801
4802 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_0, asid);
4803 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_1, asid);
4804 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_2, asid);
4805 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_3, asid);
4806 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_4, asid);
4807 gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_ARUSER_LO, asid);
4808 gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_AWUSER_LO, asid);
4809
4810 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_0, asid);
4811 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_1, asid);
4812 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_2, asid);
4813 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_3, asid);
4814 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_4, asid);
4815 gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_ARUSER_LO, asid);
4816 gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_AWUSER_LO, asid);
4817
4818 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_0, asid);
4819 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_1, asid);
4820 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_2, asid);
4821 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_3, asid);
4822 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_4, asid);
4823 gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_ARUSER_LO, asid);
4824 gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_AWUSER_LO, asid);
4825
4826 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_0, asid);
4827 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_1, asid);
4828 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_2, asid);
4829 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_3, asid);
4830 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_4, asid);
4831 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_0, asid);
4832 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_1, asid);
4833 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_2, asid);
4834 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_3, asid);
4835 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_4, asid);
4836
4837 gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER0, asid);
4838 gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER1, asid);
4839 gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER0, asid);
4840 gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER1, asid);
4841 gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER0, asid);
4842 gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER1, asid);
4843 gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER0, asid);
4844 gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER1, asid);
4845 gaudi_mmu_prepare_reg(hdev, mmMME0_ACC_WBC, asid);
4846 gaudi_mmu_prepare_reg(hdev, mmMME1_ACC_WBC, asid);
4847 gaudi_mmu_prepare_reg(hdev, mmMME2_ACC_WBC, asid);
4848 gaudi_mmu_prepare_reg(hdev, mmMME3_ACC_WBC, asid);
4849
4850 gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_ARUSER, asid);
4851 gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_AWUSER, asid);
4852
4853 hdev->asic_funcs->enable_clock_gating(hdev);
4854
4855 mutex_unlock(&gaudi->clk_gate_mutex);
4856}
4857
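/*
 * Submit a driver-internal job on the PCI DMA queue. The device must be idle;
 * a MSG_PROT fence packet at the end of the patched CB writes
 * GAUDI_QMAN0_FENCE_VAL to a host buffer, the DMA core protection bit is set
 * for the duration of the job, and the driver polls the fence buffer for
 * completion (with a longer timeout on PLDM).
 */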
4858static int gaudi_send_job_on_qman0(struct hl_device *hdev,
4859 struct hl_cs_job *job)
4860{
4861 struct packet_msg_prot *fence_pkt;
4862 u32 *fence_ptr;
4863 dma_addr_t fence_dma_addr;
4864 struct hl_cb *cb;
4865 u32 tmp, timeout, dma_offset;
4866 int rc;
4867
4868 if (hdev->pldm)
4869 timeout = GAUDI_PLDM_QMAN0_TIMEOUT_USEC;
4870 else
4871 timeout = HL_DEVICE_TIMEOUT_USEC;
4872
4873 if (!hdev->asic_funcs->is_device_idle(hdev, NULL, NULL)) {
4874 dev_err_ratelimited(hdev->dev,
4875 "Can't send driver job on QMAN0 because the device is not idle\n");
4876 return -EBUSY;
4877 }
4878
4879 fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL,
4880 &fence_dma_addr);
4881 if (!fence_ptr) {
4882 dev_err(hdev->dev,
4883 "Failed to allocate fence memory for QMAN0\n");
4884 return -ENOMEM;
4885 }
4886
4887 cb = job->patched_cb;
4888
4889 fence_pkt = (struct packet_msg_prot *) (uintptr_t) (cb->kernel_address +
4890 job->job_cb_size - sizeof(struct packet_msg_prot));
4891
4892 tmp = (PACKET_MSG_PROT << GAUDI_PKT_CTL_OPCODE_SHIFT) |
4893 (1 << GAUDI_PKT_CTL_EB_SHIFT) |
4894 (1 << GAUDI_PKT_CTL_MB_SHIFT);
4895 fence_pkt->ctl = cpu_to_le32(tmp);
4896 fence_pkt->value = cpu_to_le32(GAUDI_QMAN0_FENCE_VAL);
4897 fence_pkt->addr = cpu_to_le64(fence_dma_addr);
4898
4899 dma_offset = gaudi_dma_assignment[GAUDI_PCI_DMA_1] * DMA_CORE_OFFSET;
4900
4901 WREG32_OR(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_VAL_SHIFT));
4902
4903 rc = hl_hw_queue_send_cb_no_cmpl(hdev, GAUDI_QUEUE_ID_DMA_0_0,
4904 job->job_cb_size, cb->bus_address);
4905 if (rc) {
4906 dev_err(hdev->dev, "Failed to send CB on QMAN0, %d\n", rc);
4907 goto free_fence_ptr;
4908 }
4909
4910 rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp,
4911 (tmp == GAUDI_QMAN0_FENCE_VAL), 1000,
4912 timeout, true);
4913
4914 hl_hw_queue_inc_ci_kernel(hdev, GAUDI_QUEUE_ID_DMA_0_0);
4915
4916 if (rc == -ETIMEDOUT) {
4917 dev_err(hdev->dev, "QMAN0 Job timeout (0x%x)\n", tmp);
4918 goto free_fence_ptr;
4919 }
4920
4921free_fence_ptr:
4922 WREG32_AND(mmDMA0_CORE_PROT + dma_offset,
4923 ~BIT(DMA0_CORE_PROT_VAL_SHIFT));
4924
4925 hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
4926 fence_dma_addr);
4927 return rc;
4928}
4929
4930static void gaudi_get_event_desc(u16 event_type, char *desc, size_t size)
4931{
4932	if (event_type >= GAUDI_EVENT_SIZE)
4933 goto event_not_supported;
4934
4935	if (!gaudi_irq_map_table[event_type].valid)
4936 goto event_not_supported;
4937
4938	snprintf(desc, size, "%s", gaudi_irq_map_table[event_type].name);
4939
4940 return;
4941
4942event_not_supported:
4943 snprintf(desc, size, "N/A");
4944}
4945
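/*
 * A single DMA_IF RAZWI initiator ID is shared by two DMA engines. Read the
 * ERR_CAUSE register of both candidates and use the HBW read/write error bit
 * to decide which engine triggered the event; if both or neither flag it,
 * report the ambiguity.
 */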
4946static const char *gaudi_get_razwi_initiator_dma_name(struct hl_device *hdev,
4947 u32 x_y, bool is_write)
4948{
4949 u32 dma_id[2], dma_offset, err_cause[2], mask, i;
4950
4951 mask = is_write ? DMA0_CORE_ERR_CAUSE_HBW_WR_ERR_MASK :
4952 DMA0_CORE_ERR_CAUSE_HBW_RD_ERR_MASK;
4953
4954 switch (x_y) {
4955 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
4956 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
4957 dma_id[0] = 0;
4958 dma_id[1] = 2;
4959 break;
4960 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
4961 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
4962 dma_id[0] = 1;
4963 dma_id[1] = 3;
4964 break;
4965 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
4966 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
4967 dma_id[0] = 4;
4968 dma_id[1] = 6;
4969 break;
4970 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
4971 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
4972 dma_id[0] = 5;
4973 dma_id[1] = 7;
4974 break;
4975 default:
4976 goto unknown_initiator;
4977 }
4978
4979 for (i = 0 ; i < 2 ; i++) {
4980 dma_offset = dma_id[i] * DMA_CORE_OFFSET;
4981 err_cause[i] = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
4982 }
4983
4984 switch (x_y) {
4985 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
4986 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
4987 if ((err_cause[0] & mask) && !(err_cause[1] & mask))
4988 return "DMA0";
4989 else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
4990 return "DMA2";
4991 else
4992 return "DMA0 or DMA2";
4993 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
4994 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
4995 if ((err_cause[0] & mask) && !(err_cause[1] & mask))
4996 return "DMA1";
4997 else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
4998 return "DMA3";
4999 else
5000 return "DMA1 or DMA3";
5001 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
5002 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
5003 if ((err_cause[0] & mask) && !(err_cause[1] & mask))
5004 return "DMA4";
5005 else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
5006 return "DMA6";
5007 else
5008 return "DMA4 or DMA6";
5009 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
5010 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
5011 if ((err_cause[0] & mask) && !(err_cause[1] & mask))
5012 return "DMA5";
5013 else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
5014 return "DMA7";
5015 else
5016 return "DMA5 or DMA7";
5017 }
5018
5019unknown_initiator:
5020 return "unknown initiator";
5021}
5022
5023static const char *gaudi_get_razwi_initiator_name(struct hl_device *hdev,
5024 bool is_write)
5025{
5026 u32 val, x_y, axi_id;
5027
5028 val = is_write ? RREG32(mmMMU_UP_RAZWI_WRITE_ID) :
5029 RREG32(mmMMU_UP_RAZWI_READ_ID);
5030 x_y = val & ((RAZWI_INITIATOR_Y_MASK << RAZWI_INITIATOR_Y_SHIFT) |
5031 (RAZWI_INITIATOR_X_MASK << RAZWI_INITIATOR_X_SHIFT));
5032 axi_id = val & (RAZWI_INITIATOR_AXI_ID_MASK <<
5033 RAZWI_INITIATOR_AXI_ID_SHIFT);
5034
5035 switch (x_y) {
5036 case RAZWI_INITIATOR_ID_X_Y_TPC0_NIC0:
5037 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
5038 return "TPC0";
5039 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
5040 return "NIC0";
5041 break;
5042 case RAZWI_INITIATOR_ID_X_Y_TPC1:
5043 return "TPC1";
5044 case RAZWI_INITIATOR_ID_X_Y_MME0_0:
5045 case RAZWI_INITIATOR_ID_X_Y_MME0_1:
5046 return "MME0";
5047 case RAZWI_INITIATOR_ID_X_Y_MME1_0:
5048 case RAZWI_INITIATOR_ID_X_Y_MME1_1:
5049 return "MME1";
5050 case RAZWI_INITIATOR_ID_X_Y_TPC2:
5051 return "TPC2";
5052 case RAZWI_INITIATOR_ID_X_Y_TPC3_PCI_CPU_PSOC:
5053 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
5054 return "TPC3";
5055 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PCI))
5056 return "PCI";
5057 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_CPU))
5058 return "CPU";
5059 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PSOC))
5060 return "PSOC";
5061 break;
5062 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
5063 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
5064 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
5065 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
5066 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
5067 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
5068 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
5069 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
5070 return gaudi_get_razwi_initiator_dma_name(hdev, x_y, is_write);
5071 case RAZWI_INITIATOR_ID_X_Y_TPC4_NIC1_NIC2:
5072 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
5073 return "TPC4";
5074 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
5075 return "NIC1";
5076 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT))
5077 return "NIC2";
5078 break;
5079 case RAZWI_INITIATOR_ID_X_Y_TPC5:
5080 return "TPC5";
5081 case RAZWI_INITIATOR_ID_X_Y_MME2_0:
5082 case RAZWI_INITIATOR_ID_X_Y_MME2_1:
5083 return "MME2";
5084 case RAZWI_INITIATOR_ID_X_Y_MME3_0:
5085 case RAZWI_INITIATOR_ID_X_Y_MME3_1:
5086 return "MME3";
5087 case RAZWI_INITIATOR_ID_X_Y_TPC6:
5088 return "TPC6";
5089 case RAZWI_INITIATOR_ID_X_Y_TPC7_NIC4_NIC5:
5090 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
5091 return "TPC7";
5092 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
5093 return "NIC4";
5094 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT))
5095 return "NIC5";
5096 break;
5097 default:
5098 break;
5099 }
5100
5101 dev_err(hdev->dev,
5102 "Unknown RAZWI initiator ID 0x%x [Y=%d, X=%d, AXI_ID=%d]\n",
5103 val,
5104 (val >> RAZWI_INITIATOR_Y_SHIFT) & RAZWI_INITIATOR_Y_MASK,
5105 (val >> RAZWI_INITIATOR_X_SHIFT) & RAZWI_INITIATOR_X_MASK,
5106 (val >> RAZWI_INITIATOR_AXI_ID_SHIFT) &
5107 RAZWI_INITIATOR_AXI_ID_MASK);
5108
5109 return "unknown initiator";
5110}
5111
5112static void gaudi_print_razwi_info(struct hl_device *hdev)
5113{
5114 if (RREG32(mmMMU_UP_RAZWI_WRITE_VLD)) {
5115 dev_err_ratelimited(hdev->dev,
5116 "RAZWI event caused by illegal write of %s\n",
5117 gaudi_get_razwi_initiator_name(hdev, true));
5118 WREG32(mmMMU_UP_RAZWI_WRITE_VLD, 0);
5119 }
5120
5121 if (RREG32(mmMMU_UP_RAZWI_READ_VLD)) {
5122 dev_err_ratelimited(hdev->dev,
5123 "RAZWI event caused by illegal read of %s\n",
5124 gaudi_get_razwi_initiator_name(hdev, false));
5125 WREG32(mmMMU_UP_RAZWI_READ_VLD, 0);
5126 }
5127}
5128
5129static void gaudi_print_mmu_error_info(struct hl_device *hdev)
5130{
5131 struct gaudi_device *gaudi = hdev->asic_specific;
5132 u64 addr;
5133 u32 val;
5134
5135 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
5136 return;
5137
5138 val = RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE);
5139 if (val & MMU_UP_PAGE_ERROR_CAPTURE_ENTRY_VALID_MASK) {
5140 addr = val & MMU_UP_PAGE_ERROR_CAPTURE_VA_49_32_MASK;
5141 addr <<= 32;
5142 addr |= RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE_VA);
5143
5144 dev_err_ratelimited(hdev->dev, "MMU page fault on va 0x%llx\n",
5145 addr);
5146
5147 WREG32(mmMMU_UP_PAGE_ERROR_CAPTURE, 0);
5148 }
5149
5150 val = RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE);
5151 if (val & MMU_UP_ACCESS_ERROR_CAPTURE_ENTRY_VALID_MASK) {
5152 addr = val & MMU_UP_ACCESS_ERROR_CAPTURE_VA_49_32_MASK;
5153 addr <<= 32;
5154 addr |= RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE_VA);
5155
5156 dev_err_ratelimited(hdev->dev,
5157 "MMU access error on va 0x%llx\n", addr);
5158
5159 WREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE, 0);
5160 }
5161}
5162
5163/*
5164 * +-------------------+------------------------------------------------------+
5165 * | Configuration Reg | Description |
5166 * | Address | |
5167 * +-------------------+------------------------------------------------------+
5168 * | 0xF30 - 0xF3F |ECC single error indication (1 bit per memory wrapper)|
5169 * | |0xF30 memory wrappers 31:0 (MSB to LSB) |
5170 * | |0xF34 memory wrappers 63:32 |
5171 * | |0xF38 memory wrappers 95:64 |
5172 * | |0xF3C memory wrappers 127:96 |
5173 * +-------------------+------------------------------------------------------+
5174 * | 0xF40 - 0xF4F |ECC double error indication (1 bit per memory wrapper)|
5175 * | |0xF40 memory wrappers 31:0 (MSB to LSB) |
5176 * | |0xF44 memory wrappers 63:32 |
5177 * | |0xF48 memory wrappers 95:64 |
5178 * | |0xF4C memory wrappers 127:96 |
5179 * +-------------------+------------------------------------------------------+
5180 */
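/*
 * For illustration only: a block that reports 90 memory wrappers (as in the
 * TPC cases handled by gaudi_print_ecc_info() below) needs 90 / 32 + 1 = 3
 * indication registers, so the dump reads the SERR0/DERR0 register and the
 * two following ones at offsets +0x4 and +0x8 from the block base.
 */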
5181static void gaudi_print_ecc_info_generic(struct hl_device *hdev,
5182 const char *block_name,
5183 u64 block_address, int num_memories,
5184 bool derr, bool disable_clock_gating)
5185{
5186 struct gaudi_device *gaudi = hdev->asic_specific;
5187 int num_mem_regs = num_memories / 32 + ((num_memories % 32) ? 1 : 0);
5188
5189 if (block_address >= CFG_BASE)
5190 block_address -= CFG_BASE;
5191
5192 if (derr)
5193 block_address += GAUDI_ECC_DERR0_OFFSET;
5194 else
5195 block_address += GAUDI_ECC_SERR0_OFFSET;
5196
5197 if (disable_clock_gating) {
5198 mutex_lock(&gaudi->clk_gate_mutex);
5199 hdev->asic_funcs->disable_clock_gating(hdev);
5200 }
5201
5202 switch (num_mem_regs) {
5203 case 1:
5204 dev_err(hdev->dev,
5205 "%s ECC indication: 0x%08x\n",
5206 block_name, RREG32(block_address));
5207 break;
5208 case 2:
5209 dev_err(hdev->dev,
5210 "%s ECC indication: 0x%08x 0x%08x\n",
5211 block_name,
5212 RREG32(block_address), RREG32(block_address + 4));
5213 break;
5214 case 3:
5215 dev_err(hdev->dev,
5216 "%s ECC indication: 0x%08x 0x%08x 0x%08x\n",
5217 block_name,
5218 RREG32(block_address), RREG32(block_address + 4),
5219 RREG32(block_address + 8));
5220 break;
5221 case 4:
5222 dev_err(hdev->dev,
5223 "%s ECC indication: 0x%08x 0x%08x 0x%08x 0x%08x\n",
5224 block_name,
5225 RREG32(block_address), RREG32(block_address + 4),
5226 RREG32(block_address + 8), RREG32(block_address + 0xc));
5227 break;
5228 default:
5229 break;
5230
5231 }
5232
5233 if (disable_clock_gating) {
5234 hdev->asic_funcs->enable_clock_gating(hdev);
5235 mutex_unlock(&gaudi->clk_gate_mutex);
5236 }
5237}
5238
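/*
 * A rough sketch of the register walk below: glbl_sts_addr is expected to
 * point at GLBL_STS1 of stream 0, with consecutive 32-bit registers
 * following it, so glbl_sts_addr + 4 * QMAN_STREAMS lands on the lower CP
 * status. The status bits are write-1-to-clear, hence the accumulated
 * clear value written back per register.
 */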
5239static void gaudi_handle_qman_err_generic(struct hl_device *hdev,
5240 const char *qm_name,
5241 u64 glbl_sts_addr,
5242 u64 arb_err_addr)
5243{
5244 u32 i, j, glbl_sts_val, arb_err_val, glbl_sts_clr_val;
5245 char reg_desc[32];
5246
5247 /* Iterate through all stream GLBL_STS1 registers + Lower CP */
5248 for (i = 0 ; i < QMAN_STREAMS + 1 ; i++) {
5249 glbl_sts_clr_val = 0;
5250 glbl_sts_val = RREG32(glbl_sts_addr + 4 * i);
5251
5252 if (!glbl_sts_val)
5253 continue;
5254
5255 if (i == QMAN_STREAMS)
5256 snprintf(reg_desc, ARRAY_SIZE(reg_desc), "LowerCP");
5257 else
5258 snprintf(reg_desc, ARRAY_SIZE(reg_desc), "stream%u", i);
5259
5260 for (j = 0 ; j < GAUDI_NUM_OF_QM_ERR_CAUSE ; j++) {
5261 if (glbl_sts_val & BIT(j)) {
5262 dev_err_ratelimited(hdev->dev,
5263 "%s %s. err cause: %s\n",
5264 qm_name, reg_desc,
5265 gaudi_qman_error_cause[j]);
5266 glbl_sts_clr_val |= BIT(j);
5267 }
5268 }
5269
5270 /* Write 1 to clear errors */
5271 WREG32(glbl_sts_addr + 4 * i, glbl_sts_clr_val);
5272 }
5273
5274 arb_err_val = RREG32(arb_err_addr);
5275
5276 if (!arb_err_val)
5277 return;
5278
5279 for (j = 0 ; j < GAUDI_NUM_OF_QM_ARB_ERR_CAUSE ; j++) {
5280 if (arb_err_val & BIT(j)) {
5281 dev_err_ratelimited(hdev->dev,
5282 "%s ARB_ERR. err cause: %s\n",
5283 qm_name,
5284 gaudi_qman_arb_error_cause[j]);
5285 }
5286 }
5287}
5288
5289static void gaudi_print_ecc_info(struct hl_device *hdev, u16 event_type)
5290{
5291 u64 block_address;
5292 u8 index;
5293 int num_memories;
5294 char desc[32];
5295 bool derr;
5296 bool disable_clock_gating;
5297
5298 switch (event_type) {
5299 case GAUDI_EVENT_PCIE_CORE_SERR:
5300 snprintf(desc, ARRAY_SIZE(desc), "%s", "PCIE_CORE");
5301 block_address = mmPCIE_CORE_BASE;
5302 num_memories = 51;
5303 derr = false;
5304 disable_clock_gating = false;
5305 break;
5306 case GAUDI_EVENT_PCIE_CORE_DERR:
5307 snprintf(desc, ARRAY_SIZE(desc), "%s", "PCIE_CORE");
5308 block_address = mmPCIE_CORE_BASE;
5309 num_memories = 51;
5310 derr = true;
5311 disable_clock_gating = false;
5312 break;
5313 case GAUDI_EVENT_PCIE_IF_SERR:
5314 snprintf(desc, ARRAY_SIZE(desc), "%s", "PCIE_WRAP");
5315 block_address = mmPCIE_WRAP_BASE;
5316 num_memories = 11;
5317 derr = false;
5318 disable_clock_gating = false;
5319 break;
5320 case GAUDI_EVENT_PCIE_IF_DERR:
5321 snprintf(desc, ARRAY_SIZE(desc), "%s", "PCIE_WRAP");
5322 block_address = mmPCIE_WRAP_BASE;
5323 num_memories = 11;
5324 derr = true;
5325 disable_clock_gating = false;
5326 break;
5327 case GAUDI_EVENT_PCIE_PHY_SERR:
5328 snprintf(desc, ARRAY_SIZE(desc), "%s", "PCIE_PHY");
5329 block_address = mmPCIE_PHY_BASE;
5330 num_memories = 4;
5331 derr = false;
5332 disable_clock_gating = false;
5333 break;
5334 case GAUDI_EVENT_PCIE_PHY_DERR:
5335 snprintf(desc, ARRAY_SIZE(desc), "%s", "PCIE_PHY");
5336 block_address = mmPCIE_PHY_BASE;
5337 num_memories = 4;
5338 derr = true;
5339 disable_clock_gating = false;
5340 break;
5341 case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
5342 index = event_type - GAUDI_EVENT_TPC0_SERR;
5343 block_address = mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
5344 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "TPC", index);
5345 num_memories = 90;
5346 derr = false;
5347 disable_clock_gating = true;
5348 break;
5349 case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
5350 index = event_type - GAUDI_EVENT_TPC0_DERR;
5351 block_address =
5352 mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
5353 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "TPC", index);
5354 num_memories = 90;
5355 derr = true;
5356 disable_clock_gating = true;
5357 break;
5358 case GAUDI_EVENT_MME0_ACC_SERR:
5359 case GAUDI_EVENT_MME1_ACC_SERR:
5360 case GAUDI_EVENT_MME2_ACC_SERR:
5361 case GAUDI_EVENT_MME3_ACC_SERR:
5362 index = (event_type - GAUDI_EVENT_MME0_ACC_SERR) / 4;
5363 block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
5364 snprintf(desc, ARRAY_SIZE(desc), "MME%d_ACC", index);
5365 num_memories = 128;
5366 derr = false;
5367 disable_clock_gating = true;
5368 break;
5369 case GAUDI_EVENT_MME0_ACC_DERR:
5370 case GAUDI_EVENT_MME1_ACC_DERR:
5371 case GAUDI_EVENT_MME2_ACC_DERR:
5372 case GAUDI_EVENT_MME3_ACC_DERR:
5373 index = (event_type - GAUDI_EVENT_MME0_ACC_DERR) / 4;
5374 block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
5375 snprintf(desc, ARRAY_SIZE(desc), "MME%d_ACC", index);
5376 num_memories = 128;
5377 derr = true;
5378 disable_clock_gating = true;
5379 break;
5380 case GAUDI_EVENT_MME0_SBAB_SERR:
5381 case GAUDI_EVENT_MME1_SBAB_SERR:
5382 case GAUDI_EVENT_MME2_SBAB_SERR:
5383 case GAUDI_EVENT_MME3_SBAB_SERR:
5384 index = (event_type - GAUDI_EVENT_MME0_SBAB_SERR) / 4;
5385 block_address = mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
5386 snprintf(desc, ARRAY_SIZE(desc), "MME%d_SBAB", index);
5387 num_memories = 33;
5388 derr = false;
5389 disable_clock_gating = true;
5390 break;
5391 case GAUDI_EVENT_MME0_SBAB_DERR:
5392 case GAUDI_EVENT_MME1_SBAB_DERR:
5393 case GAUDI_EVENT_MME2_SBAB_DERR:
5394 case GAUDI_EVENT_MME3_SBAB_DERR:
5395 index = (event_type - GAUDI_EVENT_MME0_SBAB_DERR) / 4;
5396 block_address = mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
5397 snprintf(desc, ARRAY_SIZE(desc), "MME%d_SBAB", index);
5398 num_memories = 33;
5399 derr = true;
5400 disable_clock_gating = true;
5401 break;
5402 case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_DMA7_SERR_ECC:
5403 index = event_type - GAUDI_EVENT_DMA0_SERR_ECC;
5404 block_address = mmDMA0_CORE_BASE + index * DMA_CORE_OFFSET;
5405 snprintf(desc, ARRAY_SIZE(desc), "DMA%d_CORE", index);
5406 num_memories = 16;
5407 derr = false;
5408 disable_clock_gating = false;
5409 break;
5410 case GAUDI_EVENT_DMA0_DERR_ECC ... GAUDI_EVENT_DMA7_DERR_ECC:
5411 index = event_type - GAUDI_EVENT_DMA0_DERR_ECC;
5412 block_address = mmDMA0_CORE_BASE + index * DMA_CORE_OFFSET;
5413 snprintf(desc, ARRAY_SIZE(desc), "DMA%d_CORE", index);
5414 num_memories = 16;
5415 derr = true;
5416 disable_clock_gating = false;
5417 break;
5418 case GAUDI_EVENT_CPU_IF_ECC_SERR:
5419 block_address = mmCPU_IF_BASE;
5420 snprintf(desc, ARRAY_SIZE(desc), "%s", "CPU");
5421 num_memories = 4;
5422 derr = false;
5423 disable_clock_gating = false;
5424 break;
5425 case GAUDI_EVENT_CPU_IF_ECC_DERR:
5426 block_address = mmCPU_IF_BASE;
5427 snprintf(desc, ARRAY_SIZE(desc), "%s", "CPU");
5428 num_memories = 4;
5429 derr = true;
5430 disable_clock_gating = false;
5431 break;
5432 case GAUDI_EVENT_PSOC_MEM_SERR:
5433 block_address = mmPSOC_GLOBAL_CONF_BASE;
5434 snprintf(desc, ARRAY_SIZE(desc), "%s", "CPU");
5435 num_memories = 4;
5436 derr = false;
5437 disable_clock_gating = false;
5438 break;
5439 case GAUDI_EVENT_PSOC_MEM_DERR:
5440 block_address = mmPSOC_GLOBAL_CONF_BASE;
5441 snprintf(desc, ARRAY_SIZE(desc), "%s", "CPU");
5442 num_memories = 4;
5443 derr = true;
5444 disable_clock_gating = false;
5445 break;
5446 case GAUDI_EVENT_PSOC_CORESIGHT_SERR:
5447 block_address = mmPSOC_CS_TRACE_BASE;
5448 snprintf(desc, ARRAY_SIZE(desc), "%s", "CPU");
5449 num_memories = 2;
5450 derr = false;
5451 disable_clock_gating = false;
5452 break;
5453 case GAUDI_EVENT_PSOC_CORESIGHT_DERR:
5454 block_address = mmPSOC_CS_TRACE_BASE;
5455 snprintf(desc, ARRAY_SIZE(desc), "%s", "CPU");
5456 num_memories = 2;
5457 derr = true;
5458 disable_clock_gating = false;
5459 break;
5460 case GAUDI_EVENT_SRAM0_SERR ... GAUDI_EVENT_SRAM28_SERR:
5461 index = event_type - GAUDI_EVENT_SRAM0_SERR;
5462 block_address =
5463 mmSRAM_Y0_X0_BANK_BASE + index * SRAM_BANK_OFFSET;
5464 snprintf(desc, ARRAY_SIZE(desc), "SRAM%d", index);
5465 num_memories = 2;
5466 derr = false;
5467 disable_clock_gating = false;
5468 break;
5469 case GAUDI_EVENT_SRAM0_DERR ... GAUDI_EVENT_SRAM28_DERR:
5470 index = event_type - GAUDI_EVENT_SRAM0_DERR;
5471 block_address =
5472 mmSRAM_Y0_X0_BANK_BASE + index * SRAM_BANK_OFFSET;
5473 snprintf(desc, ARRAY_SIZE(desc), "SRAM%d", index);
5474 num_memories = 2;
5475 derr = true;
5476 disable_clock_gating = false;
5477 break;
5478 case GAUDI_EVENT_DMA_IF0_SERR ... GAUDI_EVENT_DMA_IF3_SERR:
5479 index = event_type - GAUDI_EVENT_DMA_IF0_SERR;
5480 block_address = mmDMA_IF_W_S_BASE +
5481 index * (mmDMA_IF_E_S_BASE - mmDMA_IF_W_S_BASE);
5482 snprintf(desc, ARRAY_SIZE(desc), "DMA_IF%d", index);
5483 num_memories = 60;
5484 derr = false;
5485 disable_clock_gating = false;
5486 break;
5487 case GAUDI_EVENT_DMA_IF0_DERR ... GAUDI_EVENT_DMA_IF3_DERR:
5488 index = event_type - GAUDI_EVENT_DMA_IF0_DERR;
5489 block_address = mmDMA_IF_W_S_BASE +
5490 index * (mmDMA_IF_E_S_BASE - mmDMA_IF_W_S_BASE);
5491 snprintf(desc, ARRAY_SIZE(desc), "DMA_IF%d", index);
5492 derr = true;
5493 num_memories = 60;
5494 disable_clock_gating = false;
5495 break;
5496 case GAUDI_EVENT_HBM_0_SERR ... GAUDI_EVENT_HBM_3_SERR:
5497 index = event_type - GAUDI_EVENT_HBM_0_SERR;
5498 /* HBM Registers are at different offsets */
5499 block_address = mmHBM0_BASE + 0x8000 +
5500 index * (mmHBM1_BASE - mmHBM0_BASE);
5501 snprintf(desc, ARRAY_SIZE(desc), "HBM%d", index);
5502 derr = false;
5503 num_memories = 64;
5504 disable_clock_gating = false;
5505 break;
5506 case GAUDI_EVENT_HBM_0_DERR ... GAUDI_EVENT_HBM_3_DERR:
5507 index = event_type - GAUDI_EVENT_HBM_0_DERR;
5508 /* HBM Registers are at different offsets */
5509 block_address = mmHBM0_BASE + 0x8000 +
5510 index * (mmHBM1_BASE - mmHBM0_BASE);
5511 snprintf(desc, ARRAY_SIZE(desc), "HBM%d", index);
5512 derr = true;
5513 num_memories = 64;
5514 disable_clock_gating = false;
5515 break;
5516 default:
5517 return;
5518 }
5519
5520 gaudi_print_ecc_info_generic(hdev, desc, block_address, num_memories,
5521 derr, disable_clock_gating);
5522}
5523
5524static void gaudi_handle_qman_err(struct hl_device *hdev, u16 event_type)
5525{
5526 u64 glbl_sts_addr, arb_err_addr;
5527 u8 index;
5528 char desc[32];
5529
5530 switch (event_type) {
5531 case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
5532 index = event_type - GAUDI_EVENT_TPC0_QM;
5533 glbl_sts_addr =
5534 mmTPC0_QM_GLBL_STS1_0 + index * TPC_QMAN_OFFSET;
5535 arb_err_addr =
5536 mmTPC0_QM_ARB_ERR_CAUSE + index * TPC_QMAN_OFFSET;
5537 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "TPC_QM", index);
5538 break;
5539 case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
5540 index = event_type - GAUDI_EVENT_MME0_QM;
5541 glbl_sts_addr =
5542 mmMME0_QM_GLBL_STS1_0 + index * MME_QMAN_OFFSET;
5543 arb_err_addr =
5544 mmMME0_QM_ARB_ERR_CAUSE + index * MME_QMAN_OFFSET;
5545 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "MME_QM", index);
5546 break;
5547 case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
5548 index = event_type - GAUDI_EVENT_DMA0_QM;
5549 glbl_sts_addr =
5550 mmDMA0_QM_GLBL_STS1_0 + index * DMA_QMAN_OFFSET;
5551 arb_err_addr =
5552 mmDMA0_QM_ARB_ERR_CAUSE + index * DMA_QMAN_OFFSET;
5553 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "DMA_QM", index);
5554 break;
5555 default:
5556 return;
5557 }
5558
5559 gaudi_handle_qman_err_generic(hdev, desc, glbl_sts_addr, arb_err_addr);
5560}
5561
5562static void gaudi_print_irq_info(struct hl_device *hdev, u16 event_type,
5563 bool razwi)
5564{
5565 char desc[64] = "";
5566
5567 gaudi_get_event_desc(event_type, desc, sizeof(desc));
5568 dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
5569 event_type, desc);
5570
5571 gaudi_print_ecc_info(hdev, event_type);
5572
5573 if (razwi) {
5574 gaudi_print_razwi_info(hdev);
5575 gaudi_print_mmu_error_info(hdev);
5576 }
5577}
5578
5579static int gaudi_soft_reset_late_init(struct hl_device *hdev)
5580{
5581 struct gaudi_device *gaudi = hdev->asic_specific;
5582
5583 /* Unmask all IRQs since some could have been received
5584 * during the soft reset
5585 */
5586 return hl_fw_unmask_irq_arr(hdev, gaudi->events, sizeof(gaudi->events));
5587}
5588
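/*
 * The reads below assume each HBM device exposes GAUDI_HBM_CHANNELS register
 * sets at a 0x1000 stride, where offsets 0x06C/0x07C hold the interrupt bits
 * of the even/odd pseudo channel (hence the "ch * 2" / "ch * 2 + 1" prints)
 * and 0x060/0x070 plus 0x064/0x074 hold the matching ECC counters and the
 * first error address.
 */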
5589static int gaudi_hbm_read_interrupts(struct hl_device *hdev, int device)
5590{
5591 int ch, err = 0;
5592 u32 base, val, val2;
5593
5594 base = GAUDI_HBM_CFG_BASE + device * GAUDI_HBM_CFG_OFFSET;
5595 for (ch = 0 ; ch < GAUDI_HBM_CHANNELS ; ch++) {
5596 val = RREG32_MASK(base + ch * 0x1000 + 0x06C, 0x0000FFFF);
5597 val = (val & 0xFF) | ((val >> 8) & 0xFF);
5598 if (val) {
5599 err = 1;
5600 dev_err(hdev->dev,
5601 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
5602 device, ch * 2, val & 0x1, (val >> 1) & 0x1,
5603 (val >> 2) & 0x1, (val >> 3) & 0x1,
5604 (val >> 4) & 0x1);
5605
5606 val2 = RREG32(base + ch * 0x1000 + 0x060);
5607 dev_err(hdev->dev,
5608 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DED_CNT=%d\n",
5609 device, ch * 2,
5610 RREG32(base + ch * 0x1000 + 0x064),
5611 (val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
5612 (val2 & 0xFF0000) >> 16,
5613 (val2 & 0xFF000000) >> 24);
5614 }
5615
5616 val = RREG32_MASK(base + ch * 0x1000 + 0x07C, 0x0000FFFF);
5617 val = (val & 0xFF) | ((val >> 8) & 0xFF);
5618 if (val) {
5619 err = 1;
5620 dev_err(hdev->dev,
5621 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
5622 device, ch * 2 + 1, val & 0x1, (val >> 1) & 0x1,
5623 (val >> 2) & 0x1, (val >> 3) & 0x1,
5624 (val >> 4) & 0x1);
5625
5626 val2 = RREG32(base + ch * 0x1000 + 0x070);
5627 dev_err(hdev->dev,
5628 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DED_CNT=%d\n",
5629 device, ch * 2 + 1,
5630 RREG32(base + ch * 0x1000 + 0x074),
5631 (val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
5632 (val2 & 0xFF0000) >> 16,
5633 (val2 & 0xFF000000) >> 24);
5634 }
5635
5636 /* Clear interrupts */
5637 RMWREG32(base + (ch * 0x1000) + 0x060, 0x1C8, 0x1FF);
5638 RMWREG32(base + (ch * 0x1000) + 0x070, 0x1C8, 0x1FF);
5639 WREG32(base + (ch * 0x1000) + 0x06C, 0x1F1F);
5640 WREG32(base + (ch * 0x1000) + 0x07C, 0x1F1F);
5641 RMWREG32(base + (ch * 0x1000) + 0x060, 0x0, 0xF);
5642 RMWREG32(base + (ch * 0x1000) + 0x070, 0x0, 0xF);
5643 }
5644
5645 val = RREG32(base + 0x8F30);
5646 val2 = RREG32(base + 0x8F34);
5647 if (val | val2) {
5648 err = 1;
5649 dev_err(hdev->dev,
5650 "HBM %d MC SRAM SERR info: Reg 0x8F30=0x%x, Reg 0x8F34=0x%x\n",
5651 device, val, val2);
5652 }
5653 val = RREG32(base + 0x8F40);
5654 val2 = RREG32(base + 0x8F44);
5655 if (val | val2) {
5656 err = 1;
5657 dev_err(hdev->dev,
5658 "HBM %d MC SRAM DERR info: Reg 0x8F40=0x%x, Reg 0x8F44=0x%x\n",
5659 device, val, val2);
5660 }
5661
5662 return err;
5663}
5664
5665static int gaudi_hbm_event_to_dev(u16 hbm_event_type)
5666{
5667 switch (hbm_event_type) {
5668 case GAUDI_EVENT_HBM0_SPI_0:
5669 case GAUDI_EVENT_HBM0_SPI_1:
5670 return 0;
5671 case GAUDI_EVENT_HBM1_SPI_0:
5672 case GAUDI_EVENT_HBM1_SPI_1:
5673 return 1;
5674 case GAUDI_EVENT_HBM2_SPI_0:
5675 case GAUDI_EVENT_HBM2_SPI_1:
5676 return 2;
5677 case GAUDI_EVENT_HBM3_SPI_0:
5678 case GAUDI_EVENT_HBM3_SPI_1:
5679 return 3;
5680 default:
5681 break;
5682 }
5683
5684 /* Should never happen */
5685 return 0;
5686}
5687
5688static bool gaudi_tpc_read_interrupts(struct hl_device *hdev, u8 tpc_id,
5689 char *interrupt_name)
5690{
5691 struct gaudi_device *gaudi = hdev->asic_specific;
5692 u32 tpc_offset = tpc_id * TPC_CFG_OFFSET, tpc_interrupts_cause, i;
5693 bool soft_reset_required = false;
5694
5695 /* Accessing the TPC_INTR_CAUSE registers requires disabling the clock
5696 * gating, which cannot be done by ArmCP, so the driver must do it
5697 * instead.
5698 */
5699
5700 mutex_lock(&gaudi->clk_gate_mutex);
5701
5702 hdev->asic_funcs->disable_clock_gating(hdev);
5703
5704 tpc_interrupts_cause = RREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset) &
5705 TPC0_CFG_TPC_INTR_CAUSE_CAUSE_MASK;
5706
5707 for (i = 0 ; i < GAUDI_NUM_OF_TPC_INTR_CAUSE ; i++)
5708 if (tpc_interrupts_cause & BIT(i)) {
5709 dev_err_ratelimited(hdev->dev,
5710 "TPC%d_%s interrupt cause: %s\n",
5711 tpc_id, interrupt_name,
5712 gaudi_tpc_interrupts_cause[i]);
5713 /* If this is QM error, we need to soft-reset */
5714 if (i == 15)
5715 soft_reset_required = true;
5716 }
5717
5718 /* Clear interrupts */
5719 WREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset, 0);
5720
5721 hdev->asic_funcs->enable_clock_gating(hdev);
5722
5723 mutex_unlock(&gaudi->clk_gate_mutex);
5724
5725 return soft_reset_required;
5726}
5727
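/*
 * The two helpers below recover the TPC index from the event ID spacing:
 * the DEC events appear to be two IDs apart per TPC (hence the shift) and
 * the KRN_ERR events six IDs apart (hence the division by 6).
 */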
5728static int tpc_dec_event_to_tpc_id(u16 tpc_dec_event_type)
5729{
5730 return (tpc_dec_event_type - GAUDI_EVENT_TPC0_DEC) >> 1;
5731}
5732
5733static int tpc_krn_event_to_tpc_id(u16 tpc_dec_event_type)
5734{
5735 return (tpc_dec_event_type - GAUDI_EVENT_TPC0_KRN_ERR) / 6;
5736}
5737
5738static void gaudi_print_clk_change_info(struct hl_device *hdev,
5739 u16 event_type)
5740{
5741 switch (event_type) {
5742 case GAUDI_EVENT_FIX_POWER_ENV_S:
5743 dev_info_ratelimited(hdev->dev,
5744 "Clock throttling due to power consumption\n");
5745 break;
5746
5747 case GAUDI_EVENT_FIX_POWER_ENV_E:
5748 dev_info_ratelimited(hdev->dev,
5749 "Power envelope is safe, back to optimal clock\n");
5750 break;
5751
5752 case GAUDI_EVENT_FIX_THERMAL_ENV_S:
5753 dev_info_ratelimited(hdev->dev,
5754 "Clock throttling due to overheating\n");
5755 break;
5756
5757 case GAUDI_EVENT_FIX_THERMAL_ENV_E:
5758 dev_info_ratelimited(hdev->dev,
5759 "Thermal envelope is safe, back to optimal clock\n");
5760 break;
5761
5762 default:
5763 dev_err(hdev->dev, "Received invalid clock change event %d\n",
5764 event_type);
5765 break;
5766 }
5767}
5768
5769static void gaudi_handle_eqe(struct hl_device *hdev,
5770 struct hl_eq_entry *eq_entry)
5771{
5772 struct gaudi_device *gaudi = hdev->asic_specific;
5773 u32 ctl = le32_to_cpu(eq_entry->hdr.ctl);
5774 u16 event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK)
5775 >> EQ_CTL_EVENT_TYPE_SHIFT);
5776 u8 cause;
5777 bool soft_reset_required;
5778
5779 gaudi->events_stat[event_type]++;
5780 gaudi->events_stat_aggregate[event_type]++;
5781
5782 switch (event_type) {
5783 case GAUDI_EVENT_PCIE_CORE_DERR:
5784 case GAUDI_EVENT_PCIE_IF_DERR:
5785 case GAUDI_EVENT_PCIE_PHY_DERR:
5786 case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
5787 case GAUDI_EVENT_MME0_ACC_DERR:
5788 case GAUDI_EVENT_MME0_SBAB_DERR:
5789 case GAUDI_EVENT_MME1_ACC_DERR:
5790 case GAUDI_EVENT_MME1_SBAB_DERR:
5791 case GAUDI_EVENT_MME2_ACC_DERR:
5792 case GAUDI_EVENT_MME2_SBAB_DERR:
5793 case GAUDI_EVENT_MME3_ACC_DERR:
5794 case GAUDI_EVENT_MME3_SBAB_DERR:
5795 case GAUDI_EVENT_DMA0_DERR_ECC ... GAUDI_EVENT_DMA7_DERR_ECC:
5796 fallthrough;
5797 case GAUDI_EVENT_CPU_IF_ECC_DERR:
5798 case GAUDI_EVENT_PSOC_MEM_DERR:
5799 case GAUDI_EVENT_PSOC_CORESIGHT_DERR:
5800 case GAUDI_EVENT_SRAM0_DERR ... GAUDI_EVENT_SRAM28_DERR:
5801 case GAUDI_EVENT_DMA_IF0_DERR ... GAUDI_EVENT_DMA_IF3_DERR:
5802 fallthrough;
5803 case GAUDI_EVENT_GIC500:
5804 case GAUDI_EVENT_HBM_0_DERR ... GAUDI_EVENT_HBM_3_DERR:
5805 case GAUDI_EVENT_MMU_DERR:
5806 case GAUDI_EVENT_AXI_ECC:
5807 case GAUDI_EVENT_L2_RAM_ECC:
5808 case GAUDI_EVENT_PLL0 ... GAUDI_EVENT_PLL17:
5809 gaudi_print_irq_info(hdev, event_type, false);
5810 if (hdev->hard_reset_on_fw_events)
5811 hl_device_reset(hdev, true, false);
5812 break;
5813
5814 case GAUDI_EVENT_HBM0_SPI_0:
5815 case GAUDI_EVENT_HBM1_SPI_0:
5816 case GAUDI_EVENT_HBM2_SPI_0:
5817 case GAUDI_EVENT_HBM3_SPI_0:
5818 gaudi_print_irq_info(hdev, event_type, false);
5819 gaudi_hbm_read_interrupts(hdev,
5820 gaudi_hbm_event_to_dev(event_type));
5821 if (hdev->hard_reset_on_fw_events)
5822 hl_device_reset(hdev, true, false);
5823 break;
5824
5825 case GAUDI_EVENT_HBM0_SPI_1:
5826 case GAUDI_EVENT_HBM1_SPI_1:
5827 case GAUDI_EVENT_HBM2_SPI_1:
5828 case GAUDI_EVENT_HBM3_SPI_1:
5829 gaudi_print_irq_info(hdev, event_type, false);
5830 gaudi_hbm_read_interrupts(hdev,
5831 gaudi_hbm_event_to_dev(event_type));
5832 break;
5833
5834 case GAUDI_EVENT_TPC0_DEC:
5835 case GAUDI_EVENT_TPC1_DEC:
5836 case GAUDI_EVENT_TPC2_DEC:
5837 case GAUDI_EVENT_TPC3_DEC:
5838 case GAUDI_EVENT_TPC4_DEC:
5839 case GAUDI_EVENT_TPC5_DEC:
5840 case GAUDI_EVENT_TPC6_DEC:
5841 case GAUDI_EVENT_TPC7_DEC:
5842 gaudi_print_irq_info(hdev, event_type, true);
5843 soft_reset_required = gaudi_tpc_read_interrupts(hdev,
5844 tpc_dec_event_to_tpc_id(event_type),
5845 "AXI_SLV_DEC_Error");
5846 if (soft_reset_required) {
5847 dev_err_ratelimited(hdev->dev,
5848 "soft reset required due to %s\n",
5849 gaudi_irq_map_table[event_type].name);
5850 hl_device_reset(hdev, false, false);
5851 }
5852 hl_fw_unmask_irq(hdev, event_type);
5853 break;
5854
5855 case GAUDI_EVENT_TPC0_KRN_ERR:
5856 case GAUDI_EVENT_TPC1_KRN_ERR:
5857 case GAUDI_EVENT_TPC2_KRN_ERR:
5858 case GAUDI_EVENT_TPC3_KRN_ERR:
5859 case GAUDI_EVENT_TPC4_KRN_ERR:
5860 case GAUDI_EVENT_TPC5_KRN_ERR:
5861 case GAUDI_EVENT_TPC6_KRN_ERR:
5862 case GAUDI_EVENT_TPC7_KRN_ERR:
5863 gaudi_print_irq_info(hdev, event_type, true);
5864 soft_reset_required = gaudi_tpc_read_interrupts(hdev,
5865 tpc_krn_event_to_tpc_id(event_type),
5866 "KRN_ERR");
5867 if (soft_reset_required) {
5868 dev_err_ratelimited(hdev->dev,
5869 "soft reset required due to %s\n",
5870 gaudi_irq_map_table[event_type].name);
5871 hl_device_reset(hdev, false, false);
5872 }
5873 hl_fw_unmask_irq(hdev, event_type);
5874 break;
5875
5876 case GAUDI_EVENT_PCIE_CORE_SERR:
5877 case GAUDI_EVENT_PCIE_IF_SERR:
5878 case GAUDI_EVENT_PCIE_PHY_SERR:
5879 case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
5880 case GAUDI_EVENT_MME0_ACC_SERR:
5881 case GAUDI_EVENT_MME0_SBAB_SERR:
5882 case GAUDI_EVENT_MME1_ACC_SERR:
5883 case GAUDI_EVENT_MME1_SBAB_SERR:
5884 case GAUDI_EVENT_MME2_ACC_SERR:
5885 case GAUDI_EVENT_MME2_SBAB_SERR:
5886 case GAUDI_EVENT_MME3_ACC_SERR:
5887 case GAUDI_EVENT_MME3_SBAB_SERR:
5888 case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_DMA7_SERR_ECC:
5889 case GAUDI_EVENT_CPU_IF_ECC_SERR:
5890 case GAUDI_EVENT_PSOC_MEM_SERR:
5891 case GAUDI_EVENT_PSOC_CORESIGHT_SERR:
5892 case GAUDI_EVENT_SRAM0_SERR ... GAUDI_EVENT_SRAM28_SERR:
5893 case GAUDI_EVENT_DMA_IF0_SERR ... GAUDI_EVENT_DMA_IF3_SERR:
5894 case GAUDI_EVENT_HBM_0_SERR ... GAUDI_EVENT_HBM_3_SERR:
5895 fallthrough;
5896 case GAUDI_EVENT_MMU_SERR:
5897 case GAUDI_EVENT_PCIE_DEC:
5898 case GAUDI_EVENT_MME0_WBC_RSP:
5899 case GAUDI_EVENT_MME0_SBAB0_RSP:
5900 case GAUDI_EVENT_MME1_WBC_RSP:
5901 case GAUDI_EVENT_MME1_SBAB0_RSP:
5902 case GAUDI_EVENT_MME2_WBC_RSP:
5903 case GAUDI_EVENT_MME2_SBAB0_RSP:
5904 case GAUDI_EVENT_MME3_WBC_RSP:
5905 case GAUDI_EVENT_MME3_SBAB0_RSP:
5906 case GAUDI_EVENT_CPU_AXI_SPLITTER:
5907 case GAUDI_EVENT_PSOC_AXI_DEC:
5908 case GAUDI_EVENT_PSOC_PRSTN_FALL:
5909 case GAUDI_EVENT_MMU_PAGE_FAULT:
5910 case GAUDI_EVENT_MMU_WR_PERM:
5911 case GAUDI_EVENT_RAZWI_OR_ADC:
5912 case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
5913 case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
5914 case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
5915 fallthrough;
5916 case GAUDI_EVENT_DMA0_CORE ... GAUDI_EVENT_DMA7_CORE:
5917 gaudi_print_irq_info(hdev, event_type, true);
5918 gaudi_handle_qman_err(hdev, event_type);
5919 hl_fw_unmask_irq(hdev, event_type);
5920 break;
5921
5922 case GAUDI_EVENT_RAZWI_OR_ADC_SW:
5923 gaudi_print_irq_info(hdev, event_type, true);
5924 hl_device_reset(hdev, false, false);
5925 hl_fw_unmask_irq(hdev, event_type);
5926 break;
5927
5928 case GAUDI_EVENT_TPC0_BMON_SPMU:
5929 case GAUDI_EVENT_TPC1_BMON_SPMU:
5930 case GAUDI_EVENT_TPC2_BMON_SPMU:
5931 case GAUDI_EVENT_TPC3_BMON_SPMU:
5932 case GAUDI_EVENT_TPC4_BMON_SPMU:
5933 case GAUDI_EVENT_TPC5_BMON_SPMU:
5934 case GAUDI_EVENT_TPC6_BMON_SPMU:
5935 case GAUDI_EVENT_TPC7_BMON_SPMU:
5936 case GAUDI_EVENT_DMA_BM_CH0 ... GAUDI_EVENT_DMA_BM_CH7:
5937 gaudi_print_irq_info(hdev, event_type, false);
5938 hl_fw_unmask_irq(hdev, event_type);
5939 break;
5940
5941 case GAUDI_EVENT_FIX_POWER_ENV_S ... GAUDI_EVENT_FIX_THERMAL_ENV_E:
5942 gaudi_print_clk_change_info(hdev, event_type);
5943 hl_fw_unmask_irq(hdev, event_type);
5944 break;
5945
5946 case GAUDI_EVENT_PSOC_GPIO_U16_0:
5947 cause = le64_to_cpu(eq_entry->data[0]) & 0xFF;
5948 dev_err(hdev->dev,
5949 "Received high temp H/W interrupt %d (cause %d)\n",
5950 event_type, cause);
5951 break;
5952
5953 default:
5954 dev_err(hdev->dev, "Received invalid H/W interrupt %d\n",
5955 event_type);
5956 break;
5957 }
5958}
5959
5960static void *gaudi_get_events_stat(struct hl_device *hdev, bool aggregate,
5961 u32 *size)
5962{
5963 struct gaudi_device *gaudi = hdev->asic_specific;
5964
5965 if (aggregate) {
5966 *size = (u32) sizeof(gaudi->events_stat_aggregate);
5967 return gaudi->events_stat_aggregate;
5968 }
5969
5970 *size = (u32) sizeof(gaudi->events_stat);
5971 return gaudi->events_stat;
5972}
5973
5974static void gaudi_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard,
5975 u32 flags)
5976{
5977 struct gaudi_device *gaudi = hdev->asic_specific;
5978 u32 status, timeout_usec;
5979 int rc;
5980
5981 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU) ||
5982 hdev->hard_reset_pending)
5983 return;
5984
5985 mutex_lock(&hdev->mmu_cache_lock);
5986
5987 if (hdev->pldm)
5988 timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
5989 else
5990 timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
5991
5992 /* L0 & L1 invalidation */
5993 WREG32(mmSTLB_INV_PS, 2);
5994
5995 rc = hl_poll_timeout(
5996 hdev,
5997 mmSTLB_INV_PS,
5998 status,
5999 !status,
6000 1000,
6001 timeout_usec);
6002
6003 WREG32(mmSTLB_INV_SET, 0);
6004
6005 if (rc)
6006 dev_notice_ratelimited(hdev->dev,
6007 "Timeout when waiting for MMU cache invalidation\n");
6008
6009 mutex_unlock(&hdev->mmu_cache_lock);
6010}
6011
6012static void gaudi_mmu_invalidate_cache_range(struct hl_device *hdev,
6013 bool is_hard, u32 asid, u64 va, u64 size)
6014{
6015 struct gaudi_device *gaudi = hdev->asic_specific;
6016 u32 status, timeout_usec;
6017 u32 inv_data;
6018 u32 pi;
6019 int rc;
6020
6021 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU) ||
6022 hdev->hard_reset_pending)
6023 return;
6024
6025 mutex_lock(&hdev->mmu_cache_lock);
6026
6027 if (hdev->pldm)
6028 timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
6029 else
6030 timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
6031
6032 /*
6033 * TODO: currently invalidate entire L0 & L1 as in regular hard
6034 * invalidation. Need to apply invalidation of specific cache
6035 * lines with mask of ASID & VA & size.
6036 * Note that L1 will be flushed entirely in any case.
6037 */
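	/*
	 * The handshake below is, roughly: bump the 8-bit producer index in
	 * STLB_CACHE_INV (wrapping at 0xFF) and poll STLB_INV_CONSUMER_INDEX
	 * until the MMU reports the same value, meaning the invalidation has
	 * completed.
	 */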
6038
6039 /* L0 & L1 invalidation */
6040 inv_data = RREG32(mmSTLB_CACHE_INV);
6041 /* PI is 8 bit */
6042 pi = ((inv_data & STLB_CACHE_INV_PRODUCER_INDEX_MASK) + 1) & 0xFF;
6043 WREG32(mmSTLB_CACHE_INV,
6044 (inv_data & STLB_CACHE_INV_INDEX_MASK_MASK) | pi);
6045
6046 rc = hl_poll_timeout(
6047 hdev,
6048 mmSTLB_INV_CONSUMER_INDEX,
6049 status,
6050 status == pi,
6051 1000,
6052 timeout_usec);
6053
6054 if (rc)
6055 dev_notice_ratelimited(hdev->dev,
6056 "Timeout when waiting for MMU cache invalidation\n");
6057
6058 mutex_unlock(&hdev->mmu_cache_lock);
6059}
6060
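/*
 * A sketch of the hop0 programming sequence below: the hop0 table physical
 * address is split across two registers (bits 43:12 and bits 49:44), then
 * bit 31 of MMU_BUSY is set and polled until the h/w clears it to ack the
 * per-ASID update.
 */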
6061static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev,
6062 u32 asid, u64 phys_addr)
6063{
6064 u32 status, timeout_usec;
6065 int rc;
6066
6067 if (hdev->pldm)
6068 timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
6069 else
6070 timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
6071
6072 WREG32(MMU_ASID, asid);
6073 WREG32(MMU_HOP0_PA43_12, phys_addr >> MMU_HOP0_PA43_12_SHIFT);
6074 WREG32(MMU_HOP0_PA49_44, phys_addr >> MMU_HOP0_PA49_44_SHIFT);
6075 WREG32(MMU_BUSY, 0x80000000);
6076
6077 rc = hl_poll_timeout(
6078 hdev,
6079 MMU_BUSY,
6080 status,
6081 !(status & 0x80000000),
6082 1000,
6083 timeout_usec);
6084
6085 if (rc) {
6086 dev_err(hdev->dev,
6087 "Timeout during MMU hop0 config of asid %d\n", asid);
6088 return rc;
6089 }
6090
6091 return 0;
6092}
6093
6094static int gaudi_send_heartbeat(struct hl_device *hdev)
6095{
6096 struct gaudi_device *gaudi = hdev->asic_specific;
6097
6098 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
6099 return 0;
6100
6101 return hl_fw_send_heartbeat(hdev);
6102}
6103
6104static int gaudi_armcp_info_get(struct hl_device *hdev)
6105{
6106 struct gaudi_device *gaudi = hdev->asic_specific;
6107 struct asic_fixed_properties *prop = &hdev->asic_prop;
6108 int rc;
6109
6110 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
6111 return 0;
6112
6113 rc = hl_fw_armcp_info_get(hdev);
6114 if (rc)
6115 return rc;
6116
6117 if (!strlen(prop->armcp_info.card_name))
6118 strncpy(prop->armcp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
6119 CARD_NAME_MAX_LEN);
6120
6121 return 0;
6122}
6123
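/*
 * The idle mask built below is assumed to carry one bit per engine,
 * positioned by its GAUDI_ENGINE_ID_* value and set only when that engine
 * is busy, e.g. a busy DMA channel 3 sets bit (GAUDI_ENGINE_ID_DMA_0 + 3).
 */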
6124static bool gaudi_is_device_idle(struct hl_device *hdev, u32 *mask,
6125 struct seq_file *s)
6126{
6127 struct gaudi_device *gaudi = hdev->asic_specific;
6128 const char *fmt = "%-5d%-9s%#-14x%#-12x%#x\n";
6129 const char *mme_slave_fmt = "%-5d%-9s%-14s%-12s%#x\n";
6130 u32 qm_glbl_sts0, qm_cgm_sts, dma_core_sts0, tpc_cfg_sts, mme_arch_sts;
6131 bool is_idle = true, is_eng_idle, is_slave;
6132 u64 offset;
6133 int i, dma_id;
6134
6135 mutex_lock(&gaudi->clk_gate_mutex);
6136
6137 hdev->asic_funcs->disable_clock_gating(hdev);
6138
6139 if (s)
6140 seq_puts(s,
6141 "\nDMA is_idle QM_GLBL_STS0 QM_CGM_STS DMA_CORE_STS0\n"
6142 "--- ------- ------------ ---------- -------------\n");
6143
6144 for (i = 0 ; i < DMA_NUMBER_OF_CHNLS ; i++) {
6145 dma_id = gaudi_dma_assignment[i];
6146 offset = dma_id * DMA_QMAN_OFFSET;
6147
6148 qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + offset);
6149 qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + offset);
6150 dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + offset);
6151 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
6152 IS_DMA_IDLE(dma_core_sts0);
6153 is_idle &= is_eng_idle;
6154
6155 if (mask)
6156 *mask |= !is_eng_idle <<
6157 (GAUDI_ENGINE_ID_DMA_0 + dma_id);
6158 if (s)
6159 seq_printf(s, fmt, dma_id,
6160 is_eng_idle ? "Y" : "N", qm_glbl_sts0,
6161 qm_cgm_sts, dma_core_sts0);
6162 }
6163
6164 if (s)
6165 seq_puts(s,
6166 "\nTPC is_idle QM_GLBL_STS0 QM_CGM_STS CFG_STATUS\n"
6167 "--- ------- ------------ ---------- ----------\n");
6168
6169 for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
6170 offset = i * TPC_QMAN_OFFSET;
6171 qm_glbl_sts0 = RREG32(mmTPC0_QM_GLBL_STS0 + offset);
6172 qm_cgm_sts = RREG32(mmTPC0_QM_CGM_STS + offset);
6173 tpc_cfg_sts = RREG32(mmTPC0_CFG_STATUS + offset);
6174 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
6175 IS_TPC_IDLE(tpc_cfg_sts);
6176 is_idle &= is_eng_idle;
6177
6178 if (mask)
6179 *mask |= !is_eng_idle << (GAUDI_ENGINE_ID_TPC_0 + i);
6180 if (s)
6181 seq_printf(s, fmt, i,
6182 is_eng_idle ? "Y" : "N",
6183 qm_glbl_sts0, qm_cgm_sts, tpc_cfg_sts);
6184 }
6185
6186 if (s)
6187 seq_puts(s,
6188 "\nMME is_idle QM_GLBL_STS0 QM_CGM_STS ARCH_STATUS\n"
6189 "--- ------- ------------ ---------- -----------\n");
6190
6191 for (i = 0 ; i < MME_NUMBER_OF_ENGINES ; i++) {
6192 offset = i * MME_QMAN_OFFSET;
6193 mme_arch_sts = RREG32(mmMME0_CTRL_ARCH_STATUS + offset);
6194 is_eng_idle = IS_MME_IDLE(mme_arch_sts);
6195
6196 /* MME 1 & 3 are slaves, no need to check their QMANs */
6197 is_slave = i % 2;
6198 if (!is_slave) {
6199 qm_glbl_sts0 = RREG32(mmMME0_QM_GLBL_STS0 + offset);
6200 qm_cgm_sts = RREG32(mmMME0_QM_CGM_STS + offset);
6201 is_eng_idle &= IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
6202 }
6203
6204 is_idle &= is_eng_idle;
6205
6206 if (mask)
6207 *mask |= !is_eng_idle << (GAUDI_ENGINE_ID_MME_0 + i);
6208 if (s) {
6209 if (!is_slave)
6210 seq_printf(s, fmt, i,
6211 is_eng_idle ? "Y" : "N",
6212 qm_glbl_sts0, qm_cgm_sts, mme_arch_sts);
6213 else
6214 seq_printf(s, mme_slave_fmt, i,
6215 is_eng_idle ? "Y" : "N", "-",
6216 "-", mme_arch_sts);
6217 }
6218 }
6219
6220 if (s)
6221 seq_puts(s, "\n");
6222
6223 hdev->asic_funcs->enable_clock_gating(hdev);
6224
6225 mutex_unlock(&gaudi->clk_gate_mutex);
6226
6227 return is_idle;
6228}
6229
6230static void gaudi_hw_queues_lock(struct hl_device *hdev)
6231 __acquires(&gaudi->hw_queues_lock)
6232{
6233 struct gaudi_device *gaudi = hdev->asic_specific;
6234
6235 spin_lock(&gaudi->hw_queues_lock);
6236}
6237
6238static void gaudi_hw_queues_unlock(struct hl_device *hdev)
6239 __releases(&gaudi->hw_queues_lock)
6240{
6241 struct gaudi_device *gaudi = hdev->asic_specific;
6242
6243 spin_unlock(&gaudi->hw_queues_lock);
6244}
6245
6246static u32 gaudi_get_pci_id(struct hl_device *hdev)
6247{
6248 return hdev->pdev->device;
6249}
6250
6251static int gaudi_get_eeprom_data(struct hl_device *hdev, void *data,
6252 size_t max_size)
6253{
6254 struct gaudi_device *gaudi = hdev->asic_specific;
6255
6256 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
6257 return 0;
6258
6259 return hl_fw_get_eeprom_data(hdev, data, max_size);
6260}
6261
6262/*
6263 * this function should be used only during initialization and/or after reset,
6264 * when there are no active users.
6265 */
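/*
 * The flow below is, roughly: point both the kernel and icache base
 * registers at the TPC kernel, trigger an icache invalidate + 64KB
 * prefetch, wait for the vector pipe to drain, then issue TPC_EXECUTE and
 * wait for the pipe and the WQ in-flight counter to drain again.
 */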
6266static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
6267 u32 tpc_id)
6268{
6269 struct gaudi_device *gaudi = hdev->asic_specific;
6270 u64 kernel_timeout;
6271 u32 status, offset;
6272 int rc;
6273
6274 offset = tpc_id * (mmTPC1_CFG_STATUS - mmTPC0_CFG_STATUS);
6275
6276 if (hdev->pldm)
6277 kernel_timeout = GAUDI_PLDM_TPC_KERNEL_WAIT_USEC;
6278 else
6279 kernel_timeout = HL_DEVICE_TIMEOUT_USEC;
6280
6281 mutex_lock(&gaudi->clk_gate_mutex);
6282
6283 hdev->asic_funcs->disable_clock_gating(hdev);
6284
6285 WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_LOW + offset,
6286 lower_32_bits(tpc_kernel));
6287 WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_HIGH + offset,
6288 upper_32_bits(tpc_kernel));
6289
6290 WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_LOW + offset,
6291 lower_32_bits(tpc_kernel));
6292 WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_HIGH + offset,
6293 upper_32_bits(tpc_kernel));
6294 /* set a valid LUT pointer, content is of no significance */
6295 WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_LO + offset,
6296 lower_32_bits(tpc_kernel));
6297 WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_HI + offset,
6298 upper_32_bits(tpc_kernel));
6299
6300 WREG32(mmTPC0_CFG_QM_SYNC_OBJECT_ADDR + offset,
6301 lower_32_bits(CFG_BASE +
6302 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0));
6303
6304 WREG32(mmTPC0_CFG_TPC_CMD + offset,
6305 (1 << TPC0_CFG_TPC_CMD_ICACHE_INVALIDATE_SHIFT |
6306 1 << TPC0_CFG_TPC_CMD_ICACHE_PREFETCH_64KB_SHIFT));
6307 /* wait a bit for the engine to start executing */
6308 usleep_range(1000, 1500);
6309
6310 /* wait until engine has finished executing */
6311 rc = hl_poll_timeout(
6312 hdev,
6313 mmTPC0_CFG_STATUS + offset,
6314 status,
6315 (status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
6316 TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
6317 1000,
6318 kernel_timeout);
6319
6320 if (rc) {
6321 dev_err(hdev->dev,
6322 "Timeout while waiting for TPC%d icache prefetch\n",
6323 tpc_id);
6324 hdev->asic_funcs->enable_clock_gating(hdev);
6325 mutex_unlock(&gaudi->clk_gate_mutex);
6326 return -EIO;
6327 }
6328
6329 WREG32(mmTPC0_CFG_TPC_EXECUTE + offset,
6330 1 << TPC0_CFG_TPC_EXECUTE_V_SHIFT);
6331
6332 /* wait a bit for the engine to start executing */
6333 usleep_range(1000, 1500);
6334
6335 /* wait until engine has finished executing */
6336 rc = hl_poll_timeout(
6337 hdev,
6338 mmTPC0_CFG_STATUS + offset,
6339 status,
6340 (status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
6341 TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
6342 1000,
6343 kernel_timeout);
6344
6345 rc = hl_poll_timeout(
6346 hdev,
6347 mmTPC0_CFG_WQ_INFLIGHT_CNTR + offset,
6348 status,
6349 (status == 0),
6350 1000,
6351 kernel_timeout);
6352
6353 hdev->asic_funcs->enable_clock_gating(hdev);
6354 mutex_unlock(&gaudi->clk_gate_mutex);
6355
6356 if (rc) {
6357 dev_err(hdev->dev,
6358 "Timeout while waiting for TPC%d kernel to execute\n",
6359 tpc_id);
6360 return -EIO;
6361 }
6362
6363 return 0;
6364}
6365
6366static enum hl_device_hw_state gaudi_get_hw_state(struct hl_device *hdev)
6367{
6368 return RREG32(mmHW_STATE);
6369}
6370
6371static u32 gaudi_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx)
6372{
6373 return gaudi_cq_assignment[cq_idx];
6374}
6375
6376static void gaudi_ext_queue_init(struct hl_device *hdev, u32 q_idx)
6377{
6378 struct gaudi_device *gaudi = hdev->asic_specific;
6379 struct hl_hw_queue *hw_queue = &hdev->kernel_queues[q_idx];
6380 struct hl_hw_sob *hw_sob;
6381 int sob, ext_idx = gaudi->ext_queue_idx++;
6382
6383 /*
6384 * The external queues might not sit sequentially, hence use the
6385 * real external queue index for the SOB/MON base id.
6386 */
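	/*
	 * For example (illustration only), the second external queue that is
	 * initialized receives ext_idx = 1 and therefore owns SOBs
	 * [HL_RSVD_SOBS, 2 * HL_RSVD_SOBS) and monitors [HL_RSVD_MONS,
	 * 2 * HL_RSVD_MONS), regardless of its q_idx.
	 */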
6387 hw_queue->base_sob_id = ext_idx * HL_RSVD_SOBS;
6388 hw_queue->base_mon_id = ext_idx * HL_RSVD_MONS;
6389 hw_queue->next_sob_val = 1;
6390 hw_queue->curr_sob_offset = 0;
6391
6392 for (sob = 0 ; sob < HL_RSVD_SOBS ; sob++) {
6393 hw_sob = &hw_queue->hw_sob[sob];
6394 hw_sob->hdev = hdev;
6395 hw_sob->sob_id = hw_queue->base_sob_id + sob;
6396 hw_sob->q_idx = q_idx;
6397 kref_init(&hw_sob->kref);
6398 }
6399}
6400
6401static void gaudi_ext_queue_reset(struct hl_device *hdev, u32 q_idx)
6402{
6403 struct hl_hw_queue *hw_queue = &hdev->kernel_queues[q_idx];
6404
6405 /*
6406 * In case we got here due to a stuck CS, the refcnt might be bigger
6407 * than 1 and therefore we reset it.
6408 */
6409 kref_init(&hw_queue->hw_sob[hw_queue->curr_sob_offset].kref);
6410 hw_queue->curr_sob_offset = 0;
6411 hw_queue->next_sob_val = 1;
6412}
6413
6414static u32 gaudi_get_signal_cb_size(struct hl_device *hdev)
6415{
6416 return sizeof(struct packet_msg_short) +
6417 sizeof(struct packet_msg_prot) * 2;
6418}
6419
6420static u32 gaudi_get_wait_cb_size(struct hl_device *hdev)
6421{
6422 return sizeof(struct packet_msg_short) * 4 +
6423 sizeof(struct packet_fence) +
6424 sizeof(struct packet_msg_prot) * 2;
6425}
6426
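/*
 * Signal CB sketch: a single MSG_SHORT that atomically adds 1 to the given
 * SOB (W_S SOB base). The two packet_msg_prot entries accounted for in
 * gaudi_get_signal_cb_size() are presumably appended elsewhere, e.g. via
 * the add_end_of_cb_packets callback.
 */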
6427static void gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id)
6428{
6429 struct hl_cb *cb = (struct hl_cb *) data;
6430 struct packet_msg_short *pkt;
6431 u32 value, ctl;
6432
6433 pkt = (struct packet_msg_short *) (uintptr_t) cb->kernel_address;
6434 memset(pkt, 0, sizeof(*pkt));
6435
6436 value = 1 << GAUDI_PKT_SHORT_VAL_SOB_SYNC_VAL_SHIFT; /* inc by 1 */
6437 value |= 1 << GAUDI_PKT_SHORT_VAL_SOB_MOD_SHIFT; /* add mode */
6438
6439 ctl = (sob_id * 4) << GAUDI_PKT_SHORT_CTL_ADDR_SHIFT; /* SOB id */
6440 ctl |= 0 << GAUDI_PKT_SHORT_CTL_OP_SHIFT; /* write the value */
6441 ctl |= 3 << GAUDI_PKT_SHORT_CTL_BASE_SHIFT; /* W_S SOB base */
6442 ctl |= PACKET_MSG_SHORT << GAUDI_PKT_SHORT_CTL_OPCODE_SHIFT;
6443 ctl |= 1 << GAUDI_PKT_SHORT_CTL_EB_SHIFT;
6444 ctl |= 1 << GAUDI_PKT_SHORT_CTL_RB_SHIFT;
6445 ctl |= 1 << GAUDI_PKT_SHORT_CTL_MB_SHIFT;
6446
6447 pkt->value = cpu_to_le32(value);
6448 pkt->ctl = cpu_to_le32(ctl);
6449}
6450
6451static u32 gaudi_add_mon_msg_short(struct packet_msg_short *pkt, u32 value,
6452 u16 addr)
6453{
6454 u32 ctl, pkt_size = sizeof(*pkt);
6455
6456 memset(pkt, 0, pkt_size);
6457
6458 ctl = addr << GAUDI_PKT_SHORT_CTL_ADDR_SHIFT;
6459 ctl |= 2 << GAUDI_PKT_SHORT_CTL_BASE_SHIFT; /* W_S MON base */
6460 ctl |= PACKET_MSG_SHORT << GAUDI_PKT_SHORT_CTL_OPCODE_SHIFT;
6461 ctl |= 0 << GAUDI_PKT_SHORT_CTL_EB_SHIFT;
6462 ctl |= 1 << GAUDI_PKT_SHORT_CTL_RB_SHIFT;
6463 ctl |= 0 << GAUDI_PKT_SHORT_CTL_MB_SHIFT; /* only last pkt needs MB */
6464
6465 pkt->value = cpu_to_le32(value);
6466 pkt->ctl = cpu_to_le32(ctl);
6467
6468 return pkt_size;
6469}
6470
6471static u32 gaudi_add_arm_monitor_pkt(struct packet_msg_short *pkt, u16 sob_id,
6472 u16 sob_val, u16 addr)
6473{
6474 u32 ctl, value, pkt_size = sizeof(*pkt);
6475 u8 mask = ~(1 << (sob_id & 0x7));
6476
6477 memset(pkt, 0, pkt_size);
6478
6479 value = (sob_id / 8) << GAUDI_PKT_SHORT_VAL_MON_SYNC_GID_SHIFT;
6480 value |= sob_val << GAUDI_PKT_SHORT_VAL_MON_SYNC_VAL_SHIFT;
6481 value |= 0 << GAUDI_PKT_SHORT_VAL_MON_MODE_SHIFT; /* GREATER_OR_EQUAL */
6482 value |= mask << GAUDI_PKT_SHORT_VAL_MON_MASK_SHIFT;
6483
6484 ctl = addr << GAUDI_PKT_SHORT_CTL_ADDR_SHIFT;
6485 ctl |= 0 << GAUDI_PKT_SHORT_CTL_OP_SHIFT; /* write the value */
6486 ctl |= 2 << GAUDI_PKT_SHORT_CTL_BASE_SHIFT; /* W_S MON base */
6487 ctl |= PACKET_MSG_SHORT << GAUDI_PKT_SHORT_CTL_OPCODE_SHIFT;
6488 ctl |= 0 << GAUDI_PKT_SHORT_CTL_EB_SHIFT;
6489 ctl |= 1 << GAUDI_PKT_SHORT_CTL_RB_SHIFT;
6490 ctl |= 1 << GAUDI_PKT_SHORT_CTL_MB_SHIFT;
6491
6492 pkt->value = cpu_to_le32(value);
6493 pkt->ctl = cpu_to_le32(ctl);
6494
6495 return pkt_size;
6496}
6497
6498static u32 gaudi_add_fence_pkt(struct packet_fence *pkt)
6499{
6500 u32 ctl, cfg, pkt_size = sizeof(*pkt);
6501
6502 memset(pkt, 0, pkt_size);
6503
6504 cfg = 1 << GAUDI_PKT_FENCE_CFG_DEC_VAL_SHIFT;
6505 cfg |= 1 << GAUDI_PKT_FENCE_CFG_TARGET_VAL_SHIFT;
6506 cfg |= 2 << GAUDI_PKT_FENCE_CFG_ID_SHIFT;
6507
6508 ctl = 0 << GAUDI_PKT_FENCE_CTL_PRED_SHIFT;
6509 ctl |= PACKET_FENCE << GAUDI_PKT_FENCE_CTL_OPCODE_SHIFT;
6510 ctl |= 0 << GAUDI_PKT_FENCE_CTL_EB_SHIFT;
6511 ctl |= 1 << GAUDI_PKT_FENCE_CTL_RB_SHIFT;
6512 ctl |= 1 << GAUDI_PKT_FENCE_CTL_MB_SHIFT;
6513
6514 pkt->cfg = cpu_to_le32(cfg);
6515 pkt->ctl = cpu_to_le32(ctl);
6516
6517 return pkt_size;
6518}
6519
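/*
 * Wait CB sketch, matching gaudi_get_wait_cb_size(): four MSG_SHORT packets
 * that configure a monitor (payload address low/high, payload data = 1, arm
 * on sob_id >= sob_val), followed by a FENCE packet that waits on the CP
 * fence targeted by the monitor payload.
 */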
6520static void gaudi_gen_wait_cb(struct hl_device *hdev, void *data, u16 sob_id,
6521 u16 sob_val, u16 mon_id, u32 q_idx)
6522{
6523 struct hl_cb *cb = (struct hl_cb *) data;
6524 void *buf = (void *) (uintptr_t) cb->kernel_address;
6525 u64 monitor_base, fence_addr = 0;
6526 u32 size = 0;
6527 u16 msg_addr_offset;
6528
6529 switch (q_idx) {
6530 case GAUDI_QUEUE_ID_DMA_0_0:
6531 fence_addr = mmDMA0_QM_CP_FENCE2_RDATA_0;
6532 break;
6533 case GAUDI_QUEUE_ID_DMA_0_1:
6534 fence_addr = mmDMA0_QM_CP_FENCE2_RDATA_1;
6535 break;
6536 case GAUDI_QUEUE_ID_DMA_0_2:
6537 fence_addr = mmDMA0_QM_CP_FENCE2_RDATA_2;
6538 break;
6539 case GAUDI_QUEUE_ID_DMA_0_3:
6540 fence_addr = mmDMA0_QM_CP_FENCE2_RDATA_3;
6541 break;
6542 case GAUDI_QUEUE_ID_DMA_1_0:
6543 fence_addr = mmDMA1_QM_CP_FENCE2_RDATA_0;
6544 break;
6545 case GAUDI_QUEUE_ID_DMA_1_1:
6546 fence_addr = mmDMA1_QM_CP_FENCE2_RDATA_1;
6547 break;
6548 case GAUDI_QUEUE_ID_DMA_1_2:
6549 fence_addr = mmDMA1_QM_CP_FENCE2_RDATA_2;
6550 break;
6551 case GAUDI_QUEUE_ID_DMA_1_3:
6552 fence_addr = mmDMA1_QM_CP_FENCE2_RDATA_3;
6553 break;
6554 case GAUDI_QUEUE_ID_DMA_5_0:
6555 fence_addr = mmDMA5_QM_CP_FENCE2_RDATA_0;
6556 break;
6557 case GAUDI_QUEUE_ID_DMA_5_1:
6558 fence_addr = mmDMA5_QM_CP_FENCE2_RDATA_1;
6559 break;
6560 case GAUDI_QUEUE_ID_DMA_5_2:
6561 fence_addr = mmDMA5_QM_CP_FENCE2_RDATA_2;
6562 break;
6563 case GAUDI_QUEUE_ID_DMA_5_3:
6564 fence_addr = mmDMA5_QM_CP_FENCE2_RDATA_3;
6565 break;
6566 default:
6567 /* queue index should be valid here */
6568 dev_crit(hdev->dev, "wrong queue id %d for wait packet\n",
6569 q_idx);
6570 return;
6571 }
6572
6573 fence_addr += CFG_BASE;
6574
6575 /*
6576 * monitor_base should be the content of the base0 address registers,
6577 * so it will be added to the msg short offsets
6578 */
6579 monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;
6580
6581 /* First monitor config packet: low address of the sync */
6582 msg_addr_offset =
6583 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_id * 4) -
6584 monitor_base;
6585
6586 size += gaudi_add_mon_msg_short(buf + size, (u32) fence_addr,
6587 msg_addr_offset);
6588
6589 /* Second monitor config packet: high address of the sync */
6590 msg_addr_offset =
6591 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_id * 4) -
6592 monitor_base;
6593
6594 size += gaudi_add_mon_msg_short(buf + size, (u32) (fence_addr >> 32),
6595 msg_addr_offset);
6596
6597 /*
6598 * Third monitor config packet: the payload, i.e. what to write when the
6599 * sync triggers
6600 */
6601 msg_addr_offset =
6602 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_id * 4) -
6603 monitor_base;
6604
6605 size += gaudi_add_mon_msg_short(buf + size, 1, msg_addr_offset);
6606
6607 /* Fourth monitor config packet: bind the monitor to a sync object */
6608 msg_addr_offset =
6609 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0 + mon_id * 4) -
6610 monitor_base;
6611 size += gaudi_add_arm_monitor_pkt(buf + size, sob_id, sob_val,
6612 msg_addr_offset);
6613
6614 /* Fence packet */
6615 size += gaudi_add_fence_pkt(buf + size);
6616}
6617
6618static void gaudi_reset_sob(struct hl_device *hdev, void *data)
6619{
6620 struct hl_hw_sob *hw_sob = (struct hl_hw_sob *) data;
6621
6622 dev_dbg(hdev->dev, "reset SOB, q_idx: %d, sob_id: %d\n", hw_sob->q_idx,
6623 hw_sob->sob_id);
6624
6625 WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + hw_sob->sob_id * 4,
6626 0);
6627
6628 kref_init(&hw_sob->kref);
6629}
6630
6631static void gaudi_set_dma_mask_from_fw(struct hl_device *hdev)
6632{
6633 if (RREG32(mmPSOC_GLOBAL_CONF_NON_RST_FLOPS_0) ==
6634 HL_POWER9_HOST_MAGIC) {
6635 hdev->power9_64bit_dma_enable = 1;
6636 hdev->dma_mask = 64;
6637 } else {
6638 hdev->power9_64bit_dma_enable = 0;
6639 hdev->dma_mask = 48;
6640 }
6641}
6642
6643static u64 gaudi_get_device_time(struct hl_device *hdev)
6644{
6645 u64 device_time = ((u64) RREG32(mmPSOC_TIMESTAMP_CNTCVU)) << 32;
6646
6647 return device_time | RREG32(mmPSOC_TIMESTAMP_CNTCVL);
6648}
6649
6650static const struct hl_asic_funcs gaudi_funcs = {
6651 .early_init = gaudi_early_init,
6652 .early_fini = gaudi_early_fini,
6653 .late_init = gaudi_late_init,
6654 .late_fini = gaudi_late_fini,
6655 .sw_init = gaudi_sw_init,
6656 .sw_fini = gaudi_sw_fini,
6657 .hw_init = gaudi_hw_init,
6658 .hw_fini = gaudi_hw_fini,
6659 .halt_engines = gaudi_halt_engines,
6660 .suspend = gaudi_suspend,
6661 .resume = gaudi_resume,
6662 .cb_mmap = gaudi_cb_mmap,
6663 .ring_doorbell = gaudi_ring_doorbell,
6664 .pqe_write = gaudi_pqe_write,
6665 .asic_dma_alloc_coherent = gaudi_dma_alloc_coherent,
6666 .asic_dma_free_coherent = gaudi_dma_free_coherent,
6667 .get_int_queue_base = gaudi_get_int_queue_base,
6668 .test_queues = gaudi_test_queues,
6669 .asic_dma_pool_zalloc = gaudi_dma_pool_zalloc,
6670 .asic_dma_pool_free = gaudi_dma_pool_free,
6671 .cpu_accessible_dma_pool_alloc = gaudi_cpu_accessible_dma_pool_alloc,
6672 .cpu_accessible_dma_pool_free = gaudi_cpu_accessible_dma_pool_free,
6673 .hl_dma_unmap_sg = gaudi_dma_unmap_sg,
6674 .cs_parser = gaudi_cs_parser,
6675 .asic_dma_map_sg = gaudi_dma_map_sg,
6676 .get_dma_desc_list_size = gaudi_get_dma_desc_list_size,
6677 .add_end_of_cb_packets = gaudi_add_end_of_cb_packets,
6678 .update_eq_ci = gaudi_update_eq_ci,
6679 .context_switch = gaudi_context_switch,
6680 .restore_phase_topology = gaudi_restore_phase_topology,
6681 .debugfs_read32 = gaudi_debugfs_read32,
6682 .debugfs_write32 = gaudi_debugfs_write32,
6683 .debugfs_read64 = gaudi_debugfs_read64,
6684 .debugfs_write64 = gaudi_debugfs_write64,
6685 .add_device_attr = gaudi_add_device_attr,
6686 .handle_eqe = gaudi_handle_eqe,
6687 .set_pll_profile = gaudi_set_pll_profile,
6688 .get_events_stat = gaudi_get_events_stat,
6689 .read_pte = gaudi_read_pte,
6690 .write_pte = gaudi_write_pte,
6691 .mmu_invalidate_cache = gaudi_mmu_invalidate_cache,
6692 .mmu_invalidate_cache_range = gaudi_mmu_invalidate_cache_range,
6693 .send_heartbeat = gaudi_send_heartbeat,
6694 .enable_clock_gating = gaudi_enable_clock_gating,
6695 .disable_clock_gating = gaudi_disable_clock_gating,
6696 .debug_coresight = gaudi_debug_coresight,
6697 .is_device_idle = gaudi_is_device_idle,
6698 .soft_reset_late_init = gaudi_soft_reset_late_init,
6699 .hw_queues_lock = gaudi_hw_queues_lock,
6700 .hw_queues_unlock = gaudi_hw_queues_unlock,
6701 .get_pci_id = gaudi_get_pci_id,
6702 .get_eeprom_data = gaudi_get_eeprom_data,
6703 .send_cpu_message = gaudi_send_cpu_message,
6704 .get_hw_state = gaudi_get_hw_state,
6705 .pci_bars_map = gaudi_pci_bars_map,
6706 .set_dram_bar_base = gaudi_set_hbm_bar_base,
6707 .init_iatu = gaudi_init_iatu,
6708 .rreg = hl_rreg,
6709 .wreg = hl_wreg,
6710 .halt_coresight = gaudi_halt_coresight,
6711 .get_clk_rate = gaudi_get_clk_rate,
6712 .get_queue_id_for_cq = gaudi_get_queue_id_for_cq,
6713 .read_device_fw_version = gaudi_read_device_fw_version,
6714 .load_firmware_to_device = gaudi_load_firmware_to_device,
6715 .load_boot_fit_to_device = gaudi_load_boot_fit_to_device,
6716 .ext_queue_init = gaudi_ext_queue_init,
6717 .ext_queue_reset = gaudi_ext_queue_reset,
6718 .get_signal_cb_size = gaudi_get_signal_cb_size,
6719 .get_wait_cb_size = gaudi_get_wait_cb_size,
6720 .gen_signal_cb = gaudi_gen_signal_cb,
6721 .gen_wait_cb = gaudi_gen_wait_cb,
6722 .reset_sob = gaudi_reset_sob,
6723 .set_dma_mask_from_fw = gaudi_set_dma_mask_from_fw,
6724 .get_device_time = gaudi_get_device_time
6725};
6726
6727/**
6728 * gaudi_set_asic_funcs - set GAUDI function pointers
6729 *
6730 * @hdev: pointer to hl_device structure
6731 *
6732 */
6733void gaudi_set_asic_funcs(struct hl_device *hdev)
6734{
6735 hdev->asic_funcs = &gaudi_funcs;
6736}