// SPDX-License-Identifier: GPL-2.0

/*
 * Copyright 2016-2019 HabanaLabs, Ltd.
 * All Rights Reserved.
 */

#include "goyaP.h"
#include "include/hw_ip/mmu/mmu_general.h"
#include "include/hw_ip/mmu/mmu_v1_0.h"
#include "include/goya/asic_reg/goya_masks.h"

#include <linux/pci.h>
#include <linux/genalloc.h>
#include <linux/firmware.h>
#include <linux/hwmon.h>
#include <linux/io-64-nonatomic-lo-hi.h>
#include <linux/io-64-nonatomic-hi-lo.h>

/*
 * GOYA security scheme:
 *
 * 1. Host is protected by:
 *        - Range registers (when MMU is enabled, DMA RR does NOT protect host)
 *        - MMU
 *
 * 2. DRAM is protected by:
 *        - Range registers (protect the first 512MB)
 *        - MMU (isolation between users)
 *
 * 3. Configuration is protected by:
 *        - Range registers
 *        - Protection bits
 *
 * When MMU is disabled:
 *
 * QMAN DMA: PQ, CQ, CP, DMA are secured.
 * PQ, CB and the data are on the host.
 *
 * QMAN TPC/MME:
 * PQ, CQ and CP are not secured.
 * PQ, CB and the data are on the SRAM/DRAM.
 *
 * Since QMAN DMA is secured, KMD parses the DMA CB:
 *        - KMD checks the DMA pointer
 *        - WREG, MSG_PROT are not allowed.
 *        - MSG_LONG/SHORT are allowed.
 *
 * A read/write transaction by the QMAN to a protected area will succeed if
 * and only if the QMAN's CP is secured and MSG_PROT is used.
 *
 *
 * When MMU is enabled:
 *
 * QMAN DMA: PQ, CQ and CP are secured.
 * MMU is set to bypass on the Secure props register of the QMAN.
 * The reasons we don't enable MMU for PQ, CQ and CP are:
 *        - PQ entry is in kernel address space and KMD doesn't map it.
 *        - CP writes to MSIX register and to kernel address space (completion
 *          queue).
 *
 * DMA is not secured but because CP is secured, KMD still needs to parse the
 * CB, but doesn't need to check the DMA addresses.
 *
 * For QMAN DMA 0, DMA is also secured because only KMD uses this DMA and KMD
 * doesn't map memory in MMU.
 *
 * QMAN TPC/MME: PQ, CQ and CP aren't secured (no change from MMU disabled mode)
 *
 * DMA RR does NOT protect host because DMA is not secured.
 *
 */
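
/*
 * Illustration of the parsing rules above (not driver logic): with MMU
 * disabled, a user CB going through a secured DMA QMAN may contain
 * MSG_LONG/MSG_SHORT packets, but any WREG or MSG_PROT packet must be
 * rejected by the KMD parser before the CB is queued.
 */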

#define GOYA_MMU_REGS_NUM		61

#define GOYA_DMA_POOL_BLK_SIZE		0x100 /* 256 bytes */

#define GOYA_RESET_TIMEOUT_MSEC		500		/* 500ms */
#define GOYA_PLDM_RESET_TIMEOUT_MSEC	20000		/* 20s */
#define GOYA_RESET_WAIT_MSEC		1		/* 1ms */
#define GOYA_CPU_RESET_WAIT_MSEC	100		/* 100ms */
#define GOYA_PLDM_RESET_WAIT_MSEC	1000		/* 1s */
#define GOYA_CPU_TIMEOUT_USEC		10000000	/* 10s */
#define GOYA_TEST_QUEUE_WAIT_USEC	100000		/* 100ms */
#define GOYA_PLDM_MMU_TIMEOUT_USEC	(MMU_CONFIG_TIMEOUT_USEC * 100)

#define GOYA_QMAN0_FENCE_VAL		0xD169B243

#define GOYA_MAX_INITIATORS		20

#define GOYA_MAX_STRING_LEN		20

#define GOYA_CB_POOL_CB_CNT		512
#define GOYA_CB_POOL_CB_SIZE		0x20000 /* 128KB */

static const char goya_irq_name[GOYA_MSIX_ENTRIES][GOYA_MAX_STRING_LEN] = {
	"goya cq 0", "goya cq 1", "goya cq 2", "goya cq 3",
	"goya cq 4", "goya cpu eq"
};

static u16 goya_packet_sizes[MAX_PACKET_ID] = {
	[PACKET_WREG_32]	= sizeof(struct packet_wreg32),
	[PACKET_WREG_BULK]	= sizeof(struct packet_wreg_bulk),
	[PACKET_MSG_LONG]	= sizeof(struct packet_msg_long),
	[PACKET_MSG_SHORT]	= sizeof(struct packet_msg_short),
	[PACKET_CP_DMA]		= sizeof(struct packet_cp_dma),
	[PACKET_MSG_PROT]	= sizeof(struct packet_msg_prot),
	[PACKET_FENCE]		= sizeof(struct packet_fence),
	[PACKET_LIN_DMA]	= sizeof(struct packet_lin_dma),
	[PACKET_NOP]		= sizeof(struct packet_nop),
	[PACKET_STOP]		= sizeof(struct packet_stop)
};
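
/*
 * Note (descriptive, based on the table above): per-opcode sizes are what
 * let a CB be walked packet by packet, e.g. roughly:
 *
 *	while (offset < cb_size) {
 *		struct goya_packet *pkt = cb_base + offset;
 *		offset += goya_packet_sizes[pkt_id(pkt)];
 *	}
 *
 * pkt_id() here is a hypothetical helper that extracts the opcode from the
 * packet header; the actual parser in this driver may differ.
 */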

static const char *goya_axi_name[GOYA_MAX_INITIATORS] = {
	"MME0",
	"MME1",
	"MME2",
	"MME3",
	"MME4",
	"MME5",
	"TPC0",
	"TPC1",
	"TPC2",
	"TPC3",
	"TPC4",
	"TPC5",
	"TPC6",
	"TPC7",
	"PCI",
	"DMA", /* HBW */
	"DMA", /* LBW */
	"PSOC",
	"CPU",
	"MMU"
};

static u64 goya_mmu_regs[GOYA_MMU_REGS_NUM] = {
	mmDMA_QM_0_GLBL_NON_SECURE_PROPS,
	mmDMA_QM_1_GLBL_NON_SECURE_PROPS,
	mmDMA_QM_2_GLBL_NON_SECURE_PROPS,
	mmDMA_QM_3_GLBL_NON_SECURE_PROPS,
	mmDMA_QM_4_GLBL_NON_SECURE_PROPS,
	mmTPC0_QM_GLBL_SECURE_PROPS,
	mmTPC0_QM_GLBL_NON_SECURE_PROPS,
	mmTPC0_CMDQ_GLBL_SECURE_PROPS,
	mmTPC0_CMDQ_GLBL_NON_SECURE_PROPS,
	mmTPC0_CFG_ARUSER,
	mmTPC0_CFG_AWUSER,
	mmTPC1_QM_GLBL_SECURE_PROPS,
	mmTPC1_QM_GLBL_NON_SECURE_PROPS,
	mmTPC1_CMDQ_GLBL_SECURE_PROPS,
	mmTPC1_CMDQ_GLBL_NON_SECURE_PROPS,
	mmTPC1_CFG_ARUSER,
	mmTPC1_CFG_AWUSER,
	mmTPC2_QM_GLBL_SECURE_PROPS,
	mmTPC2_QM_GLBL_NON_SECURE_PROPS,
	mmTPC2_CMDQ_GLBL_SECURE_PROPS,
	mmTPC2_CMDQ_GLBL_NON_SECURE_PROPS,
	mmTPC2_CFG_ARUSER,
	mmTPC2_CFG_AWUSER,
	mmTPC3_QM_GLBL_SECURE_PROPS,
	mmTPC3_QM_GLBL_NON_SECURE_PROPS,
	mmTPC3_CMDQ_GLBL_SECURE_PROPS,
	mmTPC3_CMDQ_GLBL_NON_SECURE_PROPS,
	mmTPC3_CFG_ARUSER,
	mmTPC3_CFG_AWUSER,
	mmTPC4_QM_GLBL_SECURE_PROPS,
	mmTPC4_QM_GLBL_NON_SECURE_PROPS,
	mmTPC4_CMDQ_GLBL_SECURE_PROPS,
	mmTPC4_CMDQ_GLBL_NON_SECURE_PROPS,
	mmTPC4_CFG_ARUSER,
	mmTPC4_CFG_AWUSER,
	mmTPC5_QM_GLBL_SECURE_PROPS,
	mmTPC5_QM_GLBL_NON_SECURE_PROPS,
	mmTPC5_CMDQ_GLBL_SECURE_PROPS,
	mmTPC5_CMDQ_GLBL_NON_SECURE_PROPS,
	mmTPC5_CFG_ARUSER,
	mmTPC5_CFG_AWUSER,
	mmTPC6_QM_GLBL_SECURE_PROPS,
	mmTPC6_QM_GLBL_NON_SECURE_PROPS,
	mmTPC6_CMDQ_GLBL_SECURE_PROPS,
	mmTPC6_CMDQ_GLBL_NON_SECURE_PROPS,
	mmTPC6_CFG_ARUSER,
	mmTPC6_CFG_AWUSER,
	mmTPC7_QM_GLBL_SECURE_PROPS,
	mmTPC7_QM_GLBL_NON_SECURE_PROPS,
	mmTPC7_CMDQ_GLBL_SECURE_PROPS,
	mmTPC7_CMDQ_GLBL_NON_SECURE_PROPS,
	mmTPC7_CFG_ARUSER,
	mmTPC7_CFG_AWUSER,
	mmMME_QM_GLBL_SECURE_PROPS,
	mmMME_QM_GLBL_NON_SECURE_PROPS,
	mmMME_CMDQ_GLBL_SECURE_PROPS,
	mmMME_CMDQ_GLBL_NON_SECURE_PROPS,
	mmMME_SBA_CONTROL_DATA,
	mmMME_SBB_CONTROL_DATA,
	mmMME_SBC_CONTROL_DATA,
	mmMME_WBC_CONTROL_DATA
};

#define GOYA_ASYC_EVENT_GROUP_NON_FATAL_SIZE 121

static u32 goya_non_fatal_events[GOYA_ASYC_EVENT_GROUP_NON_FATAL_SIZE] = {
	GOYA_ASYNC_EVENT_ID_PCIE_IF,
	GOYA_ASYNC_EVENT_ID_TPC0_ECC,
	GOYA_ASYNC_EVENT_ID_TPC1_ECC,
	GOYA_ASYNC_EVENT_ID_TPC2_ECC,
	GOYA_ASYNC_EVENT_ID_TPC3_ECC,
	GOYA_ASYNC_EVENT_ID_TPC4_ECC,
	GOYA_ASYNC_EVENT_ID_TPC5_ECC,
	GOYA_ASYNC_EVENT_ID_TPC6_ECC,
	GOYA_ASYNC_EVENT_ID_TPC7_ECC,
	GOYA_ASYNC_EVENT_ID_MME_ECC,
	GOYA_ASYNC_EVENT_ID_MME_ECC_EXT,
	GOYA_ASYNC_EVENT_ID_MMU_ECC,
	GOYA_ASYNC_EVENT_ID_DMA_MACRO,
	GOYA_ASYNC_EVENT_ID_DMA_ECC,
	GOYA_ASYNC_EVENT_ID_CPU_IF_ECC,
	GOYA_ASYNC_EVENT_ID_PSOC_MEM,
	GOYA_ASYNC_EVENT_ID_PSOC_CORESIGHT,
	GOYA_ASYNC_EVENT_ID_SRAM0,
	GOYA_ASYNC_EVENT_ID_SRAM1,
	GOYA_ASYNC_EVENT_ID_SRAM2,
	GOYA_ASYNC_EVENT_ID_SRAM3,
	GOYA_ASYNC_EVENT_ID_SRAM4,
	GOYA_ASYNC_EVENT_ID_SRAM5,
	GOYA_ASYNC_EVENT_ID_SRAM6,
	GOYA_ASYNC_EVENT_ID_SRAM7,
	GOYA_ASYNC_EVENT_ID_SRAM8,
	GOYA_ASYNC_EVENT_ID_SRAM9,
	GOYA_ASYNC_EVENT_ID_SRAM10,
	GOYA_ASYNC_EVENT_ID_SRAM11,
	GOYA_ASYNC_EVENT_ID_SRAM12,
	GOYA_ASYNC_EVENT_ID_SRAM13,
	GOYA_ASYNC_EVENT_ID_SRAM14,
	GOYA_ASYNC_EVENT_ID_SRAM15,
	GOYA_ASYNC_EVENT_ID_SRAM16,
	GOYA_ASYNC_EVENT_ID_SRAM17,
	GOYA_ASYNC_EVENT_ID_SRAM18,
	GOYA_ASYNC_EVENT_ID_SRAM19,
	GOYA_ASYNC_EVENT_ID_SRAM20,
	GOYA_ASYNC_EVENT_ID_SRAM21,
	GOYA_ASYNC_EVENT_ID_SRAM22,
	GOYA_ASYNC_EVENT_ID_SRAM23,
	GOYA_ASYNC_EVENT_ID_SRAM24,
	GOYA_ASYNC_EVENT_ID_SRAM25,
	GOYA_ASYNC_EVENT_ID_SRAM26,
	GOYA_ASYNC_EVENT_ID_SRAM27,
	GOYA_ASYNC_EVENT_ID_SRAM28,
	GOYA_ASYNC_EVENT_ID_SRAM29,
	GOYA_ASYNC_EVENT_ID_GIC500,
	GOYA_ASYNC_EVENT_ID_PLL0,
	GOYA_ASYNC_EVENT_ID_PLL1,
	GOYA_ASYNC_EVENT_ID_PLL3,
	GOYA_ASYNC_EVENT_ID_PLL4,
	GOYA_ASYNC_EVENT_ID_PLL5,
	GOYA_ASYNC_EVENT_ID_PLL6,
	GOYA_ASYNC_EVENT_ID_AXI_ECC,
	GOYA_ASYNC_EVENT_ID_L2_RAM_ECC,
	GOYA_ASYNC_EVENT_ID_PSOC_GPIO_05_SW_RESET,
	GOYA_ASYNC_EVENT_ID_PSOC_GPIO_10_VRHOT_ICRIT,
	GOYA_ASYNC_EVENT_ID_PCIE_DEC,
	GOYA_ASYNC_EVENT_ID_TPC0_DEC,
	GOYA_ASYNC_EVENT_ID_TPC1_DEC,
	GOYA_ASYNC_EVENT_ID_TPC2_DEC,
	GOYA_ASYNC_EVENT_ID_TPC3_DEC,
	GOYA_ASYNC_EVENT_ID_TPC4_DEC,
	GOYA_ASYNC_EVENT_ID_TPC5_DEC,
	GOYA_ASYNC_EVENT_ID_TPC6_DEC,
	GOYA_ASYNC_EVENT_ID_TPC7_DEC,
	GOYA_ASYNC_EVENT_ID_MME_WACS,
	GOYA_ASYNC_EVENT_ID_MME_WACSD,
	GOYA_ASYNC_EVENT_ID_CPU_AXI_SPLITTER,
	GOYA_ASYNC_EVENT_ID_PSOC_AXI_DEC,
	GOYA_ASYNC_EVENT_ID_PSOC,
	GOYA_ASYNC_EVENT_ID_TPC0_KRN_ERR,
	GOYA_ASYNC_EVENT_ID_TPC1_KRN_ERR,
	GOYA_ASYNC_EVENT_ID_TPC2_KRN_ERR,
	GOYA_ASYNC_EVENT_ID_TPC3_KRN_ERR,
	GOYA_ASYNC_EVENT_ID_TPC4_KRN_ERR,
	GOYA_ASYNC_EVENT_ID_TPC5_KRN_ERR,
	GOYA_ASYNC_EVENT_ID_TPC6_KRN_ERR,
	GOYA_ASYNC_EVENT_ID_TPC7_KRN_ERR,
	GOYA_ASYNC_EVENT_ID_TPC0_CMDQ,
	GOYA_ASYNC_EVENT_ID_TPC1_CMDQ,
	GOYA_ASYNC_EVENT_ID_TPC2_CMDQ,
	GOYA_ASYNC_EVENT_ID_TPC3_CMDQ,
	GOYA_ASYNC_EVENT_ID_TPC4_CMDQ,
	GOYA_ASYNC_EVENT_ID_TPC5_CMDQ,
	GOYA_ASYNC_EVENT_ID_TPC6_CMDQ,
	GOYA_ASYNC_EVENT_ID_TPC7_CMDQ,
	GOYA_ASYNC_EVENT_ID_TPC0_QM,
	GOYA_ASYNC_EVENT_ID_TPC1_QM,
	GOYA_ASYNC_EVENT_ID_TPC2_QM,
	GOYA_ASYNC_EVENT_ID_TPC3_QM,
	GOYA_ASYNC_EVENT_ID_TPC4_QM,
	GOYA_ASYNC_EVENT_ID_TPC5_QM,
	GOYA_ASYNC_EVENT_ID_TPC6_QM,
	GOYA_ASYNC_EVENT_ID_TPC7_QM,
	GOYA_ASYNC_EVENT_ID_MME_QM,
	GOYA_ASYNC_EVENT_ID_MME_CMDQ,
	GOYA_ASYNC_EVENT_ID_DMA0_QM,
	GOYA_ASYNC_EVENT_ID_DMA1_QM,
	GOYA_ASYNC_EVENT_ID_DMA2_QM,
	GOYA_ASYNC_EVENT_ID_DMA3_QM,
	GOYA_ASYNC_EVENT_ID_DMA4_QM,
	GOYA_ASYNC_EVENT_ID_DMA0_CH,
	GOYA_ASYNC_EVENT_ID_DMA1_CH,
	GOYA_ASYNC_EVENT_ID_DMA2_CH,
	GOYA_ASYNC_EVENT_ID_DMA3_CH,
	GOYA_ASYNC_EVENT_ID_DMA4_CH,
	GOYA_ASYNC_EVENT_ID_TPC0_BMON_SPMU,
	GOYA_ASYNC_EVENT_ID_TPC1_BMON_SPMU,
	GOYA_ASYNC_EVENT_ID_TPC2_BMON_SPMU,
	GOYA_ASYNC_EVENT_ID_TPC3_BMON_SPMU,
	GOYA_ASYNC_EVENT_ID_TPC4_BMON_SPMU,
	GOYA_ASYNC_EVENT_ID_TPC5_BMON_SPMU,
	GOYA_ASYNC_EVENT_ID_TPC6_BMON_SPMU,
	GOYA_ASYNC_EVENT_ID_TPC7_BMON_SPMU,
	GOYA_ASYNC_EVENT_ID_DMA_BM_CH0,
	GOYA_ASYNC_EVENT_ID_DMA_BM_CH1,
	GOYA_ASYNC_EVENT_ID_DMA_BM_CH2,
	GOYA_ASYNC_EVENT_ID_DMA_BM_CH3,
	GOYA_ASYNC_EVENT_ID_DMA_BM_CH4
};

static int goya_armcp_info_get(struct hl_device *hdev);
static void goya_mmu_prepare(struct hl_device *hdev, u32 asid);
static int goya_mmu_clear_pgt_range(struct hl_device *hdev);
static int goya_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid,
					u64 phys_addr);

static void goya_get_fixed_properties(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	int i;

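	/*
	 * Queue properties are filled in submission order: external
	 * (host-managed) queues first, then the CPU queue(s), then the
	 * internal on-device queues; any remaining slots are marked N/A.
	 */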
	for (i = 0 ; i < NUMBER_OF_EXT_HW_QUEUES ; i++) {
		prop->hw_queues_props[i].type = QUEUE_TYPE_EXT;
		prop->hw_queues_props[i].kmd_only = 0;
	}

	for (; i < NUMBER_OF_EXT_HW_QUEUES + NUMBER_OF_CPU_HW_QUEUES ; i++) {
		prop->hw_queues_props[i].type = QUEUE_TYPE_CPU;
		prop->hw_queues_props[i].kmd_only = 1;
	}

	for (; i < NUMBER_OF_EXT_HW_QUEUES + NUMBER_OF_CPU_HW_QUEUES +
			NUMBER_OF_INT_HW_QUEUES; i++) {
		prop->hw_queues_props[i].type = QUEUE_TYPE_INT;
		prop->hw_queues_props[i].kmd_only = 0;
	}

	for (; i < HL_MAX_QUEUES; i++)
		prop->hw_queues_props[i].type = QUEUE_TYPE_NA;

	prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES;

	prop->dram_base_address = DRAM_PHYS_BASE;
	prop->dram_size = DRAM_PHYS_DEFAULT_SIZE;
	prop->dram_end_address = prop->dram_base_address + prop->dram_size;
	prop->dram_user_base_address = DRAM_BASE_ADDR_USER;

	prop->sram_base_address = SRAM_BASE_ADDR;
	prop->sram_size = SRAM_SIZE;
	prop->sram_end_address = prop->sram_base_address + prop->sram_size;
	prop->sram_user_base_address = prop->sram_base_address +
					SRAM_USER_BASE_OFFSET;

	prop->mmu_pgt_addr = MMU_PAGE_TABLES_ADDR;
	if (hdev->pldm)
		prop->mmu_pgt_size = 0x800000; /* 8MB */
	else
		prop->mmu_pgt_size = MMU_PAGE_TABLES_SIZE;
	prop->mmu_pte_size = HL_PTE_SIZE;
	prop->mmu_hop_table_size = HOP_TABLE_SIZE;
	prop->mmu_hop0_tables_total_size = HOP0_TABLES_TOTAL_SIZE;
	prop->dram_page_size = PAGE_SIZE_2MB;

	prop->host_phys_base_address = HOST_PHYS_BASE;
	prop->va_space_host_start_address = VA_HOST_SPACE_START;
	prop->va_space_host_end_address = VA_HOST_SPACE_END;
	prop->va_space_dram_start_address = VA_DDR_SPACE_START;
	prop->va_space_dram_end_address = VA_DDR_SPACE_END;
	prop->cfg_size = CFG_SIZE;
	prop->max_asid = MAX_ASID;
	prop->num_of_events = GOYA_ASYNC_EVENT_ID_SIZE;
	prop->cb_pool_cb_cnt = GOYA_CB_POOL_CB_CNT;
	prop->cb_pool_cb_size = GOYA_CB_POOL_CB_SIZE;
	prop->max_power_default = MAX_POWER_DEFAULT;
	prop->tpc_enabled_mask = TPC_ENABLED_MASK;

	prop->high_pll = PLL_HIGH_DEFAULT;
}

int goya_send_pci_access_msg(struct hl_device *hdev, u32 opcode)
{
	struct armcp_packet pkt;

	memset(&pkt, 0, sizeof(pkt));

	pkt.ctl = opcode << ARMCP_PKT_CTL_OPCODE_SHIFT;

	return hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt,
			sizeof(pkt), HL_DEVICE_TIMEOUT_USEC, NULL);
}

/*
 * goya_pci_bars_map - Map PCI BARS of Goya device
 *
 * @hdev: pointer to hl_device structure
 *
 * Request PCI regions and map them to kernel virtual addresses.
 * Returns 0 on success
 *
 */
int goya_pci_bars_map(struct hl_device *hdev)
{
	struct pci_dev *pdev = hdev->pdev;
	int rc;

	rc = pci_request_regions(pdev, HL_NAME);
	if (rc) {
		dev_err(hdev->dev, "Cannot obtain PCI resources\n");
		return rc;
	}

	hdev->pcie_bar[SRAM_CFG_BAR_ID] =
			pci_ioremap_bar(pdev, SRAM_CFG_BAR_ID);
	if (!hdev->pcie_bar[SRAM_CFG_BAR_ID]) {
		dev_err(hdev->dev, "pci_ioremap_bar failed for CFG\n");
		rc = -ENODEV;
		goto err_release_regions;
	}

	hdev->pcie_bar[MSIX_BAR_ID] = pci_ioremap_bar(pdev, MSIX_BAR_ID);
	if (!hdev->pcie_bar[MSIX_BAR_ID]) {
		dev_err(hdev->dev, "pci_ioremap_bar failed for MSIX\n");
		rc = -ENODEV;
		goto err_unmap_sram_cfg;
	}

	hdev->pcie_bar[DDR_BAR_ID] = pci_ioremap_wc_bar(pdev, DDR_BAR_ID);
	if (!hdev->pcie_bar[DDR_BAR_ID]) {
		dev_err(hdev->dev, "pci_ioremap_bar failed for DDR\n");
		rc = -ENODEV;
		goto err_unmap_msix;
	}

	hdev->rmmio = hdev->pcie_bar[SRAM_CFG_BAR_ID] +
			(CFG_BASE - SRAM_BASE_ADDR);

	return 0;

err_unmap_msix:
	iounmap(hdev->pcie_bar[MSIX_BAR_ID]);
err_unmap_sram_cfg:
	iounmap(hdev->pcie_bar[SRAM_CFG_BAR_ID]);
err_release_regions:
	pci_release_regions(pdev);

	return rc;
}

/*
 * goya_pci_bars_unmap - Unmap PCI BARS of Goya device
 *
 * @hdev: pointer to hl_device structure
 *
 * Release all PCI BARS and unmap their virtual addresses
 *
 */
static void goya_pci_bars_unmap(struct hl_device *hdev)
{
	struct pci_dev *pdev = hdev->pdev;

	iounmap(hdev->pcie_bar[DDR_BAR_ID]);
	iounmap(hdev->pcie_bar[MSIX_BAR_ID]);
	iounmap(hdev->pcie_bar[SRAM_CFG_BAR_ID]);
	pci_release_regions(pdev);
}

/*
 * goya_elbi_write - Write through the ELBI interface
 *
 * @hdev: pointer to hl_device structure
 *
 * return 0 on success, -EIO on failure
 *
 */
static int goya_elbi_write(struct hl_device *hdev, u64 addr, u32 data)
{
	struct pci_dev *pdev = hdev->pdev;
	ktime_t timeout;
	u32 val;

	/* Clear previous status */
	pci_write_config_dword(pdev, mmPCI_CONFIG_ELBI_STS, 0);

	pci_write_config_dword(pdev, mmPCI_CONFIG_ELBI_ADDR, (u32) addr);
	pci_write_config_dword(pdev, mmPCI_CONFIG_ELBI_DATA, data);
	pci_write_config_dword(pdev, mmPCI_CONFIG_ELBI_CTRL,
				PCI_CONFIG_ELBI_CTRL_WRITE);

	timeout = ktime_add_ms(ktime_get(), 10);
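	/*
	 * Poll for completion. The extra status read after the deadline
	 * check below avoids falsely reporting a timeout when the write
	 * completes just as the deadline expires.
	 */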
	for (;;) {
		pci_read_config_dword(pdev, mmPCI_CONFIG_ELBI_STS, &val);
		if (val & PCI_CONFIG_ELBI_STS_MASK)
			break;
		if (ktime_compare(ktime_get(), timeout) > 0) {
			pci_read_config_dword(pdev, mmPCI_CONFIG_ELBI_STS,
						&val);
			break;
		}
		usleep_range(300, 500);
	}

	if ((val & PCI_CONFIG_ELBI_STS_MASK) == PCI_CONFIG_ELBI_STS_DONE)
		return 0;

	if (val & PCI_CONFIG_ELBI_STS_ERR) {
		dev_err(hdev->dev, "Error writing to ELBI\n");
		return -EIO;
	}

	if (!(val & PCI_CONFIG_ELBI_STS_MASK)) {
		dev_err(hdev->dev, "ELBI write didn't finish in time\n");
		return -EIO;
	}

	dev_err(hdev->dev, "ELBI write has undefined bits in status\n");
	return -EIO;
}

/*
 * goya_iatu_write - iatu write routine
 *
 * @hdev: pointer to hl_device structure
 *
 */
static int goya_iatu_write(struct hl_device *hdev, u32 addr, u32 data)
{
	u32 dbi_offset;
	int rc;

	dbi_offset = addr & 0xFFF;

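	/*
	 * DBI registers are reached in two steps: an ELBI write to
	 * mmPCIE_AUX_DBI moves the 4KB DBI window (the 0x00300000 value is
	 * assumed here to select the iATU register region), then the target
	 * register is written at its offset within that window.
	 */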
	rc = goya_elbi_write(hdev, CFG_BASE + mmPCIE_AUX_DBI, 0x00300000);
	rc |= goya_elbi_write(hdev, mmPCIE_DBI_BASE + dbi_offset, data);

	if (rc)
		return -EIO;

	return 0;
}

void goya_reset_link_through_bridge(struct hl_device *hdev)
{
	struct pci_dev *pdev = hdev->pdev;
	struct pci_dev *parent_port;
	u16 val;

	parent_port = pdev->bus->self;
	pci_read_config_word(parent_port, PCI_BRIDGE_CONTROL, &val);
	val |= PCI_BRIDGE_CTL_BUS_RESET;
	pci_write_config_word(parent_port, PCI_BRIDGE_CONTROL, val);
	ssleep(1);

	val &= ~(PCI_BRIDGE_CTL_BUS_RESET);
	pci_write_config_word(parent_port, PCI_BRIDGE_CONTROL, val);
	ssleep(3);
}

/*
 * goya_set_ddr_bar_base - set DDR bar to map specific device address
 *
 * @hdev: pointer to hl_device structure
 * @addr: address in DDR. Must be aligned to DDR bar size
 *
 * This function configures the iATU so that the DDR bar will start at the
 * specified addr.
 *
 */
static int goya_set_ddr_bar_base(struct hl_device *hdev, u64 addr)
{
	struct goya_device *goya = hdev->asic_specific;
	int rc;

	if ((goya) && (goya->ddr_bar_cur_addr == addr))
		return 0;

	/* Inbound Region 1 - Bar 4 - Point to DDR */
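	/*
	 * The offsets below are assumed to follow the DesignWare iATU
	 * layout: 0x314/0x318 hold the lower/upper target address, 0x300
	 * is region control 1, and 0x304 is region control 2, where
	 * 0xC0080400 sets region enable, BAR-match mode and BAR number 4
	 * (per the in-line comment).
	 */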
	rc = goya_iatu_write(hdev, 0x314, lower_32_bits(addr));
	rc |= goya_iatu_write(hdev, 0x318, upper_32_bits(addr));
	rc |= goya_iatu_write(hdev, 0x300, 0);
	/* Enable + Bar match + match enable + Bar 4 */
	rc |= goya_iatu_write(hdev, 0x304, 0xC0080400);

	/* Return the DBI window to the default location */
	rc |= goya_elbi_write(hdev, CFG_BASE + mmPCIE_AUX_DBI, 0);
	rc |= goya_elbi_write(hdev, CFG_BASE + mmPCIE_AUX_DBI_32, 0);

	if (rc) {
		dev_err(hdev->dev, "failed to map DDR bar to 0x%08llx\n", addr);
		return -EIO;
	}

	if (goya)
		goya->ddr_bar_cur_addr = addr;

	return 0;
}

/*
 * goya_init_iatu - Initialize the iATU unit inside the PCI controller
 *
 * @hdev: pointer to hl_device structure
 *
 * This is needed in case the firmware doesn't initialize the iATU
 *
 */
static int goya_init_iatu(struct hl_device *hdev)
{
	int rc;

	/* Inbound Region 0 - Bar 0 - Point to SRAM_BASE_ADDR */
	rc = goya_iatu_write(hdev, 0x114, lower_32_bits(SRAM_BASE_ADDR));
	rc |= goya_iatu_write(hdev, 0x118, upper_32_bits(SRAM_BASE_ADDR));
	rc |= goya_iatu_write(hdev, 0x100, 0);
	/* Enable + Bar match + match enable */
	rc |= goya_iatu_write(hdev, 0x104, 0xC0080000);

	/* Inbound Region 1 - Bar 4 - Point to DDR */
	rc |= goya_set_ddr_bar_base(hdev, DRAM_PHYS_BASE);

	/* Outbound Region 0 - Point to Host */
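	/*
	 * Unlike the inbound regions above, the outbound region uses
	 * address-range matching: 0x008/0x00C hold the base address,
	 * 0x010/0x020 the lower/upper limit and 0x014/0x018 the target
	 * address. The 0x00002000 write to region control 1 is assumed to
	 * set the "increase region size" bit so the upper-limit register
	 * takes effect.
	 */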
	rc |= goya_iatu_write(hdev, 0x008, lower_32_bits(HOST_PHYS_BASE));
	rc |= goya_iatu_write(hdev, 0x00C, upper_32_bits(HOST_PHYS_BASE));
	rc |= goya_iatu_write(hdev, 0x010,
			lower_32_bits(HOST_PHYS_BASE + HOST_PHYS_SIZE - 1));
	rc |= goya_iatu_write(hdev, 0x014, 0);
	rc |= goya_iatu_write(hdev, 0x018, 0);
	rc |= goya_iatu_write(hdev, 0x020,
			upper_32_bits(HOST_PHYS_BASE + HOST_PHYS_SIZE - 1));
	/* Increase region size */
	rc |= goya_iatu_write(hdev, 0x000, 0x00002000);
	/* Enable */
	rc |= goya_iatu_write(hdev, 0x004, 0x80000000);

	/* Return the DBI window to the default location */
	rc |= goya_elbi_write(hdev, CFG_BASE + mmPCIE_AUX_DBI, 0);
	rc |= goya_elbi_write(hdev, CFG_BASE + mmPCIE_AUX_DBI_32, 0);

	if (rc)
		return -EIO;

	return 0;
}

/*
 * goya_early_init - GOYA early initialization code
 *
 * @hdev: pointer to hl_device structure
 *
 * Verify PCI bars
 * Set DMA masks
 * PCI controller initialization
 * Map PCI bars
 *
 */
static int goya_early_init(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct pci_dev *pdev = hdev->pdev;
	u32 val;
	int rc;

	goya_get_fixed_properties(hdev);

	/* Check BAR sizes */
	if (pci_resource_len(pdev, SRAM_CFG_BAR_ID) != CFG_BAR_SIZE) {
		dev_err(hdev->dev,
			"Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
			SRAM_CFG_BAR_ID,
			(unsigned long long) pci_resource_len(pdev,
							SRAM_CFG_BAR_ID),
			CFG_BAR_SIZE);
		return -ENODEV;
	}

	if (pci_resource_len(pdev, MSIX_BAR_ID) != MSIX_BAR_SIZE) {
		dev_err(hdev->dev,
			"Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
			MSIX_BAR_ID,
			(unsigned long long) pci_resource_len(pdev,
								MSIX_BAR_ID),
			MSIX_BAR_SIZE);
		return -ENODEV;
	}

	prop->dram_pci_bar_size = pci_resource_len(pdev, DDR_BAR_ID);

	/* set DMA mask for GOYA */
	rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(39));
	if (rc) {
		dev_warn(hdev->dev, "Unable to set pci dma mask to 39 bits\n");
		rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
		if (rc) {
			dev_err(hdev->dev,
				"Unable to set pci dma mask to 32 bits\n");
			return rc;
		}
	}

	rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(39));
	if (rc) {
		dev_warn(hdev->dev,
			"Unable to set pci consistent dma mask to 39 bits\n");
		rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
		if (rc) {
			dev_err(hdev->dev,
				"Unable to set pci consistent dma mask to 32 bits\n");
			return rc;
		}
	}

	if (hdev->reset_pcilink)
		goya_reset_link_through_bridge(hdev);

	rc = pci_enable_device_mem(pdev);
	if (rc) {
		dev_err(hdev->dev, "can't enable PCI device\n");
		return rc;
	}

	pci_set_master(pdev);

	rc = goya_init_iatu(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to initialize iATU\n");
		goto disable_device;
	}

	rc = goya_pci_bars_map(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to initialize PCI BARS\n");
		goto disable_device;
	}

	if (!hdev->pldm) {
		val = RREG32(mmPSOC_GLOBAL_CONF_BOOT_STRAP_PINS);
		if (val & PSOC_GLOBAL_CONF_BOOT_STRAP_PINS_SRIOV_EN_MASK)
			dev_warn(hdev->dev,
				"PCI strap is not configured correctly, PCI bus errors may occur\n");
	}

	return 0;

disable_device:
	pci_clear_master(pdev);
	pci_disable_device(pdev);

	return rc;
}

/*
 * goya_early_fini - GOYA early finalization code
 *
 * @hdev: pointer to hl_device structure
 *
 * Unmap PCI bars
 *
 */
int goya_early_fini(struct hl_device *hdev)
{
	goya_pci_bars_unmap(hdev);

	pci_clear_master(hdev->pdev);
	pci_disable_device(hdev->pdev);

	return 0;
}

/*
 * goya_fetch_psoc_frequency - Fetch PSOC frequency values
 *
 * @hdev: pointer to hl_device structure
 *
 */
static void goya_fetch_psoc_frequency(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;

	prop->psoc_pci_pll_nr = RREG32(mmPSOC_PCI_PLL_NR);
	prop->psoc_pci_pll_nf = RREG32(mmPSOC_PCI_PLL_NF);
	prop->psoc_pci_pll_od = RREG32(mmPSOC_PCI_PLL_OD);
	prop->psoc_pci_pll_div_factor = RREG32(mmPSOC_PCI_PLL_DIV_FACTOR_1);
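	/*
	 * NR, NF and OD are the usual PLL reference divider, feedback
	 * divider and output divider, for which the conventional relation
	 * is f_out = f_ref * NF / (NR * OD); whether and how the
	 * additional DIV_FACTOR register scales that result is left to
	 * the consumers of these properties.
	 */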
}

/*
 * goya_late_init - GOYA late initialization code
 *
 * @hdev: pointer to hl_device structure
 *
 * Get ArmCP info and send message to CPU to enable PCI access
 */
static int goya_late_init(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct goya_device *goya = hdev->asic_specific;
	int rc;

	rc = goya->armcp_info_get(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to get armcp info\n");
		return rc;
	}

	/* Now that we have the DRAM size in ASIC prop, we need to check
	 * its size and configure the DMA_IF DDR wrap protection (which is in
	 * the MMU block) accordingly. The value is the log2 of the DRAM size.
	 */
	WREG32(mmMMU_LOG2_DDR_SIZE, ilog2(prop->dram_size));

	rc = goya_send_pci_access_msg(hdev, ARMCP_PACKET_ENABLE_PCI_ACCESS);
	if (rc) {
		dev_err(hdev->dev, "Failed to enable PCI access from CPU\n");
		return rc;
	}

	WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
			GOYA_ASYNC_EVENT_ID_INTS_REGISTER);

	goya_fetch_psoc_frequency(hdev);

	rc = goya_mmu_clear_pgt_range(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to clear MMU page tables range\n");
		goto disable_pci_access;
	}

	return 0;

disable_pci_access:
	goya_send_pci_access_msg(hdev, ARMCP_PACKET_DISABLE_PCI_ACCESS);

	return rc;
}

/*
 * goya_late_fini - GOYA late tear-down code
 *
 * @hdev: pointer to hl_device structure
 *
 * Free sensors allocated structures
 */
void goya_late_fini(struct hl_device *hdev)
{
	const struct hwmon_channel_info **channel_info_arr;
	int i = 0;

	if (!hdev->hl_chip_info->info)
		return;

	channel_info_arr = hdev->hl_chip_info->info;

	while (channel_info_arr[i]) {
		kfree(channel_info_arr[i]->config);
		kfree(channel_info_arr[i]);
		i++;
	}

	kfree(channel_info_arr);

	hdev->hl_chip_info->info = NULL;
}

/*
 * goya_sw_init - Goya software initialization code
 *
 * @hdev: pointer to hl_device structure
 *
 */
static int goya_sw_init(struct hl_device *hdev)
{
	struct goya_device *goya;
	int rc;

	/* Allocate device structure */
	goya = kzalloc(sizeof(*goya), GFP_KERNEL);
	if (!goya)
		return -ENOMEM;

	goya->test_cpu_queue = goya_test_cpu_queue;
	goya->armcp_info_get = goya_armcp_info_get;

	/* according to goya_init_iatu */
	goya->ddr_bar_cur_addr = DRAM_PHYS_BASE;

	goya->mme_clk = GOYA_PLL_FREQ_LOW;
	goya->tpc_clk = GOYA_PLL_FREQ_LOW;
	goya->ic_clk = GOYA_PLL_FREQ_LOW;

	hdev->asic_specific = goya;

	/* Create DMA pool for small allocations */
	hdev->dma_pool = dma_pool_create(dev_name(hdev->dev),
			&hdev->pdev->dev, GOYA_DMA_POOL_BLK_SIZE, 8, 0);
	if (!hdev->dma_pool) {
		dev_err(hdev->dev, "failed to create DMA pool\n");
		rc = -ENOMEM;
		goto free_goya_device;
	}

	hdev->cpu_accessible_dma_mem =
			hdev->asic_funcs->dma_alloc_coherent(hdev,
					CPU_ACCESSIBLE_MEM_SIZE,
					&hdev->cpu_accessible_dma_address,
					GFP_KERNEL | __GFP_ZERO);

	if (!hdev->cpu_accessible_dma_mem) {
		dev_err(hdev->dev,
			"failed to allocate %d bytes of dma memory for CPU accessible memory space\n",
			CPU_ACCESSIBLE_MEM_SIZE);
		rc = -ENOMEM;
		goto free_dma_pool;
	}

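	/*
	 * Carve the coherent buffer above into a gen_pool allocator: the
	 * minimum allocation order is CPU_PKT_SHIFT and -1 means no NUMA
	 * node affinity.
	 */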
	hdev->cpu_accessible_dma_pool = gen_pool_create(CPU_PKT_SHIFT, -1);
	if (!hdev->cpu_accessible_dma_pool) {
		dev_err(hdev->dev,
			"Failed to create CPU accessible DMA pool\n");
		rc = -ENOMEM;
		goto free_cpu_pq_dma_mem;
	}

	rc = gen_pool_add(hdev->cpu_accessible_dma_pool,
				(uintptr_t) hdev->cpu_accessible_dma_mem,
				CPU_ACCESSIBLE_MEM_SIZE, -1);
	if (rc) {
		dev_err(hdev->dev,
			"Failed to add memory to CPU accessible DMA pool\n");
		rc = -EFAULT;
		goto free_cpu_pq_pool;
	}

	spin_lock_init(&goya->hw_queues_lock);

	return 0;

free_cpu_pq_pool:
	gen_pool_destroy(hdev->cpu_accessible_dma_pool);
free_cpu_pq_dma_mem:
	hdev->asic_funcs->dma_free_coherent(hdev, CPU_ACCESSIBLE_MEM_SIZE,
			hdev->cpu_accessible_dma_mem,
			hdev->cpu_accessible_dma_address);
free_dma_pool:
	dma_pool_destroy(hdev->dma_pool);
free_goya_device:
	kfree(goya);

	return rc;
}

/*
 * goya_sw_fini - Goya software tear-down code
 *
 * @hdev: pointer to hl_device structure
 *
 */
int goya_sw_fini(struct hl_device *hdev)
{
	struct goya_device *goya = hdev->asic_specific;

	gen_pool_destroy(hdev->cpu_accessible_dma_pool);

	hdev->asic_funcs->dma_free_coherent(hdev, CPU_ACCESSIBLE_MEM_SIZE,
			hdev->cpu_accessible_dma_mem,
			hdev->cpu_accessible_dma_address);

	dma_pool_destroy(hdev->dma_pool);

	kfree(goya);

	return 0;
}

static void goya_init_dma_qman(struct hl_device *hdev, int dma_id,
		dma_addr_t bus_address)
{
	struct goya_device *goya = hdev->asic_specific;
	u32 mtr_base_lo, mtr_base_hi;
	u32 so_base_lo, so_base_hi;
	u32 gic_base_lo, gic_base_hi;
	u32 reg_off = dma_id * (mmDMA_QM_1_PQ_PI - mmDMA_QM_0_PQ_PI);

	mtr_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
	mtr_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
	so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
	so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);

	gic_base_lo =
		lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
	gic_base_hi =
		upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);

	WREG32(mmDMA_QM_0_PQ_BASE_LO + reg_off, lower_32_bits(bus_address));
	WREG32(mmDMA_QM_0_PQ_BASE_HI + reg_off, upper_32_bits(bus_address));

	WREG32(mmDMA_QM_0_PQ_SIZE + reg_off, ilog2(HL_QUEUE_LENGTH));
	WREG32(mmDMA_QM_0_PQ_PI + reg_off, 0);
	WREG32(mmDMA_QM_0_PQ_CI + reg_off, 0);

	WREG32(mmDMA_QM_0_CP_MSG_BASE0_ADDR_LO + reg_off, mtr_base_lo);
	WREG32(mmDMA_QM_0_CP_MSG_BASE0_ADDR_HI + reg_off, mtr_base_hi);
	WREG32(mmDMA_QM_0_CP_MSG_BASE1_ADDR_LO + reg_off, so_base_lo);
	WREG32(mmDMA_QM_0_CP_MSG_BASE1_ADDR_HI + reg_off, so_base_hi);
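	/*
	 * On a QMAN error the engine writes the matching async event ID
	 * to the GIC SETSPI register, so the failure surfaces as the
	 * DMA<n>_QM event in the event queue.
	 */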
	WREG32(mmDMA_QM_0_GLBL_ERR_ADDR_LO + reg_off, gic_base_lo);
	WREG32(mmDMA_QM_0_GLBL_ERR_ADDR_HI + reg_off, gic_base_hi);
	WREG32(mmDMA_QM_0_GLBL_ERR_WDATA + reg_off,
			GOYA_ASYNC_EVENT_ID_DMA0_QM + dma_id);

	/* PQ has buffer of 2 cache lines, while CQ has 8 lines */
	WREG32(mmDMA_QM_0_PQ_CFG1 + reg_off, 0x00020002);
	WREG32(mmDMA_QM_0_CQ_CFG1 + reg_off, 0x00080008);

	if (goya->hw_cap_initialized & HW_CAP_MMU)
		WREG32(mmDMA_QM_0_GLBL_PROT + reg_off, QMAN_DMA_PARTLY_TRUSTED);
	else
		WREG32(mmDMA_QM_0_GLBL_PROT + reg_off, QMAN_DMA_FULLY_TRUSTED);

	WREG32(mmDMA_QM_0_GLBL_ERR_CFG + reg_off, QMAN_DMA_ERR_MSG_EN);
	WREG32(mmDMA_QM_0_GLBL_CFG0 + reg_off, QMAN_DMA_ENABLE);
}

static void goya_init_dma_ch(struct hl_device *hdev, int dma_id)
{
	u32 gic_base_lo, gic_base_hi;
	u64 sob_addr;
	u32 reg_off = dma_id * (mmDMA_CH_1_CFG1 - mmDMA_CH_0_CFG1);

	gic_base_lo =
		lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
	gic_base_hi =
		upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);

	WREG32(mmDMA_CH_0_ERRMSG_ADDR_LO + reg_off, gic_base_lo);
	WREG32(mmDMA_CH_0_ERRMSG_ADDR_HI + reg_off, gic_base_hi);
	WREG32(mmDMA_CH_0_ERRMSG_WDATA + reg_off,
			GOYA_ASYNC_EVENT_ID_DMA0_CH + dma_id);

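	/*
	 * Each user DMA channel (1..4) signals write completion into its
	 * own sync object, SOB 1000 + (dma_id - 1); channel 0, which is
	 * used by KMD, skips this setup.
	 */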
	if (dma_id) {
		sob_addr = CFG_BASE + mmSYNC_MNGR_SOB_OBJ_1000 +
				(dma_id - 1) * 4;
		WREG32(mmDMA_CH_0_WR_COMP_ADDR_LO + reg_off,
				lower_32_bits(sob_addr));
		WREG32(mmDMA_CH_0_WR_COMP_ADDR_HI + reg_off,
				upper_32_bits(sob_addr));
		WREG32(mmDMA_CH_0_WR_COMP_WDATA + reg_off, 0x80000001);
	}
}

/*
 * goya_init_dma_qmans - Initialize QMAN DMA registers
 *
 * @hdev: pointer to hl_device structure
 *
 * Initialize the H/W registers of the QMAN DMA channels
 *
 */
static void goya_init_dma_qmans(struct hl_device *hdev)
{
	struct goya_device *goya = hdev->asic_specific;
	struct hl_hw_queue *q;
	dma_addr_t bus_address;
	int i;

	if (goya->hw_cap_initialized & HW_CAP_DMA)
		return;

	q = &hdev->kernel_queues[0];

	for (i = 0 ; i < NUMBER_OF_EXT_HW_QUEUES ; i++, q++) {
		bus_address = q->bus_address +
				hdev->asic_prop.host_phys_base_address;

		goya_init_dma_qman(hdev, i, bus_address);
		goya_init_dma_ch(hdev, i);
	}

	goya->hw_cap_initialized |= HW_CAP_DMA;
}

/*
 * goya_disable_external_queues - Disable external queues
 *
 * @hdev: pointer to hl_device structure
 *
 */
static void goya_disable_external_queues(struct hl_device *hdev)
{
	WREG32(mmDMA_QM_0_GLBL_CFG0, 0);
	WREG32(mmDMA_QM_1_GLBL_CFG0, 0);
	WREG32(mmDMA_QM_2_GLBL_CFG0, 0);
	WREG32(mmDMA_QM_3_GLBL_CFG0, 0);
	WREG32(mmDMA_QM_4_GLBL_CFG0, 0);
}

static int goya_stop_queue(struct hl_device *hdev, u32 cfg_reg,
		u32 cp_sts_reg, u32 glbl_sts0_reg)
{
	int rc;
	u32 status;

	/* use the values of TPC0 as they are all the same */

	WREG32(cfg_reg, 1 << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);

	status = RREG32(cp_sts_reg);
	if (status & TPC0_QM_CP_STS_FENCE_IN_PROGRESS_MASK) {
		rc = hl_poll_timeout(
			hdev,
			cp_sts_reg,
			status,
			!(status & TPC0_QM_CP_STS_FENCE_IN_PROGRESS_MASK),
			1000,
			QMAN_FENCE_TIMEOUT_USEC);

		/* if QMAN is stuck in fence no need to check for stop */
		if (rc)
			return 0;
	}

	rc = hl_poll_timeout(
		hdev,
		glbl_sts0_reg,
		status,
		(status & TPC0_QM_GLBL_STS0_CP_IS_STOP_MASK),
		1000,
		QMAN_STOP_TIMEOUT_USEC);

	if (rc) {
		dev_err(hdev->dev,
			"Timeout while waiting for QMAN to stop\n");
		return -EINVAL;
	}

	return 0;
}

/*
 * goya_stop_external_queues - Stop external queues
 *
 * @hdev: pointer to hl_device structure
 *
 * Returns 0 on success
 *
 */
static int goya_stop_external_queues(struct hl_device *hdev)
{
	int rc, retval = 0;

	rc = goya_stop_queue(hdev,
			mmDMA_QM_0_GLBL_CFG1,
			mmDMA_QM_0_CP_STS,
			mmDMA_QM_0_GLBL_STS0);

	if (rc) {
		dev_err(hdev->dev, "failed to stop DMA QMAN 0\n");
		retval = -EIO;
	}

	rc = goya_stop_queue(hdev,
			mmDMA_QM_1_GLBL_CFG1,
			mmDMA_QM_1_CP_STS,
			mmDMA_QM_1_GLBL_STS0);

	if (rc) {
		dev_err(hdev->dev, "failed to stop DMA QMAN 1\n");
		retval = -EIO;
	}

	rc = goya_stop_queue(hdev,
			mmDMA_QM_2_GLBL_CFG1,
			mmDMA_QM_2_CP_STS,
			mmDMA_QM_2_GLBL_STS0);

	if (rc) {
		dev_err(hdev->dev, "failed to stop DMA QMAN 2\n");
		retval = -EIO;
	}

	rc = goya_stop_queue(hdev,
			mmDMA_QM_3_GLBL_CFG1,
			mmDMA_QM_3_CP_STS,
			mmDMA_QM_3_GLBL_STS0);

	if (rc) {
		dev_err(hdev->dev, "failed to stop DMA QMAN 3\n");
		retval = -EIO;
	}

	rc = goya_stop_queue(hdev,
			mmDMA_QM_4_GLBL_CFG1,
			mmDMA_QM_4_CP_STS,
			mmDMA_QM_4_GLBL_STS0);

	if (rc) {
		dev_err(hdev->dev, "failed to stop DMA QMAN 4\n");
		retval = -EIO;
	}

	return retval;
}

static void goya_resume_external_queues(struct hl_device *hdev)
{
	WREG32(mmDMA_QM_0_GLBL_CFG1, 0);
	WREG32(mmDMA_QM_1_GLBL_CFG1, 0);
	WREG32(mmDMA_QM_2_GLBL_CFG1, 0);
	WREG32(mmDMA_QM_3_GLBL_CFG1, 0);
	WREG32(mmDMA_QM_4_GLBL_CFG1, 0);
}

/*
 * goya_init_cpu_queues - Initialize PQ/CQ/EQ of CPU
 *
 * @hdev: pointer to hl_device structure
 *
 * Returns 0 on success
 *
 */
int goya_init_cpu_queues(struct hl_device *hdev)
{
	struct goya_device *goya = hdev->asic_specific;
	struct hl_eq *eq;
	dma_addr_t bus_address;
	u32 status;
	struct hl_hw_queue *cpu_pq = &hdev->kernel_queues[GOYA_QUEUE_ID_CPU_PQ];
	int err;

	if (!hdev->cpu_queues_enable)
		return 0;

	if (goya->hw_cap_initialized & HW_CAP_CPU_Q)
		return 0;

	eq = &hdev->event_queue;

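	/*
	 * The PSOC scratchpad registers act as a mailbox towards ArmCP:
	 * SCRATCHPAD_0/1 carry the CPU PQ address, 2/3 the event queue
	 * address, 8/9 the CPU-accessible memory region, 5/4/10 the
	 * respective sizes, 6 the EQ consumer index and 7 the handshake
	 * status word.
	 */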
	bus_address = cpu_pq->bus_address +
			hdev->asic_prop.host_phys_base_address;
	WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_0, lower_32_bits(bus_address));
	WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_1, upper_32_bits(bus_address));

	bus_address = eq->bus_address + hdev->asic_prop.host_phys_base_address;
	WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_2, lower_32_bits(bus_address));
	WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_3, upper_32_bits(bus_address));

	bus_address = hdev->cpu_accessible_dma_address +
			hdev->asic_prop.host_phys_base_address;
	WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_8, lower_32_bits(bus_address));
	WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_9, upper_32_bits(bus_address));

	WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_5, HL_QUEUE_SIZE_IN_BYTES);
	WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_4, HL_EQ_SIZE_IN_BYTES);
	WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_10, CPU_ACCESSIBLE_MEM_SIZE);

	/* Used for EQ CI */
	WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_6, 0);

	WREG32(mmCPU_IF_PF_PQ_PI, 0);

	WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_7, PQ_INIT_STATUS_READY_FOR_CP);

	WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
			GOYA_ASYNC_EVENT_ID_PI_UPDATE);

	err = hl_poll_timeout(
		hdev,
		mmPSOC_GLOBAL_CONF_SCRATCHPAD_7,
		status,
		(status == PQ_INIT_STATUS_READY_FOR_HOST),
		1000,
		GOYA_CPU_TIMEOUT_USEC);

	if (err) {
		dev_err(hdev->dev,
			"Failed to communicate with ARM CPU (ArmCP timeout)\n");
		return -EIO;
	}

	goya->hw_cap_initialized |= HW_CAP_CPU_Q;
	return 0;
}

static void goya_set_pll_refclk(struct hl_device *hdev)
{
	WREG32(mmCPU_PLL_DIV_SEL_0, 0x0);
	WREG32(mmCPU_PLL_DIV_SEL_1, 0x0);
	WREG32(mmCPU_PLL_DIV_SEL_2, 0x0);
	WREG32(mmCPU_PLL_DIV_SEL_3, 0x0);

	WREG32(mmIC_PLL_DIV_SEL_0, 0x0);
	WREG32(mmIC_PLL_DIV_SEL_1, 0x0);
	WREG32(mmIC_PLL_DIV_SEL_2, 0x0);
	WREG32(mmIC_PLL_DIV_SEL_3, 0x0);

	WREG32(mmMC_PLL_DIV_SEL_0, 0x0);
	WREG32(mmMC_PLL_DIV_SEL_1, 0x0);
	WREG32(mmMC_PLL_DIV_SEL_2, 0x0);
	WREG32(mmMC_PLL_DIV_SEL_3, 0x0);

	WREG32(mmPSOC_MME_PLL_DIV_SEL_0, 0x0);
	WREG32(mmPSOC_MME_PLL_DIV_SEL_1, 0x0);
	WREG32(mmPSOC_MME_PLL_DIV_SEL_2, 0x0);
	WREG32(mmPSOC_MME_PLL_DIV_SEL_3, 0x0);

	WREG32(mmPSOC_PCI_PLL_DIV_SEL_0, 0x0);
	WREG32(mmPSOC_PCI_PLL_DIV_SEL_1, 0x0);
	WREG32(mmPSOC_PCI_PLL_DIV_SEL_2, 0x0);
	WREG32(mmPSOC_PCI_PLL_DIV_SEL_3, 0x0);

	WREG32(mmPSOC_EMMC_PLL_DIV_SEL_0, 0x0);
	WREG32(mmPSOC_EMMC_PLL_DIV_SEL_1, 0x0);
	WREG32(mmPSOC_EMMC_PLL_DIV_SEL_2, 0x0);
	WREG32(mmPSOC_EMMC_PLL_DIV_SEL_3, 0x0);

	WREG32(mmTPC_PLL_DIV_SEL_0, 0x0);
	WREG32(mmTPC_PLL_DIV_SEL_1, 0x0);
	WREG32(mmTPC_PLL_DIV_SEL_2, 0x0);
	WREG32(mmTPC_PLL_DIV_SEL_3, 0x0);
}

static void goya_disable_clk_rlx(struct hl_device *hdev)
{
	WREG32(mmPSOC_MME_PLL_CLK_RLX_0, 0x100010);
	WREG32(mmIC_PLL_CLK_RLX_0, 0x100010);
}

static void _goya_tpc_mbist_workaround(struct hl_device *hdev, u8 tpc_id)
{
	u64 tpc_eml_address;
	u32 val, tpc_offset, tpc_eml_offset, tpc_slm_offset;
	int err, slm_index;

	tpc_offset = tpc_id * 0x40000;
	tpc_eml_offset = tpc_id * 0x200000;
	tpc_eml_address = (mmTPC0_EML_CFG_BASE + tpc_eml_offset - CFG_BASE);
	tpc_slm_offset = tpc_eml_address + 0x100000;

	/*
	 * Workaround for Bug H2 #2443:
	 * "TPC SB is not initialized on chip reset"
	 */

	val = RREG32(mmTPC0_CFG_FUNC_MBIST_CNTRL + tpc_offset);
	if (val & TPC0_CFG_FUNC_MBIST_CNTRL_MBIST_ACTIVE_MASK)
		dev_warn(hdev->dev, "TPC%d MBIST ACTIVE is not cleared\n",
				tpc_id);

	WREG32(mmTPC0_CFG_FUNC_MBIST_PAT + tpc_offset, val & 0xFFFFF000);

	WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_0 + tpc_offset, 0x37FF);
	WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_1 + tpc_offset, 0x303F);
	WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_2 + tpc_offset, 0x71FF);
	WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_3 + tpc_offset, 0x71FF);
	WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_4 + tpc_offset, 0x70FF);
	WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_5 + tpc_offset, 0x70FF);
	WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_6 + tpc_offset, 0x70FF);
	WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_7 + tpc_offset, 0x70FF);
	WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_8 + tpc_offset, 0x70FF);
	WREG32(mmTPC0_CFG_FUNC_MBIST_MEM_9 + tpc_offset, 0x70FF);

	WREG32_OR(mmTPC0_CFG_FUNC_MBIST_CNTRL + tpc_offset,
			1 << TPC0_CFG_FUNC_MBIST_CNTRL_MBIST_START_SHIFT);

	err = hl_poll_timeout(
		hdev,
		mmTPC0_CFG_FUNC_MBIST_CNTRL + tpc_offset,
		val,
		(val & TPC0_CFG_FUNC_MBIST_CNTRL_MBIST_DONE_MASK),
		1000,
		HL_DEVICE_TIMEOUT_USEC);

	if (err)
		dev_err(hdev->dev,
			"Timeout while waiting for TPC%d MBIST DONE\n", tpc_id);

	WREG32_OR(mmTPC0_EML_CFG_DBG_CNT + tpc_eml_offset,
			1 << TPC0_EML_CFG_DBG_CNT_CORE_RST_SHIFT);

	msleep(GOYA_RESET_WAIT_MSEC);

	WREG32_AND(mmTPC0_EML_CFG_DBG_CNT + tpc_eml_offset,
			~(1 << TPC0_EML_CFG_DBG_CNT_CORE_RST_SHIFT));

	msleep(GOYA_RESET_WAIT_MSEC);

	for (slm_index = 0 ; slm_index < 256 ; slm_index++)
		WREG32(tpc_slm_offset + (slm_index << 2), 0);

	val = RREG32(tpc_slm_offset);
}

static void goya_tpc_mbist_workaround(struct hl_device *hdev)
{
	struct goya_device *goya = hdev->asic_specific;
	int i;

	if (hdev->pldm)
		return;

	if (goya->hw_cap_initialized & HW_CAP_TPC_MBIST)
		return;

	/* Workaround for H2 #2443 */

	for (i = 0 ; i < TPC_MAX_NUM ; i++)
		_goya_tpc_mbist_workaround(hdev, i);

	goya->hw_cap_initialized |= HW_CAP_TPC_MBIST;
}

/*
 * goya_init_golden_registers - Initialize golden registers
 *
 * @hdev: pointer to hl_device structure
 *
 * Initialize the H/W registers of the device
 *
 */
static void goya_init_golden_registers(struct hl_device *hdev)
{
	struct goya_device *goya = hdev->asic_specific;
	u32 polynom[10], tpc_intr_mask, offset;
	int i;

	if (goya->hw_cap_initialized & HW_CAP_GOLDEN)
		return;

	polynom[0] = 0x00020080;
	polynom[1] = 0x00401000;
	polynom[2] = 0x00200800;
	polynom[3] = 0x00002000;
	polynom[4] = 0x00080200;
	polynom[5] = 0x00040100;
	polynom[6] = 0x00100400;
	polynom[7] = 0x00004000;
	polynom[8] = 0x00010000;
	polynom[9] = 0x00008000;
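	/*
	 * These polynom values feed the routers' SPLIT_COEF registers in
	 * the loop further down, where each one is programmed shifted
	 * right by 7.
	 */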
1451
1452 /* Mask all arithmetic interrupts from TPC */
1453 tpc_intr_mask = 0x7FFF;
1454
1455 for (i = 0, offset = 0 ; i < 6 ; i++, offset += 0x20000) {
1456 WREG32(mmSRAM_Y0_X0_RTR_HBW_RD_RQ_L_ARB + offset, 0x302);
1457 WREG32(mmSRAM_Y0_X1_RTR_HBW_RD_RQ_L_ARB + offset, 0x302);
1458 WREG32(mmSRAM_Y0_X2_RTR_HBW_RD_RQ_L_ARB + offset, 0x302);
1459 WREG32(mmSRAM_Y0_X3_RTR_HBW_RD_RQ_L_ARB + offset, 0x302);
1460 WREG32(mmSRAM_Y0_X4_RTR_HBW_RD_RQ_L_ARB + offset, 0x302);
1461
1462 WREG32(mmSRAM_Y0_X0_RTR_HBW_DATA_L_ARB + offset, 0x204);
1463 WREG32(mmSRAM_Y0_X1_RTR_HBW_DATA_L_ARB + offset, 0x204);
1464 WREG32(mmSRAM_Y0_X2_RTR_HBW_DATA_L_ARB + offset, 0x204);
1465 WREG32(mmSRAM_Y0_X3_RTR_HBW_DATA_L_ARB + offset, 0x204);
1466 WREG32(mmSRAM_Y0_X4_RTR_HBW_DATA_L_ARB + offset, 0x204);
1467
1468
1469 WREG32(mmSRAM_Y0_X0_RTR_HBW_DATA_E_ARB + offset, 0x206);
1470 WREG32(mmSRAM_Y0_X1_RTR_HBW_DATA_E_ARB + offset, 0x206);
1471 WREG32(mmSRAM_Y0_X2_RTR_HBW_DATA_E_ARB + offset, 0x206);
1472 WREG32(mmSRAM_Y0_X3_RTR_HBW_DATA_E_ARB + offset, 0x207);
1473 WREG32(mmSRAM_Y0_X4_RTR_HBW_DATA_E_ARB + offset, 0x207);
1474
1475 WREG32(mmSRAM_Y0_X0_RTR_HBW_DATA_W_ARB + offset, 0x207);
1476 WREG32(mmSRAM_Y0_X1_RTR_HBW_DATA_W_ARB + offset, 0x207);
1477 WREG32(mmSRAM_Y0_X2_RTR_HBW_DATA_W_ARB + offset, 0x206);
1478 WREG32(mmSRAM_Y0_X3_RTR_HBW_DATA_W_ARB + offset, 0x206);
1479 WREG32(mmSRAM_Y0_X4_RTR_HBW_DATA_W_ARB + offset, 0x206);
1480
1481 WREG32(mmSRAM_Y0_X0_RTR_HBW_WR_RS_E_ARB + offset, 0x101);
1482 WREG32(mmSRAM_Y0_X1_RTR_HBW_WR_RS_E_ARB + offset, 0x102);
1483 WREG32(mmSRAM_Y0_X2_RTR_HBW_WR_RS_E_ARB + offset, 0x103);
1484 WREG32(mmSRAM_Y0_X3_RTR_HBW_WR_RS_E_ARB + offset, 0x104);
1485 WREG32(mmSRAM_Y0_X4_RTR_HBW_WR_RS_E_ARB + offset, 0x105);
1486
1487 WREG32(mmSRAM_Y0_X0_RTR_HBW_WR_RS_W_ARB + offset, 0x105);
1488 WREG32(mmSRAM_Y0_X1_RTR_HBW_WR_RS_W_ARB + offset, 0x104);
1489 WREG32(mmSRAM_Y0_X2_RTR_HBW_WR_RS_W_ARB + offset, 0x103);
1490 WREG32(mmSRAM_Y0_X3_RTR_HBW_WR_RS_W_ARB + offset, 0x102);
1491 WREG32(mmSRAM_Y0_X4_RTR_HBW_WR_RS_W_ARB + offset, 0x101);
1492 }
1493
1494 WREG32(mmMME_STORE_MAX_CREDIT, 0x21);
1495 WREG32(mmMME_AGU, 0x0f0f0f10);
1496 WREG32(mmMME_SEI_MASK, ~0x0);
1497
1498 WREG32(mmMME6_RTR_HBW_RD_RQ_N_ARB, 0x01010101);
1499 WREG32(mmMME5_RTR_HBW_RD_RQ_N_ARB, 0x01040101);
1500 WREG32(mmMME4_RTR_HBW_RD_RQ_N_ARB, 0x01030101);
1501 WREG32(mmMME3_RTR_HBW_RD_RQ_N_ARB, 0x01020101);
1502 WREG32(mmMME2_RTR_HBW_RD_RQ_N_ARB, 0x01010101);
1503 WREG32(mmMME1_RTR_HBW_RD_RQ_N_ARB, 0x07010701);
1504 WREG32(mmMME6_RTR_HBW_RD_RQ_S_ARB, 0x04010401);
1505 WREG32(mmMME5_RTR_HBW_RD_RQ_S_ARB, 0x04050401);
1506 WREG32(mmMME4_RTR_HBW_RD_RQ_S_ARB, 0x03070301);
1507 WREG32(mmMME3_RTR_HBW_RD_RQ_S_ARB, 0x01030101);
1508 WREG32(mmMME2_RTR_HBW_RD_RQ_S_ARB, 0x01040101);
1509 WREG32(mmMME1_RTR_HBW_RD_RQ_S_ARB, 0x01050105);
1510 WREG32(mmMME6_RTR_HBW_RD_RQ_W_ARB, 0x01010501);
1511 WREG32(mmMME5_RTR_HBW_RD_RQ_W_ARB, 0x01010501);
1512 WREG32(mmMME4_RTR_HBW_RD_RQ_W_ARB, 0x01040301);
1513 WREG32(mmMME3_RTR_HBW_RD_RQ_W_ARB, 0x01030401);
1514 WREG32(mmMME2_RTR_HBW_RD_RQ_W_ARB, 0x01040101);
1515 WREG32(mmMME1_RTR_HBW_RD_RQ_W_ARB, 0x01050101);
1516 WREG32(mmMME6_RTR_HBW_WR_RQ_N_ARB, 0x02020202);
1517 WREG32(mmMME5_RTR_HBW_WR_RQ_N_ARB, 0x01070101);
1518 WREG32(mmMME4_RTR_HBW_WR_RQ_N_ARB, 0x02020201);
1519 WREG32(mmMME3_RTR_HBW_WR_RQ_N_ARB, 0x07020701);
1520 WREG32(mmMME2_RTR_HBW_WR_RQ_N_ARB, 0x01020101);
1521 WREG32(mmMME1_RTR_HBW_WR_RQ_S_ARB, 0x01010101);
1522 WREG32(mmMME6_RTR_HBW_WR_RQ_S_ARB, 0x01070101);
1523 WREG32(mmMME5_RTR_HBW_WR_RQ_S_ARB, 0x01070101);
1524 WREG32(mmMME4_RTR_HBW_WR_RQ_S_ARB, 0x07020701);
1525 WREG32(mmMME3_RTR_HBW_WR_RQ_S_ARB, 0x02020201);
1526 WREG32(mmMME2_RTR_HBW_WR_RQ_S_ARB, 0x01070101);
1527 WREG32(mmMME1_RTR_HBW_WR_RQ_S_ARB, 0x01020102);
1528 WREG32(mmMME6_RTR_HBW_WR_RQ_W_ARB, 0x01020701);
1529 WREG32(mmMME5_RTR_HBW_WR_RQ_W_ARB, 0x01020701);
1530 WREG32(mmMME4_RTR_HBW_WR_RQ_W_ARB, 0x07020707);
1531 WREG32(mmMME3_RTR_HBW_WR_RQ_W_ARB, 0x01020201);
1532 WREG32(mmMME2_RTR_HBW_WR_RQ_W_ARB, 0x01070201);
1533 WREG32(mmMME1_RTR_HBW_WR_RQ_W_ARB, 0x01070201);
1534 WREG32(mmMME6_RTR_HBW_RD_RS_N_ARB, 0x01070102);
1535 WREG32(mmMME5_RTR_HBW_RD_RS_N_ARB, 0x01070102);
1536 WREG32(mmMME4_RTR_HBW_RD_RS_N_ARB, 0x01060102);
1537 WREG32(mmMME3_RTR_HBW_RD_RS_N_ARB, 0x01040102);
1538 WREG32(mmMME2_RTR_HBW_RD_RS_N_ARB, 0x01020102);
1539 WREG32(mmMME1_RTR_HBW_RD_RS_N_ARB, 0x01020107);
1540 WREG32(mmMME6_RTR_HBW_RD_RS_S_ARB, 0x01020106);
1541 WREG32(mmMME5_RTR_HBW_RD_RS_S_ARB, 0x01020102);
1542 WREG32(mmMME4_RTR_HBW_RD_RS_S_ARB, 0x01040102);
1543 WREG32(mmMME3_RTR_HBW_RD_RS_S_ARB, 0x01060102);
1544 WREG32(mmMME2_RTR_HBW_RD_RS_S_ARB, 0x01070102);
1545 WREG32(mmMME1_RTR_HBW_RD_RS_S_ARB, 0x01070102);
1546 WREG32(mmMME6_RTR_HBW_RD_RS_E_ARB, 0x01020702);
1547 WREG32(mmMME5_RTR_HBW_RD_RS_E_ARB, 0x01020702);
1548 WREG32(mmMME4_RTR_HBW_RD_RS_E_ARB, 0x01040602);
1549 WREG32(mmMME3_RTR_HBW_RD_RS_E_ARB, 0x01060402);
1550 WREG32(mmMME2_RTR_HBW_RD_RS_E_ARB, 0x01070202);
1551 WREG32(mmMME1_RTR_HBW_RD_RS_E_ARB, 0x01070102);
1552 WREG32(mmMME6_RTR_HBW_RD_RS_W_ARB, 0x01060401);
1553 WREG32(mmMME5_RTR_HBW_RD_RS_W_ARB, 0x01060401);
1554 WREG32(mmMME4_RTR_HBW_RD_RS_W_ARB, 0x01060401);
1555 WREG32(mmMME3_RTR_HBW_RD_RS_W_ARB, 0x01060401);
1556 WREG32(mmMME2_RTR_HBW_RD_RS_W_ARB, 0x01060401);
1557 WREG32(mmMME1_RTR_HBW_RD_RS_W_ARB, 0x01060401);
1558 WREG32(mmMME6_RTR_HBW_WR_RS_N_ARB, 0x01050101);
1559 WREG32(mmMME5_RTR_HBW_WR_RS_N_ARB, 0x01040101);
1560 WREG32(mmMME4_RTR_HBW_WR_RS_N_ARB, 0x01030101);
1561 WREG32(mmMME3_RTR_HBW_WR_RS_N_ARB, 0x01020101);
1562 WREG32(mmMME2_RTR_HBW_WR_RS_N_ARB, 0x01010101);
1563 WREG32(mmMME1_RTR_HBW_WR_RS_N_ARB, 0x01010107);
1564 WREG32(mmMME6_RTR_HBW_WR_RS_S_ARB, 0x01010107);
1565 WREG32(mmMME5_RTR_HBW_WR_RS_S_ARB, 0x01010101);
1566 WREG32(mmMME4_RTR_HBW_WR_RS_S_ARB, 0x01020101);
1567 WREG32(mmMME3_RTR_HBW_WR_RS_S_ARB, 0x01030101);
1568 WREG32(mmMME2_RTR_HBW_WR_RS_S_ARB, 0x01040101);
1569 WREG32(mmMME1_RTR_HBW_WR_RS_S_ARB, 0x01050101);
1570 WREG32(mmMME6_RTR_HBW_WR_RS_E_ARB, 0x01010501);
1571 WREG32(mmMME5_RTR_HBW_WR_RS_E_ARB, 0x01010501);
1572 WREG32(mmMME4_RTR_HBW_WR_RS_E_ARB, 0x01040301);
1573 WREG32(mmMME3_RTR_HBW_WR_RS_E_ARB, 0x01030401);
1574 WREG32(mmMME2_RTR_HBW_WR_RS_E_ARB, 0x01040101);
1575 WREG32(mmMME1_RTR_HBW_WR_RS_E_ARB, 0x01050101);
1576 WREG32(mmMME6_RTR_HBW_WR_RS_W_ARB, 0x01010101);
1577 WREG32(mmMME5_RTR_HBW_WR_RS_W_ARB, 0x01010101);
1578 WREG32(mmMME4_RTR_HBW_WR_RS_W_ARB, 0x01010101);
1579 WREG32(mmMME3_RTR_HBW_WR_RS_W_ARB, 0x01010101);
1580 WREG32(mmMME2_RTR_HBW_WR_RS_W_ARB, 0x01010101);
1581 WREG32(mmMME1_RTR_HBW_WR_RS_W_ARB, 0x01010101);
1582
1583 WREG32(mmTPC1_RTR_HBW_RD_RQ_N_ARB, 0x01010101);
1584 WREG32(mmTPC1_RTR_HBW_RD_RQ_S_ARB, 0x01010101);
1585 WREG32(mmTPC1_RTR_HBW_RD_RQ_E_ARB, 0x01060101);
1586 WREG32(mmTPC1_RTR_HBW_WR_RQ_N_ARB, 0x02020102);
1587 WREG32(mmTPC1_RTR_HBW_WR_RQ_S_ARB, 0x01010101);
1588 WREG32(mmTPC1_RTR_HBW_WR_RQ_E_ARB, 0x02070202);
1589 WREG32(mmTPC1_RTR_HBW_RD_RS_N_ARB, 0x01020201);
1590 WREG32(mmTPC1_RTR_HBW_RD_RS_S_ARB, 0x01070201);
1591 WREG32(mmTPC1_RTR_HBW_RD_RS_W_ARB, 0x01070202);
1592 WREG32(mmTPC1_RTR_HBW_WR_RS_N_ARB, 0x01010101);
1593 WREG32(mmTPC1_RTR_HBW_WR_RS_S_ARB, 0x01050101);
1594 WREG32(mmTPC1_RTR_HBW_WR_RS_W_ARB, 0x01050101);
1595
1596 WREG32(mmTPC2_RTR_HBW_RD_RQ_N_ARB, 0x01020101);
1597 WREG32(mmTPC2_RTR_HBW_RD_RQ_S_ARB, 0x01050101);
1598 WREG32(mmTPC2_RTR_HBW_RD_RQ_E_ARB, 0x01010201);
1599 WREG32(mmTPC2_RTR_HBW_WR_RQ_N_ARB, 0x02040102);
1600 WREG32(mmTPC2_RTR_HBW_WR_RQ_S_ARB, 0x01050101);
1601 WREG32(mmTPC2_RTR_HBW_WR_RQ_E_ARB, 0x02060202);
1602 WREG32(mmTPC2_RTR_HBW_RD_RS_N_ARB, 0x01020201);
1603 WREG32(mmTPC2_RTR_HBW_RD_RS_S_ARB, 0x01070201);
1604 WREG32(mmTPC2_RTR_HBW_RD_RS_W_ARB, 0x01070202);
1605 WREG32(mmTPC2_RTR_HBW_WR_RS_N_ARB, 0x01010101);
1606 WREG32(mmTPC2_RTR_HBW_WR_RS_S_ARB, 0x01040101);
1607 WREG32(mmTPC2_RTR_HBW_WR_RS_W_ARB, 0x01040101);
1608
1609 WREG32(mmTPC3_RTR_HBW_RD_RQ_N_ARB, 0x01030101);
1610 WREG32(mmTPC3_RTR_HBW_RD_RQ_S_ARB, 0x01040101);
1611 WREG32(mmTPC3_RTR_HBW_RD_RQ_E_ARB, 0x01040301);
1612 WREG32(mmTPC3_RTR_HBW_WR_RQ_N_ARB, 0x02060102);
1613 WREG32(mmTPC3_RTR_HBW_WR_RQ_S_ARB, 0x01040101);
1614 WREG32(mmTPC3_RTR_HBW_WR_RQ_E_ARB, 0x01040301);
1615 WREG32(mmTPC3_RTR_HBW_RD_RS_N_ARB, 0x01040201);
1616 WREG32(mmTPC3_RTR_HBW_RD_RS_S_ARB, 0x01060201);
1617 WREG32(mmTPC3_RTR_HBW_RD_RS_W_ARB, 0x01060402);
1618 WREG32(mmTPC3_RTR_HBW_WR_RS_N_ARB, 0x01020101);
1619 WREG32(mmTPC3_RTR_HBW_WR_RS_S_ARB, 0x01030101);
1620 WREG32(mmTPC3_RTR_HBW_WR_RS_W_ARB, 0x01030401);
1621
1622 WREG32(mmTPC4_RTR_HBW_RD_RQ_N_ARB, 0x01040101);
1623 WREG32(mmTPC4_RTR_HBW_RD_RQ_S_ARB, 0x01030101);
1624 WREG32(mmTPC4_RTR_HBW_RD_RQ_E_ARB, 0x01030401);
1625 WREG32(mmTPC4_RTR_HBW_WR_RQ_N_ARB, 0x02070102);
1626 WREG32(mmTPC4_RTR_HBW_WR_RQ_S_ARB, 0x01030101);
1627 WREG32(mmTPC4_RTR_HBW_WR_RQ_E_ARB, 0x02060702);
1628 WREG32(mmTPC4_RTR_HBW_RD_RS_N_ARB, 0x01060201);
1629 WREG32(mmTPC4_RTR_HBW_RD_RS_S_ARB, 0x01040201);
1630 WREG32(mmTPC4_RTR_HBW_RD_RS_W_ARB, 0x01040602);
1631 WREG32(mmTPC4_RTR_HBW_WR_RS_N_ARB, 0x01030101);
1632 WREG32(mmTPC4_RTR_HBW_WR_RS_S_ARB, 0x01020101);
1633 WREG32(mmTPC4_RTR_HBW_WR_RS_W_ARB, 0x01040301);
1634
1635 WREG32(mmTPC5_RTR_HBW_RD_RQ_N_ARB, 0x01050101);
1636 WREG32(mmTPC5_RTR_HBW_RD_RQ_S_ARB, 0x01020101);
1637 WREG32(mmTPC5_RTR_HBW_RD_RQ_E_ARB, 0x01200501);
1638 WREG32(mmTPC5_RTR_HBW_WR_RQ_N_ARB, 0x02070102);
1639 WREG32(mmTPC5_RTR_HBW_WR_RQ_S_ARB, 0x01020101);
1640 WREG32(mmTPC5_RTR_HBW_WR_RQ_E_ARB, 0x02020602);
1641 WREG32(mmTPC5_RTR_HBW_RD_RS_N_ARB, 0x01070201);
1642 WREG32(mmTPC5_RTR_HBW_RD_RS_S_ARB, 0x01020201);
1643 WREG32(mmTPC5_RTR_HBW_RD_RS_W_ARB, 0x01020702);
1644 WREG32(mmTPC5_RTR_HBW_WR_RS_N_ARB, 0x01040101);
1645 WREG32(mmTPC5_RTR_HBW_WR_RS_S_ARB, 0x01010101);
1646 WREG32(mmTPC5_RTR_HBW_WR_RS_W_ARB, 0x01010501);
1647
1648 WREG32(mmTPC6_RTR_HBW_RD_RQ_N_ARB, 0x01010101);
1649 WREG32(mmTPC6_RTR_HBW_RD_RQ_S_ARB, 0x01010101);
1650 WREG32(mmTPC6_RTR_HBW_RD_RQ_E_ARB, 0x01010601);
1651 WREG32(mmTPC6_RTR_HBW_WR_RQ_N_ARB, 0x01010101);
1652 WREG32(mmTPC6_RTR_HBW_WR_RQ_S_ARB, 0x01010101);
1653 WREG32(mmTPC6_RTR_HBW_WR_RQ_E_ARB, 0x02020702);
1654 WREG32(mmTPC6_RTR_HBW_RD_RS_N_ARB, 0x01010101);
1655 WREG32(mmTPC6_RTR_HBW_RD_RS_S_ARB, 0x01010101);
1656 WREG32(mmTPC6_RTR_HBW_RD_RS_W_ARB, 0x01020702);
1657 WREG32(mmTPC6_RTR_HBW_WR_RS_N_ARB, 0x01050101);
1658 WREG32(mmTPC6_RTR_HBW_WR_RS_S_ARB, 0x01010101);
1659 WREG32(mmTPC6_RTR_HBW_WR_RS_W_ARB, 0x01010501);
1660
1661 for (i = 0, offset = 0 ; i < 10 ; i++, offset += 4) {
1662 WREG32(mmMME1_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1663 WREG32(mmMME2_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1664 WREG32(mmMME3_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1665 WREG32(mmMME4_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1666 WREG32(mmMME5_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1667 WREG32(mmMME6_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1668
1669 WREG32(mmTPC0_NRTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1670 WREG32(mmTPC1_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1671 WREG32(mmTPC2_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1672 WREG32(mmTPC3_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1673 WREG32(mmTPC4_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1674 WREG32(mmTPC5_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1675 WREG32(mmTPC6_RTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1676 WREG32(mmTPC7_NRTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1677
1678 WREG32(mmPCI_NRTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1679 WREG32(mmDMA_NRTR_SPLIT_COEF_0 + offset, polynom[i] >> 7);
1680 }
1681
1682 for (i = 0, offset = 0 ; i < 6 ; i++, offset += 0x40000) {
1683 WREG32(mmMME1_RTR_SCRAMB_EN + offset,
1684 1 << MME1_RTR_SCRAMB_EN_VAL_SHIFT);
1685 WREG32(mmMME1_RTR_NON_LIN_SCRAMB + offset,
1686 1 << MME1_RTR_NON_LIN_SCRAMB_EN_SHIFT);
1687 }
1688
1689 for (i = 0, offset = 0 ; i < 8 ; i++, offset += 0x40000) {
1690 /*
1691		 * Workaround for Bug H2 #2441:
1692 * "ST.NOP set trace event illegal opcode"
1693 */
1694 WREG32(mmTPC0_CFG_TPC_INTR_MASK + offset, tpc_intr_mask);
1695
1696 WREG32(mmTPC0_NRTR_SCRAMB_EN + offset,
1697 1 << TPC0_NRTR_SCRAMB_EN_VAL_SHIFT);
1698 WREG32(mmTPC0_NRTR_NON_LIN_SCRAMB + offset,
1699 1 << TPC0_NRTR_NON_LIN_SCRAMB_EN_SHIFT);
1700 }
1701
1702 WREG32(mmDMA_NRTR_SCRAMB_EN, 1 << DMA_NRTR_SCRAMB_EN_VAL_SHIFT);
1703 WREG32(mmDMA_NRTR_NON_LIN_SCRAMB,
1704 1 << DMA_NRTR_NON_LIN_SCRAMB_EN_SHIFT);
1705
1706 WREG32(mmPCI_NRTR_SCRAMB_EN, 1 << PCI_NRTR_SCRAMB_EN_VAL_SHIFT);
1707 WREG32(mmPCI_NRTR_NON_LIN_SCRAMB,
1708 1 << PCI_NRTR_NON_LIN_SCRAMB_EN_SHIFT);
1709
1710 /*
1711 * Workaround for H2 #HW-23 bug
1712	 * Set DMA max outstanding read requests to 240 on DMA CH 1 and
1713	 * to 16 on the KMD DMA channel (CH 0).
1714	 * We need to limit only these DMAs because the user can only read
1715	 * from Host using DMA CH 1
1716 */
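	/*
	 * Decoding hedge: judging only from the two values below, the low
	 * byte of CFG0 appears to hold the outstanding-read limit
	 * (0x10 = 16 for the KMD channel, 0xF0 = 240 for DMA CH 1); the
	 * full CFG0 bit layout is not documented here.
	 */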
1717 WREG32(mmDMA_CH_0_CFG0, 0x0fff0010);
1718 WREG32(mmDMA_CH_1_CFG0, 0x0fff00F0);
1719
1720 goya->hw_cap_initialized |= HW_CAP_GOLDEN;
1721}
1722
Oded Gabbay9494a8d2019-02-16 00:39:17 +02001723static void goya_init_mme_qman(struct hl_device *hdev)
1724{
1725 u32 mtr_base_lo, mtr_base_hi;
1726 u32 so_base_lo, so_base_hi;
1727 u32 gic_base_lo, gic_base_hi;
1728 u64 qman_base_addr;
1729
1730 mtr_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
1731 mtr_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
1732 so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
1733 so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
1734
1735 gic_base_lo =
1736 lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
1737 gic_base_hi =
1738 upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
1739
1740 qman_base_addr = hdev->asic_prop.sram_base_address +
1741 MME_QMAN_BASE_OFFSET;
1742
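	/*
	 * PQ_SIZE takes the log2 of the queue length (ilog2 below), which
	 * assumes MME_QMAN_LENGTH is a power of two
	 */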
1743 WREG32(mmMME_QM_PQ_BASE_LO, lower_32_bits(qman_base_addr));
1744 WREG32(mmMME_QM_PQ_BASE_HI, upper_32_bits(qman_base_addr));
1745 WREG32(mmMME_QM_PQ_SIZE, ilog2(MME_QMAN_LENGTH));
1746 WREG32(mmMME_QM_PQ_PI, 0);
1747 WREG32(mmMME_QM_PQ_CI, 0);
1748 WREG32(mmMME_QM_CP_LDMA_SRC_BASE_LO_OFFSET, 0x10C0);
1749 WREG32(mmMME_QM_CP_LDMA_SRC_BASE_HI_OFFSET, 0x10C4);
1750 WREG32(mmMME_QM_CP_LDMA_TSIZE_OFFSET, 0x10C8);
1751 WREG32(mmMME_QM_CP_LDMA_COMMIT_OFFSET, 0x10CC);
1752
1753 WREG32(mmMME_QM_CP_MSG_BASE0_ADDR_LO, mtr_base_lo);
1754 WREG32(mmMME_QM_CP_MSG_BASE0_ADDR_HI, mtr_base_hi);
1755 WREG32(mmMME_QM_CP_MSG_BASE1_ADDR_LO, so_base_lo);
1756 WREG32(mmMME_QM_CP_MSG_BASE1_ADDR_HI, so_base_hi);
1757
1758 /* QMAN CQ has 8 cache lines */
1759 WREG32(mmMME_QM_CQ_CFG1, 0x00080008);
1760
1761 WREG32(mmMME_QM_GLBL_ERR_ADDR_LO, gic_base_lo);
1762 WREG32(mmMME_QM_GLBL_ERR_ADDR_HI, gic_base_hi);
1763
1764 WREG32(mmMME_QM_GLBL_ERR_WDATA, GOYA_ASYNC_EVENT_ID_MME_QM);
1765
1766 WREG32(mmMME_QM_GLBL_ERR_CFG, QMAN_MME_ERR_MSG_EN);
1767
1768 WREG32(mmMME_QM_GLBL_PROT, QMAN_MME_ERR_PROT);
1769
1770 WREG32(mmMME_QM_GLBL_CFG0, QMAN_MME_ENABLE);
1771}
1772
1773static void goya_init_mme_cmdq(struct hl_device *hdev)
1774{
1775 u32 mtr_base_lo, mtr_base_hi;
1776 u32 so_base_lo, so_base_hi;
1777 u32 gic_base_lo, gic_base_hi;
1778 u64 qman_base_addr;
1779
1780 mtr_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
1781 mtr_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
1782 so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
1783 so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
1784
1785 gic_base_lo =
1786 lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
1787 gic_base_hi =
1788 upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
1789
1790 qman_base_addr = hdev->asic_prop.sram_base_address +
1791 MME_QMAN_BASE_OFFSET;
1792
1793 WREG32(mmMME_CMDQ_CP_MSG_BASE0_ADDR_LO, mtr_base_lo);
1794 WREG32(mmMME_CMDQ_CP_MSG_BASE0_ADDR_HI, mtr_base_hi);
1795 WREG32(mmMME_CMDQ_CP_MSG_BASE1_ADDR_LO, so_base_lo);
1796 WREG32(mmMME_CMDQ_CP_MSG_BASE1_ADDR_HI, so_base_hi);
1797
1798 /* CMDQ CQ has 20 cache lines */
1799 WREG32(mmMME_CMDQ_CQ_CFG1, 0x00140014);
1800
1801 WREG32(mmMME_CMDQ_GLBL_ERR_ADDR_LO, gic_base_lo);
1802 WREG32(mmMME_CMDQ_GLBL_ERR_ADDR_HI, gic_base_hi);
1803
1804 WREG32(mmMME_CMDQ_GLBL_ERR_WDATA, GOYA_ASYNC_EVENT_ID_MME_CMDQ);
1805
1806 WREG32(mmMME_CMDQ_GLBL_ERR_CFG, CMDQ_MME_ERR_MSG_EN);
1807
1808 WREG32(mmMME_CMDQ_GLBL_PROT, CMDQ_MME_ERR_PROT);
1809
1810 WREG32(mmMME_CMDQ_GLBL_CFG0, CMDQ_MME_ENABLE);
1811}
1812
1813static void goya_init_mme_qmans(struct hl_device *hdev)
1814{
1815 struct goya_device *goya = hdev->asic_specific;
1816 u32 so_base_lo, so_base_hi;
1817
1818 if (goya->hw_cap_initialized & HW_CAP_MME)
1819 return;
1820
1821 so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
1822 so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
1823
1824 WREG32(mmMME_SM_BASE_ADDRESS_LOW, so_base_lo);
1825 WREG32(mmMME_SM_BASE_ADDRESS_HIGH, so_base_hi);
1826
1827 goya_init_mme_qman(hdev);
1828 goya_init_mme_cmdq(hdev);
1829
1830 goya->hw_cap_initialized |= HW_CAP_MME;
1831}
1832
1833static void goya_init_tpc_qman(struct hl_device *hdev, u32 base_off, int tpc_id)
1834{
1835 u32 mtr_base_lo, mtr_base_hi;
1836 u32 so_base_lo, so_base_hi;
1837 u32 gic_base_lo, gic_base_hi;
1838 u64 qman_base_addr;
1839 u32 reg_off = tpc_id * (mmTPC1_QM_PQ_PI - mmTPC0_QM_PQ_PI);
1840
1841 mtr_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
1842 mtr_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
1843 so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
1844 so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
1845
1846 gic_base_lo =
1847 lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
1848 gic_base_hi =
1849 upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
1850
1851 qman_base_addr = hdev->asic_prop.sram_base_address + base_off;
1852
1853 WREG32(mmTPC0_QM_PQ_BASE_LO + reg_off, lower_32_bits(qman_base_addr));
1854 WREG32(mmTPC0_QM_PQ_BASE_HI + reg_off, upper_32_bits(qman_base_addr));
1855 WREG32(mmTPC0_QM_PQ_SIZE + reg_off, ilog2(TPC_QMAN_LENGTH));
1856 WREG32(mmTPC0_QM_PQ_PI + reg_off, 0);
1857 WREG32(mmTPC0_QM_PQ_CI + reg_off, 0);
1858 WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET + reg_off, 0x10C0);
1859 WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_HI_OFFSET + reg_off, 0x10C4);
1860 WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET + reg_off, 0x10C8);
1861 WREG32(mmTPC0_QM_CP_LDMA_COMMIT_OFFSET + reg_off, 0x10CC);
1862
1863 WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_LO + reg_off, mtr_base_lo);
1864 WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_HI + reg_off, mtr_base_hi);
1865 WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_LO + reg_off, so_base_lo);
1866 WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_HI + reg_off, so_base_hi);
1867
1868 WREG32(mmTPC0_QM_CQ_CFG1 + reg_off, 0x00080008);
1869
1870 WREG32(mmTPC0_QM_GLBL_ERR_ADDR_LO + reg_off, gic_base_lo);
1871 WREG32(mmTPC0_QM_GLBL_ERR_ADDR_HI + reg_off, gic_base_hi);
1872
1873 WREG32(mmTPC0_QM_GLBL_ERR_WDATA + reg_off,
1874 GOYA_ASYNC_EVENT_ID_TPC0_QM + tpc_id);
1875
1876 WREG32(mmTPC0_QM_GLBL_ERR_CFG + reg_off, QMAN_TPC_ERR_MSG_EN);
1877
1878 WREG32(mmTPC0_QM_GLBL_PROT + reg_off, QMAN_TPC_ERR_PROT);
1879
1880 WREG32(mmTPC0_QM_GLBL_CFG0 + reg_off, QMAN_TPC_ENABLE);
1881}
1882
1883static void goya_init_tpc_cmdq(struct hl_device *hdev, int tpc_id)
1884{
1885 u32 mtr_base_lo, mtr_base_hi;
1886 u32 so_base_lo, so_base_hi;
1887 u32 gic_base_lo, gic_base_hi;
1888 u32 reg_off = tpc_id * (mmTPC1_CMDQ_CQ_CFG1 - mmTPC0_CMDQ_CQ_CFG1);
1889
1890 mtr_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
1891 mtr_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
1892 so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
1893 so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
1894
1895 gic_base_lo =
1896 lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
1897 gic_base_hi =
1898 upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
1899
1900 WREG32(mmTPC0_CMDQ_CP_MSG_BASE0_ADDR_LO + reg_off, mtr_base_lo);
1901 WREG32(mmTPC0_CMDQ_CP_MSG_BASE0_ADDR_HI + reg_off, mtr_base_hi);
1902 WREG32(mmTPC0_CMDQ_CP_MSG_BASE1_ADDR_LO + reg_off, so_base_lo);
1903 WREG32(mmTPC0_CMDQ_CP_MSG_BASE1_ADDR_HI + reg_off, so_base_hi);
1904
1905 WREG32(mmTPC0_CMDQ_CQ_CFG1 + reg_off, 0x00140014);
1906
1907 WREG32(mmTPC0_CMDQ_GLBL_ERR_ADDR_LO + reg_off, gic_base_lo);
1908 WREG32(mmTPC0_CMDQ_GLBL_ERR_ADDR_HI + reg_off, gic_base_hi);
1909
1910 WREG32(mmTPC0_CMDQ_GLBL_ERR_WDATA + reg_off,
1911 GOYA_ASYNC_EVENT_ID_TPC0_CMDQ + tpc_id);
1912
1913 WREG32(mmTPC0_CMDQ_GLBL_ERR_CFG + reg_off, CMDQ_TPC_ERR_MSG_EN);
1914
1915 WREG32(mmTPC0_CMDQ_GLBL_PROT + reg_off, CMDQ_TPC_ERR_PROT);
1916
1917 WREG32(mmTPC0_CMDQ_GLBL_CFG0 + reg_off, CMDQ_TPC_ENABLE);
1918}
1919
1920static void goya_init_tpc_qmans(struct hl_device *hdev)
1921{
1922 struct goya_device *goya = hdev->asic_specific;
1923 u32 so_base_lo, so_base_hi;
1924 u32 cfg_off = mmTPC1_CFG_SM_BASE_ADDRESS_LOW -
1925 mmTPC0_CFG_SM_BASE_ADDRESS_LOW;
1926 int i;
1927
1928 if (goya->hw_cap_initialized & HW_CAP_TPC)
1929 return;
1930
1931 so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
1932 so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
1933
1934 for (i = 0 ; i < TPC_MAX_NUM ; i++) {
1935 WREG32(mmTPC0_CFG_SM_BASE_ADDRESS_LOW + i * cfg_off,
1936 so_base_lo);
1937 WREG32(mmTPC0_CFG_SM_BASE_ADDRESS_HIGH + i * cfg_off,
1938 so_base_hi);
1939 }
1940
1941 goya_init_tpc_qman(hdev, TPC0_QMAN_BASE_OFFSET, 0);
1942 goya_init_tpc_qman(hdev, TPC1_QMAN_BASE_OFFSET, 1);
1943 goya_init_tpc_qman(hdev, TPC2_QMAN_BASE_OFFSET, 2);
1944 goya_init_tpc_qman(hdev, TPC3_QMAN_BASE_OFFSET, 3);
1945 goya_init_tpc_qman(hdev, TPC4_QMAN_BASE_OFFSET, 4);
1946 goya_init_tpc_qman(hdev, TPC5_QMAN_BASE_OFFSET, 5);
1947 goya_init_tpc_qman(hdev, TPC6_QMAN_BASE_OFFSET, 6);
1948 goya_init_tpc_qman(hdev, TPC7_QMAN_BASE_OFFSET, 7);
1949
1950 for (i = 0 ; i < TPC_MAX_NUM ; i++)
1951 goya_init_tpc_cmdq(hdev, i);
1952
1953 goya->hw_cap_initialized |= HW_CAP_TPC;
1954}
1955
1956/*
1957 * goya_disable_internal_queues - Disable internal queues
1958 *
1959 * @hdev: pointer to hl_device structure
1960 *
1961 */
1962static void goya_disable_internal_queues(struct hl_device *hdev)
1963{
1964 WREG32(mmMME_QM_GLBL_CFG0, 0);
1965 WREG32(mmMME_CMDQ_GLBL_CFG0, 0);
1966
1967 WREG32(mmTPC0_QM_GLBL_CFG0, 0);
1968 WREG32(mmTPC0_CMDQ_GLBL_CFG0, 0);
1969
1970 WREG32(mmTPC1_QM_GLBL_CFG0, 0);
1971 WREG32(mmTPC1_CMDQ_GLBL_CFG0, 0);
1972
1973 WREG32(mmTPC2_QM_GLBL_CFG0, 0);
1974 WREG32(mmTPC2_CMDQ_GLBL_CFG0, 0);
1975
1976 WREG32(mmTPC3_QM_GLBL_CFG0, 0);
1977 WREG32(mmTPC3_CMDQ_GLBL_CFG0, 0);
1978
1979 WREG32(mmTPC4_QM_GLBL_CFG0, 0);
1980 WREG32(mmTPC4_CMDQ_GLBL_CFG0, 0);
1981
1982 WREG32(mmTPC5_QM_GLBL_CFG0, 0);
1983 WREG32(mmTPC5_CMDQ_GLBL_CFG0, 0);
1984
1985 WREG32(mmTPC6_QM_GLBL_CFG0, 0);
1986 WREG32(mmTPC6_CMDQ_GLBL_CFG0, 0);
1987
1988 WREG32(mmTPC7_QM_GLBL_CFG0, 0);
1989 WREG32(mmTPC7_CMDQ_GLBL_CFG0, 0);
1990}
1991
1992/*
1993 * goya_stop_internal_queues - Stop internal queues
1994 *
1995 * @hdev: pointer to hl_device structure
1996 *
1997 * Returns 0 on success
1998 *
1999 */
2000static int goya_stop_internal_queues(struct hl_device *hdev)
2001{
2002 int rc, retval = 0;
2003
2004 /*
2005	 * Each queue (QMAN) is a separate H/W logic block, so each QMAN can
2006	 * be stopped independently, and a failure to stop one does not
2007	 * prevent us from trying to stop the others
2008 */
2009
2010 rc = goya_stop_queue(hdev,
2011 mmMME_QM_GLBL_CFG1,
2012 mmMME_QM_CP_STS,
2013 mmMME_QM_GLBL_STS0);
2014
2015 if (rc) {
2016 dev_err(hdev->dev, "failed to stop MME QMAN\n");
2017 retval = -EIO;
2018 }
2019
2020 rc = goya_stop_queue(hdev,
2021 mmMME_CMDQ_GLBL_CFG1,
2022 mmMME_CMDQ_CP_STS,
2023 mmMME_CMDQ_GLBL_STS0);
2024
2025 if (rc) {
2026 dev_err(hdev->dev, "failed to stop MME CMDQ\n");
2027 retval = -EIO;
2028 }
2029
2030 rc = goya_stop_queue(hdev,
2031 mmTPC0_QM_GLBL_CFG1,
2032 mmTPC0_QM_CP_STS,
2033 mmTPC0_QM_GLBL_STS0);
2034
2035 if (rc) {
2036 dev_err(hdev->dev, "failed to stop TPC 0 QMAN\n");
2037 retval = -EIO;
2038 }
2039
2040 rc = goya_stop_queue(hdev,
2041 mmTPC0_CMDQ_GLBL_CFG1,
2042 mmTPC0_CMDQ_CP_STS,
2043 mmTPC0_CMDQ_GLBL_STS0);
2044
2045 if (rc) {
2046 dev_err(hdev->dev, "failed to stop TPC 0 CMDQ\n");
2047 retval = -EIO;
2048 }
2049
2050 rc = goya_stop_queue(hdev,
2051 mmTPC1_QM_GLBL_CFG1,
2052 mmTPC1_QM_CP_STS,
2053 mmTPC1_QM_GLBL_STS0);
2054
2055 if (rc) {
2056 dev_err(hdev->dev, "failed to stop TPC 1 QMAN\n");
2057 retval = -EIO;
2058 }
2059
2060 rc = goya_stop_queue(hdev,
2061 mmTPC1_CMDQ_GLBL_CFG1,
2062 mmTPC1_CMDQ_CP_STS,
2063 mmTPC1_CMDQ_GLBL_STS0);
2064
2065 if (rc) {
2066 dev_err(hdev->dev, "failed to stop TPC 1 CMDQ\n");
2067 retval = -EIO;
2068 }
2069
2070 rc = goya_stop_queue(hdev,
2071 mmTPC2_QM_GLBL_CFG1,
2072 mmTPC2_QM_CP_STS,
2073 mmTPC2_QM_GLBL_STS0);
2074
2075 if (rc) {
2076 dev_err(hdev->dev, "failed to stop TPC 2 QMAN\n");
2077 retval = -EIO;
2078 }
2079
2080 rc = goya_stop_queue(hdev,
2081 mmTPC2_CMDQ_GLBL_CFG1,
2082 mmTPC2_CMDQ_CP_STS,
2083 mmTPC2_CMDQ_GLBL_STS0);
2084
2085 if (rc) {
2086 dev_err(hdev->dev, "failed to stop TPC 2 CMDQ\n");
2087 retval = -EIO;
2088 }
2089
2090 rc = goya_stop_queue(hdev,
2091 mmTPC3_QM_GLBL_CFG1,
2092 mmTPC3_QM_CP_STS,
2093 mmTPC3_QM_GLBL_STS0);
2094
2095 if (rc) {
2096 dev_err(hdev->dev, "failed to stop TPC 3 QMAN\n");
2097 retval = -EIO;
2098 }
2099
2100 rc = goya_stop_queue(hdev,
2101 mmTPC3_CMDQ_GLBL_CFG1,
2102 mmTPC3_CMDQ_CP_STS,
2103 mmTPC3_CMDQ_GLBL_STS0);
2104
2105 if (rc) {
2106 dev_err(hdev->dev, "failed to stop TPC 3 CMDQ\n");
2107 retval = -EIO;
2108 }
2109
2110 rc = goya_stop_queue(hdev,
2111 mmTPC4_QM_GLBL_CFG1,
2112 mmTPC4_QM_CP_STS,
2113 mmTPC4_QM_GLBL_STS0);
2114
2115 if (rc) {
2116 dev_err(hdev->dev, "failed to stop TPC 4 QMAN\n");
2117 retval = -EIO;
2118 }
2119
2120 rc = goya_stop_queue(hdev,
2121 mmTPC4_CMDQ_GLBL_CFG1,
2122 mmTPC4_CMDQ_CP_STS,
2123 mmTPC4_CMDQ_GLBL_STS0);
2124
2125 if (rc) {
2126 dev_err(hdev->dev, "failed to stop TPC 4 CMDQ\n");
2127 retval = -EIO;
2128 }
2129
2130 rc = goya_stop_queue(hdev,
2131 mmTPC5_QM_GLBL_CFG1,
2132 mmTPC5_QM_CP_STS,
2133 mmTPC5_QM_GLBL_STS0);
2134
2135 if (rc) {
2136 dev_err(hdev->dev, "failed to stop TPC 5 QMAN\n");
2137 retval = -EIO;
2138 }
2139
2140 rc = goya_stop_queue(hdev,
2141 mmTPC5_CMDQ_GLBL_CFG1,
2142 mmTPC5_CMDQ_CP_STS,
2143 mmTPC5_CMDQ_GLBL_STS0);
2144
2145 if (rc) {
2146 dev_err(hdev->dev, "failed to stop TPC 5 CMDQ\n");
2147 retval = -EIO;
2148 }
2149
2150 rc = goya_stop_queue(hdev,
2151 mmTPC6_QM_GLBL_CFG1,
2152 mmTPC6_QM_CP_STS,
2153 mmTPC6_QM_GLBL_STS0);
2154
2155 if (rc) {
2156 dev_err(hdev->dev, "failed to stop TPC 6 QMAN\n");
2157 retval = -EIO;
2158 }
2159
2160 rc = goya_stop_queue(hdev,
2161 mmTPC6_CMDQ_GLBL_CFG1,
2162 mmTPC6_CMDQ_CP_STS,
2163 mmTPC6_CMDQ_GLBL_STS0);
2164
2165 if (rc) {
2166 dev_err(hdev->dev, "failed to stop TPC 6 CMDQ\n");
2167 retval = -EIO;
2168 }
2169
2170 rc = goya_stop_queue(hdev,
2171 mmTPC7_QM_GLBL_CFG1,
2172 mmTPC7_QM_CP_STS,
2173 mmTPC7_QM_GLBL_STS0);
2174
2175 if (rc) {
2176 dev_err(hdev->dev, "failed to stop TPC 7 QMAN\n");
2177 retval = -EIO;
2178 }
2179
2180 rc = goya_stop_queue(hdev,
2181 mmTPC7_CMDQ_GLBL_CFG1,
2182 mmTPC7_CMDQ_CP_STS,
2183 mmTPC7_CMDQ_GLBL_STS0);
2184
2185 if (rc) {
2186 dev_err(hdev->dev, "failed to stop TPC 7 CMDQ\n");
2187 retval = -EIO;
2188 }
2189
2190 return retval;
2191}
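/*
 * Note: the TPC QMAN/CMDQ register blocks sit at a fixed stride, so the
 * eight per-TPC stop sequences above could in principle be collapsed into
 * a loop. A minimal sketch, assuming the stride trick used by
 * goya_init_tpc_qman() also holds for the CFG1/CP_STS/GLBL_STS0 registers:
 *
 *	u32 qm_off = tpc_id * (mmTPC1_QM_GLBL_CFG1 - mmTPC0_QM_GLBL_CFG1);
 *
 *	rc = goya_stop_queue(hdev,
 *			mmTPC0_QM_GLBL_CFG1 + qm_off,
 *			mmTPC0_QM_CP_STS + qm_off,
 *			mmTPC0_QM_GLBL_STS0 + qm_off);
 */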
2192
2193static void goya_resume_internal_queues(struct hl_device *hdev)
2194{
2195 WREG32(mmMME_QM_GLBL_CFG1, 0);
2196 WREG32(mmMME_CMDQ_GLBL_CFG1, 0);
2197
2198 WREG32(mmTPC0_QM_GLBL_CFG1, 0);
2199 WREG32(mmTPC0_CMDQ_GLBL_CFG1, 0);
2200
2201 WREG32(mmTPC1_QM_GLBL_CFG1, 0);
2202 WREG32(mmTPC1_CMDQ_GLBL_CFG1, 0);
2203
2204 WREG32(mmTPC2_QM_GLBL_CFG1, 0);
2205 WREG32(mmTPC2_CMDQ_GLBL_CFG1, 0);
2206
2207 WREG32(mmTPC3_QM_GLBL_CFG1, 0);
2208 WREG32(mmTPC3_CMDQ_GLBL_CFG1, 0);
2209
2210 WREG32(mmTPC4_QM_GLBL_CFG1, 0);
2211 WREG32(mmTPC4_CMDQ_GLBL_CFG1, 0);
2212
2213 WREG32(mmTPC5_QM_GLBL_CFG1, 0);
2214 WREG32(mmTPC5_CMDQ_GLBL_CFG1, 0);
2215
2216 WREG32(mmTPC6_QM_GLBL_CFG1, 0);
2217 WREG32(mmTPC6_CMDQ_GLBL_CFG1, 0);
2218
2219 WREG32(mmTPC7_QM_GLBL_CFG1, 0);
2220 WREG32(mmTPC7_CMDQ_GLBL_CFG1, 0);
2221}
2222
Oded Gabbay1251f232019-02-16 00:39:18 +02002223static void goya_dma_stall(struct hl_device *hdev)
2224{
2225 WREG32(mmDMA_QM_0_GLBL_CFG1, 1 << DMA_QM_0_GLBL_CFG1_DMA_STOP_SHIFT);
2226 WREG32(mmDMA_QM_1_GLBL_CFG1, 1 << DMA_QM_1_GLBL_CFG1_DMA_STOP_SHIFT);
2227 WREG32(mmDMA_QM_2_GLBL_CFG1, 1 << DMA_QM_2_GLBL_CFG1_DMA_STOP_SHIFT);
2228 WREG32(mmDMA_QM_3_GLBL_CFG1, 1 << DMA_QM_3_GLBL_CFG1_DMA_STOP_SHIFT);
2229 WREG32(mmDMA_QM_4_GLBL_CFG1, 1 << DMA_QM_4_GLBL_CFG1_DMA_STOP_SHIFT);
2230}
2231
2232static void goya_tpc_stall(struct hl_device *hdev)
2233{
2234 WREG32(mmTPC0_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2235 WREG32(mmTPC1_CFG_TPC_STALL, 1 << TPC1_CFG_TPC_STALL_V_SHIFT);
2236 WREG32(mmTPC2_CFG_TPC_STALL, 1 << TPC2_CFG_TPC_STALL_V_SHIFT);
2237 WREG32(mmTPC3_CFG_TPC_STALL, 1 << TPC3_CFG_TPC_STALL_V_SHIFT);
2238 WREG32(mmTPC4_CFG_TPC_STALL, 1 << TPC4_CFG_TPC_STALL_V_SHIFT);
2239 WREG32(mmTPC5_CFG_TPC_STALL, 1 << TPC5_CFG_TPC_STALL_V_SHIFT);
2240 WREG32(mmTPC6_CFG_TPC_STALL, 1 << TPC6_CFG_TPC_STALL_V_SHIFT);
2241 WREG32(mmTPC7_CFG_TPC_STALL, 1 << TPC7_CFG_TPC_STALL_V_SHIFT);
2242}
2243
2244static void goya_mme_stall(struct hl_device *hdev)
2245{
2246 WREG32(mmMME_STALL, 0xFFFFFFFF);
2247}
2248
2249static int goya_enable_msix(struct hl_device *hdev)
2250{
2251 struct goya_device *goya = hdev->asic_specific;
2252 int cq_cnt = hdev->asic_prop.completion_queues_count;
2253 int rc, i, irq_cnt_init, irq;
2254
2255 if (goya->hw_cap_initialized & HW_CAP_MSIX)
2256 return 0;
2257
2258 rc = pci_alloc_irq_vectors(hdev->pdev, GOYA_MSIX_ENTRIES,
2259 GOYA_MSIX_ENTRIES, PCI_IRQ_MSIX);
2260 if (rc < 0) {
2261 dev_err(hdev->dev,
2262 "MSI-X: Failed to enable support -- %d/%d\n",
2263 GOYA_MSIX_ENTRIES, rc);
2264 return rc;
2265 }
2266
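	/*
	 * irq_cnt_init counts how many IRQs were successfully requested so
	 * that the error path below frees exactly those and no more
	 */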
2267 for (i = 0, irq_cnt_init = 0 ; i < cq_cnt ; i++, irq_cnt_init++) {
2268 irq = pci_irq_vector(hdev->pdev, i);
2269 rc = request_irq(irq, hl_irq_handler_cq, 0, goya_irq_name[i],
2270 &hdev->completion_queue[i]);
2271 if (rc) {
2272			dev_err(hdev->dev, "Failed to request IRQ %d\n", irq);
2273 goto free_irqs;
2274 }
2275 }
2276
2277 irq = pci_irq_vector(hdev->pdev, EVENT_QUEUE_MSIX_IDX);
2278
2279 rc = request_irq(irq, hl_irq_handler_eq, 0,
2280 goya_irq_name[EVENT_QUEUE_MSIX_IDX],
2281 &hdev->event_queue);
2282 if (rc) {
2283		dev_err(hdev->dev, "Failed to request IRQ %d\n", irq);
2284 goto free_irqs;
2285 }
2286
2287 goya->hw_cap_initialized |= HW_CAP_MSIX;
2288 return 0;
2289
2290free_irqs:
2291 for (i = 0 ; i < irq_cnt_init ; i++)
2292 free_irq(pci_irq_vector(hdev->pdev, i),
2293 &hdev->completion_queue[i]);
2294
2295 pci_free_irq_vectors(hdev->pdev);
2296 return rc;
2297}
2298
2299static void goya_sync_irqs(struct hl_device *hdev)
2300{
2301 struct goya_device *goya = hdev->asic_specific;
2302 int i;
2303
2304 if (!(goya->hw_cap_initialized & HW_CAP_MSIX))
2305 return;
2306
2307	/* Wait for all pending IRQ handlers to finish */
2308 for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
2309 synchronize_irq(pci_irq_vector(hdev->pdev, i));
2310
2311 synchronize_irq(pci_irq_vector(hdev->pdev, EVENT_QUEUE_MSIX_IDX));
2312}
2313
2314static void goya_disable_msix(struct hl_device *hdev)
2315{
2316 struct goya_device *goya = hdev->asic_specific;
2317 int i, irq;
2318
2319 if (!(goya->hw_cap_initialized & HW_CAP_MSIX))
2320 return;
2321
2322 goya_sync_irqs(hdev);
2323
2324 irq = pci_irq_vector(hdev->pdev, EVENT_QUEUE_MSIX_IDX);
2325 free_irq(irq, &hdev->event_queue);
2326
2327 for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++) {
2328 irq = pci_irq_vector(hdev->pdev, i);
2329 free_irq(irq, &hdev->completion_queue[i]);
2330 }
2331
2332 pci_free_irq_vectors(hdev->pdev);
2333
2334 goya->hw_cap_initialized &= ~HW_CAP_MSIX;
2335}
2336
2337static void goya_halt_engines(struct hl_device *hdev, bool hard_reset)
2338{
2339 u32 wait_timeout_ms, cpu_timeout_ms;
2340
2341 dev_info(hdev->dev,
2342 "Halting compute engines and disabling interrupts\n");
2343
2344 if (hdev->pldm) {
2345 wait_timeout_ms = GOYA_PLDM_RESET_WAIT_MSEC;
2346 cpu_timeout_ms = GOYA_PLDM_RESET_WAIT_MSEC;
2347 } else {
2348 wait_timeout_ms = GOYA_RESET_WAIT_MSEC;
2349 cpu_timeout_ms = GOYA_CPU_RESET_WAIT_MSEC;
2350 }
2351
2352 if (hard_reset) {
2353 /*
2354		 * We don't know the state of the CPU, so make sure it is
2355		 * stopped by any means necessary
2356 */
2357 WREG32(mmPSOC_GLOBAL_CONF_UBOOT_MAGIC, KMD_MSG_GOTO_WFE);
2358 WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
2359 GOYA_ASYNC_EVENT_ID_HALT_MACHINE);
2360 msleep(cpu_timeout_ms);
2361 }
2362
2363 goya_stop_external_queues(hdev);
2364 goya_stop_internal_queues(hdev);
2365
2366 msleep(wait_timeout_ms);
2367
2368 goya_dma_stall(hdev);
2369 goya_tpc_stall(hdev);
2370 goya_mme_stall(hdev);
2371
2372 msleep(wait_timeout_ms);
2373
2374 goya_disable_external_queues(hdev);
2375 goya_disable_internal_queues(hdev);
2376
2377 if (hard_reset)
2378 goya_disable_msix(hdev);
2379 else
2380 goya_sync_irqs(hdev);
2381}
Oded Gabbay839c4802019-02-16 00:39:16 +02002382
2383/*
2384 * goya_push_fw_to_device - Push FW code to device
2385 *
2386 * @hdev: pointer to hl_device structure
2387 *
2388 * Copy fw code from firmware file to device memory.
2389 * Returns 0 on success
2390 *
2391 */
2392static int goya_push_fw_to_device(struct hl_device *hdev, const char *fw_name,
2393 void __iomem *dst)
2394{
2395 const struct firmware *fw;
2396 const u64 *fw_data;
2397 size_t fw_size, i;
2398 int rc;
2399
2400 rc = request_firmware(&fw, fw_name, hdev->dev);
2401
2402 if (rc) {
2403 dev_err(hdev->dev, "Failed to request %s\n", fw_name);
2404 goto out;
2405 }
2406
2407 fw_size = fw->size;
2408 if ((fw_size % 4) != 0) {
2409 dev_err(hdev->dev, "illegal %s firmware size %zu\n",
2410 fw_name, fw_size);
2411 rc = -EINVAL;
2412 goto out;
2413 }
2414
2415 dev_dbg(hdev->dev, "%s firmware size == %zu\n", fw_name, fw_size);
2416
2417 fw_data = (const u64 *) fw->data;
2418
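	/*
	 * fw->size is a multiple of 4 (checked above) but not necessarily
	 * of 8. If there is a trailing 32-bit word, shorten the qword copy
	 * loop and write that last word separately with writel() below
	 */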
2419 if ((fw->size % 8) != 0)
2420 fw_size -= 8;
2421
2422 for (i = 0 ; i < fw_size ; i += 8, fw_data++, dst += 8) {
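		/* Once every 0x80000 bytes (512KB), log progress and give
		 * the bus a short breather
		 */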
2423 if (!(i & (0x80000 - 1))) {
2424 dev_dbg(hdev->dev,
2425				"copied so far %zu out of %zu for %s firmware\n",
2426 i, fw_size, fw_name);
2427 usleep_range(20, 100);
2428 }
2429
2430 writeq(*fw_data, dst);
2431 }
2432
2433 if ((fw->size % 8) != 0)
2434 writel(*(const u32 *) fw_data, dst);
2435
2436out:
2437 release_firmware(fw);
2438 return rc;
2439}
2440
2441static int goya_pldm_init_cpu(struct hl_device *hdev)
2442{
2443 char fw_name[200];
2444 void __iomem *dst;
2445 u32 val, unit_rst_val;
2446 int rc;
2447
2448 /* Must initialize SRAM scrambler before pushing u-boot to SRAM */
2449 goya_init_golden_registers(hdev);
2450
2451 /* Put ARM cores into reset */
2452 WREG32(mmCPU_CA53_CFG_ARM_RST_CONTROL, CPU_RESET_ASSERT);
2453 val = RREG32(mmCPU_CA53_CFG_ARM_RST_CONTROL);
2454
2455 /* Reset the CA53 MACRO */
2456 unit_rst_val = RREG32(mmPSOC_GLOBAL_CONF_UNIT_RST_N);
2457 WREG32(mmPSOC_GLOBAL_CONF_UNIT_RST_N, CA53_RESET);
2458 val = RREG32(mmPSOC_GLOBAL_CONF_UNIT_RST_N);
2459 WREG32(mmPSOC_GLOBAL_CONF_UNIT_RST_N, unit_rst_val);
2460 val = RREG32(mmPSOC_GLOBAL_CONF_UNIT_RST_N);
2461
2462 snprintf(fw_name, sizeof(fw_name), "habanalabs/goya/goya-u-boot.bin");
2463 dst = hdev->pcie_bar[SRAM_CFG_BAR_ID] + UBOOT_FW_OFFSET;
2464 rc = goya_push_fw_to_device(hdev, fw_name, dst);
2465 if (rc)
2466 return rc;
2467
2468 snprintf(fw_name, sizeof(fw_name), "habanalabs/goya/goya-fit.itb");
2469 dst = hdev->pcie_bar[DDR_BAR_ID] + LINUX_FW_OFFSET;
2470 rc = goya_push_fw_to_device(hdev, fw_name, dst);
2471 if (rc)
2472 return rc;
2473
2474 WREG32(mmPSOC_GLOBAL_CONF_UBOOT_MAGIC, KMD_MSG_FIT_RDY);
2475 WREG32(mmPSOC_GLOBAL_CONF_WARM_REBOOT, CPU_BOOT_STATUS_NA);
2476
2477 WREG32(mmCPU_CA53_CFG_RST_ADDR_LSB_0,
2478 lower_32_bits(SRAM_BASE_ADDR + UBOOT_FW_OFFSET));
2479 WREG32(mmCPU_CA53_CFG_RST_ADDR_MSB_0,
2480 upper_32_bits(SRAM_BASE_ADDR + UBOOT_FW_OFFSET));
2481
2482 /* Release ARM core 0 from reset */
2483 WREG32(mmCPU_CA53_CFG_ARM_RST_CONTROL,
2484 CPU_RESET_CORE0_DEASSERT);
2485 val = RREG32(mmCPU_CA53_CFG_ARM_RST_CONTROL);
2486
2487 return 0;
2488}
2489
2490/*
2491 * FW component passes an offset from SRAM_BASE_ADDR in SCRATCHPAD_xx.
2492 * The version string should be located by that offset.
2493 */
2494static void goya_read_device_fw_version(struct hl_device *hdev,
2495 enum goya_fw_component fwc)
2496{
2497 const char *name;
2498 u32 ver_off;
2499 char *dest;
2500
2501 switch (fwc) {
2502 case FW_COMP_UBOOT:
2503 ver_off = RREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_29);
2504 dest = hdev->asic_prop.uboot_ver;
2505 name = "U-Boot";
2506 break;
2507 case FW_COMP_PREBOOT:
2508 ver_off = RREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_28);
2509 dest = hdev->asic_prop.preboot_ver;
2510 name = "Preboot";
2511 break;
2512 default:
2513 dev_warn(hdev->dev, "Undefined FW component: %d\n", fwc);
2514 return;
2515 }
2516
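	/* Strip the SRAM base so ver_off becomes a plain offset into the SRAM BAR */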
2517 ver_off &= ~((u32)SRAM_BASE_ADDR);
2518
2519 if (ver_off < SRAM_SIZE - VERSION_MAX_LEN) {
2520 memcpy_fromio(dest, hdev->pcie_bar[SRAM_CFG_BAR_ID] + ver_off,
2521 VERSION_MAX_LEN);
2522 } else {
2523 dev_err(hdev->dev, "%s version offset (0x%x) is above SRAM\n",
2524 name, ver_off);
2525 strcpy(dest, "unavailable");
2526 }
2527}
2528
2529static int goya_init_cpu(struct hl_device *hdev, u32 cpu_timeout)
2530{
2531 struct goya_device *goya = hdev->asic_specific;
2532 char fw_name[200];
2533 void __iomem *dst;
2534 u32 status;
2535 int rc;
2536
2537 if (!hdev->cpu_enable)
2538 return 0;
2539
2540 if (goya->hw_cap_initialized & HW_CAP_CPU)
2541 return 0;
2542
2543 /*
2544	 * Before pushing u-boot/Linux to the device, we need to set the DDR
2545	 * BAR to the base address of DRAM
2546 */
2547 rc = goya_set_ddr_bar_base(hdev, DRAM_PHYS_BASE);
2548 if (rc) {
2549 dev_err(hdev->dev,
2550 "failed to map DDR bar to DRAM base address\n");
2551 return rc;
2552 }
2553
2554 if (hdev->pldm) {
2555 rc = goya_pldm_init_cpu(hdev);
2556 if (rc)
2557 return rc;
2558
2559 goto out;
2560 }
2561
2562 /* Make sure CPU boot-loader is running */
2563 rc = hl_poll_timeout(
2564 hdev,
2565 mmPSOC_GLOBAL_CONF_WARM_REBOOT,
2566 status,
2567 (status == CPU_BOOT_STATUS_DRAM_RDY) ||
2568 (status == CPU_BOOT_STATUS_SRAM_AVAIL),
2569 10000,
2570 cpu_timeout);
2571
2572 if (rc) {
2573		dev_err(hdev->dev, "Error in ARM u-boot!\n");
2574 switch (status) {
2575 case CPU_BOOT_STATUS_NA:
2576 dev_err(hdev->dev,
2577 "ARM status %d - BTL did NOT run\n", status);
2578 break;
2579 case CPU_BOOT_STATUS_IN_WFE:
2580 dev_err(hdev->dev,
2581 "ARM status %d - Inside WFE loop\n", status);
2582 break;
2583 case CPU_BOOT_STATUS_IN_BTL:
2584 dev_err(hdev->dev,
2585 "ARM status %d - Stuck in BTL\n", status);
2586 break;
2587 case CPU_BOOT_STATUS_IN_PREBOOT:
2588 dev_err(hdev->dev,
2589 "ARM status %d - Stuck in Preboot\n", status);
2590 break;
2591 case CPU_BOOT_STATUS_IN_SPL:
2592 dev_err(hdev->dev,
2593 "ARM status %d - Stuck in SPL\n", status);
2594 break;
2595 case CPU_BOOT_STATUS_IN_UBOOT:
2596 dev_err(hdev->dev,
2597 "ARM status %d - Stuck in u-boot\n", status);
2598 break;
2599 case CPU_BOOT_STATUS_DRAM_INIT_FAIL:
2600 dev_err(hdev->dev,
2601 "ARM status %d - DDR initialization failed\n",
2602 status);
2603 break;
2604 default:
2605 dev_err(hdev->dev,
2606 "ARM status %d - Invalid status code\n",
2607 status);
2608 break;
2609 }
2610 return -EIO;
2611 }
2612
2613 /* Read U-Boot version now in case we will later fail */
2614 goya_read_device_fw_version(hdev, FW_COMP_UBOOT);
2615 goya_read_device_fw_version(hdev, FW_COMP_PREBOOT);
2616
2617 if (status == CPU_BOOT_STATUS_SRAM_AVAIL)
2618 goto out;
2619
2620 if (!hdev->fw_loading) {
2621 dev_info(hdev->dev, "Skip loading FW\n");
2622 goto out;
2623 }
2624
2625 snprintf(fw_name, sizeof(fw_name), "habanalabs/goya/goya-fit.itb");
2626 dst = hdev->pcie_bar[DDR_BAR_ID] + LINUX_FW_OFFSET;
2627 rc = goya_push_fw_to_device(hdev, fw_name, dst);
2628 if (rc)
2629 return rc;
2630
2631 WREG32(mmPSOC_GLOBAL_CONF_UBOOT_MAGIC, KMD_MSG_FIT_RDY);
2632
2633 rc = hl_poll_timeout(
2634 hdev,
2635 mmPSOC_GLOBAL_CONF_WARM_REBOOT,
2636 status,
2637 (status == CPU_BOOT_STATUS_SRAM_AVAIL),
2638 10000,
2639 cpu_timeout);
2640
2641 if (rc) {
2642 if (status == CPU_BOOT_STATUS_FIT_CORRUPTED)
2643 dev_err(hdev->dev,
2644 "ARM u-boot reports FIT image is corrupted\n");
2645 else
2646 dev_err(hdev->dev,
2647 "ARM Linux failed to load, %d\n", status);
2648 WREG32(mmPSOC_GLOBAL_CONF_UBOOT_MAGIC, KMD_MSG_NA);
2649 return -EIO;
2650 }
2651
2652 dev_info(hdev->dev, "Successfully loaded firmware to device\n");
2653
2654out:
2655 goya->hw_cap_initialized |= HW_CAP_CPU;
2656
2657 return 0;
2658}
2659
Omer Shpigelman0feaf862019-02-16 00:39:22 +02002660static int goya_mmu_init(struct hl_device *hdev)
2661{
2662 struct asic_fixed_properties *prop = &hdev->asic_prop;
2663 struct goya_device *goya = hdev->asic_specific;
2664 u64 hop0_addr;
2665 int rc, i;
2666
2667 if (!hdev->mmu_enable)
2668 return 0;
2669
2670 if (goya->hw_cap_initialized & HW_CAP_MMU)
2671 return 0;
2672
2673 hdev->dram_supports_virtual_memory = true;
2674
2675 for (i = 0 ; i < prop->max_asid ; i++) {
2676 hop0_addr = prop->mmu_pgt_addr +
2677 (i * prop->mmu_hop_table_size);
2678
2679 rc = goya_mmu_update_asid_hop0_addr(hdev, i, hop0_addr);
2680 if (rc) {
2681 dev_err(hdev->dev,
2682 "failed to set hop0 addr for asid %d\n", i);
2683 goto err;
2684 }
2685 }
2686
2687 goya->hw_cap_initialized |= HW_CAP_MMU;
2688
2689 /* init MMU cache manage page */
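	/*
	 * The register names encode which address bits each write takes:
	 * bits 39:8 in the first, bits 49:40 in the second
	 */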
2690 WREG32(mmSTLB_CACHE_INV_BASE_39_8, MMU_CACHE_MNG_ADDR >> 8);
2691	WREG32(mmSTLB_CACHE_INV_BASE_49_40, MMU_CACHE_MNG_ADDR >> 40);
2692
2693 /* Remove follower feature due to performance bug */
2694 WREG32_AND(mmSTLB_STLB_FEATURE_EN,
2695 (~STLB_STLB_FEATURE_EN_FOLLOWER_EN_MASK));
2696
2697 hdev->asic_funcs->mmu_invalidate_cache(hdev, true);
2698
2699 WREG32(mmMMU_MMU_ENABLE, 1);
2700 WREG32(mmMMU_SPI_MASK, 0xF);
2701
2702 return 0;
2703
2704err:
2705 return rc;
2706}
2707
Oded Gabbay839c4802019-02-16 00:39:16 +02002708/*
2709 * goya_hw_init - Goya hardware initialization code
2710 *
2711 * @hdev: pointer to hl_device structure
2712 *
2713 * Returns 0 on success
2714 *
2715 */
2716static int goya_hw_init(struct hl_device *hdev)
2717{
2718 struct asic_fixed_properties *prop = &hdev->asic_prop;
2719 u32 val;
2720 int rc;
2721
2722 dev_info(hdev->dev, "Starting initialization of H/W\n");
2723
2724 /* Perform read from the device to make sure device is up */
2725 val = RREG32(mmPCIE_DBI_DEVICE_ID_VENDOR_ID_REG);
2726
Oded Gabbayf8c8c7d2019-02-16 00:39:20 +02002727 /*
2728 * Let's mark in the H/W that we have reached this point. We check
2729 * this value in the reset_before_init function to understand whether
2730 * we need to reset the chip before doing H/W init. This register is
2731 * cleared by the H/W upon H/W reset
2732 */
2733 WREG32(mmPSOC_GLOBAL_CONF_APP_STATUS, HL_DEVICE_HW_STATE_DIRTY);
2734
Oded Gabbay839c4802019-02-16 00:39:16 +02002735 rc = goya_init_cpu(hdev, GOYA_CPU_TIMEOUT_USEC);
2736 if (rc) {
2737 dev_err(hdev->dev, "failed to initialize CPU\n");
2738 return rc;
2739 }
2740
2741 goya_tpc_mbist_workaround(hdev);
2742
2743 goya_init_golden_registers(hdev);
2744
2745 /*
2746 * After CPU initialization is finished, change DDR bar mapping inside
2747 * iATU to point to the start address of the MMU page tables
2748 */
2749 rc = goya_set_ddr_bar_base(hdev, DRAM_PHYS_BASE +
2750 (MMU_PAGE_TABLES_ADDR & ~(prop->dram_pci_bar_size - 0x1ull)));
2751 if (rc) {
2752 dev_err(hdev->dev,
2753 "failed to map DDR bar to MMU page tables\n");
2754 return rc;
2755 }
2756
Omer Shpigelman0feaf862019-02-16 00:39:22 +02002757 rc = goya_mmu_init(hdev);
2758 if (rc)
2759 return rc;
2760
Oded Gabbay839c4802019-02-16 00:39:16 +02002761 goya_init_security(hdev);
2762
Oded Gabbay9494a8d2019-02-16 00:39:17 +02002763 goya_init_dma_qmans(hdev);
2764
2765 goya_init_mme_qmans(hdev);
2766
2767 goya_init_tpc_qmans(hdev);
2768
Oded Gabbay1251f232019-02-16 00:39:18 +02002769 /* MSI-X must be enabled before CPU queues are initialized */
2770 rc = goya_enable_msix(hdev);
2771 if (rc)
2772 goto disable_queues;
2773
Oded Gabbay9494a8d2019-02-16 00:39:17 +02002774 rc = goya_init_cpu_queues(hdev);
2775 if (rc) {
2776 dev_err(hdev->dev, "failed to initialize CPU H/W queues %d\n",
2777 rc);
Oded Gabbay1251f232019-02-16 00:39:18 +02002778 goto disable_msix;
Oded Gabbay9494a8d2019-02-16 00:39:17 +02002779 }
2780
Oded Gabbay839c4802019-02-16 00:39:16 +02002781 /* CPU initialization is finished, we can now move to 48 bit DMA mask */
2782 rc = pci_set_dma_mask(hdev->pdev, DMA_BIT_MASK(48));
2783 if (rc) {
2784 dev_warn(hdev->dev, "Unable to set pci dma mask to 48 bits\n");
2785 rc = pci_set_dma_mask(hdev->pdev, DMA_BIT_MASK(32));
2786 if (rc) {
2787 dev_err(hdev->dev,
2788 "Unable to set pci dma mask to 32 bits\n");
Oded Gabbay9494a8d2019-02-16 00:39:17 +02002789 goto disable_pci_access;
Oded Gabbay839c4802019-02-16 00:39:16 +02002790 }
2791 }
2792
2793 rc = pci_set_consistent_dma_mask(hdev->pdev, DMA_BIT_MASK(48));
2794 if (rc) {
2795 dev_warn(hdev->dev,
2796 "Unable to set pci consistent dma mask to 48 bits\n");
2797 rc = pci_set_consistent_dma_mask(hdev->pdev, DMA_BIT_MASK(32));
2798 if (rc) {
2799 dev_err(hdev->dev,
2800 "Unable to set pci consistent dma mask to 32 bits\n");
Oded Gabbay9494a8d2019-02-16 00:39:17 +02002801 goto disable_pci_access;
Oded Gabbay839c4802019-02-16 00:39:16 +02002802 }
2803 }
2804
2805 /* Perform read from the device to flush all MSI-X configuration */
2806 val = RREG32(mmPCIE_DBI_DEVICE_ID_VENDOR_ID_REG);
2807
2808 return 0;
Oded Gabbay9494a8d2019-02-16 00:39:17 +02002809
2810disable_pci_access:
2811 goya_send_pci_access_msg(hdev, ARMCP_PACKET_DISABLE_PCI_ACCESS);
Oded Gabbay1251f232019-02-16 00:39:18 +02002812disable_msix:
2813 goya_disable_msix(hdev);
Oded Gabbay9494a8d2019-02-16 00:39:17 +02002814disable_queues:
2815 goya_disable_internal_queues(hdev);
2816 goya_disable_external_queues(hdev);
2817
2818 return rc;
Oded Gabbay839c4802019-02-16 00:39:16 +02002819}
2820
2821/*
2822 * goya_hw_fini - Goya hardware tear-down code
2823 *
2824 * @hdev: pointer to hl_device structure
2825 * @hard_reset: should we do hard reset to all engines or just reset the
2826 * compute/dma engines
2827 */
2828static void goya_hw_fini(struct hl_device *hdev, bool hard_reset)
2829{
2830 struct goya_device *goya = hdev->asic_specific;
2831 u32 reset_timeout_ms, status;
2832
2833 if (hdev->pldm)
2834 reset_timeout_ms = GOYA_PLDM_RESET_TIMEOUT_MSEC;
2835 else
2836 reset_timeout_ms = GOYA_RESET_TIMEOUT_MSEC;
2837
2838 if (hard_reset) {
2839 goya_set_ddr_bar_base(hdev, DRAM_PHYS_BASE);
2840 goya_disable_clk_rlx(hdev);
2841 goya_set_pll_refclk(hdev);
2842
2843 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG, RESET_ALL);
2844 dev_info(hdev->dev,
2845 "Issued HARD reset command, going to wait %dms\n",
2846 reset_timeout_ms);
2847 } else {
2848 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG, DMA_MME_TPC_RESET);
2849 dev_info(hdev->dev,
2850 "Issued SOFT reset command, going to wait %dms\n",
2851 reset_timeout_ms);
2852 }
2853
2854 /*
2855 * After hard reset, we can't poll the BTM_FSM register because the PSOC
2856	 * itself is in reset. In either case, we need to wait until the
2857	 * reset is deasserted
2858 */
2859 msleep(reset_timeout_ms);
2860
2861 status = RREG32(mmPSOC_GLOBAL_CONF_BTM_FSM);
2862 if (status & PSOC_GLOBAL_CONF_BTM_FSM_STATE_MASK)
2863 dev_err(hdev->dev,
2864 "Timeout while waiting for device to reset 0x%x\n",
2865 status);
2866
Oded Gabbayf8c8c7d2019-02-16 00:39:20 +02002867 if (!hard_reset) {
2868 goya->hw_cap_initialized &= ~(HW_CAP_DMA | HW_CAP_MME |
2869 HW_CAP_GOLDEN | HW_CAP_TPC);
2870 WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
2871 GOYA_ASYNC_EVENT_ID_SOFT_RESET);
2872 return;
2873 }
2874
Oded Gabbay839c4802019-02-16 00:39:16 +02002875 /* Chicken bit to re-initiate boot sequencer flow */
2876 WREG32(mmPSOC_GLOBAL_CONF_BOOT_SEQ_RE_START,
2877 1 << PSOC_GLOBAL_CONF_BOOT_SEQ_RE_START_IND_SHIFT);
2878 /* Move boot manager FSM to pre boot sequencer init state */
2879 WREG32(mmPSOC_GLOBAL_CONF_SW_BTM_FSM,
2880 0xA << PSOC_GLOBAL_CONF_SW_BTM_FSM_CTRL_SHIFT);
2881
2882 goya->hw_cap_initialized &= ~(HW_CAP_CPU | HW_CAP_CPU_Q |
2883 HW_CAP_DDR_0 | HW_CAP_DDR_1 |
2884 HW_CAP_DMA | HW_CAP_MME |
2885 HW_CAP_MMU | HW_CAP_TPC_MBIST |
2886 HW_CAP_GOLDEN | HW_CAP_TPC);
Oded Gabbay1251f232019-02-16 00:39:18 +02002887 memset(goya->events_stat, 0, sizeof(goya->events_stat));
Oded Gabbay839c4802019-02-16 00:39:16 +02002888
2889 if (!hdev->pldm) {
2890 int rc;
2891		/* In case we are running inside a VM and the VM is
2892 * shutting down, we need to make sure CPU boot-loader
2893 * is running before we can continue the VM shutdown.
2894 * That is because the VM will send an FLR signal that
2895 * we must answer
2896 */
2897 dev_info(hdev->dev,
2898 "Going to wait up to %ds for CPU boot loader\n",
2899 GOYA_CPU_TIMEOUT_USEC / 1000 / 1000);
2900
2901 rc = hl_poll_timeout(
2902 hdev,
2903 mmPSOC_GLOBAL_CONF_WARM_REBOOT,
2904 status,
2905 (status == CPU_BOOT_STATUS_DRAM_RDY),
2906 10000,
2907 GOYA_CPU_TIMEOUT_USEC);
2908 if (rc)
2909 dev_err(hdev->dev,
2910 "failed to wait for CPU boot loader\n");
2911 }
2912}
2913
Oded Gabbay99b9d7b2019-02-16 00:39:13 +02002914int goya_suspend(struct hl_device *hdev)
2915{
Oded Gabbay9494a8d2019-02-16 00:39:17 +02002916 int rc;
2917
2918 rc = goya_stop_internal_queues(hdev);
2919
2920 if (rc) {
2921 dev_err(hdev->dev, "failed to stop internal queues\n");
2922 return rc;
2923 }
2924
2925 rc = goya_stop_external_queues(hdev);
2926
2927 if (rc) {
2928 dev_err(hdev->dev, "failed to stop external queues\n");
2929 return rc;
2930 }
2931
2932 rc = goya_send_pci_access_msg(hdev, ARMCP_PACKET_DISABLE_PCI_ACCESS);
2933 if (rc)
2934 dev_err(hdev->dev, "Failed to disable PCI access from CPU\n");
2935
2936 return rc;
Oded Gabbay99b9d7b2019-02-16 00:39:13 +02002937}
2938
2939int goya_resume(struct hl_device *hdev)
2940{
Oded Gabbay9494a8d2019-02-16 00:39:17 +02002941 int rc;
2942
2943 goya_resume_external_queues(hdev);
2944 goya_resume_internal_queues(hdev);
2945
2946 rc = goya_send_pci_access_msg(hdev, ARMCP_PACKET_ENABLE_PCI_ACCESS);
2947 if (rc)
2948 dev_err(hdev->dev, "Failed to enable PCI access from CPU\n");
2949 return rc;
Oded Gabbay99b9d7b2019-02-16 00:39:13 +02002950}
2951
Oded Gabbaybe5d9262019-02-16 00:39:15 +02002952int goya_mmap(struct hl_fpriv *hpriv, struct vm_area_struct *vma)
2953{
2954 return -EINVAL;
2955}
2956
2957int goya_cb_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
2958 u64 kaddress, phys_addr_t paddress, u32 size)
2959{
2960 int rc;
2961
2962 vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
2963 VM_DONTCOPY | VM_NORESERVE;
2964
2965 rc = remap_pfn_range(vma, vma->vm_start, paddress >> PAGE_SHIFT,
2966 size, vma->vm_page_prot);
2967 if (rc)
2968		dev_err(hdev->dev, "remap_pfn_range error %d\n", rc);
2969
2970 return rc;
2971}
2972
Oded Gabbay9494a8d2019-02-16 00:39:17 +02002973void goya_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
2974{
2975 u32 db_reg_offset, db_value;
2976 bool invalid_queue = false;
2977
2978 switch (hw_queue_id) {
2979 case GOYA_QUEUE_ID_DMA_0:
2980 db_reg_offset = mmDMA_QM_0_PQ_PI;
2981 break;
2982
2983 case GOYA_QUEUE_ID_DMA_1:
2984 db_reg_offset = mmDMA_QM_1_PQ_PI;
2985 break;
2986
2987 case GOYA_QUEUE_ID_DMA_2:
2988 db_reg_offset = mmDMA_QM_2_PQ_PI;
2989 break;
2990
2991 case GOYA_QUEUE_ID_DMA_3:
2992 db_reg_offset = mmDMA_QM_3_PQ_PI;
2993 break;
2994
2995 case GOYA_QUEUE_ID_DMA_4:
2996 db_reg_offset = mmDMA_QM_4_PQ_PI;
2997 break;
2998
2999 case GOYA_QUEUE_ID_CPU_PQ:
3000 if (hdev->cpu_queues_enable)
3001 db_reg_offset = mmCPU_IF_PF_PQ_PI;
3002 else
3003 invalid_queue = true;
3004 break;
3005
3006 case GOYA_QUEUE_ID_MME:
3007 db_reg_offset = mmMME_QM_PQ_PI;
3008 break;
3009
3010 case GOYA_QUEUE_ID_TPC0:
3011 db_reg_offset = mmTPC0_QM_PQ_PI;
3012 break;
3013
3014 case GOYA_QUEUE_ID_TPC1:
3015 db_reg_offset = mmTPC1_QM_PQ_PI;
3016 break;
3017
3018 case GOYA_QUEUE_ID_TPC2:
3019 db_reg_offset = mmTPC2_QM_PQ_PI;
3020 break;
3021
3022 case GOYA_QUEUE_ID_TPC3:
3023 db_reg_offset = mmTPC3_QM_PQ_PI;
3024 break;
3025
3026 case GOYA_QUEUE_ID_TPC4:
3027 db_reg_offset = mmTPC4_QM_PQ_PI;
3028 break;
3029
3030 case GOYA_QUEUE_ID_TPC5:
3031 db_reg_offset = mmTPC5_QM_PQ_PI;
3032 break;
3033
3034 case GOYA_QUEUE_ID_TPC6:
3035 db_reg_offset = mmTPC6_QM_PQ_PI;
3036 break;
3037
3038 case GOYA_QUEUE_ID_TPC7:
3039 db_reg_offset = mmTPC7_QM_PQ_PI;
3040 break;
3041
3042 default:
3043 invalid_queue = true;
3044 }
3045
3046 if (invalid_queue) {
3047 /* Should never get here */
3048 dev_err(hdev->dev, "h/w queue %d is invalid. Can't set pi\n",
3049 hw_queue_id);
3050 return;
3051 }
3052
3053 db_value = pi;
3054
3055 /* ring the doorbell */
3056 WREG32(db_reg_offset, db_value);
3057
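	/*
	 * The CPU queue has no real H/W doorbell; judging by the event ID
	 * name, the GIC write below is the "kick" that tells the embedded
	 * CPU its PI has moved
	 */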
3058 if (hw_queue_id == GOYA_QUEUE_ID_CPU_PQ)
3059 WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
3060 GOYA_ASYNC_EVENT_ID_PI_UPDATE);
3061}
3062
3063void goya_flush_pq_write(struct hl_device *hdev, u64 *pq, u64 exp_val)
3064{
3065 /* Not needed in Goya */
3066}
3067
Oded Gabbay99b9d7b2019-02-16 00:39:13 +02003068void *goya_dma_alloc_coherent(struct hl_device *hdev, size_t size,
3069 dma_addr_t *dma_handle, gfp_t flags)
3070{
3071 return dma_alloc_coherent(&hdev->pdev->dev, size, dma_handle, flags);
3072}
3073
3074void goya_dma_free_coherent(struct hl_device *hdev, size_t size, void *cpu_addr,
3075 dma_addr_t dma_handle)
3076{
3077 dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, dma_handle);
3078}
3079
Oded Gabbay9494a8d2019-02-16 00:39:17 +02003080void *goya_get_int_queue_base(struct hl_device *hdev, u32 queue_id,
3081 dma_addr_t *dma_handle, u16 *queue_len)
3082{
3083 void *base;
3084 u32 offset;
3085
3086 *dma_handle = hdev->asic_prop.sram_base_address;
3087
3088 base = hdev->pcie_bar[SRAM_CFG_BAR_ID];
3089
3090 switch (queue_id) {
3091 case GOYA_QUEUE_ID_MME:
3092 offset = MME_QMAN_BASE_OFFSET;
3093 *queue_len = MME_QMAN_LENGTH;
3094 break;
3095 case GOYA_QUEUE_ID_TPC0:
3096 offset = TPC0_QMAN_BASE_OFFSET;
3097 *queue_len = TPC_QMAN_LENGTH;
3098 break;
3099 case GOYA_QUEUE_ID_TPC1:
3100 offset = TPC1_QMAN_BASE_OFFSET;
3101 *queue_len = TPC_QMAN_LENGTH;
3102 break;
3103 case GOYA_QUEUE_ID_TPC2:
3104 offset = TPC2_QMAN_BASE_OFFSET;
3105 *queue_len = TPC_QMAN_LENGTH;
3106 break;
3107 case GOYA_QUEUE_ID_TPC3:
3108 offset = TPC3_QMAN_BASE_OFFSET;
3109 *queue_len = TPC_QMAN_LENGTH;
3110 break;
3111 case GOYA_QUEUE_ID_TPC4:
3112 offset = TPC4_QMAN_BASE_OFFSET;
3113 *queue_len = TPC_QMAN_LENGTH;
3114 break;
3115 case GOYA_QUEUE_ID_TPC5:
3116 offset = TPC5_QMAN_BASE_OFFSET;
3117 *queue_len = TPC_QMAN_LENGTH;
3118 break;
3119 case GOYA_QUEUE_ID_TPC6:
3120 offset = TPC6_QMAN_BASE_OFFSET;
3121 *queue_len = TPC_QMAN_LENGTH;
3122 break;
3123 case GOYA_QUEUE_ID_TPC7:
3124 offset = TPC7_QMAN_BASE_OFFSET;
3125 *queue_len = TPC_QMAN_LENGTH;
3126 break;
3127 default:
3128 dev_err(hdev->dev, "Got invalid queue id %d\n", queue_id);
3129 return NULL;
3130 }
3131
3132 base += offset;
3133 *dma_handle += offset;
3134
3135 return base;
3136}
3137
Oded Gabbayeff6f4a2019-02-16 00:39:21 +02003138int goya_send_job_on_qman0(struct hl_device *hdev, struct hl_cs_job *job)
3139{
3140 struct goya_device *goya = hdev->asic_specific;
3141 struct packet_msg_prot *fence_pkt;
3142 u32 *fence_ptr;
3143 dma_addr_t fence_dma_addr;
3144 struct hl_cb *cb;
3145 u32 tmp;
3146 int rc;
3147
3148 if (!hdev->asic_funcs->is_device_idle(hdev)) {
3149 dev_err_ratelimited(hdev->dev,
3150 "Can't send KMD job on QMAN0 if device is not idle\n");
3151 return -EFAULT;
3152 }
3153
3154 fence_ptr = hdev->asic_funcs->dma_pool_zalloc(hdev, 4, GFP_KERNEL,
3155 &fence_dma_addr);
3156 if (!fence_ptr) {
3157 dev_err(hdev->dev,
3158 "Failed to allocate fence memory for QMAN0\n");
3159 return -ENOMEM;
3160 }
3161
3162 *fence_ptr = 0;
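	/*
	 * Completion scheme: the CB's last packet is patched below into a
	 * MSG_PROT that writes GOYA_QMAN0_FENCE_VAL into fence_ptr, which
	 * we then poll to detect that the job has finished
	 */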
3163
3164 if (goya->hw_cap_initialized & HW_CAP_MMU) {
3165 WREG32(mmDMA_QM_0_GLBL_PROT, QMAN_DMA_FULLY_TRUSTED);
3166 RREG32(mmDMA_QM_0_GLBL_PROT);
3167 }
3168
3169 /*
3170	 * The Goya CS parser saves space for 2x packet_msg_prot at the end
3171	 * of the CB. For synchronized kernel jobs we only need space for one
3172	 * packet_msg_prot, so trim the CB size accordingly
3172 */
3173 job->job_cb_size -= sizeof(struct packet_msg_prot);
3174
3175 cb = job->patched_cb;
3176
3177 fence_pkt = (struct packet_msg_prot *) (uintptr_t) (cb->kernel_address +
3178 job->job_cb_size - sizeof(struct packet_msg_prot));
3179
3180 fence_pkt->ctl = (PACKET_MSG_PROT << GOYA_PKT_CTL_OPCODE_SHIFT) |
3181 (1 << GOYA_PKT_CTL_EB_SHIFT) |
3182 (1 << GOYA_PKT_CTL_MB_SHIFT);
3183 fence_pkt->value = GOYA_QMAN0_FENCE_VAL;
3184 fence_pkt->addr = fence_dma_addr +
3185 hdev->asic_prop.host_phys_base_address;
3186
3187 rc = hl_hw_queue_send_cb_no_cmpl(hdev, GOYA_QUEUE_ID_DMA_0,
3188 job->job_cb_size, cb->bus_address);
3189 if (rc) {
3190 dev_err(hdev->dev, "Failed to send CB on QMAN0, %d\n", rc);
3191 goto free_fence_ptr;
3192 }
3193
3194 rc = hl_poll_timeout_memory(hdev, (u64) (uintptr_t) fence_ptr,
3195 HL_DEVICE_TIMEOUT_USEC, &tmp);
3196
3197 hl_hw_queue_inc_ci_kernel(hdev, GOYA_QUEUE_ID_DMA_0);
3198
3199 if ((rc) || (tmp != GOYA_QMAN0_FENCE_VAL)) {
3200 dev_err(hdev->dev, "QMAN0 Job hasn't finished in time\n");
3201 rc = -ETIMEDOUT;
3202 }
3203
3204free_fence_ptr:
3205 hdev->asic_funcs->dma_pool_free(hdev, (void *) fence_ptr,
3206 fence_dma_addr);
3207
3208 if (goya->hw_cap_initialized & HW_CAP_MMU) {
3209 WREG32(mmDMA_QM_0_GLBL_PROT, QMAN_DMA_PARTLY_TRUSTED);
3210 RREG32(mmDMA_QM_0_GLBL_PROT);
3211 }
3212
3213 return rc;
3214}
3215
Oded Gabbay9494a8d2019-02-16 00:39:17 +02003216int goya_send_cpu_message(struct hl_device *hdev, u32 *msg, u16 len,
3217 u32 timeout, long *result)
3218{
3219 struct goya_device *goya = hdev->asic_specific;
3220 struct armcp_packet *pkt;
3221 dma_addr_t pkt_dma_addr;
3222 u32 tmp;
3223 int rc = 0;
3224
3225 if (!(goya->hw_cap_initialized & HW_CAP_CPU_Q)) {
3226 if (result)
3227 *result = 0;
3228 return 0;
3229 }
3230
3231 if (len > CPU_CB_SIZE) {
3232 dev_err(hdev->dev, "Invalid CPU message size of %d bytes\n",
3233 len);
3234 return -ENOMEM;
3235 }
3236
3237 pkt = hdev->asic_funcs->cpu_accessible_dma_pool_alloc(hdev, len,
3238 &pkt_dma_addr);
3239 if (!pkt) {
3240 dev_err(hdev->dev,
3241 "Failed to allocate DMA memory for packet to CPU\n");
3242 return -ENOMEM;
3243 }
3244
3245 memcpy(pkt, msg, len);
3246
3247 mutex_lock(&hdev->send_cpu_message_lock);
3248
3249 if (hdev->disabled)
3250 goto out;
3251
3252 rc = hl_hw_queue_send_cb_no_cmpl(hdev, GOYA_QUEUE_ID_CPU_PQ, len,
3253 pkt_dma_addr);
3254 if (rc) {
3255 dev_err(hdev->dev, "Failed to send CB on CPU PQ (%d)\n", rc);
3256 goto out;
3257 }
3258
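	/*
	 * The embedded CPU signals completion by writing
	 * ARMCP_PACKET_FENCE_VAL into pkt->fence; its return code and
	 * result are then read back from pkt->ctl and pkt->result below
	 */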
3259 rc = hl_poll_timeout_memory(hdev, (u64) (uintptr_t) &pkt->fence,
3260 timeout, &tmp);
3261
3262 hl_hw_queue_inc_ci_kernel(hdev, GOYA_QUEUE_ID_CPU_PQ);
3263
3264 if (rc == -ETIMEDOUT) {
3265 dev_err(hdev->dev,
3266 "Timeout while waiting for CPU packet fence\n");
3267 goto out;
3268 }
3269
3270 if (tmp == ARMCP_PACKET_FENCE_VAL) {
3271 rc = (pkt->ctl & ARMCP_PKT_CTL_RC_MASK) >>
3272 ARMCP_PKT_CTL_RC_SHIFT;
3273 if (rc) {
3274 dev_err(hdev->dev,
3275 "F/W ERROR %d for CPU packet %d\n",
3276 rc, (pkt->ctl & ARMCP_PKT_CTL_OPCODE_MASK)
3277 >> ARMCP_PKT_CTL_OPCODE_SHIFT);
3278 rc = -EINVAL;
3279 } else if (result) {
3280 *result = pkt->result;
3281 }
3282 } else {
3283 dev_err(hdev->dev, "CPU packet wrong fence value\n");
3284 rc = -EINVAL;
3285 }
3286
3287out:
3288 mutex_unlock(&hdev->send_cpu_message_lock);
3289
3290 hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev, len, pkt);
3291
3292 return rc;
3293}
3294
3295int goya_test_queue(struct hl_device *hdev, u32 hw_queue_id)
3296{
3297 struct packet_msg_prot *fence_pkt;
3298 dma_addr_t pkt_dma_addr;
3299 u32 fence_val, tmp;
3300 dma_addr_t fence_dma_addr;
3301 u32 *fence_ptr;
3302 int rc;
3303
3304 fence_val = GOYA_QMAN0_FENCE_VAL;
3305
3306 fence_ptr = hdev->asic_funcs->dma_pool_zalloc(hdev, 4, GFP_KERNEL,
3307 &fence_dma_addr);
3308 if (!fence_ptr) {
3309 dev_err(hdev->dev,
3310 "Failed to allocate memory for queue testing\n");
3311 return -ENOMEM;
3312 }
3313
3314 *fence_ptr = 0;
3315
3316 fence_pkt = hdev->asic_funcs->dma_pool_zalloc(hdev,
3317 sizeof(struct packet_msg_prot),
3318 GFP_KERNEL, &pkt_dma_addr);
3319 if (!fence_pkt) {
3320 dev_err(hdev->dev,
3321 "Failed to allocate packet for queue testing\n");
3322 rc = -ENOMEM;
3323 goto free_fence_ptr;
3324 }
3325
3326 fence_pkt->ctl = (PACKET_MSG_PROT << GOYA_PKT_CTL_OPCODE_SHIFT) |
3327 (1 << GOYA_PKT_CTL_EB_SHIFT) |
3328 (1 << GOYA_PKT_CTL_MB_SHIFT);
3329 fence_pkt->value = fence_val;
3330 fence_pkt->addr = fence_dma_addr +
3331 hdev->asic_prop.host_phys_base_address;
3332
3333 rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id,
3334 sizeof(struct packet_msg_prot),
3335 pkt_dma_addr);
3336 if (rc) {
3337 dev_err(hdev->dev,
3338 "Failed to send fence packet\n");
3339 goto free_pkt;
3340 }
3341
3342 rc = hl_poll_timeout_memory(hdev, (u64) (uintptr_t) fence_ptr,
3343 GOYA_TEST_QUEUE_WAIT_USEC, &tmp);
3344
3345 hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id);
3346
3347 if ((!rc) && (tmp == fence_val)) {
3348 dev_info(hdev->dev,
3349 "queue test on H/W queue %d succeeded\n",
3350 hw_queue_id);
3351 } else {
3352 dev_err(hdev->dev,
3353 "H/W queue %d test failed (scratch(0x%08llX) == 0x%08X)\n",
3354 hw_queue_id, (unsigned long long) fence_dma_addr, tmp);
3355 rc = -EINVAL;
3356 }
3357
3358free_pkt:
3359 hdev->asic_funcs->dma_pool_free(hdev, (void *) fence_pkt,
3360 pkt_dma_addr);
3361free_fence_ptr:
3362 hdev->asic_funcs->dma_pool_free(hdev, (void *) fence_ptr,
3363 fence_dma_addr);
3364 return rc;
3365}
3366
3367int goya_test_cpu_queue(struct hl_device *hdev)
3368{
3369 struct armcp_packet test_pkt;
3370 long result;
3371 int rc;
3372
3373 /* cpu_queues_enable flag is always checked in send cpu message */
3374
3375 memset(&test_pkt, 0, sizeof(test_pkt));
3376
3377 test_pkt.ctl = ARMCP_PACKET_TEST << ARMCP_PKT_CTL_OPCODE_SHIFT;
3378 test_pkt.value = ARMCP_PACKET_FENCE_VAL;
3379
3380 rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &test_pkt,
3381 sizeof(test_pkt), HL_DEVICE_TIMEOUT_USEC, &result);
3382
Oded Gabbaya507fbb2019-02-22 21:29:58 +02003383 if (!rc) {
3384 if (result == ARMCP_PACKET_FENCE_VAL)
3385 dev_info(hdev->dev,
3386 "queue test on CPU queue succeeded\n");
3387 else
3388 dev_err(hdev->dev,
3389 "CPU queue test failed (0x%08lX)\n", result);
3390 } else {
3391 dev_err(hdev->dev, "CPU queue test failed, error %d\n", rc);
3392 }
Oded Gabbay9494a8d2019-02-16 00:39:17 +02003393
3394 return rc;
3395}
3396
3397static int goya_test_queues(struct hl_device *hdev)
3398{
3400 int i, rc, ret_val = 0;
3401
3402 for (i = 0 ; i < NUMBER_OF_EXT_HW_QUEUES ; i++) {
3403 rc = goya_test_queue(hdev, i);
3404 if (rc)
3405 ret_val = -EINVAL;
3406 }
3407
3408 if (hdev->cpu_queues_enable) {
3409		rc = goya_test_cpu_queue(hdev);
3410 if (rc)
3411 ret_val = -EINVAL;
3412 }
3413
3414 return ret_val;
3415}
3416
3417void *goya_dma_pool_zalloc(struct hl_device *hdev, size_t size, gfp_t mem_flags,
3418 dma_addr_t *dma_handle)
3419{
3420 if (size > GOYA_DMA_POOL_BLK_SIZE)
3421 return NULL;
3422
3423 return dma_pool_zalloc(hdev->dma_pool, mem_flags, dma_handle);
3424}
3425
3426void goya_dma_pool_free(struct hl_device *hdev, void *vaddr,
3427 dma_addr_t dma_addr)
3428{
3429 dma_pool_free(hdev->dma_pool, vaddr, dma_addr);
3430}
3431
3432void *goya_cpu_accessible_dma_pool_alloc(struct hl_device *hdev, size_t size,
3433 dma_addr_t *dma_handle)
3434{
3435 u64 kernel_addr;
3436
3437 /* roundup to CPU_PKT_SIZE */
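	/*
	 * Worked example (assuming CPU_PKT_SIZE is a power of two, say 64,
	 * and CPU_PKT_MASK == ~(CPU_PKT_SIZE - 1)): a 100-byte request
	 * becomes (100 + 63) & ~63 == 128 bytes
	 */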
3438 size = (size + (CPU_PKT_SIZE - 1)) & CPU_PKT_MASK;
3439
3440 kernel_addr = gen_pool_alloc(hdev->cpu_accessible_dma_pool, size);
3441
3442 *dma_handle = hdev->cpu_accessible_dma_address +
3443 (kernel_addr - (u64) (uintptr_t) hdev->cpu_accessible_dma_mem);
3444
3445 return (void *) (uintptr_t) kernel_addr;
3446}
3447
3448void goya_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size,
3449 void *vaddr)
3450{
3451 /* roundup to CPU_PKT_SIZE */
3452 size = (size + (CPU_PKT_SIZE - 1)) & CPU_PKT_MASK;
3453
3454 gen_pool_free(hdev->cpu_accessible_dma_pool, (u64) (uintptr_t) vaddr,
3455 size);
3456}
3457
Oded Gabbayeff6f4a2019-02-16 00:39:21 +02003458int goya_dma_map_sg(struct hl_device *hdev, struct scatterlist *sg, int nents,
3459 enum dma_data_direction dir)
3460{
3461 if (!dma_map_sg(&hdev->pdev->dev, sg, nents, dir))
3462 return -ENOMEM;
3463
3464 return 0;
3465}
3466
3467void goya_dma_unmap_sg(struct hl_device *hdev, struct scatterlist *sg,
3468 int nents, enum dma_data_direction dir)
3469{
3470 dma_unmap_sg(&hdev->pdev->dev, sg, nents, dir);
3471}
3472
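/*
 * Count how many LIN_DMA descriptors a scatter-gather table needs.
 * Physically contiguous entries are merged as long as the combined length
 * stays within DMA_MAX_TRANSFER_SIZE, so e.g. two adjacent 4KB chunks cost
 * a single descriptor
 */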
3473u32 goya_get_dma_desc_list_size(struct hl_device *hdev,
3474 struct sg_table *sgt)
3475{
3476 struct scatterlist *sg, *sg_next_iter;
Oded Gabbaye99f16832019-02-24 11:55:26 +02003477 u32 count, dma_desc_cnt;
3478 u64 len, len_next;
Oded Gabbayeff6f4a2019-02-16 00:39:21 +02003479 dma_addr_t addr, addr_next;
3480
3481 dma_desc_cnt = 0;
3482
3483 for_each_sg(sgt->sgl, sg, sgt->nents, count) {
3484
3485 len = sg_dma_len(sg);
3486 addr = sg_dma_address(sg);
3487
3488 if (len == 0)
3489 break;
3490
3491 while ((count + 1) < sgt->nents) {
3492 sg_next_iter = sg_next(sg);
3493 len_next = sg_dma_len(sg_next_iter);
3494 addr_next = sg_dma_address(sg_next_iter);
3495
3496 if (len_next == 0)
3497 break;
3498
3499 if ((addr + len == addr_next) &&
3500 (len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
3501 len += len_next;
3502 count++;
3503 sg = sg_next_iter;
3504 } else {
3505 break;
3506 }
3507 }
3508
3509 dma_desc_cnt++;
3510 }
3511
3512 return dma_desc_cnt * sizeof(struct packet_lin_dma);
3513}
3514
3515static int goya_pin_memory_before_cs(struct hl_device *hdev,
3516 struct hl_cs_parser *parser,
3517 struct packet_lin_dma *user_dma_pkt,
3518 u64 addr, enum dma_data_direction dir)
3519{
3520 struct hl_userptr *userptr;
3521 int rc;
3522
3523 if (hl_userptr_is_pinned(hdev, addr, user_dma_pkt->tsize,
3524 parser->job_userptr_list, &userptr))
3525 goto already_pinned;
3526
3527 userptr = kzalloc(sizeof(*userptr), GFP_ATOMIC);
3528 if (!userptr)
3529 return -ENOMEM;
3530
3531 rc = hl_pin_host_memory(hdev, addr, user_dma_pkt->tsize, userptr);
3532 if (rc)
3533 goto free_userptr;
3534
3535 list_add_tail(&userptr->job_node, parser->job_userptr_list);
3536
3537 rc = hdev->asic_funcs->asic_dma_map_sg(hdev, userptr->sgt->sgl,
3538 userptr->sgt->nents, dir);
3539 if (rc) {
3540 dev_err(hdev->dev, "failed to map sgt with DMA region\n");
3541 goto unpin_memory;
3542 }
3543
3544 userptr->dma_mapped = true;
3545 userptr->dir = dir;
3546
3547already_pinned:
3548 parser->patched_cb_size +=
3549 goya_get_dma_desc_list_size(hdev, userptr->sgt);
3550
3551 return 0;
3552
3553unpin_memory:
3554 hl_unpin_host_memory(hdev, userptr);
3555free_userptr:
3556 kfree(userptr);
3557 return rc;
3558}
3559
3560static int goya_validate_dma_pkt_host(struct hl_device *hdev,
3561 struct hl_cs_parser *parser,
3562 struct packet_lin_dma *user_dma_pkt)
3563{
3564 u64 device_memory_addr, addr;
3565 enum dma_data_direction dir;
3566 enum goya_dma_direction user_dir;
3567 bool sram_addr = true;
3568 bool skip_host_mem_pin = false;
3569 bool user_memset;
3570 int rc = 0;
3571
3572 user_dir = (user_dma_pkt->ctl & GOYA_PKT_LIN_DMA_CTL_DMA_DIR_MASK) >>
3573 GOYA_PKT_LIN_DMA_CTL_DMA_DIR_SHIFT;
3574
3575 user_memset = (user_dma_pkt->ctl & GOYA_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
3576 GOYA_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
3577
3578 switch (user_dir) {
3579 case DMA_HOST_TO_DRAM:
3580 dev_dbg(hdev->dev, "DMA direction is HOST --> DRAM\n");
3581 dir = DMA_TO_DEVICE;
3582 sram_addr = false;
3583 addr = user_dma_pkt->src_addr;
3584 device_memory_addr = user_dma_pkt->dst_addr;
3585 if (user_memset)
3586 skip_host_mem_pin = true;
3587 break;
3588
3589 case DMA_DRAM_TO_HOST:
3590 dev_dbg(hdev->dev, "DMA direction is DRAM --> HOST\n");
3591 dir = DMA_FROM_DEVICE;
3592 sram_addr = false;
3593 addr = user_dma_pkt->dst_addr;
3594 device_memory_addr = user_dma_pkt->src_addr;
3595 break;
3596
3597 case DMA_HOST_TO_SRAM:
3598 dev_dbg(hdev->dev, "DMA direction is HOST --> SRAM\n");
3599 dir = DMA_TO_DEVICE;
3600 addr = user_dma_pkt->src_addr;
3601 device_memory_addr = user_dma_pkt->dst_addr;
3602 if (user_memset)
3603 skip_host_mem_pin = true;
3604 break;
3605
3606 case DMA_SRAM_TO_HOST:
3607 dev_dbg(hdev->dev, "DMA direction is SRAM --> HOST\n");
3608 dir = DMA_FROM_DEVICE;
3609 addr = user_dma_pkt->dst_addr;
3610 device_memory_addr = user_dma_pkt->src_addr;
3611 break;
3612 default:
3613 dev_err(hdev->dev, "DMA direction is undefined\n");
3614 return -EFAULT;
3615 }
3616
3617 if (parser->ctx_id != HL_KERNEL_ASID_ID) {
3618 if (sram_addr) {
3619 if (!hl_mem_area_inside_range(device_memory_addr,
3620 user_dma_pkt->tsize,
3621 hdev->asic_prop.sram_user_base_address,
3622 hdev->asic_prop.sram_end_address)) {
3623
3624 dev_err(hdev->dev,
3625 "SRAM address 0x%llx + 0x%x is invalid\n",
3626 device_memory_addr,
3627 user_dma_pkt->tsize);
3628 return -EFAULT;
3629 }
3630 } else {
3631 if (!hl_mem_area_inside_range(device_memory_addr,
3632 user_dma_pkt->tsize,
3633 hdev->asic_prop.dram_user_base_address,
3634 hdev->asic_prop.dram_end_address)) {
3635
3636 dev_err(hdev->dev,
3637 "DRAM address 0x%llx + 0x%x is invalid\n",
3638 device_memory_addr,
3639 user_dma_pkt->tsize);
3640 return -EFAULT;
3641 }
3642 }
3643 }
3644
3645 if (skip_host_mem_pin)
3646 parser->patched_cb_size += sizeof(*user_dma_pkt);
3647 else {
3648 if ((dir == DMA_TO_DEVICE) &&
3649 (parser->hw_queue_id > GOYA_QUEUE_ID_DMA_1)) {
3650 dev_err(hdev->dev,
3651 "Can't DMA from host on queue other then 1\n");
3652 return -EFAULT;
3653 }
3654
3655 rc = goya_pin_memory_before_cs(hdev, parser, user_dma_pkt,
3656 addr, dir);
3657 }
3658
3659 return rc;
3660}
3661
3662static int goya_validate_dma_pkt_no_host(struct hl_device *hdev,
3663 struct hl_cs_parser *parser,
3664 struct packet_lin_dma *user_dma_pkt)
3665{
3666 u64 sram_memory_addr, dram_memory_addr;
3667 enum goya_dma_direction user_dir;
3668
3669 user_dir = (user_dma_pkt->ctl & GOYA_PKT_LIN_DMA_CTL_DMA_DIR_MASK) >>
3670 GOYA_PKT_LIN_DMA_CTL_DMA_DIR_SHIFT;
3671
3672 if (user_dir == DMA_DRAM_TO_SRAM) {
3673 dev_dbg(hdev->dev, "DMA direction is DRAM --> SRAM\n");
3674 dram_memory_addr = user_dma_pkt->src_addr;
3675 sram_memory_addr = user_dma_pkt->dst_addr;
3676 } else {
3677 dev_dbg(hdev->dev, "DMA direction is SRAM --> DRAM\n");
3678 sram_memory_addr = user_dma_pkt->src_addr;
3679 dram_memory_addr = user_dma_pkt->dst_addr;
3680 }
3681
3682 if (!hl_mem_area_inside_range(sram_memory_addr, user_dma_pkt->tsize,
3683 hdev->asic_prop.sram_user_base_address,
3684 hdev->asic_prop.sram_end_address)) {
3685 dev_err(hdev->dev, "SRAM address 0x%llx + 0x%x is invalid\n",
3686 sram_memory_addr, user_dma_pkt->tsize);
3687 return -EFAULT;
3688 }
3689
3690 if (!hl_mem_area_inside_range(dram_memory_addr, user_dma_pkt->tsize,
3691 hdev->asic_prop.dram_user_base_address,
3692 hdev->asic_prop.dram_end_address)) {
3693 dev_err(hdev->dev, "DRAM address 0x%llx + 0x%x is invalid\n",
3694 dram_memory_addr, user_dma_pkt->tsize);
3695 return -EFAULT;
3696 }
3697
3698 parser->patched_cb_size += sizeof(*user_dma_pkt);
3699
3700 return 0;
3701}
3702
3703static int goya_validate_dma_pkt_no_mmu(struct hl_device *hdev,
3704 struct hl_cs_parser *parser,
3705 struct packet_lin_dma *user_dma_pkt)
3706{
3707 enum goya_dma_direction user_dir;
3708 int rc;
3709
3710 dev_dbg(hdev->dev, "DMA packet details:\n");
3711 dev_dbg(hdev->dev, "source == 0x%llx\n", user_dma_pkt->src_addr);
3712 dev_dbg(hdev->dev, "destination == 0x%llx\n", user_dma_pkt->dst_addr);
3713 dev_dbg(hdev->dev, "size == %u\n", user_dma_pkt->tsize);
3714
3715 user_dir = (user_dma_pkt->ctl & GOYA_PKT_LIN_DMA_CTL_DMA_DIR_MASK) >>
3716 GOYA_PKT_LIN_DMA_CTL_DMA_DIR_SHIFT;
3717
3718 /*
3719 * Special handling for DMA with size 0. The H/W has a bug where
3720 * this can cause the QMAN DMA to get stuck, so block it here.
3721 */
3722 if (user_dma_pkt->tsize == 0) {
3723 dev_err(hdev->dev,
3724 "Got DMA with size 0, might reset the device\n");
3725 return -EINVAL;
3726 }
3727
3728 if ((user_dir == DMA_DRAM_TO_SRAM) || (user_dir == DMA_SRAM_TO_DRAM))
3729 rc = goya_validate_dma_pkt_no_host(hdev, parser, user_dma_pkt);
3730 else
3731 rc = goya_validate_dma_pkt_host(hdev, parser, user_dma_pkt);
3732
3733 return rc;
3734}
3735
3736static int goya_validate_dma_pkt_mmu(struct hl_device *hdev,
3737 struct hl_cs_parser *parser,
3738 struct packet_lin_dma *user_dma_pkt)
3739{
3740 dev_dbg(hdev->dev, "DMA packet details:\n");
3741 dev_dbg(hdev->dev, "source == 0x%llx\n", user_dma_pkt->src_addr);
3742 dev_dbg(hdev->dev, "destination == 0x%llx\n", user_dma_pkt->dst_addr);
3743 dev_dbg(hdev->dev, "size == %u\n", user_dma_pkt->tsize);
3744
3745 /*
3746 * WA for HW-23.
3747 * We can't allow the user to read from the host using QMANs other than 1.
3748 */
3749 if (parser->hw_queue_id > GOYA_QUEUE_ID_DMA_1 &&
3750 hl_mem_area_inside_range(user_dma_pkt->src_addr,
3751 user_dma_pkt->tsize,
3752 hdev->asic_prop.va_space_host_start_address,
3753 hdev->asic_prop.va_space_host_end_address)) {
3754 dev_err(hdev->dev,
3755 "Can't DMA from host on queue other then 1\n");
3756 return -EFAULT;
3757 }
3758
3759 if (user_dma_pkt->tsize == 0) {
3760 dev_err(hdev->dev,
3761 "Got DMA with size 0, might reset the device\n");
3762 return -EINVAL;
3763 }
3764
3765 parser->patched_cb_size += sizeof(*user_dma_pkt);
3766
3767 return 0;
3768}
3769
3770static int goya_validate_wreg32(struct hl_device *hdev,
3771 struct hl_cs_parser *parser,
3772 struct packet_wreg32 *wreg_pkt)
3773{
3774 struct goya_device *goya = hdev->asic_specific;
3775 u32 sob_start_addr, sob_end_addr;
3776 u16 reg_offset;
3777
3778 reg_offset = wreg_pkt->ctl & GOYA_PKT_WREG32_CTL_REG_OFFSET_MASK;
3779
3780 dev_dbg(hdev->dev, "WREG32 packet details:\n");
3781 dev_dbg(hdev->dev, "reg_offset == 0x%x\n", reg_offset);
3782 dev_dbg(hdev->dev, "value == 0x%x\n", wreg_pkt->value);
3783
3784 if (reg_offset != (mmDMA_CH_1_WR_COMP_ADDR_LO & 0xFFFF)) {
3785 dev_err(hdev->dev, "WREG32 packet with illegal address 0x%x\n",
3786 reg_offset);
3787 return -EPERM;
3788 }
3789
3790 /*
3791 * With MMU, DMA channels are not secured, so it doesn't matter where
3792 * the WR COMP is written, because the transaction will go out with the
3793 * non-secured property
3794 */
3795 if (goya->hw_cap_initialized & HW_CAP_MMU)
3796 return 0;
3797
3798 sob_start_addr = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
3799 sob_end_addr = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_1023);
3800
3801 if ((wreg_pkt->value < sob_start_addr) ||
3802 (wreg_pkt->value > sob_end_addr)) {
3803
3804 dev_err(hdev->dev, "WREG32 packet with illegal value 0x%x\n",
3805 wreg_pkt->value);
3806 return -EPERM;
3807 }
3808
3809 return 0;
3810}
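
/*
 * Sketch of the non-MMU check above (descriptive, not authoritative):
 * the only register a user WREG32 may touch is DMA_CH_1_WR_COMP_ADDR_LO,
 * and the value written must fall inside the sync-object window
 * [lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0),
 *  lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_1023)];
 * anything else is rejected with -EPERM.
 */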
3811
3812static int goya_validate_cb(struct hl_device *hdev,
3813 struct hl_cs_parser *parser, bool is_mmu)
3814{
3815 u32 cb_parsed_length = 0;
3816 int rc = 0;
3817
3818 parser->patched_cb_size = 0;
3819
3820 /* user_cb_size is more than 0 so the loop will always be executed */
3821 while (cb_parsed_length < parser->user_cb_size) {
3822 enum packet_id pkt_id;
3823 u16 pkt_size;
3824 void *user_pkt;
3825
3826 user_pkt = (void *) (uintptr_t)
3827 (parser->user_cb->kernel_address + cb_parsed_length);
3828
3829 pkt_id = (enum packet_id) (((*(u64 *) user_pkt) &
3830 PACKET_HEADER_PACKET_ID_MASK) >>
3831 PACKET_HEADER_PACKET_ID_SHIFT);
3832
3833 pkt_size = goya_packet_sizes[pkt_id];
3834 cb_parsed_length += pkt_size;
3835 if (cb_parsed_length > parser->user_cb_size) {
3836 dev_err(hdev->dev,
3837 "packet 0x%x is out of CB boundary\n", pkt_id);
3838 rc = -EINVAL;
3839 break;
3840 }
3841
3842 switch (pkt_id) {
3843 case PACKET_WREG_32:
3844 /*
3845 * Although it is validated after the copy in patch_cb(), we
3846 * need to validate here as well because patch_cb() is not
3847 * called in the MMU path, while this function is
3848 */
3849 rc = goya_validate_wreg32(hdev, parser, user_pkt);
3850 break;
3851
3852 case PACKET_WREG_BULK:
3853 dev_err(hdev->dev,
3854 "User not allowed to use WREG_BULK\n");
3855 rc = -EPERM;
3856 break;
3857
3858 case PACKET_MSG_PROT:
3859 dev_err(hdev->dev,
3860 "User not allowed to use MSG_PROT\n");
3861 rc = -EPERM;
3862 break;
3863
3864 case PACKET_CP_DMA:
3865 dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
3866 rc = -EPERM;
3867 break;
3868
3869 case PACKET_STOP:
3870 dev_err(hdev->dev, "User not allowed to use STOP\n");
3871 rc = -EPERM;
3872 break;
3873
3874 case PACKET_LIN_DMA:
3875 if (is_mmu)
3876 rc = goya_validate_dma_pkt_mmu(hdev, parser,
3877 user_pkt);
3878 else
3879 rc = goya_validate_dma_pkt_no_mmu(hdev, parser,
3880 user_pkt);
3881 break;
3882
3883 case PACKET_MSG_LONG:
3884 case PACKET_MSG_SHORT:
3885 case PACKET_FENCE:
3886 case PACKET_NOP:
3887 parser->patched_cb_size += pkt_size;
3888 break;
3889
3890 default:
3891 dev_err(hdev->dev, "Invalid packet header 0x%x\n",
3892 pkt_id);
3893 rc = -EINVAL;
3894 break;
3895 }
3896
3897 if (rc)
3898 break;
3899 }
3900
3901 /*
3902 * The new CB should have space at the end for two MSG_PROT packets:
3903 * 1. A packet that will act as a completion packet
3904 * 2. A packet that will generate an MSI-X interrupt
3905 */
3906 parser->patched_cb_size += sizeof(struct packet_msg_prot) * 2;
3907
3908 return rc;
3909}
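
/*
 * Size-accounting example (assumed numbers): a user CB holding one
 * MSG_LONG and one LIN_DMA that expands to three descriptors ends up
 * with patched_cb_size = (size of the MSG_LONG packet) +
 * 3 * sizeof(struct packet_lin_dma) +
 * 2 * sizeof(struct packet_msg_prot) for the trailing completion and
 * MSI-X packets.
 */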
3910
3911static int goya_patch_dma_packet(struct hl_device *hdev,
3912 struct hl_cs_parser *parser,
3913 struct packet_lin_dma *user_dma_pkt,
3914 struct packet_lin_dma *new_dma_pkt,
3915 u32 *new_dma_pkt_size)
3916{
3917 struct hl_userptr *userptr;
3918 struct scatterlist *sg, *sg_next_iter;
Oded Gabbaye99f16832019-02-24 11:55:26 +02003919 u32 count, dma_desc_cnt;
3920 u64 len, len_next;
Oded Gabbayeff6f4a2019-02-16 00:39:21 +02003921 dma_addr_t dma_addr, dma_addr_next;
3922 enum goya_dma_direction user_dir;
3923 u64 device_memory_addr, addr;
3924 enum dma_data_direction dir;
3925 struct sg_table *sgt;
3926 bool skip_host_mem_pin = false;
3927 bool user_memset;
3928 u32 user_rdcomp_mask, user_wrcomp_mask;
3929
3930 user_dir = (user_dma_pkt->ctl & GOYA_PKT_LIN_DMA_CTL_DMA_DIR_MASK) >>
3931 GOYA_PKT_LIN_DMA_CTL_DMA_DIR_SHIFT;
3932
3933 user_memset = (user_dma_pkt->ctl & GOYA_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
3934 GOYA_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
3935
3936 if ((user_dir == DMA_DRAM_TO_SRAM) || (user_dir == DMA_SRAM_TO_DRAM) ||
3937 (user_dma_pkt->tsize == 0)) {
3938 memcpy(new_dma_pkt, user_dma_pkt, sizeof(*new_dma_pkt));
3939 *new_dma_pkt_size = sizeof(*new_dma_pkt);
3940 return 0;
3941 }
3942
3943 if ((user_dir == DMA_HOST_TO_DRAM) || (user_dir == DMA_HOST_TO_SRAM)) {
3944 addr = user_dma_pkt->src_addr;
3945 device_memory_addr = user_dma_pkt->dst_addr;
3946 dir = DMA_TO_DEVICE;
3947 if (user_memset)
3948 skip_host_mem_pin = true;
3949 } else {
3950 addr = user_dma_pkt->dst_addr;
3951 device_memory_addr = user_dma_pkt->src_addr;
3952 dir = DMA_FROM_DEVICE;
3953 }
3954
3955 if ((!skip_host_mem_pin) &&
3956 (hl_userptr_is_pinned(hdev, addr, user_dma_pkt->tsize,
3957 parser->job_userptr_list, &userptr) == false)) {
3958 dev_err(hdev->dev, "Userptr 0x%llx + 0x%x NOT mapped\n",
3959 addr, user_dma_pkt->tsize);
3960 return -EFAULT;
3961 }
3962
3963 if ((user_memset) && (dir == DMA_TO_DEVICE)) {
3964 memcpy(new_dma_pkt, user_dma_pkt, sizeof(*user_dma_pkt));
3965 *new_dma_pkt_size = sizeof(*user_dma_pkt);
3966 return 0;
3967 }
3968
3969 user_rdcomp_mask =
3970 (user_dma_pkt->ctl & GOYA_PKT_LIN_DMA_CTL_RDCOMP_MASK);
3971
3972 user_wrcomp_mask =
3973 (user_dma_pkt->ctl & GOYA_PKT_LIN_DMA_CTL_WRCOMP_MASK);
3974
3975 sgt = userptr->sgt;
3976 dma_desc_cnt = 0;
3977
3978 for_each_sg(sgt->sgl, sg, sgt->nents, count) {
3979 len = sg_dma_len(sg);
3980 dma_addr = sg_dma_address(sg);
3981
3982 if (len == 0)
3983 break;
3984
3985 while ((count + 1) < sgt->nents) {
3986 sg_next_iter = sg_next(sg);
3987 len_next = sg_dma_len(sg_next_iter);
3988 dma_addr_next = sg_dma_address(sg_next_iter);
3989
3990 if (len_next == 0)
3991 break;
3992
3993 if ((dma_addr + len == dma_addr_next) &&
3994 (len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
3995 len += len_next;
3996 count++;
3997 sg = sg_next_iter;
3998 } else {
3999 break;
4000 }
4001 }
4002
4003 new_dma_pkt->ctl = user_dma_pkt->ctl;
4004 if (likely(dma_desc_cnt))
4005 new_dma_pkt->ctl &= ~GOYA_PKT_CTL_EB_MASK;
4006 new_dma_pkt->ctl &= ~(GOYA_PKT_LIN_DMA_CTL_RDCOMP_MASK |
4007 GOYA_PKT_LIN_DMA_CTL_WRCOMP_MASK);
4008 new_dma_pkt->tsize = len;
4009
4010 dma_addr += hdev->asic_prop.host_phys_base_address;
4011
4012 if (dir == DMA_TO_DEVICE) {
4013 new_dma_pkt->src_addr = dma_addr;
4014 new_dma_pkt->dst_addr = device_memory_addr;
4015 } else {
4016 new_dma_pkt->src_addr = device_memory_addr;
4017 new_dma_pkt->dst_addr = dma_addr;
4018 }
4019
4020 if (!user_memset)
4021 device_memory_addr += len;
4022 dma_desc_cnt++;
4023 new_dma_pkt++;
4024 }
4025
4026 if (!dma_desc_cnt) {
4027 dev_err(hdev->dev,
4028 "Error of 0 SG entries when patching DMA packet\n");
4029 return -EFAULT;
4030 }
4031
4032 /* Fix the last dma packet - rdcomp/wrcomp must be as user set them */
4033 new_dma_pkt--;
4034 new_dma_pkt->ctl |= (user_rdcomp_mask | user_wrcomp_mask);
4035
4036 *new_dma_pkt_size = dma_desc_cnt * sizeof(struct packet_lin_dma);
4037
4038 return 0;
4039}
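
/*
 * Patching example (hypothetical values): a host-to-DRAM transfer whose
 * pinned range maps to SG entries {0x10000 + 0x1000, 0x11000 + 0x1000,
 * 0x20000 + 0x2000} becomes two LIN_DMA descriptors: 0x2000 bytes from
 * 0x10000 and 0x2000 bytes from 0x20000. EB is kept only on the first
 * descriptor and the user's RDCOMP/WRCOMP bits are restored only on the
 * last one, so a completion is signaled once, after the final descriptor.
 */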
4040
4041static int goya_patch_cb(struct hl_device *hdev,
4042 struct hl_cs_parser *parser)
4043{
4044 u32 cb_parsed_length = 0;
4045 u32 cb_patched_cur_length = 0;
4046 int rc = 0;
4047
4048 /* user_cb_size is more than 0 so the loop will always be executed */
4049 while (cb_parsed_length < parser->user_cb_size) {
4050 enum packet_id pkt_id;
4051 u16 pkt_size;
4052 u32 new_pkt_size = 0;
4053 void *user_pkt, *kernel_pkt;
4054
4055 user_pkt = (void *) (uintptr_t)
4056 (parser->user_cb->kernel_address + cb_parsed_length);
4057 kernel_pkt = (void *) (uintptr_t)
4058 (parser->patched_cb->kernel_address +
4059 cb_patched_cur_length);
4060
4061 pkt_id = (enum packet_id) (((*(u64 *) user_pkt) &
4062 PACKET_HEADER_PACKET_ID_MASK) >>
4063 PACKET_HEADER_PACKET_ID_SHIFT);
4064
4065 pkt_size = goya_packet_sizes[pkt_id];
4066 cb_parsed_length += pkt_size;
4067 if (cb_parsed_length > parser->user_cb_size) {
4068 dev_err(hdev->dev,
4069 "packet 0x%x is out of CB boundary\n", pkt_id);
4070 rc = -EINVAL;
4071 break;
4072 }
4073
4074 switch (pkt_id) {
4075 case PACKET_LIN_DMA:
4076 rc = goya_patch_dma_packet(hdev, parser, user_pkt,
4077 kernel_pkt, &new_pkt_size);
4078 cb_patched_cur_length += new_pkt_size;
4079 break;
4080
4081 case PACKET_WREG_32:
4082 memcpy(kernel_pkt, user_pkt, pkt_size);
4083 cb_patched_cur_length += pkt_size;
4084 rc = goya_validate_wreg32(hdev, parser, kernel_pkt);
4085 break;
4086
4087 case PACKET_WREG_BULK:
4088 dev_err(hdev->dev,
4089 "User not allowed to use WREG_BULK\n");
4090 rc = -EPERM;
4091 break;
4092
4093 case PACKET_MSG_PROT:
4094 dev_err(hdev->dev,
4095 "User not allowed to use MSG_PROT\n");
4096 rc = -EPERM;
4097 break;
4098
4099 case PACKET_CP_DMA:
4100 dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
4101 rc = -EPERM;
4102 break;
4103
4104 case PACKET_STOP:
4105 dev_err(hdev->dev, "User not allowed to use STOP\n");
4106 rc = -EPERM;
4107 break;
4108
4109 case PACKET_MSG_LONG:
4110 case PACKET_MSG_SHORT:
4111 case PACKET_FENCE:
4112 case PACKET_NOP:
4113 memcpy(kernel_pkt, user_pkt, pkt_size);
4114 cb_patched_cur_length += pkt_size;
4115 break;
4116
4117 default:
4118 dev_err(hdev->dev, "Invalid packet header 0x%x\n",
4119 pkt_id);
4120 rc = -EINVAL;
4121 break;
4122 }
4123
4124 if (rc)
4125 break;
4126 }
4127
4128 return rc;
4129}
4130
4131static int goya_parse_cb_mmu(struct hl_device *hdev,
4132 struct hl_cs_parser *parser)
4133{
4134 u64 patched_cb_handle;
4135 u32 patched_cb_size;
4136 struct hl_cb *user_cb;
4137 int rc;
4138
4139 /*
4140 * The new CB should have space at the end for two MSG_PROT packets:
4141 * 1. A packet that will act as a completion packet
4142 * 2. A packet that will generate an MSI-X interrupt
4143 */
4144 parser->patched_cb_size = parser->user_cb_size +
4145 sizeof(struct packet_msg_prot) * 2;
4146
4147 rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr,
4148 parser->patched_cb_size,
4149 &patched_cb_handle, HL_KERNEL_ASID_ID);
4150
4151 if (rc) {
4152 dev_err(hdev->dev,
4153 "Failed to allocate patched CB for DMA CS %d\n",
4154 rc);
4155 return rc;
4156 }
4157
4158 patched_cb_handle >>= PAGE_SHIFT;
4159 parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
4160 (u32) patched_cb_handle);
4161 /* hl_cb_get should never fail here so use kernel WARN */
4162 WARN(!parser->patched_cb, "DMA CB handle invalid 0x%x\n",
4163 (u32) patched_cb_handle);
4164 if (!parser->patched_cb) {
4165 rc = -EFAULT;
4166 goto out;
4167 }
4168
4169 /*
4170 * The check that parser->user_cb_size <= parser->user_cb->size was done
4171 * in validate_queue_index().
4172 */
4173 memcpy((void *) (uintptr_t) parser->patched_cb->kernel_address,
4174 (void *) (uintptr_t) parser->user_cb->kernel_address,
4175 parser->user_cb_size);
4176
4177 patched_cb_size = parser->patched_cb_size;
4178
4179 /* validate patched CB instead of user CB */
4180 user_cb = parser->user_cb;
4181 parser->user_cb = parser->patched_cb;
4182 rc = goya_validate_cb(hdev, parser, true);
4183 parser->user_cb = user_cb;
4184
4185 if (rc) {
4186 hl_cb_put(parser->patched_cb);
4187 goto out;
4188 }
4189
4190 if (patched_cb_size != parser->patched_cb_size) {
4191 dev_err(hdev->dev, "user CB size mismatch\n");
4192 hl_cb_put(parser->patched_cb);
4193 rc = -EINVAL;
4194 goto out;
4195 }
4196
4197out:
4198 /*
4199 * Always call cb destroy here because we still hold one reference
4200 * from the earlier cb_get. After the job completes, cb_put will
4201 * release it, but here we want to remove the CB from the idr
4203 */
4204 hl_cb_destroy(hdev, &hdev->kernel_cb_mgr,
4205 patched_cb_handle << PAGE_SHIFT);
4206
4207 return rc;
4208}
4209
4210int goya_parse_cb_no_mmu(struct hl_device *hdev, struct hl_cs_parser *parser)
4211{
4212 u64 patched_cb_handle;
4213 int rc;
4214
4215 rc = goya_validate_cb(hdev, parser, false);
4216
4217 if (rc)
4218 goto free_userptr;
4219
4220 rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr,
4221 parser->patched_cb_size,
4222 &patched_cb_handle, HL_KERNEL_ASID_ID);
4223 if (rc) {
4224 dev_err(hdev->dev,
4225 "Failed to allocate patched CB for DMA CS %d\n", rc);
4226 goto free_userptr;
4227 }
4228
4229 patched_cb_handle >>= PAGE_SHIFT;
4230 parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
4231 (u32) patched_cb_handle);
4232 /* hl_cb_get should never fail here so use kernel WARN */
4233 WARN(!parser->patched_cb, "DMA CB handle invalid 0x%x\n",
4234 (u32) patched_cb_handle);
4235 if (!parser->patched_cb) {
4236 rc = -EFAULT;
4237 goto out;
4238 }
4239
4240 rc = goya_patch_cb(hdev, parser);
4241
4242 if (rc)
4243 hl_cb_put(parser->patched_cb);
4244
4245out:
4246 /*
4247 * Always call cb destroy here because we still hold one reference
4248 * from the earlier cb_get. After the job completes, cb_put will
4249 * release it, but here we want to remove the CB from the idr
4251 */
4252 hl_cb_destroy(hdev, &hdev->kernel_cb_mgr,
4253 patched_cb_handle << PAGE_SHIFT);
4254
4255free_userptr:
4256 if (rc)
4257 hl_userptr_delete_list(hdev, parser->job_userptr_list);
4258 return rc;
4259}
4260
4261int goya_parse_cb_no_ext_queue(struct hl_device *hdev,
4262 struct hl_cs_parser *parser)
4263{
4264 struct asic_fixed_properties *asic_prop = &hdev->asic_prop;
4265 struct goya_device *goya = hdev->asic_specific;
4266
4267 if (!(goya->hw_cap_initialized & HW_CAP_MMU)) {
4268 /* For internal queue jobs, just check if cb address is valid */
4269 if (hl_mem_area_inside_range(
4270 (u64) (uintptr_t) parser->user_cb,
4271 parser->user_cb_size,
4272 asic_prop->sram_user_base_address,
4273 asic_prop->sram_end_address))
4274 return 0;
4275
4276 if (hl_mem_area_inside_range(
4277 (u64) (uintptr_t) parser->user_cb,
4278 parser->user_cb_size,
4279 asic_prop->dram_user_base_address,
4280 asic_prop->dram_end_address))
4281 return 0;
4282
4283 dev_err(hdev->dev,
4284 "Internal CB address 0x%llx + 0x%x is not in SRAM nor in DRAM\n",
4285 (u64) (uintptr_t) parser->user_cb,
4286 parser->user_cb_size);
4287
4288 return -EFAULT;
4289 }
4290
4291 return 0;
4292}
4293
4294int goya_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser)
4295{
4296 struct goya_device *goya = hdev->asic_specific;
4297
4298 if (!parser->ext_queue)
4299 return goya_parse_cb_no_ext_queue(hdev, parser);
4300
4301 if ((goya->hw_cap_initialized & HW_CAP_MMU) && parser->use_virt_addr)
4302 return goya_parse_cb_mmu(hdev, parser);
4303 else
4304 return goya_parse_cb_no_mmu(hdev, parser);
4305}
4306
4307void goya_add_end_of_cb_packets(u64 kernel_address, u32 len, u64 cq_addr,
4308 u32 cq_val, u32 msix_vec)
4309{
4310 struct packet_msg_prot *cq_pkt;
4311
4312 cq_pkt = (struct packet_msg_prot *) (uintptr_t)
4313 (kernel_address + len - (sizeof(struct packet_msg_prot) * 2));
4314
4315 cq_pkt->ctl = (PACKET_MSG_PROT << GOYA_PKT_CTL_OPCODE_SHIFT) |
4316 (1 << GOYA_PKT_CTL_EB_SHIFT) |
4317 (1 << GOYA_PKT_CTL_MB_SHIFT);
4318 cq_pkt->value = cq_val;
4319 cq_pkt->addr = cq_addr;
4320
4321 cq_pkt++;
4322
4323 cq_pkt->ctl = (PACKET_MSG_PROT << GOYA_PKT_CTL_OPCODE_SHIFT) |
4324 (1 << GOYA_PKT_CTL_MB_SHIFT);
4325 cq_pkt->value = msix_vec & 0x7FF;
4326 cq_pkt->addr = CFG_BASE + mmPCIE_DBI_MSIX_DOORBELL_OFF;
4327}
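
/*
 * Resulting CB tail layout (sketch): the last two packets of every
 * patched CB are the MSG_PROT packets built above -
 * at [len - 2 * sizeof(struct packet_msg_prot)]: write cq_val to cq_addr
 * (with EB set), and at [len - sizeof(struct packet_msg_prot)]: write
 * the MSI-X vector to the doorbell register to raise the interrupt.
 */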
4328
Oded Gabbay1251f232019-02-16 00:39:18 +02004329static void goya_update_eq_ci(struct hl_device *hdev, u32 val)
4330{
4331 WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_6, val);
4332}
4333
Oded Gabbayeff6f4a2019-02-16 00:39:21 +02004334int goya_context_switch(struct hl_device *hdev, u32 asid)
4335{
4336 struct asic_fixed_properties *prop = &hdev->asic_prop;
4337 struct packet_lin_dma *clear_sram_pkt;
4338 struct hl_cs_parser parser;
4339 struct hl_cs_job *job;
4340 u32 cb_size;
4341 struct hl_cb *cb;
4342 int rc;
4343
4344 cb = hl_cb_kernel_create(hdev, PAGE_SIZE);
4345 if (!cb)
4346 return -EFAULT;
4347
4348 clear_sram_pkt = (struct packet_lin_dma *)
4349 (uintptr_t) cb->kernel_address;
4350
4351 memset(clear_sram_pkt, 0, sizeof(*clear_sram_pkt));
4352 cb_size = sizeof(*clear_sram_pkt);
4353
4354 clear_sram_pkt->ctl = ((PACKET_LIN_DMA << GOYA_PKT_CTL_OPCODE_SHIFT) |
4355 (DMA_HOST_TO_SRAM << GOYA_PKT_LIN_DMA_CTL_DMA_DIR_SHIFT) |
4356 (1 << GOYA_PKT_LIN_DMA_CTL_MEMSET_SHIFT) |
4357 (1 << GOYA_PKT_LIN_DMA_CTL_WO_SHIFT) |
4358 (1 << GOYA_PKT_CTL_RB_SHIFT) |
4359 (1 << GOYA_PKT_CTL_MB_SHIFT));
4360
4361 clear_sram_pkt->src_addr = 0x7777777777777777ull;
4362 clear_sram_pkt->dst_addr = prop->sram_base_address;
4363 if (hdev->pldm)
4364 clear_sram_pkt->tsize = 0x10000;
4365 else
4366 clear_sram_pkt->tsize = prop->sram_size;
4367
4368 job = hl_cs_allocate_job(hdev, true);
4369 if (!job) {
4370 dev_err(hdev->dev, "Failed to allocate a new job\n");
4371 rc = -ENOMEM;
4372 goto release_cb;
4373 }
4374
4375 job->id = 0;
4376 job->user_cb = cb;
4377 job->user_cb->cs_cnt++;
4378 job->user_cb_size = cb_size;
4379 job->hw_queue_id = GOYA_QUEUE_ID_DMA_0;
4380
Oded Gabbayc2164772019-02-16 00:39:24 +02004381 hl_debugfs_add_job(hdev, job);
4382
Oded Gabbayeff6f4a2019-02-16 00:39:21 +02004383 parser.ctx_id = HL_KERNEL_ASID_ID;
4384 parser.cs_sequence = 0;
4385 parser.job_id = job->id;
4386 parser.hw_queue_id = job->hw_queue_id;
4387 parser.job_userptr_list = &job->userptr_list;
4388 parser.user_cb = job->user_cb;
4389 parser.user_cb_size = job->user_cb_size;
4390 parser.ext_queue = job->ext_queue;
4391 parser.use_virt_addr = hdev->mmu_enable;
4392
4393 rc = hdev->asic_funcs->cs_parser(hdev, &parser);
4394 if (rc) {
4395 dev_err(hdev->dev,
4396 "Failed to parse kernel CB during context switch\n");
4397 goto free_job;
4398 }
4399
4400 job->patched_cb = parser.patched_cb;
4401 job->job_cb_size = parser.patched_cb_size;
4402 job->patched_cb->cs_cnt++;
4403
4404 rc = goya_send_job_on_qman0(hdev, job);
4405
Omer Shpigelman0feaf862019-02-16 00:39:22 +02004406 /* no point in setting the asid in case of failure */
4407 if (!rc)
4408 goya_mmu_prepare(hdev, asid);
4409
Oded Gabbayeff6f4a2019-02-16 00:39:21 +02004410 job->patched_cb->cs_cnt--;
4411 hl_cb_put(job->patched_cb);
4412
4413free_job:
4414 hl_userptr_delete_list(hdev, &job->userptr_list);
Oded Gabbayc2164772019-02-16 00:39:24 +02004415 hl_debugfs_remove_job(hdev, job);
Oded Gabbayeff6f4a2019-02-16 00:39:21 +02004416 kfree(job);
4417 cb->cs_cnt--;
4418
4419release_cb:
4420 hl_cb_put(cb);
4421 hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
4422
4423 return rc;
4424}
4425
4426void goya_restore_phase_topology(struct hl_device *hdev)
4427{
4428 int i, num_of_sob_in_longs, num_of_mon_in_longs;
4429
4430 num_of_sob_in_longs =
4431 ((mmSYNC_MNGR_SOB_OBJ_1023 - mmSYNC_MNGR_SOB_OBJ_0) + 4);
4432
4433 num_of_mon_in_longs =
4434 ((mmSYNC_MNGR_MON_STATUS_255 - mmSYNC_MNGR_MON_STATUS_0) + 4);
4435
4436 for (i = 0 ; i < num_of_sob_in_longs ; i += 4)
4437 WREG32(mmSYNC_MNGR_SOB_OBJ_0 + i, 0);
4438
4439 for (i = 0 ; i < num_of_mon_in_longs ; i += 4)
4440 WREG32(mmSYNC_MNGR_MON_STATUS_0 + i, 0);
4441
4442 /* Flush all WREG to prevent race */
4443 i = RREG32(mmSYNC_MNGR_SOB_OBJ_0);
4444}
4445
Oded Gabbayc2164772019-02-16 00:39:24 +02004446/*
4447 * goya_debugfs_read32 - read a 32bit value from a given device address
4448 *
4449 * @hdev: pointer to hl_device structure
4450 * @addr: address in device
4451 * @val: returned value
4452 *
4453 * If the DDR address is not mapped into the default aperture that the
4454 * DDR bar exposes, the function configures the iATU so that the DDR bar
4455 * is positioned at a base address that allows reading from the required
4456 * address. Configuring the iATU during normal operation can lead to
4457 * undefined behavior and should therefore be done with extreme care
4458 *
4459 */
4460int goya_debugfs_read32(struct hl_device *hdev, u64 addr, u32 *val)
4461{
4462 struct asic_fixed_properties *prop = &hdev->asic_prop;
4463 int rc = 0;
4464
4465 if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
4466 *val = RREG32(addr - CFG_BASE);
4467
4468 } else if ((addr >= SRAM_BASE_ADDR) &&
4469 (addr < SRAM_BASE_ADDR + SRAM_SIZE)) {
4470
4471 *val = readl(hdev->pcie_bar[SRAM_CFG_BAR_ID] +
4472 (addr - SRAM_BASE_ADDR));
4473
4474 } else if ((addr >= DRAM_PHYS_BASE) &&
4475 (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size)) {
4476
4477 u64 bar_base_addr = DRAM_PHYS_BASE +
4478 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
4479
4480 rc = goya_set_ddr_bar_base(hdev, bar_base_addr);
4481 if (!rc) {
4482 *val = readl(hdev->pcie_bar[DDR_BAR_ID] +
4483 (addr - bar_base_addr));
4484
4485 rc = goya_set_ddr_bar_base(hdev, DRAM_PHYS_BASE +
4486 (MMU_PAGE_TABLES_ADDR &
4487 ~(prop->dram_pci_bar_size - 0x1ull)));
4488 }
4489 } else {
4490 rc = -EFAULT;
4491 }
4492
4493 return rc;
4494}
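
/*
 * Usage sketch (illustrative address only):
 *
 *	u32 val;
 *	int rc = goya_debugfs_read32(hdev, DRAM_PHYS_BASE + 0x1000, &val);
 *
 * If the address falls outside the current DDR bar window, the iATU is
 * repositioned for the access and then restored to the MMU page-tables
 * base.
 */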
4495
4496/*
4497 * goya_debugfs_write32 - write a 32bit value to a given device address
4498 *
4499 * @hdev: pointer to hl_device structure
4500 * @addr: address in device
4501 * @val: value to write
4502 *
4503 * If the DDR address is not mapped into the default aperture that the
4504 * DDR bar exposes, the function configures the iATU so that the DDR bar
4505 * is positioned at a base address that allows writing to the required
4506 * address. Configuring the iATU during normal operation can lead to
4507 * undefined behavior and should therefore be done with extreme care
4508 *
4509 */
4510int goya_debugfs_write32(struct hl_device *hdev, u64 addr, u32 val)
4511{
4512 struct asic_fixed_properties *prop = &hdev->asic_prop;
4513 int rc = 0;
4514
4515 if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
4516 WREG32(addr - CFG_BASE, val);
4517
4518 } else if ((addr >= SRAM_BASE_ADDR) &&
4519 (addr < SRAM_BASE_ADDR + SRAM_SIZE)) {
4520
4521 writel(val, hdev->pcie_bar[SRAM_CFG_BAR_ID] +
4522 (addr - SRAM_BASE_ADDR));
4523
4524 } else if ((addr >= DRAM_PHYS_BASE) &&
4525 (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size)) {
4526
4527 u64 bar_base_addr = DRAM_PHYS_BASE +
4528 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
4529
4530 rc = goya_set_ddr_bar_base(hdev, bar_base_addr);
4531 if (!rc) {
4532 writel(val, hdev->pcie_bar[DDR_BAR_ID] +
4533 (addr - bar_base_addr));
4534
4535 rc = goya_set_ddr_bar_base(hdev, DRAM_PHYS_BASE +
4536 (MMU_PAGE_TABLES_ADDR &
4537 ~(prop->dram_pci_bar_size - 0x1ull)));
4538 }
4539 } else {
4540 rc = -EFAULT;
4541 }
4542
4543 return rc;
4544}
4545
Omer Shpigelman0feaf862019-02-16 00:39:22 +02004546static u64 goya_read_pte(struct hl_device *hdev, u64 addr)
4547{
4548 struct goya_device *goya = hdev->asic_specific;
4549
4550 return readq(hdev->pcie_bar[DDR_BAR_ID] +
4551 (addr - goya->ddr_bar_cur_addr));
4552}
4553
4554static void goya_write_pte(struct hl_device *hdev, u64 addr, u64 val)
4555{
4556 struct goya_device *goya = hdev->asic_specific;
4557
4558 writeq(val, hdev->pcie_bar[DDR_BAR_ID] +
4559 (addr - goya->ddr_bar_cur_addr));
4560}
4561
Oded Gabbay1251f232019-02-16 00:39:18 +02004562static void goya_get_axi_name(struct hl_device *hdev, u32 agent_id,
4563 u16 event_type, char *axi_name, int len)
4564{
4565 if (!strcmp(goya_axi_name[agent_id], "DMA"))
4566 if (event_type >= GOYA_ASYNC_EVENT_ID_DMA0_CH)
4567 snprintf(axi_name, len, "DMA %d",
4568 event_type - GOYA_ASYNC_EVENT_ID_DMA0_CH);
4569 else
4570 snprintf(axi_name, len, "DMA %d",
4571 event_type - GOYA_ASYNC_EVENT_ID_DMA0_QM);
4572 else
4573 snprintf(axi_name, len, "%s", goya_axi_name[agent_id]);
4574}
4575
4576static void goya_print_razwi_info(struct hl_device *hdev, u64 reg,
4577 bool is_hbw, bool is_read, u16 event_type)
4578{
4579 u32 val, agent_id;
4580 char axi_name[10] = {0};
4581
4582 val = RREG32(reg);
4583
4584 if (is_hbw)
4585 agent_id = (val & GOYA_IRQ_HBW_AGENT_ID_MASK) >>
4586 GOYA_IRQ_HBW_AGENT_ID_SHIFT;
4587 else
4588 agent_id = (val & GOYA_IRQ_LBW_AGENT_ID_MASK) >>
4589 GOYA_IRQ_LBW_AGENT_ID_SHIFT;
4590
4591 if (agent_id >= GOYA_MAX_INITIATORS) {
4592 dev_err(hdev->dev,
4593 "Illegal %s %s with wrong initiator id %d, H/W IRQ %d\n",
4594 is_read ? "read from" : "write to",
4595 is_hbw ? "HBW" : "LBW",
4596 agent_id,
4597 event_type);
4598 } else {
4599 goya_get_axi_name(hdev, agent_id, event_type, axi_name,
4600 sizeof(axi_name));
4601 dev_err(hdev->dev, "Illegal %s by %s %s %s, H/W IRQ %d\n",
4602 is_read ? "read" : "write",
4603 axi_name,
4604 is_read ? "from" : "to",
4605 is_hbw ? "HBW" : "LBW",
4606 event_type);
4607 }
4608}
4609
4610static void goya_print_irq_info(struct hl_device *hdev, u16 event_type)
4611{
4612 struct goya_device *goya = hdev->asic_specific;
4613 bool is_hbw = false, is_read = false, is_info = false;
4614
4615 if (RREG32(mmDMA_MACRO_RAZWI_LBW_WT_VLD)) {
4616 goya_print_razwi_info(hdev, mmDMA_MACRO_RAZWI_LBW_WT_ID, is_hbw,
4617 is_read, event_type);
4618 WREG32(mmDMA_MACRO_RAZWI_LBW_WT_VLD, 0);
4619 is_info = true;
4620 }
4621 if (RREG32(mmDMA_MACRO_RAZWI_LBW_RD_VLD)) {
4622 is_read = true;
4623 goya_print_razwi_info(hdev, mmDMA_MACRO_RAZWI_LBW_RD_ID, is_hbw,
4624 is_read, event_type);
4625 WREG32(mmDMA_MACRO_RAZWI_LBW_RD_VLD, 0);
4626 is_info = true;
4627 }
4628 if (RREG32(mmDMA_MACRO_RAZWI_HBW_WT_VLD)) {
4629 is_hbw = true;
4630 goya_print_razwi_info(hdev, mmDMA_MACRO_RAZWI_HBW_WT_ID, is_hbw,
4631 is_read, event_type);
4632 WREG32(mmDMA_MACRO_RAZWI_HBW_WT_VLD, 0);
4633 is_info = true;
4634 }
4635 if (RREG32(mmDMA_MACRO_RAZWI_HBW_RD_VLD)) {
4636 is_hbw = true;
4637 is_read = true;
4638 goya_print_razwi_info(hdev, mmDMA_MACRO_RAZWI_HBW_RD_ID, is_hbw,
4639 is_read, event_type);
4640 WREG32(mmDMA_MACRO_RAZWI_HBW_RD_VLD, 0);
4641 is_info = true;
4642 }
4643 if (!is_info) {
4644 dev_err(hdev->dev,
4645 "Received H/W interrupt %d, no additional info\n",
4646 event_type);
4647 return;
4648 }
4649
4650 if (goya->hw_cap_initialized & HW_CAP_MMU) {
4651 u32 val = RREG32(mmMMU_PAGE_ERROR_CAPTURE);
4652 u64 addr;
4653
4654 if (val & MMU_PAGE_ERROR_CAPTURE_ENTRY_VALID_MASK) {
4655 addr = val & MMU_PAGE_ERROR_CAPTURE_VA_49_32_MASK;
4656 addr <<= 32;
4657 addr |= RREG32(mmMMU_PAGE_ERROR_CAPTURE_VA);
4658
4659 dev_err(hdev->dev, "MMU page fault on va 0x%llx\n",
4660 addr);
4661
4662 WREG32(mmMMU_PAGE_ERROR_CAPTURE, 0);
4663 }
4664 }
4665}
4666
Oded Gabbayf8c8c7d2019-02-16 00:39:20 +02004667static int goya_unmask_irq_arr(struct hl_device *hdev, u32 *irq_arr,
4668 size_t irq_arr_size)
4669{
4670 struct armcp_unmask_irq_arr_packet *pkt;
4671 size_t total_pkt_size;
4672 long result;
4673 int rc;
4674
4675 total_pkt_size = sizeof(struct armcp_unmask_irq_arr_packet) +
4676 irq_arr_size;
4677
4678 /* data should be aligned to 8 bytes in order for ArmCP to copy it */
4679 total_pkt_size = (total_pkt_size + 0x7) & ~0x7;
4680
4681 /* total_pkt_size is cast to u16 later on */
4682 if (total_pkt_size > USHRT_MAX) {
4683 dev_err(hdev->dev, "too many elements in IRQ array\n");
4684 return -EINVAL;
4685 }
4686
4687 pkt = kzalloc(total_pkt_size, GFP_KERNEL);
4688 if (!pkt)
4689 return -ENOMEM;
4690
4691 pkt->length = irq_arr_size / sizeof(irq_arr[0]);
4692 memcpy(&pkt->irqs, irq_arr, irq_arr_size);
4693
4694 pkt->armcp_pkt.ctl = ARMCP_PACKET_UNMASK_RAZWI_IRQ_ARRAY <<
4695 ARMCP_PKT_CTL_OPCODE_SHIFT;
4696
4697 rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) pkt,
4698 total_pkt_size, HL_DEVICE_TIMEOUT_USEC, &result);
4699
4700 if (rc)
4701 dev_err(hdev->dev, "failed to unmask IRQ array\n");
4702
4703 kfree(pkt);
4704
4705 return rc;
4706}
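
/*
 * Alignment example: (total_pkt_size + 0x7) & ~0x7 rounds up to the
 * next multiple of 8, e.g. a 13-byte packet becomes 16 bytes, so ArmCP
 * can copy it with 64-bit accesses.
 */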
4707
4708static int goya_soft_reset_late_init(struct hl_device *hdev)
4709{
4710 /*
4711 * Unmask all IRQs since some could have been received
4712 * during the soft reset
4713 */
4714 return goya_unmask_irq_arr(hdev, goya_non_fatal_events,
4715 sizeof(goya_non_fatal_events));
4716}
4717
Oded Gabbay1251f232019-02-16 00:39:18 +02004718static int goya_unmask_irq(struct hl_device *hdev, u16 event_type)
4719{
4720 struct armcp_packet pkt;
4721 long result;
4722 int rc;
4723
4724 memset(&pkt, 0, sizeof(pkt));
4725
4726 pkt.ctl = ARMCP_PACKET_UNMASK_RAZWI_IRQ << ARMCP_PKT_CTL_OPCODE_SHIFT;
4727 pkt.value = event_type;
4728
4729 rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
4730 HL_DEVICE_TIMEOUT_USEC, &result);
4731
4732 if (rc)
4733 dev_err(hdev->dev, "failed to unmask RAZWI IRQ %d", event_type);
4734
4735 return rc;
4736}
4737
4738void goya_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry)
4739{
4740 u16 event_type = ((eq_entry->hdr.ctl & EQ_CTL_EVENT_TYPE_MASK)
4741 >> EQ_CTL_EVENT_TYPE_SHIFT);
4742 struct goya_device *goya = hdev->asic_specific;
4743
4744 goya->events_stat[event_type]++;
4745
4746 switch (event_type) {
4747 case GOYA_ASYNC_EVENT_ID_PCIE_IF:
4748 case GOYA_ASYNC_EVENT_ID_TPC0_ECC:
4749 case GOYA_ASYNC_EVENT_ID_TPC1_ECC:
4750 case GOYA_ASYNC_EVENT_ID_TPC2_ECC:
4751 case GOYA_ASYNC_EVENT_ID_TPC3_ECC:
4752 case GOYA_ASYNC_EVENT_ID_TPC4_ECC:
4753 case GOYA_ASYNC_EVENT_ID_TPC5_ECC:
4754 case GOYA_ASYNC_EVENT_ID_TPC6_ECC:
4755 case GOYA_ASYNC_EVENT_ID_TPC7_ECC:
4756 case GOYA_ASYNC_EVENT_ID_MME_ECC:
4757 case GOYA_ASYNC_EVENT_ID_MME_ECC_EXT:
4758 case GOYA_ASYNC_EVENT_ID_MMU_ECC:
4759 case GOYA_ASYNC_EVENT_ID_DMA_MACRO:
4760 case GOYA_ASYNC_EVENT_ID_DMA_ECC:
4761 case GOYA_ASYNC_EVENT_ID_CPU_IF_ECC:
4762 case GOYA_ASYNC_EVENT_ID_PSOC_MEM:
4763 case GOYA_ASYNC_EVENT_ID_PSOC_CORESIGHT:
4764 case GOYA_ASYNC_EVENT_ID_SRAM0 ... GOYA_ASYNC_EVENT_ID_SRAM29:
4765 case GOYA_ASYNC_EVENT_ID_GIC500:
4766 case GOYA_ASYNC_EVENT_ID_PLL0:
4767 case GOYA_ASYNC_EVENT_ID_PLL1:
4768 case GOYA_ASYNC_EVENT_ID_PLL3:
4769 case GOYA_ASYNC_EVENT_ID_PLL4:
4770 case GOYA_ASYNC_EVENT_ID_PLL5:
4771 case GOYA_ASYNC_EVENT_ID_PLL6:
4772 case GOYA_ASYNC_EVENT_ID_AXI_ECC:
4773 case GOYA_ASYNC_EVENT_ID_L2_RAM_ECC:
4774 case GOYA_ASYNC_EVENT_ID_PSOC_GPIO_05_SW_RESET:
4775 case GOYA_ASYNC_EVENT_ID_PSOC_GPIO_10_VRHOT_ICRIT:
4776 dev_err(hdev->dev,
4777 "Received H/W interrupt %d, reset the chip\n",
4778 event_type);
Oded Gabbayf8c8c7d2019-02-16 00:39:20 +02004779 hl_device_reset(hdev, true, false);
Oded Gabbay1251f232019-02-16 00:39:18 +02004780 break;
4781
4782 case GOYA_ASYNC_EVENT_ID_PCIE_DEC:
4783 case GOYA_ASYNC_EVENT_ID_TPC0_DEC:
4784 case GOYA_ASYNC_EVENT_ID_TPC1_DEC:
4785 case GOYA_ASYNC_EVENT_ID_TPC2_DEC:
4786 case GOYA_ASYNC_EVENT_ID_TPC3_DEC:
4787 case GOYA_ASYNC_EVENT_ID_TPC4_DEC:
4788 case GOYA_ASYNC_EVENT_ID_TPC5_DEC:
4789 case GOYA_ASYNC_EVENT_ID_TPC6_DEC:
4790 case GOYA_ASYNC_EVENT_ID_TPC7_DEC:
4791 case GOYA_ASYNC_EVENT_ID_MME_WACS:
4792 case GOYA_ASYNC_EVENT_ID_MME_WACSD:
4793 case GOYA_ASYNC_EVENT_ID_CPU_AXI_SPLITTER:
4794 case GOYA_ASYNC_EVENT_ID_PSOC_AXI_DEC:
4795 case GOYA_ASYNC_EVENT_ID_PSOC:
4796 case GOYA_ASYNC_EVENT_ID_TPC0_KRN_ERR:
4797 case GOYA_ASYNC_EVENT_ID_TPC1_KRN_ERR:
4798 case GOYA_ASYNC_EVENT_ID_TPC2_KRN_ERR:
4799 case GOYA_ASYNC_EVENT_ID_TPC3_KRN_ERR:
4800 case GOYA_ASYNC_EVENT_ID_TPC4_KRN_ERR:
4801 case GOYA_ASYNC_EVENT_ID_TPC5_KRN_ERR:
4802 case GOYA_ASYNC_EVENT_ID_TPC6_KRN_ERR:
4803 case GOYA_ASYNC_EVENT_ID_TPC7_KRN_ERR:
4804 case GOYA_ASYNC_EVENT_ID_TPC0_CMDQ ... GOYA_ASYNC_EVENT_ID_TPC7_QM:
4805 case GOYA_ASYNC_EVENT_ID_MME_QM:
4806 case GOYA_ASYNC_EVENT_ID_MME_CMDQ:
4807 case GOYA_ASYNC_EVENT_ID_DMA0_QM ... GOYA_ASYNC_EVENT_ID_DMA4_QM:
4808 case GOYA_ASYNC_EVENT_ID_DMA0_CH ... GOYA_ASYNC_EVENT_ID_DMA4_CH:
4809 goya_print_irq_info(hdev, event_type);
4810 goya_unmask_irq(hdev, event_type);
4811 break;
4812
4813 case GOYA_ASYNC_EVENT_ID_TPC0_BMON_SPMU:
4814 case GOYA_ASYNC_EVENT_ID_TPC1_BMON_SPMU:
4815 case GOYA_ASYNC_EVENT_ID_TPC2_BMON_SPMU:
4816 case GOYA_ASYNC_EVENT_ID_TPC3_BMON_SPMU:
4817 case GOYA_ASYNC_EVENT_ID_TPC4_BMON_SPMU:
4818 case GOYA_ASYNC_EVENT_ID_TPC5_BMON_SPMU:
4819 case GOYA_ASYNC_EVENT_ID_TPC6_BMON_SPMU:
4820 case GOYA_ASYNC_EVENT_ID_TPC7_BMON_SPMU:
4821 case GOYA_ASYNC_EVENT_ID_DMA_BM_CH0:
4822 case GOYA_ASYNC_EVENT_ID_DMA_BM_CH1:
4823 case GOYA_ASYNC_EVENT_ID_DMA_BM_CH2:
4824 case GOYA_ASYNC_EVENT_ID_DMA_BM_CH3:
4825 case GOYA_ASYNC_EVENT_ID_DMA_BM_CH4:
4826 dev_info(hdev->dev, "Received H/W interrupt %d\n", event_type);
4827 break;
4828
4829 default:
4830 dev_err(hdev->dev, "Received invalid H/W interrupt %d\n",
4831 event_type);
4832 break;
4833 }
4834}
4835
4836void *goya_get_events_stat(struct hl_device *hdev, u32 *size)
4837{
4838 struct goya_device *goya = hdev->asic_specific;
4839
4840 *size = (u32) sizeof(goya->events_stat);
4841
4842 return goya->events_stat;
4843}
4844
Omer Shpigelman0feaf862019-02-16 00:39:22 +02004845static int goya_mmu_clear_pgt_range(struct hl_device *hdev)
4846{
4847 struct asic_fixed_properties *prop = &hdev->asic_prop;
4848 struct goya_device *goya = hdev->asic_specific;
4849 struct packet_lin_dma *clear_pgt_range_pkt;
4850 struct hl_cs_parser parser;
4851 struct hl_cs_job *job;
4852 u32 cb_size;
4853 struct hl_cb *cb;
4854 int rc;
4855
4856 if (!(goya->hw_cap_initialized & HW_CAP_MMU))
4857 return 0;
4858
4859 cb = hl_cb_kernel_create(hdev, PAGE_SIZE);
4860 if (!cb)
4861 return -EFAULT;
4862
4863 clear_pgt_range_pkt = (struct packet_lin_dma *)
4864 (uintptr_t) cb->kernel_address;
4865
4866 memset(clear_pgt_range_pkt, 0, sizeof(*clear_pgt_range_pkt));
4867 cb_size = sizeof(*clear_pgt_range_pkt);
4868
4869 clear_pgt_range_pkt->ctl =
4870 ((PACKET_LIN_DMA << GOYA_PKT_CTL_OPCODE_SHIFT) |
4871 (DMA_HOST_TO_DRAM << GOYA_PKT_LIN_DMA_CTL_DMA_DIR_SHIFT) |
4872 (1 << GOYA_PKT_LIN_DMA_CTL_MEMSET_SHIFT) |
4873 (1 << GOYA_PKT_LIN_DMA_CTL_WO_SHIFT) |
4874 (1 << GOYA_PKT_CTL_RB_SHIFT) |
4875 (1 << GOYA_PKT_CTL_MB_SHIFT));
4876
4877 clear_pgt_range_pkt->src_addr = 0;
4878 clear_pgt_range_pkt->dst_addr = prop->mmu_pgt_addr;
4879 clear_pgt_range_pkt->tsize = prop->mmu_pgt_size + MMU_CACHE_MNG_SIZE;
4880
4881 job = hl_cs_allocate_job(hdev, true);
4882 if (!job) {
4883 dev_err(hdev->dev, "Failed to allocate a new job\n");
4884 rc = -ENOMEM;
4885 goto release_cb;
4886 }
4887
4888 job->id = 0;
4889 job->user_cb = cb;
4890 job->user_cb->cs_cnt++;
4891 job->user_cb_size = cb_size;
4892 job->hw_queue_id = GOYA_QUEUE_ID_DMA_0;
4893
Oded Gabbayc2164772019-02-16 00:39:24 +02004894 hl_debugfs_add_job(hdev, job);
4895
Omer Shpigelman0feaf862019-02-16 00:39:22 +02004896 parser.ctx_id = HL_KERNEL_ASID_ID;
4897 parser.cs_sequence = 0;
4898 parser.job_id = job->id;
4899 parser.hw_queue_id = job->hw_queue_id;
4900 parser.job_userptr_list = &job->userptr_list;
4901 parser.user_cb = job->user_cb;
4902 parser.user_cb_size = job->user_cb_size;
4903 parser.ext_queue = job->ext_queue;
4904 parser.use_virt_addr = hdev->mmu_enable;
4905
4906 rc = hdev->asic_funcs->cs_parser(hdev, &parser);
4907 if (rc) {
4908 dev_err(hdev->dev,
4909 "Failed to parse kernel CB when clearing pgt\n");
4910 goto free_job;
4911 }
4912
4913 job->patched_cb = parser.patched_cb;
4914 job->job_cb_size = parser.patched_cb_size;
4915 job->patched_cb->cs_cnt++;
4916
4917 rc = goya_send_job_on_qman0(hdev, job);
4918
4919 job->patched_cb->cs_cnt--;
4920 hl_cb_put(job->patched_cb);
4921
4922free_job:
4923 hl_userptr_delete_list(hdev, &job->userptr_list);
Oded Gabbayc2164772019-02-16 00:39:24 +02004924 hl_debugfs_remove_job(hdev, job);
Omer Shpigelman0feaf862019-02-16 00:39:22 +02004925 kfree(job);
4926 cb->cs_cnt--;
4927
4928release_cb:
4929 hl_cb_put(cb);
4930 hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
4931
4932 return rc;
4933}
4934
4935static void goya_mmu_prepare(struct hl_device *hdev, u32 asid)
4936{
4937 struct goya_device *goya = hdev->asic_specific;
4938 int i;
4939
4940 if (!(goya->hw_cap_initialized & HW_CAP_MMU))
4941 return;
4942
4943 if (asid & ~MME_QM_GLBL_SECURE_PROPS_ASID_MASK) {
4944 WARN(1, "asid %u is too big\n", asid);
4945 return;
4946 }
4947
4948 /* zero the MMBP and ASID bits and then set the ASID */
4949 for (i = 0 ; i < GOYA_MMU_REGS_NUM ; i++) {
4950 WREG32_AND(goya_mmu_regs[i], ~0x7FF);
4951 WREG32_OR(goya_mmu_regs[i], asid);
4952 }
4953}
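
/*
 * Register-update example (hypothetical value): for each of the
 * GOYA_MMU_REGS_NUM secure-props registers, the low 11 bits (MMBP +
 * ASID) are cleared and the new ASID is OR-ed in, so a register holding
 * 0xABCD07FF with asid == 3 ends up as 0xABCD0003.
 */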
4954
4955static void goya_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard)
4956{
4957 struct goya_device *goya = hdev->asic_specific;
4958 u32 status, timeout_usec;
4959 int rc;
4960
4961 if (!(goya->hw_cap_initialized & HW_CAP_MMU))
4962 return;
4963
4964 /* no need for an L1-only invalidation in Goya */
4965 if (!is_hard)
4966 return;
4967
4968 if (hdev->pldm)
4969 timeout_usec = GOYA_PLDM_MMU_TIMEOUT_USEC;
4970 else
4971 timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
4972
4973 mutex_lock(&hdev->mmu_cache_lock);
4974
4975 /* L0 & L1 invalidation */
4976 WREG32(mmSTLB_INV_ALL_START, 1);
4977
4978 rc = hl_poll_timeout(
4979 hdev,
4980 mmSTLB_INV_ALL_START,
4981 status,
4982 !status,
4983 1000,
4984 timeout_usec);
4985
4986 mutex_unlock(&hdev->mmu_cache_lock);
4987
4988 if (rc)
4989 dev_notice_ratelimited(hdev->dev,
4990 "Timeout when waiting for MMU cache invalidation\n");
4991}
4992
4993static void goya_mmu_invalidate_cache_range(struct hl_device *hdev,
4994 bool is_hard, u32 asid, u64 va, u64 size)
4995{
4996 struct goya_device *goya = hdev->asic_specific;
4997 u32 status, timeout_usec, inv_data, pi;
4998 int rc;
4999
5000 if (!(goya->hw_cap_initialized & HW_CAP_MMU))
5001 return;
5002
5003 /* no need for an L1-only invalidation in Goya */
5004 if (!is_hard)
5005 return;
5006
5007 if (hdev->pldm)
5008 timeout_usec = GOYA_PLDM_MMU_TIMEOUT_USEC;
5009 else
5010 timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
5011
5012 mutex_lock(&hdev->mmu_cache_lock);
5013
5014 /*
5015 * TODO: currently we invalidate the entire L0 & L1 as in a regular
5016 * hard invalidation. We need to apply invalidation of specific cache
5017 * lines with a mask of ASID & VA & size.
5018 * Note that L1 will be flushed entirely in any case.
5019 */
5020
5021 /* L0 & L1 invalidation */
5022 inv_data = RREG32(mmSTLB_CACHE_INV);
5023 /* PI is 8 bit */
5024 pi = ((inv_data & STLB_CACHE_INV_PRODUCER_INDEX_MASK) + 1) & 0xFF;
5025 WREG32(mmSTLB_CACHE_INV,
5026 (inv_data & STLB_CACHE_INV_INDEX_MASK_MASK) | pi);
5027
5028 rc = hl_poll_timeout(
5029 hdev,
5030 mmSTLB_INV_CONSUMER_INDEX,
5031 status,
5032 status == pi,
5033 1000,
5034 timeout_usec);
5035
5036 mutex_unlock(&hdev->mmu_cache_lock);
5037
5038 if (rc)
5039 dev_notice_ratelimited(hdev->dev,
5040 "Timeout when waiting for MMU cache invalidation\n");
5041}
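
/*
 * Producer-index arithmetic (illustration): the PI is 8 bits wide, so
 * ((inv_data & STLB_CACHE_INV_PRODUCER_INDEX_MASK) + 1) & 0xFF wraps
 * 0xFF back to 0x00. The poll then waits until the consumer index
 * reaches the new PI, i.e. the invalidation request was consumed.
 */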
5042
5043static int goya_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid,
5044 u64 phys_addr)
5045{
5046 u32 status, timeout_usec;
5047 int rc;
5048
5049 if (hdev->pldm)
5050 timeout_usec = GOYA_PLDM_MMU_TIMEOUT_USEC;
5051 else
5052 timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
5053
5054 WREG32(MMU_HOP0_PA43_12, phys_addr >> MMU_HOP0_PA43_12_SHIFT);
5055 WREG32(MMU_HOP0_PA49_44, phys_addr >> MMU_HOP0_PA49_44_SHIFT);
5056 WREG32(MMU_ASID_BUSY, 0x80000000 | asid);
5057
5058 rc = hl_poll_timeout(
5059 hdev,
5060 MMU_ASID_BUSY,
5061 status,
5062 !(status & 0x80000000),
5063 1000,
5064 timeout_usec);
5065
5066 if (rc) {
5067 dev_err(hdev->dev,
5068 "Timeout during MMU hop0 config of asid %d\n", asid);
5069 return rc;
5070 }
5071
5072 return 0;
5073}
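
/*
 * Address-split example (hypothetical value): the hop0 physical address
 * is programmed in two registers - bits 43:12 go to MMU_HOP0_PA43_12 and
 * bits 49:44 to MMU_HOP0_PA49_44 - so phys_addr 0x20000000 yields
 * PA43_12 = 0x20000 and PA49_44 = 0.
 */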
5074
Oded Gabbayf8c8c7d2019-02-16 00:39:20 +02005075int goya_send_heartbeat(struct hl_device *hdev)
5076{
5077 struct goya_device *goya = hdev->asic_specific;
5078 struct armcp_packet hb_pkt;
5079 long result;
5080 int rc;
5081
5082 if (!(goya->hw_cap_initialized & HW_CAP_CPU_Q))
5083 return 0;
5084
5085 memset(&hb_pkt, 0, sizeof(hb_pkt));
5086
5087 hb_pkt.ctl = ARMCP_PACKET_TEST << ARMCP_PKT_CTL_OPCODE_SHIFT;
5088 hb_pkt.value = ARMCP_PACKET_FENCE_VAL;
5089
5090 rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &hb_pkt,
5091 sizeof(hb_pkt), HL_DEVICE_TIMEOUT_USEC, &result);
5092
5093 if ((rc) || (result != ARMCP_PACKET_FENCE_VAL))
5094 rc = -EIO;
5095
5096 return rc;
5097}
5098
Oded Gabbayd91389b2019-02-16 00:39:19 +02005099static int goya_armcp_info_get(struct hl_device *hdev)
5100{
5101 struct goya_device *goya = hdev->asic_specific;
5102 struct asic_fixed_properties *prop = &hdev->asic_prop;
5103 struct armcp_packet pkt;
5104 void *armcp_info_cpu_addr;
5105 dma_addr_t armcp_info_dma_addr;
5106 u64 dram_size;
5107 long result;
5108 int rc;
5109
5110 if (!(goya->hw_cap_initialized & HW_CAP_CPU_Q))
5111 return 0;
5112
5113 armcp_info_cpu_addr =
5114 hdev->asic_funcs->cpu_accessible_dma_pool_alloc(hdev,
5115 sizeof(struct armcp_info), &armcp_info_dma_addr);
5116 if (!armcp_info_cpu_addr) {
5117 dev_err(hdev->dev,
5118 "Failed to allocate DMA memory for ArmCP info packet\n");
5119 return -ENOMEM;
5120 }
5121
5122 memset(armcp_info_cpu_addr, 0, sizeof(struct armcp_info));
5123
5124 memset(&pkt, 0, sizeof(pkt));
5125
5126 pkt.ctl = ARMCP_PACKET_INFO_GET << ARMCP_PKT_CTL_OPCODE_SHIFT;
5127 pkt.addr = armcp_info_dma_addr + prop->host_phys_base_address;
5128 pkt.data_max_size = sizeof(struct armcp_info);
5129
5130 rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
5131 GOYA_ARMCP_INFO_TIMEOUT, &result);
5132
5133 if (rc) {
5134 dev_err(hdev->dev,
5135 "Failed to send armcp info pkt, error %d\n", rc);
5136 goto out;
5137 }
5138
5139 memcpy(&prop->armcp_info, armcp_info_cpu_addr,
5140 sizeof(prop->armcp_info));
5141
5142 dram_size = prop->armcp_info.dram_size;
5143 if (dram_size) {
5144 if ((!is_power_of_2(dram_size)) ||
5145 (dram_size < DRAM_PHYS_DEFAULT_SIZE)) {
5146 dev_err(hdev->dev,
5147 "F/W reported invalid DRAM size %llu. Trying to use default size\n",
5148 dram_size);
5149 dram_size = DRAM_PHYS_DEFAULT_SIZE;
5150 }
5151
5152 prop->dram_size = dram_size;
5153 prop->dram_end_address = prop->dram_base_address + dram_size;
5154 }
5155
5156 rc = hl_build_hwmon_channel_info(hdev, prop->armcp_info.sensors);
5157 if (rc) {
5158 dev_err(hdev->dev,
5159 "Failed to build hwmon channel info, error %d\n", rc);
5160 rc = -EFAULT;
5161 goto out;
5162 }
5163
5164out:
5165 hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev,
5166 sizeof(struct armcp_info), armcp_info_cpu_addr);
5167
5168 return rc;
5169}
5170
5171static void goya_init_clock_gating(struct hl_device *hdev)
5172{
5173
5174}
5175
5176static void goya_disable_clock_gating(struct hl_device *hdev)
5177{
5178
5179}
Oded Gabbay9494a8d2019-02-16 00:39:17 +02005180
Oded Gabbayeff6f4a2019-02-16 00:39:21 +02005181static bool goya_is_device_idle(struct hl_device *hdev)
5182{
5183 u64 offset, dma_qm_reg, tpc_qm_reg, tpc_cmdq_reg, tpc_cfg_reg;
5184 int i;
5185
5186 offset = mmDMA_QM_1_GLBL_STS0 - mmDMA_QM_0_GLBL_STS0;
5187
5188 for (i = 0 ; i < DMA_MAX_NUM ; i++) {
5189 dma_qm_reg = mmDMA_QM_0_GLBL_STS0 + i * offset;
5190
5191 if ((RREG32(dma_qm_reg) & DMA_QM_IDLE_MASK) !=
5192 DMA_QM_IDLE_MASK)
5193 return false;
5194 }
5195
5196 offset = mmTPC1_QM_GLBL_STS0 - mmTPC0_QM_GLBL_STS0;
5197
5198 for (i = 0 ; i < TPC_MAX_NUM ; i++) {
5199 tpc_qm_reg = mmTPC0_QM_GLBL_STS0 + i * offset;
5200 tpc_cmdq_reg = mmTPC0_CMDQ_GLBL_STS0 + i * offset;
5201 tpc_cfg_reg = mmTPC0_CFG_STATUS + i * offset;
5202
5203 if ((RREG32(tpc_qm_reg) & TPC_QM_IDLE_MASK) !=
5204 TPC_QM_IDLE_MASK)
5205 return false;
5206
5207 if ((RREG32(tpc_cmdq_reg) & TPC_CMDQ_IDLE_MASK) !=
5208 TPC_CMDQ_IDLE_MASK)
5209 return false;
5210
5211 if ((RREG32(tpc_cfg_reg) & TPC_CFG_IDLE_MASK) !=
5212 TPC_CFG_IDLE_MASK)
5213 return false;
5214 }
5215
5216 if ((RREG32(mmMME_QM_GLBL_STS0) & MME_QM_IDLE_MASK) !=
5217 MME_QM_IDLE_MASK)
5218 return false;
5219
5220 if ((RREG32(mmMME_CMDQ_GLBL_STS0) & MME_CMDQ_IDLE_MASK) !=
5221 MME_CMDQ_IDLE_MASK)
5222 return false;
5223
5224 if ((RREG32(mmMME_ARCH_STATUS) & MME_ARCH_IDLE_MASK) !=
5225 MME_ARCH_IDLE_MASK)
5226 return false;
5227
5228 if (RREG32(mmMME_SHADOW_0_STATUS) & MME_SHADOW_IDLE_MASK)
5229 return false;
5230
5231 return true;
5232}
5233
Oded Gabbay9494a8d2019-02-16 00:39:17 +02005234static void goya_hw_queues_lock(struct hl_device *hdev)
5235{
5236 struct goya_device *goya = hdev->asic_specific;
5237
5238 spin_lock(&goya->hw_queues_lock);
5239}
5240
5241static void goya_hw_queues_unlock(struct hl_device *hdev)
5242{
5243 struct goya_device *goya = hdev->asic_specific;
5244
5245 spin_unlock(&goya->hw_queues_lock);
5246}
5247
Oded Gabbayd8dd7b02019-02-16 00:39:23 +02005248static u32 goya_get_pci_id(struct hl_device *hdev)
5249{
5250 return hdev->pdev->device;
5251}
5252
Oded Gabbayd91389b2019-02-16 00:39:19 +02005253int goya_get_eeprom_data(struct hl_device *hdev, void *data, size_t max_size)
5254{
5255 struct goya_device *goya = hdev->asic_specific;
5256 struct asic_fixed_properties *prop = &hdev->asic_prop;
5257 struct armcp_packet pkt;
5258 void *eeprom_info_cpu_addr;
5259 dma_addr_t eeprom_info_dma_addr;
5260 long result;
5261 int rc;
5262
5263 if (!(goya->hw_cap_initialized & HW_CAP_CPU_Q))
5264 return 0;
5265
5266 eeprom_info_cpu_addr =
5267 hdev->asic_funcs->cpu_accessible_dma_pool_alloc(hdev,
5268 max_size, &eeprom_info_dma_addr);
5269 if (!eeprom_info_cpu_addr) {
5270 dev_err(hdev->dev,
5271 "Failed to allocate DMA memory for EEPROM info packet\n");
5272 return -ENOMEM;
5273 }
5274
5275 memset(eeprom_info_cpu_addr, 0, max_size);
5276
5277 memset(&pkt, 0, sizeof(pkt));
5278
5279 pkt.ctl = ARMCP_PACKET_EEPROM_DATA_GET << ARMCP_PKT_CTL_OPCODE_SHIFT;
5280 pkt.addr = eeprom_info_dma_addr + prop->host_phys_base_address;
5281 pkt.data_max_size = max_size;
5282
5283 rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
5284 GOYA_ARMCP_EEPROM_TIMEOUT, &result);
5285
5286 if (rc) {
5287 dev_err(hdev->dev,
5288 "Failed to send armcp EEPROM pkt, error %d\n", rc);
5289 goto out;
5290 }
5291
5292 /* result contains the actual size */
5293 memcpy(data, eeprom_info_cpu_addr, min((size_t)result, max_size));
5294
5295out:
5296 hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev, max_size,
5297 eeprom_info_cpu_addr);
5298
5299 return rc;
5300}
5301
Oded Gabbayf8c8c7d2019-02-16 00:39:20 +02005302static enum hl_device_hw_state goya_get_hw_state(struct hl_device *hdev)
5303{
5304 return RREG32(mmPSOC_GLOBAL_CONF_APP_STATUS);
5305}
5306
Oded Gabbay99b9d7b2019-02-16 00:39:13 +02005307static const struct hl_asic_funcs goya_funcs = {
5308 .early_init = goya_early_init,
5309 .early_fini = goya_early_fini,
Oded Gabbayd91389b2019-02-16 00:39:19 +02005310 .late_init = goya_late_init,
5311 .late_fini = goya_late_fini,
Oded Gabbay99b9d7b2019-02-16 00:39:13 +02005312 .sw_init = goya_sw_init,
5313 .sw_fini = goya_sw_fini,
Oded Gabbay839c4802019-02-16 00:39:16 +02005314 .hw_init = goya_hw_init,
5315 .hw_fini = goya_hw_fini,
Oded Gabbay1251f232019-02-16 00:39:18 +02005316 .halt_engines = goya_halt_engines,
Oded Gabbay99b9d7b2019-02-16 00:39:13 +02005317 .suspend = goya_suspend,
5318 .resume = goya_resume,
Oded Gabbaybe5d9262019-02-16 00:39:15 +02005319 .mmap = goya_mmap,
5320 .cb_mmap = goya_cb_mmap,
Oded Gabbay9494a8d2019-02-16 00:39:17 +02005321 .ring_doorbell = goya_ring_doorbell,
5322 .flush_pq_write = goya_flush_pq_write,
Oded Gabbay99b9d7b2019-02-16 00:39:13 +02005323 .dma_alloc_coherent = goya_dma_alloc_coherent,
5324 .dma_free_coherent = goya_dma_free_coherent,
Oded Gabbay9494a8d2019-02-16 00:39:17 +02005325 .get_int_queue_base = goya_get_int_queue_base,
5326 .test_queues = goya_test_queues,
5327 .dma_pool_zalloc = goya_dma_pool_zalloc,
5328 .dma_pool_free = goya_dma_pool_free,
5329 .cpu_accessible_dma_pool_alloc = goya_cpu_accessible_dma_pool_alloc,
5330 .cpu_accessible_dma_pool_free = goya_cpu_accessible_dma_pool_free,
Oded Gabbayeff6f4a2019-02-16 00:39:21 +02005331 .hl_dma_unmap_sg = goya_dma_unmap_sg,
5332 .cs_parser = goya_cs_parser,
5333 .asic_dma_map_sg = goya_dma_map_sg,
5334 .get_dma_desc_list_size = goya_get_dma_desc_list_size,
5335 .add_end_of_cb_packets = goya_add_end_of_cb_packets,
Oded Gabbay1251f232019-02-16 00:39:18 +02005336 .update_eq_ci = goya_update_eq_ci,
Oded Gabbayeff6f4a2019-02-16 00:39:21 +02005337 .context_switch = goya_context_switch,
5338 .restore_phase_topology = goya_restore_phase_topology,
Oded Gabbayc2164772019-02-16 00:39:24 +02005339 .debugfs_read32 = goya_debugfs_read32,
5340 .debugfs_write32 = goya_debugfs_write32,
Oded Gabbayd91389b2019-02-16 00:39:19 +02005341 .add_device_attr = goya_add_device_attr,
Oded Gabbay1251f232019-02-16 00:39:18 +02005342 .handle_eqe = goya_handle_eqe,
Oded Gabbayd91389b2019-02-16 00:39:19 +02005343 .set_pll_profile = goya_set_pll_profile,
Oded Gabbay1251f232019-02-16 00:39:18 +02005344 .get_events_stat = goya_get_events_stat,
Omer Shpigelman0feaf862019-02-16 00:39:22 +02005345 .read_pte = goya_read_pte,
5346 .write_pte = goya_write_pte,
5347 .mmu_invalidate_cache = goya_mmu_invalidate_cache,
5348 .mmu_invalidate_cache_range = goya_mmu_invalidate_cache_range,
Oded Gabbayf8c8c7d2019-02-16 00:39:20 +02005349 .send_heartbeat = goya_send_heartbeat,
Oded Gabbayd91389b2019-02-16 00:39:19 +02005350 .enable_clock_gating = goya_init_clock_gating,
5351 .disable_clock_gating = goya_disable_clock_gating,
Oded Gabbayeff6f4a2019-02-16 00:39:21 +02005352 .is_device_idle = goya_is_device_idle,
Oded Gabbayf8c8c7d2019-02-16 00:39:20 +02005353 .soft_reset_late_init = goya_soft_reset_late_init,
Oded Gabbay9494a8d2019-02-16 00:39:17 +02005354 .hw_queues_lock = goya_hw_queues_lock,
5355 .hw_queues_unlock = goya_hw_queues_unlock,
Oded Gabbayd8dd7b02019-02-16 00:39:23 +02005356 .get_pci_id = goya_get_pci_id,
Oded Gabbayd91389b2019-02-16 00:39:19 +02005357 .get_eeprom_data = goya_get_eeprom_data,
Oded Gabbayf8c8c7d2019-02-16 00:39:20 +02005358 .send_cpu_message = goya_send_cpu_message,
5359 .get_hw_state = goya_get_hw_state
Oded Gabbay99b9d7b2019-02-16 00:39:13 +02005360};
5361
5362/*
5363 * goya_set_asic_funcs - set Goya function pointers
5364 *
5365 * @*hdev: pointer to hl_device structure
5366 *
5367 */
5368void goya_set_asic_funcs(struct hl_device *hdev)
5369{
5370 hdev->asic_funcs = &goya_funcs;
5371}