// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright 2023 Google LLC.
 *
 * This platform component registers an ITMON notifier callback which filters
 * fabric fault reports where the GPU is identified as the initiator of the
 * transaction.
 *
 * When such a fault occurs, it searches for the faulting physical address in
 * the GPU page tables of all GPU contexts.  If the physical address appears in
 * a page table, the context and corresponding virtual address are logged.
 *
 * Otherwise, a message is logged indicating that the physical address does not
 * appear in any GPU page table.
 */

#if IS_ENABLED(CONFIG_EXYNOS_ITMON)

/* Linux includes */
#include <linux/delay.h>
#include <linux/of.h>

/* SOC includes */
#include <soc/google/exynos-itmon.h>

/* Mali core includes */
#include <mali_kbase.h>

/* Pixel integration includes */
#include "mali_kbase_config_platform.h"
#include "pixel_gpu_control.h"


/* GPU page table entries may use more physical address bits than the bus
 * carries, encoding other information in the upper bits.  Mask those bits
 * off before comparing against bus addresses.
 */
#define PHYSICAL_ADDRESS_BITS 36
#define PHYSICAL_ADDRESS_MASK ((1ULL << (PHYSICAL_ADDRESS_BITS)) - 1)
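/* A worked example (hypothetical raw value; the exact PTE bit layout is
 * implementation specific): with 36 address bits the mask is 0xFFFFFFFFF,
 * so an entry value of 0x4800123456000 masks down to the bus-visible
 * address 0x123456000.
 */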

/* Convert KBASE_MMU_PAGE_ENTRIES to number of bits */
#define KBASE_MMU_PAGE_ENTRIES_LOG2 const_ilog2(KBASE_MMU_PAGE_ENTRIES)
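/* For example, assuming 4 KiB pages, each level holds 512 eight-byte
 * entries, so each level translates const_ilog2(512) = 9 bits of virtual
 * address.
 */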


/**
 * pixel_gpu_itmon_search_pgd() - Search a page directory page.
 *
 * @mmu_mode:  The &struct kbase_mmu_mode PTE accessor functions.
 * @level:     The level of the page directory.
 * @pa_pgd:    The physical address of the page directory page.
 * @pa_search: The physical address to search for.
 * @va_prefix: The virtual address prefix above this level.
 *
 * Return: The virtual address mapped to the physical address, or zero.
 */
static u64 pixel_gpu_itmon_search_pgd(struct kbase_mmu_mode const *mmu_mode,
	int level, phys_addr_t pa_pgd, phys_addr_t pa_search, u64 va_prefix)
{
	u64 va_found = 0;
	int i;

	/* Map the page */
	const u64 *entry = kmap(pfn_to_page(PFN_DOWN(pa_pgd)));
	if (!entry)
		return 0;

	/* Shift the VA prefix left to make room for this new level */
	va_prefix <<= KBASE_MMU_PAGE_ENTRIES_LOG2;

	/* For each entry in the page directory */
	for (i = 0; i < KBASE_MMU_PAGE_ENTRIES; i++) {

		/* Is this a PTE, an ATE, or invalid? */
		if (mmu_mode->pte_is_valid(entry[i], level)) {

			/* PTE: Get the physical address of the next level PGD */
			phys_addr_t pa_next = mmu_mode->pte_to_phy_addr(entry[i])
				& PHYSICAL_ADDRESS_MASK;

			/* Recurse into it */
			if (pa_next) {
				va_found = pixel_gpu_itmon_search_pgd(mmu_mode, level + 1,
					pa_next, pa_search, va_prefix);
				if (va_found)
					break;
			}

		} else if (mmu_mode->ate_is_valid(entry[i], level)) {

			/* ATE: Get the page (or block) physical address */
			phys_addr_t pa_start = mmu_mode->pte_to_phy_addr(entry[i])
				& PHYSICAL_ADDRESS_MASK;

			if (pa_start) {
				/* Get the size of the block:
				 * this may be larger than a page, depending on level.
				 * A competent compiler will hoist this out of the loop.
				 */
				int remaining_levels = MIDGARD_MMU_BOTTOMLEVEL - level;
				size_t block_size = PAGE_SIZE <<
					(KBASE_MMU_PAGE_ENTRIES_LOG2 * remaining_levels);
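				/* E.g., assuming 4 KiB pages and 512-entry levels:
				 * an ATE at the bottom level (3) covers 4 KiB, at
				 * level 2 a block covers 2 MiB, and at level 1, 1 GiB.
				 */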

				/* Test if the block contains the PA we are searching for */
				if ((pa_search >= pa_start) &&
				    (pa_search < (pa_start + block_size))) {

					/* Combine translated and non-translated address bits */
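					/* E.g., with hypothetical numbers: a level-2
					 * block at va_prefix 0x401 (block_size 2 MiB)
					 * and an offset of 0x456 into the block yields
					 * va_found = 0x401 * 0x200000 + 0x456
					 *          = 0x80200456.
					 */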
					va_found = (va_prefix * block_size) +
					           (pa_search % block_size);
					break;
				}
			}
		}

		/* Advance the virtual address prefix with each entry */
		va_prefix++;
	}

	kunmap(pfn_to_page(PFN_DOWN(pa_pgd)));

	return va_found;
}

/**
 * pixel_gpu_itmon_search_page_table() - Search a page table for a PA.
 *
 * @kbdev: The &struct kbase_device.
 * @table: The &struct kbase_mmu_table to search.
 * @pa:    The physical address to search for.
 *
 * Return: The virtual address mapped to the physical address, or zero.
 */
static u64 pixel_gpu_itmon_search_page_table(struct kbase_device *kbdev,
	struct kbase_mmu_table *table, phys_addr_t pa)
{
	u64 va;

	rt_mutex_lock(&table->mmu_lock);
	va = pixel_gpu_itmon_search_pgd(kbdev->mmu_mode, MIDGARD_MMU_TOPLEVEL,
		table->pgd, pa, 0);
	rt_mutex_unlock(&table->mmu_lock);

	return va;
}

/**
 * pixel_gpu_itmon_search_context() - Search the page tables of a context.
 *
 * @pc:   The &struct pixel_context.
 * @kctx: The &struct kbase_context to search.
 *
 * Return: True if the faulting physical address was found.
 */
static bool pixel_gpu_itmon_search_context(struct pixel_context *pc,
	struct kbase_context *kctx)
{
	u64 va = pixel_gpu_itmon_search_page_table(pc->kbdev, &kctx->mmu,
		pc->itmon.pa);

	/* If a mapping was found */
	if (va) {
		/* Get the task from the context */
		struct pid *pid_struct;
		struct task_struct *task;

		rcu_read_lock();
		pid_struct = find_get_pid(kctx->pid);
		task = pid_task(pid_struct, PIDTYPE_PID);

		/* And report it */
		dev_err(pc->kbdev->dev,
			"ITMON: Faulting physical address 0x%llX appears in page table of "
			"task %s (pid %u), mapped from virtual address 0x%llx (as %d)\n",
			pc->itmon.pa, task ? task->comm : "[null task]", kctx->pid, va,
			kctx->as_nr);

		put_pid(pid_struct);
		rcu_read_unlock();

		return true;
	}

	return false;
}

#if MALI_USE_CSF
/**
 * pixel_gpu_itmon_search_csffw() - Search the CSF MCU page table.
 *
 * @pc: The &struct pixel_context.
 *
 * Return: True if the faulting physical address was found.
 */
static bool pixel_gpu_itmon_search_csffw(struct pixel_context *pc)
{
	struct kbase_device *kbdev = pc->kbdev;

	u64 va = pixel_gpu_itmon_search_page_table(kbdev, &kbdev->csf.mcu_mmu,
		pc->itmon.pa);

	/* If a mapping was found */
	if (va) {
		dev_err(kbdev->dev,
			"ITMON: Faulting physical address 0x%llX appears in CSF MCU page "
			"table, mapped from virtual address 0x%llx (as 0)\n",
			pc->itmon.pa, va);
		return true;
	}

	return false;
}
#endif /* MALI_USE_CSF */

/**
 * pixel_gpu_itmon_worker() - ITMON fault worker.
 *
 * @data: The &struct work_struct.
 *
 * Runs in process context so that it can take the mutexes needed to search
 * the page tables.
 */
static void pixel_gpu_itmon_worker(struct work_struct *data)
{
	/* Recover the pixel_context */
	struct pixel_context *pc = container_of(data, struct pixel_context,
		itmon.work);

	struct kbase_device *kbdev = pc->kbdev;
	struct kbase_context *kctx;
	bool found = false;

	/* Log that the work has started */
	dev_err(kbdev->dev,
		"ITMON: Searching for physical address 0x%llX across all GPU page "
		"tables...\n", pc->itmon.pa);

	/* Search the CSF MCU page table first */
#if MALI_USE_CSF
	found |= pixel_gpu_itmon_search_csffw(pc);
#endif

	mutex_lock(&kbdev->kctx_list_lock);

	/* Enumerate all contexts and search their page tables */
	list_for_each_entry(kctx, &kbdev->kctx_list, kctx_list_link) {
		found |= pixel_gpu_itmon_search_context(pc, kctx);
	}

	mutex_unlock(&kbdev->kctx_list_lock);

	/* For completeness, log that we did not find the fault address anywhere */
	if (!found) {
		dev_err(kbdev->dev,
			"ITMON: Faulting physical address 0x%llX NOT PRESENT in any GPU "
			"page table - GPU would not have initiated this access\n",
			pc->itmon.pa);
	}

	/* Let the ITMON ISR know that we're done and it can continue */
	atomic_dec(&pc->itmon.active);
}

/**
 * pixel_gpu_itmon_notifier() - Handle an ITMON fault report.
 *
 * @nb:      The &struct notifier_block inside &struct pixel_context.
 * @action:  Unused.
 * @nb_data: The ITMON report.
 *
 * Return: NOTIFY_OK to continue calling other notifier blocks.
 */
static int pixel_gpu_itmon_notifier(struct notifier_block *nb,
	unsigned long action, void *nb_data)
{
	/* Recover the pixel_context */
	struct pixel_context *pc = container_of(nb, struct pixel_context, itmon.nb);

	/* Get details of the ITMON report */
	struct itmon_notifier *itmon_info = nb_data;

	/* Filter out non-GPU ports */
	if ((!itmon_info->port) ||
	    (strncmp(itmon_info->port, "GPU", 3) &&
	     strncmp(itmon_info->port, "G3D", 3)))
		return NOTIFY_OK;

	/* Immediately acknowledge that this fault matched our filter */
	dev_err(pc->kbdev->dev,
		"Detected relevant ITMON fault report from %s to 0x%llX, "
		"enqueueing work...\n", itmon_info->port, (u64)itmon_info->target_addr);

	/* Make sure we have finished processing previous work */
	if (atomic_fetch_inc(&pc->itmon.active) != 0) {
		atomic_dec(&pc->itmon.active);
		dev_err(pc->kbdev->dev, "Previous work not yet finished, skipping\n");
		return NOTIFY_OK;
	}

	/* Save the PA to search for */
	pc->itmon.pa = itmon_info->target_addr;

	/* Access to GPU page tables is protected by a mutex, which we cannot lock
	 * here in an atomic context.  Queue work to another CPU to do the search.
	 */
	queue_work(pc->itmon.wq, &pc->itmon.work);

	/* (Try to) busy-wait for that work to complete, before we ramdump */
	{
		u64 start = ktime_get_ns();

		while (atomic_read(&pc->itmon.active) > 0) {

			if ((ktime_get_ns() - start) < (NSEC_PER_SEC / 2)) {
				mdelay(10);
			} else {
				dev_err(pc->kbdev->dev,
					"Timed out waiting for ITMON work; this is not an error\n");
				break;
			}
		}
	}

	return NOTIFY_OK;
}

/**
 * gpu_itmon_init() - Initialize ITMON notifier callback.
 *
 * @kbdev: The &struct kbase_device.
 *
 * Return: An error code, or 0 on success.
 */
int gpu_itmon_init(struct kbase_device *kbdev)
{
	struct pixel_context *pc = kbdev->platform_context;

	/* The additional diagnostic information offered by this callback is only
	 * useful if it can be collected as part of a ramdump.  Ramdumps are
	 * disabled in "user" builds, so query the build variant and skip
	 * initialization if that is the case.
	 */
	struct device_node *dpm = of_find_node_by_name(NULL, "dpm");
	const char *variant = NULL;
	bool skip = (!dpm) || of_property_read_string(dpm, "variant", &variant) ||
		    (!strcmp(variant, "user"));

	/* Drop the reference taken by of_find_node_by_name();
	 * of_node_put(NULL) is a no-op.
	 */
	of_node_put(dpm);
	if (skip)
		return 0;

	/* Create a workqueue that can run on any CPU with high priority, so that
	 * it can run while we (try to) wait for it in the ITMON interrupt.
	 */
	pc->itmon.wq = alloc_workqueue("mali_itmon_wq", WQ_UNBOUND | WQ_HIGHPRI, 1);
	if (!pc->itmon.wq)
		return -ENOMEM;
	INIT_WORK(&pc->itmon.work, pixel_gpu_itmon_worker);

	/* Then register our ITMON notifier callback */
	pc->itmon.nb.notifier_call = pixel_gpu_itmon_notifier;
	itmon_notifier_chain_register(&pc->itmon.nb);

	return 0;
}

/**
 * gpu_itmon_term() - Terminate ITMON notifier callback.
 *
 * @kbdev: The &struct kbase_device.
 */
void gpu_itmon_term(struct kbase_device *kbdev)
{
	struct pixel_context *pc = kbdev->platform_context;

	if (pc->itmon.wq) {
		/* Unregister our ITMON notifier callback first */
		itmon_notifier_chain_unregister(&pc->itmon.nb);

		/* Then it's safe to destroy the workqueue */
		destroy_workqueue(pc->itmon.wq);
		pc->itmon.wq = NULL;
	}
}

/* Depend on ITMON driver */
MODULE_SOFTDEP("pre: itmon");

#endif /* IS_ENABLED(CONFIG_EXYNOS_ITMON) */