path: root/mali_kbase/csf/mali_kbase_csf_reset_gpu.c
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
 *
 * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
 *
 * This program is free software and is provided to you under the terms of the
 * GNU General Public License version 2 as published by the Free Software
 * Foundation, and any use by you of this program is subject to the terms
 * of such GNU license.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, you can access it online at
 * http://www.gnu.org/licenses/gpl-2.0.html.
 *
 */

#include <mali_kbase.h>
#include <mali_kbase_ctx_sched.h>
#include <mali_kbase_hwcnt_context.h>
#include <device/mali_kbase_device.h>
#include <backend/gpu/mali_kbase_irq_internal.h>
#include <backend/gpu/mali_kbase_pm_internal.h>
#include <mali_kbase_regs_history_debugfs.h>
#include <csf/mali_kbase_csf_trace_buffer.h>
#include <csf/ipa_control/mali_kbase_csf_ipa_control.h>
#include <mali_kbase_reset_gpu.h>
#include <linux/string.h>

enum kbasep_soft_reset_status {
	RESET_SUCCESS = 0,
	SOFT_RESET_FAILED,
	L2_ON_FAILED,
	MCU_REINIT_FAILED
};

static inline bool
kbase_csf_reset_state_is_silent(enum kbase_csf_reset_gpu_state state)
{
	return (state == KBASE_CSF_RESET_GPU_COMMITTED_SILENT);
}

static inline bool
kbase_csf_reset_state_is_committed(enum kbase_csf_reset_gpu_state state)
{
	return (state == KBASE_CSF_RESET_GPU_COMMITTED ||
		state == KBASE_CSF_RESET_GPU_COMMITTED_SILENT);
}

static inline bool
kbase_csf_reset_state_is_active(enum kbase_csf_reset_gpu_state state)
{
	return (state == KBASE_CSF_RESET_GPU_HAPPENING);
}
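
/*
 * Summary of the reset state transitions driven by this file (derived from
 * the functions below; the enum itself is defined elsewhere in the driver):
 *
 *   NOT_PENDING -> PREPARED            kbase_prepare_to_reset_gpu()
 *   PREPARED    -> COMMITTED           kbase_reset_gpu()
 *   NOT_PENDING -> COMMITTED_SILENT    kbase_reset_gpu_silent()
 *   COMMITTED{,_SILENT} -> HAPPENING   reset worker begins exclusive HW access
 *   HAPPENING   -> NOT_PENDING         reset completed successfully
 *   HAPPENING   -> FAILED              reset could not be completed
 */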

/**
 * DOC: Mechanism for coherent access to the HW with respect to GPU reset
 *
 * Access to the HW from non-atomic context outside of the reset thread must
 * use kbase_reset_gpu_prevent_and_wait() / kbase_reset_gpu_try_prevent().
 *
 * This currently works by taking the &kbase_device's csf.reset.sem, for
 * 'write' access by the GPU reset thread and 'read' access by every other
 * thread. The use of this rw_semaphore means:
 *
 * - there will be mutual exclusion (and thus waiting) between the thread doing
 *   reset ('writer') and threads trying to access the GPU for 'normal'
 *   operations ('readers')
 *
 * - multiple threads may prevent reset from happening without serializing each
 *   other prematurely. Note that at present the wait for reset to finish has
 *   to be done higher up in the driver than actual GPU access, at a point
 *   where it won't cause lock ordering issues. At such a point, some paths may
 *   actually lead to no GPU access, but we would prefer to avoid serializing
 *   at that level
 *
 * - lockdep (if enabled in the kernel) will check such uses for deadlock
 *
 * If instead &kbase_device's csf.reset.wait &wait_queue_head_t were used on
 * its own, we'd also need to add a &lockdep_map and appropriate lockdep calls
 * to make use of lockdep checking in all places where the &wait_queue_head_t
 * is waited upon or signaled.
 *
 * Indeed places where we wait on &kbase_device's csf.reset.wait (such as
 * kbase_reset_gpu_wait()) are the only places where we need extra call(s) to
 * lockdep, and they are made on the existing rw_semaphore.
 *
 * For access from atomic context, the &kbase_device's csf.reset.state member
 * should be checked instead, such as by using kbase_reset_gpu_is_active().
 *
 * Ideally the &rw_semaphore should be replaced in future with a single mutex
 * that protects any access to the GPU, via reset or otherwise.
 */
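
/*
 * Illustrative usage sketch of the prevent/allow API described above. This is
 * not a caller from this file; the surrounding code is an assumption:
 *
 *	if (!kbase_reset_gpu_prevent_and_wait(kbdev)) {
 *		... non-atomic access to the GPU ...
 *		kbase_reset_gpu_allow(kbdev);
 *	}
 */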

int kbase_reset_gpu_prevent_and_wait(struct kbase_device *kbdev)
{
	down_read(&kbdev->csf.reset.sem);

	if (atomic_read(&kbdev->csf.reset.state) ==
	    KBASE_CSF_RESET_GPU_FAILED) {
		up_read(&kbdev->csf.reset.sem);
		return -ENOMEM;
	}

	if (WARN_ON(kbase_reset_gpu_is_active(kbdev))) {
		up_read(&kbdev->csf.reset.sem);
		return -EFAULT;
	}

	return 0;
}
KBASE_EXPORT_TEST_API(kbase_reset_gpu_prevent_and_wait);

int kbase_reset_gpu_try_prevent(struct kbase_device *kbdev)
{
	if (!down_read_trylock(&kbdev->csf.reset.sem))
		return -EAGAIN;

	if (atomic_read(&kbdev->csf.reset.state) ==
	    KBASE_CSF_RESET_GPU_FAILED) {
		up_read(&kbdev->csf.reset.sem);
		return -ENOMEM;
	}

	if (WARN_ON(kbase_reset_gpu_is_active(kbdev))) {
		up_read(&kbdev->csf.reset.sem);
		return -EFAULT;
	}

	return 0;
}

void kbase_reset_gpu_allow(struct kbase_device *kbdev)
{
	up_read(&kbdev->csf.reset.sem);
}
KBASE_EXPORT_TEST_API(kbase_reset_gpu_allow);

void kbase_reset_gpu_assert_prevented(struct kbase_device *kbdev)
{
#if KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE
	lockdep_assert_held_read(&kbdev->csf.reset.sem);
#else
	lockdep_assert_held(&kbdev->csf.reset.sem);
#endif
	WARN_ON(kbase_reset_gpu_is_active(kbdev));
}

void kbase_reset_gpu_assert_failed_or_prevented(struct kbase_device *kbdev)
{
	if (atomic_read(&kbdev->csf.reset.state) == KBASE_CSF_RESET_GPU_FAILED)
		return;

#if KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE
	lockdep_assert_held_read(&kbdev->csf.reset.sem);
#else
	lockdep_assert_held(&kbdev->csf.reset.sem);
#endif
	WARN_ON(kbase_reset_gpu_is_active(kbdev));
}

/* Mark the reset as now happening, and synchronize with other threads that
 * might be trying to access the GPU
 */
static void kbase_csf_reset_begin_hw_access_sync(
	struct kbase_device *kbdev,
	enum kbase_csf_reset_gpu_state initial_reset_state)
{
	unsigned long hwaccess_lock_flags;
	unsigned long scheduler_spin_lock_flags;

	/* Note this is a WARN/atomic_set because a race occurring here
	 * would be a software issue
	 */
	WARN_ON(!kbase_csf_reset_state_is_committed(initial_reset_state));

	down_write(&kbdev->csf.reset.sem);

	/* Threads in atomic context accessing the HW will hold one of these
	 * locks, so synchronize with them too.
	 */
	spin_lock_irqsave(&kbdev->hwaccess_lock, hwaccess_lock_flags);
	kbase_csf_scheduler_spin_lock(kbdev, &scheduler_spin_lock_flags);
	atomic_set(&kbdev->csf.reset.state, KBASE_CSF_RESET_GPU_HAPPENING);
	kbase_csf_scheduler_spin_unlock(kbdev, scheduler_spin_lock_flags);
	spin_unlock_irqrestore(&kbdev->hwaccess_lock, hwaccess_lock_flags);
}

/* Mark the reset as finished and allow other threads to once more access the
 * GPU
 */
static void kbase_csf_reset_end_hw_access(struct kbase_device *kbdev,
					  int err_during_reset,
					  bool firmware_inited)
{
	unsigned long hwaccess_lock_flags;
	unsigned long scheduler_spin_lock_flags;

	WARN_ON(!kbase_csf_reset_state_is_active(
		atomic_read(&kbdev->csf.reset.state)));

	/* Once again, we synchronize with atomic context threads accessing the
	 * HW, as otherwise any actions they defer could get lost
	 */
	spin_lock_irqsave(&kbdev->hwaccess_lock, hwaccess_lock_flags);
	kbase_csf_scheduler_spin_lock(kbdev, &scheduler_spin_lock_flags);

	if (!err_during_reset) {
		atomic_set(&kbdev->csf.reset.state,
			   KBASE_CSF_RESET_GPU_NOT_PENDING);
	} else {
		dev_err(kbdev->dev, "Reset failed to complete");
		atomic_set(&kbdev->csf.reset.state, KBASE_CSF_RESET_GPU_FAILED);
	}

	kbase_csf_scheduler_spin_unlock(kbdev, scheduler_spin_lock_flags);
	spin_unlock_irqrestore(&kbdev->hwaccess_lock, hwaccess_lock_flags);

	/* Invoke the scheduling tick after formally finishing the reset,
	 * otherwise the tick might start too soon and notice that reset
	 * is still in progress.
	 */
	up_write(&kbdev->csf.reset.sem);
	wake_up(&kbdev->csf.reset.wait);

	if (!err_during_reset && likely(firmware_inited))
		kbase_csf_scheduler_enable_tick_timer(kbdev);
}

static void kbase_csf_debug_dump_registers(struct kbase_device *kbdev)
{
	kbase_io_history_dump(kbdev);

	dev_err(kbdev->dev, "Register state:");
	dev_err(kbdev->dev, "  GPU_IRQ_RAWSTAT=0x%08x   GPU_STATUS=0x%08x  MCU_STATUS=0x%08x",
		kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT)),
		kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_STATUS)),
		kbase_reg_read(kbdev, GPU_CONTROL_REG(MCU_STATUS)));
	dev_err(kbdev->dev, "  JOB_IRQ_RAWSTAT=0x%08x   MMU_IRQ_RAWSTAT=0x%08x   GPU_FAULTSTATUS=0x%08x",
		kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_RAWSTAT)),
		kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_RAWSTAT)),
		kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_FAULTSTATUS)));
	dev_err(kbdev->dev, "  GPU_IRQ_MASK=0x%08x   JOB_IRQ_MASK=0x%08x   MMU_IRQ_MASK=0x%08x",
		kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK)),
		kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK)),
		kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK)));
	dev_err(kbdev->dev, "  PWR_OVERRIDE0=0x%08x   PWR_OVERRIDE1=0x%08x",
		kbase_reg_read(kbdev, GPU_CONTROL_REG(PWR_OVERRIDE0)),
		kbase_reg_read(kbdev, GPU_CONTROL_REG(PWR_OVERRIDE1)));
	dev_err(kbdev->dev, "  SHADER_CONFIG=0x%08x   L2_MMU_CONFIG=0x%08x   TILER_CONFIG=0x%08x",
		kbase_reg_read(kbdev, GPU_CONTROL_REG(SHADER_CONFIG)),
		kbase_reg_read(kbdev, GPU_CONTROL_REG(L2_MMU_CONFIG)),
		kbase_reg_read(kbdev, GPU_CONTROL_REG(TILER_CONFIG)));
}

static void kbase_csf_dump_firmware_trace_buffer(struct kbase_device *kbdev)
{
	u8 *buf, *p, *pnewline, *pend, *pendbuf;
	unsigned int read_size, remaining_size;
	struct firmware_trace_buffer *tb =
		kbase_csf_firmware_get_trace_buffer(kbdev, FW_TRACE_BUF_NAME);

	if (tb == NULL) {
		dev_dbg(kbdev->dev, "Can't get the trace buffer, firmware trace dump skipped");
		return;
	}

	buf = kmalloc(PAGE_SIZE + 1, GFP_KERNEL);
	if (buf == NULL) {
		dev_err(kbdev->dev, "Short of memory, firmware trace dump skipped");
		return;
	}

	buf[PAGE_SIZE] = 0;

	p = buf;
	pendbuf = &buf[PAGE_SIZE];

	dev_err(kbdev->dev, "Firmware trace buffer dump:");
	while ((read_size = kbase_csf_firmware_trace_buffer_read_data(tb, p,
								pendbuf - p))) {
		pend = p + read_size;
		p = buf;

		while (p < pend && (pnewline = memchr(p, '\n', pend - p))) {
			/* Null-terminate the string */
			*pnewline = 0;

			dev_err(kbdev->dev, "FW> %s", p);

			p = pnewline + 1;
		}

		remaining_size = pend - p;

		if (!remaining_size) {
			p = buf;
		} else if (remaining_size < PAGE_SIZE) {
			/* Copy unfinished string to the start of the buffer */
			memmove(buf, p, remaining_size);
			p = &buf[remaining_size];
		} else {
			/* Print abnormal page-long string without newlines */
			dev_err(kbdev->dev, "FW> %s", buf);
			p = buf;
		}
	}

	if (p != buf) {
		/* Null-terminate and print last unfinished string */
		*p = 0;
		dev_err(kbdev->dev, "FW> %s", buf);
	}

	kfree(buf);
}

/**
 * kbase_csf_hwcnt_on_reset_error() - Sets HWCNT to appropriate state in the
 *                                    event of an error during GPU reset.
 * @kbdev: Pointer to KBase device
 */
static void kbase_csf_hwcnt_on_reset_error(struct kbase_device *kbdev)
{
	unsigned long flags;

	/* Treat this as an unrecoverable error for HWCNT */
	kbase_hwcnt_backend_csf_on_unrecoverable_error(&kbdev->hwcnt_gpu_iface);

	/* Re-enable counters to ensure matching enable/disable pair.
	 * This might reduce the hwcnt disable count to 0, and therefore
	 * trigger actual re-enabling of hwcnt.
	 * However, as the backend is now in the unrecoverable error state,
	 * re-enabling will immediately fail and put the context into the error
	 * state, preventing the hardware from being touched (which could have
	 * risked a hang).
	 */
	kbase_csf_scheduler_spin_lock(kbdev, &flags);
	kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx);
	kbase_csf_scheduler_spin_unlock(kbdev, flags);
}

static enum kbasep_soft_reset_status kbase_csf_reset_gpu_once(struct kbase_device *kbdev,
							      bool firmware_inited, bool silent)
{
	unsigned long flags;
	int err;
	enum kbasep_soft_reset_status ret = RESET_SUCCESS;

	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
	spin_lock(&kbdev->mmu_mask_change);
	kbase_pm_reset_start_locked(kbdev);

	dev_dbg(kbdev->dev,
		"We're about to flush out the IRQs and their bottom halves\n");
	kbdev->irq_reset_flush = true;

	/* Disable IRQs to prevent IRQ handlers from kicking in after the
	 * spinlock is released; this also clears any outstanding interrupts
	 */
	kbase_pm_disable_interrupts_nolock(kbdev);

	spin_unlock(&kbdev->mmu_mask_change);
	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);

	dev_dbg(kbdev->dev, "Ensure that any IRQ handlers have finished\n");
	/* Must be done without any locks IRQ handlers will take. */
	kbase_synchronize_irqs(kbdev);

	dev_dbg(kbdev->dev, "Flush out any in-flight work items\n");
	kbase_flush_mmu_wqs(kbdev);

	dev_dbg(kbdev->dev,
		"The flush has completed so reset the active indicator\n");
	kbdev->irq_reset_flush = false;

	mutex_lock(&kbdev->pm.lock);
	if (!silent)
		dev_err(kbdev->dev, "Resetting GPU (allowing up to %d ms)",
								RESET_TIMEOUT);

	/* Output the state of some interesting registers to help in the
	 * debugging of GPU resets, and dump the firmware trace buffer
	 */
	if (!silent) {
		kbase_csf_debug_dump_registers(kbdev);
		if (likely(firmware_inited))
			kbase_csf_dump_firmware_trace_buffer(kbdev);
	}

	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
	kbase_ipa_control_handle_gpu_reset_pre(kbdev);
	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);

	/* Tell hardware counters a reset is about to occur.
	 * If the backend is in an unrecoverable error state (e.g. due to
	 * firmware being unresponsive) this will transition the backend out of
	 * it, on the assumption a reset will fix whatever problem there was.
	 */
	kbase_hwcnt_backend_csf_on_before_reset(&kbdev->hwcnt_gpu_iface);

	/* Reset the GPU */
	err = kbase_pm_init_hw(kbdev, 0);

	mutex_unlock(&kbdev->pm.lock);

	if (WARN_ON(err))
		return SOFT_RESET_FAILED;

	mutex_lock(&kbdev->mmu_hw_mutex);
	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
	kbase_ctx_sched_restore_all_as(kbdev);
	kbase_ipa_control_handle_gpu_reset_post(kbdev);
	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
	mutex_unlock(&kbdev->mmu_hw_mutex);

	kbase_pm_enable_interrupts(kbdev);

	mutex_lock(&kbdev->pm.lock);
	kbase_pm_reset_complete(kbdev);
	/* Synchronously wait for the reload of firmware to complete */
	err = kbase_pm_wait_for_desired_state(kbdev);
	mutex_unlock(&kbdev->pm.lock);

	if (err) {
		if (!kbase_pm_l2_is_in_desired_state(kbdev))
			ret = L2_ON_FAILED;
		else if (!kbase_pm_mcu_is_in_desired_state(kbdev))
			ret = MCU_REINIT_FAILED;
	}

	return ret;
}

static int kbase_csf_reset_gpu_now(struct kbase_device *kbdev, bool firmware_inited, bool silent)
{
	unsigned long flags;
	enum kbasep_soft_reset_status ret;

	WARN_ON(kbdev->irq_reset_flush);
	/* The reset must be marked as happening by now, otherwise other
	 * threads will not have been synchronized with and stopped from
	 * accessing the HW
	 */
#if KERNEL_VERSION(5, 3, 0) <= LINUX_VERSION_CODE
	lockdep_assert_held_write(&kbdev->csf.reset.sem);
#elif KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE
	lockdep_assert_held_exclusive(&kbdev->csf.reset.sem);
#else
	lockdep_assert_held(&kbdev->csf.reset.sem);
#endif
	WARN_ON(!kbase_reset_gpu_is_active(kbdev));

	/* Reset the scheduler state before disabling the interrupts as suspend
	 * of active CSG slots would also be done as a part of reset.
	 */
	if (likely(firmware_inited))
		kbase_csf_scheduler_reset(kbdev);
	cancel_work_sync(&kbdev->csf.firmware_reload_work);

	dev_dbg(kbdev->dev, "Disable GPU hardware counters.\n");
	/* This call will block until counters are disabled. */
	kbase_hwcnt_context_disable(kbdev->hwcnt_gpu_ctx);

	ret = kbase_csf_reset_gpu_once(kbdev, firmware_inited, silent);
	if (ret == SOFT_RESET_FAILED) {
		dev_err(kbdev->dev, "Soft-reset failed");
		goto err;
	} else if (ret == L2_ON_FAILED) {
		dev_err(kbdev->dev, "L2 power up failed after the soft-reset");
		goto err;
	} else if (ret == MCU_REINIT_FAILED) {
		dev_err(kbdev->dev, "MCU re-init failed trying full firmware reload");
		/* Since MCU reinit failed despite successful soft reset, we can try
		 * the firmware full reload.
		 */
		kbdev->csf.firmware_full_reload_needed = true;
		ret = kbase_csf_reset_gpu_once(kbdev, firmware_inited, true);
		if (ret != RESET_SUCCESS) {
			dev_err(kbdev->dev,
				"MCU Re-init failed even after trying full firmware reload, ret = [%d]",
				ret);
			goto err;
		}
	}

	/* Re-enable GPU hardware counters */
	kbase_csf_scheduler_spin_lock(kbdev, &flags);
	kbase_hwcnt_context_enable(kbdev->hwcnt_gpu_ctx);
	kbase_csf_scheduler_spin_unlock(kbdev, flags);
	if (!silent)
		dev_err(kbdev->dev, "Reset complete");
	return 0;
err:
	kbase_csf_hwcnt_on_reset_error(kbdev);
	return -1;
}

static void kbase_csf_reset_gpu_worker(struct work_struct *data)
{
	struct kbase_device *kbdev = container_of(data, struct kbase_device,
						  csf.reset.work);
	bool gpu_sleep_mode_active = false;
	bool firmware_inited;
	unsigned long flags;
	int err = 0;
	const enum kbase_csf_reset_gpu_state initial_reset_state =
		atomic_read(&kbdev->csf.reset.state);
	const bool silent =
		kbase_csf_reset_state_is_silent(initial_reset_state);

	/* Ensure any threads (e.g. executing the CSF scheduler) have finished
	 * using the HW
	 */
	kbase_csf_reset_begin_hw_access_sync(kbdev, initial_reset_state);

	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
	firmware_inited = kbdev->csf.firmware_inited;
#ifdef KBASE_PM_RUNTIME
	gpu_sleep_mode_active = kbdev->pm.backend.gpu_sleep_mode_active;
#endif
	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);

	if (unlikely(gpu_sleep_mode_active)) {
#ifdef KBASE_PM_RUNTIME
		/* All on-slot groups are suspended as part of the GPU reset,
		 * so the MCU needs to be woken up from sleep first.
		 * No PM active reference is taken here since the GPU is in
		 * sleep state, and both runtime and system suspend synchronize
		 * with the GPU reset before they wake up the GPU to suspend
		 * on-slot groups. GPUCORE-29850 would add the proper handling.
		 */
		kbase_pm_lock(kbdev);
		if (kbase_pm_force_mcu_wakeup_after_sleep(kbdev))
			dev_warn(kbdev->dev, "Wait for MCU wake up failed on GPU reset");
		kbase_pm_unlock(kbdev);

		err = kbase_csf_reset_gpu_now(kbdev, firmware_inited, silent);
#endif
	} else if (!kbase_pm_context_active_handle_suspend(kbdev,
			KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE)) {
		err = kbase_csf_reset_gpu_now(kbdev, firmware_inited, silent);
		kbase_pm_context_idle(kbdev);
	}

	kbase_disjoint_state_down(kbdev);

	/* Allow other threads to once again use the GPU */
	kbase_csf_reset_end_hw_access(kbdev, err, firmware_inited);
}

bool kbase_prepare_to_reset_gpu(struct kbase_device *kbdev, unsigned int flags)
{
	if (flags & RESET_FLAGS_HWC_UNRECOVERABLE_ERROR)
		kbase_hwcnt_backend_csf_on_unrecoverable_error(
			&kbdev->hwcnt_gpu_iface);

	if (atomic_cmpxchg(&kbdev->csf.reset.state,
			KBASE_CSF_RESET_GPU_NOT_PENDING,
			KBASE_CSF_RESET_GPU_PREPARED) !=
			KBASE_CSF_RESET_GPU_NOT_PENDING)
		/* Some other thread is already resetting the GPU */
		return false;

	return true;
}
KBASE_EXPORT_TEST_API(kbase_prepare_to_reset_gpu);

bool kbase_prepare_to_reset_gpu_locked(struct kbase_device *kbdev,
				       unsigned int flags)
{
	lockdep_assert_held(&kbdev->hwaccess_lock);

	return kbase_prepare_to_reset_gpu(kbdev, flags);
}

void kbase_reset_gpu(struct kbase_device *kbdev)
{
	/* Note this is a WARN/atomic_set because a race occurring here
	 * would be a software issue
	 */
	if (WARN_ON(atomic_read(&kbdev->csf.reset.state) !=
		    KBASE_CSF_RESET_GPU_PREPARED))
		return;

	atomic_set(&kbdev->csf.reset.state, KBASE_CSF_RESET_GPU_COMMITTED);
	dev_err(kbdev->dev, "Preparing to soft-reset GPU\n");

	kbase_disjoint_state_up(kbdev);

	queue_work(kbdev->csf.reset.workq, &kbdev->csf.reset.work);
}
KBASE_EXPORT_TEST_API(kbase_reset_gpu);
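
/*
 * Illustrative sketch of the two-phase prepare/commit usage above. The caller
 * shown here is an assumption, not code from this file, and RESET_FLAGS_NONE
 * is assumed to be the "no flags" value from the reset header:
 *
 *	if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_NONE))
 *		kbase_reset_gpu(kbdev);
 */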

void kbase_reset_gpu_locked(struct kbase_device *kbdev)
{
	lockdep_assert_held(&kbdev->hwaccess_lock);

	kbase_reset_gpu(kbdev);
}

int kbase_reset_gpu_silent(struct kbase_device *kbdev)
{
	if (atomic_cmpxchg(&kbdev->csf.reset.state,
				KBASE_CSF_RESET_GPU_NOT_PENDING,
				KBASE_CSF_RESET_GPU_COMMITTED_SILENT) !=
				KBASE_CSF_RESET_GPU_NOT_PENDING) {
		/* Some other thread is already resetting the GPU */
		return -EAGAIN;
	}

	kbase_disjoint_state_up(kbdev);

	queue_work(kbdev->csf.reset.workq, &kbdev->csf.reset.work);

	return 0;
}
KBASE_EXPORT_TEST_API(kbase_reset_gpu_silent);
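
/*
 * Illustrative sketch (an assumption, not a caller in this file): a thread
 * that wants a reset performed without error-level logging and needs to block
 * until it finishes could pair the silent request with kbase_reset_gpu_wait():
 *
 *	if (!kbase_reset_gpu_silent(kbdev))
 *		err = kbase_reset_gpu_wait(kbdev);
 */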

bool kbase_reset_gpu_is_active(struct kbase_device *kbdev)
{
	enum kbase_csf_reset_gpu_state reset_state =
		atomic_read(&kbdev->csf.reset.state);

	/* For CSF, the reset is considered active only when the reset worker
	 * is actually executing and other threads would have to wait for it to
	 * complete
	 */
	return kbase_csf_reset_state_is_active(reset_state);
}

int kbase_reset_gpu_wait(struct kbase_device *kbdev)
{
	const long wait_timeout =
		kbase_csf_timeout_in_jiffies(kbase_get_timeout_ms(kbdev, CSF_GPU_RESET_TIMEOUT));
	long remaining;

	/* Inform lockdep we might be trying to wait on a reset (as
	 * would've been done with down_read() - which has no 'timeout'
	 * variant), then use wait_event_timeout() to implement the timed
	 * wait.
	 *
	 * In CONFIG_PROVE_LOCKING builds, this should catch potential 'time
	 * bound' deadlocks such as:
	 * - incorrect lock order with respect to other locks
	 * - current thread has prevented reset
	 * - current thread is executing the reset worker
	 */
	might_lock_read(&kbdev->csf.reset.sem);

	remaining = wait_event_timeout(
		kbdev->csf.reset.wait,
		(atomic_read(&kbdev->csf.reset.state) ==
		 KBASE_CSF_RESET_GPU_NOT_PENDING) ||
			(atomic_read(&kbdev->csf.reset.state) ==
			 KBASE_CSF_RESET_GPU_FAILED),
		wait_timeout);

	if (!remaining) {
		dev_warn(kbdev->dev, "Timed out waiting for the GPU reset to complete");


		return -ETIMEDOUT;
	} else if (atomic_read(&kbdev->csf.reset.state) ==
			KBASE_CSF_RESET_GPU_FAILED) {
		return -ENOMEM;
	}

	return 0;
}
KBASE_EXPORT_TEST_API(kbase_reset_gpu_wait);

int kbase_reset_gpu_init(struct kbase_device *kbdev)
{
	kbdev->csf.reset.workq = alloc_workqueue("Mali reset workqueue", 0, 1);
	if (kbdev->csf.reset.workq == NULL)
		return -ENOMEM;

	INIT_WORK(&kbdev->csf.reset.work, kbase_csf_reset_gpu_worker);

	init_waitqueue_head(&kbdev->csf.reset.wait);
	init_rwsem(&kbdev->csf.reset.sem);

	return 0;
}

void kbase_reset_gpu_term(struct kbase_device *kbdev)
{
	destroy_workqueue(kbdev->csf.reset.workq);
}