author    | Jörg Wagner <jorwag@google.com> | 2023-08-31 19:15:13 +0000
committer | Jörg Wagner <jorwag@google.com> | 2023-09-01 09:13:55 +0000
commit    | b6fd708b3a4da86a196a61592ea3585f1aca7313 (patch)
tree      | 1cbe3029a45bf9869c17a5b6954e5ae074b44ac8
parent    | 46edf1b5965d872c5f8a09c6dc3dcbff58f78a92 (diff)
parent    | e61eb93296e9f940b32d4ad4b0c3a5557cbeaf17 (diff)
Merge r44p1-00dev3 from partner/upstream into android13-gs-pixel-5.10-udc-qpr1
Bug: 290882327
Change-Id: I90723cbaa3f294431087587fd8025f0688e51bf2
129 files changed, 7422 insertions, 3458 deletions
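The 1.20 (CSF) and 11.39 (JM) interface notes below restrict a child process from using a Mali device file descriptor inherited from its parent. A minimal userspace sketch of the described behaviour; the device node name and the exact error returned are assumptions, not stated in this changelog:

#include <fcntl.h>
#include <stdio.h>
#include <sys/wait.h>
#include <unistd.h>

int main(void)
{
	char c;
	int fd = open("/dev/mali0", O_RDWR); /* device node name assumed */

	if (fd < 0)
		return 1;

	if (fork() == 0) {
		/* Child: mmap/ioctl/read/poll on the inherited fd are expected
		 * to be rejected once the driver reports UK 1.20 / 11.39.
		 */
		if (read(fd, &c, 1) < 0)
			perror("read on inherited Mali fd");
		_exit(0);
	}
	wait(NULL);
	close(fd);
	return 0;
}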
diff --git a/common/include/linux/version_compat_defs.h b/common/include/linux/version_compat_defs.h index c9b1f62..47551f2 100644 --- a/common/include/linux/version_compat_defs.h +++ b/common/include/linux/version_compat_defs.h @@ -23,6 +23,21 @@ #define _VERSION_COMPAT_DEFS_H_ #include <linux/version.h> +#include <linux/highmem.h> +#include <linux/timer.h> + +#if (KERNEL_VERSION(4, 4, 267) < LINUX_VERSION_CODE) +#include <linux/overflow.h> +#endif + +#include <linux/bitops.h> +#if (KERNEL_VERSION(4, 19, 0) <= LINUX_VERSION_CODE) +#include <linux/bits.h> +#endif + +#ifndef BITS_PER_TYPE +#define BITS_PER_TYPE(type) (sizeof(type) * BITS_PER_BYTE) +#endif #if KERNEL_VERSION(4, 16, 0) > LINUX_VERSION_CODE typedef unsigned int __poll_t; @@ -62,18 +77,167 @@ typedef unsigned int __poll_t; /* Replace the default definition with CONFIG_LSM_MMAP_MIN_ADDR */ #undef kbase_mmap_min_addr #define kbase_mmap_min_addr CONFIG_LSM_MMAP_MIN_ADDR -#pragma message "kbase_mmap_min_addr compiled to CONFIG_LSM_MMAP_MIN_ADDR, no runtime update!" +#define KBASE_COMPILED_MMAP_MIN_ADDR_MSG \ + "* MALI kbase_mmap_min_addr compiled to CONFIG_LSM_MMAP_MIN_ADDR, no runtime update possible! *" #endif /* (CONFIG_LSM_MMAP_MIN_ADDR > CONFIG_DEFAULT_MMAP_MIN_ADDR) */ #endif /* CONFIG_LSM_MMAP_MIN_ADDR */ #if (kbase_mmap_min_addr == CONFIG_DEFAULT_MMAP_MIN_ADDR) -#pragma message "kbase_mmap_min_addr compiled to CONFIG_DEFAULT_MMAP_MIN_ADDR, no runtime update!" +#define KBASE_COMPILED_MMAP_MIN_ADDR_MSG \ + "* MALI kbase_mmap_min_addr compiled to CONFIG_DEFAULT_MMAP_MIN_ADDR, no runtime update possible! *" #endif #else /* CONFIG_MMU */ #define kbase_mmap_min_addr (0UL) -#pragma message "kbase_mmap_min_addr compiled to (0UL), no runtime update!" +#define KBASE_COMPILED_MMAP_MIN_ADDR_MSG \ + "* MALI kbase_mmap_min_addr compiled to (0UL), no runtime update possible! *" #endif /* CONFIG_MMU */ #endif /* KERNEL_VERSION(6, 1, 0) <= LINUX_VERSION_CODE */ +static inline void kbase_timer_setup(struct timer_list *timer, + void (*callback)(struct timer_list *timer)) +{ +#if KERNEL_VERSION(4, 14, 0) > LINUX_VERSION_CODE + setup_timer(timer, (void (*)(unsigned long))callback, (unsigned long)timer); +#else + timer_setup(timer, callback, 0); +#endif +} + +#ifndef WRITE_ONCE +#ifdef ASSIGN_ONCE +#define WRITE_ONCE(x, val) ASSIGN_ONCE(val, x) +#else +#define WRITE_ONCE(x, val) (ACCESS_ONCE(x) = (val)) +#endif +#endif + +#ifndef READ_ONCE +#define READ_ONCE(x) ACCESS_ONCE(x) +#endif + +static inline void *kbase_kmap(struct page *p) +{ +#if KERNEL_VERSION(5, 11, 0) <= LINUX_VERSION_CODE + return kmap_local_page(p); +#else + return kmap(p); +#endif /* KERNEL_VERSION(5, 11, 0) */ +} + +static inline void *kbase_kmap_atomic(struct page *p) +{ +#if KERNEL_VERSION(5, 11, 0) <= LINUX_VERSION_CODE + return kmap_local_page(p); +#else + return kmap_atomic(p); +#endif /* KERNEL_VERSION(5, 11, 0) */ +} + +static inline void kbase_kunmap(struct page *p, void *address) +{ +#if KERNEL_VERSION(5, 11, 0) <= LINUX_VERSION_CODE + kunmap_local(address); +#else + kunmap(p); +#endif /* KERNEL_VERSION(5, 11, 0) */ +} + +static inline void kbase_kunmap_atomic(void *address) +{ +#if KERNEL_VERSION(5, 11, 0) <= LINUX_VERSION_CODE + kunmap_local(address); +#else + kunmap_atomic(address); +#endif /* KERNEL_VERSION(5, 11, 0) */ +} +
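/*
 * A minimal sketch (not part of this patch) of how a call site uses the
 * kbase_kmap*() wrappers above, so that the kmap_local_page()/kmap()
 * version check stays inside version_compat_defs.h. The zeroing helper
 * below is illustrative only.
 */
#include <linux/mm.h>
#include <linux/string.h>
#include <linux/version_compat_defs.h>

static void example_zero_page(struct page *p)
{
	void *va = kbase_kmap(p); /* kmap_local_page() on >= 5.11, kmap() otherwise */

	memset(va, 0, PAGE_SIZE);
	kbase_kunmap(p, va); /* kunmap_local() or kunmap(), matching the map above */
}

/* The atomic variants pair the same way: memory mapped with
 * kbase_kmap_atomic() must be unmapped with kbase_kunmap_atomic().
 */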
+#if (KERNEL_VERSION(4, 4, 267) >= LINUX_VERSION_CODE) +/* Some of the older 4.4 kernel patch versions do + * not contain the overflow check functions. However, + * they are based on compiler intrinsics, so they + * are simple to reproduce. + */ +#define check_mul_overflow(a, b, d) __builtin_mul_overflow(a, b, d) +#endif + +/* + * There was a big rename in the 4.10 kernel (fence* -> dma_fence*), + * with most of the related functions keeping the same signatures. + */ + +#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) + +#include <linux/fence.h> + +#define dma_fence fence +#define dma_fence_ops fence_ops +#define dma_fence_context_alloc(a) fence_context_alloc(a) +#define dma_fence_init(a, b, c, d, e) fence_init(a, b, c, d, e) +#define dma_fence_get(a) fence_get(a) +#define dma_fence_put(a) fence_put(a) +#define dma_fence_signal(a) fence_signal(a) +#define dma_fence_is_signaled(a) fence_is_signaled(a) +#define dma_fence_add_callback(a, b, c) fence_add_callback(a, b, c) +#define dma_fence_remove_callback(a, b) fence_remove_callback(a, b) +#define dma_fence_default_wait fence_default_wait + +#if (KERNEL_VERSION(4, 9, 68) <= LINUX_VERSION_CODE) +#define dma_fence_get_status(a) (fence_is_signaled(a) ? (a)->error ?: 1 : 0) +#else +#define dma_fence_get_status(a) (fence_is_signaled(a) ? (a)->status ?: 1 : 0) +#endif + +#else + +#include <linux/dma-fence.h> + +#if (KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE) +#define dma_fence_get_status(a) (dma_fence_is_signaled(a) ? (a)->status ?: 1 : 0) +#endif + +#endif /* < 4.10.0 */ + +static inline void dma_fence_set_error_helper( +#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) + struct fence *fence, +#else + struct dma_fence *fence, +#endif + int error) +{ +#if (KERNEL_VERSION(4, 11, 0) <= LINUX_VERSION_CODE) + dma_fence_set_error(fence, error); +#elif (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE && \ + KERNEL_VERSION(4, 9, 68) <= LINUX_VERSION_CODE) + fence_set_error(fence, error); +#else + fence->status = error; +#endif +} + +#include <linux/mm.h> +#if !((KERNEL_VERSION(6, 3, 0) <= LINUX_VERSION_CODE) || \ + ((KERNEL_VERSION(6, 1, 25) <= LINUX_VERSION_CODE) && defined(__ANDROID_COMMON_KERNEL__))) +static inline void vm_flags_set(struct vm_area_struct *vma, vm_flags_t flags) +{ + vma->vm_flags |= flags; +} +static inline void vm_flags_clear(struct vm_area_struct *vma, vm_flags_t flags) +{ + vma->vm_flags &= ~flags; +} +#endif + +#if (KERNEL_VERSION(6, 4, 0) <= LINUX_VERSION_CODE) +#define KBASE_CLASS_CREATE(owner, name) class_create(name) +#else +#define KBASE_CLASS_CREATE(owner, name) class_create(owner, name) +#endif + #endif /* _VERSION_COMPAT_DEFS_H_ */ diff --git a/common/include/uapi/gpu/arm/midgard/csf/mali_base_csf_kernel.h b/common/include/uapi/gpu/arm/midgard/csf/mali_base_csf_kernel.h index c6f6ff1..a8e5802 100644 --- a/common/include/uapi/gpu/arm/midgard/csf/mali_base_csf_kernel.h +++ b/common/include/uapi/gpu/arm/midgard/csf/mali_base_csf_kernel.h @@ -177,7 +177,7 @@ enum base_kcpu_command_type { BASE_KCPU_COMMAND_TYPE_JIT_ALLOC, BASE_KCPU_COMMAND_TYPE_JIT_FREE, BASE_KCPU_COMMAND_TYPE_GROUP_SUSPEND, - BASE_KCPU_COMMAND_TYPE_ERROR_BARRIER + BASE_KCPU_COMMAND_TYPE_ERROR_BARRIER, }; /** diff --git a/common/include/uapi/gpu/arm/midgard/csf/mali_kbase_csf_ioctl.h b/common/include/uapi/gpu/arm/midgard/csf/mali_kbase_csf_ioctl.h index 7c37cfc..c9de5fd 100644 --- a/common/include/uapi/gpu/arm/midgard/csf/mali_kbase_csf_ioctl.h +++ b/common/include/uapi/gpu/arm/midgard/csf/mali_kbase_csf_ioctl.h @@ -82,10 +82,18 @@ * - Relax the
requirement to create a mapping with BASE_MEM_MAP_TRACKING_HANDLE * before allocating GPU memory for the context. * - CPU mappings of USER_BUFFER imported memory handles must be cached. + * 1.19: + * - Add NE support in queue_group_create IOCTL fields + * - Previous version retained as KBASE_IOCTL_CS_QUEUE_GROUP_CREATE_1_18 for + * backward compatibility. + * 1.20: + * - Restrict child process from doing supported file operations (like mmap, ioctl, + * read, poll) on the file descriptor of mali device file that was inherited + * from the parent process. */ #define BASE_UK_VERSION_MAJOR 1 -#define BASE_UK_VERSION_MINOR 18 +#define BASE_UK_VERSION_MINOR 20 /** * struct kbase_ioctl_version_check - Check version compatibility between @@ -258,6 +266,56 @@ union kbase_ioctl_cs_queue_group_create_1_6 { _IOWR(KBASE_IOCTL_TYPE, 42, union kbase_ioctl_cs_queue_group_create_1_6) /** + * union kbase_ioctl_cs_queue_group_create_1_18 - Create a GPU command queue group + * @in: Input parameters + * @in.tiler_mask: Mask of tiler endpoints the group is allowed to use. + * @in.fragment_mask: Mask of fragment endpoints the group is allowed to use. + * @in.compute_mask: Mask of compute endpoints the group is allowed to use. + * @in.cs_min: Minimum number of CSs required. + * @in.priority: Queue group's priority within a process. + * @in.tiler_max: Maximum number of tiler endpoints the group is allowed + * to use. + * @in.fragment_max: Maximum number of fragment endpoints the group is + * allowed to use. + * @in.compute_max: Maximum number of compute endpoints the group is allowed + * to use. + * @in.csi_handlers: Flags to signal that the application intends to use CSI + * exception handlers in some linear buffers to deal with + * the given exception types. + * @in.padding: Currently unused, must be zero + * @out: Output parameters + * @out.group_handle: Handle of a newly created queue group. + * @out.padding: Currently unused, must be zero + * @out.group_uid: UID of the queue group available to base. + */ +union kbase_ioctl_cs_queue_group_create_1_18 { + struct { + __u64 tiler_mask; + __u64 fragment_mask; + __u64 compute_mask; + __u8 cs_min; + __u8 priority; + __u8 tiler_max; + __u8 fragment_max; + __u8 compute_max; + __u8 csi_handlers; + __u8 padding[2]; + /** + * @in.dvs_buf: buffer for deferred vertex shader + */ + __u64 dvs_buf; + } in; + struct { + __u8 group_handle; + __u8 padding[3]; + __u32 group_uid; + } out; +}; + +#define KBASE_IOCTL_CS_QUEUE_GROUP_CREATE_1_18 \ + _IOWR(KBASE_IOCTL_TYPE, 58, union kbase_ioctl_cs_queue_group_create_1_18) + +/** * union kbase_ioctl_cs_queue_group_create - Create a GPU command queue group * @in: Input parameters * @in.tiler_mask: Mask of tiler endpoints the group is allowed to use. @@ -291,11 +349,15 @@ union kbase_ioctl_cs_queue_group_create { __u8 fragment_max; __u8 compute_max; __u8 csi_handlers; - __u8 padding[2]; + /** + * @in.reserved: Reserved, currently unused, must be zero. 
+ */ + __u16 reserved; /** * @in.dvs_buf: buffer for deferred vertex shader */ __u64 dvs_buf; + __u64 padding[9]; } in; struct { __u8 group_handle; diff --git a/common/include/uapi/gpu/arm/midgard/gpu/backend/mali_kbase_gpu_regmap_csf.h b/common/include/uapi/gpu/arm/midgard/gpu/backend/mali_kbase_gpu_regmap_csf.h index 0ca5d90..eaa4b2d 100644 --- a/common/include/uapi/gpu/arm/midgard/gpu/backend/mali_kbase_gpu_regmap_csf.h +++ b/common/include/uapi/gpu/arm/midgard/gpu/backend/mali_kbase_gpu_regmap_csf.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2021-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -22,11 +22,6 @@ #ifndef _UAPI_KBASE_GPU_REGMAP_CSF_H_ #define _UAPI_KBASE_GPU_REGMAP_CSF_H_ -/* IPA control registers */ -#define IPA_CONTROL_BASE 0x40000 -#define IPA_CONTROL_REG(r) (IPA_CONTROL_BASE + (r)) -#define STATUS 0x004 /* (RO) Status register */ - /* USER base address */ #define USER_BASE 0x0010000 #define USER_REG(r) (USER_BASE + (r)) diff --git a/common/include/uapi/gpu/arm/midgard/gpu/backend/mali_kbase_gpu_regmap_jm.h b/common/include/uapi/gpu/arm/midgard/gpu/backend/mali_kbase_gpu_regmap_jm.h index 9bfd6d2..d24afcc 100644 --- a/common/include/uapi/gpu/arm/midgard/gpu/backend/mali_kbase_gpu_regmap_jm.h +++ b/common/include/uapi/gpu/arm/midgard/gpu/backend/mali_kbase_gpu_regmap_jm.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -22,29 +22,4 @@ #ifndef _UAPI_KBASE_GPU_REGMAP_JM_H_ #define _UAPI_KBASE_GPU_REGMAP_JM_H_ -/* GPU control registers */ - -#define LATEST_FLUSH 0x038 /* (RO) Flush ID of latest clean-and-invalidate operation */ - -/* Job control registers */ - -#define JS_HEAD_LO 0x00 /* (RO) Job queue head pointer for job slot n, low word */ -#define JS_HEAD_HI 0x04 /* (RO) Job queue head pointer for job slot n, high word */ -#define JS_TAIL_LO 0x08 /* (RO) Job queue tail pointer for job slot n, low word */ -#define JS_TAIL_HI 0x0C /* (RO) Job queue tail pointer for job slot n, high word */ -#define JS_AFFINITY_LO 0x10 /* (RO) Core affinity mask for job slot n, low word */ -#define JS_AFFINITY_HI 0x14 /* (RO) Core affinity mask for job slot n, high word */ -#define JS_CONFIG 0x18 /* (RO) Configuration settings for job slot n */ - -#define JS_HEAD_NEXT_LO 0x40 /* (RW) Next job queue head pointer for job slot n, low word */ -#define JS_HEAD_NEXT_HI 0x44 /* (RW) Next job queue head pointer for job slot n, high word */ -#define JS_AFFINITY_NEXT_LO 0x50 /* (RW) Next core affinity mask for job slot n, low word */ -#define JS_AFFINITY_NEXT_HI 0x54 /* (RW) Next core affinity mask for job slot n, high word */ -#define JS_CONFIG_NEXT 0x58 /* (RW) Next configuration settings for job slot n */ -#define JS_COMMAND_NEXT 0x60 /* (RW) Next command register for job slot n */ - -#define JOB_SLOT0 0x800 /* Configuration registers for job slot 0 */ - -#define JOB_SLOT_REG(n, r) (JOB_CONTROL_REG(JOB_SLOT0 + ((n) << 7)) + (r)) - #endif /* _UAPI_KBASE_GPU_REGMAP_JM_H_ */ diff --git a/common/include/uapi/gpu/arm/midgard/gpu/mali_kbase_gpu_regmap.h b/common/include/uapi/gpu/arm/midgard/gpu/mali_kbase_gpu_regmap.h index 1f33167..8256191 100644 --- a/common/include/uapi/gpu/arm/midgard/gpu/mali_kbase_gpu_regmap.h +++ b/common/include/uapi/gpu/arm/midgard/gpu/mali_kbase_gpu_regmap.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -28,71 +28,4 @@ #include "backend/mali_kbase_gpu_regmap_jm.h" #endif /* !MALI_USE_CSF */ -/* Begin Register Offsets */ -/* GPU control registers */ - -#define GPU_CONTROL_BASE 0x0000 -#define GPU_CONTROL_REG(r) (GPU_CONTROL_BASE + (r)) - -#define GPU_ID 0x000 /* (RO) GPU and revision identifier */ - -#define GPU_IRQ_CLEAR 0x024 /* (WO) */ -#define GPU_IRQ_STATUS 0x02C /* (RO) */ - -#define SHADER_READY_LO 0x140 /* (RO) Shader core ready bitmap, low word */ -#define SHADER_READY_HI 0x144 /* (RO) Shader core ready bitmap, high word */ - -#define TILER_READY_LO 0x150 /* (RO) Tiler core ready bitmap, low word */ -#define TILER_READY_HI 0x154 /* (RO) Tiler core ready bitmap, high word */ - -#define L2_READY_LO 0x160 /* (RO) Level 2 cache ready bitmap, low word */ -#define L2_READY_HI 0x164 /* (RO) Level 2 cache ready bitmap, high word */ - -#define SHADER_PWRON_LO 0x180 /* (WO) Shader core power on bitmap, low word */ -#define SHADER_PWRON_HI 0x184 /* (WO) Shader core power on bitmap, high word */ - -#define TILER_PWRON_LO 0x190 /* (WO) Tiler core power on bitmap, low word */ -#define TILER_PWRON_HI 0x194 /* (WO) Tiler core power on bitmap, high word */ - -#define L2_PWRON_LO 0x1A0 /* (WO) Level 2 cache power on bitmap, low word */ -#define L2_PWRON_HI 0x1A4 /* (WO) Level 2 cache power on bitmap, high word */ - -/* Job control registers */ - -#define JOB_CONTROL_BASE 0x1000 - -#define JOB_CONTROL_REG(r) (JOB_CONTROL_BASE + (r)) - -#define JOB_IRQ_CLEAR 0x004 /* Interrupt clear register */ -#define JOB_IRQ_MASK 0x008 /* Interrupt mask register */ -#define JOB_IRQ_STATUS 0x00C /* Interrupt status register */ - -/* MMU control registers */ - -#define MEMORY_MANAGEMENT_BASE 0x2000 - -#define MMU_REG(r) (MEMORY_MANAGEMENT_BASE + (r)) - -#define MMU_IRQ_RAWSTAT 0x000 /* (RW) Raw interrupt status register */ -#define MMU_IRQ_CLEAR 0x004 /* (WO) Interrupt clear register */ -#define MMU_IRQ_MASK 0x008 /* (RW) Interrupt mask register */ -#define MMU_IRQ_STATUS 0x00C /* (RO) Interrupt status register */ - -#define MMU_AS0 0x400 /* Configuration registers for address space 0 */ - -/* MMU address space control registers */ - -#define MMU_AS_REG(n, r) (MMU_REG(MMU_AS0 + ((n) << 6)) + (r)) - -#define AS_TRANSTAB_LO 0x00 /* (RW) Translation Table Base Address for address space n, low word */ -#define AS_TRANSTAB_HI 0x04 /* (RW) Translation Table Base Address for address space n, high word */ -#define AS_MEMATTR_LO 0x08 /* (RW) Memory attributes for address space n, low word. */ -#define AS_MEMATTR_HI 0x0C /* (RW) Memory attributes for address space n, high word. */ -#define AS_COMMAND 0x18 /* (WO) MMU command register for address space n */ - -/* (RW) Translation table configuration for address space n, low word */ -#define AS_TRANSCFG_LO 0x30 -/* (RW) Translation table configuration for address space n, high word */ -#define AS_TRANSCFG_HI 0x34 - #endif /* _UAPI_KBASE_GPU_REGMAP_H_ */ diff --git a/common/include/uapi/gpu/arm/midgard/jm/mali_kbase_jm_ioctl.h b/common/include/uapi/gpu/arm/midgard/jm/mali_kbase_jm_ioctl.h index ac6affe..f2329f9 100644 --- a/common/include/uapi/gpu/arm/midgard/jm/mali_kbase_jm_ioctl.h +++ b/common/include/uapi/gpu/arm/midgard/jm/mali_kbase_jm_ioctl.h @@ -143,9 +143,14 @@ * - Relax the requirement to create a mapping with BASE_MEM_MAP_TRACKING_HANDLE * before allocating GPU memory for the context. 
* - CPU mappings of USER_BUFFER imported memory handles must be cached. + * 11.39: + * - Restrict child process from doing supported file operations (like mmap, ioctl, + * read, poll) on the file descriptor of mali device file that was inherited + * from the parent process. */ + #define BASE_UK_VERSION_MAJOR 11 -#define BASE_UK_VERSION_MINOR 38 +#define BASE_UK_VERSION_MINOR 39 /** * struct kbase_ioctl_version_check - Check version compatibility between diff --git a/mali_kbase/BUILD.bazel b/mali_kbase/BUILD.bazel index e38f617..54dd437 100644 --- a/mali_kbase/BUILD.bazel +++ b/mali_kbase/BUILD.bazel @@ -1,27 +1,45 @@ -# NOTE: THIS FILE IS EXPERIMENTAL FOR THE BAZEL MIGRATION AND NOT USED FOR -# YOUR BUILDS CURRENTLY. +# This program is free software and is provided to you under the terms of the +# GNU General Public License version 2 as published by the Free Software +# Foundation, and any use by you of this program is subject to the terms +# of such GNU license. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, you can access it online at +# http://www.gnu.org/licenses/gpl-2.0.html. +# # -# It is not yet the source of truth for your build. If you're looking to modify -# the build file, modify the Android.bp file instead. Do *not* modify this file -# unless you have coordinated with the team managing the Soong to Bazel -# migration. -load("//build/kleaf:kernel.bzl", "kernel_module") +load( + "//build/kernel/kleaf:kernel.bzl", + "kernel_module", +) + +_midgard_modules = [ + "mali_kbase.ko", + "tests/kutf/mali_kutf.ko", + "tests/mali_kutf_clk_rate_trace/kernel/mali_kutf_clk_rate_trace_test_portal.ko", +] kernel_module( name = "mali_kbase.cloudripper", srcs = glob([ "**/*.c", "**/*.h", - "**/Kbuild", + "**/*Kbuild", + "**/*Makefile", ]) + [ + "//common:kernel_headers", + "//common-modules/mali:headers", + "//common-modules/mali/drivers/gpu/arm/arbitration", + "//common-modules/mali/drivers/xen/arm:xen", "//private/google-modules/gpu/common:headers", ], - outs = [ - "mali_kbase.ko", - "tests/kutf/mali_kutf.ko", - "tests/mali_kutf_clk_rate_trace/kernel/mali_kutf_clk_rate_trace_test_portal.ko", - ], + outs = _midgard_modules, kernel_build = "//private/gs-google:cloudripper", visibility = [ "//private/gs-google:__pkg__", @@ -30,3 +48,14 @@ kernel_module( "//private/google-modules/gpu/mali_pixel", ], ) + +filegroup( + name = "midgard_kconfig.cloudripper", + srcs = glob([ + "**/*Kconfig", + ]), + visibility = [ + "//common:__pkg__", + "//common-modules/mali:__subpackages__", + ], +) diff --git a/mali_kbase/Kbuild b/mali_kbase/Kbuild index 9da4141..ff0a0de 100644 --- a/mali_kbase/Kbuild +++ b/mali_kbase/Kbuild @@ -68,12 +68,11 @@ endif # Configurations # -# Driver version string which is returned to userspace via an ioctl -MALI_RELEASE_NAME ?= '"r43p0-01eac0"' - # We are building for Pixel CONFIG_MALI_PLATFORM_NAME="pixel" +# Driver version string which is returned to userspace via an ioctl +MALI_RELEASE_NAME ?= '"r44p1-00dev3"' # Set up defaults if not defined by build system ifeq ($(CONFIG_MALI_DEBUG), y) MALI_UNIT_TEST = 1 @@ -191,7 +190,6 @@ mali_kbase-y := \ mali_kbase_mem_pool.o \ mali_kbase_mem_pool_debugfs.o \ mali_kbase_debugfs_helper.o \ - mali_kbase_strings.o \ 
mali_kbase_as_fault_debugfs.o \ mali_kbase_regs_history_debugfs.o \ mali_kbase_dvfs_debugfs.o \ @@ -208,6 +206,10 @@ mali_kbase-$(CONFIG_SYNC_FILE) += \ mali_kbase_sync_file.o \ mali_kbase_sync_common.o +mali_kbase-$(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) += \ + mali_power_gpu_work_period_trace.o \ + mali_kbase_gpu_metrics.o + ifneq ($(CONFIG_MALI_CSF_SUPPORT),y) mali_kbase-y += \ mali_kbase_jm.o \ diff --git a/mali_kbase/Kconfig b/mali_kbase/Kconfig index 46e3546..bb25ef4 100644 --- a/mali_kbase/Kconfig +++ b/mali_kbase/Kconfig @@ -65,11 +65,18 @@ config MALI_NO_MALI All calls to the simulated hardware will complete immediately as if the hardware completed the task. +config MALI_NO_MALI_DEFAULT_GPU + string "Default GPU for No Mali" + depends on MALI_NO_MALI + default "tMIx" + help + This option sets the default GPU to identify as for No Mali builds. + endchoice menu "Platform specific options" -source "drivers/gpu/arm/midgard/platform/Kconfig" +source "$(MALI_KCONFIG_EXT_PREFIX)drivers/gpu/arm/midgard/platform/Kconfig" endmenu config MALI_CSF_SUPPORT @@ -193,6 +200,22 @@ config LARGE_PAGE_ALLOC If in doubt, say N +config PAGE_MIGRATION_SUPPORT + bool "Enable support for page migration" + depends on MALI_MIDGARD && MALI_EXPERT + default y + default n if ANDROID + help + Compile in support for page migration. + If set to disabled ('n') then page migration cannot + be enabled at all, and related symbols are not compiled in. + If not set, page migration is compiled in by default, and + if not explicitly enabled or disabled with the insmod parameter, + page migration becomes automatically enabled with large pages. + + If in doubt, say Y. To strip out page migration symbols and support, + say N. + config MALI_MEMORY_FULLY_BACKED bool "Enable memory fully physically-backed" depends on MALI_MIDGARD && MALI_EXPERT @@ -395,7 +418,16 @@ config MALI_ARBITRATION virtualization setup for Mali If unsure, say N. +config MALI_TRACE_POWER_GPU_WORK_PERIOD + bool "Enable per-application GPU metrics tracepoints" + depends on MALI_MIDGARD + default y + help + This option enables per-application GPU metrics tracepoints. + + If unsure, say N. + -source "drivers/gpu/arm/midgard/tests/Kconfig" +source "$(MALI_KCONFIG_EXT_PREFIX)drivers/gpu/arm/midgard/tests/Kconfig" endif diff --git a/mali_kbase/Makefile b/mali_kbase/Makefile index d851653..59b306b 100644 --- a/mali_kbase/Makefile +++ b/mali_kbase/Makefile @@ -20,8 +20,6 @@ KERNEL_SRC ?= /lib/modules/$(shell uname -r)/build KDIR ?= $(KERNEL_SRC) - -# Ensure build intermediates are in OUT_DIR instead of alongside the source M ?= $(shell pwd) ifeq ($(KDIR),) @@ -39,6 +37,7 @@ CONFIG_MALI_SYSTEM_TRACE=y # Core kbase configuration options CONFIG_MALI_EXPERT=y CONFIG_MALI_MIDGARD_DVFS=y +CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD = y # Pixel integration specific configuration options CONFIG_MALI_PLATFORM_NAME="pixel" @@ -54,164 +53,176 @@ CONFIG_MALI_PIXEL_GPU_SLC ?= y # Dependency resolution is done through statements as Kconfig # is not supported for out-of-tree builds. 
# +CONFIGS := +ifeq ($(MALI_KCONFIG_EXT_PREFIX),) + CONFIG_MALI_MIDGARD ?= m + ifeq ($(CONFIG_MALI_MIDGARD),m) + CONFIG_MALI_PLATFORM_NAME ?= "devicetree" + CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD ?= y + CONFIG_MALI_GATOR_SUPPORT ?= y + CONFIG_MALI_ARBITRATION ?= n + CONFIG_MALI_PARTITION_MANAGER ?= n + + ifneq ($(CONFIG_MALI_NO_MALI),y) + # Prevent misuse when CONFIG_MALI_NO_MALI=y + CONFIG_MALI_REAL_HW ?= y + CONFIG_MALI_CORESIGHT = n + endif -CONFIG_MALI_MIDGARD ?= m -ifeq ($(CONFIG_MALI_MIDGARD),m) - CONFIG_MALI_PLATFORM_NAME ?= "devicetree" - CONFIG_MALI_GATOR_SUPPORT ?= y - CONFIG_MALI_ARBITRATION ?= n - CONFIG_MALI_PARTITION_MANAGER ?= n - - ifneq ($(CONFIG_MALI_NO_MALI),y) - # Prevent misuse when CONFIG_MALI_NO_MALI=y - CONFIG_MALI_REAL_HW ?= y - CONFIG_MALI_CORESIGHT = n - endif - - ifeq ($(CONFIG_MALI_MIDGARD_DVFS),y) - # Prevent misuse when CONFIG_MALI_MIDGARD_DVFS=y - CONFIG_MALI_DEVFREQ ?= n - else - CONFIG_MALI_DEVFREQ ?= y - endif + ifeq ($(CONFIG_MALI_MIDGARD_DVFS),y) + # Prevent misuse when CONFIG_MALI_MIDGARD_DVFS=y + CONFIG_MALI_DEVFREQ ?= n + else + CONFIG_MALI_DEVFREQ ?= y + endif - ifeq ($(CONFIG_MALI_DMA_BUF_MAP_ON_DEMAND), y) - # Prevent misuse when CONFIG_MALI_DMA_BUF_MAP_ON_DEMAND=y - CONFIG_MALI_DMA_BUF_LEGACY_COMPAT = n - endif + ifeq ($(CONFIG_MALI_DMA_BUF_MAP_ON_DEMAND), y) + # Prevent misuse when CONFIG_MALI_DMA_BUF_MAP_ON_DEMAND=y + CONFIG_MALI_DMA_BUF_LEGACY_COMPAT = n + endif - ifeq ($(CONFIG_MALI_CSF_SUPPORT), y) - CONFIG_MALI_CORESIGHT ?= n - endif + ifeq ($(CONFIG_MALI_CSF_SUPPORT), y) + CONFIG_MALI_CORESIGHT ?= n + endif - # - # Expert/Debug/Test released configurations - # - ifeq ($(CONFIG_MALI_EXPERT), y) - ifeq ($(CONFIG_MALI_NO_MALI), y) - CONFIG_MALI_REAL_HW = n + # + # Expert/Debug/Test released configurations + # + ifeq ($(CONFIG_MALI_EXPERT), y) + ifeq ($(CONFIG_MALI_NO_MALI), y) + CONFIG_MALI_REAL_HW = n + CONFIG_MALI_NO_MALI_DEFAULT_GPU ?= "tMIx" - else - # Prevent misuse when CONFIG_MALI_NO_MALI=n - CONFIG_MALI_REAL_HW = y - CONFIG_MALI_ERROR_INJECT = n - endif + else + # Prevent misuse when CONFIG_MALI_NO_MALI=n + CONFIG_MALI_REAL_HW = y + CONFIG_MALI_ERROR_INJECT = n + endif - ifeq ($(CONFIG_MALI_HW_ERRATA_1485982_NOT_AFFECTED), y) - # Prevent misuse when CONFIG_MALI_HW_ERRATA_1485982_NOT_AFFECTED=y - CONFIG_MALI_HW_ERRATA_1485982_USE_CLOCK_ALTERNATIVE = n - endif + ifeq ($(CONFIG_MALI_HW_ERRATA_1485982_NOT_AFFECTED), y) + # Prevent misuse when CONFIG_MALI_HW_ERRATA_1485982_NOT_AFFECTED=y + CONFIG_MALI_HW_ERRATA_1485982_USE_CLOCK_ALTERNATIVE = n + endif - ifeq ($(CONFIG_MALI_DEBUG), y) - CONFIG_MALI_MIDGARD_ENABLE_TRACE ?= y - CONFIG_MALI_SYSTEM_TRACE ?= y + ifeq ($(CONFIG_MALI_DEBUG), y) + CONFIG_MALI_MIDGARD_ENABLE_TRACE ?= y + CONFIG_MALI_SYSTEM_TRACE ?= y - ifeq ($(CONFIG_SYNC_FILE), y) - CONFIG_MALI_FENCE_DEBUG ?= y + ifeq ($(CONFIG_SYNC_FILE), y) + CONFIG_MALI_FENCE_DEBUG ?= y + else + CONFIG_MALI_FENCE_DEBUG = n + endif else + # Prevent misuse when CONFIG_MALI_DEBUG=n + CONFIG_MALI_MIDGARD_ENABLE_TRACE = n + CONFIG_MALI_SYSTEM_TRACE = n CONFIG_MALI_FENCE_DEBUG = n endif else - # Prevent misuse when CONFIG_MALI_DEBUG=n + # Prevent misuse when CONFIG_MALI_EXPERT=n + CONFIG_MALI_CORESTACK = n + CONFIG_LARGE_PAGE_ALLOC_OVERRIDE = n + CONFIG_LARGE_PAGE_ALLOC = n + CONFIG_MALI_PWRSOFT_765 = n + CONFIG_MALI_MEMORY_FULLY_BACKED = n + CONFIG_MALI_JOB_DUMP = n + CONFIG_MALI_NO_MALI = n + CONFIG_MALI_REAL_HW = y + CONFIG_MALI_ERROR_INJECT = n + CONFIG_MALI_HW_ERRATA_1485982_NOT_AFFECTED = n + CONFIG_MALI_HW_ERRATA_1485982_USE_CLOCK_ALTERNATIVE 
= n + CONFIG_MALI_HOST_CONTROLS_SC_RAILS = n + CONFIG_MALI_PRFCNT_SET_SELECT_VIA_DEBUG_FS = n + CONFIG_MALI_DEBUG = n CONFIG_MALI_MIDGARD_ENABLE_TRACE = n CONFIG_MALI_FENCE_DEBUG = n endif - else - # Prevent misuse when CONFIG_MALI_EXPERT=n - CONFIG_MALI_CORESTACK = n - CONFIG_LARGE_PAGE_ALLOC_OVERRIDE = n - CONFIG_LARGE_PAGE_ALLOC = n - CONFIG_MALI_PWRSOFT_765 = n - CONFIG_MALI_MEMORY_FULLY_BACKED = n - CONFIG_MALI_JOB_DUMP = n - CONFIG_MALI_NO_MALI = n - CONFIG_MALI_REAL_HW = y - CONFIG_MALI_ERROR_INJECT = n - CONFIG_MALI_HW_ERRATA_1485982_NOT_AFFECTED = n - CONFIG_MALI_HW_ERRATA_1485982_USE_CLOCK_ALTERNATIVE = n - CONFIG_MALI_HOST_CONTROLS_SC_RAILS = n - CONFIG_MALI_PRFCNT_SET_SELECT_VIA_DEBUG_FS = n - CONFIG_MALI_DEBUG = n - CONFIG_MALI_MIDGARD_ENABLE_TRACE = n - CONFIG_MALI_FENCE_DEBUG = n - endif - ifeq ($(CONFIG_MALI_DEBUG), y) - CONFIG_MALI_KUTF ?= y - ifeq ($(CONFIG_MALI_KUTF), y) - CONFIG_MALI_KUTF_IRQ_TEST ?= y - CONFIG_MALI_KUTF_CLK_RATE_TRACE ?= y - CONFIG_MALI_KUTF_MGM_INTEGRATION_TEST ?= y + ifeq ($(CONFIG_MALI_DEBUG), y) + CONFIG_MALI_KUTF ?= y + ifeq ($(CONFIG_MALI_KUTF), y) + CONFIG_MALI_KUTF_IRQ_TEST ?= y + CONFIG_MALI_KUTF_CLK_RATE_TRACE ?= y + CONFIG_MALI_KUTF_MGM_INTEGRATION_TEST ?= y + ifeq ($(CONFIG_MALI_DEVFREQ), y) + ifeq ($(CONFIG_MALI_NO_MALI), y) + CONFIG_MALI_KUTF_IPA_UNIT_TEST ?= y + endif + endif + + else + # Prevent misuse when CONFIG_MALI_KUTF=n + CONFIG_MALI_KUTF_IRQ_TEST = n + CONFIG_MALI_KUTF_CLK_RATE_TRACE = n + CONFIG_MALI_KUTF_MGM_INTEGRATION_TEST = n + endif else - # Prevent misuse when CONFIG_MALI_KUTF=n + # Prevent misuse when CONFIG_MALI_DEBUG=n + CONFIG_MALI_KUTF = y CONFIG_MALI_KUTF_IRQ_TEST = n CONFIG_MALI_KUTF_CLK_RATE_TRACE = n CONFIG_MALI_KUTF_MGM_INTEGRATION_TEST = n endif else - # Prevent misuse when CONFIG_MALI_DEBUG=n - CONFIG_MALI_KUTF = y + # Prevent misuse when CONFIG_MALI_MIDGARD=n + CONFIG_MALI_ARBITRATION = n + CONFIG_MALI_KUTF = n CONFIG_MALI_KUTF_IRQ_TEST = n CONFIG_MALI_KUTF_CLK_RATE_TRACE = y CONFIG_MALI_KUTF_MGM_INTEGRATION_TEST = n endif -else - # Prevent misuse when CONFIG_MALI_MIDGARD=n - CONFIG_MALI_ARBITRATION = n - CONFIG_MALI_KUTF = n - CONFIG_MALI_KUTF_IRQ_TEST = n - CONFIG_MALI_KUTF_CLK_RATE_TRACE = n - CONFIG_MALI_KUTF_MGM_INTEGRATION_TEST = n -endif -# All Mali CONFIG should be listed here -CONFIGS := \ - CONFIG_MALI_MIDGARD \ - CONFIG_MALI_GATOR_SUPPORT \ - CONFIG_MALI_ARBITER_SUPPORT \ - CONFIG_MALI_ARBITRATION \ - CONFIG_MALI_PARTITION_MANAGER \ - CONFIG_MALI_REAL_HW \ - CONFIG_MALI_DEVFREQ \ - CONFIG_MALI_MIDGARD_DVFS \ - CONFIG_MALI_DMA_BUF_MAP_ON_DEMAND \ - CONFIG_MALI_DMA_BUF_LEGACY_COMPAT \ - CONFIG_MALI_EXPERT \ - CONFIG_MALI_CORESTACK \ - CONFIG_LARGE_PAGE_ALLOC_OVERRIDE \ - CONFIG_LARGE_PAGE_ALLOC \ - CONFIG_MALI_PWRSOFT_765 \ - CONFIG_MALI_MEMORY_FULLY_BACKED \ - CONFIG_MALI_JOB_DUMP \ - CONFIG_MALI_NO_MALI \ - CONFIG_MALI_ERROR_INJECT \ - CONFIG_MALI_HW_ERRATA_1485982_NOT_AFFECTED \ - CONFIG_MALI_HW_ERRATA_1485982_USE_CLOCK_ALTERNATIVE \ - CONFIG_MALI_HOST_CONTROLS_SC_RAILS \ - CONFIG_MALI_PRFCNT_SET_PRIMARY \ - CONFIG_MALI_PRFCNT_SET_SECONDARY \ - CONFIG_MALI_PRFCNT_SET_TERTIARY \ - CONFIG_MALI_PRFCNT_SET_SELECT_VIA_DEBUG_FS \ - CONFIG_MALI_DEBUG \ - CONFIG_MALI_MIDGARD_ENABLE_TRACE \ - CONFIG_MALI_SYSTEM_TRACE \ - CONFIG_MALI_FENCE_DEBUG \ - CONFIG_MALI_KUTF \ - CONFIG_MALI_KUTF_IRQ_TEST \ - CONFIG_MALI_KUTF_CLK_RATE_TRACE \ - CONFIG_MALI_KUTF_MGM_INTEGRATION_TEST \ - CONFIG_MALI_XEN \ - CONFIG_MALI_CORESIGHT - -# Pixel integration CONFIG options -CONFIGS += \ - CONFIG_MALI_PIXEL_GPU_QOS \ - 
CONFIG_MALI_PIXEL_GPU_BTS \ - CONFIG_MALI_PIXEL_GPU_THERMAL \ - CONFIG_MALI_PIXEL_GPU_SECURE_RENDERING \ - CONFIG_MALI_HOST_CONTROLS_SC_RAILS \ - CONFIG_MALI_PIXEL_GPU_SLC + # All Mali CONFIG should be listed here + CONFIGS := \ + CONFIG_MALI_MIDGARD \ + CONFIG_MALI_GATOR_SUPPORT \ + CONFIG_MALI_ARBITER_SUPPORT \ + CONFIG_MALI_ARBITRATION \ + CONFIG_MALI_PARTITION_MANAGER \ + CONFIG_MALI_REAL_HW \ + CONFIG_MALI_DEVFREQ \ + CONFIG_MALI_MIDGARD_DVFS \ + CONFIG_MALI_DMA_BUF_MAP_ON_DEMAND \ + CONFIG_MALI_DMA_BUF_LEGACY_COMPAT \ + CONFIG_MALI_EXPERT \ + CONFIG_MALI_CORESTACK \ + CONFIG_LARGE_PAGE_ALLOC_OVERRIDE \ + CONFIG_LARGE_PAGE_ALLOC \ + CONFIG_MALI_PWRSOFT_765 \ + CONFIG_MALI_MEMORY_FULLY_BACKED \ + CONFIG_MALI_JOB_DUMP \ + CONFIG_MALI_NO_MALI \ + CONFIG_MALI_ERROR_INJECT \ + CONFIG_MALI_HW_ERRATA_1485982_NOT_AFFECTED \ + CONFIG_MALI_HW_ERRATA_1485982_USE_CLOCK_ALTERNATIVE \ + CONFIG_MALI_HOST_CONTROLS_SC_RAILS \ + CONFIG_MALI_PRFCNT_SET_PRIMARY \ + CONFIG_MALI_PRFCNT_SET_SECONDARY \ + CONFIG_MALI_PRFCNT_SET_TERTIARY \ + CONFIG_MALI_PRFCNT_SET_SELECT_VIA_DEBUG_FS \ + CONFIG_MALI_DEBUG \ + CONFIG_MALI_MIDGARD_ENABLE_TRACE \ + CONFIG_MALI_SYSTEM_TRACE \ + CONFIG_MALI_FENCE_DEBUG \ + CONFIG_MALI_KUTF \ + CONFIG_MALI_KUTF_IRQ_TEST \ + CONFIG_MALI_KUTF_CLK_RATE_TRACE \ + CONFIG_MALI_KUTF_MGM_INTEGRATION_TEST \ + CONFIG_MALI_XEN \ + CONFIG_MALI_CORESIGHT \ + CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD + + # Pixel integration CONFIG options + CONFIGS += \ + CONFIG_MALI_PIXEL_GPU_QOS \ + CONFIG_MALI_PIXEL_GPU_BTS \ + CONFIG_MALI_PIXEL_GPU_THERMAL \ + CONFIG_MALI_PIXEL_GPU_SECURE_RENDERING \ + CONFIG_MALI_PIXEL_GPU_SLC + +endif THIS_DIR := $(dir $(lastword $(MAKEFILE_LIST))) -include $(THIS_DIR)/../arbitration/Makefile @@ -227,7 +238,9 @@ MAKE_ARGS := $(foreach config,$(CONFIGS), \ $(value config)=$(value $(value config)), \ $(value config)=n)) -MAKE_ARGS += CONFIG_MALI_PLATFORM_NAME=$(CONFIG_MALI_PLATFORM_NAME) +ifeq ($(MALI_KCONFIG_EXT_PREFIX),) + MAKE_ARGS += CONFIG_MALI_PLATFORM_NAME=$(CONFIG_MALI_PLATFORM_NAME) +endif # # EXTRA_CFLAGS to define the custom CONFIGs on out-of-tree build @@ -239,63 +252,66 @@ EXTRA_CFLAGS := $(foreach config,$(CONFIGS), \ $(if $(filter y m,$(value $(value config))), \ -D$(value config)=1)) -EXTRA_CFLAGS += -DCONFIG_MALI_PLATFORM_NAME=$(CONFIG_MALI_PLATFORM_NAME) +ifeq ($(MALI_KCONFIG_EXT_PREFIX),) + EXTRA_CFLAGS += -DCONFIG_MALI_PLATFORM_NAME='\"$(CONFIG_MALI_PLATFORM_NAME)\"' + EXTRA_CFLAGS += -DCONFIG_MALI_NO_MALI_DEFAULT_GPU='\"$(CONFIG_MALI_NO_MALI_DEFAULT_GPU)\"' +endif # # KBUILD_EXTRA_SYMBOLS to prevent warnings about unknown functions # EXTRA_SYMBOLS += $(OUT_DIR)/../google-modules/gpu/mali_pixel/Module.symvers -KBUILD_CFLAGS += -Wall -Werror +CFLAGS_MODULE += -Wall -Werror # The following were added to align with W=1 in scripts/Makefile.extrawarn # from the Linux source tree (v5.18.14) -KBUILD_CFLAGS += -Wextra -Wunused -Wno-unused-parameter -KBUILD_CFLAGS += -Wmissing-declarations -KBUILD_CFLAGS += -Wmissing-format-attribute -KBUILD_CFLAGS += -Wmissing-prototypes -KBUILD_CFLAGS += -Wold-style-definition +CFLAGS_MODULE += -Wextra -Wunused -Wno-unused-parameter +CFLAGS_MODULE += -Wmissing-declarations +CFLAGS_MODULE += -Wmissing-format-attribute +CFLAGS_MODULE += -Wmissing-prototypes +CFLAGS_MODULE += -Wold-style-definition # The -Wmissing-include-dirs cannot be enabled as the path to some of the # included directories change depending on whether it is an in-tree or # out-of-tree build. 
-KBUILD_CFLAGS += $(call cc-option, -Wunused-but-set-variable) -KBUILD_CFLAGS += $(call cc-option, -Wunused-const-variable) -KBUILD_CFLAGS += $(call cc-option, -Wpacked-not-aligned) -KBUILD_CFLAGS += $(call cc-option, -Wstringop-truncation) +CFLAGS_MODULE += $(call cc-option, -Wunused-but-set-variable) +CFLAGS_MODULE += $(call cc-option, -Wunused-const-variable) +CFLAGS_MODULE += $(call cc-option, -Wpacked-not-aligned) +CFLAGS_MODULE += $(call cc-option, -Wstringop-truncation) # The following turn off the warnings enabled by -Wextra -KBUILD_CFLAGS += -Wno-sign-compare -KBUILD_CFLAGS += -Wno-shift-negative-value +CFLAGS_MODULE += -Wno-sign-compare +CFLAGS_MODULE += -Wno-shift-negative-value # This flag is needed to avoid build errors on older kernels -KBUILD_CFLAGS += $(call cc-option, -Wno-cast-function-type) +CFLAGS_MODULE += $(call cc-option, -Wno-cast-function-type) KBUILD_CPPFLAGS += -DKBUILD_EXTRA_WARN1 # The following were added to align with W=2 in scripts/Makefile.extrawarn # from the Linux source tree (v5.18.14) -KBUILD_CFLAGS += -Wdisabled-optimization +CFLAGS_MODULE += -Wdisabled-optimization # The -Wshadow flag cannot be enabled unless upstream kernels are # patched to fix redefinitions of certain built-in functions and # global variables. -KBUILD_CFLAGS += $(call cc-option, -Wlogical-op) -KBUILD_CFLAGS += -Wmissing-field-initializers +CFLAGS_MODULE += $(call cc-option, -Wlogical-op) +CFLAGS_MODULE += -Wmissing-field-initializers # -Wtype-limits must be disabled due to build failures on kernel 5.x -KBUILD_CFLAGS += -Wno-type-limits -KBUILD_CFLAGS += $(call cc-option, -Wmaybe-uninitialized) -KBUILD_CFLAGS += $(call cc-option, -Wunused-macros) +CFLAGS_MODULE += -Wno-type-limits +CFLAGS_MODULE += $(call cc-option, -Wmaybe-uninitialized) +CFLAGS_MODULE += $(call cc-option, -Wunused-macros) KBUILD_CPPFLAGS += -DKBUILD_EXTRA_WARN2 # This warning is disabled to avoid build failures in some kernel versions -KBUILD_CFLAGS += -Wno-ignored-qualifiers +CFLAGS_MODULE += -Wno-ignored-qualifiers ifeq ($(CONFIG_GCOV_KERNEL),y) - KBUILD_CFLAGS += $(call cc-option, -ftest-coverage) - KBUILD_CFLAGS += $(call cc-option, -fprofile-arcs) + CFLAGS_MODULE += $(call cc-option, -ftest-coverage) + CFLAGS_MODULE += $(call cc-option, -fprofile-arcs) EXTRA_CFLAGS += -DGCOV_PROFILE=1 endif ifeq ($(CONFIG_MALI_KCOV),y) - KBUILD_CFLAGS += $(call cc-option, -fsanitize-coverage=trace-cmp) + CFLAGS_MODULE += $(call cc-option, -fsanitize-coverage=trace-cmp) EXTRA_CFLAGS += -DKCOV=1 EXTRA_CFLAGS += -DKCOV_ENABLE_COMPARISONS=1 endif diff --git a/mali_kbase/Mconfig b/mali_kbase/Mconfig index 77a528f..2d6fca0 100644 --- a/mali_kbase/Mconfig +++ b/mali_kbase/Mconfig @@ -196,6 +196,18 @@ config MALI_CORESTACK If unsure, say N. +config PAGE_MIGRATION_SUPPORT + bool "Compile with page migration support" + depends on BACKEND_KERNEL + default y + default n if ANDROID + help + Compile in support for page migration. + If set to disabled ('n') then page migration cannot + be enabled at all. If set to enabled, then page migration + support is explicitly compiled in. This has no effect when + PAGE_MIGRATION_OVERRIDE is disabled. + choice prompt "Error injection level" depends on MALI_MIDGARD && MALI_EXPERT @@ -352,5 +364,45 @@ config MALI_HOST_CONTROLS_SC_RAILS Adapter) inside the GPU to handshake with SoC PMU to control the power of cores. 
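# In the Makefile hunks above, each name in CONFIGS is expanded twice: once
# into MAKE_ARGS as a command-line override and once into EXTRA_CFLAGS as a
# preprocessor define. A worked example with assumed values (not from this
# patch): suppose CONFIGS contains CONFIG_MALI_DEVFREQ and CONFIG_MALI_NO_MALI,
# with CONFIG_MALI_DEVFREQ=y and CONFIG_MALI_NO_MALI unset. Then:
#
#   MAKE_ARGS    -> CONFIG_MALI_DEVFREQ=y CONFIG_MALI_NO_MALI=n
#   EXTRA_CFLAGS -> -DCONFIG_MALI_DEVFREQ=1
#
# i.e. configs set to y/m are passed through with their value and defined to 1
# for the C code; everything else is forced to n and gets no define.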
+config MALI_TRACE_POWER_GPU_WORK_PERIOD + bool "Enable per-application GPU metrics tracepoints" + depends on MALI_MIDGARD + default y + help + This option enables per-application GPU metrics tracepoints. + + If unsure, say N. + +choice + prompt "CSF Firmware trace mode" + depends on MALI_MIDGARD + default MALI_FW_TRACE_MODE_MANUAL + help + CSF Firmware log operating mode. + +config MALI_FW_TRACE_MODE_MANUAL + bool "manual mode" + depends on MALI_MIDGARD + help + firmware log can be read manually by the userspace (and it will + also be dumped automatically into dmesg on GPU reset). + +config MALI_FW_TRACE_MODE_AUTO_PRINT + bool "automatic printing mode" + depends on MALI_MIDGARD + help + firmware log will be periodically emptied into dmesg, manual + reading through debugfs is disabled. + +config MALI_FW_TRACE_MODE_AUTO_DISCARD + bool "automatic discarding mode" + depends on MALI_MIDGARD + help + firmware log will be periodically discarded, the remaining log can be + read manually by the userspace (and it will also be dumped + automatically into dmesg on GPU reset). + +endchoice + source "kernel/drivers/gpu/arm/arbitration/Mconfig" source "kernel/drivers/gpu/arm/midgard/tests/Mconfig" diff --git a/mali_kbase/backend/gpu/Kbuild b/mali_kbase/backend/gpu/Kbuild index 7df24c3..c37cc59 100644 --- a/mali_kbase/backend/gpu/Kbuild +++ b/mali_kbase/backend/gpu/Kbuild @@ -22,7 +22,6 @@ mali_kbase-y += \ backend/gpu/mali_kbase_cache_policy_backend.o \ backend/gpu/mali_kbase_gpuprops_backend.o \ backend/gpu/mali_kbase_irq_linux.o \ - backend/gpu/mali_kbase_js_backend.o \ backend/gpu/mali_kbase_pm_backend.o \ backend/gpu/mali_kbase_pm_driver.o \ backend/gpu/mali_kbase_pm_metrics.o \ @@ -42,7 +41,8 @@ ifeq ($(MALI_USE_CSF),0) backend/gpu/mali_kbase_jm_as.o \ backend/gpu/mali_kbase_debug_job_fault_backend.o \ backend/gpu/mali_kbase_jm_hw.o \ - backend/gpu/mali_kbase_jm_rb.o + backend/gpu/mali_kbase_jm_rb.o \ + backend/gpu/mali_kbase_js_backend.o endif diff --git a/mali_kbase/backend/gpu/mali_kbase_cache_policy_backend.c b/mali_kbase/backend/gpu/mali_kbase_cache_policy_backend.c index 7c0abba..86539d5 100644 --- a/mali_kbase/backend/gpu/mali_kbase_cache_policy_backend.c +++ b/mali_kbase/backend/gpu/mali_kbase_cache_policy_backend.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2014-2016, 2018, 2020-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -43,12 +43,12 @@ void kbase_cache_set_coherency_mode(struct kbase_device *kbdev, kbdev->current_gpu_coherency_mode = mode; if (kbasep_amba_register_present(kbdev)) { - u32 val = kbase_reg_read(kbdev, AMBA_ENABLE); + u32 val = kbase_reg_read(kbdev, GPU_CONTROL_REG(AMBA_ENABLE)); val = AMBA_ENABLE_COHERENCY_PROTOCOL_SET(val, mode); - kbase_reg_write(kbdev, AMBA_ENABLE, val); + kbase_reg_write(kbdev, GPU_CONTROL_REG(AMBA_ENABLE), val); } else - kbase_reg_write(kbdev, COHERENCY_ENABLE, mode); + kbase_reg_write(kbdev, GPU_CONTROL_REG(COHERENCY_ENABLE), mode); } u32 kbase_cache_get_coherency_features(struct kbase_device *kbdev) @@ -69,24 +69,12 @@ void kbase_amba_set_memory_cache_support(struct kbase_device *kbdev, bool enable) { if (kbasep_amba_register_present(kbdev)) { - u32 val = kbase_reg_read(kbdev, AMBA_ENABLE); + u32 val = kbase_reg_read(kbdev, GPU_CONTROL_REG(AMBA_ENABLE)); val = AMBA_ENABLE_MEMORY_CACHE_SUPPORT_SET(val, enable); - kbase_reg_write(kbdev, AMBA_ENABLE, val); + kbase_reg_write(kbdev, GPU_CONTROL_REG(AMBA_ENABLE), val); } else { WARN(1, "memory_cache_support not supported"); } } - -void kbase_amba_set_invalidate_hint(struct kbase_device *kbdev, bool enable) -{ - if (kbasep_amba_register_present(kbdev)) { - u32 val = kbase_reg_read(kbdev, AMBA_ENABLE); - - val = AMBA_ENABLE_INVALIDATE_HINT_SET(val, enable); - kbase_reg_write(kbdev, AMBA_ENABLE, val); - } else { - WARN(1, "invalidate_hint not supported"); - } -} diff --git a/mali_kbase/backend/gpu/mali_kbase_cache_policy_backend.h b/mali_kbase/backend/gpu/mali_kbase_cache_policy_backend.h index 8cd8090..0103695 100644 --- a/mali_kbase/backend/gpu/mali_kbase_cache_policy_backend.h +++ b/mali_kbase/backend/gpu/mali_kbase_cache_policy_backend.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2014-2016, 2020-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -53,13 +53,4 @@ u32 kbase_cache_get_coherency_features(struct kbase_device *kbdev); */ void kbase_amba_set_memory_cache_support(struct kbase_device *kbdev, bool enable); -/** - * kbase_amba_set_invalidate_hint() - Sets AMBA invalidate hint - * in the GPU. - * @kbdev: Device pointer - * @enable: true for enable. - * - * Note: Only for arch version 12.x.1 onwards. 
- */ -void kbase_amba_set_invalidate_hint(struct kbase_device *kbdev, bool enable); #endif /* _KBASE_CACHE_POLICY_BACKEND_H_ */ diff --git a/mali_kbase/backend/gpu/mali_kbase_clk_rate_trace_mgr.c b/mali_kbase/backend/gpu/mali_kbase_clk_rate_trace_mgr.c index 8d09347..cca4f74 100644 --- a/mali_kbase/backend/gpu/mali_kbase_clk_rate_trace_mgr.c +++ b/mali_kbase/backend/gpu/mali_kbase_clk_rate_trace_mgr.c @@ -58,8 +58,10 @@ get_clk_rate_trace_callbacks(__maybe_unused struct kbase_device *kbdev) if (WARN_ON(!kbdev) || WARN_ON(!kbdev->dev)) return callbacks; - arbiter_if_node = - of_get_property(kbdev->dev->of_node, "arbiter_if", NULL); + arbiter_if_node = of_get_property(kbdev->dev->of_node, "arbiter-if", NULL); + if (!arbiter_if_node) + arbiter_if_node = of_get_property(kbdev->dev->of_node, "arbiter_if", NULL); + /* Arbitration enabled, override the callback pointer.*/ if (arbiter_if_node) callbacks = &arb_clk_rate_trace_ops; @@ -241,8 +243,7 @@ void kbase_clk_rate_trace_manager_gpu_active(struct kbase_device *kbdev) if (!clk_rtm->clk_rate_trace_ops) return; - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - spin_lock(&clk_rtm->lock); + spin_lock_irqsave(&clk_rtm->lock, flags); for (i = 0; i < BASE_MAX_NR_CLOCKS_REGULATORS; i++) { struct kbase_clk_data *clk_data = clk_rtm->clks[i]; @@ -258,8 +259,7 @@ void kbase_clk_rate_trace_manager_gpu_active(struct kbase_device *kbdev) } clk_rtm->gpu_idle = false; - spin_unlock(&clk_rtm->lock); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + spin_unlock_irqrestore(&clk_rtm->lock, flags); } void kbase_clk_rate_trace_manager_gpu_idle(struct kbase_device *kbdev) diff --git a/mali_kbase/backend/gpu/mali_kbase_debug_job_fault_backend.c b/mali_kbase/backend/gpu/mali_kbase_debug_job_fault_backend.c index e121b41..cd3b29d 100644 --- a/mali_kbase/backend/gpu/mali_kbase_debug_job_fault_backend.c +++ b/mali_kbase/backend/gpu/mali_kbase_debug_job_fault_backend.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2012-2015, 2018-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2012-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -59,7 +59,7 @@ static int job_slot_reg_snapshot[] = { JS_CONFIG_NEXT }; -/*MMU_REG(r)*/ +/*MMU_CONTROL_REG(r)*/ static int mmu_reg_snapshot[] = { MMU_IRQ_MASK, MMU_IRQ_STATUS @@ -118,15 +118,14 @@ bool kbase_debug_job_fault_reg_snapshot_init(struct kbase_context *kctx, /* get the MMU registers*/ for (i = 0; i < sizeof(mmu_reg_snapshot)/4; i++) { - kctx->reg_dump[offset] = MMU_REG(mmu_reg_snapshot[i]); + kctx->reg_dump[offset] = MMU_CONTROL_REG(mmu_reg_snapshot[i]); offset += 2; } /* get the Address space registers*/ for (j = 0; j < as_number; j++) { for (i = 0; i < sizeof(as_reg_snapshot)/4; i++) { - kctx->reg_dump[offset] = - MMU_AS_REG(j, as_reg_snapshot[i]); + kctx->reg_dump[offset] = MMU_STAGE1_REG(MMU_AS_REG(j, as_reg_snapshot[i])); offset += 2; } } diff --git a/mali_kbase/backend/gpu/mali_kbase_irq_linux.c b/mali_kbase/backend/gpu/mali_kbase_irq_linux.c index ef09c6b..b95277c 100644 --- a/mali_kbase/backend/gpu/mali_kbase_irq_linux.c +++ b/mali_kbase/backend/gpu/mali_kbase_irq_linux.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2014-2016, 2018-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -99,7 +99,7 @@ static irqreturn_t kbase_mmu_irq_handler(int irq, void *data) atomic_inc(&kbdev->faults_pending); - val = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_STATUS)); + val = kbase_reg_read(kbdev, MMU_CONTROL_REG(MMU_IRQ_STATUS)); #ifdef CONFIG_MALI_DEBUG if (!kbdev->pm.backend.driver_ready_for_irqs) @@ -298,7 +298,7 @@ static irqreturn_t kbase_mmu_irq_test_handler(int irq, void *data) return IRQ_NONE; } - val = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_STATUS)); + val = kbase_reg_read(kbdev, MMU_CONTROL_REG(MMU_IRQ_STATUS)); spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); @@ -310,7 +310,7 @@ static irqreturn_t kbase_mmu_irq_test_handler(int irq, void *data) kbasep_irq_test_data.triggered = 1; wake_up(&kbasep_irq_test_data.wait); - kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), val); + kbase_reg_write(kbdev, MMU_CONTROL_REG(MMU_IRQ_CLEAR), val); return IRQ_HANDLED; } @@ -344,8 +344,8 @@ static int kbasep_common_test_interrupt( break; case MMU_IRQ_TAG: test_handler = kbase_mmu_irq_test_handler; - rawstat_offset = MMU_REG(MMU_IRQ_RAWSTAT); - mask_offset = MMU_REG(MMU_IRQ_MASK); + rawstat_offset = MMU_CONTROL_REG(MMU_IRQ_RAWSTAT); + mask_offset = MMU_CONTROL_REG(MMU_IRQ_MASK); break; case GPU_IRQ_TAG: /* already tested by pm_driver - bail out */ diff --git a/mali_kbase/backend/gpu/mali_kbase_jm_hw.c b/mali_kbase/backend/gpu/mali_kbase_jm_hw.c index 72926bc..dd8f4d9 100644 --- a/mali_kbase/backend/gpu/mali_kbase_jm_hw.c +++ b/mali_kbase/backend/gpu/mali_kbase_jm_hw.c @@ -585,7 +585,7 @@ void kbase_job_done(struct kbase_device *kbdev, u32 done) count += nr_done; while (nr_done) { - if (nr_done == 1) { + if (likely(nr_done == 1)) { kbase_gpu_complete_hw(kbdev, i, completion_code, job_tail, @@ -604,6 +604,14 @@ void kbase_job_done(struct kbase_device *kbdev, u32 done) BASE_JD_EVENT_DONE, 0, &end_timestamp); +#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) + /* Increment the end timestamp value by 1 ns to + * avoid having the same value for 'start_time_ns' + * and 'end_time_ns' for the 2nd atom whose job + * completion IRQ got merged with the 1st atom. 
+ */ + end_timestamp = ktime_add(end_timestamp, ns_to_ktime(1)); +#endif } nr_done--; } @@ -1061,12 +1069,12 @@ static void kbase_debug_dump_registers(struct kbase_device *kbdev) i, kbase_reg_read(kbdev, JOB_SLOT_REG(i, JS_HEAD_LO))); } dev_err(kbdev->dev, " MMU_IRQ_RAWSTAT=0x%08x GPU_FAULTSTATUS=0x%08x", - kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_RAWSTAT)), + kbase_reg_read(kbdev, MMU_CONTROL_REG(MMU_IRQ_RAWSTAT)), kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_FAULTSTATUS))); dev_err(kbdev->dev, " GPU_IRQ_MASK=0x%08x JOB_IRQ_MASK=0x%08x MMU_IRQ_MASK=0x%08x", kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK)), kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK)), - kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK))); + kbase_reg_read(kbdev, MMU_CONTROL_REG(MMU_IRQ_MASK))); dev_err(kbdev->dev, " PWR_OVERRIDE0=0x%08x PWR_OVERRIDE1=0x%08x", kbase_reg_read(kbdev, GPU_CONTROL_REG(PWR_OVERRIDE0)), kbase_reg_read(kbdev, GPU_CONTROL_REG(PWR_OVERRIDE1))); diff --git a/mali_kbase/backend/gpu/mali_kbase_jm_internal.h b/mali_kbase/backend/gpu/mali_kbase_jm_internal.h index bfd55a6..380a530 100644 --- a/mali_kbase/backend/gpu/mali_kbase_jm_internal.h +++ b/mali_kbase/backend/gpu/mali_kbase_jm_internal.h @@ -47,7 +47,7 @@ void kbase_job_done_slot(struct kbase_device *kbdev, int s, u32 completion_code, #if IS_ENABLED(CONFIG_GPU_TRACEPOINTS) static inline char *kbasep_make_job_slot_string(unsigned int js, char *js_string, size_t js_size) { - snprintf(js_string, js_size, "job_slot_%u", js); + (void)scnprintf(js_string, js_size, "job_slot_%u", js); return js_string; } #endif diff --git a/mali_kbase/backend/gpu/mali_kbase_jm_rb.c b/mali_kbase/backend/gpu/mali_kbase_jm_rb.c index f4094a3..66f068a 100644 --- a/mali_kbase/backend/gpu/mali_kbase_jm_rb.c +++ b/mali_kbase/backend/gpu/mali_kbase_jm_rb.c @@ -32,6 +32,9 @@ #include <hwcnt/mali_kbase_hwcnt_context.h> #include <mali_kbase_reset_gpu.h> #include <mali_kbase_kinstr_jm.h> +#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) +#include <mali_kbase_gpu_metrics.h> +#endif #include <backend/gpu/mali_kbase_cache_policy_backend.h> #include <device/mali_kbase_device.h> #include <backend/gpu/mali_kbase_jm_internal.h> @@ -274,6 +277,59 @@ int kbase_backend_slot_free(struct kbase_device *kbdev, unsigned int js) return SLOT_RB_SIZE - kbase_backend_nr_atoms_on_slot(kbdev, js); } +/** + * trace_atom_completion_for_gpu_metrics - Report the completion of atom for the + * purpose of emitting power/gpu_work_period + * tracepoint. + * + * @katom: Pointer to the atom that completed execution on GPU. + * @end_timestamp: Pointer to the timestamp of atom completion. May be NULL, in + * which case current time will be used. + * + * The function would also report the start for an atom that was in the HEAD_NEXT + * register. + * + * Note: Caller must hold the HW access lock. + */ +static inline void trace_atom_completion_for_gpu_metrics( + struct kbase_jd_atom *const katom, + ktime_t *end_timestamp) +{ +#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) + u64 complete_ns; + struct kbase_context *kctx = katom->kctx; + struct kbase_jd_atom *queued = + kbase_gpu_inspect(kctx->kbdev, katom->slot_nr, 1); + +#ifdef CONFIG_MALI_DEBUG + WARN_ON(!kbase_gpu_inspect(kctx->kbdev, katom->slot_nr, 0)); +#endif + + lockdep_assert_held(&kctx->kbdev->hwaccess_lock); + + if (unlikely(queued == katom)) + return; + + /* A protected atom and a non-protected atom cannot be in the RB_SUBMITTED + * state at the same time in the job slot ringbuffer. 
Atom submission state + * machine prevents the submission of a non-protected atom until all + * protected atoms have completed and GPU has exited the protected mode. + * This implies that if the queued atom is in RB_SUBMITTED state, it shall + * be a protected atom and so we can return early. + */ + if (unlikely(kbase_jd_katom_is_protected(katom))) + return; + + if (likely(end_timestamp)) + complete_ns = ktime_to_ns(*end_timestamp); + else + complete_ns = ktime_get_raw_ns(); + + kbase_gpu_metrics_ctx_end_activity(kctx, complete_ns); + if (queued && queued->gpu_rb_state == KBASE_ATOM_GPU_RB_SUBMITTED) + kbase_gpu_metrics_ctx_start_activity(queued->kctx, complete_ns); +#endif +} static void kbase_gpu_release_atom(struct kbase_device *kbdev, struct kbase_jd_atom *katom, @@ -290,6 +346,7 @@ static void kbase_gpu_release_atom(struct kbase_device *kbdev, break; case KBASE_ATOM_GPU_RB_SUBMITTED: + trace_atom_completion_for_gpu_metrics(katom, end_timestamp); kbase_kinstr_jm_atom_hw_release(katom); /* Inform power management at start/finish of atom so it can * update its GPU utilisation metrics. Mark atom as not @@ -865,6 +922,9 @@ void kbase_backend_slot_update(struct kbase_device *kbdev) for (idx = 0; idx < SLOT_RB_SIZE; idx++) { bool cores_ready; +#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) + bool trace_atom_submit_for_gpu_metrics = true; +#endif int ret; if (!katom[idx]) @@ -975,12 +1035,21 @@ void kbase_backend_slot_update(struct kbase_device *kbdev) case KBASE_ATOM_GPU_RB_READY: if (idx == 1) { + enum kbase_atom_gpu_rb_state atom_0_gpu_rb_state = + katom[0]->gpu_rb_state; + +#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) + trace_atom_submit_for_gpu_metrics = + (atom_0_gpu_rb_state == + KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB); +#endif + /* Only submit if head atom or previous * atom already submitted */ - if ((katom[0]->gpu_rb_state != + if ((atom_0_gpu_rb_state != KBASE_ATOM_GPU_RB_SUBMITTED && - katom[0]->gpu_rb_state != + atom_0_gpu_rb_state != KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB)) break; @@ -1017,7 +1086,15 @@ void kbase_backend_slot_update(struct kbase_device *kbdev) &katom[idx]->start_timestamp); /* Inform platform at start/finish of atom */ + kbasep_platform_event_work_begin(katom[idx]); +#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) + if (likely(trace_atom_submit_for_gpu_metrics && + !kbase_jd_katom_is_protected(katom[idx]))) + kbase_gpu_metrics_ctx_start_activity( + katom[idx]->kctx, + ktime_to_ns(katom[idx]->start_timestamp)); +#endif } else { if (katom[idx]->core_req & BASE_JD_REQ_PERMON) kbase_pm_release_gpu_cycle_counter_nolock(kbdev); @@ -1079,6 +1156,25 @@ kbase_rb_atom_might_depend(const struct kbase_jd_atom *katom_a, KBASE_KATOM_FLAG_FAIL_BLOCKER))); } +static inline void kbase_gpu_remove_atom(struct kbase_device *kbdev, + struct kbase_jd_atom *katom, + u32 action, + bool disjoint) +{ + struct kbase_context *kctx = katom->kctx; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + katom->event_code = BASE_JD_EVENT_REMOVED_FROM_NEXT; + kbase_gpu_mark_atom_for_return(kbdev, katom); + kbase_jsctx_slot_prio_blocked_set(kctx, katom->slot_nr, + katom->sched_priority); + + if (disjoint) + kbase_job_check_enter_disjoint(kbdev, action, katom->core_req, + katom); +} + /** * kbase_gpu_irq_evict - evict a slot's JSn_HEAD_NEXT atom from the HW if it is * related to a failed JSn_HEAD atom @@ -1129,9 +1225,9 @@ bool kbase_gpu_irq_evict(struct kbase_device *kbdev, unsigned int js, u32 comple kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_HI)) != 0)) { 
kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_COMMAND_NEXT), JS_COMMAND_NOP); - next_katom->gpu_rb_state = KBASE_ATOM_GPU_RB_READY; if (completion_code == BASE_JD_EVENT_STOPPED) { + kbase_gpu_remove_atom(kbdev, next_katom, JS_COMMAND_SOFT_STOP, false); KBASE_TLSTREAM_TL_NRET_ATOM_LPU(kbdev, next_katom, &kbdev->gpu_props.props.raw_props.js_features [next_katom->slot_nr]); @@ -1140,10 +1236,12 @@ bool kbase_gpu_irq_evict(struct kbase_device *kbdev, unsigned int js, u32 comple KBASE_TLSTREAM_TL_NRET_CTX_LPU(kbdev, next_katom->kctx, &kbdev->gpu_props.props.raw_props.js_features [next_katom->slot_nr]); - } + } else { + next_katom->gpu_rb_state = KBASE_ATOM_GPU_RB_READY; - if (next_katom->core_req & BASE_JD_REQ_PERMON) - kbase_pm_release_gpu_cycle_counter_nolock(kbdev); + if (next_katom->core_req & BASE_JD_REQ_PERMON) + kbase_pm_release_gpu_cycle_counter_nolock(kbdev); + } /* On evicting the next_katom, the last submission kctx on the * given job slot then reverts back to the one that owns katom. @@ -1528,25 +1626,6 @@ static inline void kbase_gpu_stop_atom(struct kbase_device *kbdev, unsigned int kbase_jsctx_slot_prio_blocked_set(kctx, js, katom->sched_priority); } -static inline void kbase_gpu_remove_atom(struct kbase_device *kbdev, - struct kbase_jd_atom *katom, - u32 action, - bool disjoint) -{ - struct kbase_context *kctx = katom->kctx; - - lockdep_assert_held(&kbdev->hwaccess_lock); - - katom->event_code = BASE_JD_EVENT_REMOVED_FROM_NEXT; - kbase_gpu_mark_atom_for_return(kbdev, katom); - kbase_jsctx_slot_prio_blocked_set(kctx, katom->slot_nr, - katom->sched_priority); - - if (disjoint) - kbase_job_check_enter_disjoint(kbdev, action, katom->core_req, - katom); -} - static int should_stop_x_dep_slot(struct kbase_jd_atom *katom) { if (katom->x_post_dep) { diff --git a/mali_kbase/backend/gpu/mali_kbase_js_backend.c b/mali_kbase/backend/gpu/mali_kbase_js_backend.c index 0ed04bb..ff4e114 100644 --- a/mali_kbase/backend/gpu/mali_kbase_js_backend.c +++ b/mali_kbase/backend/gpu/mali_kbase_js_backend.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2014-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -28,28 +28,18 @@ #include <mali_kbase_reset_gpu.h> #include <backend/gpu/mali_kbase_jm_internal.h> #include <backend/gpu/mali_kbase_js_internal.h> +#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) +#include <mali_kbase_gpu_metrics.h> + +#endif -#if !MALI_USE_CSF /* * Hold the runpool_mutex for this */ -static inline bool timer_callback_should_run(struct kbase_device *kbdev) +static inline bool timer_callback_should_run(struct kbase_device *kbdev, int nr_running_ctxs) { - struct kbase_backend_data *backend = &kbdev->hwaccess.backend; - int nr_running_ctxs; - lockdep_assert_held(&kbdev->js_data.runpool_mutex); - /* Timer must stop if we are suspending */ - if (backend->suspend_timer) - return false; - - /* nr_contexts_pullable is updated with the runpool_mutex. 
However, the - * locking in the caller gives us a barrier that ensures - * nr_contexts_pullable is up-to-date for reading - */ - nr_running_ctxs = atomic_read(&kbdev->js_data.nr_contexts_runnable); - #ifdef CONFIG_MALI_DEBUG if (kbdev->js_data.softstop_always) { /* Debug support for allowing soft-stop on a single context */ @@ -273,18 +263,20 @@ static enum hrtimer_restart timer_callback(struct hrtimer *timer) return HRTIMER_NORESTART; } -#endif /* !MALI_USE_CSF */ void kbase_backend_ctx_count_changed(struct kbase_device *kbdev) { -#if !MALI_USE_CSF struct kbasep_js_device_data *js_devdata = &kbdev->js_data; struct kbase_backend_data *backend = &kbdev->hwaccess.backend; unsigned long flags; + /* Timer must stop if we are suspending */ + const bool suspend_timer = backend->suspend_timer; + const int nr_running_ctxs = + atomic_read(&kbdev->js_data.nr_contexts_runnable); lockdep_assert_held(&js_devdata->runpool_mutex); - if (!timer_callback_should_run(kbdev)) { + if (suspend_timer || !timer_callback_should_run(kbdev, nr_running_ctxs)) { /* Take spinlock to force synchronisation with timer */ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); backend->timer_running = false; @@ -298,7 +290,8 @@ void kbase_backend_ctx_count_changed(struct kbase_device *kbdev) hrtimer_cancel(&backend->scheduling_timer); } - if (timer_callback_should_run(kbdev) && !backend->timer_running) { + if (!suspend_timer && timer_callback_should_run(kbdev, nr_running_ctxs) && + !backend->timer_running) { /* Take spinlock to force synchronisation with timer */ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); backend->timer_running = true; @@ -309,36 +302,59 @@ void kbase_backend_ctx_count_changed(struct kbase_device *kbdev) KBASE_KTRACE_ADD_JM(kbdev, JS_POLICY_TIMER_START, NULL, NULL, 0u, 0u); } -#else /* !MALI_USE_CSF */ - CSTD_UNUSED(kbdev); -#endif /* !MALI_USE_CSF */ + +#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) + if (unlikely(suspend_timer)) { + js_devdata->gpu_metrics_timer_needed = false; + /* Cancel the timer as System suspend is happening */ + hrtimer_cancel(&js_devdata->gpu_metrics_timer); + js_devdata->gpu_metrics_timer_running = false; + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + /* Explicitly emit the tracepoint on System suspend */ + kbase_gpu_metrics_emit_tracepoint(kbdev, ktime_get_raw_ns()); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + return; + } + + if (!nr_running_ctxs) { + /* Just set the flag to not restart the timer on expiry */ + js_devdata->gpu_metrics_timer_needed = false; + return; + } + + /* There are runnable contexts so the timer is needed */ + if (!js_devdata->gpu_metrics_timer_needed) { + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + js_devdata->gpu_metrics_timer_needed = true; + /* No need to restart the timer if it is already running. 
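+ * The timer's expiry callback is not visible in this hunk; as a sketch
+ * only, it is assumed to emit the tracepoint and re-arm itself while
+ * gpu_metrics_timer_needed remains set, along these lines:
+ *
+ *   static enum hrtimer_restart gpu_metrics_timer_callback(struct hrtimer *timer)
+ *   {
+ *       struct kbasep_js_device_data *js_devdata = container_of(
+ *           timer, struct kbasep_js_device_data, gpu_metrics_timer);
+ *       struct kbase_device *kbdev =
+ *           container_of(js_devdata, struct kbase_device, js_data);
+ *       unsigned long flags;
+ *
+ *       spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+ *       kbase_gpu_metrics_emit_tracepoint(kbdev, ktime_get_raw_ns());
+ *       spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+ *
+ *       if (!js_devdata->gpu_metrics_timer_needed)
+ *           return HRTIMER_NORESTART;
+ *       hrtimer_forward_now(timer,
+ *           HR_TIMER_DELAY_NSEC(kbase_gpu_metrics_get_emit_interval()));
+ *       return HRTIMER_RESTART;
+ *   }
+ *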
*/ + if (!js_devdata->gpu_metrics_timer_running) { + hrtimer_start(&js_devdata->gpu_metrics_timer, + HR_TIMER_DELAY_NSEC(kbase_gpu_metrics_get_emit_interval()), + HRTIMER_MODE_REL); + js_devdata->gpu_metrics_timer_running = true; + } + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + } +#endif } int kbase_backend_timer_init(struct kbase_device *kbdev) { -#if !MALI_USE_CSF struct kbase_backend_data *backend = &kbdev->hwaccess.backend; hrtimer_init(&backend->scheduling_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); backend->scheduling_timer.function = timer_callback; backend->timer_running = false; -#else /* !MALI_USE_CSF */ - CSTD_UNUSED(kbdev); -#endif /* !MALI_USE_CSF */ return 0; } void kbase_backend_timer_term(struct kbase_device *kbdev) { -#if !MALI_USE_CSF struct kbase_backend_data *backend = &kbdev->hwaccess.backend; hrtimer_cancel(&backend->scheduling_timer); -#else /* !MALI_USE_CSF */ - CSTD_UNUSED(kbdev); -#endif /* !MALI_USE_CSF */ } void kbase_backend_timer_suspend(struct kbase_device *kbdev) diff --git a/mali_kbase/backend/gpu/mali_kbase_l2_mmu_config.c b/mali_kbase/backend/gpu/mali_kbase_l2_mmu_config.c index 9ce5075..6eedc00 100644 --- a/mali_kbase/backend/gpu/mali_kbase_l2_mmu_config.c +++ b/mali_kbase/backend/gpu/mali_kbase_l2_mmu_config.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -19,8 +19,9 @@ * */ +#include <linux/version_compat_defs.h> + #include <mali_kbase.h> -#include <mali_kbase_bits.h> #include <mali_kbase_config_defaults.h> #include <device/mali_kbase_device.h> #include "mali_kbase_l2_mmu_config.h" diff --git a/mali_kbase/backend/gpu/mali_kbase_model_dummy.c b/mali_kbase/backend/gpu/mali_kbase_model_dummy.c index dd16fb2..46bcdc7 100644 --- a/mali_kbase/backend/gpu/mali_kbase_model_dummy.c +++ b/mali_kbase/backend/gpu/mali_kbase_model_dummy.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2014-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -484,13 +484,6 @@ void *gpu_device_get_data(void *model) #define signal_int(m, s) m->slots[(s)].job_complete_irq_asserted = 1 -/* SCons should pass in a default GPU, but other ways of building (e.g. - * in-tree) won't, so define one here in case. 
- */ -#ifndef CONFIG_MALI_NO_MALI_DEFAULT_GPU -#define CONFIG_MALI_NO_MALI_DEFAULT_GPU "tMIx" -#endif - static char *no_mali_gpu = CONFIG_MALI_NO_MALI_DEFAULT_GPU; module_param(no_mali_gpu, charp, 0000); MODULE_PARM_DESC(no_mali_gpu, "GPU to identify as"); @@ -1378,10 +1371,10 @@ void midgard_model_write_reg(void *h, u32 addr, u32 value) dummy->l2_config = value; } #if MALI_USE_CSF - else if (addr >= GPU_CONTROL_REG(CSF_HW_DOORBELL_PAGE_OFFSET) && - addr < GPU_CONTROL_REG(CSF_HW_DOORBELL_PAGE_OFFSET + - (CSF_NUM_DOORBELL * CSF_HW_DOORBELL_PAGE_SIZE))) { - if (addr == GPU_CONTROL_REG(CSF_HW_DOORBELL_PAGE_OFFSET)) + else if (addr >= CSF_HW_DOORBELL_PAGE_OFFSET && + addr < CSF_HW_DOORBELL_PAGE_OFFSET + + (CSF_NUM_DOORBELL * CSF_HW_DOORBELL_PAGE_SIZE)) { + if (addr == CSF_HW_DOORBELL_PAGE_OFFSET) hw_error_status.job_irq_status = JOB_IRQ_GLOBAL_IF; } else if ((addr >= GPU_CONTROL_REG(SYSC_ALLOC0)) && (addr < GPU_CONTROL_REG(SYSC_ALLOC(SYSC_ALLOC_COUNT)))) { @@ -1409,13 +1402,13 @@ void midgard_model_write_reg(void *h, u32 addr, u32 value) } } #endif - else if (addr == MMU_REG(MMU_IRQ_MASK)) { + else if (addr == MMU_CONTROL_REG(MMU_IRQ_MASK)) { hw_error_status.mmu_irq_mask = value; - } else if (addr == MMU_REG(MMU_IRQ_CLEAR)) { + } else if (addr == MMU_CONTROL_REG(MMU_IRQ_CLEAR)) { hw_error_status.mmu_irq_rawstat &= (~value); - } else if ((addr >= MMU_AS_REG(0, AS_TRANSTAB_LO)) && (addr <= MMU_AS_REG(15, AS_STATUS))) { - int mem_addr_space = (addr - MMU_AS_REG(0, AS_TRANSTAB_LO)) - >> 6; + } else if ((addr >= MMU_STAGE1_REG(MMU_AS_REG(0, AS_TRANSTAB_LO))) && + (addr <= MMU_STAGE1_REG(MMU_AS_REG(15, AS_STATUS)))) { + int mem_addr_space = (addr - MMU_STAGE1_REG(MMU_AS_REG(0, AS_TRANSTAB_LO))) >> 6; switch (addr & 0x3F) { case AS_COMMAND: @@ -1926,10 +1919,9 @@ void midgard_model_read_reg(void *h, u32 addr, u32 *const value) } else if (addr >= GPU_CONTROL_REG(CYCLE_COUNT_LO) && addr <= GPU_CONTROL_REG(TIMESTAMP_HI)) { *value = 0; - } else if (addr >= MMU_AS_REG(0, AS_TRANSTAB_LO) - && addr <= MMU_AS_REG(15, AS_STATUS)) { - int mem_addr_space = (addr - MMU_AS_REG(0, AS_TRANSTAB_LO)) - >> 6; + } else if (addr >= MMU_STAGE1_REG(MMU_AS_REG(0, AS_TRANSTAB_LO)) && + addr <= MMU_STAGE1_REG(MMU_AS_REG(15, AS_STATUS))) { + int mem_addr_space = (addr - MMU_STAGE1_REG(MMU_AS_REG(0, AS_TRANSTAB_LO))) >> 6; switch (addr & 0x3F) { case AS_TRANSTAB_LO: @@ -1973,11 +1965,11 @@ void midgard_model_read_reg(void *h, u32 addr, u32 *const value) *value = 0; break; } - } else if (addr == MMU_REG(MMU_IRQ_MASK)) { + } else if (addr == MMU_CONTROL_REG(MMU_IRQ_MASK)) { *value = hw_error_status.mmu_irq_mask; - } else if (addr == MMU_REG(MMU_IRQ_RAWSTAT)) { + } else if (addr == MMU_CONTROL_REG(MMU_IRQ_RAWSTAT)) { *value = hw_error_status.mmu_irq_rawstat; - } else if (addr == MMU_REG(MMU_IRQ_STATUS)) { + } else if (addr == MMU_CONTROL_REG(MMU_IRQ_STATUS)) { *value = hw_error_status.mmu_irq_mask & hw_error_status.mmu_irq_rawstat; } @@ -1985,8 +1977,7 @@ void midgard_model_read_reg(void *h, u32 addr, u32 *const value) else if (addr == IPA_CONTROL_REG(STATUS)) { *value = (ipa_control_timer_enabled << 31); } else if ((addr >= IPA_CONTROL_REG(VALUE_CSHW_REG_LO(0))) && - (addr <= IPA_CONTROL_REG(VALUE_CSHW_REG_HI( - IPA_CTL_MAX_VAL_CNT_IDX)))) { + (addr <= IPA_CONTROL_REG(VALUE_CSHW_REG_HI(IPA_CTL_MAX_VAL_CNT_IDX)))) { u32 counter_index = (addr - IPA_CONTROL_REG(VALUE_CSHW_REG_LO(0))) >> 3; bool is_low_word = @@ -1995,8 +1986,7 @@ void midgard_model_read_reg(void *h, u32 addr, u32 *const value) *value = 
gpu_model_get_prfcnt_value(KBASE_IPA_CORE_TYPE_CSHW, counter_index, is_low_word); } else if ((addr >= IPA_CONTROL_REG(VALUE_MEMSYS_REG_LO(0))) && - (addr <= IPA_CONTROL_REG(VALUE_MEMSYS_REG_HI( - IPA_CTL_MAX_VAL_CNT_IDX)))) { + (addr <= IPA_CONTROL_REG(VALUE_MEMSYS_REG_HI(IPA_CTL_MAX_VAL_CNT_IDX)))) { u32 counter_index = (addr - IPA_CONTROL_REG(VALUE_MEMSYS_REG_LO(0))) >> 3; bool is_low_word = @@ -2005,8 +1995,7 @@ void midgard_model_read_reg(void *h, u32 addr, u32 *const value) *value = gpu_model_get_prfcnt_value(KBASE_IPA_CORE_TYPE_MEMSYS, counter_index, is_low_word); } else if ((addr >= IPA_CONTROL_REG(VALUE_TILER_REG_LO(0))) && - (addr <= IPA_CONTROL_REG(VALUE_TILER_REG_HI( - IPA_CTL_MAX_VAL_CNT_IDX)))) { + (addr <= IPA_CONTROL_REG(VALUE_TILER_REG_HI(IPA_CTL_MAX_VAL_CNT_IDX)))) { u32 counter_index = (addr - IPA_CONTROL_REG(VALUE_TILER_REG_LO(0))) >> 3; bool is_low_word = @@ -2015,8 +2004,7 @@ void midgard_model_read_reg(void *h, u32 addr, u32 *const value) *value = gpu_model_get_prfcnt_value(KBASE_IPA_CORE_TYPE_TILER, counter_index, is_low_word); } else if ((addr >= IPA_CONTROL_REG(VALUE_SHADER_REG_LO(0))) && - (addr <= IPA_CONTROL_REG(VALUE_SHADER_REG_HI( - IPA_CTL_MAX_VAL_CNT_IDX)))) { + (addr <= IPA_CONTROL_REG(VALUE_SHADER_REG_HI(IPA_CTL_MAX_VAL_CNT_IDX)))) { u32 counter_index = (addr - IPA_CONTROL_REG(VALUE_SHADER_REG_LO(0))) >> 3; bool is_low_word = @@ -2214,16 +2202,3 @@ int gpu_model_control(void *model, return 0; } - -/** - * kbase_is_gpu_removed - Has the GPU been removed. - * @kbdev: Kbase device pointer - * - * This function would return true if the GPU has been removed. - * It is stubbed here - * Return: Always false - */ -bool kbase_is_gpu_removed(struct kbase_device *kbdev) -{ - return false; -} diff --git a/mali_kbase/backend/gpu/mali_kbase_model_linux.c b/mali_kbase/backend/gpu/mali_kbase_model_linux.c index e90e4df..67e00e9 100644 --- a/mali_kbase/backend/gpu/mali_kbase_model_linux.c +++ b/mali_kbase/backend/gpu/mali_kbase_model_linux.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2010, 2012-2015, 2017-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -95,8 +95,7 @@ static void serve_mmu_irq(struct work_struct *work) if (atomic_cmpxchg(&kbdev->serving_mmu_irq, 1, 0) == 1) { u32 val; - while ((val = kbase_reg_read(kbdev, - MMU_REG(MMU_IRQ_STATUS)))) { + while ((val = kbase_reg_read(kbdev, MMU_CONTROL_REG(MMU_IRQ_STATUS)))) { /* Handle the IRQ */ kbase_mmu_interrupt(kbdev, val); } @@ -156,7 +155,7 @@ KBASE_EXPORT_TEST_API(kbase_reg_write); u32 kbase_reg_read(struct kbase_device *kbdev, u32 offset) { unsigned long flags; - u32 val; + u32 val = 0; spin_lock_irqsave(&kbdev->reg_op_lock, flags); midgard_model_read_reg(kbdev->model, offset, &val); diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_backend.c b/mali_kbase/backend/gpu/mali_kbase_pm_backend.c index abbb9c8..46c5ffd 100644 --- a/mali_kbase/backend/gpu/mali_kbase_pm_backend.c +++ b/mali_kbase/backend/gpu/mali_kbase_pm_backend.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -169,6 +169,7 @@ int kbase_hwaccess_pm_init(struct kbase_device *kbdev) kbdev->pm.backend.gpu_powered = false; kbdev->pm.backend.gpu_ready = false; kbdev->pm.suspending = false; + kbdev->pm.resuming = false; #ifdef CONFIG_MALI_ARBITER_SUPPORT kbase_pm_set_gpu_lost(kbdev, false); #endif @@ -590,11 +591,13 @@ static int kbase_pm_do_poweroff_sync(struct kbase_device *kbdev) { struct kbase_pm_backend_data *backend = &kbdev->pm.backend; unsigned long flags; - int ret = 0; + int ret; WARN_ON(kbdev->pm.active_count); - kbase_pm_wait_for_poweroff_work_complete(kbdev); + ret = kbase_pm_wait_for_poweroff_work_complete(kbdev); + if (ret) + return ret; kbase_pm_lock(kbdev); spin_lock_irqsave(&kbdev->hwaccess_lock, flags); @@ -679,60 +682,6 @@ unlock_hwaccess: spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); } -static bool is_poweroff_in_progress(struct kbase_device *kbdev) -{ - bool ret; - unsigned long flags; - - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - ret = (kbdev->pm.backend.poweroff_wait_in_progress == false); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - - return ret; -} - -void kbase_pm_wait_for_poweroff_work_complete(struct kbase_device *kbdev) -{ -#define POWEROFF_TIMEOUT_MSEC 500 - long remaining = msecs_to_jiffies(POWEROFF_TIMEOUT_MSEC); - remaining = wait_event_killable_timeout(kbdev->pm.backend.poweroff_wait, - is_poweroff_in_progress(kbdev), remaining); - if (!remaining) { - /* If work is now pending, kbase_pm_gpu_poweroff_wait_wq() will - * definitely be called, so it's safe to continue waiting for it. - */ - if (!work_pending(&kbdev->pm.backend.gpu_poweroff_wait_work)) { - unsigned long flags; - kbasep_platform_event_core_dump(kbdev, "poweroff work timeout"); - dev_err(kbdev->dev, "failed to wait for poweroff worker after %ims", - POWEROFF_TIMEOUT_MSEC); - kbase_gpu_timeout_debug_message(kbdev); -#if MALI_USE_CSF - //csf.scheduler.state should be accessed with scheduler lock! - //callchains go through this function though holding that lock - //so just print without locking. - dev_err(kbdev->dev, "scheduler.state %d", kbdev->csf.scheduler.state); - dev_err(kbdev->dev, "Firmware ping %d", kbase_csf_firmware_ping_wait(kbdev, 0)); -#endif - //Attempt another state machine transition prompt. - dev_err(kbdev->dev, "Attempt to prompt state machine"); - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - kbase_pm_update_state(kbdev); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - - dev_err(kbdev->dev, "GPU state after re-prompt of state machine"); - kbase_gpu_timeout_debug_message(kbdev); - - dev_err(kbdev->dev, "retrying wait, this is likely to still hang. 
%d", - is_poweroff_in_progress(kbdev)); - } - wait_event_killable(kbdev->pm.backend.poweroff_wait, - is_poweroff_in_progress(kbdev)); - } -#undef POWEROFF_TIMEOUT_MSEC -} -KBASE_EXPORT_TEST_API(kbase_pm_wait_for_poweroff_work_complete); - /** * is_gpu_powered_down - Check whether GPU is powered down * @@ -986,7 +935,13 @@ int kbase_hwaccess_pm_suspend(struct kbase_device *kbdev) kbase_pm_unlock(kbdev); - kbase_pm_wait_for_poweroff_work_complete(kbdev); + ret = kbase_pm_wait_for_poweroff_work_complete(kbdev); + if (ret) { +#if !MALI_USE_CSF + kbase_backend_timer_resume(kbdev); +#endif /* !MALI_USE_CSF */ + return ret; + } #endif WARN_ON(kbdev->pm.backend.gpu_powered); @@ -1002,6 +957,8 @@ void kbase_hwaccess_pm_resume(struct kbase_device *kbdev) { kbase_pm_lock(kbdev); + /* System resume callback has begun */ + kbdev->pm.resuming = true; kbdev->pm.suspending = false; #ifdef CONFIG_MALI_ARBITER_SUPPORT if (kbase_pm_is_gpu_lost(kbdev)) { @@ -1016,7 +973,6 @@ void kbase_hwaccess_pm_resume(struct kbase_device *kbdev) kbase_backend_timer_resume(kbdev); #endif /* !MALI_USE_CSF */ - wake_up_all(&kbdev->pm.resume_wait); kbase_pm_unlock(kbdev); } diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_driver.c b/mali_kbase/backend/gpu/mali_kbase_pm_driver.c index 2c69ac9..7c891c1 100644 --- a/mali_kbase/backend/gpu/mali_kbase_pm_driver.c +++ b/mali_kbase/backend/gpu/mali_kbase_pm_driver.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -51,9 +51,6 @@ #ifdef CONFIG_MALI_ARBITER_SUPPORT #include <arbiter/mali_kbase_arbiter_pm.h> #endif /* CONFIG_MALI_ARBITER_SUPPORT */ -#if MALI_USE_CSF -#include <csf/ipa_control/mali_kbase_csf_ipa_control.h> -#endif #if MALI_USE_CSF #include <linux/delay.h> @@ -699,8 +696,8 @@ static void wait_mcu_as_inactive(struct kbase_device *kbdev) /* Wait for the AS_ACTIVE_INT bit to become 0 for the AS used by MCU FW */ while (--max_loops && - kbase_reg_read(kbdev, MMU_AS_REG(MCU_AS_NR, AS_STATUS)) & - AS_STATUS_AS_ACTIVE_INT) + kbase_reg_read(kbdev, MMU_STAGE1_REG(MMU_AS_REG(MCU_AS_NR, AS_STATUS))) & + AS_STATUS_AS_ACTIVE_INT) ; if (!WARN_ON_ONCE(max_loops == 0)) @@ -2442,26 +2439,29 @@ void kbase_pm_reset_complete(struct kbase_device *kbdev) #define PM_TIMEOUT_MS (5000) /* 5s */ #endif -void kbase_gpu_timeout_debug_message(struct kbase_device *kbdev) { +void kbase_gpu_timeout_debug_message(struct kbase_device *kbdev, const char *timeout_msg) +{ unsigned long flags; + + dev_err(kbdev->dev, "%s", timeout_msg); #if !MALI_USE_CSF CSTD_UNUSED(flags); dev_err(kbdev->dev, "Desired state :\n"); - dev_err(kbdev->dev, " Shader=%016llx\n", + dev_err(kbdev->dev, "\tShader=%016llx\n", kbdev->pm.backend.shaders_desired ? 
kbdev->pm.backend.shaders_avail : 0); #else dev_err(kbdev->dev, "GPU pm state :\n"); spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - dev_err(kbdev->dev, " scheduler.pm_active_count = %d", kbdev->csf.scheduler.pm_active_count); - dev_err(kbdev->dev, " poweron_required %d pm.active_count %d invoke_poweroff_wait_wq_when_l2_off %d", + dev_err(kbdev->dev, "\tscheduler.pm_active_count = %d", kbdev->csf.scheduler.pm_active_count); + dev_err(kbdev->dev, "\tpoweron_required %d pm.active_count %d invoke_poweroff_wait_wq_when_l2_off %d", kbdev->pm.backend.poweron_required, kbdev->pm.active_count, kbdev->pm.backend.invoke_poweroff_wait_wq_when_l2_off); - dev_err(kbdev->dev, " gpu_poweroff_wait_work pending %d", + dev_err(kbdev->dev, "\tgpu_poweroff_wait_work pending %d", work_pending(&kbdev->pm.backend.gpu_poweroff_wait_work)); - dev_err(kbdev->dev, " MCU desired = %d\n", + dev_err(kbdev->dev, "\tMCU desired = %d\n", kbase_pm_is_mcu_desired(kbdev)); - dev_err(kbdev->dev, " MCU sw state = %d\n", + dev_err(kbdev->dev, "\tMCU sw state = %d\n", kbdev->pm.backend.mcu_state); dev_err(kbdev->dev, "\tL2 desired = %d (locked_off: %d)\n", kbase_pm_is_l2_desired(kbdev), kbdev->pm.backend.policy_change_clamp_state_to_off); @@ -2474,17 +2474,17 @@ void kbase_gpu_timeout_debug_message(struct kbase_device *kbdev) { spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); #endif dev_err(kbdev->dev, "Current state :\n"); - dev_err(kbdev->dev, " Shader=%08x%08x\n", + dev_err(kbdev->dev, "\tShader=%08x%08x\n", kbase_reg_read(kbdev, GPU_CONTROL_REG(SHADER_READY_HI)), kbase_reg_read(kbdev, GPU_CONTROL_REG(SHADER_READY_LO))); - dev_err(kbdev->dev, " Tiler =%08x%08x\n", + dev_err(kbdev->dev, "\tTiler =%08x%08x\n", kbase_reg_read(kbdev, GPU_CONTROL_REG(TILER_READY_HI)), kbase_reg_read(kbdev, GPU_CONTROL_REG(TILER_READY_LO))); - dev_err(kbdev->dev, " L2 =%08x%08x\n", + dev_err(kbdev->dev, "\tL2 =%08x%08x\n", kbase_reg_read(kbdev, GPU_CONTROL_REG(L2_READY_HI)), kbase_reg_read(kbdev, @@ -2493,17 +2493,17 @@ void kbase_gpu_timeout_debug_message(struct kbase_device *kbdev) { kbase_csf_debug_dump_registers(kbdev); #endif dev_err(kbdev->dev, "Cores transitioning :\n"); - dev_err(kbdev->dev, " Shader=%08x%08x\n", + dev_err(kbdev->dev, "\tShader=%08x%08x\n", kbase_reg_read(kbdev, GPU_CONTROL_REG( SHADER_PWRTRANS_HI)), kbase_reg_read(kbdev, GPU_CONTROL_REG( SHADER_PWRTRANS_LO))); - dev_err(kbdev->dev, " Tiler =%08x%08x\n", + dev_err(kbdev->dev, "\tTiler =%08x%08x\n", kbase_reg_read(kbdev, GPU_CONTROL_REG( TILER_PWRTRANS_HI)), kbase_reg_read(kbdev, GPU_CONTROL_REG( TILER_PWRTRANS_LO))); - dev_err(kbdev->dev, " L2 =%08x%08x\n", + dev_err(kbdev->dev, "\tL2 =%08x%08x\n", kbase_reg_read(kbdev, GPU_CONTROL_REG( L2_PWRTRANS_HI)), kbase_reg_read(kbdev, GPU_CONTROL_REG( @@ -2512,12 +2512,9 @@ void kbase_gpu_timeout_debug_message(struct kbase_device *kbdev) { dump_stack(); } -static void kbase_pm_timed_out(struct kbase_device *kbdev) +static void kbase_pm_timed_out(struct kbase_device *kbdev, const char *timeout_msg) { - dev_err(kbdev->dev, "Power transition timed out unexpectedly\n"); - kbase_gpu_timeout_debug_message(kbdev); - dev_err(kbdev->dev, "Sending reset to GPU - all running jobs will be lost\n"); - + kbase_gpu_timeout_debug_message(kbdev, timeout_msg); /* pixel: If either: * 1. L2/MCU power transition timed out, or, * 2. 
kbase state machine fell out of sync with the hw state, @@ -2530,6 +2527,7 @@ static void kbase_pm_timed_out(struct kbase_device *kbdev) * We have already lost work if we end up here, so send a powercycle to reset the hw, * which is more reliable. */ + dev_err(kbdev->dev, "Sending reset to GPU - all running jobs will be lost\n"); if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR | RESET_FLAGS_FORCE_PM_HW_RESET)) @@ -2570,7 +2568,7 @@ int kbase_pm_wait_for_l2_powered(struct kbase_device *kbdev) .info = GPU_UEVENT_INFO_L2_PM_TIMEOUT }; pixel_gpu_uevent_send(kbdev, &evt); - kbase_pm_timed_out(kbdev); + kbase_pm_timed_out(kbdev, "Wait for desired PM state with L2 powered timed out"); err = -ETIMEDOUT; } else if (remaining < 0) { dev_info( @@ -2582,7 +2580,7 @@ int kbase_pm_wait_for_l2_powered(struct kbase_device *kbdev) return err; } -int kbase_pm_wait_for_desired_state(struct kbase_device *kbdev) +static int pm_wait_for_desired_state(struct kbase_device *kbdev, bool killable_wait) { unsigned long flags; long remaining; @@ -2600,31 +2598,42 @@ int kbase_pm_wait_for_desired_state(struct kbase_device *kbdev) /* Wait for cores */ #if KERNEL_VERSION(4, 13, 1) <= LINUX_VERSION_CODE - remaining = wait_event_killable_timeout( - kbdev->pm.backend.gpu_in_desired_state_wait, - kbase_pm_is_in_desired_state(kbdev), timeout); + if (killable_wait) + remaining = wait_event_killable_timeout(kbdev->pm.backend.gpu_in_desired_state_wait, + kbase_pm_is_in_desired_state(kbdev), + timeout); #else - remaining = wait_event_timeout( - kbdev->pm.backend.gpu_in_desired_state_wait, - kbase_pm_is_in_desired_state(kbdev), timeout); + killable_wait = false; #endif - + if (!killable_wait) + remaining = wait_event_timeout(kbdev->pm.backend.gpu_in_desired_state_wait, + kbase_pm_is_in_desired_state(kbdev), timeout); if (!remaining) { const struct gpu_uevent evt = { .type = GPU_UEVENT_TYPE_KMD_ERROR, .info = GPU_UEVENT_INFO_PM_TIMEOUT }; pixel_gpu_uevent_send(kbdev, &evt); - kbase_pm_timed_out(kbdev); + kbase_pm_timed_out(kbdev, "Wait for power transition timed out"); err = -ETIMEDOUT; } else if (remaining < 0) { - dev_info(kbdev->dev, - "Wait for desired PM state got interrupted"); + WARN_ON_ONCE(!killable_wait); + dev_info(kbdev->dev, "Wait for power transition got interrupted"); err = (int)remaining; } return err; } + +int kbase_pm_killable_wait_for_desired_state(struct kbase_device *kbdev) +{ + return pm_wait_for_desired_state(kbdev, true); +} + +int kbase_pm_wait_for_desired_state(struct kbase_device *kbdev) +{ + return pm_wait_for_desired_state(kbdev, false); +} KBASE_EXPORT_TEST_API(kbase_pm_wait_for_desired_state); #if MALI_USE_CSF @@ -2674,7 +2683,7 @@ int kbase_pm_wait_for_cores_down_scale(struct kbase_device *kbdev) #endif if (!remaining) { - kbase_pm_timed_out(kbdev); + kbase_pm_timed_out(kbdev, "Wait for cores down scaling timed out"); err = -ETIMEDOUT; } else if (remaining < 0) { dev_info( @@ -2687,6 +2696,96 @@ int kbase_pm_wait_for_cores_down_scale(struct kbase_device *kbdev) } #endif +static bool is_poweroff_wait_in_progress(struct kbase_device *kbdev) +{ + bool ret; + unsigned long flags; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + ret = kbdev->pm.backend.poweroff_wait_in_progress; + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + return ret; +} + +static int pm_wait_for_poweroff_work_complete(struct kbase_device *kbdev, bool killable_wait) +{ + long remaining; +#if MALI_USE_CSF + /* gpu_poweroff_wait_work would be subjected to the kernel scheduling + * and so the 
wait time cannot be a function of GPU frequency alone. + */ + const unsigned int extra_wait_time_ms = 2000; + const long timeout = kbase_csf_timeout_in_jiffies( + kbase_get_timeout_ms(kbdev, CSF_PM_TIMEOUT) + extra_wait_time_ms); +#else +#ifdef CONFIG_MALI_ARBITER_SUPPORT + /* Handling of timeout error isn't supported for arbiter builds */ + const long timeout = MAX_SCHEDULE_TIMEOUT; +#else + const long timeout = msecs_to_jiffies(PM_TIMEOUT_MS); +#endif +#endif + int err = 0; + +#if KERNEL_VERSION(4, 13, 1) <= LINUX_VERSION_CODE + if (killable_wait) + remaining = wait_event_killable_timeout(kbdev->pm.backend.poweroff_wait, + !is_poweroff_wait_in_progress(kbdev), + timeout); +#else + killable_wait = false; +#endif + + if (!killable_wait) + remaining = wait_event_timeout(kbdev->pm.backend.poweroff_wait, + !is_poweroff_wait_in_progress(kbdev), timeout); + if (!remaining) { + /* If work is now pending, kbase_pm_gpu_poweroff_wait_wq() will + * definitely be called, so it's safe to continue waiting for it. + */ + if (work_pending(&kbdev->pm.backend.gpu_poweroff_wait_work)) { + wait_event_killable(kbdev->pm.backend.poweroff_wait, + !is_poweroff_wait_in_progress(kbdev)); + } else { + unsigned long flags; + kbasep_platform_event_core_dump(kbdev, "poweroff work timeout"); + kbase_gpu_timeout_debug_message(kbdev, "failed to wait for poweroff worker"); +#if MALI_USE_CSF + /* csf.scheduler.state should be accessed with the scheduler lock! + * Callchains reach this function while holding that lock, + * so just print without locking. + */ + dev_err(kbdev->dev, "scheduler.state %d", kbdev->csf.scheduler.state); + dev_err(kbdev->dev, "Firmware ping %d", kbase_csf_firmware_ping_wait(kbdev, 0)); +#endif + /* Attempt another state machine transition prompt. */ + dev_err(kbdev->dev, "Attempt to prompt state machine"); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kbase_pm_update_state(kbdev); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + kbase_gpu_timeout_debug_message(kbdev, "GPU state after re-prompt of state machine"); + err = -ETIMEDOUT; + } + } else if (remaining < 0) { + WARN_ON_ONCE(!killable_wait); + dev_info(kbdev->dev, "Wait for poweroff work got interrupted"); + err = (int)remaining; + } + return err; +} + +int kbase_pm_killable_wait_for_poweroff_work_complete(struct kbase_device *kbdev) +{ + return pm_wait_for_poweroff_work_complete(kbdev, true); +} + +int kbase_pm_wait_for_poweroff_work_complete(struct kbase_device *kbdev) +{ + return pm_wait_for_poweroff_work_complete(kbdev, false); +} +KBASE_EXPORT_TEST_API(kbase_pm_wait_for_poweroff_work_complete); + void kbase_pm_enable_interrupts(struct kbase_device *kbdev) { unsigned long flags; @@ -2704,12 +2803,12 @@ void kbase_pm_enable_interrupts(struct kbase_device *kbdev) kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), 0xFFFFFFFF); kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK), 0xFFFFFFFF); - kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), 0xFFFFFFFF); + kbase_reg_write(kbdev, MMU_CONTROL_REG(MMU_IRQ_CLEAR), 0xFFFFFFFF); #if MALI_USE_CSF /* Enable only the Page fault bits part */ - kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), 0xFFFF); + kbase_reg_write(kbdev, MMU_CONTROL_REG(MMU_IRQ_MASK), 0xFFFF); #else - kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), 0xFFFFFFFF); + kbase_reg_write(kbdev, MMU_CONTROL_REG(MMU_IRQ_MASK), 0xFFFFFFFF); #endif } @@ -2729,8 +2828,8 @@ void kbase_pm_disable_interrupts_nolock(struct kbase_device *kbdev) kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK), 0); kbase_reg_write(kbdev,
JOB_CONTROL_REG(JOB_IRQ_CLEAR), 0xFFFFFFFF); - kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), 0); - kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), 0xFFFFFFFF); + kbase_reg_write(kbdev, MMU_CONTROL_REG(MMU_IRQ_MASK), 0); + kbase_reg_write(kbdev, MMU_CONTROL_REG(MMU_IRQ_CLEAR), 0xFFFFFFFF); } void kbase_pm_disable_interrupts(struct kbase_device *kbdev) @@ -3147,9 +3246,13 @@ static int kbase_pm_hw_issues_detect(struct kbase_device *kbdev) kbdev->hw_quirks_tiler = 0; kbdev->hw_quirks_mmu = 0; - if (!of_property_read_u32(np, "quirks_gpu", &kbdev->hw_quirks_gpu)) { - dev_info(kbdev->dev, - "Found quirks_gpu = [0x%x] in Devicetree\n", + /* Read the "-" versions of the properties and fall back to + * the "_" versions if these are not found + */ + + if (!of_property_read_u32(np, "quirks-gpu", &kbdev->hw_quirks_gpu) || + !of_property_read_u32(np, "quirks_gpu", &kbdev->hw_quirks_gpu)) { + dev_info(kbdev->dev, "Found quirks_gpu = [0x%x] in Devicetree\n", kbdev->hw_quirks_gpu); } else { error = kbase_set_gpu_quirks(kbdev, prod_id); @@ -3157,33 +3260,30 @@ static int kbase_pm_hw_issues_detect(struct kbase_device *kbdev) return error; } - if (!of_property_read_u32(np, "quirks_sc", - &kbdev->hw_quirks_sc)) { - dev_info(kbdev->dev, - "Found quirks_sc = [0x%x] in Devicetree\n", - kbdev->hw_quirks_sc); + if (!of_property_read_u32(np, "quirks-sc", &kbdev->hw_quirks_sc) || + !of_property_read_u32(np, "quirks_sc", &kbdev->hw_quirks_sc)) { + dev_info(kbdev->dev, "Found quirks_sc = [0x%x] in Devicetree\n", + kbdev->hw_quirks_sc); } else { error = kbase_set_sc_quirks(kbdev, prod_id); if (error) return error; } - if (!of_property_read_u32(np, "quirks_tiler", - &kbdev->hw_quirks_tiler)) { - dev_info(kbdev->dev, - "Found quirks_tiler = [0x%x] in Devicetree\n", - kbdev->hw_quirks_tiler); + if (!of_property_read_u32(np, "quirks-tiler", &kbdev->hw_quirks_tiler) || + !of_property_read_u32(np, "quirks_tiler", &kbdev->hw_quirks_tiler)) { + dev_info(kbdev->dev, "Found quirks_tiler = [0x%x] in Devicetree\n", + kbdev->hw_quirks_tiler); } else { error = kbase_set_tiler_quirks(kbdev); if (error) return error; } - if (!of_property_read_u32(np, "quirks_mmu", - &kbdev->hw_quirks_mmu)) { - dev_info(kbdev->dev, - "Found quirks_mmu = [0x%x] in Devicetree\n", - kbdev->hw_quirks_mmu); + if (!of_property_read_u32(np, "quirks-mmu", &kbdev->hw_quirks_mmu) || + !of_property_read_u32(np, "quirks_mmu", &kbdev->hw_quirks_mmu)) { + dev_info(kbdev->dev, "Found quirks_mmu = [0x%x] in Devicetree\n", + kbdev->hw_quirks_mmu); } else { error = kbase_set_mmu_quirks(kbdev); } diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_internal.h b/mali_kbase/backend/gpu/mali_kbase_pm_internal.h index 9e29236..d7f19fb 100644 --- a/mali_kbase/backend/gpu/mali_kbase_pm_internal.h +++ b/mali_kbase/backend/gpu/mali_kbase_pm_internal.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -224,7 +224,7 @@ void kbase_pm_reset_done(struct kbase_device *kbdev); * power off in progress and kbase_pm_context_active() was called instead of * kbase_csf_scheduler_pm_active(). * - * Return: 0 on success, error code on error + * Return: 0 on success, or -ETIMEDOUT code on timeout error. 
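+ *
+ * A minimal illustrative call pattern, as a sketch only (it assumes the
+ * caller has already published its desired PM state under the
+ * hwaccess_lock):
+ *
+ *   spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+ *   kbase_pm_update_state(kbdev);
+ *   spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+ *   err = kbase_pm_wait_for_desired_state(kbdev);
+ *
+ * On -ETIMEDOUT the PM code has already dumped its debug state and
+ * requested a GPU reset, so callers typically just propagate the error.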
*/ int kbase_pm_wait_for_desired_state(struct kbase_device *kbdev); #else @@ -247,12 +247,27 @@ int kbase_pm_wait_for_desired_state(struct kbase_device *kbdev); * must ensure that this is not the case by, for example, calling * kbase_pm_wait_for_poweroff_work_complete() * - * Return: 0 on success, error code on error + * Return: 0 on success, or -ETIMEDOUT on timeout. */ int kbase_pm_wait_for_desired_state(struct kbase_device *kbdev); #endif /** + * kbase_pm_killable_wait_for_desired_state - Wait for the desired power state to be + * reached in a killable state. + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + * This function is the same as kbase_pm_wait_for_desired_state(), except that it + * allows the SIGKILL signal to interrupt the wait. + * This function is supposed to be called from code that executes in ioctl or other + * Userspace context, wherever it is safe to do so. + * + * Return: 0 on success, -ETIMEDOUT on timeout, or -ERESTARTSYS if the + * wait was interrupted. + */ +int kbase_pm_killable_wait_for_desired_state(struct kbase_device *kbdev); + +/** * kbase_pm_wait_for_l2_powered - Wait for the L2 cache to be powered on * * @kbdev: The kbase device structure for the device (must be a valid pointer) @@ -467,8 +482,26 @@ void kbase_pm_release_gpu_cycle_counter_nolock(struct kbase_device *kbdev); * This function effectively just waits for the @gpu_poweroff_wait_work work * item to complete, if it was enqueued. GPU may not have been powered down * before this function returns. + * + * Return: 0 on success, or a negative error code on failure. */ -void kbase_pm_wait_for_poweroff_work_complete(struct kbase_device *kbdev); +int kbase_pm_wait_for_poweroff_work_complete(struct kbase_device *kbdev); + +/** + * kbase_pm_killable_wait_for_poweroff_work_complete - Wait for the poweroff workqueue to + * complete in a killable state. + * + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + * This function is the same as kbase_pm_wait_for_poweroff_work_complete(), except + * that it allows the SIGKILL signal to interrupt the wait. + * This function is supposed to be called from code that executes in ioctl or other + * Userspace context, wherever it is safe to do so. + * + * Return: 0 on success, -ETIMEDOUT on timeout, or -ERESTARTSYS if the + * wait was interrupted. + */ +int kbase_pm_killable_wait_for_poweroff_work_complete(struct kbase_device *kbdev); /** * kbase_pm_wait_for_gpu_power_down - Wait for the GPU power down to complete @@ -857,6 +890,8 @@ static inline bool kbase_pm_mcu_is_in_desired_state(struct kbase_device *kbdev) { bool in_desired_state = true; + lockdep_assert_held(&kbdev->hwaccess_lock); + if (kbase_pm_is_mcu_desired(kbdev) && kbdev->pm.backend.mcu_state != KBASE_MCU_ON) in_desired_state = false; else if (!kbase_pm_is_mcu_desired(kbdev) && diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_policy.c b/mali_kbase/backend/gpu/mali_kbase_pm_policy.c index f5dc008..7d7650c 100644 --- a/mali_kbase/backend/gpu/mali_kbase_pm_policy.c +++ b/mali_kbase/backend/gpu/mali_kbase_pm_policy.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -54,7 +54,9 @@ void kbase_pm_policy_init(struct kbase_device *kbdev) unsigned long flags; int i; - if (of_property_read_string(np, "power_policy", &power_policy_name) == 0) { + /* Read "power-policy" property and fallback to "power_policy" if not found */ + if ((of_property_read_string(np, "power-policy", &power_policy_name) == 0) || + (of_property_read_string(np, "power_policy", &power_policy_name) == 0)) { for (i = 0; i < ARRAY_SIZE(all_policy_list); i++) if (sysfs_streq(all_policy_list[i]->name, power_policy_name)) { default_policy = all_policy_list[i]; @@ -298,6 +300,8 @@ void kbase_pm_set_policy(struct kbase_device *kbdev, bool reset_gpu = false; bool reset_op_prevented = true; struct kbase_csf_scheduler *scheduler = NULL; + u32 pwroff; + bool switching_to_always_on; #endif KBASE_DEBUG_ASSERT(kbdev != NULL); @@ -306,6 +310,16 @@ void kbase_pm_set_policy(struct kbase_device *kbdev, KBASE_KTRACE_ADD(kbdev, PM_SET_POLICY, NULL, new_policy->id); #if MALI_USE_CSF + pwroff = kbase_csf_firmware_get_mcu_core_pwroff_time(kbdev); + switching_to_always_on = new_policy == &kbase_pm_always_on_policy_ops; + if (pwroff == 0 && !switching_to_always_on) { + dev_warn(kbdev->dev, + "power_policy: cannot switch away from always_on with mcu_shader_pwroff_timeout set to 0\n"); + dev_warn(kbdev->dev, + "power_policy: resetting mcu_shader_pwroff_timeout to default value to switch policy from always_on\n"); + kbase_csf_firmware_reset_mcu_core_pwroff_time(kbdev); + } + scheduler = &kbdev->csf.scheduler; KBASE_DEBUG_ASSERT(scheduler != NULL); diff --git a/mali_kbase/backend/gpu/mali_kbase_time.c b/mali_kbase/backend/gpu/mali_kbase_time.c index 7a4d662..28365c0 100644 --- a/mali_kbase/backend/gpu/mali_kbase_time.c +++ b/mali_kbase/backend/gpu/mali_kbase_time.c @@ -29,6 +29,39 @@ #include <device/mali_kbase_device.h> #include <backend/gpu/mali_kbase_pm_internal.h> #include <mali_kbase_config_defaults.h> +#include <linux/version_compat_defs.h> + +struct kbase_timeout_info { + char *selector_str; + u64 timeout_cycles; +}; + +#if MALI_USE_CSF +static struct kbase_timeout_info timeout_info[KBASE_TIMEOUT_SELECTOR_COUNT] = { + [CSF_FIRMWARE_TIMEOUT] = { "CSF_FIRMWARE_TIMEOUT", MIN(CSF_FIRMWARE_TIMEOUT_CYCLES, + CSF_FIRMWARE_PING_TIMEOUT_CYCLES) }, + [CSF_PM_TIMEOUT] = { "CSF_PM_TIMEOUT", CSF_PM_TIMEOUT_CYCLES }, + [CSF_GPU_RESET_TIMEOUT] = { "CSF_GPU_RESET_TIMEOUT", CSF_GPU_RESET_TIMEOUT_CYCLES }, + [CSF_CSG_SUSPEND_TIMEOUT] = { "CSF_CSG_SUSPEND_TIMEOUT", CSF_CSG_SUSPEND_TIMEOUT_CYCLES }, + [CSF_FIRMWARE_BOOT_TIMEOUT] = { "CSF_FIRMWARE_BOOT_TIMEOUT", + CSF_FIRMWARE_BOOT_TIMEOUT_CYCLES }, + [CSF_FIRMWARE_PING_TIMEOUT] = { "CSF_FIRMWARE_PING_TIMEOUT", + CSF_FIRMWARE_PING_TIMEOUT_CYCLES }, + [CSF_SCHED_PROTM_PROGRESS_TIMEOUT] = { "CSF_SCHED_PROTM_PROGRESS_TIMEOUT", + DEFAULT_PROGRESS_TIMEOUT_CYCLES }, + [MMU_AS_INACTIVE_WAIT_TIMEOUT] = { "MMU_AS_INACTIVE_WAIT_TIMEOUT", + MMU_AS_INACTIVE_WAIT_TIMEOUT_CYCLES }, + [KCPU_FENCE_SIGNAL_TIMEOUT] = { "KCPU_FENCE_SIGNAL_TIMEOUT", + KCPU_FENCE_SIGNAL_TIMEOUT_CYCLES }, +}; +#else +static struct kbase_timeout_info timeout_info[KBASE_TIMEOUT_SELECTOR_COUNT] = { + [MMU_AS_INACTIVE_WAIT_TIMEOUT] = { "MMU_AS_INACTIVE_WAIT_TIMEOUT", + MMU_AS_INACTIVE_WAIT_TIMEOUT_CYCLES }, + [JM_DEFAULT_JS_FREE_TIMEOUT] = { "JM_DEFAULT_JS_FREE_TIMEOUT", + JM_DEFAULT_JS_FREE_TIMEOUT_CYCLES }, +}; +#endif void kbase_backend_get_gpu_time_norequest(struct 
kbase_device *kbdev, u64 *cycle_counter, @@ -108,94 +141,130 @@ void kbase_backend_get_gpu_time(struct kbase_device *kbdev, u64 *cycle_counter, #endif } -unsigned int kbase_get_timeout_ms(struct kbase_device *kbdev, - enum kbase_timeout_selector selector) +static u64 kbase_device_get_scaling_frequency(struct kbase_device *kbdev) +{ + u64 freq_khz = kbdev->lowest_gpu_freq_khz; + + if (!freq_khz) { + dev_dbg(kbdev->dev, + "Lowest frequency uninitialized! Using reference frequency for scaling"); + return DEFAULT_REF_TIMEOUT_FREQ_KHZ; + } + + return freq_khz; +} + +void kbase_device_set_timeout_ms(struct kbase_device *kbdev, enum kbase_timeout_selector selector, + unsigned int timeout_ms) { + char *selector_str; + + if (unlikely(selector >= KBASE_TIMEOUT_SELECTOR_COUNT)) { + selector = KBASE_DEFAULT_TIMEOUT; + selector_str = timeout_info[selector].selector_str; + dev_warn(kbdev->dev, + "Unknown timeout selector passed, falling back to default: %s\n", + timeout_info[selector].selector_str); + } + selector_str = timeout_info[selector].selector_str; + + kbdev->backend_time.device_scaled_timeouts[selector] = timeout_ms; + dev_dbg(kbdev->dev, "\t%-35s: %ums\n", selector_str, timeout_ms); +} + +void kbase_device_set_timeout(struct kbase_device *kbdev, enum kbase_timeout_selector selector, + u64 timeout_cycles, u32 cycle_multiplier) +{ + u64 final_cycles; + u64 timeout; + u64 freq_khz = kbase_device_get_scaling_frequency(kbdev); + + if (unlikely(selector >= KBASE_TIMEOUT_SELECTOR_COUNT)) { + selector = KBASE_DEFAULT_TIMEOUT; + dev_warn(kbdev->dev, + "Unknown timeout selector passed, falling back to default: %s\n", + timeout_info[selector].selector_str); + } + + /* If the multiplication overflows, we will have unsigned wrap-around, and so might + * end up with a shorter timeout. In those cases, we then want to have the largest + * timeout possible that will not run into these issues. Note that this will not + * wait for U64_MAX/frequency ms, as it will be clamped to a max of UINT_MAX + * milliseconds by subsequent steps. + */ + if (check_mul_overflow(timeout_cycles, (u64)cycle_multiplier, &final_cycles)) + final_cycles = U64_MAX; + /* Timeout calculation: * dividing number of cycles by freq in KHz automatically gives value * in milliseconds. nr_cycles will have to be multiplied by 1e3 to * get result in microseconds, and 1e6 to get result in nanoseconds. */ + timeout = div_u64(final_cycles, freq_khz); + + if (unlikely(timeout > UINT_MAX)) { + dev_dbg(kbdev->dev, + "Capping excessive timeout %llums for %s at freq %llukHz to UINT_MAX ms", + timeout, timeout_info[selector].selector_str, + kbase_device_get_scaling_frequency(kbdev)); + timeout = UINT_MAX; + } - u64 timeout, nr_cycles = 0; - u64 freq_khz; + kbase_device_set_timeout_ms(kbdev, selector, (unsigned int)timeout); +} - /* Only for debug messages, safe default in case it's mis-maintained */ - const char *selector_str = "(unknown)"; +/** + * kbase_timeout_scaling_init - Initialize the table of scaled timeout + * values associated with a @kbase_device. + * + * @kbdev: KBase device pointer. + * + * Return: 0 on success, negative error code otherwise. + */ +static int kbase_timeout_scaling_init(struct kbase_device *kbdev) +{ + int err; + enum kbase_timeout_selector selector; - if (!kbdev->lowest_gpu_freq_khz) { - dev_dbg(kbdev->dev, - "Lowest frequency uninitialized! 
Using reference frequency for scaling"); - freq_khz = DEFAULT_REF_TIMEOUT_FREQ_KHZ; - } else { - freq_khz = kbdev->lowest_gpu_freq_khz; + /* First, we initialize the minimum and maximum device frequencies, which + * are used to compute the timeouts. + */ + err = kbase_pm_gpu_freq_init(kbdev); + if (unlikely(err < 0)) { + dev_dbg(kbdev->dev, "Could not initialize GPU frequency\n"); + return err; } - switch (selector) { - case MMU_AS_INACTIVE_WAIT_TIMEOUT: - selector_str = "MMU_AS_INACTIVE_WAIT_TIMEOUT"; - nr_cycles = MMU_AS_INACTIVE_WAIT_TIMEOUT_CYCLES; - break; - case KBASE_TIMEOUT_SELECTOR_COUNT: - default: -#if !MALI_USE_CSF - WARN(1, "Invalid timeout selector used! Using default value"); - nr_cycles = JM_DEFAULT_TIMEOUT_CYCLES; - break; - case JM_DEFAULT_JS_FREE_TIMEOUT: - selector_str = "JM_DEFAULT_JS_FREE_TIMEOUT"; - nr_cycles = JM_DEFAULT_JS_FREE_TIMEOUT_CYCLES; - break; -#else - /* Use Firmware timeout if invalid selection */ - WARN(1, - "Invalid timeout selector used! Using CSF Firmware timeout"); - fallthrough; - case CSF_FIRMWARE_TIMEOUT: - selector_str = "CSF_FIRMWARE_TIMEOUT"; - /* Any FW timeout cannot be longer than the FW ping interval, after which - * the firmware_aliveness_monitor will be triggered and may restart - * the GPU if the FW is unresponsive. + dev_dbg(kbdev->dev, "Scaling kbase timeouts:\n"); + for (selector = 0; selector < KBASE_TIMEOUT_SELECTOR_COUNT; selector++) { + u32 cycle_multiplier = 1; + u64 nr_cycles = timeout_info[selector].timeout_cycles; +#if MALI_USE_CSF + /* Special case: the scheduler progress timeout can be set manually, + * and does not have a canonical length defined in the headers. Hence, + * we query it once upon startup to get a baseline, and change it upon + * every invocation of the appropriate functions */ - nr_cycles = min(CSF_FIRMWARE_PING_TIMEOUT_CYCLES, CSF_FIRMWARE_TIMEOUT_CYCLES); - - if (nr_cycles == CSF_FIRMWARE_PING_TIMEOUT_CYCLES) - dev_warn(kbdev->dev, "Capping %s to CSF_FIRMWARE_PING_TIMEOUT\n", - selector_str); - break; - case CSF_PM_TIMEOUT: - selector_str = "CSF_PM_TIMEOUT"; - nr_cycles = CSF_PM_TIMEOUT_CYCLES; - break; - case CSF_GPU_RESET_TIMEOUT: - selector_str = "CSF_GPU_RESET_TIMEOUT"; - nr_cycles = CSF_GPU_RESET_TIMEOUT_CYCLES; - break; - case CSF_CSG_SUSPEND_TIMEOUT: - selector_str = "CSF_CSG_SUSPEND_TIMEOUT"; - nr_cycles = CSF_CSG_SUSPEND_TIMEOUT_CYCLES; - break; - case CSF_FIRMWARE_BOOT_TIMEOUT: - selector_str = "CSF_FIRMWARE_BOOT_TIMEOUT"; - nr_cycles = CSF_FIRMWARE_BOOT_TIMEOUT_CYCLES; - break; - case CSF_FIRMWARE_PING_TIMEOUT: - selector_str = "CSF_FIRMWARE_PING_TIMEOUT"; - nr_cycles = CSF_FIRMWARE_PING_TIMEOUT_CYCLES; - break; - case CSF_SCHED_PROTM_PROGRESS_TIMEOUT: - selector_str = "CSF_SCHED_PROTM_PROGRESS_TIMEOUT"; - nr_cycles = kbase_csf_timeout_get(kbdev); - break; + if (selector == CSF_SCHED_PROTM_PROGRESS_TIMEOUT) + nr_cycles = kbase_csf_timeout_get(kbdev); #endif + + /* Since we are in control of the iteration bounds for the selector, + * we don't have to worry about bounds checking when setting the timeout. 
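+ *
+ * As a worked example of the scaling done by kbase_device_set_timeout():
+ * with nr_cycles = 100000000, cycle_multiplier = 1 and a lowest GPU
+ * frequency of 100000 kHz (100 MHz), the value stored for the selector is
+ * 100000000 / 100000 = 1000 ms.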
+ */ + kbase_device_set_timeout(kbdev, selector, nr_cycles, cycle_multiplier); } + return 0; +} - timeout = div_u64(nr_cycles, freq_khz); - if (WARN(timeout > UINT_MAX, - "Capping excessive timeout %llums for %s at freq %llukHz to UINT_MAX ms", - (unsigned long long)timeout, selector_str, (unsigned long long)freq_khz)) - timeout = UINT_MAX; - return (unsigned int)timeout; +unsigned int kbase_get_timeout_ms(struct kbase_device *kbdev, enum kbase_timeout_selector selector) +{ + if (unlikely(selector >= KBASE_TIMEOUT_SELECTOR_COUNT)) { + dev_warn(kbdev->dev, "Querying wrong selector, falling back to default\n"); + selector = KBASE_DEFAULT_TIMEOUT; + } + + return kbdev->backend_time.device_scaled_timeouts[selector]; } KBASE_EXPORT_TEST_API(kbase_get_timeout_ms); @@ -247,18 +316,21 @@ static void get_cpu_gpu_time(struct kbase_device *kbdev, u64 *cpu_ts, u64 *gpu_t int kbase_backend_time_init(struct kbase_device *kbdev) { + int err = 0; #if MALI_USE_CSF u64 cpu_ts = 0; u64 gpu_ts = 0; u64 freq; u64 common_factor; + kbase_pm_register_access_enable(kbdev); get_cpu_gpu_time(kbdev, &cpu_ts, &gpu_ts, NULL); freq = arch_timer_get_cntfrq(); if (!freq) { dev_warn(kbdev->dev, "arch_timer_get_rate() is zero!"); - return -EINVAL; + err = -EINVAL; + goto disable_registers; } common_factor = gcd(NSEC_PER_SEC, freq); @@ -268,12 +340,23 @@ int kbase_backend_time_init(struct kbase_device *kbdev) if (!kbdev->backend_time.divisor) { dev_warn(kbdev->dev, "CPU to GPU divisor is zero!"); - return -EINVAL; + err = -EINVAL; + goto disable_registers; } kbdev->backend_time.offset = cpu_ts - div64_u64(gpu_ts * kbdev->backend_time.multiplier, kbdev->backend_time.divisor); #endif - return 0; + if (kbase_timeout_scaling_init(kbdev)) { + dev_warn(kbdev->dev, "Could not initialize timeout scaling"); + err = -EINVAL; + } + +#if MALI_USE_CSF +disable_registers: + kbase_pm_register_access_disable(kbdev); +#endif + + return err; } diff --git a/mali_kbase/build.bp b/mali_kbase/build.bp index e82dd12..381b1fe 100644 --- a/mali_kbase/build.bp +++ b/mali_kbase/build.bp @@ -68,6 +68,9 @@ bob_defaults { large_page_alloc: { kbuild_options: ["CONFIG_LARGE_PAGE_ALLOC=y"], }, + page_migration_support: { + kbuild_options: ["CONFIG_PAGE_MIGRATION_SUPPORT=y"], + }, mali_memory_fully_backed: { kbuild_options: ["CONFIG_MALI_MEMORY_FULLY_BACKED=y"], }, @@ -143,6 +146,18 @@ bob_defaults { mali_coresight: { kbuild_options: ["CONFIG_MALI_CORESIGHT=y"], }, + mali_fw_trace_mode_manual: { + kbuild_options: ["CONFIG_MALI_FW_TRACE_MODE_MANUAL=y"], + }, + mali_fw_trace_mode_auto_print: { + kbuild_options: ["CONFIG_MALI_FW_TRACE_MODE_AUTO_PRINT=y"], + }, + mali_fw_trace_mode_auto_discard: { + kbuild_options: ["CONFIG_MALI_FW_TRACE_MODE_AUTO_DISCARD=y"], + }, + mali_trace_power_gpu_work_period: { + kbuild_options: ["CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD=y"], + }, kbuild_options: [ "CONFIG_MALI_PLATFORM_NAME={{.mali_platform_name}}", "MALI_CUSTOMER_RELEASE={{.release}}", diff --git a/mali_kbase/context/backend/mali_kbase_context_csf.c b/mali_kbase/context/backend/mali_kbase_context_csf.c index 9aa661a..45a5a6c 100644 --- a/mali_kbase/context/backend/mali_kbase_context_csf.c +++ b/mali_kbase/context/backend/mali_kbase_context_csf.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -124,7 +124,7 @@ struct kbase_context *kbase_create_context(struct kbase_device *kbdev, bool is_compat, base_context_create_flags const flags, unsigned long const api_version, - struct file *const filp) + struct kbase_file *const kfile) { struct kbase_context *kctx; unsigned int i = 0; @@ -143,9 +143,11 @@ struct kbase_context *kbase_create_context(struct kbase_device *kbdev, kctx->kbdev = kbdev; kctx->api_version = api_version; - kctx->filp = filp; + kctx->kfile = kfile; kctx->create_flags = flags; + memcpy(kctx->comm, current->comm, sizeof(current->comm)); + if (is_compat) kbase_ctx_flag_set(kctx, KCTX_COMPAT); #if defined(CONFIG_64BIT) @@ -213,6 +215,16 @@ void kbase_destroy_context(struct kbase_context *kctx) kctx->tgid, kctx->id); } + /* Have synchronized against the System suspend and incremented the + * pm.active_count. So any subsequent invocation of System suspend + * callback would get blocked. + * If System suspend callback was already in progress then the above loop + * would have waited till the System resume callback has begun. + * So wait for the System resume callback to also complete as we want to + * avoid context termination during System resume also. + */ + wait_event(kbdev->pm.resume_wait, !kbase_pm_is_resuming(kbdev)); + kbase_mem_pool_group_mark_dying(&kctx->mem_pools); kbase_context_term_partial(kctx, ARRAY_SIZE(context_init)); diff --git a/mali_kbase/context/backend/mali_kbase_context_jm.c b/mali_kbase/context/backend/mali_kbase_context_jm.c index 7acb3f6..39595d9 100644 --- a/mali_kbase/context/backend/mali_kbase_context_jm.c +++ b/mali_kbase/context/backend/mali_kbase_context_jm.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -179,7 +179,7 @@ struct kbase_context *kbase_create_context(struct kbase_device *kbdev, bool is_compat, base_context_create_flags const flags, unsigned long const api_version, - struct file *const filp) + struct kbase_file *const kfile) { struct kbase_context *kctx; unsigned int i = 0; @@ -198,7 +198,7 @@ struct kbase_context *kbase_create_context(struct kbase_device *kbdev, kctx->kbdev = kbdev; kctx->api_version = api_version; - kctx->filp = filp; + kctx->kfile = kfile; kctx->create_flags = flags; if (is_compat) @@ -258,6 +258,17 @@ void kbase_destroy_context(struct kbase_context *kctx) wait_event(kbdev->pm.resume_wait, !kbase_pm_is_suspending(kbdev)); } + + /* Have synchronized against the System suspend and incremented the + * pm.active_count. So any subsequent invocation of System suspend + * callback would get blocked. + * If System suspend callback was already in progress then the above loop + * would have waited till the System resume callback has begun. + * So wait for the System resume callback to also complete as we want to + * avoid context termination during System resume also. 
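+ *
+ * As a sketch, the handshake assumed by the wait below is (the clearing of
+ * pm.resuming and the wake-up are not visible in this hunk and are assumed
+ * to happen once System resume has fully completed):
+ *
+ *   kbdev->pm.resuming = true;               on entry to the resume callback
+ *   kbdev->pm.resuming = false;              once resume has completed
+ *   wake_up_all(&kbdev->pm.resume_wait);     releases the waiters
+ *
+ * kbase_pm_is_resuming() is taken to read pm.resuming, so the wait_event()
+ * below returns only after System resume is done.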
+ */ + wait_event(kbdev->pm.resume_wait, !kbase_pm_is_resuming(kbdev)); + #ifdef CONFIG_MALI_ARBITER_SUPPORT atomic_dec(&kbdev->pm.gpu_users_waiting); #endif /* CONFIG_MALI_ARBITER_SUPPORT */ diff --git a/mali_kbase/context/mali_kbase_context.c b/mali_kbase/context/mali_kbase_context.c index 84d56f7..70941ef 100644 --- a/mali_kbase/context/mali_kbase_context.c +++ b/mali_kbase/context/mali_kbase_context.c @@ -190,7 +190,7 @@ int kbase_context_common_init(struct kbase_context *kctx) kctx->pid = current->pid; /* Check if this is a Userspace created context */ - if (likely(kctx->filp)) { + if (likely(kctx->kfile)) { struct pid *pid_struct; rcu_read_lock(); @@ -264,7 +264,7 @@ int kbase_context_common_init(struct kbase_context *kctx) if (err) { dev_err(kctx->kbdev->dev, "(err:%d) failed to insert kctx to kbase_process", err); - if (likely(kctx->filp)) { + if (likely(kctx->kfile)) { mmdrop(kctx->process_mm); put_task_struct(kctx->task); } @@ -356,7 +356,7 @@ void kbase_context_common_term(struct kbase_context *kctx) kbase_remove_kctx_from_process(kctx); mutex_unlock(&kctx->kbdev->kctx_list_lock); - if (likely(kctx->filp)) { + if (likely(kctx->kfile)) { mmdrop(kctx->process_mm); put_task_struct(kctx->task); } diff --git a/mali_kbase/context/mali_kbase_context.h b/mali_kbase/context/mali_kbase_context.h index 7c90e27..22cb00c 100644 --- a/mali_kbase/context/mali_kbase_context.h +++ b/mali_kbase/context/mali_kbase_context.h @@ -56,8 +56,9 @@ void kbase_context_debugfs_term(struct kbase_context *const kctx); * BASEP_CONTEXT_CREATE_KERNEL_FLAGS. * @api_version: Application program interface version, as encoded in * a single integer by the KBASE_API_VERSION macro. - * @filp: Pointer to the struct file corresponding to device file - * /dev/malixx instance, passed to the file's open method. + * @kfile: Pointer to the object representing the /dev/malixx device + * file instance. Shall be passed as NULL for internally created + * contexts. * * Up to one context can be created for each client that opens the device file * /dev/malixx. Context creation is deferred until a special ioctl() system call @@ -69,7 +70,7 @@ struct kbase_context * kbase_create_context(struct kbase_device *kbdev, bool is_compat, base_context_create_flags const flags, unsigned long api_version, - struct file *filp); + struct kbase_file *const kfile); /** * kbase_destroy_context - Destroy a kernel base context. diff --git a/mali_kbase/csf/Kbuild b/mali_kbase/csf/Kbuild index c5438f0..c626092 100644 --- a/mali_kbase/csf/Kbuild +++ b/mali_kbase/csf/Kbuild @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note # -# (C) COPYRIGHT 2018-2022 ARM Limited. All rights reserved. +# (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved. 
# # This program is free software and is provided to you under the terms of the # GNU General Public License version 2 as published by the Free Software @@ -32,6 +32,7 @@ mali_kbase-y += \ csf/mali_kbase_csf_csg_debugfs.o \ csf/mali_kbase_csf_kcpu_debugfs.o \ csf/mali_kbase_csf_sync_debugfs.o \ + csf/mali_kbase_csf_kcpu_fence_debugfs.o \ csf/mali_kbase_csf_protected_memory.o \ csf/mali_kbase_csf_tiler_heap_debugfs.o \ csf/mali_kbase_csf_cpu_queue_debugfs.o \ diff --git a/mali_kbase/csf/ipa_control/mali_kbase_csf_ipa_control.c b/mali_kbase/csf/ipa_control/mali_kbase_csf_ipa_control.c index 4336705..bbf2e4e 100644 --- a/mali_kbase/csf/ipa_control/mali_kbase_csf_ipa_control.c +++ b/mali_kbase/csf/ipa_control/mali_kbase_csf_ipa_control.c @@ -64,12 +64,19 @@ * struct kbase_ipa_control_listener_data - Data for the GPU clock frequency * listener * - * @listener: GPU clock frequency listener. - * @kbdev: Pointer to kbase device. + * @listener: GPU clock frequency listener. + * @kbdev: Pointer to kbase device. + * @clk_chg_wq: Dedicated workqueue to process the work item corresponding to + * a clock rate notification. + * @clk_chg_work: Work item to process the clock rate change + * @rate: The latest notified rate change, in unit of Hz */ struct kbase_ipa_control_listener_data { struct kbase_clk_rate_listener listener; struct kbase_device *kbdev; + struct workqueue_struct *clk_chg_wq; + struct work_struct clk_chg_work; + atomic_t rate; }; static u32 timer_value(u32 gpu_rate) @@ -271,52 +278,61 @@ kbase_ipa_control_rate_change_notify(struct kbase_clk_rate_listener *listener, u32 clk_index, u32 clk_rate_hz) { if ((clk_index == KBASE_CLOCK_DOMAIN_TOP) && (clk_rate_hz != 0)) { - size_t i; struct kbase_ipa_control_listener_data *listener_data = - container_of(listener, - struct kbase_ipa_control_listener_data, - listener); - struct kbase_device *kbdev = listener_data->kbdev; - struct kbase_ipa_control *ipa_ctrl = &kbdev->csf.ipa_control; - - lockdep_assert_held(&kbdev->hwaccess_lock); - if (!kbdev->pm.backend.gpu_ready) { - dev_err(kbdev->dev, - "%s: GPU frequency cannot change while GPU is off", - __func__); - return; - } + container_of(listener, struct kbase_ipa_control_listener_data, listener); + + /* Save the rate and delegate the job to a work item */ + atomic_set(&listener_data->rate, clk_rate_hz); + queue_work(listener_data->clk_chg_wq, &listener_data->clk_chg_work); + } +} - /* Interrupts are already disabled and interrupt state is also saved */ - spin_lock(&ipa_ctrl->lock); +static void kbase_ipa_ctrl_rate_change_worker(struct work_struct *data) +{ + struct kbase_ipa_control_listener_data *listener_data = + container_of(data, struct kbase_ipa_control_listener_data, clk_chg_work); + struct kbase_device *kbdev = listener_data->kbdev; + struct kbase_ipa_control *ipa_ctrl = &kbdev->csf.ipa_control; + unsigned long flags; + u32 rate; + size_t i; - for (i = 0; i < KBASE_IPA_CONTROL_MAX_SESSIONS; i++) { - struct kbase_ipa_control_session *session = &ipa_ctrl->sessions[i]; + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - if (session->active) { - size_t j; + if (!kbdev->pm.backend.gpu_ready) { + dev_err(kbdev->dev, "%s: GPU frequency cannot change while GPU is off", __func__); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + return; + } - for (j = 0; j < session->num_prfcnts; j++) { - struct kbase_ipa_control_prfcnt *prfcnt = - &session->prfcnts[j]; + spin_lock(&ipa_ctrl->lock); + /* Picking up the latest notified rate */ + rate = (u32)atomic_read(&listener_data->rate); - if 
(prfcnt->gpu_norm) - calc_prfcnt_delta(kbdev, prfcnt, true); - } - } - } + for (i = 0; i < KBASE_IPA_CONTROL_MAX_SESSIONS; i++) { + struct kbase_ipa_control_session *session = &ipa_ctrl->sessions[i]; - ipa_ctrl->cur_gpu_rate = clk_rate_hz; + if (session->active) { + size_t j; - /* Update the timer for automatic sampling if active sessions - * are present. Counters have already been manually sampled. - */ - if (ipa_ctrl->num_active_sessions > 0) { - kbase_reg_write(kbdev, IPA_CONTROL_REG(TIMER), - timer_value(ipa_ctrl->cur_gpu_rate)); + for (j = 0; j < session->num_prfcnts; j++) { + struct kbase_ipa_control_prfcnt *prfcnt = &session->prfcnts[j]; + + if (prfcnt->gpu_norm) + calc_prfcnt_delta(kbdev, prfcnt, true); + } } - spin_unlock(&ipa_ctrl->lock); } + + ipa_ctrl->cur_gpu_rate = rate; + /* Update the timer for automatic sampling if active sessions + * are present. Counters have already been manually sampled. + */ + if (ipa_ctrl->num_active_sessions > 0) + kbase_reg_write(kbdev, IPA_CONTROL_REG(TIMER), timer_value(rate)); + + spin_unlock(&ipa_ctrl->lock); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); } void kbase_ipa_control_init(struct kbase_device *kbdev) @@ -344,11 +360,27 @@ void kbase_ipa_control_init(struct kbase_device *kbdev) listener_data = kmalloc(sizeof(struct kbase_ipa_control_listener_data), GFP_KERNEL); if (listener_data) { - listener_data->listener.notify = - kbase_ipa_control_rate_change_notify; - listener_data->kbdev = kbdev; - ipa_ctrl->rtm_listener_data = listener_data; - } + listener_data->clk_chg_wq = + alloc_workqueue("ipa_ctrl_wq", WQ_HIGHPRI | WQ_UNBOUND, 1); + if (listener_data->clk_chg_wq) { + INIT_WORK(&listener_data->clk_chg_work, kbase_ipa_ctrl_rate_change_worker); + listener_data->listener.notify = kbase_ipa_control_rate_change_notify; + listener_data->kbdev = kbdev; + ipa_ctrl->rtm_listener_data = listener_data; + /* Initialise to 0, which is out of normal notified rates */ + atomic_set(&listener_data->rate, 0); + } else { + dev_warn(kbdev->dev, + "%s: failed to allocate workqueue, clock rate update disabled", + __func__); + kfree(listener_data); + listener_data = NULL; + } + } else + dev_warn(kbdev->dev, + "%s: failed to allocate memory, IPA control clock rate update disabled", + __func__); + spin_lock_irqsave(&clk_rtm->lock, flags); if (clk_rtm->clks[KBASE_CLOCK_DOMAIN_TOP]) ipa_ctrl->cur_gpu_rate = @@ -370,8 +402,10 @@ void kbase_ipa_control_term(struct kbase_device *kbdev) WARN_ON(ipa_ctrl->num_active_sessions); - if (listener_data) + if (listener_data) { kbase_clk_rate_trace_manager_unsubscribe(clk_rtm, &listener_data->listener); + destroy_workqueue(listener_data->clk_chg_wq); + } kfree(ipa_ctrl->rtm_listener_data); spin_lock_irqsave(&kbdev->hwaccess_lock, flags); @@ -997,14 +1031,11 @@ void kbase_ipa_control_rate_change_notify_test(struct kbase_device *kbdev, u32 clk_index, u32 clk_rate_hz) { struct kbase_ipa_control *ipa_ctrl = &kbdev->csf.ipa_control; - struct kbase_ipa_control_listener_data *listener_data = - ipa_ctrl->rtm_listener_data; - unsigned long flags; + struct kbase_ipa_control_listener_data *listener_data = ipa_ctrl->rtm_listener_data; - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - kbase_ipa_control_rate_change_notify(&listener_data->listener, - clk_index, clk_rate_hz); - spin_lock_irqrestore(&kbdev->hwaccess_lock, flags); + kbase_ipa_control_rate_change_notify(&listener_data->listener, clk_index, clk_rate_hz); + /* Ensure the callback has taken effect before returning back to the test caller */ + 
flush_work(&listener_data->clk_chg_work); } KBASE_EXPORT_TEST_API(kbase_ipa_control_rate_change_notify_test); #endif @@ -1057,4 +1088,3 @@ void kbase_ipa_control_protm_exited(struct kbase_device *kbdev) } } } - diff --git a/mali_kbase/csf/mali_kbase_csf.c b/mali_kbase/csf/mali_kbase_csf.c index 2e3ced3..8eaedde 100644 --- a/mali_kbase/csf/mali_kbase_csf.c +++ b/mali_kbase/csf/mali_kbase_csf.c @@ -38,6 +38,7 @@ #include <linux/protected_memory_allocator.h> #include <tl/mali_kbase_tracepoints.h> #include "mali_kbase_csf_mcu_shared_reg.h" +#include <linux/version_compat_defs.h> #define CS_REQ_EXCEPTION_MASK (CS_REQ_FAULT_MASK | CS_REQ_FATAL_MASK) #define CS_ACK_EXCEPTION_MASK (CS_ACK_FAULT_MASK | CS_ACK_FATAL_MASK) @@ -171,19 +172,19 @@ static int get_user_pages_mmap_handle(struct kbase_context *kctx, static void init_user_io_pages(struct kbase_queue *queue) { - u32 *input_addr = (u32 *)(queue->user_io_addr); - u32 *output_addr = (u32 *)(queue->user_io_addr + PAGE_SIZE); + u64 *input_addr = queue->user_io_addr; + u64 *output_addr64 = queue->user_io_addr + PAGE_SIZE / sizeof(u64); + u32 *output_addr32 = (u32 *)(queue->user_io_addr + PAGE_SIZE / sizeof(u64)); - input_addr[CS_INSERT_LO/4] = 0; - input_addr[CS_INSERT_HI/4] = 0; - - input_addr[CS_EXTRACT_INIT_LO/4] = 0; - input_addr[CS_EXTRACT_INIT_HI/4] = 0; - - output_addr[CS_EXTRACT_LO/4] = 0; - output_addr[CS_EXTRACT_HI/4] = 0; - - output_addr[CS_ACTIVE/4] = 0; + /* + * CS_INSERT and CS_EXTRACT registers contain 64-bit memory addresses which + * should be accessed atomically. Here we update them a full 64 bits at a + * time and, as this is initialisation code, non-atomic accesses would be + * safe in any case. + */ + input_addr[CS_INSERT_LO / sizeof(*input_addr)] = 0; + input_addr[CS_EXTRACT_INIT_LO / sizeof(*input_addr)] = 0; + output_addr64[CS_EXTRACT_LO / sizeof(*output_addr64)] = 0; + output_addr32[CS_ACTIVE / sizeof(*output_addr32)] = 0; } static void kernel_unmap_user_io_pages(struct kbase_context *kctx, @@ -205,7 +206,7 @@ static int kernel_map_user_io_pages(struct kbase_context *kctx, struct page *page_list[2]; pgprot_t cpu_map_prot; unsigned long flags; - char *user_io_addr; + uint64_t *user_io_addr; int ret = 0; size_t i; @@ -246,7 +247,7 @@ unlock: static void term_queue_group(struct kbase_queue_group *group); static void get_queue(struct kbase_queue *queue); -static void release_queue(struct kbase_queue *queue); +static bool release_queue(struct kbase_queue *queue); /** * kbase_csf_free_command_stream_user_pages() - Free the resources allocated @@ -400,7 +401,16 @@ static void get_queue(struct kbase_queue *queue) { WARN_ON(!kbase_refcount_inc_not_zero(&queue->refcount)); } -static void release_queue(struct kbase_queue *queue) +/** + * release_queue() - Release a reference to a GPU queue + * + * @queue: The queue to release. + * + * Return: true if the queue has been released. + * + * The queue will be released when its reference count reaches zero. + */ +static bool release_queue(struct kbase_queue *queue) { lockdep_assert_held(&queue->kctx->csf.lock); if (kbase_refcount_dec_and_test(&queue->refcount)) { @@ -410,7 +420,6 @@ static void release_queue(struct kbase_queue *queue) dev_dbg(queue->kctx->kbdev->dev, "Remove any pending command queue fatal from ctx %d_%d", queue->kctx->tgid, queue->kctx->id); - kbase_csf_event_remove_error(queue->kctx, &queue->error); /* After this the Userspace would be able to free the * memory for GPU queue. 
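The u32-to-u64 switch for the user I/O pages above (init_user_io_pages(), kernel_map_user_io_pages(), and the debugfs reader later in this patch) closes a torn-read window: CS_INSERT and CS_EXTRACT hold 64-bit ring-buffer offsets that the firmware updates concurrently. A hedged sketch of the hazard the old layout allowed, not code from this patch:

	/* Two separate 32-bit loads can straddle a firmware update of
	 * CS_EXTRACT, combining an old low word with a new high word:
	 */
	u32 lo = output_addr[CS_EXTRACT_LO / 4]; /* old value */
	/* ... firmware advances CS_EXTRACT across a 4GiB boundary ... */
	u32 hi = output_addr[CS_EXTRACT_HI / 4]; /* new value */
	u64 torn = lo | ((u64)hi << 32);         /* never a real offset */

	/* A single aligned 64-bit load cannot tear on a 64-bit kernel: */
	u64 ok = output_addr64[CS_EXTRACT_LO / sizeof(u64)];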
In case the Userspace missed @@ -423,7 +432,11 @@ static void release_queue(struct kbase_queue *queue) kbase_gpu_vm_unlock(queue->kctx); kfree(queue); + + return true; } + + return false; } static void oom_event_worker(struct work_struct *data); @@ -531,37 +544,25 @@ static int csf_queue_register_internal(struct kbase_context *kctx, queue->size = (queue_size << PAGE_SHIFT); queue->csi_index = KBASEP_IF_NR_INVALID; - queue->enabled = false; queue->priority = reg->priority; + /* Default to a safe value, this would be updated on binding */ + queue->group_priority = KBASE_QUEUE_GROUP_PRIORITY_LOW; kbase_refcount_set(&queue->refcount, 1); - queue->group = NULL; queue->bind_state = KBASE_CSF_QUEUE_UNBOUND; queue->handle = BASEP_MEM_INVALID_HANDLE; queue->doorbell_nr = KBASEP_USER_DB_NR_INVALID; - queue->status_wait = 0; - queue->sync_ptr = 0; - queue->sync_value = 0; - -#if IS_ENABLED(CONFIG_DEBUG_FS) - queue->saved_cmd_ptr = 0; -#endif - - queue->sb_status = 0; queue->blocked_reason = CS_STATUS_BLOCKED_REASON_REASON_UNBLOCKED; - atomic_set(&queue->pending, 0); - INIT_LIST_HEAD(&queue->link); - INIT_LIST_HEAD(&queue->error.link); + atomic_set(&queue->pending_kick, 0); + INIT_LIST_HEAD(&queue->pending_kick_link); INIT_WORK(&queue->oom_event_work, oom_event_worker); INIT_WORK(&queue->cs_error_work, cs_error_worker); list_add(&queue->link, &kctx->csf.queue_list); - queue->extract_ofs = 0; - region->user_data = queue; /* Initialize the cs_trace configuration parameters, When buffer_size @@ -636,6 +637,22 @@ int kbase_csf_queue_register_ex(struct kbase_context *kctx, static void unbind_queue(struct kbase_context *kctx, struct kbase_queue *queue); +static void wait_pending_queue_kick(struct kbase_queue *queue) +{ + struct kbase_context *const kctx = queue->kctx; + + /* Drain a pending queue kick if any. It should no longer be + * possible to issue further queue kicks at this point: either the + * queue has been unbound, or the context is being terminated. + * + * Signal kbase_csf_scheduler_kthread() to allow for the + * eventual completion of the current iteration. Once it's done the + * event_wait wait queue shall be signalled. + */ + complete(&kctx->kbdev->csf.scheduler.kthread_signal); + wait_event(kctx->kbdev->csf.event_wait, atomic_read(&queue->pending_kick) == 0); +} + void kbase_csf_queue_terminate(struct kbase_context *kctx, struct kbase_ioctl_cs_queue_terminate *term) { @@ -673,6 +690,18 @@ void kbase_csf_queue_terminate(struct kbase_context *kctx, queue->queue_reg->user_data = NULL; kbase_gpu_vm_unlock(kctx); + rt_mutex_unlock(&kctx->csf.lock); + /* The GPU reset can be allowed now as the queue has been unbound. 
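wait_pending_queue_kick() above is the drain side of a small handshake on queue->pending_kick that this patch threads through the kick, process and terminate paths. Condensed from the code in this patch (the scheduler kthread itself is outside this diff):

	/* Kick (kbase_csf_queue_kick(), under pending_gpuq_kicks_lock): */
	if (list_empty(&queue->pending_kick_link)) {
		atomic_inc(&queue->pending_kick);
		list_add_tail(&queue->pending_kick_link,
			      &kbdev->csf.pending_gpuq_kicks[queue->group_priority]);
		complete(&kbdev->csf.scheduler.kthread_signal);
	}

	/* Process: the scheduler kthread pops pending_kick_link and calls
	 * kbase_csf_process_queue_kick(), which drops pending_kick; the
	 * kthread then wakes kbdev->csf.event_wait.
	 */

	/* Drain (queue termination, after releasing kctx->csf.lock so the
	 * kthread and the work items can make progress without deadlock):
	 */
	wait_event(kctx->kbdev->csf.event_wait,
		   atomic_read(&queue->pending_kick) == 0);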
*/ + if (reset_prevented) { + kbase_reset_gpu_allow(kbdev); + reset_prevented = false; + } + wait_pending_queue_kick(queue); + /* The work items can be cancelled as Userspace is terminating the queue */ + cancel_work_sync(&queue->oom_event_work); + cancel_work_sync(&queue->cs_error_work); + rt_mutex_lock(&kctx->csf.lock); + release_queue(queue); } @@ -717,6 +746,7 @@ int kbase_csf_queue_bind(struct kbase_context *kctx, union kbase_ioctl_cs_queue_ bind->out.mmap_handle = queue->handle; group->bound_queues[bind->in.csi_index] = queue; queue->group = group; + queue->group_priority = group->priority; queue->csi_index = bind->in.csi_index; queue->bind_state = KBASE_CSF_QUEUE_BIND_IN_PROGRESS; @@ -726,12 +756,20 @@ out: return ret; } -static struct kbase_queue_group *get_bound_queue_group( - struct kbase_queue *queue) +/** + * get_bound_queue_group - Get the group to which a queue was bound + * + * @queue: Pointer to the queue for this group + * + * Return: The group to which this queue was bound, or NULL on error. + */ +static struct kbase_queue_group *get_bound_queue_group(struct kbase_queue *queue) { struct kbase_context *kctx = queue->kctx; struct kbase_queue_group *group; + lockdep_assert_held(&kctx->csf.lock); + if (queue->bind_state == KBASE_CSF_QUEUE_UNBOUND) return NULL; @@ -753,63 +791,6 @@ static struct kbase_queue_group *get_bound_queue_group( return group; } -static void enqueue_gpu_submission_work(struct kbase_context *const kctx) -{ - kthread_queue_work(&kctx->csf.pending_submission_worker, &kctx->csf.pending_submission_work); -} - -/** - * pending_submission_worker() - Work item to process pending kicked GPU command queues. - * - * @work: Pointer to pending_submission_work. - * - * This function starts all pending queues, for which the work - * was previously submitted via ioctl call from application thread. - * If the queue is already scheduled and resident, it will be started - * right away, otherwise once the group is made resident. - */ -static void pending_submission_worker(struct kthread_work *work) -{ - struct kbase_context *kctx = - container_of(work, struct kbase_context, csf.pending_submission_work); - struct kbase_device *kbdev = kctx->kbdev; - struct kbase_queue *queue; - int err = kbase_reset_gpu_prevent_and_wait(kbdev); - - if (err) { - dev_err(kbdev->dev, "Unsuccessful GPU reset detected when kicking queue "); - return; - } - - rt_mutex_lock(&kctx->csf.lock); - - /* Iterate through the queue list and schedule the pending ones for submission. 
*/ - list_for_each_entry(queue, &kctx->csf.queue_list, link) { - if (atomic_cmpxchg(&queue->pending, 1, 0) == 1) { - struct kbase_queue_group *group = get_bound_queue_group(queue); - int ret; - - if (!group || queue->bind_state != KBASE_CSF_QUEUE_BOUND) { - dev_dbg(kbdev->dev, "queue is not bound to a group"); - continue; - } - - ret = kbase_csf_scheduler_queue_start(queue); - if (unlikely(ret)) { - dev_dbg(kbdev->dev, "Failed to start queue"); - if (ret == -EBUSY) { - atomic_cmpxchg(&queue->pending, 0, 1); - enqueue_gpu_submission_work(kctx); - } - } - } - } - - rt_mutex_unlock(&kctx->csf.lock); - - kbase_reset_gpu_allow(kbdev); -} - void kbase_csf_ring_csg_doorbell(struct kbase_device *kbdev, int slot) { if (WARN_ON(slot < 0)) @@ -902,7 +883,6 @@ int kbase_csf_queue_kick(struct kbase_context *kctx, struct kbase_ioctl_cs_queue_kick *kick) { struct kbase_device *kbdev = kctx->kbdev; - bool trigger_submission = false; struct kbase_va_region *region; int err = 0; @@ -920,9 +900,19 @@ int kbase_csf_queue_kick(struct kbase_context *kctx, if (!kbase_is_region_invalid_or_free(region)) { struct kbase_queue *queue = region->user_data; - if (queue) { - atomic_cmpxchg(&queue->pending, 0, 1); - trigger_submission = true; + if (queue && (queue->bind_state == KBASE_CSF_QUEUE_BOUND)) { + spin_lock(&kbdev->csf.pending_gpuq_kicks_lock); + if (list_empty(&queue->pending_kick_link)) { + /* Queue termination shall block until this + * kick has been handled. + */ + atomic_inc(&queue->pending_kick); + list_add_tail( + &queue->pending_kick_link, + &kbdev->csf.pending_gpuq_kicks[queue->group_priority]); + complete(&kbdev->csf.scheduler.kthread_signal); + } + spin_unlock(&kbdev->csf.pending_gpuq_kicks_lock); } } else { dev_dbg(kbdev->dev, @@ -931,9 +921,6 @@ int kbase_csf_queue_kick(struct kbase_context *kctx, } kbase_gpu_vm_unlock(kctx); - if (likely(trigger_submission)) - enqueue_gpu_submission_work(kctx); - return err; } @@ -1222,6 +1209,9 @@ static int create_queue_group(struct kbase_context *const kctx, } else { int err = 0; +#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) + group->prev_act = false; +#endif group->kctx = kctx; group->handle = group_handle; group->csg_nr = KBASEP_CSG_NR_INVALID; @@ -1246,6 +1236,7 @@ static int create_queue_group(struct kbase_context *const kctx, group->dvs_buf = create->in.dvs_buf; + #if IS_ENABLED(CONFIG_DEBUG_FS) group->deschedule_deferred_cnt = 0; #endif @@ -1256,8 +1247,6 @@ static int create_queue_group(struct kbase_context *const kctx, INIT_LIST_HEAD(&group->link); INIT_LIST_HEAD(&group->link_to_schedule); INIT_LIST_HEAD(&group->error_fatal.link); - INIT_LIST_HEAD(&group->error_timeout.link); - INIT_LIST_HEAD(&group->error_tiler_oom.link); INIT_WORK(&group->timer_event_work, timer_event_worker); kthread_init_work(&group->protm_event_work, protm_event_worker); bitmap_zero(group->protm_pending_bitmap, @@ -1307,7 +1296,7 @@ int kbase_csf_queue_group_create(struct kbase_context *const kctx, const u32 compute_count = hweight64(create->in.compute_mask); size_t i; - for (i = 0; i < sizeof(create->in.padding); i++) { + for (i = 0; i < ARRAY_SIZE(create->in.padding); i++) { if (create->in.padding[i] != 0) { dev_warn(kctx->kbdev->dev, "Invalid padding not 0 in queue group create\n"); return -EINVAL; @@ -1316,8 +1305,7 @@ int kbase_csf_queue_group_create(struct kbase_context *const kctx, rt_mutex_lock(&kctx->csf.lock); - if ((create->in.tiler_max > tiler_count) || - (create->in.fragment_max > fragment_count) || + if ((create->in.tiler_max > tiler_count) || 
(create->in.fragment_max > fragment_count) || (create->in.compute_max > compute_count)) { dev_dbg(kctx->kbdev->dev, "Invalid maximum number of endpoints for a queue group"); @@ -1335,8 +1323,7 @@ int kbase_csf_queue_group_create(struct kbase_context *const kctx, dev_warn(kctx->kbdev->dev, "Unknown exception handler flags set: %u", create->in.csi_handlers & ~BASE_CSF_EXCEPTION_HANDLER_FLAGS_MASK); err = -EINVAL; - } else if (!dvs_supported(kctx->kbdev->csf.global_iface.version) && - create->in.dvs_buf) { + } else if (!dvs_supported(kctx->kbdev->csf.global_iface.version) && create->in.dvs_buf) { dev_warn( kctx->kbdev->dev, "GPU does not support DVS but userspace is trying to use it"); @@ -1512,8 +1499,6 @@ static void remove_pending_group_fatal_error(struct kbase_queue_group *group) "Remove any pending group fatal error from context %pK\n", (void *)group->kctx); - kbase_csf_event_remove_error(kctx, &group->error_tiler_oom); - kbase_csf_event_remove_error(kctx, &group->error_timeout); kbase_csf_event_remove_error(kctx, &group->error_fatal); } @@ -1681,61 +1666,79 @@ int kbase_csf_ctx_init(struct kbase_context *kctx) kctx->csf.wq = alloc_workqueue("mali_kbase_csf_wq", WQ_UNBOUND, 1); - if (unlikely(!kctx->csf.wq)) - goto out; - err = kbase_create_realtime_thread(kctx->kbdev, kthread_worker_fn, - &kctx->csf.pending_submission_worker, "mali_submit"); - if (err) { - dev_err(kctx->kbdev->dev, "error initializing pending submission worker thread"); - goto out_err_submission_kthread; - } + if (likely(kctx->csf.wq)) { + err = kbase_csf_scheduler_context_init(kctx); - err = kbase_create_realtime_thread(kctx->kbdev, kthread_worker_fn, - &kctx->csf.protm_event_worker, "mali_protm_event"); - if (err) { - dev_err(kctx->kbdev->dev, "error initializing protm event worker thread"); - goto out_err_protm_kthread; - } + if (likely(!err)) { + err = kbase_csf_kcpu_queue_context_init(kctx); - err = kbase_csf_scheduler_context_init(kctx); - if (unlikely(err)) - goto out_err_scheduler_context; + if (likely(!err)) { + err = kbase_csf_tiler_heap_context_init(kctx); - err = kbase_csf_kcpu_queue_context_init(kctx); - if (unlikely(err)) - goto out_err_kcpu_queue_context; + if (likely(!err)) { + rt_mutex_init(&kctx->csf.lock); - err = kbase_csf_tiler_heap_context_init(kctx); - if (unlikely(err)) - goto out_err_tiler_heap_context; + err = kbasep_ctx_user_reg_page_mapping_init(kctx); - rt_mutex_init(&kctx->csf.lock); - kthread_init_work(&kctx->csf.pending_submission_work, - pending_submission_worker); + if (likely(!err)) { + err = kbase_create_realtime_thread(kctx->kbdev, kthread_worker_fn, + &kctx->csf.protm_event_worker, "mali_protm_event"); + if (unlikely(err)) { + dev_err(kctx->kbdev->dev, "error initializing protm event worker thread"); + kbasep_ctx_user_reg_page_mapping_term(kctx); + } + } - err = kbasep_ctx_user_reg_page_mapping_init(kctx); - if (unlikely(err)) - goto out_err_user_reg_page_mapping_init; + if (unlikely(err)) + kbase_csf_tiler_heap_context_term(kctx); + } - return err; + if (unlikely(err)) + kbase_csf_kcpu_queue_context_term(kctx); + } + + if (unlikely(err)) + kbase_csf_scheduler_context_term(kctx); + } + + if (unlikely(err)) + destroy_workqueue(kctx->csf.wq); + } -out_err_user_reg_page_mapping_init: - kbase_csf_tiler_heap_context_term(kctx); -out_err_tiler_heap_context: - kbase_csf_kcpu_queue_context_term(kctx); -out_err_kcpu_queue_context: - kbase_csf_scheduler_context_term(kctx); -out_err_scheduler_context: - kbase_destroy_kworker_stack(&kctx->csf.protm_event_worker); -out_err_protm_kthread: - 
kbase_destroy_kworker_stack(&kctx->csf.pending_submission_worker); -out_err_submission_kthread: - destroy_workqueue(kctx->csf.wq); -out: return err; } +void kbase_csf_ctx_report_page_fault_for_active_groups(struct kbase_context *kctx, + struct kbase_fault *fault) +{ + struct base_gpu_queue_group_error err_payload = + (struct base_gpu_queue_group_error){ .error_type = BASE_GPU_QUEUE_GROUP_ERROR_FATAL, + .payload = { .fatal_group = { + .sideband = fault->addr, + .status = fault->status, + } } }; + struct kbase_device *kbdev = kctx->kbdev; + const u32 num_groups = kbdev->csf.global_iface.group_num; + unsigned long flags; + int csg_nr; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + kbase_csf_scheduler_spin_lock(kbdev, &flags); + for (csg_nr = 0; csg_nr < num_groups; csg_nr++) { + struct kbase_queue_group *const group = + kbdev->csf.scheduler.csg_slots[csg_nr].resident_group; + + if (!group || (group->kctx != kctx)) + continue; + + group->faulted = true; + kbase_csf_add_group_fatal_error(group, &err_payload); + } + kbase_csf_scheduler_spin_unlock(kbdev, flags); +} + void kbase_csf_ctx_handle_fault(struct kbase_context *kctx, struct kbase_fault *fault) { @@ -1777,6 +1780,9 @@ void kbase_csf_ctx_handle_fault(struct kbase_context *kctx, if (group && group->run_state != KBASE_CSF_GROUP_TERMINATED) { term_queue_group(group); + /* This would effectively be a NOP if the fatal error was already added to + * the error_list by kbase_csf_ctx_report_page_fault_for_active_groups(). + */ kbase_csf_add_group_fatal_error(group, &err_payload); reported = true; } @@ -1833,8 +1839,6 @@ void kbase_csf_ctx_term(struct kbase_context *kctx) if (reset_prevented) kbase_reset_gpu_allow(kbdev); - kthread_cancel_work_sync(&kctx->csf.pending_submission_work); - /* Now that all queue groups have been terminated, there can be no * more OoM or timer event interrupts but there can be inflight work * items. Destroying the wq will implicitly flush those work items. @@ -1873,6 +1877,12 @@ void kbase_csf_ctx_term(struct kbase_context *kctx) queue = list_first_entry(&kctx->csf.queue_list, struct kbase_queue, link); + list_del_init(&queue->link); + + rt_mutex_unlock(&kctx->csf.lock); + wait_pending_queue_kick(queue); + rt_mutex_lock(&kctx->csf.lock); + /* The reference held when the IO mapping was created on bind * would have been dropped otherwise the termination of Kbase * context itself wouldn't have kicked-in. So there shall be @@ -1880,15 +1890,13 @@ void kbase_csf_ctx_term(struct kbase_context *kctx) * registered. 
*/ WARN_ON(kbase_refcount_read(&queue->refcount) != 1); - list_del_init(&queue->link); + release_queue(queue); } rt_mutex_unlock(&kctx->csf.lock); - kbase_destroy_kworker_stack(&kctx->csf.pending_submission_worker); kbase_destroy_kworker_stack(&kctx->csf.protm_event_worker); - kbasep_ctx_user_reg_page_mapping_term(kctx); kbase_csf_tiler_heap_context_term(kctx); kbase_csf_kcpu_queue_context_term(kctx); @@ -1992,16 +2000,13 @@ static void report_tiler_oom_error(struct kbase_queue_group *group) } } } }; kbase_csf_event_add_error(group->kctx, - &group->error_tiler_oom, + &group->error_fatal, &error); kbase_event_wakeup_sync(group->kctx); } static void flush_gpu_cache_on_fatal_error(struct kbase_device *kbdev) { - int err; - const unsigned int cache_flush_wait_timeout_ms = 2000; - kbase_pm_lock(kbdev); /* With the advent of partial cache flush, dirty cache lines could * be left in the GPU L2 caches by terminating the queue group here @@ -2011,17 +2016,12 @@ static void flush_gpu_cache_on_fatal_error(struct kbase_device *kbdev) */ if (kbdev->pm.backend.gpu_powered) { kbase_gpu_start_cache_clean(kbdev, GPU_COMMAND_CACHE_CLN_INV_L2_LSC); - err = kbase_gpu_wait_cache_clean_timeout(kbdev, cache_flush_wait_timeout_ms); - - if (err) { + if (kbase_gpu_wait_cache_clean_timeout(kbdev, + kbdev->mmu_or_gpu_cache_op_wait_time_ms)) dev_warn( kbdev->dev, - "[%llu] Timeout waiting for cache clean to complete after fatal error", + "[%llu] Timeout waiting for CACHE_CLN_INV_L2_LSC to complete after fatal error", kbase_backend_get_cycle_cnt(kbdev)); - - if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR)) - kbase_reset_gpu(kbdev); - } } kbase_pm_unlock(kbdev); @@ -2153,7 +2153,6 @@ static void oom_event_worker(struct work_struct *data) rt_mutex_lock(&kctx->csf.lock); kbase_queue_oom_event(queue); - release_queue(queue); rt_mutex_unlock(&kctx->csf.lock); kbase_reset_gpu_allow(kbdev); @@ -2180,7 +2179,7 @@ static void report_group_timeout_error(struct kbase_queue_group *const group) "Notify the event notification thread, forward progress timeout (%llu cycles)\n", kbase_csf_timeout_get(group->kctx->kbdev)); - kbase_csf_event_add_error(group->kctx, &group->error_timeout, &error); + kbase_csf_event_add_error(group->kctx, &group->error_fatal, &error); kbase_event_wakeup_sync(group->kctx); } @@ -2406,12 +2405,10 @@ handle_fault_event(struct kbase_queue *const queue, const u32 cs_ack) if ((cs_fault_exception_type != CS_FAULT_EXCEPTION_TYPE_CS_INHERIT_FAULT) && (cs_fault_exception_type != CS_FAULT_EXCEPTION_TYPE_CS_RESOURCE_TERMINATED)) { if (unlikely(kbase_debug_csf_fault_notify(kbdev, queue->kctx, DF_CS_FAULT))) { - get_queue(queue); queue->cs_error = cs_fault; queue->cs_error_info = cs_fault_info; queue->cs_error_fatal = false; - if (!queue_work(queue->kctx->csf.wq, &queue->cs_error_work)) - release_queue(queue); + queue_work(queue->kctx->csf.wq, &queue->cs_error_work); return; } } @@ -2422,31 +2419,29 @@ handle_fault_event(struct kbase_queue *const queue, const u32 cs_ack) kbase_csf_ring_cs_kernel_doorbell(kbdev, queue->csi_index, queue->group->csg_nr, true); } -static void report_queue_fatal_error(struct kbase_queue *const queue, - u32 cs_fatal, u64 cs_fatal_info, - u8 group_handle) +static void report_queue_fatal_error(struct kbase_queue *const queue, u32 cs_fatal, + u64 cs_fatal_info, struct kbase_queue_group *group) { - struct base_csf_notification error = { - .type = BASE_CSF_NOTIFICATION_GPU_QUEUE_GROUP_ERROR, - .payload = { - .csg_error = { - .handle = group_handle, - .error = { - 
.error_type = - BASE_GPU_QUEUE_GROUP_QUEUE_ERROR_FATAL, - .payload = { - .fatal_queue = { - .sideband = cs_fatal_info, - .status = cs_fatal, - .csi_index = queue->csi_index, - } - } - } - } - } - }; + struct base_csf_notification + error = { .type = BASE_CSF_NOTIFICATION_GPU_QUEUE_GROUP_ERROR, + .payload = { + .csg_error = { + .error = { .error_type = + BASE_GPU_QUEUE_GROUP_QUEUE_ERROR_FATAL, + .payload = { .fatal_queue = { + .sideband = cs_fatal_info, + .status = cs_fatal, + } } } } } }; + + if (!queue) + return; + + if (WARN_ON_ONCE(!group)) + return; - kbase_csf_event_add_error(queue->kctx, &queue->error, &error); + error.payload.csg_error.handle = group->handle; + error.payload.csg_error.error.payload.fatal_queue.csi_index = queue->csi_index; + kbase_csf_event_add_error(queue->kctx, &group->error_fatal, &error); kbase_event_wakeup_sync(queue->kctx); } @@ -2461,10 +2456,10 @@ static void cs_error_worker(struct work_struct *const data) { struct kbase_queue *const queue = container_of(data, struct kbase_queue, cs_error_work); + const u32 cs_fatal_exception_type = CS_FATAL_EXCEPTION_TYPE_GET(queue->cs_error); struct kbase_context *const kctx = queue->kctx; struct kbase_device *const kbdev = kctx->kbdev; struct kbase_queue_group *group; - u8 group_handle; bool reset_prevented = false; int err; @@ -2511,14 +2506,22 @@ static void cs_error_worker(struct work_struct *const data) } #endif - group_handle = group->handle; term_queue_group(group); flush_gpu_cache_on_fatal_error(kbdev); - report_queue_fatal_error(queue, queue->cs_error, queue->cs_error_info, - group_handle); + /* For an invalid GPU page fault, CS_BUS_FAULT fatal error is expected after the + * page fault handler disables the AS of faulty context. Need to skip reporting the + * CS_BUS_FAULT fatal error to the Userspace as it doesn't have the full fault info. + * Page fault handler will report the fatal error with full page fault info. 
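The per-object error notifications are consolidated here: queue->error, group->error_timeout and group->error_tiler_oom all collapse into the single group->error_fatal. This relies on kbase_csf_event_add_error() queuing a notification object only if it is not already queued, so one object per group suffices. A sketch of that behaviour as assumed by this patch (the helper lives in mali_kbase_csf_event.c, outside this diff):

	spin_lock_irqsave(&kctx->csf.event.lock, flags);
	if (list_empty(&error->link)) { /* not already on the error list */
		error->data = *data;
		list_add_tail(&error->link, &kctx->csf.event.error_list);
	}
	spin_unlock_irqrestore(&kctx->csf.event.lock, flags);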
+ */ + if ((cs_fatal_exception_type == CS_FATAL_EXCEPTION_TYPE_CS_BUS_FAULT) && group->faulted) { + dev_dbg(kbdev->dev, + "Skipped reporting CS_BUS_FAULT for queue %d of group %d of ctx %d_%d", + queue->csi_index, group->handle, kctx->tgid, kctx->id); + } else { + report_queue_fatal_error(queue, queue->cs_error, queue->cs_error_info, group); + } unlock: - release_queue(queue); rt_mutex_unlock(&kctx->csf.lock); if (reset_prevented) kbase_reset_gpu_allow(kbdev); @@ -2580,12 +2583,10 @@ handle_fatal_event(struct kbase_queue *const queue, if (kbase_prepare_to_reset_gpu(queue->kctx->kbdev, RESET_FLAGS_NONE)) kbase_reset_gpu(queue->kctx->kbdev); } - get_queue(queue); queue->cs_error = cs_fatal; queue->cs_error_info = cs_fatal_info; queue->cs_error_fatal = true; - if (!queue_work(queue->kctx->csf.wq, &queue->cs_error_work)) - release_queue(queue); + queue_work(queue->kctx->csf.wq, &queue->cs_error_work); } kbase_csf_firmware_cs_input_mask(stream, CS_REQ, cs_ack, @@ -2672,7 +2673,6 @@ static void process_cs_interrupts(struct kbase_queue_group *const group, if (((cs_req & CS_REQ_TILER_OOM_MASK) ^ (cs_ack & CS_ACK_TILER_OOM_MASK))) { - get_queue(queue); KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_INTERRUPT_TILER_OOM, group, queue, cs_req ^ cs_ack); if (!queue_work(wq, &queue->oom_event_work)) { @@ -2686,7 +2686,6 @@ static void process_cs_interrupts(struct kbase_queue_group *const group, "Tiler OOM work pending: queue %d group %d (ctx %d_%d)", queue->csi_index, group->handle, queue->kctx->tgid, queue->kctx->id); - release_queue(queue); } } @@ -2797,17 +2796,7 @@ static void process_csg_interrupts(struct kbase_device *const kbdev, int const c KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_INTERRUPT_PROCESS_START, group, csg_nr); - if ((req ^ ack) & CSG_REQ_SYNC_UPDATE_MASK) { - kbase_csf_firmware_csg_input_mask(ginfo, - CSG_REQ, ack, CSG_REQ_SYNC_UPDATE_MASK); - - KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_INTERRUPT_SYNC_UPDATE, group, req ^ ack); - - /* SYNC_UPDATE events shall invalidate GPU idle event */ - atomic_set(&kbdev->csf.scheduler.gpu_no_longer_idle, true); - - kbase_csf_event_signal_cpu_only(group->kctx); - } + kbase_csf_handle_csg_sync_update(kbdev, ginfo, group, req, ack); if ((req ^ ack) & CSG_REQ_IDLE_MASK) { struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; @@ -3117,13 +3106,16 @@ void kbase_csf_interrupt(struct kbase_device *kbdev, u32 val) do { unsigned long flags; u32 csg_interrupts = val & ~JOB_IRQ_GLOBAL_IF; - struct irq_idle_and_protm_track track = { .protm_grp = NULL, .idle_seq = U32_MAX }; bool glb_idle_irq_received = false; kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), val); order_job_irq_clear_with_iface_mem_read(); if (csg_interrupts != 0) { + struct irq_idle_and_protm_track track = { .protm_grp = NULL, + .idle_seq = U32_MAX, + .idle_slot = S8_MAX }; + kbase_csf_scheduler_spin_lock(kbdev, &flags); /* Looping through and track the highest idle and protm groups */ while (csg_interrupts != 0) { @@ -3220,6 +3212,24 @@ void kbase_csf_interrupt(struct kbase_device *kbdev, u32 val) KBASE_KTRACE_ADD(kbdev, CSF_INTERRUPT_END, NULL, val); } +void kbase_csf_handle_csg_sync_update(struct kbase_device *const kbdev, + struct kbase_csf_cmd_stream_group_info *ginfo, + struct kbase_queue_group *group, u32 req, u32 ack) +{ + kbase_csf_scheduler_spin_lock_assert_held(kbdev); + + if ((req ^ ack) & CSG_REQ_SYNC_UPDATE_MASK) { + kbase_csf_firmware_csg_input_mask(ginfo, CSG_REQ, ack, CSG_REQ_SYNC_UPDATE_MASK); + + KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_INTERRUPT_SYNC_UPDATE, group, req ^ ack); + + /* 
SYNC_UPDATE events shall invalidate GPU idle event */ + atomic_set(&kbdev->csf.scheduler.gpu_no_longer_idle, true); + + kbase_csf_event_signal_cpu_only(group->kctx); + } +} + void kbase_csf_doorbell_mapping_term(struct kbase_device *kbdev) { if (kbdev->csf.db_filp) { @@ -3258,6 +3268,28 @@ int kbase_csf_doorbell_mapping_init(struct kbase_device *kbdev) return 0; } +void kbase_csf_pending_gpuq_kicks_init(struct kbase_device *kbdev) +{ + size_t i; + + for (i = 0; i != ARRAY_SIZE(kbdev->csf.pending_gpuq_kicks); ++i) + INIT_LIST_HEAD(&kbdev->csf.pending_gpuq_kicks[i]); + spin_lock_init(&kbdev->csf.pending_gpuq_kicks_lock); +} + +void kbase_csf_pending_gpuq_kicks_term(struct kbase_device *kbdev) +{ + size_t i; + + spin_lock(&kbdev->csf.pending_gpuq_kicks_lock); + for (i = 0; i != ARRAY_SIZE(kbdev->csf.pending_gpuq_kicks); ++i) { + if (!list_empty(&kbdev->csf.pending_gpuq_kicks[i])) + dev_warn(kbdev->dev, + "Some GPU queue kicks for priority %zu were not handled", i); + } + spin_unlock(&kbdev->csf.pending_gpuq_kicks_lock); +} + void kbase_csf_free_dummy_user_reg_page(struct kbase_device *kbdev) { if (kbdev->csf.user_reg.filp) { @@ -3290,7 +3322,7 @@ int kbase_csf_setup_dummy_user_reg_page(struct kbase_device *kbdev) } page = as_page(phys); - addr = kmap_atomic(page); + addr = kbase_kmap_atomic(page); /* Write a special value for the latest flush register inside the * dummy page @@ -3299,7 +3331,7 @@ int kbase_csf_setup_dummy_user_reg_page(struct kbase_device *kbdev) kbase_sync_single_for_device(kbdev, kbase_dma_addr(page) + LATEST_FLUSH, sizeof(u32), DMA_BIDIRECTIONAL); - kunmap_atomic(addr); + kbase_kunmap_atomic(addr); kbdev->csf.user_reg.filp = filp; kbdev->csf.user_reg.dummy_page = phys; @@ -3320,3 +3352,60 @@ u8 kbase_csf_priority_check(struct kbase_device *kbdev, u8 req_priority) return out_priority; } + +void kbase_csf_process_queue_kick(struct kbase_queue *queue) +{ + struct kbase_context *kctx = queue->kctx; + struct kbase_device *kbdev = kctx->kbdev; + bool retry_kick = false; + int err = kbase_reset_gpu_prevent_and_wait(kbdev); + + if (err) { + dev_err(kbdev->dev, "Unsuccessful GPU reset detected when kicking queue"); + goto out_release_queue; + } + + rt_mutex_lock(&kctx->csf.lock); + + if (queue->bind_state != KBASE_CSF_QUEUE_BOUND) + goto out_allow_gpu_reset; + + err = kbase_csf_scheduler_queue_start(queue); + if (unlikely(err)) { + dev_dbg(kbdev->dev, "Failed to start queue"); + if (err == -EBUSY) { + retry_kick = true; + + spin_lock(&kbdev->csf.pending_gpuq_kicks_lock); + if (list_empty(&queue->pending_kick_link)) { + /* A failed queue kick shall be pushed to the + * back of the queue to avoid potential abuse. 
+ */ + list_add_tail( + &queue->pending_kick_link, + &kbdev->csf.pending_gpuq_kicks[queue->group_priority]); + spin_unlock(&kbdev->csf.pending_gpuq_kicks_lock); + } else { + spin_unlock(&kbdev->csf.pending_gpuq_kicks_lock); + WARN_ON(atomic_read(&queue->pending_kick) == 0); + } + + complete(&kbdev->csf.scheduler.kthread_signal); + } + } + +out_allow_gpu_reset: + if (likely(!retry_kick)) { + WARN_ON(atomic_read(&queue->pending_kick) == 0); + atomic_dec(&queue->pending_kick); + } + + rt_mutex_unlock(&kctx->csf.lock); + + kbase_reset_gpu_allow(kbdev); + + return; +out_release_queue: + WARN_ON(atomic_read(&queue->pending_kick) == 0); + atomic_dec(&queue->pending_kick); +} diff --git a/mali_kbase/csf/mali_kbase_csf.h b/mali_kbase/csf/mali_kbase_csf.h index 35d0331..29119e1 100644 --- a/mali_kbase/csf/mali_kbase_csf.h +++ b/mali_kbase/csf/mali_kbase_csf.h @@ -49,8 +49,8 @@ #define KBASEP_TICK_PROTM_PEND_SCAN_SEQ_NR_INVALID (U32_MAX) /* 60ms optimizes power while minimizing latency impact for UI test cases. */ -#define MALI_HOST_CONTROLS_SC_RAILS_IDLE_TIMER_US (600) -#define FIRMWARE_IDLE_HYSTERESIS_TIME_USEC (60000) /* Default 60 milliseconds */ +#define MALI_HOST_CONTROLS_SC_RAILS_IDLE_TIMER_NS (600 * 1000) +#define FIRMWARE_IDLE_HYSTERESIS_TIME_NS (60 * 1000 * 1000) /* Default 60 milliseconds */ /* Idle hysteresis time can be scaled down when GPU sleep feature is used */ #define FIRMWARE_IDLE_HYSTERESIS_GPU_SLEEP_SCALER (5) @@ -78,6 +78,18 @@ void kbase_csf_ctx_handle_fault(struct kbase_context *kctx, struct kbase_fault *fault); /** + * kbase_csf_ctx_report_page_fault_for_active_groups - Notify Userspace about GPU page fault + * for active groups of the faulty context. + * + * @kctx: Pointer to faulty kbase context. + * @fault: Pointer to the fault. + * + * This function notifies the event notification thread of the GPU page fault. + */ +void kbase_csf_ctx_report_page_fault_for_active_groups(struct kbase_context *kctx, + struct kbase_fault *fault); + +/** * kbase_csf_ctx_term - Terminate the CSF interface for a GPU address space. * * @kctx: Pointer to the kbase context which is being terminated. @@ -315,6 +327,19 @@ void kbase_csf_add_group_fatal_error( void kbase_csf_interrupt(struct kbase_device *kbdev, u32 val); /** + * kbase_csf_handle_csg_sync_update - Handle SYNC_UPDATE notification for the group. + * + * @kbdev: The kbase device to handle the SYNC_UPDATE interrupt. + * @ginfo: Pointer to the CSG interface used by the @group. + * @group: Pointer to the GPU command queue group. + * @req: CSG_REQ register value corresponding to @group. + * @ack: CSG_ACK register value corresponding to @group. + */ +void kbase_csf_handle_csg_sync_update(struct kbase_device *const kbdev, + struct kbase_csf_cmd_stream_group_info *ginfo, + struct kbase_queue_group *group, u32 req, u32 ack); + +/** * kbase_csf_doorbell_mapping_init - Initialize the fields that facilitate * the update of userspace mapping of HW * doorbell page. * * @kbdev: Instance of a GPU platform device that implements a CSF interface. */ @@ -363,6 +388,22 @@ int kbase_csf_setup_dummy_user_reg_page(struct kbase_device *kbdev); void kbase_csf_free_dummy_user_reg_page(struct kbase_device *kbdev); /** + * kbase_csf_pending_gpuq_kicks_init - Initialize the data used for handling + * GPU queue kicks. + * + * @kbdev: Instance of a GPU platform device that implements a CSF interface. + */ +void kbase_csf_pending_gpuq_kicks_init(struct kbase_device *kbdev); + +/** + * kbase_csf_pending_gpuq_kicks_term - De-initialize the data used for handling + * GPU queue kicks. 
+ * + * @kbdev: Instance of a GPU platform device that implements a CSF interface. + */ +void kbase_csf_pending_gpuq_kicks_term(struct kbase_device *kbdev); + +/** * kbase_csf_ring_csg_doorbell - ring the doorbell for a CSG interface. * * @kbdev: Instance of a GPU platform device that implements a CSF interface. @@ -505,4 +546,17 @@ static inline u64 kbase_csf_ktrace_gpu_cycle_cnt(struct kbase_device *kbdev) #endif } +/** + * kbase_csf_process_queue_kick() - Process a pending kicked GPU command queue. + * + * @queue: Pointer to the queue to process. + * + * This function starts the pending queue, for which the work + * was previously submitted via ioctl call from application thread. + * If the queue is already scheduled and resident, it will be started + * right away, otherwise once the group is made resident. + */ +void kbase_csf_process_queue_kick(struct kbase_queue *queue); + + #endif /* _KBASE_CSF_H_ */ diff --git a/mali_kbase/csf/mali_kbase_csf_cpu_queue_debugfs.c b/mali_kbase/csf/mali_kbase_csf_cpu_queue_debugfs.c index 44221b0..a319a4a 100644 --- a/mali_kbase/csf/mali_kbase_csf_cpu_queue_debugfs.c +++ b/mali_kbase/csf/mali_kbase_csf_cpu_queue_debugfs.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2020-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -126,30 +126,24 @@ void kbase_csf_cpu_queue_debugfs_init(struct kbase_context *kctx) int kbase_csf_cpu_queue_dump(struct kbase_context *kctx, u64 buffer, size_t buf_size) { - int err = 0; - size_t alloc_size = buf_size; char *dump_buffer; if (!buffer || !alloc_size) - goto done; + return 0; alloc_size = (alloc_size + PAGE_SIZE) & ~(PAGE_SIZE - 1); dump_buffer = kzalloc(alloc_size, GFP_KERNEL); - if (ZERO_OR_NULL_PTR(dump_buffer)) { - err = -ENOMEM; - goto done; - } + if (!dump_buffer) + return -ENOMEM; WARN_ON(kctx->csf.cpu_queue.buffer != NULL); - err = copy_from_user(dump_buffer, + if (copy_from_user(dump_buffer, u64_to_user_ptr(buffer), - buf_size); - if (err) { + buf_size)) { kfree(dump_buffer); - err = -EFAULT; - goto done; + return -EFAULT; } rt_mutex_lock(&kctx->csf.lock); @@ -161,13 +155,12 @@ int kbase_csf_cpu_queue_dump(struct kbase_context *kctx, kctx->csf.cpu_queue.buffer = dump_buffer; kctx->csf.cpu_queue.buffer_size = buf_size; complete_all(&kctx->csf.cpu_queue.dump_cmp); - } else { + } else kfree(dump_buffer); - } rt_mutex_unlock(&kctx->csf.lock); -done: - return err; + + return 0; } #else /* diff --git a/mali_kbase/csf/mali_kbase_csf_csg_debugfs.c b/mali_kbase/csf/mali_kbase_csf_csg_debugfs.c index a45b588..c94e656 100644 --- a/mali_kbase/csf/mali_kbase_csf_csg_debugfs.c +++ b/mali_kbase/csf/mali_kbase_csf_csg_debugfs.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -287,7 +287,8 @@ static void kbasep_csf_scheduler_dump_active_cs_trace(struct seq_file *file, static void kbasep_csf_scheduler_dump_active_queue(struct seq_file *file, struct kbase_queue *queue) { - u32 *addr; + u64 *addr; + u32 *addr32; u64 cs_extract; u64 cs_insert; u32 cs_active; @@ -309,12 +310,14 @@ static void kbasep_csf_scheduler_dump_active_queue(struct seq_file *file, !queue->group)) return; - addr = (u32 *)queue->user_io_addr; - cs_insert = addr[CS_INSERT_LO/4] | ((u64)addr[CS_INSERT_HI/4] << 32); + addr = queue->user_io_addr; + cs_insert = addr[CS_INSERT_LO / sizeof(*addr)]; - addr = (u32 *)(queue->user_io_addr + PAGE_SIZE); - cs_extract = addr[CS_EXTRACT_LO/4] | ((u64)addr[CS_EXTRACT_HI/4] << 32); - cs_active = addr[CS_ACTIVE/4]; + addr = queue->user_io_addr + PAGE_SIZE / sizeof(*addr); + cs_extract = addr[CS_EXTRACT_LO / sizeof(*addr)]; + + addr32 = (u32 *)(queue->user_io_addr + PAGE_SIZE / sizeof(*addr)); + cs_active = addr32[CS_ACTIVE / sizeof(*addr32)]; #define KBASEP_CSF_DEBUGFS_CS_HEADER_USER_IO \ "Bind Idx, Ringbuf addr, Size, Prio, Insert offset, Extract offset, Active, Doorbell\n" @@ -446,22 +449,20 @@ static void kbasep_csf_scheduler_dump_active_group(struct seq_file *file, group->csg_nr); seq_puts(file, "*** The following group-record is likely stale\n"); } + seq_puts( + file, + "GroupID, CSG NR, CSG Prio, Run State, Priority, C_EP(Alloc/Req), F_EP(Alloc/Req), T_EP(Alloc/Req), Exclusive, Idle\n"); + seq_printf( + file, + "%7d, %6d, %8d, %9d, %8d, %11d/%3d, %11d/%3d, %11d/%3d, %9c, %4c\n", + group->handle, group->csg_nr, slot_priority, group->run_state, + group->priority, CSG_STATUS_EP_CURRENT_COMPUTE_EP_GET(ep_c), + CSG_STATUS_EP_REQ_COMPUTE_EP_GET(ep_r), + CSG_STATUS_EP_CURRENT_FRAGMENT_EP_GET(ep_c), + CSG_STATUS_EP_REQ_FRAGMENT_EP_GET(ep_r), + CSG_STATUS_EP_CURRENT_TILER_EP_GET(ep_c), + CSG_STATUS_EP_REQ_TILER_EP_GET(ep_r), exclusive, idle); - seq_puts(file, "GroupID, CSG NR, CSG Prio, Run State, Priority, C_EP(Alloc/Req), F_EP(Alloc/Req), T_EP(Alloc/Req), Exclusive, Idle\n"); - seq_printf(file, "%7d, %6d, %8d, %9d, %8d, %11d/%3d, %11d/%3d, %11d/%3d, %9c, %4c\n", - group->handle, - group->csg_nr, - slot_priority, - group->run_state, - group->priority, - CSG_STATUS_EP_CURRENT_COMPUTE_EP_GET(ep_c), - CSG_STATUS_EP_REQ_COMPUTE_EP_GET(ep_r), - CSG_STATUS_EP_CURRENT_FRAGMENT_EP_GET(ep_c), - CSG_STATUS_EP_REQ_FRAGMENT_EP_GET(ep_r), - CSG_STATUS_EP_CURRENT_TILER_EP_GET(ep_c), - CSG_STATUS_EP_REQ_TILER_EP_GET(ep_r), - exclusive, - idle); } else { seq_puts(file, "GroupID, CSG NR, Run State, Priority\n"); seq_printf(file, "%7d, %6d, %9d, %8d\n", diff --git a/mali_kbase/csf/mali_kbase_csf_defs.h b/mali_kbase/csf/mali_kbase_csf_defs.h index cb4e5eb..ef973b7 100644 --- a/mali_kbase/csf/mali_kbase_csf_defs.h +++ b/mali_kbase/csf/mali_kbase_csf_defs.h @@ -265,15 +265,18 @@ enum kbase_queue_group_priority { * @CSF_PM_TIMEOUT: Timeout for GPU Power Management to reach the desired * Shader, L2 and MCU state. * @CSF_GPU_RESET_TIMEOUT: Waiting timeout for GPU reset to complete. - * @CSF_CSG_SUSPEND_TIMEOUT: Timeout given for all active CSGs to be suspended. + * @CSF_CSG_SUSPEND_TIMEOUT: Timeout given for a CSG to be suspended. * @CSF_FIRMWARE_BOOT_TIMEOUT: Maximum time to wait for firmware to boot. * @CSF_FIRMWARE_PING_TIMEOUT: Maximum time to wait for firmware to respond * to a ping from KBase. 
* @CSF_SCHED_PROTM_PROGRESS_TIMEOUT: Timeout used to prevent protected mode execution hang. * @MMU_AS_INACTIVE_WAIT_TIMEOUT: Maximum waiting time in ms for the completion - * of a MMU operation + * of a MMU operation. + * @KCPU_FENCE_SIGNAL_TIMEOUT: Waiting time in ms for triggering a KCPU queue sync state dump * @KBASE_TIMEOUT_SELECTOR_COUNT: Number of timeout selectors. Must be last in * the enum. + * @KBASE_DEFAULT_TIMEOUT: Default timeout used when an invalid selector is passed + * to the pre-computed timeout getter. */ enum kbase_timeout_selector { CSF_FIRMWARE_TIMEOUT, @@ -284,9 +287,11 @@ enum kbase_timeout_selector { CSF_FIRMWARE_PING_TIMEOUT, CSF_SCHED_PROTM_PROGRESS_TIMEOUT, MMU_AS_INACTIVE_WAIT_TIMEOUT, + KCPU_FENCE_SIGNAL_TIMEOUT, /* Must be the last in the enum */ - KBASE_TIMEOUT_SELECTOR_COUNT + KBASE_TIMEOUT_SELECTOR_COUNT, + KBASE_DEFAULT_TIMEOUT = CSF_FIRMWARE_TIMEOUT }; /** @@ -324,6 +329,14 @@ struct kbase_csf_notification { * It is in page units. * @link: Link to the linked list of GPU command queues created per * GPU address space. + * @pending_kick: Indicates whether there is a pending kick to be handled. + * @pending_kick_link: Link to the linked list of GPU command queues that have + * been kicked, but the kick has not yet been processed. + * This link would be deleted right before the kick is + * handled to allow for future kicks to occur in the mean + * time. For this reason, this must not be used to check + * for the presence of a pending queue kick. @pending_kick + * should be used instead. * @refcount: Reference count, stands for the number of times the queue * has been referenced. The reference is taken when it is * created, when it is bound to the group and also when the @@ -336,6 +349,7 @@ struct kbase_csf_notification { * @base_addr: Base address of the CS buffer. * @size: Size of the CS buffer. * @priority: Priority of this queue within the group. + * @group_priority: Priority of the group to which this queue has been bound. * @bind_state: Bind state of the queue as enum @kbase_csf_queue_bind_state * @csi_index: The ID of the assigned CS hardware interface. * @enabled: Indicating whether the CS is running, or not. @@ -363,7 +377,6 @@ struct kbase_csf_notification { * @trace_offset_ptr: Pointer to the CS trace buffer offset variable. * @trace_buffer_size: CS trace buffer size for the queue. * @trace_cfg: CS trace configuration parameters. - * @error: GPU command queue fatal information to pass to user space. * @cs_error_work: Work item to handle the CS fatal event reported for this * queue or the CS fault event if dump on fault is enabled * and acknowledgment for CS fault event needs to be done @@ -373,7 +386,6 @@ struct kbase_csf_notification { * @cs_error: Records information about the CS fatal event or * about CS fault event if dump on fault is enabled. * @cs_error_fatal: Flag to track if the CS fault or CS fatal event occurred. - * @pending: Indicating whether the queue has new submitted work. * @extract_ofs: The current EXTRACT offset, this is only updated when handling * the GLB IDLE IRQ if the idle timeout value is non-0 in order * to help detect a queue's true idle status. 
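@pending_kick and @pending_kick_link documented above feed the new GPU queue submission kthread. The kthread body is outside this diff; a simplified sketch of its consume loop, where pick_pending_kick() is a hypothetical helper that scans kbdev->csf.pending_gpuq_kicks[] from highest to lowest group priority:

	static int kbase_csf_scheduler_kthread(void *data)
	{
		struct kbase_device *kbdev = data;
		struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;

		while (scheduler->kthread_running) {
			struct kbase_queue *queue;

			wait_for_completion(&scheduler->kthread_signal);

			/* Pop one kicked queue, highest group priority first */
			spin_lock(&kbdev->csf.pending_gpuq_kicks_lock);
			queue = pick_pending_kick(kbdev); /* hypothetical */
			if (queue)
				list_del_init(&queue->pending_kick_link);
			spin_unlock(&kbdev->csf.pending_gpuq_kicks_lock);

			if (queue)
				kbase_csf_process_queue_kick(queue);

			/* Lets wait_pending_queue_kick() observe pending_kick == 0 */
			wake_up_all(&kbdev->csf.event_wait);
		}
		return 0;
	}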
@@ -386,11 +398,13 @@ struct kbase_queue { struct kbase_context *kctx; u64 user_io_gpu_va; struct tagged_addr phys[2]; - char *user_io_addr; + u64 *user_io_addr; u64 handle; int doorbell_nr; unsigned long db_file_offset; struct list_head link; + atomic_t pending_kick; + struct list_head pending_kick_link; kbase_refcount_t refcount; struct kbase_queue_group *group; struct kbase_va_region *queue_reg; @@ -398,6 +412,7 @@ struct kbase_queue { u64 base_addr; u32 size; u8 priority; + u8 group_priority; s8 csi_index; enum kbase_csf_queue_bind_state bind_state; bool enabled; @@ -410,12 +425,10 @@ struct kbase_queue { u64 trace_offset_ptr; u32 trace_buffer_size; u32 trace_cfg; - struct kbase_csf_notification error; struct work_struct cs_error_work; u64 cs_error_info; u32 cs_error; bool cs_error_fatal; - atomic_t pending; u64 extract_ofs; #if IS_ENABLED(CONFIG_DEBUG_FS) u64 saved_cmd_ptr; @@ -514,10 +527,6 @@ struct kbase_protected_suspend_buffer { * have pending protected mode entry requests. * @error_fatal: An error of type BASE_GPU_QUEUE_GROUP_ERROR_FATAL to be * returned to userspace if such an error has occurred. - * @error_timeout: An error of type BASE_GPU_QUEUE_GROUP_ERROR_TIMEOUT - * to be returned to userspace if such an error has occurred. - * @error_tiler_oom: An error of type BASE_GPU_QUEUE_GROUP_ERROR_TILER_HEAP_OOM - * to be returned to userspace if such an error has occurred. * @timer_event_work: Work item to handle the progress timeout fatal event * for the group. * @deschedule_deferred_cnt: Counter keeping a track of the number of threads @@ -544,6 +553,7 @@ struct kbase_queue_group { u8 compute_max; u8 csi_handlers; + u64 tiler_mask; u64 fragment_mask; u64 compute_mask; @@ -566,8 +576,6 @@ struct kbase_queue_group { DECLARE_BITMAP(protm_pending_bitmap, MAX_SUPPORTED_STREAMS_PER_GROUP); struct kbase_csf_notification error_fatal; - struct kbase_csf_notification error_timeout; - struct kbase_csf_notification error_tiler_oom; struct work_struct timer_event_work; @@ -582,6 +590,12 @@ struct kbase_queue_group { #endif void *csg_reg; u8 csg_reg_bind_retries; +#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) + /** + * @prev_act: Previous CSG activity transition in a GPU metrics. + */ + bool prev_act; +#endif }; /** @@ -834,8 +848,6 @@ struct kbase_csf_user_reg_context { * @link: Link to this csf context in the 'runnable_kctxs' list of * the scheduler instance * @sched: Object representing the scheduler's context - * @pending_submission_worker: Worker for the pending submission work item - * @pending_submission_work: Work item to process pending kicked GPU command queues. * @protm_event_worker: Worker to process requests to enter protected mode. * @cpu_queue: CPU queue information. Only be available when DEBUG_FS * is enabled. @@ -855,8 +867,6 @@ struct kbase_csf_context { struct workqueue_struct *wq; struct list_head link; struct kbase_csf_scheduler_context sched; - struct kthread_worker pending_submission_worker; - struct kthread_work pending_submission_work; struct kthread_worker protm_event_worker; #if IS_ENABLED(CONFIG_DEBUG_FS) struct kbase_csf_cpu_queue_context cpu_queue; @@ -1004,21 +1014,19 @@ struct kbase_csf_mcu_shared_regions { * "tock" schedule operation concluded. Used for * evaluating the exclusion window for in-cycle * schedule operation. + * @csf_worker: Dedicated kthread_worker to execute the @tick_work. * @timer_enabled: Whether the CSF scheduler wakes itself up for * periodic scheduling tasks. 
If this value is 0 * then it will only perform scheduling under the * influence of external factors e.g., IRQs, IOCTLs. - * @csf_worker: Dedicated kthread_worker to execute the @tick_work. * @tick_timer: High-resolution timer employed to schedule tick * workqueue items (kernel-provided delayed_work * items do not use hrtimer and for some reason do * not provide sufficiently reliable periodicity). - * @tick_work: Work item that performs the "schedule on tick" - * operation to implement timeslice-based scheduling. - * @tock_work: Work item that would perform the schedule on tock - * operation to implement the asynchronous scheduling. - * @pending_tock_work: Indicates that the tock work item should re-execute - * once it's finished instead of going back to sleep. + * @pending_tick_work: Indicates that kbase_csf_scheduler_kthread() should perform + * a scheduling tick. + * @pending_tock_work: Indicates that kbase_csf_scheduler_kthread() should perform + * a scheduling tock. * @ping_work: Work item that would ping the firmware at regular * intervals, only if there is a single active CSG * slot, to check if firmware is alive and would @@ -1064,13 +1072,6 @@ struct kbase_csf_mcu_shared_regions { * after GPU and L2 cache have been powered up. So when * this count is zero, MCU will not be powered up. * @csg_scheduling_period_ms: Duration of Scheduling tick in milliseconds. - * @tick_timer_active: Indicates whether the @tick_timer is effectively - * active or not, as the callback function of - * @tick_timer will enqueue @tick_work only if this - * flag is true. This is mainly useful for the case - * when scheduling tick needs to be advanced from - * interrupt context, without actually deactivating - * the @tick_timer first and then enqueing @tick_work. * @tick_protm_pending_seq: Scan out sequence number of the group that has * protected mode execution pending for the queue(s) * bound to it and will be considered first for the @@ -1097,6 +1098,12 @@ struct kbase_csf_mcu_shared_regions { * @mcu_regs_data: Scheduler MCU shared regions data for managing the * shared interface mappings for on-slot queues and * CSG suspend buffers. + * @kthread_signal: Used to wake up the GPU queue submission + * thread when a queue needs attention. + * @kthread_running: Whether the GPU queue submission thread should keep + * executing. + * @gpuq_kthread: High-priority thread used to handle GPU queue + * submissions. 
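The tick machinery moves in the same direction: @tick_work/@tock_work and @tick_timer_active give way to the @pending_tick_work/@pending_tock_work atomics, so a tick can be requested from hard-IRQ context (such as the @tick_timer callback) without a workqueue hop. A hedged sketch of the request side, not the driver's exact helper:

	static void scheduler_invoke_tick(struct kbase_device *kbdev)
	{
		struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;

		/* Coalesce requests: only signal on the false -> true edge */
		if (!atomic_cmpxchg(&scheduler->pending_tick_work, false, true))
			complete(&scheduler->kthread_signal);
	}

The kthread would then clear the flag and run the tick before servicing further queue kicks, which is why a dedicated tick_timer_active flag is no longer needed.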
*/ struct kbase_csf_scheduler { struct rt_mutex lock; @@ -1118,11 +1125,10 @@ struct kbase_csf_scheduler { DECLARE_BITMAP(csg_slots_idle_mask, MAX_SUPPORTED_CSGS); DECLARE_BITMAP(csg_slots_prio_update, MAX_SUPPORTED_CSGS); unsigned long last_schedule; - bool timer_enabled; struct kthread_worker csf_worker; + atomic_t timer_enabled; struct hrtimer tick_timer; - struct kthread_work tick_work; - struct kthread_delayed_work tock_work; + atomic_t pending_tick_work; atomic_t pending_tock_work; struct delayed_work ping_work; struct kbase_context *top_ctx; @@ -1140,7 +1146,6 @@ struct kbase_csf_scheduler { u32 non_idle_scanout_grps; u32 pm_active_count; unsigned int csg_scheduling_period_ms; - bool tick_timer_active; u32 tick_protm_pending_seq; #ifdef CONFIG_MALI_HOST_CONTROLS_SC_RAILS struct work_struct sc_rails_off_work; @@ -1151,6 +1156,15 @@ struct kbase_csf_scheduler { ktime_t protm_enter_time; struct kbase_csf_sched_heap_reclaim_mgr reclaim_mgr; struct kbase_csf_mcu_shared_regions mcu_regs_data; + struct completion kthread_signal; + bool kthread_running; + struct task_struct *gpuq_kthread; +#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) + /** + * @gpu_metrics_tb: Handler of firmware trace buffer for gpu_metrics + */ + struct firmware_trace_buffer *gpu_metrics_tb; +#endif /* CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD */ }; /* @@ -1167,9 +1181,9 @@ struct kbase_csf_scheduler { GLB_PROGRESS_TIMER_TIMEOUT_SCALE) /* - * Default GLB_PWROFF_TIMER_TIMEOUT value in unit of micro-seconds. + * Default GLB_PWROFF_TIMER_TIMEOUT value in unit of nanosecond. */ -#define DEFAULT_GLB_PWROFF_TIMEOUT_US (800) +#define DEFAULT_GLB_PWROFF_TIMEOUT_NS (800 * 1000) /* * In typical operations, the management of the shader core power transitions @@ -1389,7 +1403,7 @@ struct kbase_csf_mcu_fw { /* * Firmware log polling period. */ -#define KBASE_CSF_FIRMWARE_LOG_POLL_PERIOD_MS 25 +#define KBASE_CSF_FIRMWARE_LOG_POLL_PERIOD_MS_DEFAULT 25 /** * enum kbase_csf_firmware_log_mode - Firmware log operating mode @@ -1401,10 +1415,16 @@ struct kbase_csf_mcu_fw { * @KBASE_CSF_FIRMWARE_LOG_MODE_AUTO_PRINT: Automatic printing mode, firmware log * will be periodically emptied into dmesg, manual reading through debugfs is * disabled. + * + * @KBASE_CSF_FIRMWARE_LOG_MODE_AUTO_DISCARD: Automatic discarding mode, firmware + * log will be periodically discarded, the remaining log can be read manually by + * the userspace (and it will also be dumped automatically into dmesg on GPU + * reset). */ enum kbase_csf_firmware_log_mode { KBASE_CSF_FIRMWARE_LOG_MODE_MANUAL, - KBASE_CSF_FIRMWARE_LOG_MODE_AUTO_PRINT + KBASE_CSF_FIRMWARE_LOG_MODE_AUTO_PRINT, + KBASE_CSF_FIRMWARE_LOG_MODE_AUTO_DISCARD }; /** @@ -1418,6 +1438,7 @@ enum kbase_csf_firmware_log_mode { * @dump_buf: Buffer used for dumping the log. * @func_call_list_va_start: Virtual address of the start of the call list of FW log functions. * @func_call_list_va_end: Virtual address of the end of the call list of FW log functions. + * @poll_period_ms: Firmware log polling period in milliseconds. */ struct kbase_csf_firmware_log { enum kbase_csf_firmware_log_mode mode; @@ -1426,6 +1447,7 @@ struct kbase_csf_firmware_log { u8 *dump_buf; u32 func_call_list_va_start; u32 func_call_list_va_end; + atomic_t poll_period_ms; }; /** @@ -1521,7 +1543,7 @@ struct kbase_csf_user_reg { * image. * @shared_interface: Pointer to the interface object containing info for * the memory area shared between firmware & host. 
- * @shared_reg_rbtree: RB tree of the memory regions allocated from the + * @mcu_shared_zone: Memory zone tracking memory regions allocated from the * shared interface segment in MCU firmware address * space. * @db_filp: Pointer to a dummy file, that along with @@ -1584,22 +1606,28 @@ struct kbase_csf_user_reg { * fatal event. * @coredump_work: Work item for initiating a platform core dump. * @ipa_control: IPA Control component manager. - * @mcu_core_pwroff_dur_us: Sysfs attribute for the glb_pwroff timeout input - * in unit of micro-seconds. The firmware does not use + * @mcu_core_pwroff_dur_ns: Sysfs attribute for the glb_pwroff timeout input + * in units of nanoseconds. The firmware does not use * it directly. * @mcu_core_pwroff_dur_count: The counterpart of the glb_pwroff timeout input * in interface required format, ready to be used * directly in the firmware. + * @mcu_core_pwroff_dur_count_modifier: Update csffw_glb_req_cfg_pwroff_timer + * to make the shr(10) modifier conditional + * on the new flag in GLB_PWROFF_TIMER_CONFIG. * @mcu_core_pwroff_reg_shadow: The actual value that has been programmed into * the glb_pwroff register. This is separated from * the @p mcu_core_pwroff_dur_count as an update * to the latter is asynchronous. - * @gpu_idle_hysteresis_us: Sysfs attribute for the idle hysteresis time - * window in unit of microseconds. The firmware does not + * @gpu_idle_hysteresis_ns: Sysfs attribute for the idle hysteresis time + * window in units of nanoseconds. The firmware does not * use it directly. * @gpu_idle_dur_count: The counterpart of the hysteresis time window in * interface required format, ready to be used * directly in the firmware. + * @gpu_idle_dur_count_modifier: Update csffw_glb_req_idle_enable to make the shr(10) + * modifier conditional on the new flag + * in GLB_IDLE_TIMER_CONFIG. * @fw_timeout_ms: Timeout value (in milliseconds) used when waiting * for any request sent to the firmware. * @hwcnt: Contains members required for handling the dump of @@ -1611,6 +1639,12 @@ struct kbase_csf_user_reg { * @dof: Structure for dump on fault. * @user_reg: Collective information to support the mapping to * USER Register page for user processes. + * @pending_gpuq_kicks: Lists of GPU queues that have been kicked but not + * yet processed, categorised by queue group's priority. + * @pending_gpuq_kicks_lock: Protect @pending_gpuq_kicks and + * kbase_queue.pending_kick_link. + * @quirks_ext: Pointer to an allocated buffer containing the firmware + * workarounds configuration. 
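The new *_dur_count_modifier fields pair with the *_ns inputs above: the nanosecond value is converted to a raw timer count, and the legacy shr(10)-scaled encoding is kept only as a fallback, with the modifier flag telling the firmware which encoding was used. A hedged sketch of such a conversion; the helper name and the overflow policy are illustrative, not lifted from this patch:

	static u32 convert_dur_ns_to_count(u64 dur_ns, u32 *no_modifier)
	{
		u64 freq = arch_timer_get_cntfrq(); /* timestamp source rate */
		u64 count = div_u64(dur_ns * freq, NSEC_PER_SEC);

		if (count > U32_MAX) {
			count >>= 10;     /* legacy pre-scaled encoding */
			*no_modifier = 0;
		} else {
			*no_modifier = 1; /* raw count, new flag set */
		}

		return (u32)min_t(u64, count, U32_MAX);
	}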
*/ struct kbase_csf_device { struct kbase_mmu_table mcu_mmu; @@ -1620,7 +1654,7 @@ struct kbase_csf_device { struct kobject *fw_cfg_kobj; struct kbase_csf_trace_buffers firmware_trace_buffers; void *shared_interface; - struct rb_root shared_reg_rbtree; + struct kbase_reg_zone mcu_shared_zone; struct file *db_filp; u32 db_file_offsets; struct tagged_addr dummy_db_page; @@ -1642,11 +1676,13 @@ struct kbase_csf_device { struct work_struct fw_error_work; struct work_struct coredump_work; struct kbase_ipa_control ipa_control; - u32 mcu_core_pwroff_dur_us; + u32 mcu_core_pwroff_dur_ns; u32 mcu_core_pwroff_dur_count; + u32 mcu_core_pwroff_dur_count_modifier; u32 mcu_core_pwroff_reg_shadow; - u32 gpu_idle_hysteresis_us; + u32 gpu_idle_hysteresis_ns; u32 gpu_idle_dur_count; + u32 gpu_idle_dur_count_modifier; unsigned int fw_timeout_ms; struct kbase_csf_hwcnt hwcnt; struct kbase_csf_mcu_fw fw; @@ -1662,6 +1698,9 @@ struct kbase_csf_device { struct kbase_debug_coresight_device coresight; #endif /* IS_ENABLED(CONFIG_MALI_CORESIGHT) */ struct kbase_csf_user_reg user_reg; + struct list_head pending_gpuq_kicks[KBASE_QUEUE_GROUP_PRIORITY_COUNT]; + spinlock_t pending_gpuq_kicks_lock; + u32 *quirks_ext; }; /** @@ -1678,10 +1717,6 @@ struct kbase_csf_device { * @bf_data: Data relating to Bus fault. * @gf_data: Data relating to GPU fault. * @current_setup: Stores the MMU configuration for this address space. - * @is_unresponsive: Flag to indicate MMU is not responding. - * Set if a MMU command isn't completed within - * &kbase_device:mmu_as_inactive_wait_time_ms. - * Clear by kbase_ctx_sched_restore_all_as() after GPU reset completes. */ struct kbase_as { int number; @@ -1693,7 +1728,6 @@ struct kbase_as { struct kbase_fault bf_data; struct kbase_fault gf_data; struct kbase_mmu_setup current_setup; - bool is_unresponsive; }; #endif /* _KBASE_CSF_DEFS_H_ */ diff --git a/mali_kbase/csf/mali_kbase_csf_firmware.c b/mali_kbase/csf/mali_kbase_csf_firmware.c index a4f561b..22f9aeb 100644 --- a/mali_kbase/csf/mali_kbase_csf_firmware.c +++ b/mali_kbase/csf/mali_kbase_csf_firmware.c @@ -52,11 +52,12 @@ #include <mmu/mali_kbase_mmu.h> #include <asm/arch_timer.h> #include <linux/delay.h> +#include <linux/version_compat_defs.h> -#define MALI_MAX_FIRMWARE_NAME_LEN ((size_t)20) +#define MALI_MAX_DEFAULT_FIRMWARE_NAME_LEN ((size_t)20) -static char fw_name[MALI_MAX_FIRMWARE_NAME_LEN] = "mali_csffw.bin"; -module_param_string(fw_name, fw_name, sizeof(fw_name), 0644); +static char default_fw_name[MALI_MAX_DEFAULT_FIRMWARE_NAME_LEN] = "mali_csffw.bin"; +module_param_string(fw_name, default_fw_name, sizeof(default_fw_name), 0644); MODULE_PARM_DESC(fw_name, "firmware image"); /* The waiting time for firmware to boot */ @@ -78,7 +79,6 @@ MODULE_PARM_DESC(fw_debug, "Enables effective use of a debugger for debugging firmware code."); #endif - #define FIRMWARE_HEADER_MAGIC (0xC3F13A6Eul) #define FIRMWARE_HEADER_VERSION_MAJOR (0ul) #define FIRMWARE_HEADER_VERSION_MINOR (3ul) @@ -188,7 +188,7 @@ struct firmware_timeline_metadata { /* The shared interface area, used for communicating with firmware, is managed * like a virtual memory zone. Reserve the virtual space from that zone * corresponding to shared interface entry parsed from the firmware image. - * The shared_reg_rbtree should have been initialized before calling this + * The MCU_SHARED_ZONE should have been initialized before calling this * function. 
*/ static int setup_shared_iface_static_region(struct kbase_device *kbdev) @@ -201,8 +201,7 @@ static int setup_shared_iface_static_region(struct kbase_device *kbdev) if (!interface) return -EINVAL; - reg = kbase_alloc_free_region(kbdev, &kbdev->csf.shared_reg_rbtree, 0, - interface->num_pages_aligned, KBASE_REG_ZONE_MCU_SHARED); + reg = kbase_alloc_free_region(&kbdev->csf.mcu_shared_zone, 0, interface->num_pages_aligned); if (reg) { mutex_lock(&kbdev->csf.reg_lock); ret = kbase_add_va_region_rbtree(kbdev, reg, @@ -308,7 +307,7 @@ static void boot_csf_firmware(struct kbase_device *kbdev) static int wait_ready(struct kbase_device *kbdev) { const ktime_t wait_loop_start = ktime_get_raw(); - const u32 mmu_as_inactive_wait_time_ms = kbdev->mmu_as_inactive_wait_time_ms; + const u32 mmu_as_inactive_wait_time_ms = kbdev->mmu_or_gpu_cache_op_wait_time_ms; s64 diff; do { @@ -316,7 +315,8 @@ static int wait_ready(struct kbase_device *kbdev) for (i = 0; i < 1000; i++) { /* Wait for the MMU status to indicate there is no active command */ - if (!(kbase_reg_read(kbdev, MMU_AS_REG(MCU_AS_NR, AS_STATUS)) & + if (!(kbase_reg_read(kbdev, + MMU_STAGE1_REG(MMU_AS_REG(MCU_AS_NR, AS_STATUS))) & AS_STATUS_AS_ACTIVE)) return 0; } @@ -449,7 +449,7 @@ static void load_fw_image_section(struct kbase_device *kbdev, const u8 *data, for (page_num = 0; page_num < page_limit; ++page_num) { struct page *const page = as_page(phys[page_num]); - char *const p = kmap_atomic(page); + char *const p = kbase_kmap_atomic(page); u32 const copy_len = min_t(u32, PAGE_SIZE, data_len); if (copy_len > 0) { @@ -466,7 +466,7 @@ static void load_fw_image_section(struct kbase_device *kbdev, const u8 *data, kbase_sync_single_for_device(kbdev, kbase_dma_addr_from_tagged(phys[page_num]), PAGE_SIZE, DMA_TO_DEVICE); - kunmap_atomic(p); + kbase_kunmap_atomic(p); } } @@ -533,6 +533,7 @@ out: * within the 2MB pages aligned allocation. * @is_small_page: This is an output flag used to select between the small and large page * to be used for the FW entry allocation. + * @force_small_page: Use 4kB pages to allocate memory needed for FW loading * * Go through all the already initialized interfaces and find if a previously * allocated large page can be used to store contents of new FW interface entry. @@ -544,7 +545,7 @@ static inline bool entry_find_large_page_to_reuse(struct kbase_device *kbdev, const u32 flags, struct tagged_addr **phys, struct protected_memory_allocation ***pma, u32 num_pages, u32 *num_pages_aligned, - bool *is_small_page) + bool *is_small_page, bool force_small_page) { struct kbase_csf_firmware_interface *interface = NULL; struct kbase_csf_firmware_interface *target_interface = NULL; @@ -560,6 +561,8 @@ static inline bool entry_find_large_page_to_reuse(struct kbase_device *kbdev, *phys = NULL; *pma = NULL; + if (force_small_page) + goto out; /* If the section starts at 2MB aligned boundary, * then use 2MB page(s) for it. 
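Taken together with the @force_small_page parameter documented above, the next hunk turns FW section allocation into a two-pass strategy: attempt a 2MB-aligned large-page allocation first, then retry with plain 4kB pages before giving up. A minimal standalone sketch of that fallback shape, assuming hypothetical stand-ins alloc_pages() and load_entry() rather than the real driver entry points:

#include <stdbool.h>
#include <stdio.h>

/* Pretend allocator: large (2MB) pages are exhausted, small (4kB) pages work. */
static bool alloc_pages(unsigned int num_pages, bool force_small)
{
        return force_small;
}

static int load_entry(unsigned int num_pages)
{
        bool force_small = false;

retry_alloc:
        if (!alloc_pages(num_pages, force_small)) {
                if (!force_small) {
                        /* Mirrors the driver's "Retrying by using small pages" path. */
                        force_small = true;
                        goto retry_alloc;
                }
                return -1; /* both page sizes failed */
        }
        printf("mapped %u pages using %s pages\n", num_pages,
               force_small ? "small" : "large");
        return 0;
}

int main(void)
{
        return load_entry(512) ? 1 : 0;
}

The retry_alloc label in the hunk below plays exactly this role: protected-mode sections set force_small_page up front, everything else only falls back to it after a failed large-page attempt.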
@@ -653,7 +656,7 @@ static int parse_memory_setup_entry(struct kbase_device *kbdev, struct protected_memory_allocation **pma = NULL; bool reuse_pages = false; bool is_small_page = true; - bool ignore_page_migration = true; + bool force_small_page = false; if (data_end < data_start) { dev_err(kbdev->dev, "Firmware corrupt, data_end < data_start (0x%x<0x%x)\n", @@ -696,16 +699,15 @@ static int parse_memory_setup_entry(struct kbase_device *kbdev, num_pages = (virtual_end - virtual_start) >> PAGE_SHIFT; - if(!protected_mode) { - reuse_pages = entry_find_large_page_to_reuse( - kbdev, virtual_start, virtual_end, flags, &phys, &pma, - num_pages, &num_pages_aligned, &is_small_page); - } - else { - num_pages_aligned = num_pages; + if (protected_mode) { + force_small_page = true; dev_warn(kbdev->dev, "Protected memory allocation requested for %u bytes (%u pages), serving with small pages and tight allocation.", (virtual_end - virtual_start), num_pages); } +retry_alloc: + reuse_pages = entry_find_large_page_to_reuse(kbdev, virtual_start, virtual_end, flags, + &phys, &pma, num_pages, &num_pages_aligned, + &is_small_page, force_small_page); if (!reuse_pages) phys = kmalloc_array(num_pages_aligned, sizeof(*phys), GFP_KERNEL); @@ -716,16 +718,18 @@ static int parse_memory_setup_entry(struct kbase_device *kbdev, if (!reuse_pages) { pma = kbase_csf_protected_memory_alloc( kbdev, phys, num_pages_aligned, is_small_page); - } - - if (!pma) { - /* If we can't allocate sufficient memory for FW - bail out and leave protected execution unsupported by termintating the allocator. */ - dev_warn(kbdev->dev, - "Protected memory allocation failed during FW initialization - Firmware protected mode entry will not be supported"); - kbase_csf_protected_memory_term(kbdev); - kbdev->csf.pma_dev = NULL; - kfree(phys); - return 0; + if (!pma) { + /* If we can't allocate sufficient memory for FW - bail out and leave protected execution unsupported by terminating the allocator. */ + dev_warn(kbdev->dev, + "Protected memory allocation failed during FW initialization - Firmware protected mode entry will not be supported"); + kbase_csf_protected_memory_term(kbdev); + kbdev->csf.pma_dev = NULL; + kfree(phys); + return 0; + } + } else if (WARN_ON(!pma)) { + ret = -EINVAL; + goto out; } } else { if (!reuse_pages) { @@ -733,14 +737,22 @@ ret = kbase_mem_pool_alloc_pages( kbase_mem_pool_group_select(kbdev, KBASE_MEM_GROUP_CSF_FW, is_small_page), num_pages_aligned, phys, false, NULL); - ignore_page_migration = false; } } if (ret < 0) { - dev_err(kbdev->dev, - "Failed to allocate %u physical pages for the firmware interface entry at VA 0x%x\n", - num_pages_aligned, virtual_start); + dev_warn( + kbdev->dev, + "Failed to allocate %u physical pages for the firmware interface entry at VA 0x%x using %s", + num_pages_aligned, virtual_start, + is_small_page ? 
"small pages" : "large page"); + WARN_ON(reuse_pages); + if (!is_small_page) { + dev_warn(kbdev->dev, "Retrying by using small pages"); + force_small_page = true; + kfree(phys); + goto retry_alloc; + } goto out; } @@ -843,8 +855,7 @@ static int parse_memory_setup_entry(struct kbase_device *kbdev, ret = kbase_mmu_insert_pages_no_flush(kbdev, &kbdev->csf.mcu_mmu, virtual_start >> PAGE_SHIFT, phys, num_pages_aligned, mem_flags, - KBASE_MEM_GROUP_CSF_FW, NULL, NULL, - ignore_page_migration); + KBASE_MEM_GROUP_CSF_FW, NULL, NULL); if (ret != 0) { dev_err(kbdev->dev, "Failed to insert firmware pages\n"); @@ -1316,7 +1327,7 @@ static inline void access_firmware_memory_common(struct kbase_device *kbdev, u32 page_num = offset_bytes >> PAGE_SHIFT; u32 offset_in_page = offset_bytes & ~PAGE_MASK; struct page *target_page = as_page(interface->phys[page_num]); - uintptr_t cpu_addr = (uintptr_t)kmap_atomic(target_page); + uintptr_t cpu_addr = (uintptr_t)kbase_kmap_atomic(target_page); u32 *addr = (u32 *)(cpu_addr + offset_in_page); if (read) { @@ -1331,7 +1342,7 @@ static inline void access_firmware_memory_common(struct kbase_device *kbdev, sizeof(u32), DMA_BIDIRECTIONAL); } - kunmap_atomic((u32 *)cpu_addr); + kbase_kunmap_atomic((u32 *)cpu_addr); } static inline void access_firmware_memory(struct kbase_device *kbdev, @@ -1713,6 +1724,11 @@ static void enable_shader_poweroff_timer(struct kbase_device *const kbdev, kbase_csf_firmware_global_input(global_iface, GLB_PWROFF_TIMER, pwroff_reg); + + kbase_csf_firmware_global_input_mask(global_iface, GLB_PWROFF_TIMER_CONFIG, + kbdev->csf.mcu_core_pwroff_dur_count_modifier, + GLB_PWROFF_TIMER_CONFIG_NO_MODIFIER_MASK); + set_global_request(global_iface, GLB_REQ_CFG_PWROFF_TIMER_MASK); /* Save the programed reg value in its shadow field */ @@ -1739,6 +1755,11 @@ static void enable_gpu_idle_timer(struct kbase_device *const kbdev) kbase_csf_firmware_global_input(global_iface, GLB_IDLE_TIMER, kbdev->csf.gpu_idle_dur_count); + + kbase_csf_firmware_global_input_mask(global_iface, GLB_IDLE_TIMER_CONFIG, + kbdev->csf.gpu_idle_dur_count_modifier, + GLB_IDLE_TIMER_CONFIG_NO_MODIFIER_MASK); + kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ, GLB_REQ_REQ_IDLE_ENABLE, GLB_REQ_IDLE_ENABLE_MASK); dev_dbg(kbdev->dev, "Enabling GPU idle timer with count-value: 0x%.8x", @@ -2000,6 +2021,10 @@ static void kbase_csf_firmware_reload_worker(struct work_struct *work) return; #endif + err = kbase_csf_firmware_cfg_fw_wa_enable(kbdev); + if (WARN_ON(err)) + return; + /* Reboot the firmware */ kbase_csf_firmware_enable_mcu(kbdev); } @@ -2042,13 +2067,13 @@ void kbase_csf_firmware_reload_completed(struct kbase_device *kbdev) kbase_pm_update_state(kbdev); } -static u32 convert_dur_to_idle_count(struct kbase_device *kbdev, const u32 dur_us) +static u32 convert_dur_to_idle_count(struct kbase_device *kbdev, const u32 dur_ns, u32 *modifier) { #define MICROSECONDS_PER_SECOND 1000000u #define HYSTERESIS_VAL_UNIT_SHIFT (10) /* Get the cntfreq_el0 value, which drives the SYSTEM_TIMESTAMP */ u64 freq = arch_timer_get_cntfrq(); - u64 dur_val = dur_us; + u64 dur_val = dur_ns; u32 cnt_val_u32, reg_val_u32; bool src_system_timestamp = freq > 0; @@ -2066,21 +2091,24 @@ static u32 convert_dur_to_idle_count(struct kbase_device *kbdev, const u32 dur_u "Can't get the timestamp frequency, use cycle counter format with firmware idle hysteresis!"); } - /* Formula for dur_val = ((dur_us/1000000) * freq_HZ) >> 10) */ - dur_val = (dur_val * freq) >> HYSTERESIS_VAL_UNIT_SHIFT; - dur_val = div_u64(dur_val, 
1000000); + /* Formula for dur_val = (dur/1e9) * freq_HZ) */ + dur_val = dur_val * freq; + dur_val = div_u64(dur_val, NSEC_PER_SEC); + if (dur_val < S32_MAX) { + *modifier = 1; + } else { + dur_val = dur_val >> HYSTERESIS_VAL_UNIT_SHIFT; + *modifier = 0; + } /* Interface limits the value field to S32_MAX */ cnt_val_u32 = (dur_val > S32_MAX) ? S32_MAX : (u32)dur_val; reg_val_u32 = GLB_IDLE_TIMER_TIMEOUT_SET(0, cnt_val_u32); /* add the source flag */ - if (src_system_timestamp) - reg_val_u32 = GLB_IDLE_TIMER_TIMER_SOURCE_SET(reg_val_u32, - GLB_IDLE_TIMER_TIMER_SOURCE_SYSTEM_TIMESTAMP); - else - reg_val_u32 = GLB_IDLE_TIMER_TIMER_SOURCE_SET(reg_val_u32, - GLB_IDLE_TIMER_TIMER_SOURCE_GPU_COUNTER); + reg_val_u32 = GLB_IDLE_TIMER_TIMER_SOURCE_SET( + reg_val_u32, (src_system_timestamp ? GLB_IDLE_TIMER_TIMER_SOURCE_SYSTEM_TIMESTAMP : + GLB_IDLE_TIMER_TIMER_SOURCE_GPU_COUNTER)); return reg_val_u32; } @@ -2091,19 +2119,21 @@ u32 kbase_csf_firmware_get_gpu_idle_hysteresis_time(struct kbase_device *kbdev) u32 dur; kbase_csf_scheduler_spin_lock(kbdev, &flags); - dur = kbdev->csf.gpu_idle_hysteresis_us; + dur = kbdev->csf.gpu_idle_hysteresis_ns; kbase_csf_scheduler_spin_unlock(kbdev, flags); return dur; } -u32 kbase_csf_firmware_set_gpu_idle_hysteresis_time(struct kbase_device *kbdev, u32 dur) +u32 kbase_csf_firmware_set_gpu_idle_hysteresis_time(struct kbase_device *kbdev, u32 dur_ns) { unsigned long flags; + u32 modifier = 0; + #ifdef CONFIG_MALI_HOST_CONTROLS_SC_RAILS - const u32 hysteresis_val = convert_dur_to_idle_count(kbdev, MALI_HOST_CONTROLS_SC_RAILS_IDLE_TIMER_US); + const u32 hysteresis_val = convert_dur_to_idle_count(kbdev, MALI_HOST_CONTROLS_SC_RAILS_IDLE_TIMER_NS, &modifier); #else - const u32 hysteresis_val = convert_dur_to_idle_count(kbdev, dur); + const u32 hysteresis_val = convert_dur_to_idle_count(kbdev, dur_ns, &modifier); #endif /* The 'fw_load_lock' is taken to synchronize against the deferred @@ -2112,19 +2142,28 @@ u32 kbase_csf_firmware_set_gpu_idle_hysteresis_time(struct kbase_device *kbdev, mutex_lock(&kbdev->fw_load_lock); if (unlikely(!kbdev->csf.firmware_inited)) { kbase_csf_scheduler_spin_lock(kbdev, &flags); - kbdev->csf.gpu_idle_hysteresis_us = dur; + kbdev->csf.gpu_idle_hysteresis_ns = dur_ns; kbdev->csf.gpu_idle_dur_count = hysteresis_val; + kbdev->csf.gpu_idle_dur_count_modifier = modifier; kbase_csf_scheduler_spin_unlock(kbdev, flags); mutex_unlock(&kbdev->fw_load_lock); goto end; } mutex_unlock(&kbdev->fw_load_lock); + if (kbase_reset_gpu_prevent_and_wait(kbdev)) { + dev_warn(kbdev->dev, + "Failed to prevent GPU reset when updating idle_hysteresis_time"); + return kbdev->csf.gpu_idle_dur_count; + } + kbase_csf_scheduler_pm_active(kbdev); - if (kbase_csf_scheduler_wait_mcu_active(kbdev)) { + if (kbase_csf_scheduler_killable_wait_mcu_active(kbdev)) { dev_err(kbdev->dev, "Unable to activate the MCU, the idle hysteresis value shall remain unchanged"); kbase_csf_scheduler_pm_idle(kbdev); + kbase_reset_gpu_allow(kbdev); + return kbdev->csf.gpu_idle_dur_count; } @@ -2153,8 +2192,9 @@ u32 kbase_csf_firmware_set_gpu_idle_hysteresis_time(struct kbase_device *kbdev, wait_for_global_request(kbdev, GLB_REQ_IDLE_DISABLE_MASK); kbase_csf_scheduler_spin_lock(kbdev, &flags); - kbdev->csf.gpu_idle_hysteresis_us = dur; + kbdev->csf.gpu_idle_hysteresis_ns = dur_ns; kbdev->csf.gpu_idle_dur_count = hysteresis_val; + kbdev->csf.gpu_idle_dur_count_modifier = modifier; kbase_csf_firmware_enable_gpu_idle_timer(kbdev); kbase_csf_scheduler_spin_unlock(kbdev, flags); 
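For reference, the conversion introduced above is count = dur_ns * freq / NSEC_PER_SEC: the legacy shr(10) scaling is now applied only when the raw tick count would overflow the signed 32-bit timeout field, and the NO_MODIFIER flag written to GLB_IDLE_TIMER_CONFIG (and GLB_PWROFF_TIMER_CONFIG) tells the firmware which interpretation is in use. A self-contained sketch with a worked number; dur_ns_to_count() is a hypothetical name, not a driver symbol, and the 800 * 1000 ns figure is the new DEFAULT_GLB_PWROFF_TIMEOUT_NS, which keeps the old 800 us default:

#include <stdint.h>
#include <stdio.h>

#define HYSTERESIS_VAL_UNIT_SHIFT 10
#define NSEC_PER_SEC 1000000000ULL

static uint32_t dur_ns_to_count(uint64_t dur_ns, uint64_t freq_hz, uint32_t *no_modifier)
{
        /* count = (dur_ns / 1e9) * freq_hz, computed in 64 bits so a u32
         * duration times a GHz-range counter frequency cannot overflow. */
        uint64_t dur_val = dur_ns * freq_hz / NSEC_PER_SEC;

        if (dur_val < INT32_MAX) {
                *no_modifier = 1;                       /* raw ticks, shr(10) not applied */
        } else {
                dur_val >>= HYSTERESIS_VAL_UNIT_SHIFT;  /* 1024-tick units */
                *no_modifier = 0;
        }
        return dur_val > INT32_MAX ? INT32_MAX : (uint32_t)dur_val;
}

int main(void)
{
        uint32_t no_mod;
        /* 800000 ns (800 us) at a 50 MHz timestamp source:
         * 800000 * 50e6 / 1e9 = 40000 ticks, no_modifier = 1. */
        printf("count=%u no_modifier=%u\n",
               (unsigned)dur_ns_to_count(800000, 50000000, &no_mod), (unsigned)no_mod);
        return 0;
}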
wait_for_global_request(kbdev, GLB_REQ_IDLE_ENABLE_MASK); @@ -2164,8 +2204,9 @@ u32 kbase_csf_firmware_set_gpu_idle_hysteresis_time(struct kbase_device *kbdev, * enabled */ kbase_csf_scheduler_spin_lock(kbdev, &flags); - kbdev->csf.gpu_idle_hysteresis_us = dur; + kbdev->csf.gpu_idle_hysteresis_ns = dur_ns; kbdev->csf.gpu_idle_dur_count = hysteresis_val; + kbdev->csf.gpu_idle_dur_count_modifier = modifier; kbase_csf_scheduler_spin_unlock(kbdev, flags); } kbase_csf_scheduler_unlock(kbdev); @@ -2173,11 +2214,11 @@ u32 kbase_csf_firmware_set_gpu_idle_hysteresis_time(struct kbase_device *kbdev, mutex_unlock(&kbdev->csf.reg_lock); #endif - dev_dbg(kbdev->dev, "GPU suspend timeout updated: %i us (0x%.8x)", - kbdev->csf.gpu_idle_hysteresis_us, + dev_dbg(kbdev->dev, "GPU suspend timeout updated: %i ns (0x%.8x)", + kbdev->csf.gpu_idle_hysteresis_ns, kbdev->csf.gpu_idle_dur_count); kbase_csf_scheduler_pm_idle(kbdev); - + kbase_reset_gpu_allow(kbdev); end: dev_dbg(kbdev->dev, "CSF set firmware idle hysteresis count-value: 0x%.8x", hysteresis_val); @@ -2185,14 +2226,18 @@ end: return hysteresis_val; } -static u32 convert_dur_to_core_pwroff_count(struct kbase_device *kbdev, const u32 dur_us) +static u32 convert_dur_to_core_pwroff_count(struct kbase_device *kbdev, const u32 dur_ns, + u32 *modifier) { /* Get the cntfreq_el0 value, which drives the SYSTEM_TIMESTAMP */ u64 freq = arch_timer_get_cntfrq(); - u64 dur_val = dur_us; + u64 dur_val = dur_ns; u32 cnt_val_u32, reg_val_u32; bool src_system_timestamp = freq > 0; + const struct kbase_pm_policy *current_policy = kbase_pm_get_policy(kbdev); + bool always_on = current_policy == &kbase_pm_always_on_policy_ops; + if (!src_system_timestamp) { /* Get the cycle_counter source alternative */ spin_lock(&kbdev->pm.clk_rtm.lock); @@ -2207,21 +2252,32 @@ static u32 convert_dur_to_core_pwroff_count(struct kbase_device *kbdev, const u3 "Can't get the timestamp frequency, use cycle counter with MCU shader Core Poweroff timer!"); } - /* Formula for dur_val = ((dur_us/1e6) * freq_HZ) >> 10) */ - dur_val = (dur_val * freq) >> HYSTERESIS_VAL_UNIT_SHIFT; - dur_val = div_u64(dur_val, 1000000); + /* Formula for dur_val = (dur/1e9) * freq_HZ) */ + dur_val = dur_val * freq; + dur_val = div_u64(dur_val, NSEC_PER_SEC); + if (dur_val < S32_MAX) { + *modifier = 1; + } else { + dur_val = dur_val >> HYSTERESIS_VAL_UNIT_SHIFT; + *modifier = 0; + } - /* Interface limits the value field to S32_MAX */ - cnt_val_u32 = (dur_val > S32_MAX) ? S32_MAX : (u32)dur_val; + if (dur_val == 0 && !always_on) { + /* Lower Bound - as 0 disables timeout and host controls shader-core power management. */ + cnt_val_u32 = 1; + } else if (dur_val > S32_MAX) { + /* Upper Bound - as interface limits the field to S32_MAX */ + cnt_val_u32 = S32_MAX; + } else { + cnt_val_u32 = (u32)dur_val; + } reg_val_u32 = GLB_PWROFF_TIMER_TIMEOUT_SET(0, cnt_val_u32); /* add the source flag */ - if (src_system_timestamp) - reg_val_u32 = GLB_PWROFF_TIMER_TIMER_SOURCE_SET(reg_val_u32, - GLB_PWROFF_TIMER_TIMER_SOURCE_SYSTEM_TIMESTAMP); - else - reg_val_u32 = GLB_PWROFF_TIMER_TIMER_SOURCE_SET(reg_val_u32, - GLB_PWROFF_TIMER_TIMER_SOURCE_GPU_COUNTER); + reg_val_u32 = GLB_PWROFF_TIMER_TIMER_SOURCE_SET( + reg_val_u32, + (src_system_timestamp ? 
GLB_PWROFF_TIMER_TIMER_SOURCE_SYSTEM_TIMESTAMP : + GLB_PWROFF_TIMER_TIMER_SOURCE_GPU_COUNTER)); return reg_val_u32; } @@ -2232,20 +2288,23 @@ u32 kbase_csf_firmware_get_mcu_core_pwroff_time(struct kbase_device *kbdev) unsigned long flags; spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - pwroff = kbdev->csf.mcu_core_pwroff_dur_us; + pwroff = kbdev->csf.mcu_core_pwroff_dur_ns; spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); return pwroff; } -u32 kbase_csf_firmware_set_mcu_core_pwroff_time(struct kbase_device *kbdev, u32 dur) +u32 kbase_csf_firmware_set_mcu_core_pwroff_time(struct kbase_device *kbdev, u32 dur_ns) { unsigned long flags; - const u32 pwroff = convert_dur_to_core_pwroff_count(kbdev, dur); + u32 modifier = 0; + + const u32 pwroff = convert_dur_to_core_pwroff_count(kbdev, dur_ns, &modifier); spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - kbdev->csf.mcu_core_pwroff_dur_us = dur; + kbdev->csf.mcu_core_pwroff_dur_ns = dur_ns; kbdev->csf.mcu_core_pwroff_dur_count = pwroff; + kbdev->csf.mcu_core_pwroff_dur_count_modifier = modifier; spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); dev_dbg(kbdev->dev, "MCU shader Core Poweroff input update: 0x%.8x", pwroff); @@ -2253,6 +2312,11 @@ u32 kbase_csf_firmware_set_mcu_core_pwroff_time(struct kbase_device *kbdev, u32 return pwroff; } +u32 kbase_csf_firmware_reset_mcu_core_pwroff_time(struct kbase_device *kbdev) +{ + return kbase_csf_firmware_set_mcu_core_pwroff_time(kbdev, DEFAULT_GLB_PWROFF_TIMEOUT_NS); +} + /** * kbase_device_csf_iterator_trace_init - Send request to enable iterator * trace port. @@ -2264,19 +2328,25 @@ u32 kbase_csf_firmware_set_mcu_core_pwroff_time(struct kbase_device *kbdev, u32 static int kbase_device_csf_iterator_trace_init(struct kbase_device *kbdev) { /* Enable the iterator trace port if supported by the GPU. - * It requires the GPU to have a nonzero "iter_trace_enable" + * It requires the GPU to have a nonzero "iter-trace-enable" * property in the device tree, and the FW must advertise * this feature in GLB_FEATURES. 
*/ if (kbdev->pm.backend.gpu_powered) { - /* check device tree for iterator trace enable property */ + /* check device tree for iterator trace enable property + * and fallback to "iter_trace_enable" if it is not found + */ const void *iter_trace_param = of_get_property( kbdev->dev->of_node, - "iter_trace_enable", NULL); + "iter-trace-enable", NULL); const struct kbase_csf_global_iface *iface = &kbdev->csf.global_iface; + if (!iter_trace_param) + iter_trace_param = + of_get_property(kbdev->dev->of_node, "iter_trace_enable", NULL); + if (iter_trace_param) { u32 iter_trace_value = be32_to_cpup(iter_trace_param); @@ -2324,6 +2394,8 @@ static void coredump_worker(struct work_struct *data) int kbase_csf_firmware_early_init(struct kbase_device *kbdev) { + u32 modifier = 0; + init_waitqueue_head(&kbdev->csf.event_wait); kbdev->csf.interrupt_received = false; @@ -2336,11 +2408,13 @@ int kbase_csf_firmware_early_init(struct kbase_device *kbdev) */ kbdev->csf.mcu_core_pwroff_dur_count = 1; #else - kbdev->csf.mcu_core_pwroff_dur_us = DEFAULT_GLB_PWROFF_TIMEOUT_US; + kbdev->csf.mcu_core_pwroff_dur_ns = DEFAULT_GLB_PWROFF_TIMEOUT_NS; kbdev->csf.mcu_core_pwroff_dur_count = convert_dur_to_core_pwroff_count( - kbdev, DEFAULT_GLB_PWROFF_TIMEOUT_US); + kbdev, DEFAULT_GLB_PWROFF_TIMEOUT_NS, &modifier); + kbdev->csf.mcu_core_pwroff_dur_count_modifier = modifier; #endif + kbase_csf_firmware_reset_mcu_core_pwroff_time(kbdev); INIT_LIST_HEAD(&kbdev->csf.firmware_interfaces); INIT_LIST_HEAD(&kbdev->csf.firmware_config); INIT_LIST_HEAD(&kbdev->csf.firmware_timeline_metadata); @@ -2352,6 +2426,7 @@ int kbase_csf_firmware_early_init(struct kbase_device *kbdev) INIT_WORK(&kbdev->csf.coredump_work, coredump_worker); mutex_init(&kbdev->csf.reg_lock); + kbase_csf_pending_gpuq_kicks_init(kbdev); kbdev->csf.fw = (struct kbase_csf_mcu_fw){ .data = NULL }; @@ -2360,21 +2435,25 @@ int kbase_csf_firmware_early_init(struct kbase_device *kbdev) void kbase_csf_firmware_early_term(struct kbase_device *kbdev) { + kbase_csf_pending_gpuq_kicks_term(kbdev); mutex_destroy(&kbdev->csf.reg_lock); } int kbase_csf_firmware_late_init(struct kbase_device *kbdev) { - kbdev->csf.gpu_idle_hysteresis_us = FIRMWARE_IDLE_HYSTERESIS_TIME_USEC; + u32 modifier = 0; + + kbdev->csf.gpu_idle_hysteresis_ns = FIRMWARE_IDLE_HYSTERESIS_TIME_NS; + #ifdef KBASE_PM_RUNTIME if (kbase_pm_gpu_sleep_allowed(kbdev)) - kbdev->csf.gpu_idle_hysteresis_us /= FIRMWARE_IDLE_HYSTERESIS_GPU_SLEEP_SCALER; + kbdev->csf.gpu_idle_hysteresis_ns /= FIRMWARE_IDLE_HYSTERESIS_GPU_SLEEP_SCALER; #endif - WARN_ON(!kbdev->csf.gpu_idle_hysteresis_us); + WARN_ON(!kbdev->csf.gpu_idle_hysteresis_ns); #ifdef CONFIG_MALI_HOST_CONTROLS_SC_RAILS kbdev->csf.gpu_idle_dur_count = convert_dur_to_idle_count( - kbdev, MALI_HOST_CONTROLS_SC_RAILS_IDLE_TIMER_US); + kbdev, MALI_HOST_CONTROLS_SC_RAILS_IDLE_TIMER_NS, &modifier); /* Set to the lowest possible value for FW to immediately write * to the power off register to disable the cores. 
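The iterator-trace lookup just above tries the conventional dashed property name first and only then the legacy underscore spelling; the same idiom reappears later in this patch for the quirks-ext firmware workaround property. A runnable sketch of the policy, where dt_lookup() is merely a stand-in for of_get_property() and the fake table deliberately carries only the legacy spelling:

#include <stdio.h>
#include <string.h>

/* Stand-in for of_get_property(): pretend this DTB only carries the
 * legacy underscore spelling of the property. */
static const unsigned int *dt_lookup(const char *name)
{
        static const unsigned int one = 1;

        if (strcmp(name, "iter_trace_enable") == 0)
                return &one;
        return NULL;
}

int main(void)
{
        const unsigned int *prop = dt_lookup("iter-trace-enable");

        if (!prop) /* fall back to the legacy spelling, as the driver does */
                prop = dt_lookup("iter_trace_enable");

        printf("iterator trace %s\n", prop && *prop ? "enabled" : "disabled");
        return 0;
}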
@@ -2382,10 +2461,12 @@ int kbase_csf_firmware_late_init(struct kbase_device *kbdev) kbdev->csf.mcu_core_pwroff_dur_count = 1; #else kbdev->csf.gpu_idle_dur_count = convert_dur_to_idle_count( - kbdev, kbdev->csf.gpu_idle_hysteresis_us); - kbdev->csf.mcu_core_pwroff_dur_us = DEFAULT_GLB_PWROFF_TIMEOUT_US; + kbdev, kbdev->csf.gpu_idle_hysteresis_ns, &modifier); + kbdev->csf.gpu_idle_dur_count_modifier = modifier; + kbdev->csf.mcu_core_pwroff_dur_ns = DEFAULT_GLB_PWROFF_TIMEOUT_NS; kbdev->csf.mcu_core_pwroff_dur_count = convert_dur_to_core_pwroff_count( - kbdev, DEFAULT_GLB_PWROFF_TIMEOUT_US); + kbdev, DEFAULT_GLB_PWROFF_TIMEOUT_NS, &modifier); + kbdev->csf.mcu_core_pwroff_dur_count_modifier = modifier; #endif return 0; @@ -2401,6 +2482,7 @@ int kbase_csf_firmware_load_init(struct kbase_device *kbdev) u32 entry_end_offset; u32 entry_offset; int ret; + const char *fw_name = default_fw_name; lockdep_assert_held(&kbdev->fw_load_lock); @@ -2424,6 +2506,33 @@ int kbase_csf_firmware_load_init(struct kbase_device *kbdev) goto err_out; } +#if IS_ENABLED(CONFIG_OF) + /* If we can't read CSF firmware name from DTB, + * fw_name is not modified and remains the default. + */ + ret = of_property_read_string(kbdev->dev->of_node, "firmware-name", &fw_name); + if (ret == -EINVAL) { + /* Property doesn't exist in DTB, and fw_name already points to default FW name + * so just reset return value and continue. + */ + ret = 0; + } else if (ret == -ENODATA) { + dev_warn(kbdev->dev, + "\"firmware-name\" DTB property contains no data, using default FW name"); + /* Reset return value so FW does not fail to load */ + ret = 0; + } else if (ret == -EILSEQ) { + /* This is reached when the size of the fw_name buffer is too small for the string + * stored in the DTB and the null terminator. 
+ */ + dev_warn(kbdev->dev, + "\"firmware-name\" DTB property value too long, using default FW name."); + /* Reset return value so FW does not fail to load */ + ret = 0; + } + +#endif /* IS_ENABLED(CONFIG_OF) */ + if (request_firmware(&firmware, fw_name, kbdev->dev) != 0) { dev_err(kbdev->dev, "Failed to load firmware image '%s'\n", @@ -2534,6 +2643,12 @@ int kbase_csf_firmware_load_init(struct kbase_device *kbdev) } #endif + ret = kbase_csf_firmware_cfg_fw_wa_init(kbdev); + if (ret != 0) { + dev_err(kbdev->dev, "Failed to initialize firmware workarounds"); + goto err_out; + } + /* Make sure L2 cache is powered up */ kbase_pm_wait_for_l2_powered(kbdev); @@ -2568,6 +2683,12 @@ int kbase_csf_firmware_load_init(struct kbase_device *kbdev) if (ret != 0) goto err_out; + ret = kbase_csf_firmware_log_init(kbdev); + if (ret != 0) { + dev_err(kbdev->dev, "Failed to initialize FW trace (err %d)", ret); + goto err_out; + } + ret = kbase_csf_firmware_cfg_init(kbdev); if (ret != 0) goto err_out; @@ -2576,12 +2697,6 @@ int kbase_csf_firmware_load_init(struct kbase_device *kbdev) if (ret != 0) goto err_out; - ret = kbase_csf_firmware_log_init(kbdev); - if (ret != 0) { - dev_err(kbdev->dev, "Failed to initialize FW trace (err %d)", ret); - goto err_out; - } - if (kbdev->csf.fw_core_dump.available) kbase_csf_firmware_core_dump_init(kbdev); @@ -2607,10 +2722,10 @@ void kbase_csf_firmware_unload_term(struct kbase_device *kbdev) WARN(ret, "failed to wait for GPU reset"); - kbase_csf_firmware_log_term(kbdev); - kbase_csf_firmware_cfg_term(kbdev); + kbase_csf_firmware_log_term(kbdev); + kbase_csf_timeout_term(kbdev); kbase_csf_free_dummy_user_reg_page(kbdev); @@ -2638,6 +2753,8 @@ void kbase_csf_firmware_unload_term(struct kbase_device *kbdev) unload_mmu_tables(kbdev); + kbase_csf_firmware_cfg_fw_wa_term(kbdev); + kbase_csf_firmware_trace_buffers_term(kbdev); while (!list_empty(&kbdev->csf.firmware_interfaces)) { @@ -3014,7 +3131,9 @@ int kbase_csf_trigger_firmware_config_update(struct kbase_device *kbdev) /* Ensure GPU is powered-up until we complete config update.*/ kbase_csf_scheduler_pm_active(kbdev); - kbase_csf_scheduler_wait_mcu_active(kbdev); + err = kbase_csf_scheduler_killable_wait_mcu_active(kbdev); + if (err) + goto exit; /* The 'reg_lock' is also taken and is held till the update is * complete, to ensure the config update gets serialized. 
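The "firmware-name" handling above deliberately downgrades the three expected device-tree outcomes, so that request_firmware() still runs with the built-in default image instead of failing the whole FW load. A compact sketch of that policy; pick_fw_name() is a hypothetical helper, not driver code:

#include <errno.h>
#include <stdio.h>

/* Only a clean read replaces the default name; the three expected DT
 * errors are cleared so firmware loading continues with the default. */
static int pick_fw_name(int dt_err, const char **fw_name, const char *dt_value)
{
        switch (dt_err) {
        case 0:
                *fw_name = dt_value;    /* DTB supplied a usable string */
                return 0;
        case -EINVAL:                   /* property absent: silently keep default */
        case -ENODATA:                  /* property empty: warn, keep default */
        case -EILSEQ:                   /* string unterminated/too long: warn, keep default */
                return 0;
        default:
                return dt_err;          /* anything else is a real failure */
        }
}

int main(void)
{
        const char *fw_name = "mali_csffw.bin";

        if (pick_fw_name(-EILSEQ, &fw_name, "custom_fw.bin") == 0)
                printf("loading %s\n", fw_name);
        return 0;
}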
@@ -3031,6 +3150,7 @@ int kbase_csf_trigger_firmware_config_update(struct kbase_device *kbdev) GLB_REQ_FIRMWARE_CONFIG_UPDATE_MASK); mutex_unlock(&kbdev->csf.reg_lock); +exit: kbase_csf_scheduler_pm_idle(kbdev); return err; } @@ -3176,8 +3296,7 @@ int kbase_csf_firmware_mcu_shared_mapping_init( if (!cpu_addr) goto vmap_error; - va_reg = kbase_alloc_free_region(kbdev, &kbdev->csf.shared_reg_rbtree, 0, num_pages, - KBASE_REG_ZONE_MCU_SHARED); + va_reg = kbase_alloc_free_region(&kbdev->csf.mcu_shared_zone, 0, num_pages); if (!va_reg) goto va_region_alloc_error; @@ -3193,7 +3312,7 @@ int kbase_csf_firmware_mcu_shared_mapping_init( ret = kbase_mmu_insert_pages_no_flush(kbdev, &kbdev->csf.mcu_mmu, va_reg->start_pfn, &phys[0], num_pages, gpu_map_properties, - KBASE_MEM_GROUP_CSF_FW, NULL, NULL, false); + KBASE_MEM_GROUP_CSF_FW, NULL, NULL); if (ret) goto mmu_insert_pages_error; diff --git a/mali_kbase/csf/mali_kbase_csf_firmware.h b/mali_kbase/csf/mali_kbase_csf_firmware.h index 9e85c1d..d8ed8d6 100644 --- a/mali_kbase/csf/mali_kbase_csf_firmware.h +++ b/mali_kbase/csf/mali_kbase_csf_firmware.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2018-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -56,7 +56,7 @@ #define CSF_NUM_DOORBELL ((u8)24) /* Offset to the first HW doorbell page */ -#define CSF_HW_DOORBELL_PAGE_OFFSET ((u32)0x80000) +#define CSF_HW_DOORBELL_PAGE_OFFSET ((u32)DOORBELLS_BASE) /* Size of HW Doorbell page, used to calculate the offset to subsequent pages */ #define CSF_HW_DOORBELL_PAGE_SIZE ((u32)0x10000) @@ -870,6 +870,22 @@ u32 kbase_csf_firmware_get_mcu_core_pwroff_time(struct kbase_device *kbdev); u32 kbase_csf_firmware_set_mcu_core_pwroff_time(struct kbase_device *kbdev, u32 dur); /** + * kbase_csf_firmware_reset_mcu_core_pwroff_time - Reset the MCU shader Core power-off + * time value + * + * @kbdev: Instance of a GPU platform device that implements a CSF interface. + * + * Sets the MCU Shader Core power-off time value to the default. + * + * The configured MCU shader Core power-off timer will only have effect when the host + * driver has delegated the shader cores' power management to MCU. + * + * Return: the actual internal core power-off timer value in register defined + * format. + */ +u32 kbase_csf_firmware_reset_mcu_core_pwroff_time(struct kbase_device *kbdev); + +/** * kbase_csf_interface_version - Helper function to build the full firmware * interface version in a format compatible with * GLB_VERSION register diff --git a/mali_kbase/csf/mali_kbase_csf_firmware_cfg.c b/mali_kbase/csf/mali_kbase_csf_firmware_cfg.c index 13a816b..48ddbb5 100644 --- a/mali_kbase/csf/mali_kbase_csf_firmware_cfg.c +++ b/mali_kbase/csf/mali_kbase_csf_firmware_cfg.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2020-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -35,6 +35,7 @@ #define HOST_CONTROLS_SC_RAILS_CFG_ENTRY_NAME "Host controls SC rails" #endif +#define CSF_FIRMWARE_CFG_WA_CFG0_ENTRY_NAME "WA_CFG0" /** * struct firmware_config - Configuration item within the MCU firmware @@ -117,7 +118,7 @@ static ssize_t show_fw_cfg(struct kobject *kobj, return -EINVAL; } - return snprintf(buf, PAGE_SIZE, "%u\n", val); + return scnprintf(buf, PAGE_SIZE, "%u\n", val); } static ssize_t store_fw_cfg(struct kobject *kobj, @@ -150,6 +151,9 @@ static ssize_t store_fw_cfg(struct kobject *kobj, HOST_CONTROLS_SC_RAILS_CFG_ENTRY_NAME)) return -EPERM; #endif + if (!strcmp(config->name, + CSF_FIRMWARE_CFG_WA_CFG0_ENTRY_NAME)) + return -EPERM; if ((val < config->min) || (val > config->max)) return -EINVAL; @@ -275,6 +279,19 @@ int kbase_csf_firmware_cfg_init(struct kbase_device *kbdev) kbase_csf_read_firmware_memory(kbdev, config->address, &config->cur_val); + if (!strcmp(config->name, CSF_FIRMWARE_CFG_LOG_VERBOSITY_ENTRY_NAME) && + (config->cur_val)) { + err = kbase_csf_firmware_log_toggle_logging_calls(config->kbdev, + config->cur_val); + + if (err) { + kobject_put(&config->kobj); + dev_err(kbdev->dev, "Failed to enable logging (result: %d)", err); + return err; + } + } + + err = kobject_init_and_add(&config->kobj, &fw_cfg_kobj_type, kbdev->csf.fw_cfg_kobj, "%s", config->name); if (err) { @@ -361,6 +378,25 @@ int kbase_csf_firmware_cfg_find_config_address(struct kbase_device *kbdev, const return -ENOENT; } +int kbase_csf_firmware_cfg_fw_wa_enable(struct kbase_device *kbdev) +{ + struct firmware_config *config; + + /* "quirks_ext" property is optional */ + if (!kbdev->csf.quirks_ext) + return 0; + + list_for_each_entry(config, &kbdev->csf.firmware_config, node) { + if (strcmp(config->name, CSF_FIRMWARE_CFG_WA_CFG0_ENTRY_NAME)) + continue; + dev_info(kbdev->dev, "External quirks 0: 0x%08x", kbdev->csf.quirks_ext[0]); + kbase_csf_update_firmware_memory(kbdev, config->address, kbdev->csf.quirks_ext[0]); + return 0; + } + + return -ENOENT; +} + #ifdef CONFIG_MALI_HOST_CONTROLS_SC_RAILS int kbase_csf_firmware_cfg_enable_host_ctrl_sc_rails(struct kbase_device *kbdev) { @@ -379,6 +415,54 @@ int kbase_csf_firmware_cfg_enable_host_ctrl_sc_rails(struct kbase_device *kbdev) } #endif +int kbase_csf_firmware_cfg_fw_wa_init(struct kbase_device *kbdev) +{ + int ret; + int entry_count; + size_t entry_bytes; + + /* "quirks-ext" property is optional and may have no value. + * Also try fallback "quirks_ext" property if it doesn't exist. + */ + entry_count = of_property_count_u32_elems(kbdev->dev->of_node, "quirks-ext"); + + if (entry_count == -EINVAL) + entry_count = of_property_count_u32_elems(kbdev->dev->of_node, "quirks_ext"); + + if (entry_count == -EINVAL || entry_count == -ENODATA) + return 0; + + entry_bytes = entry_count * sizeof(u32); + kbdev->csf.quirks_ext = kzalloc(entry_bytes, GFP_KERNEL); + if (!kbdev->csf.quirks_ext) + return -ENOMEM; + + ret = of_property_read_u32_array(kbdev->dev->of_node, "quirks-ext", kbdev->csf.quirks_ext, + entry_count); + + if (ret == -EINVAL) + ret = of_property_read_u32_array(kbdev->dev->of_node, "quirks_ext", + kbdev->csf.quirks_ext, entry_count); + + if (ret == -EINVAL || ret == -ENODATA) { + /* This is unexpected since the property is already accessed for counting the number + * of its elements. 
+ */ + dev_err(kbdev->dev, "\"quirks_ext\" DTB property data read failed"); + return ret; + } + if (ret == -EOVERFLOW) { + dev_err(kbdev->dev, "\"quirks_ext\" DTB property data size exceeds 32 bits"); + return ret; + } + + return kbase_csf_firmware_cfg_fw_wa_enable(kbdev); +} + +void kbase_csf_firmware_cfg_fw_wa_term(struct kbase_device *kbdev) +{ + kfree(kbdev->csf.quirks_ext); +} #else int kbase_csf_firmware_cfg_init(struct kbase_device *kbdev) @@ -404,4 +488,15 @@ int kbase_csf_firmware_cfg_enable_host_ctrl_sc_rails(struct kbase_device *kbdev) return 0; } #endif + +int kbase_csf_firmware_cfg_fw_wa_enable(struct kbase_device *kbdev) +{ + return 0; +} + +int kbase_csf_firmware_cfg_fw_wa_init(struct kbase_device *kbdev) +{ + return 0; +} + #endif /* CONFIG_SYSFS */ diff --git a/mali_kbase/csf/mali_kbase_csf_firmware_cfg.h b/mali_kbase/csf/mali_kbase_csf_firmware_cfg.h index bf99c46..f565290 100644 --- a/mali_kbase/csf/mali_kbase_csf_firmware_cfg.h +++ b/mali_kbase/csf/mali_kbase_csf_firmware_cfg.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2020-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -97,5 +97,37 @@ int kbase_csf_firmware_cfg_enable_host_ctrl_sc_rails(struct kbase_device *kbdev) */ int kbase_csf_firmware_cfg_find_config_address(struct kbase_device *kbdev, const char *name, u32 *addr); +/** + * kbase_csf_firmware_cfg_fw_wa_enable() - Enable firmware workarounds configuration. + * + * @kbdev: Kbase device structure + * + * Look for the config entry that enables support in FW for workarounds and set it according to + * the firmware workaround configuration before the initial boot or reload of firmware. + * + * Return: 0 if successful, negative error code on failure + */ +int kbase_csf_firmware_cfg_fw_wa_enable(struct kbase_device *kbdev); + +/** + * kbase_csf_firmware_cfg_fw_wa_init() - Initialize firmware workarounds configuration. + * + * @kbdev: Kbase device structure + * + * Retrieve and save the firmware workarounds configuration from device-tree "quirks_ext" property. + * Then, look for the config entry that enables support in FW for workarounds and set it according + * to the configuration before the initial firmware boot. + * + * Return: 0 if successful, negative error code on failure + */ +int kbase_csf_firmware_cfg_fw_wa_init(struct kbase_device *kbdev); + +/** + * kbase_csf_firmware_cfg_fw_wa_term - Delete local cache for firmware workarounds configuration. + * + * @kbdev: Pointer to the Kbase device + * + */ +void kbase_csf_firmware_cfg_fw_wa_term(struct kbase_device *kbdev); #endif /* _KBASE_CSF_FIRMWARE_CFG_H_ */ diff --git a/mali_kbase/csf/mali_kbase_csf_firmware_core_dump.c b/mali_kbase/csf/mali_kbase_csf_firmware_core_dump.c index ce8e4af..493e1c8 100644 --- a/mali_kbase/csf/mali_kbase_csf_firmware_core_dump.c +++ b/mali_kbase/csf/mali_kbase_csf_firmware_core_dump.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2021-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -25,6 +25,7 @@ #include <linux/file.h> #include <linux/elf.h> #include <linux/elfcore.h> +#include <linux/version_compat_defs.h> #include "mali_kbase.h" #include "mali_kbase_csf_firmware_core_dump.h" @@ -507,7 +508,7 @@ static int fw_core_dump_create(struct kbase_device *kbdev) /* Ensure MCU is active before requesting the core dump. */ kbase_csf_scheduler_pm_active(kbdev); - err = kbase_csf_scheduler_wait_mcu_active(kbdev); + err = kbase_csf_scheduler_killable_wait_mcu_active(kbdev); if (!err) err = kbase_csf_firmware_req_core_dump(kbdev); @@ -666,9 +667,9 @@ static int fw_core_dump_seq_show(struct seq_file *m, void *v) /* Write the current page. */ page = as_page(data->interface->phys[data->page_num]); - p = kmap_atomic(page); + p = kbase_kmap_atomic(page); seq_write(m, p, FW_PAGE_SIZE); - kunmap_atomic(p); + kbase_kunmap_atomic(p); return 0; } diff --git a/mali_kbase/csf/mali_kbase_csf_firmware_log.c b/mali_kbase/csf/mali_kbase_csf_firmware_log.c index 77d3b1e..89df839 100644 --- a/mali_kbase/csf/mali_kbase_csf_firmware_log.c +++ b/mali_kbase/csf/mali_kbase_csf_firmware_log.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2021-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -55,7 +55,7 @@ static int kbase_csf_firmware_log_enable_mask_read(void *data, u64 *val) { struct kbase_device *kbdev = (struct kbase_device *)data; struct firmware_trace_buffer *tb = - kbase_csf_firmware_get_trace_buffer(kbdev, FIRMWARE_LOG_BUF_NAME); + kbase_csf_firmware_get_trace_buffer(kbdev, KBASE_CSFFW_LOG_BUF_NAME); if (tb == NULL) { dev_err(kbdev->dev, "Couldn't get the firmware trace buffer"); @@ -70,7 +70,7 @@ static int kbase_csf_firmware_log_enable_mask_write(void *data, u64 val) { struct kbase_device *kbdev = (struct kbase_device *)data; struct firmware_trace_buffer *tb = - kbase_csf_firmware_get_trace_buffer(kbdev, FIRMWARE_LOG_BUF_NAME); + kbase_csf_firmware_get_trace_buffer(kbdev, KBASE_CSFFW_LOG_BUF_NAME); u64 new_mask; unsigned int enable_bits_count; @@ -115,7 +115,7 @@ static ssize_t kbasep_csf_firmware_log_debugfs_read(struct file *file, char __us int ret; struct firmware_trace_buffer *tb = - kbase_csf_firmware_get_trace_buffer(kbdev, FIRMWARE_LOG_BUF_NAME); + kbase_csf_firmware_get_trace_buffer(kbdev, KBASE_CSFFW_LOG_BUF_NAME); if (tb == NULL) { dev_err(kbdev->dev, "Couldn't get the firmware trace buffer"); @@ -125,8 +125,9 @@ static ssize_t kbasep_csf_firmware_log_debugfs_read(struct file *file, char __us if (atomic_cmpxchg(&fw_log->busy, 0, 1) != 0) return -EBUSY; - /* Reading from userspace is only allowed in manual mode */ - if (fw_log->mode != KBASE_CSF_FIRMWARE_LOG_MODE_MANUAL) { + /* Reading from userspace is only allowed in manual mode or auto-discard mode */ + if (fw_log->mode != KBASE_CSF_FIRMWARE_LOG_MODE_MANUAL && + fw_log->mode != KBASE_CSF_FIRMWARE_LOG_MODE_AUTO_DISCARD) { ret = -EINVAL; goto out; } @@ -176,8 +177,9 @@ static int kbase_csf_firmware_log_mode_write(void *data, u64 val) cancel_delayed_work_sync(&fw_log->poll_work); break; case KBASE_CSF_FIRMWARE_LOG_MODE_AUTO_PRINT: + case KBASE_CSF_FIRMWARE_LOG_MODE_AUTO_DISCARD: schedule_delayed_work(&fw_log->poll_work, - 
msecs_to_jiffies(KBASE_CSF_FIRMWARE_LOG_POLL_PERIOD_MS)); + msecs_to_jiffies(atomic_read(&fw_log->poll_period_ms))); break; default: ret = -EINVAL; @@ -191,6 +193,24 @@ out: return ret; } +static int kbase_csf_firmware_log_poll_period_read(void *data, u64 *val) +{ + struct kbase_device *kbdev = (struct kbase_device *)data; + struct kbase_csf_firmware_log *fw_log = &kbdev->csf.fw_log; + + *val = atomic_read(&fw_log->poll_period_ms); + return 0; +} + +static int kbase_csf_firmware_log_poll_period_write(void *data, u64 val) +{ + struct kbase_device *kbdev = (struct kbase_device *)data; + struct kbase_csf_firmware_log *fw_log = &kbdev->csf.fw_log; + + atomic_set(&fw_log->poll_period_ms, val); + return 0; +} + DEFINE_DEBUGFS_ATTRIBUTE(kbase_csf_firmware_log_enable_mask_fops, kbase_csf_firmware_log_enable_mask_read, kbase_csf_firmware_log_enable_mask_write, "%llx\n"); @@ -204,56 +224,135 @@ static const struct file_operations kbasep_csf_firmware_log_debugfs_fops = { DEFINE_DEBUGFS_ATTRIBUTE(kbase_csf_firmware_log_mode_fops, kbase_csf_firmware_log_mode_read, kbase_csf_firmware_log_mode_write, "%llu\n"); +DEFINE_DEBUGFS_ATTRIBUTE(kbase_csf_firmware_log_poll_period_fops, + kbase_csf_firmware_log_poll_period_read, + kbase_csf_firmware_log_poll_period_write, "%llu\n"); #endif /* CONFIG_DEBUG_FS */ +static void kbase_csf_firmware_log_discard_buffer(struct kbase_device *kbdev) +{ + struct kbase_csf_firmware_log *fw_log = &kbdev->csf.fw_log; + struct firmware_trace_buffer *tb = + kbase_csf_firmware_get_trace_buffer(kbdev, KBASE_CSFFW_LOG_BUF_NAME); + + if (tb == NULL) { + dev_dbg(kbdev->dev, "Can't get the trace buffer, firmware log discard skipped"); + return; + } + + if (atomic_cmpxchg(&fw_log->busy, 0, 1) != 0) + return; + + kbase_csf_firmware_trace_buffer_discard(tb); + + atomic_set(&fw_log->busy, 0); +} + static void kbase_csf_firmware_log_poll(struct work_struct *work) { struct kbase_device *kbdev = container_of(work, struct kbase_device, csf.fw_log.poll_work.work); struct kbase_csf_firmware_log *fw_log = &kbdev->csf.fw_log; - schedule_delayed_work(&fw_log->poll_work, - msecs_to_jiffies(KBASE_CSF_FIRMWARE_LOG_POLL_PERIOD_MS)); + if (fw_log->mode == KBASE_CSF_FIRMWARE_LOG_MODE_AUTO_PRINT) + kbase_csf_firmware_log_dump_buffer(kbdev); + else if (fw_log->mode == KBASE_CSF_FIRMWARE_LOG_MODE_AUTO_DISCARD) + kbase_csf_firmware_log_discard_buffer(kbdev); + else + return; - kbase_csf_firmware_log_dump_buffer(kbdev); + schedule_delayed_work(&fw_log->poll_work, + msecs_to_jiffies(atomic_read(&fw_log->poll_period_ms))); } int kbase_csf_firmware_log_init(struct kbase_device *kbdev) { struct kbase_csf_firmware_log *fw_log = &kbdev->csf.fw_log; + int err = 0; +#if defined(CONFIG_DEBUG_FS) + struct dentry *dentry; +#endif /* CONFIG_DEBUG_FS */ /* Add one byte for null-termination */ fw_log->dump_buf = kmalloc(FIRMWARE_LOG_DUMP_BUF_SIZE + 1, GFP_KERNEL); - if (fw_log->dump_buf == NULL) - return -ENOMEM; + if (fw_log->dump_buf == NULL) { + err = -ENOMEM; + goto out; + } /* Ensure null-termination for all strings */ fw_log->dump_buf[FIRMWARE_LOG_DUMP_BUF_SIZE] = 0; + /* Set default log polling period */ + atomic_set(&fw_log->poll_period_ms, KBASE_CSF_FIRMWARE_LOG_POLL_PERIOD_MS_DEFAULT); + + INIT_DEFERRABLE_WORK(&fw_log->poll_work, kbase_csf_firmware_log_poll); +#ifdef CONFIG_MALI_FW_TRACE_MODE_AUTO_DISCARD + fw_log->mode = KBASE_CSF_FIRMWARE_LOG_MODE_AUTO_DISCARD; + schedule_delayed_work(&fw_log->poll_work, + msecs_to_jiffies(KBASE_CSF_FIRMWARE_LOG_POLL_PERIOD_MS_DEFAULT)); +#elif 
defined(CONFIG_MALI_FW_TRACE_MODE_AUTO_PRINT) + fw_log->mode = KBASE_CSF_FIRMWARE_LOG_MODE_AUTO_PRINT; + schedule_delayed_work(&fw_log->poll_work, + msecs_to_jiffies(KBASE_CSF_FIRMWARE_LOG_POLL_PERIOD_MS_DEFAULT)); +#else /* CONFIG_MALI_FW_TRACE_MODE_MANUAL */ fw_log->mode = KBASE_CSF_FIRMWARE_LOG_MODE_MANUAL; +#endif atomic_set(&fw_log->busy, 0); - INIT_DEFERRABLE_WORK(&fw_log->poll_work, kbase_csf_firmware_log_poll); -#if defined(CONFIG_DEBUG_FS) - debugfs_create_file("fw_trace_enable_mask", 0644, kbdev->mali_debugfs_directory, kbdev, - &kbase_csf_firmware_log_enable_mask_fops); - debugfs_create_file("fw_traces", 0444, kbdev->mali_debugfs_directory, kbdev, - &kbasep_csf_firmware_log_debugfs_fops); - debugfs_create_file("fw_trace_mode", 0644, kbdev->mali_debugfs_directory, kbdev, - &kbase_csf_firmware_log_mode_fops); -#endif /* CONFIG_DEBUG_FS */ +#if !defined(CONFIG_DEBUG_FS) + return 0; +#else /* !CONFIG_DEBUG_FS */ + dentry = debugfs_create_file("fw_trace_enable_mask", 0644, kbdev->mali_debugfs_directory, + kbdev, &kbase_csf_firmware_log_enable_mask_fops); + if (IS_ERR_OR_NULL(dentry)) { + dev_err(kbdev->dev, "Unable to create fw_trace_enable_mask\n"); + err = -ENOENT; + goto free_out; + } + dentry = debugfs_create_file("fw_traces", 0444, kbdev->mali_debugfs_directory, kbdev, + &kbasep_csf_firmware_log_debugfs_fops); + if (IS_ERR_OR_NULL(dentry)) { + dev_err(kbdev->dev, "Unable to create fw_traces\n"); + err = -ENOENT; + goto free_out; + } + dentry = debugfs_create_file("fw_trace_mode", 0644, kbdev->mali_debugfs_directory, kbdev, + &kbase_csf_firmware_log_mode_fops); + if (IS_ERR_OR_NULL(dentry)) { + dev_err(kbdev->dev, "Unable to create fw_trace_mode\n"); + err = -ENOENT; + goto free_out; + } + dentry = debugfs_create_file("fw_trace_poll_period_ms", 0644, kbdev->mali_debugfs_directory, + kbdev, &kbase_csf_firmware_log_poll_period_fops); + if (IS_ERR_OR_NULL(dentry)) { + dev_err(kbdev->dev, "Unable to create fw_trace_poll_period_ms"); + err = -ENOENT; + goto free_out; + } return 0; + +free_out: + kfree(fw_log->dump_buf); + fw_log->dump_buf = NULL; +#endif /* CONFIG_DEBUG_FS */ +out: + return err; } void kbase_csf_firmware_log_term(struct kbase_device *kbdev) { struct kbase_csf_firmware_log *fw_log = &kbdev->csf.fw_log; - cancel_delayed_work_sync(&fw_log->poll_work); - kfree(fw_log->dump_buf); + if (fw_log->dump_buf) { + cancel_delayed_work_sync(&fw_log->poll_work); + kfree(fw_log->dump_buf); + fw_log->dump_buf = NULL; + } } void kbase_csf_firmware_log_dump_buffer(struct kbase_device *kbdev) @@ -262,7 +361,7 @@ void kbase_csf_firmware_log_dump_buffer(struct kbase_device *kbdev) u8 *buf = fw_log->dump_buf, *p, *pnewline, *pend, *pendbuf; unsigned int read_size, remaining_size; struct firmware_trace_buffer *tb = - kbase_csf_firmware_get_trace_buffer(kbdev, FIRMWARE_LOG_BUF_NAME); + kbase_csf_firmware_get_trace_buffer(kbdev, KBASE_CSFFW_LOG_BUF_NAME); if (tb == NULL) { dev_dbg(kbdev->dev, "Can't get the trace buffer, firmware trace dump skipped"); @@ -415,7 +514,7 @@ int kbase_csf_firmware_log_toggle_logging_calls(struct kbase_device *kbdev, u32 /* Wait for the MCU to get disabled */ dev_info(kbdev->dev, "Wait for the MCU to get disabled"); - ret = kbase_pm_wait_for_desired_state(kbdev); + ret = kbase_pm_killable_wait_for_desired_state(kbdev); if (ret) { dev_err(kbdev->dev, "wait for PM state failed when toggling FW logging calls"); diff --git a/mali_kbase/csf/mali_kbase_csf_firmware_no_mali.c b/mali_kbase/csf/mali_kbase_csf_firmware_no_mali.c index 514492c..764c18d 100644 --- 
a/mali_kbase/csf/mali_kbase_csf_firmware_no_mali.c +++ b/mali_kbase/csf/mali_kbase_csf_firmware_no_mali.c @@ -936,7 +936,7 @@ void kbase_csf_firmware_reload_completed(struct kbase_device *kbdev) kbase_pm_update_state(kbdev); } -static u32 convert_dur_to_idle_count(struct kbase_device *kbdev, const u32 dur_ms) +static u32 convert_dur_to_idle_count(struct kbase_device *kbdev, const u32 dur_ms, u32 *modifier) { #define HYSTERESIS_VAL_UNIT_SHIFT (10) /* Get the cntfreq_el0 value, which drives the SYSTEM_TIMESTAMP */ @@ -963,6 +963,8 @@ static u32 convert_dur_to_idle_count(struct kbase_device *kbdev, const u32 dur_m dur_val = (dur_val * freq) >> HYSTERESIS_VAL_UNIT_SHIFT; dur_val = div_u64(dur_val, 1000); + *modifier = 0; + /* Interface limits the value field to S32_MAX */ cnt_val_u32 = (dur_val > S32_MAX) ? S32_MAX : (u32)dur_val; @@ -984,7 +986,7 @@ u32 kbase_csf_firmware_get_gpu_idle_hysteresis_time(struct kbase_device *kbdev) u32 dur; kbase_csf_scheduler_spin_lock(kbdev, &flags); - dur = kbdev->csf.gpu_idle_hysteresis_us; + dur = kbdev->csf.gpu_idle_hysteresis_ns; kbase_csf_scheduler_spin_unlock(kbdev, flags); return dur; @@ -993,7 +995,9 @@ u32 kbase_csf_firmware_get_gpu_idle_hysteresis_time(struct kbase_device *kbdev) u32 kbase_csf_firmware_set_gpu_idle_hysteresis_time(struct kbase_device *kbdev, u32 dur) { unsigned long flags; - const u32 hysteresis_val = convert_dur_to_idle_count(kbdev, dur); + u32 modifier = 0; + + const u32 hysteresis_val = convert_dur_to_idle_count(kbdev, dur, &modifier); /* The 'fw_load_lock' is taken to synchronize against the deferred * loading of FW, where the idle timer will be enabled. @@ -1001,19 +1005,28 @@ u32 kbase_csf_firmware_set_gpu_idle_hysteresis_time(struct kbase_device *kbdev, mutex_lock(&kbdev->fw_load_lock); if (unlikely(!kbdev->csf.firmware_inited)) { kbase_csf_scheduler_spin_lock(kbdev, &flags); - kbdev->csf.gpu_idle_hysteresis_us = dur; + kbdev->csf.gpu_idle_hysteresis_ns = dur; kbdev->csf.gpu_idle_dur_count = hysteresis_val; + kbdev->csf.gpu_idle_dur_count_modifier = modifier; kbase_csf_scheduler_spin_unlock(kbdev, flags); mutex_unlock(&kbdev->fw_load_lock); goto end; } mutex_unlock(&kbdev->fw_load_lock); + if (kbase_reset_gpu_prevent_and_wait(kbdev)) { + dev_warn(kbdev->dev, + "Failed to prevent GPU reset when updating idle_hysteresis_time"); + return kbdev->csf.gpu_idle_dur_count; + } + kbase_csf_scheduler_pm_active(kbdev); - if (kbase_csf_scheduler_wait_mcu_active(kbdev)) { + if (kbase_csf_scheduler_killable_wait_mcu_active(kbdev)) { dev_err(kbdev->dev, "Unable to activate the MCU, the idle hysteresis value shall remain unchanged"); kbase_csf_scheduler_pm_idle(kbdev); + kbase_reset_gpu_allow(kbdev); + return kbdev->csf.gpu_idle_dur_count; } @@ -1041,6 +1054,7 @@ u32 kbase_csf_firmware_set_gpu_idle_hysteresis_time(struct kbase_device *kbdev, kbase_csf_scheduler_spin_lock(kbdev, &flags); kbdev->csf.gpu_idle_hysteresis_us = dur; kbdev->csf.gpu_idle_dur_count = hysteresis_val; + kbdev->csf.gpu_idle_dur_count_modifier = modifier; kbase_csf_firmware_enable_gpu_idle_timer(kbdev); kbase_csf_scheduler_spin_unlock(kbdev, flags); wait_for_global_request(kbdev, GLB_REQ_IDLE_ENABLE_MASK); @@ -1052,6 +1066,7 @@ u32 kbase_csf_firmware_set_gpu_idle_hysteresis_time(struct kbase_device *kbdev, kbase_csf_scheduler_spin_lock(kbdev, &flags); kbdev->csf.gpu_idle_hysteresis_us = dur; kbdev->csf.gpu_idle_dur_count = hysteresis_val; + kbdev->csf.gpu_idle_dur_count_modifier = modifier; kbase_csf_scheduler_spin_unlock(kbdev, flags); } 
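Both the real and NO_MALI copies of kbase_csf_firmware_set_gpu_idle_hysteresis_time() now follow the same bracketing discipline: block GPU reset for the whole firmware handshake and re-allow it on every exit path, returning the cached count when the MCU cannot be activated. The control flow, as a standalone sketch in which the stub functions stand in for the kbase_* calls:

#include <stdio.h>

static int reset_prevent(void) { return 0; }        /* kbase_reset_gpu_prevent_and_wait() */
static void reset_allow(void) { printf("reset allowed again\n"); }
static int wait_mcu_active(void) { return 0; }      /* 0 = MCU came up */

static unsigned int update_timeout(unsigned int cached, unsigned int new_count)
{
        if (reset_prevent())
                return cached;          /* couldn't block reset: keep old value */
        if (wait_mcu_active()) {
                reset_allow();          /* re-allow before bailing out */
                return cached;
        }
        cached = new_count;             /* ... firmware handshake happens here ... */
        reset_allow();
        return cached;
}

int main(void)
{
        printf("count=%u\n", update_timeout(100, 250));
        return 0;
}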
kbase_csf_scheduler_unlock(kbdev); @@ -1060,7 +1075,7 @@ u32 kbase_csf_firmware_set_gpu_idle_hysteresis_time(struct kbase_device *kbdev, #endif kbase_csf_scheduler_pm_idle(kbdev); - + kbase_reset_gpu_allow(kbdev); end: dev_dbg(kbdev->dev, "CSF set firmware idle hysteresis count-value: 0x%.8x", hysteresis_val); @@ -1068,7 +1083,8 @@ end: return hysteresis_val; } -static u32 convert_dur_to_core_pwroff_count(struct kbase_device *kbdev, const u32 dur_us) +static u32 convert_dur_to_core_pwroff_count(struct kbase_device *kbdev, const u32 dur_us, + u32 *modifier) { /* Get the cntfreq_el0 value, which drives the SYSTEM_TIMESTAMP */ u64 freq = arch_timer_get_cntfrq(); @@ -1094,6 +1110,8 @@ static u32 convert_dur_to_core_pwroff_count(struct kbase_device *kbdev, const u3 dur_val = (dur_val * freq) >> HYSTERESIS_VAL_UNIT_SHIFT; dur_val = div_u64(dur_val, 1000000); + *modifier = 0; + /* Interface limits the value field to S32_MAX */ cnt_val_u32 = (dur_val > S32_MAX) ? S32_MAX : (u32)dur_val; @@ -1115,7 +1133,7 @@ u32 kbase_csf_firmware_get_mcu_core_pwroff_time(struct kbase_device *kbdev) unsigned long flags; spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - pwroff = kbdev->csf.mcu_core_pwroff_dur_us; + pwroff = kbdev->csf.mcu_core_pwroff_dur_ns; spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); return pwroff; @@ -1124,11 +1142,14 @@ u32 kbase_csf_firmware_get_mcu_core_pwroff_time(struct kbase_device *kbdev) u32 kbase_csf_firmware_set_mcu_core_pwroff_time(struct kbase_device *kbdev, u32 dur) { unsigned long flags; - const u32 pwroff = convert_dur_to_core_pwroff_count(kbdev, dur); + u32 modifier = 0; + + const u32 pwroff = convert_dur_to_core_pwroff_count(kbdev, dur, &modifier); spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - kbdev->csf.mcu_core_pwroff_dur_us = dur; + kbdev->csf.mcu_core_pwroff_dur_ns = dur; kbdev->csf.mcu_core_pwroff_dur_count = pwroff; + kbdev->csf.mcu_core_pwroff_dur_count_modifier = modifier; spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); dev_dbg(kbdev->dev, "MCU shader Core Poweroff input update: 0x%.8x", pwroff); @@ -1136,6 +1157,11 @@ u32 kbase_csf_firmware_set_mcu_core_pwroff_time(struct kbase_device *kbdev, u32 return pwroff; } +u32 kbase_csf_firmware_reset_mcu_core_pwroff_time(struct kbase_device *kbdev) +{ + return kbase_csf_firmware_set_mcu_core_pwroff_time(kbdev, DEFAULT_GLB_PWROFF_TIMEOUT_NS); +} + int kbase_csf_firmware_early_init(struct kbase_device *kbdev) { init_waitqueue_head(&kbdev->csf.event_wait); @@ -1144,6 +1170,7 @@ int kbase_csf_firmware_early_init(struct kbase_device *kbdev) kbdev->csf.fw_timeout_ms = kbase_get_timeout_ms(kbdev, CSF_FIRMWARE_TIMEOUT); + kbase_csf_firmware_reset_mcu_core_pwroff_time(kbdev); INIT_LIST_HEAD(&kbdev->csf.firmware_interfaces); INIT_LIST_HEAD(&kbdev->csf.firmware_config); INIT_LIST_HEAD(&kbdev->csf.firmware_trace_buffers.list); @@ -1153,25 +1180,30 @@ int kbase_csf_firmware_early_init(struct kbase_device *kbdev) INIT_WORK(&kbdev->csf.fw_error_work, firmware_error_worker); mutex_init(&kbdev->csf.reg_lock); + kbase_csf_pending_gpuq_kicks_init(kbdev); return 0; } void kbase_csf_firmware_early_term(struct kbase_device *kbdev) { + kbase_csf_pending_gpuq_kicks_term(kbdev); mutex_destroy(&kbdev->csf.reg_lock); } int kbase_csf_firmware_late_init(struct kbase_device *kbdev) { - kbdev->csf.gpu_idle_hysteresis_us = FIRMWARE_IDLE_HYSTERESIS_TIME_USEC; + u32 modifier = 0; + + kbdev->csf.gpu_idle_hysteresis_ns = FIRMWARE_IDLE_HYSTERESIS_TIME_NS; #ifdef KBASE_PM_RUNTIME if (kbase_pm_gpu_sleep_allowed(kbdev)) - 
kbdev->csf.gpu_idle_hysteresis_us /= FIRMWARE_IDLE_HYSTERESIS_GPU_SLEEP_SCALER; + kbdev->csf.gpu_idle_hysteresis_ns /= FIRMWARE_IDLE_HYSTERESIS_GPU_SLEEP_SCALER; #endif - WARN_ON(!kbdev->csf.gpu_idle_hysteresis_us); + WARN_ON(!kbdev->csf.gpu_idle_hysteresis_ns); kbdev->csf.gpu_idle_dur_count = - convert_dur_to_idle_count(kbdev, kbdev->csf.gpu_idle_hysteresis_us); + convert_dur_to_idle_count(kbdev, kbdev->csf.gpu_idle_hysteresis_ns, &modifier); + kbdev->csf.gpu_idle_dur_count_modifier = modifier; return 0; } @@ -1254,10 +1286,10 @@ void kbase_csf_firmware_unload_term(struct kbase_device *kbdev) /* NO_MALI: Don't stop firmware or unload MMU tables */ - kbase_csf_scheduler_term(kbdev); - kbase_csf_free_dummy_user_reg_page(kbdev); + kbase_csf_scheduler_term(kbdev); + kbase_csf_doorbell_mapping_term(kbdev); free_global_iface(kbdev); @@ -1604,8 +1636,7 @@ int kbase_csf_firmware_mcu_shared_mapping_init( if (!cpu_addr) goto vmap_error; - va_reg = kbase_alloc_free_region(kbdev, &kbdev->csf.shared_reg_rbtree, 0, num_pages, - KBASE_REG_ZONE_MCU_SHARED); + va_reg = kbase_alloc_free_region(&kbdev->csf.mcu_shared_zone, 0, num_pages); if (!va_reg) goto va_region_alloc_error; @@ -1621,7 +1652,7 @@ int kbase_csf_firmware_mcu_shared_mapping_init( ret = kbase_mmu_insert_pages_no_flush(kbdev, &kbdev->csf.mcu_mmu, va_reg->start_pfn, &phys[0], num_pages, gpu_map_properties, - KBASE_MEM_GROUP_CSF_FW, NULL, NULL, false); + KBASE_MEM_GROUP_CSF_FW, NULL, NULL); if (ret) goto mmu_insert_pages_error; diff --git a/mali_kbase/csf/mali_kbase_csf_kcpu.c b/mali_kbase/csf/mali_kbase_csf_kcpu.c index 6cb6733..08d82d2 100644 --- a/mali_kbase/csf/mali_kbase_csf_kcpu.c +++ b/mali_kbase/csf/mali_kbase_csf_kcpu.c @@ -24,7 +24,9 @@ #include <mali_kbase_ctx_sched.h> #include "device/mali_kbase_device.h" #include "mali_kbase_csf.h" +#include "mali_kbase_csf_sync_debugfs.h" #include <linux/export.h> +#include <linux/version_compat_defs.h> #if IS_ENABLED(CONFIG_SYNC_FILE) #include "mali_kbase_fence.h" @@ -679,7 +681,7 @@ static int kbase_csf_queue_group_suspend_prepare( struct tagged_addr *page_array; u64 start, end, i; - if (((reg->flags & KBASE_REG_ZONE_MASK) != KBASE_REG_ZONE_SAME_VA) || + if ((kbase_bits_to_zone(reg->flags) != SAME_VA_ZONE) || (kbase_reg_current_backed_size(reg) < nr_pages) || !(reg->flags & KBASE_REG_CPU_WR) || (reg->gpu_alloc->type != KBASE_MEM_TYPE_NATIVE) || @@ -1343,6 +1345,7 @@ static void kbase_csf_fence_wait_callback(struct dma_fence *fence, /* Fence gets signaled. 
Deactivate the timer for fence-wait timeout */ del_timer(&kcpu_queue->fence_timeout); #endif + KBASE_KTRACE_ADD_CSF_KCPU(kctx->kbdev, KCPU_FENCE_WAIT_END, kcpu_queue, fence->context, fence->seqno); @@ -1445,14 +1448,14 @@ static void fence_timeout_callback(struct timer_list *timer) } /** - * fence_timeout_start() - Start a timer to check fence-wait timeout + * fence_wait_timeout_start() - Start a timer to check fence-wait timeout * * @cmd: KCPU command queue * * Activate a timer to check whether a fence-wait command in the queue * gets completed within FENCE_WAIT_TIMEOUT_MS */ -static void fence_timeout_start(struct kbase_kcpu_command_queue *cmd) +static void fence_wait_timeout_start(struct kbase_kcpu_command_queue *cmd) { mod_timer(&cmd->fence_timeout, jiffies + msecs_to_jiffies(FENCE_WAIT_TIMEOUT_MS)); } @@ -1489,18 +1492,20 @@ static int kbase_kcpu_fence_wait_process( if (kcpu_queue->fence_wait_processed) { fence_status = dma_fence_get_status(fence); } else { - int cb_err = dma_fence_add_callback(fence, + int cb_err; + + KBASE_KTRACE_ADD_CSF_KCPU(kctx->kbdev, KCPU_FENCE_WAIT_START, kcpu_queue, + fence->context, fence->seqno); + + cb_err = dma_fence_add_callback(fence, &fence_info->fence_cb, kbase_csf_fence_wait_callback); - KBASE_KTRACE_ADD_CSF_KCPU(kctx->kbdev, - KCPU_FENCE_WAIT_START, kcpu_queue, - fence->context, fence->seqno); fence_status = cb_err; if (cb_err == 0) { kcpu_queue->fence_wait_processed = true; #ifdef CONFIG_MALI_FENCE_DEBUG - fence_timeout_start(kcpu_queue); + fence_wait_timeout_start(kcpu_queue); #endif } else if (cb_err == -ENOENT) { fence_status = dma_fence_get_status(fence); @@ -1512,14 +1517,12 @@ static int kbase_kcpu_fence_wait_process( "Unexpected status for fence %s of ctx:%d_%d kcpu queue:%u", info.name, kctx->tgid, kctx->id, kcpu_queue->id); } - /* - * At this point the fence in question is already signalled without - * any error. Its useful to print a FENCE_WAIT_END trace here to - * indicate completion. - */ - KBASE_KTRACE_ADD_CSF_KCPU(kctx->kbdev, - KCPU_FENCE_WAIT_END, kcpu_queue, - fence->context, fence->seqno); + + KBASE_KTRACE_ADD_CSF_KCPU(kctx->kbdev, KCPU_FENCE_WAIT_END, kcpu_queue, + fence->context, fence->seqno); + } else { + KBASE_KTRACE_ADD_CSF_KCPU(kctx->kbdev, KCPU_FENCE_WAIT_END, kcpu_queue, + fence->context, fence->seqno); } } @@ -1565,12 +1568,193 @@ static int kbase_kcpu_fence_wait_prepare(struct kbase_kcpu_command_queue *kcpu_q return 0; } +/** + * fence_signal_timeout_start() - Start a timer to check enqueued fence-signal command is + * blocked for too long a duration + * + * @kcpu_queue: KCPU command queue + * + * Activate the queue's fence_signal_timeout timer to check whether a fence-signal command + * enqueued has been blocked for longer than a configured wait duration. 
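+ * The timer is armed only while the fence_signal_timeout_enabled flag is set;
+ * the wait duration is resolved via kbase_get_timeout_ms(kbdev, KCPU_FENCE_SIGNAL_TIMEOUT) at arming time.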
+ */ +static void fence_signal_timeout_start(struct kbase_kcpu_command_queue *kcpu_queue) +{ + struct kbase_device *kbdev = kcpu_queue->kctx->kbdev; + unsigned int wait_ms = kbase_get_timeout_ms(kbdev, KCPU_FENCE_SIGNAL_TIMEOUT); + + if (atomic_read(&kbdev->fence_signal_timeout_enabled)) + mod_timer(&kcpu_queue->fence_signal_timeout, jiffies + msecs_to_jiffies(wait_ms)); +} + +static void kbase_kcpu_command_fence_force_signaled_set( + struct kbase_kcpu_command_fence_info *fence_info, + bool has_force_signaled) +{ + fence_info->fence_has_force_signaled = has_force_signaled; +} + +bool kbase_kcpu_command_fence_has_force_signaled(struct kbase_kcpu_command_fence_info *fence_info) +{ + return fence_info->fence_has_force_signaled; +} + +static int kbase_kcpu_fence_force_signal_process( + struct kbase_kcpu_command_queue *kcpu_queue, + struct kbase_kcpu_command_fence_info *fence_info) +{ + struct kbase_context *const kctx = kcpu_queue->kctx; + int ret; + + /* Already force-signalled, just return */ + if (kbase_kcpu_command_fence_has_force_signaled(fence_info)) + return 0; + + if (WARN_ON(!fence_info->fence)) + return -EINVAL; + + ret = dma_fence_signal(fence_info->fence); + if (unlikely(ret < 0)) { + dev_warn(kctx->kbdev->dev, "dma_fence(%d) has been signalled already\n", ret); + /* Treated as a success */ + ret = 0; + } + + KBASE_KTRACE_ADD_CSF_KCPU(kctx->kbdev, KCPU_FENCE_SIGNAL, kcpu_queue, + fence_info->fence->context, + fence_info->fence->seqno); + +#if (KERNEL_VERSION(5, 1, 0) > LINUX_VERSION_CODE) + dev_info(kctx->kbdev->dev, + "ctx:%d_%d kcpu queue[%pK]:%u signal fence[%pK] context#seqno:%llu#%u\n", + kctx->tgid, kctx->id, kcpu_queue, kcpu_queue->id, fence_info->fence, + fence_info->fence->context, fence_info->fence->seqno); +#else + dev_info(kctx->kbdev->dev, + "ctx:%d_%d kcpu queue[%pK]:%u signal fence[%pK] context#seqno:%llu#%llu\n", + kctx->tgid, kctx->id, kcpu_queue, kcpu_queue->id, fence_info->fence, + fence_info->fence->context, fence_info->fence->seqno); +#endif + + /* dma_fence refcount needs to be decreased to release it. */ + dma_fence_put(fence_info->fence); + fence_info->fence = NULL; + + return ret; +} + +static void kcpu_force_signal_fence(struct kbase_kcpu_command_queue *kcpu_queue) +{ + int status; + int i; +#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) + struct fence *fence; +#else + struct dma_fence *fence; +#endif + struct kbase_context *const kctx = kcpu_queue->kctx; +#ifdef CONFIG_MALI_FENCE_DEBUG + int del; +#endif + + /* Force trigger all pending fence-signal commands */ + for (i = 0; i != kcpu_queue->num_pending_cmds; ++i) { + struct kbase_kcpu_command *cmd = + &kcpu_queue->commands[(u8)(kcpu_queue->start_offset + i)]; + + if (cmd->type == BASE_KCPU_COMMAND_TYPE_FENCE_SIGNAL) { + /* If a fence has already been force-signalled previously, + * just skip it in this round of force signalling.
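+ * (A force-signalled command has already dropped its dma_fence reference,
+ * so it must not be processed a second time.)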
+ */ + if (kbase_kcpu_command_fence_has_force_signaled(&cmd->info.fence)) + continue; + + fence = kbase_fence_get(&cmd->info.fence); + + dev_info(kctx->kbdev->dev, "kbase KCPU[%pK] cmd%d fence[%pK] force signaled\n", + kcpu_queue, i + 1, fence); + + /* Set the ETIMEDOUT error flag before signalling the fence */ + dma_fence_set_error_helper(fence, -ETIMEDOUT); + + /* Force-signal the fence */ + status = kbase_kcpu_fence_force_signal_process( + kcpu_queue, &cmd->info.fence); + if (status < 0) + dev_err(kctx->kbdev->dev, "kbase signal failed\n"); + else + kbase_kcpu_command_fence_force_signaled_set(&cmd->info.fence, true); + + kcpu_queue->has_error = true; + } + } + + /* Set fence_signal_pending_cnt to 0 + * and delete the kcpu_queue's timer, + * because all the pending fences in the queue have been signalled + */ + atomic_set(&kcpu_queue->fence_signal_pending_cnt, 0); +#ifdef CONFIG_MALI_FENCE_DEBUG + del = del_timer_sync(&kcpu_queue->fence_signal_timeout); + dev_info(kctx->kbdev->dev, "kbase KCPU [%pK] delete fence signal timeout timer ret: %d", + kcpu_queue, del); +#else + del_timer_sync(&kcpu_queue->fence_signal_timeout); +#endif +} + +static void kcpu_queue_force_fence_signal(struct kbase_kcpu_command_queue *kcpu_queue) +{ + struct kbase_context *const kctx = kcpu_queue->kctx; + char buff[] = "surfaceflinger"; + + /* Force-signal unsignalled fences, except for surfaceflinger */ + if (memcmp(kctx->comm, buff, sizeof(buff))) { + mutex_lock(&kcpu_queue->lock); + kcpu_force_signal_fence(kcpu_queue); + mutex_unlock(&kcpu_queue->lock); + } +} + +/** + * fence_signal_timeout_cb() - Timeout callback function for fence-signal-wait + * + * @timer: Timer struct + * + * Callback function invoked when an enqueued fence-signal command has exceeded its + * configured wait duration. At the moment it is a simple placeholder, deferring the + * actual sync state dump to a bottom-half workqueue item. + */ +static void fence_signal_timeout_cb(struct timer_list *timer) +{ + struct kbase_kcpu_command_queue *kcpu_queue = + container_of(timer, struct kbase_kcpu_command_queue, fence_signal_timeout); + struct kbase_context *const kctx = kcpu_queue->kctx; +#ifdef CONFIG_MALI_FENCE_DEBUG + dev_warn(kctx->kbdev->dev, "kbase KCPU fence signal timeout callback triggered"); +#endif + + /* If we have additional pending fence-signal commands in the queue, re-arm for the + * remaining fence-signal commands, and dump the work to dmesg, only if the + * global configuration option is set.
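+ * The heavier work (the sync state dump and any force-signalling) is deferred to
+ * kcpu_queue_timeout_worker(), since the queue mutex cannot be taken from this
+ * atomic timer context.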
+ */ + if (atomic_read(&kctx->kbdev->fence_signal_timeout_enabled)) { + if (atomic_read(&kcpu_queue->fence_signal_pending_cnt) > 1) + fence_signal_timeout_start(kcpu_queue); + + kthread_queue_work(&kcpu_queue->csf_kcpu_worker, &kcpu_queue->timeout_work); + } +} + static int kbasep_kcpu_fence_signal_process(struct kbase_kcpu_command_queue *kcpu_queue, struct kbase_kcpu_command_fence_info *fence_info) { struct kbase_context *const kctx = kcpu_queue->kctx; int ret; + /* already force signaled */ + if (kbase_kcpu_command_fence_has_force_signaled(fence_info)) + return 0; + if (WARN_ON(!fence_info->fence)) return -EINVAL; @@ -1586,6 +1770,25 @@ static int kbasep_kcpu_fence_signal_process(struct kbase_kcpu_command_queue *kcp fence_info->fence->context, fence_info->fence->seqno); + /* If one has multiple enqueued fence signal commands, re-arm the timer */ + if (atomic_dec_return(&kcpu_queue->fence_signal_pending_cnt) > 0) { + fence_signal_timeout_start(kcpu_queue); +#ifdef CONFIG_MALI_FENCE_DEBUG + dev_dbg(kctx->kbdev->dev, + "kbase re-arm KCPU fence signal timeout timer for next signal command"); +#endif + } else { +#ifdef CONFIG_MALI_FENCE_DEBUG + int del = del_timer_sync(&kcpu_queue->fence_signal_timeout); + + dev_dbg(kctx->kbdev->dev, "kbase KCPU delete fence signal timeout timer ret: %d", + del); + CSTD_UNUSED(del); +#else + del_timer_sync(&kcpu_queue->fence_signal_timeout); +#endif + } + /* dma_fence refcount needs to be decreased to release it. */ kbase_fence_put(fence_info->fence); fence_info->fence = NULL; @@ -1614,6 +1817,10 @@ static int kbasep_kcpu_fence_signal_init(struct kbase_kcpu_command_queue *kcpu_q /* Set reference to KCPU metadata */ kcpu_fence->metadata = kcpu_queue->metadata; + /* Set reference to KCPU metadata and increment refcount */ + kcpu_fence->metadata = kcpu_queue->metadata; + WARN_ON(!kbase_refcount_inc_not_zero(&kcpu_fence->metadata->refcount)); + #if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) fence_out = (struct fence *)kcpu_fence; #else @@ -1635,8 +1842,6 @@ static int kbasep_kcpu_fence_signal_init(struct kbase_kcpu_command_queue *kcpu_q dma_fence_get(fence_out); #endif - WARN_ON(!kbase_refcount_inc_not_zero(&kcpu_fence->metadata->refcount)); - /* create a sync_file fd representing the fence */ *sync_file = sync_file_create(fence_out); if (!(*sync_file)) { @@ -1654,6 +1859,7 @@ static int kbasep_kcpu_fence_signal_init(struct kbase_kcpu_command_queue *kcpu_q current_command->type = BASE_KCPU_COMMAND_TYPE_FENCE_SIGNAL; current_command->info.fence.fence = fence_out; + kbase_kcpu_command_fence_force_signaled_set(¤t_command->info.fence, false); return 0; @@ -1700,6 +1906,10 @@ static int kbase_kcpu_fence_signal_prepare(struct kbase_kcpu_command_queue *kcpu * before returning success. 
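* Note that fd_install() publishes the file to the process' fd table and transfers * ownership of the file reference, so no failure path may run past this point.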
*/ fd_install(fd, sync_file->file); + + if (atomic_inc_return(&kcpu_queue->fence_signal_pending_cnt) == 1) + fence_signal_timeout_start(kcpu_queue); + return 0; fail: @@ -1732,6 +1942,90 @@ int kbase_kcpu_fence_signal_init(struct kbase_kcpu_command_queue *kcpu_queue, KBASE_EXPORT_TEST_API(kbase_kcpu_fence_signal_init); #endif /* CONFIG_SYNC_FILE */ +static void kcpu_queue_dump(struct kbase_kcpu_command_queue *queue) +{ + struct kbase_context *kctx = queue->kctx; + struct kbase_kcpu_command *cmd; + struct kbase_kcpu_command_fence_info *fence_info; + struct kbase_kcpu_dma_fence *kcpu_fence; +#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) + struct fence *fence; +#else + struct dma_fence *fence; +#endif + struct kbase_sync_fence_info info; + size_t i; + + mutex_lock(&queue->lock); + + /* Find the next fence signal command in the queue */ + for (i = 0; i != queue->num_pending_cmds; ++i) { + cmd = &queue->commands[(u8)(queue->start_offset + i)]; + if (cmd->type == BASE_KCPU_COMMAND_TYPE_FENCE_SIGNAL) { + fence_info = &cmd->info.fence; + /* find the first unforce signaled fence */ + if (!kbase_kcpu_command_fence_has_force_signaled(fence_info)) + break; + } + } + + if (i == queue->num_pending_cmds) { + dev_err(kctx->kbdev->dev, + "%s: No fence signal command found in ctx:%d_%d kcpu queue:%u", __func__, + kctx->tgid, kctx->id, queue->id); + mutex_unlock(&queue->lock); + return; + } + + + fence = kbase_fence_get(fence_info); + if (!fence) { + dev_err(kctx->kbdev->dev, "no fence found in ctx:%d_%d kcpu queue:%u", kctx->tgid, + kctx->id, queue->id); + mutex_unlock(&queue->lock); + return; + } + + kcpu_fence = kbase_kcpu_dma_fence_get(fence); + if (!kcpu_fence) { + dev_err(kctx->kbdev->dev, "no fence metadata found in ctx:%d_%d kcpu queue:%u", + kctx->tgid, kctx->id, queue->id); + kbase_fence_put(fence); + mutex_unlock(&queue->lock); + return; + } + + kbase_sync_fence_info_get(fence, &info); + + dev_warn(kctx->kbdev->dev, "------------------------------------------------\n"); + dev_warn(kctx->kbdev->dev, "KCPU Fence signal timeout detected for ctx:%d_%d\n", kctx->tgid, + kctx->id); + dev_warn(kctx->kbdev->dev, "------------------------------------------------\n"); + dev_warn(kctx->kbdev->dev, "Kcpu queue:%u still waiting for fence[%pK] context#seqno:%s\n", + queue->id, fence, info.name); + dev_warn(kctx->kbdev->dev, "Fence metadata timeline name: %s\n", + kcpu_fence->metadata->timeline_name); + + kbase_fence_put(fence); + mutex_unlock(&queue->lock); + + mutex_lock(&kctx->csf.kcpu_queues.lock); + kbasep_csf_sync_kcpu_dump_locked(kctx, NULL); + mutex_unlock(&kctx->csf.kcpu_queues.lock); + + dev_warn(kctx->kbdev->dev, "-----------------------------------------------\n"); +} + +static void kcpu_queue_timeout_worker(struct kthread_work *data) +{ + struct kbase_kcpu_command_queue *queue = + container_of(data, struct kbase_kcpu_command_queue, timeout_work); + + kcpu_queue_dump(queue); + + kcpu_queue_force_fence_signal(queue); +} + static void kcpu_queue_process_worker(struct kthread_work *data) { struct kbase_kcpu_command_queue *queue = container_of(data, @@ -2087,6 +2381,7 @@ static void kcpu_queue_process(struct kbase_kcpu_command_queue *queue, status = kbase_csf_queue_group_suspend_process( queue->kctx, sus_buf, cmd->info.suspend_buf_copy.group_handle); + if (status) queue->has_error = true; @@ -2579,6 +2874,7 @@ int kbase_csf_kcpu_queue_new(struct kbase_context *kctx, INIT_LIST_HEAD(&queue->jit_blocked); queue->has_error = false; kthread_init_work(&queue->work, kcpu_queue_process_worker); + 
kthread_init_work(&queue->timeout_work, kcpu_queue_timeout_worker); queue->id = idx; newq->id = idx; @@ -2594,9 +2890,96 @@ int kbase_csf_kcpu_queue_new(struct kbase_context *kctx, #ifdef CONFIG_MALI_FENCE_DEBUG kbase_timer_setup(&queue->fence_timeout, fence_timeout_callback); #endif + +#if IS_ENABLED(CONFIG_SYNC_FILE) + atomic_set(&queue->fence_signal_pending_cnt, 0); + kbase_timer_setup(&queue->fence_signal_timeout, fence_signal_timeout_cb); +#endif out: mutex_unlock(&kctx->csf.kcpu_queues.lock); return ret; } KBASE_EXPORT_TEST_API(kbase_csf_kcpu_queue_new); + +int kbase_csf_kcpu_queue_halt_timers(struct kbase_device *kbdev) +{ + struct kbase_context *kctx; + + list_for_each_entry(kctx, &kbdev->kctx_list, kctx_list_link) { + unsigned long queue_idx; + struct kbase_csf_kcpu_queue_context *kcpu_ctx = &kctx->csf.kcpu_queues; + + mutex_lock(&kcpu_ctx->lock); + + for_each_set_bit(queue_idx, kcpu_ctx->in_use, KBASEP_MAX_KCPU_QUEUES) { + struct kbase_kcpu_command_queue *kcpu_queue = kcpu_ctx->array[queue_idx]; + + if (unlikely(!kcpu_queue)) + continue; + + mutex_lock(&kcpu_queue->lock); + + if (atomic_read(&kcpu_queue->fence_signal_pending_cnt)) { + int ret = del_timer_sync(&kcpu_queue->fence_signal_timeout); + + dev_dbg(kbdev->dev, + "Fence signal timeout on KCPU queue(%lu), kctx (%d_%d) was %s on suspend", + queue_idx, kctx->tgid, kctx->id, + ret ? "pending" : "not pending"); + } + +#ifdef CONFIG_MALI_FENCE_DEBUG + if (kcpu_queue->fence_wait_processed) { + int ret = del_timer_sync(&kcpu_queue->fence_timeout); + + dev_dbg(kbdev->dev, + "Fence wait timeout on KCPU queue(%lu), kctx (%d_%d) was %s on suspend", + queue_idx, kctx->tgid, kctx->id, + ret ? "pending" : "not pending"); + } +#endif + mutex_unlock(&kcpu_queue->lock); + } + mutex_unlock(&kcpu_ctx->lock); + } + return 0; +} + +void kbase_csf_kcpu_queue_resume_timers(struct kbase_device *kbdev) +{ + struct kbase_context *kctx; + + list_for_each_entry(kctx, &kbdev->kctx_list, kctx_list_link) { + unsigned long queue_idx; + struct kbase_csf_kcpu_queue_context *kcpu_ctx = &kctx->csf.kcpu_queues; + + mutex_lock(&kcpu_ctx->lock); + + for_each_set_bit(queue_idx, kcpu_ctx->in_use, KBASEP_MAX_KCPU_QUEUES) { + struct kbase_kcpu_command_queue *kcpu_queue = kcpu_ctx->array[queue_idx]; + + if (unlikely(!kcpu_queue)) + continue; + + mutex_lock(&kcpu_queue->lock); +#ifdef CONFIG_MALI_FENCE_DEBUG + if (kcpu_queue->fence_wait_processed) { + fence_wait_timeout_start(kcpu_queue); + dev_dbg(kbdev->dev, + "Fence wait timeout on KCPU queue(%lu), kctx (%d_%d) has been resumed on system resume", + queue_idx, kctx->tgid, kctx->id); + } +#endif + if (atomic_read(&kbdev->fence_signal_timeout_enabled) && + atomic_read(&kcpu_queue->fence_signal_pending_cnt)) { + fence_signal_timeout_start(kcpu_queue); + dev_dbg(kbdev->dev, + "Fence signal timeout on KCPU queue(%lu), kctx (%d_%d) has been resumed on system resume", + queue_idx, kctx->tgid, kctx->id); + } + mutex_unlock(&kcpu_queue->lock); + } + mutex_unlock(&kcpu_ctx->lock); + } +} diff --git a/mali_kbase/csf/mali_kbase_csf_kcpu.h b/mali_kbase/csf/mali_kbase_csf_kcpu.h index 41c6e07..4a8d937 100644 --- a/mali_kbase/csf/mali_kbase_csf_kcpu.h +++ b/mali_kbase/csf/mali_kbase_csf_kcpu.h @@ -53,6 +53,7 @@ struct kbase_kcpu_command_import_info { * @fence_cb: Fence callback * @fence: Fence * @kcpu_queue: kcpu command queue + * @fence_has_force_signaled: fence has forced signaled after fence timeouted */ struct kbase_kcpu_command_fence_info { #if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) @@ -63,6 +64,7 @@ struct 
kbase_kcpu_command_fence_info { struct dma_fence *fence; #endif /* LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0) */ struct kbase_kcpu_command_queue *kcpu_queue; + bool fence_has_force_signaled; }; /** @@ -249,10 +251,13 @@ struct kbase_kcpu_command { * enqueued to this command queue. * @csf_kcpu_worker: Dedicated worker for processing kernel CPU command * queues. - * @work: struct work_struct which contains a pointer to + * @work: struct kthread_work which contains a pointer to * the function which handles processing of kcpu * commands enqueued into a kcpu command queue; * part of kernel API for processing workqueues + * @timeout_work: struct kthread_work which contains a pointer to the + * function which handles post-timeout actions + * queue when a fence signal timeout occurs. * @start_offset: Index of the command to be executed next * @id: KCPU command queue ID. * @num_pending_cmds: The number of commands enqueued but not yet @@ -284,6 +289,9 @@ struct kbase_kcpu_command { * @fence_timeout: Timer used to detect the fence wait timeout. * @metadata: Metadata structure containing basic information about * this queue for any fence objects associated with this queue. + * @fence_signal_timeout: Timer used for detect a fence signal command has + * been blocked for too long. + * @fence_signal_pending_cnt: Enqueued fence signal commands in the queue. */ struct kbase_kcpu_command_queue { struct mutex lock; @@ -291,6 +299,7 @@ struct kbase_kcpu_command_queue { struct kbase_kcpu_command commands[KBASEP_KCPU_QUEUE_SIZE]; struct kthread_worker csf_kcpu_worker; struct kthread_work work; + struct kthread_work timeout_work; u8 start_offset; u8 id; u16 num_pending_cmds; @@ -308,6 +317,8 @@ struct kbase_kcpu_command_queue { #if IS_ENABLED(CONFIG_SYNC_FILE) struct kbase_kcpu_dma_fence_meta *metadata; #endif /* CONFIG_SYNC_FILE */ + struct timer_list fence_signal_timeout; + atomic_t fence_signal_pending_cnt; }; /** @@ -382,4 +393,32 @@ int kbase_kcpu_fence_signal_init(struct kbase_kcpu_command_queue *kcpu_queue, struct base_fence *fence, struct sync_file **sync_file, int *fd); #endif /* CONFIG_SYNC_FILE */ +/* + * kbase_csf_kcpu_queue_halt_timers - Halt the KCPU fence timers associated with + * the kbase device. + * + * @kbdev: Kbase device + * + * Note that this function assumes that the caller has ensured that the + * kbase_device::kctx_list does not get updated during this function's runtime. + * At the moment, the function is only safe to call during system suspend, when + * the device PM active count has reached zero. + * + * Return: 0 on success, negative value otherwise. + */ +int kbase_csf_kcpu_queue_halt_timers(struct kbase_device *kbdev); + +/* + * kbase_csf_kcpu_queue_resume_timers - Resume the KCPU fence timers associated + * with the kbase device. + * + * @kbdev: Kbase device + * + * Note that this function assumes that the caller has ensured that the + * kbase_device::kctx_list does not get updated during this function's runtime. + * At the moment, the function is only safe to call during system resume. 
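+ * Timers are re-armed only for queues that still have a fence-wait in flight
+ * (when fence debugging is enabled) or pending fence-signal commands, the latter
+ * only while the fence-signal timeout is enabled.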
+ */ +void kbase_csf_kcpu_queue_resume_timers(struct kbase_device *kbdev); + +bool kbase_kcpu_command_fence_has_force_signaled(struct kbase_kcpu_command_fence_info *fence_info); #endif /* _KBASE_CSF_KCPU_H_ */ diff --git a/mali_kbase/csf/mali_kbase_csf_kcpu_fence_debugfs.c b/mali_kbase/csf/mali_kbase_csf_kcpu_fence_debugfs.c new file mode 100644 index 0000000..cd55f62 --- /dev/null +++ b/mali_kbase/csf/mali_kbase_csf_kcpu_fence_debugfs.c @@ -0,0 +1,151 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * + * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ +#include <linux/fs.h> +#include <linux/version.h> +#include <linux/module.h> +#if IS_ENABLED(CONFIG_DEBUG_FS) +#include <linux/debugfs.h> +#endif + +#include <mali_kbase.h> +#include <csf/mali_kbase_csf_kcpu_fence_debugfs.h> +#include <mali_kbase_hwaccess_time.h> + +#define BUF_SIZE 10 + +#if IS_ENABLED(CONFIG_DEBUG_FS) +static ssize_t kbase_csf_kcpu_queue_fence_signal_enabled_get(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + int ret; + struct kbase_device *kbdev = file->private_data; + + if (atomic_read(&kbdev->fence_signal_timeout_enabled)) + ret = simple_read_from_buffer(buf, count, ppos, "1\n", 2); + else + ret = simple_read_from_buffer(buf, count, ppos, "0\n", 2); + + return ret; +}; + +static ssize_t kbase_csf_kcpu_queue_fence_signal_enabled_set(struct file *file, + const char __user *buf, size_t count, + loff_t *ppos) +{ + int ret; + unsigned int enabled; + struct kbase_device *kbdev = file->private_data; + + ret = kstrtouint_from_user(buf, count, 10, &enabled); + if (ret < 0) + return ret; + + atomic_set(&kbdev->fence_signal_timeout_enabled, enabled); + + return count; +} + +static const struct file_operations kbase_csf_kcpu_queue_fence_signal_fops = { + .owner = THIS_MODULE, + .read = kbase_csf_kcpu_queue_fence_signal_enabled_get, + .write = kbase_csf_kcpu_queue_fence_signal_enabled_set, + .open = simple_open, + .llseek = default_llseek, +}; + +static ssize_t kbase_csf_kcpu_queue_fence_signal_timeout_get(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + int size; + char buffer[BUF_SIZE]; + struct kbase_device *kbdev = file->private_data; + unsigned int timeout_ms = kbase_get_timeout_ms(kbdev, KCPU_FENCE_SIGNAL_TIMEOUT); + + size = scnprintf(buffer, sizeof(buffer), "%u\n", timeout_ms); + return simple_read_from_buffer(buf, count, ppos, buffer, size); +} + +static ssize_t kbase_csf_kcpu_queue_fence_signal_timeout_set(struct file *file, + const char __user *buf, size_t count, + loff_t *ppos) +{ + int ret; + unsigned int timeout_ms; + struct kbase_device *kbdev = file->private_data; + + ret = kstrtouint_from_user(buf, count, 10, &timeout_ms); + if (ret < 0) + return ret; + + /* The timeout passed by the user is bounded when trying to insert it into + * the 
precomputed timeout table, so we don't need to do any more validation + * before-hand. + */ + kbase_device_set_timeout_ms(kbdev, KCPU_FENCE_SIGNAL_TIMEOUT, timeout_ms); + + return count; +} + +static const struct file_operations kbase_csf_kcpu_queue_fence_signal_timeout_fops = { + .owner = THIS_MODULE, + .read = kbase_csf_kcpu_queue_fence_signal_timeout_get, + .write = kbase_csf_kcpu_queue_fence_signal_timeout_set, + .open = simple_open, + .llseek = default_llseek, +}; + +int kbase_csf_fence_timer_debugfs_init(struct kbase_device *kbdev) +{ + struct dentry *file; + const mode_t mode = 0644; + + if (WARN_ON(IS_ERR_OR_NULL(kbdev->mali_debugfs_directory))) + return -1; + + file = debugfs_create_file("fence_signal_timeout_enable", mode, + kbdev->mali_debugfs_directory, kbdev, + &kbase_csf_kcpu_queue_fence_signal_fops); + + if (IS_ERR_OR_NULL(file)) { + dev_warn(kbdev->dev, "Unable to create fence signal timer toggle entry"); + return -1; + } + + file = debugfs_create_file("fence_signal_timeout_ms", mode, kbdev->mali_debugfs_directory, + kbdev, &kbase_csf_kcpu_queue_fence_signal_timeout_fops); + + if (IS_ERR_OR_NULL(file)) { + dev_warn(kbdev->dev, "Unable to create fence signal timeout entry"); + return -1; + } + return 0; +} + +#else +int kbase_csf_fence_timer_debugfs_init(struct kbase_device *kbdev) +{ + return 0; +} + +#endif +void kbase_csf_fence_timer_debugfs_term(struct kbase_device *kbdev) +{ +} diff --git a/mali_kbase/mali_kbase_bits.h b/mali_kbase/csf/mali_kbase_csf_kcpu_fence_debugfs.h index a085fd8..e3799fb 100644 --- a/mali_kbase/mali_kbase_bits.h +++ b/mali_kbase/csf/mali_kbase_csf_kcpu_fence_debugfs.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -18,14 +18,25 @@ * http://www.gnu.org/licenses/gpl-2.0.html. * */ +#ifndef _KBASE_CSF_KCPU_FENCE_SIGNAL_DEBUGFS_H_ +#define _KBASE_CSF_KCPU_FENCE_SIGNAL_DEBUGFS_H_ -#ifndef _KBASE_BITS_H_ -#define _KBASE_BITS_H_ +struct kbase_device; -#if (KERNEL_VERSION(4, 19, 0) <= LINUX_VERSION_CODE) -#include <linux/bits.h> -#else -#include <linux/bitops.h> -#endif +/* + * kbase_csf_fence_timer_debugfs_init - Initialize fence signal timeout debugfs + * entries. + * @kbdev: Kbase device. + * + * Return: 0 on success, -1 on failure. + */ +int kbase_csf_fence_timer_debugfs_init(struct kbase_device *kbdev); + +/* + * kbase_csf_fence_timer_debugfs_term - Terminate fence signal timeout debugfs + * entries. + * @kbdev: Kbase device. 
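+ * Currently a no-op: the entries are expected to be removed recursively together
+ * with the parent Mali debugfs directory.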
+ */ +void kbase_csf_fence_timer_debugfs_term(struct kbase_device *kbdev); -#endif /* _KBASE_BITS_H_ */ +#endif /* _KBASE_CSF_KCPU_FENCE_SIGNAL_DEBUGFS_H_ */ diff --git a/mali_kbase/csf/mali_kbase_csf_mcu_shared_reg.c b/mali_kbase/csf/mali_kbase_csf_mcu_shared_reg.c index bb5a092..863cf10 100644 --- a/mali_kbase/csf/mali_kbase_csf_mcu_shared_reg.c +++ b/mali_kbase/csf/mali_kbase_csf_mcu_shared_reg.c @@ -83,7 +83,7 @@ static unsigned long get_userio_mmu_flags(struct kbase_device *kbdev) static void set_page_meta_status_not_movable(struct tagged_addr phy) { - if (kbase_page_migration_enabled) { + if (kbase_is_page_migration_enabled()) { struct kbase_page_metadata *page_md = kbase_page_private(as_page(phy)); if (page_md) { @@ -117,7 +117,7 @@ static inline int insert_dummy_pages(struct kbase_device *kbdev, u64 vpfn, u32 n return kbase_mmu_insert_pages(kbdev, &kbdev->csf.mcu_mmu, vpfn, shared_regs->dummy_phys, nr_pages, mem_flags, MCU_AS_NR, KBASE_MEM_GROUP_CSF_FW, - mmu_sync_info, NULL, false); + mmu_sync_info, NULL); } /* Reset consecutive retry count to zero */ @@ -613,8 +613,7 @@ static int shared_mcu_csg_reg_init(struct kbase_device *kbdev, int err, i; INIT_LIST_HEAD(&csg_reg->link); - reg = kbase_alloc_free_region(kbdev, &kbdev->csf.shared_reg_rbtree, 0, nr_csg_reg_pages, - KBASE_REG_ZONE_MCU_SHARED); + reg = kbase_alloc_free_region(&kbdev->csf.mcu_shared_zone, 0, nr_csg_reg_pages); if (!reg) { dev_err(kbdev->dev, "%s: Failed to allocate a MCU shared region for %zu pages\n", @@ -667,18 +666,19 @@ static int shared_mcu_csg_reg_init(struct kbase_device *kbdev, fail_userio_pages_map_fail: while (i-- > 0) { vpfn = CSG_REG_USERIO_VPFN(reg, i, nr_susp_pages); - kbase_mmu_teardown_pages(kbdev, &kbdev->csf.mcu_mmu, vpfn, shared_regs->dummy_phys, - KBASEP_NUM_CS_USER_IO_PAGES, KBASEP_NUM_CS_USER_IO_PAGES, - MCU_AS_NR, true); + kbase_mmu_teardown_firmware_pages(kbdev, &kbdev->csf.mcu_mmu, vpfn, + shared_regs->dummy_phys, + KBASEP_NUM_CS_USER_IO_PAGES, + KBASEP_NUM_CS_USER_IO_PAGES, MCU_AS_NR); } vpfn = CSG_REG_PMOD_BUF_VPFN(reg, nr_susp_pages); - kbase_mmu_teardown_pages(kbdev, &kbdev->csf.mcu_mmu, vpfn, shared_regs->dummy_phys, - nr_susp_pages, nr_susp_pages, MCU_AS_NR, true); + kbase_mmu_teardown_firmware_pages(kbdev, &kbdev->csf.mcu_mmu, vpfn, shared_regs->dummy_phys, + nr_susp_pages, nr_susp_pages, MCU_AS_NR); fail_pmod_map_fail: vpfn = CSG_REG_SUSP_BUF_VPFN(reg, nr_susp_pages); - kbase_mmu_teardown_pages(kbdev, &kbdev->csf.mcu_mmu, vpfn, shared_regs->dummy_phys, - nr_susp_pages, nr_susp_pages, MCU_AS_NR, true); + kbase_mmu_teardown_firmware_pages(kbdev, &kbdev->csf.mcu_mmu, vpfn, shared_regs->dummy_phys, + nr_susp_pages, nr_susp_pages, MCU_AS_NR); fail_susp_map_fail: mutex_lock(&kbdev->csf.reg_lock); kbase_remove_va_region(kbdev, reg); @@ -701,17 +701,18 @@ static void shared_mcu_csg_reg_term(struct kbase_device *kbdev, for (i = 0; i < nr_csis; i++) { vpfn = CSG_REG_USERIO_VPFN(reg, i, nr_susp_pages); - kbase_mmu_teardown_pages(kbdev, &kbdev->csf.mcu_mmu, vpfn, shared_regs->dummy_phys, - KBASEP_NUM_CS_USER_IO_PAGES, KBASEP_NUM_CS_USER_IO_PAGES, - MCU_AS_NR, true); + kbase_mmu_teardown_firmware_pages(kbdev, &kbdev->csf.mcu_mmu, vpfn, + shared_regs->dummy_phys, + KBASEP_NUM_CS_USER_IO_PAGES, + KBASEP_NUM_CS_USER_IO_PAGES, MCU_AS_NR); } vpfn = CSG_REG_PMOD_BUF_VPFN(reg, nr_susp_pages); - kbase_mmu_teardown_pages(kbdev, &kbdev->csf.mcu_mmu, vpfn, shared_regs->dummy_phys, - nr_susp_pages, nr_susp_pages, MCU_AS_NR, true); + kbase_mmu_teardown_firmware_pages(kbdev, &kbdev->csf.mcu_mmu, vpfn, 
shared_regs->dummy_phys, + nr_susp_pages, nr_susp_pages, MCU_AS_NR); vpfn = CSG_REG_SUSP_BUF_VPFN(reg, nr_susp_pages); - kbase_mmu_teardown_pages(kbdev, &kbdev->csf.mcu_mmu, vpfn, shared_regs->dummy_phys, - nr_susp_pages, nr_susp_pages, MCU_AS_NR, true); + kbase_mmu_teardown_firmware_pages(kbdev, &kbdev->csf.mcu_mmu, vpfn, shared_regs->dummy_phys, + nr_susp_pages, nr_susp_pages, MCU_AS_NR); mutex_lock(&kbdev->csf.reg_lock); kbase_remove_va_region(kbdev, reg); diff --git a/mali_kbase/csf/mali_kbase_csf_registers.h b/mali_kbase/csf/mali_kbase_csf_registers.h index b5bf7bb..b5ca885 100644 --- a/mali_kbase/csf/mali_kbase_csf_registers.h +++ b/mali_kbase/csf/mali_kbase_csf_registers.h @@ -143,12 +143,15 @@ #define CSG_ACK_IRQ_MASK 0x0004 /* () Global acknowledge interrupt mask */ #define CSG_DB_REQ 0x0008 /* () Global doorbell request */ #define CSG_IRQ_ACK 0x000C /* () CS IRQ acknowledge */ + + #define CSG_ALLOW_COMPUTE_LO 0x0020 /* () Allowed compute endpoints, low word */ #define CSG_ALLOW_COMPUTE_HI 0x0024 /* () Allowed compute endpoints, high word */ #define CSG_ALLOW_FRAGMENT_LO 0x0028 /* () Allowed fragment endpoints, low word */ #define CSG_ALLOW_FRAGMENT_HI 0x002C /* () Allowed fragment endpoints, high word */ #define CSG_ALLOW_OTHER 0x0030 /* () Allowed other endpoints */ -#define CSG_EP_REQ 0x0034 /* () Maximum number of endpoints allowed */ +#define CSG_EP_REQ_LO 0x0034 /* () Maximum number of endpoints allowed, low word */ +#define CSG_EP_REQ_HI 0x0038 /* () Maximum number of endpoints allowed, high word */ #define CSG_SUSPEND_BUF_LO 0x0040 /* () Normal mode suspend buffer, low word */ #define CSG_SUSPEND_BUF_HI 0x0044 /* () Normal mode suspend buffer, high word */ #define CSG_PROTM_SUSPEND_BUF_LO 0x0048 /* () Protected mode suspend buffer, low word */ @@ -645,6 +648,7 @@ (((reg_val) & ~CS_STATUS_REQ_RESOURCE_IDVS_RESOURCES_MASK) | \ (((value) << CS_STATUS_REQ_RESOURCE_IDVS_RESOURCES_SHIFT) & CS_STATUS_REQ_RESOURCE_IDVS_RESOURCES_MASK)) + /* CS_STATUS_WAIT_SYNC_POINTER register */ #define CS_STATUS_WAIT_SYNC_POINTER_POINTER_SHIFT 0 #define CS_STATUS_WAIT_SYNC_POINTER_POINTER_MASK \ @@ -953,41 +957,46 @@ /* CSG_EP_REQ register */ #define CSG_EP_REQ_COMPUTE_EP_SHIFT 0 -#define CSG_EP_REQ_COMPUTE_EP_MASK (0xFF << CSG_EP_REQ_COMPUTE_EP_SHIFT) +#define CSG_EP_REQ_COMPUTE_EP_MASK ((u64)0xFF << CSG_EP_REQ_COMPUTE_EP_SHIFT) #define CSG_EP_REQ_COMPUTE_EP_GET(reg_val) (((reg_val)&CSG_EP_REQ_COMPUTE_EP_MASK) >> CSG_EP_REQ_COMPUTE_EP_SHIFT) -#define CSG_EP_REQ_COMPUTE_EP_SET(reg_val, value) \ - (((reg_val) & ~CSG_EP_REQ_COMPUTE_EP_MASK) | \ - (((value) << CSG_EP_REQ_COMPUTE_EP_SHIFT) & CSG_EP_REQ_COMPUTE_EP_MASK)) +#define CSG_EP_REQ_COMPUTE_EP_SET(reg_val, value) \ + (((reg_val) & ~CSG_EP_REQ_COMPUTE_EP_MASK) | \ + ((((u64)value) << CSG_EP_REQ_COMPUTE_EP_SHIFT) & CSG_EP_REQ_COMPUTE_EP_MASK)) #define CSG_EP_REQ_FRAGMENT_EP_SHIFT 8 -#define CSG_EP_REQ_FRAGMENT_EP_MASK (0xFF << CSG_EP_REQ_FRAGMENT_EP_SHIFT) +#define CSG_EP_REQ_FRAGMENT_EP_MASK ((u64)0xFF << CSG_EP_REQ_FRAGMENT_EP_SHIFT) #define CSG_EP_REQ_FRAGMENT_EP_GET(reg_val) (((reg_val)&CSG_EP_REQ_FRAGMENT_EP_MASK) >> CSG_EP_REQ_FRAGMENT_EP_SHIFT) -#define CSG_EP_REQ_FRAGMENT_EP_SET(reg_val, value) \ - (((reg_val) & ~CSG_EP_REQ_FRAGMENT_EP_MASK) | \ - (((value) << CSG_EP_REQ_FRAGMENT_EP_SHIFT) & CSG_EP_REQ_FRAGMENT_EP_MASK)) +#define CSG_EP_REQ_FRAGMENT_EP_SET(reg_val, value) \ + (((reg_val) & ~CSG_EP_REQ_FRAGMENT_EP_MASK) | \ + ((((u64)value) << CSG_EP_REQ_FRAGMENT_EP_SHIFT) & CSG_EP_REQ_FRAGMENT_EP_MASK)) #define CSG_EP_REQ_TILER_EP_SHIFT 
16 -#define CSG_EP_REQ_TILER_EP_MASK (0xF << CSG_EP_REQ_TILER_EP_SHIFT) +#define CSG_EP_REQ_TILER_EP_MASK ((u64)0xF << CSG_EP_REQ_TILER_EP_SHIFT) #define CSG_EP_REQ_TILER_EP_GET(reg_val) (((reg_val)&CSG_EP_REQ_TILER_EP_MASK) >> CSG_EP_REQ_TILER_EP_SHIFT) -#define CSG_EP_REQ_TILER_EP_SET(reg_val, value) \ - (((reg_val) & ~CSG_EP_REQ_TILER_EP_MASK) | (((value) << CSG_EP_REQ_TILER_EP_SHIFT) & CSG_EP_REQ_TILER_EP_MASK)) +#define CSG_EP_REQ_TILER_EP_SET(reg_val, value) \ + (((reg_val) & ~CSG_EP_REQ_TILER_EP_MASK) | \ + ((((u64)value) << CSG_EP_REQ_TILER_EP_SHIFT) & CSG_EP_REQ_TILER_EP_MASK)) #define CSG_EP_REQ_EXCLUSIVE_COMPUTE_SHIFT 20 -#define CSG_EP_REQ_EXCLUSIVE_COMPUTE_MASK (0x1 << CSG_EP_REQ_EXCLUSIVE_COMPUTE_SHIFT) +#define CSG_EP_REQ_EXCLUSIVE_COMPUTE_MASK ((u64)0x1 << CSG_EP_REQ_EXCLUSIVE_COMPUTE_SHIFT) #define CSG_EP_REQ_EXCLUSIVE_COMPUTE_GET(reg_val) \ (((reg_val)&CSG_EP_REQ_EXCLUSIVE_COMPUTE_MASK) >> CSG_EP_REQ_EXCLUSIVE_COMPUTE_SHIFT) -#define CSG_EP_REQ_EXCLUSIVE_COMPUTE_SET(reg_val, value) \ - (((reg_val) & ~CSG_EP_REQ_EXCLUSIVE_COMPUTE_MASK) | \ - (((value) << CSG_EP_REQ_EXCLUSIVE_COMPUTE_SHIFT) & CSG_EP_REQ_EXCLUSIVE_COMPUTE_MASK)) +#define CSG_EP_REQ_EXCLUSIVE_COMPUTE_SET(reg_val, value) \ + (((reg_val) & ~CSG_EP_REQ_EXCLUSIVE_COMPUTE_MASK) | \ + ((((u64)value) << CSG_EP_REQ_EXCLUSIVE_COMPUTE_SHIFT) & \ + CSG_EP_REQ_EXCLUSIVE_COMPUTE_MASK)) #define CSG_EP_REQ_EXCLUSIVE_FRAGMENT_SHIFT 21 -#define CSG_EP_REQ_EXCLUSIVE_FRAGMENT_MASK (0x1 << CSG_EP_REQ_EXCLUSIVE_FRAGMENT_SHIFT) +#define CSG_EP_REQ_EXCLUSIVE_FRAGMENT_MASK ((u64)0x1 << CSG_EP_REQ_EXCLUSIVE_FRAGMENT_SHIFT) #define CSG_EP_REQ_EXCLUSIVE_FRAGMENT_GET(reg_val) \ (((reg_val)&CSG_EP_REQ_EXCLUSIVE_FRAGMENT_MASK) >> CSG_EP_REQ_EXCLUSIVE_FRAGMENT_SHIFT) -#define CSG_EP_REQ_EXCLUSIVE_FRAGMENT_SET(reg_val, value) \ - (((reg_val) & ~CSG_EP_REQ_EXCLUSIVE_FRAGMENT_MASK) | \ - (((value) << CSG_EP_REQ_EXCLUSIVE_FRAGMENT_SHIFT) & CSG_EP_REQ_EXCLUSIVE_FRAGMENT_MASK)) +#define CSG_EP_REQ_EXCLUSIVE_FRAGMENT_SET(reg_val, value) \ + (((reg_val) & ~CSG_EP_REQ_EXCLUSIVE_FRAGMENT_MASK) | \ + ((((u64)value) << CSG_EP_REQ_EXCLUSIVE_FRAGMENT_SHIFT) & \ + CSG_EP_REQ_EXCLUSIVE_FRAGMENT_MASK)) #define CSG_EP_REQ_PRIORITY_SHIFT 28 -#define CSG_EP_REQ_PRIORITY_MASK (0xF << CSG_EP_REQ_PRIORITY_SHIFT) +#define CSG_EP_REQ_PRIORITY_MASK ((u64)0xF << CSG_EP_REQ_PRIORITY_SHIFT) #define CSG_EP_REQ_PRIORITY_GET(reg_val) (((reg_val)&CSG_EP_REQ_PRIORITY_MASK) >> CSG_EP_REQ_PRIORITY_SHIFT) -#define CSG_EP_REQ_PRIORITY_SET(reg_val, value) \ - (((reg_val) & ~CSG_EP_REQ_PRIORITY_MASK) | (((value) << CSG_EP_REQ_PRIORITY_SHIFT) & CSG_EP_REQ_PRIORITY_MASK)) +#define CSG_EP_REQ_PRIORITY_SET(reg_val, value) \ + (((reg_val) & ~CSG_EP_REQ_PRIORITY_MASK) | \ + ((((u64)value) << CSG_EP_REQ_PRIORITY_SHIFT) & CSG_EP_REQ_PRIORITY_MASK)) + /* CSG_SUSPEND_BUF register */ #define CSG_SUSPEND_BUF_POINTER_SHIFT 0 @@ -1096,6 +1105,7 @@ (((reg_val) & ~CSG_STATUS_EP_CURRENT_TILER_EP_MASK) | \ (((value) << CSG_STATUS_EP_CURRENT_TILER_EP_SHIFT) & CSG_STATUS_EP_CURRENT_TILER_EP_MASK)) + /* CSG_STATUS_EP_REQ register */ #define CSG_STATUS_EP_REQ_COMPUTE_EP_SHIFT 0 #define CSG_STATUS_EP_REQ_COMPUTE_EP_MASK (0xFF << CSG_STATUS_EP_REQ_COMPUTE_EP_SHIFT) @@ -1133,6 +1143,7 @@ (((reg_val) & ~CSG_STATUS_EP_REQ_EXCLUSIVE_FRAGMENT_MASK) | \ (((value) << CSG_STATUS_EP_REQ_EXCLUSIVE_FRAGMENT_SHIFT) & CSG_STATUS_EP_REQ_EXCLUSIVE_FRAGMENT_MASK)) + /* End of CSG_OUTPUT_BLOCK register set definitions */ /* STREAM_CONTROL_BLOCK register set definitions */ @@ -1481,6 +1492,20 @@ #define 
GLB_PWROFF_TIMER_TIMER_SOURCE_GPU_COUNTER 0x1 /* End of GLB_PWROFF_TIMER_TIMER_SOURCE values */ +/* GLB_PWROFF_TIMER_CONFIG register */ +#ifndef GLB_PWROFF_TIMER_CONFIG +#define GLB_PWROFF_TIMER_CONFIG 0x0088 /* () Configuration fields for GLB_PWROFF_TIMER */ +#define GLB_PWROFF_TIMER_CONFIG_NO_MODIFIER_SHIFT 0 +#define GLB_PWROFF_TIMER_CONFIG_NO_MODIFIER_MASK (0x1 << GLB_PWROFF_TIMER_CONFIG_NO_MODIFIER_SHIFT) +#define GLB_PWROFF_TIMER_CONFIG_NO_MODIFIER_GET(reg_val) \ + (((reg_val)&GLB_PWROFF_TIMER_CONFIG_NO_MODIFIER_MASK) >> \ + GLB_PWROFF_TIMER_CONFIG_NO_MODIFIER_SHIFT) +#define GLB_PWROFF_TIMER_CONFIG_NO_MODIFIER_SET(reg_val, value) \ + (((reg_val) & ~GLB_PWROFF_TIMER_CONFIG_NO_MODIFIER_MASK) | \ + (((value) << GLB_PWROFF_TIMER_CONFIG_NO_MODIFIER_SHIFT) & \ + GLB_PWROFF_TIMER_CONFIG_NO_MODIFIER_MASK)) +#endif /* End of GLB_PWROFF_TIMER_CONFIG values */ + /* GLB_ALLOC_EN register */ #define GLB_ALLOC_EN_MASK_SHIFT 0 #define GLB_ALLOC_EN_MASK_MASK (GPU_ULL(0xFFFFFFFFFFFFFFFF) << GLB_ALLOC_EN_MASK_SHIFT) @@ -1546,6 +1571,20 @@ #define GLB_IDLE_TIMER_TIMER_SOURCE_GPU_COUNTER 0x1 /* End of GLB_IDLE_TIMER_TIMER_SOURCE values */ +/* GLB_IDLE_TIMER_CONFIG values */ +#ifndef GLB_IDLE_TIMER_CONFIG +#define GLB_IDLE_TIMER_CONFIG 0x0084 /* () Configuration fields for GLB_IDLE_TIMER */ +#define GLB_IDLE_TIMER_CONFIG_NO_MODIFIER_SHIFT 0 +#define GLB_IDLE_TIMER_CONFIG_NO_MODIFIER_MASK (0x1 << GLB_IDLE_TIMER_CONFIG_NO_MODIFIER_SHIFT) +#define GLB_IDLE_TIMER_CONFIG_NO_MODIFIER_GET(reg_val) \ + (((reg_val)&GLB_IDLE_TIMER_CONFIG_NO_MODIFIER_MASK) >> \ + GLB_IDLE_TIMER_CONFIG_NO_MODIFIER_SHIFT) +#define GLB_IDLE_TIMER_CONFIG_NO_MODIFIER_SET(reg_val, value) \ + (((reg_val) & ~GLB_IDLE_TIMER_CONFIG_NO_MODIFIER_MASK) | \ + (((value) << GLB_IDLE_TIMER_CONFIG_NO_MODIFIER_SHIFT) & \ + GLB_IDLE_TIMER_CONFIG_NO_MODIFIER_MASK)) +#endif /* End of GLB_IDLE_TIMER_CONFIG values */ + /* GLB_INSTR_FEATURES register */ #define GLB_INSTR_FEATURES_OFFSET_UPDATE_RATE_SHIFT (0) #define GLB_INSTR_FEATURES_OFFSET_UPDATE_RATE_MASK ((u32)0xF << GLB_INSTR_FEATURES_OFFSET_UPDATE_RATE_SHIFT) @@ -1670,6 +1709,7 @@ (((reg_val) & ~GLB_DEBUG_ACK_RUN_MODE_MASK) | \ (((value) << GLB_DEBUG_ACK_RUN_MODE_SHIFT) & GLB_DEBUG_ACK_RUN_MODE_MASK)) + /* RUN_MODE values */ #define GLB_DEBUG_RUN_MODE_TYPE_NOP 0x0 #define GLB_DEBUG_RUN_MODE_TYPE_CORE_DUMP 0x1 diff --git a/mali_kbase/csf/mali_kbase_csf_reset_gpu.c b/mali_kbase/csf/mali_kbase_csf_reset_gpu.c index d076f3d..b8ad3a4 100644 --- a/mali_kbase/csf/mali_kbase_csf_reset_gpu.c +++ b/mali_kbase/csf/mali_kbase_csf_reset_gpu.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -196,7 +196,7 @@ static void kbase_csf_reset_begin_hw_access_sync( */ spin_lock_irqsave(&kbdev->hwaccess_lock, hwaccess_lock_flags); kbase_csf_scheduler_spin_lock(kbdev, &scheduler_spin_lock_flags); - atomic_set(&kbdev->csf.reset.state, KBASE_RESET_GPU_HAPPENING); + atomic_set(&kbdev->csf.reset.state, KBASE_CSF_RESET_GPU_HAPPENING); kbase_csf_scheduler_spin_unlock(kbdev, scheduler_spin_lock_flags); spin_unlock_irqrestore(&kbdev->hwaccess_lock, hwaccess_lock_flags); } @@ -257,14 +257,15 @@ void kbase_csf_debug_dump_registers(struct kbase_device *kbdev) kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT)), kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_STATUS)), kbase_reg_read(kbdev, GPU_CONTROL_REG(MCU_STATUS))); - dev_err(kbdev->dev, " JOB_IRQ_RAWSTAT=0x%08x MMU_IRQ_RAWSTAT=0x%08x GPU_FAULTSTATUS=0x%08x", + dev_err(kbdev->dev, + " JOB_IRQ_RAWSTAT=0x%08x MMU_IRQ_RAWSTAT=0x%08x GPU_FAULTSTATUS=0x%08x", kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_RAWSTAT)), - kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_RAWSTAT)), + kbase_reg_read(kbdev, MMU_CONTROL_REG(MMU_IRQ_RAWSTAT)), kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_FAULTSTATUS))); dev_err(kbdev->dev, " GPU_IRQ_MASK=0x%08x JOB_IRQ_MASK=0x%08x MMU_IRQ_MASK=0x%08x", kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK)), kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK)), - kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK))); + kbase_reg_read(kbdev, MMU_CONTROL_REG(MMU_IRQ_MASK))); dev_err(kbdev->dev, " PWR_OVERRIDE0=0x%08x PWR_OVERRIDE1=0x%08x", kbase_reg_read(kbdev, GPU_CONTROL_REG(PWR_OVERRIDE0)), kbase_reg_read(kbdev, GPU_CONTROL_REG(PWR_OVERRIDE1))); @@ -388,10 +389,12 @@ static enum kbasep_soft_reset_status kbase_csf_reset_gpu_once(struct kbase_devic rt_mutex_unlock(&kbdev->pm.lock); if (err) { + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); if (!kbase_pm_l2_is_in_desired_state(kbdev)) ret = L2_ON_FAILED; else if (!kbase_pm_mcu_is_in_desired_state(kbdev)) ret = MCU_REINIT_FAILED; + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); } return ret; diff --git a/mali_kbase/csf/mali_kbase_csf_scheduler.c b/mali_kbase/csf/mali_kbase_csf_scheduler.c index f21067f..2573e3f 100644 --- a/mali_kbase/csf/mali_kbase_csf_scheduler.c +++ b/mali_kbase/csf/mali_kbase_csf_scheduler.c @@ -19,6 +19,8 @@ * */ +#include <linux/kthread.h> + #include <mali_kbase.h> #include "mali_kbase_config_defaults.h" #include <mali_kbase_ctx_sched.h> @@ -36,6 +38,11 @@ #include "mali_kbase_csf_tiler_heap.h" #include "mali_kbase_csf_tiler_heap_reclaim.h" #include "mali_kbase_csf_mcu_shared_reg.h" +#include <linux/version_compat_defs.h> +#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) +#include <mali_kbase_gpu_metrics.h> +#include <csf/mali_kbase_csf_trace_buffer.h> +#endif /* CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD */ /* Value to indicate that a queue group is not groups_to_schedule list */ #define KBASEP_GROUP_PREPARED_SEQ_NUM_INVALID (U32_MAX) @@ -202,6 +209,222 @@ static bool queue_empty_or_blocked(struct kbase_queue *queue) } #endif +#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) +/** + * gpu_metrics_ctx_init() - Take a reference on GPU metrics context if it exists, + * otherwise allocate and initialise one. + * + * @kctx: Pointer to the Kbase context. + * + * The GPU metrics context represents an "Application" for the purposes of GPU metrics + * reporting. 
There may be multiple kbase_contexts contributing data to a single GPU + * metrics context. + * This function takes a reference on GPU metrics context if it already exists + * corresponding to the Application that is creating the Kbase context, otherwise + * memory is allocated for it and initialised. + * + * Return: 0 on success, or negative on failure. + */ +static inline int gpu_metrics_ctx_init(struct kbase_context *kctx) +{ + struct kbase_gpu_metrics_ctx *gpu_metrics_ctx; + struct kbase_device *kbdev = kctx->kbdev; + int ret = 0; + + const struct cred *cred = get_current_cred(); + const unsigned int aid = cred->euid.val; + + put_cred(cred); + + /* Return early if this is not a Userspace created context */ + if (unlikely(!kctx->kfile)) + return 0; + + /* Serialize against the other threads trying to create/destroy Kbase contexts. */ + mutex_lock(&kbdev->kctx_list_lock); + rt_mutex_lock(&kbdev->csf.scheduler.lock); + gpu_metrics_ctx = kbase_gpu_metrics_ctx_get(kbdev, aid); + rt_mutex_unlock(&kbdev->csf.scheduler.lock); + + if (!gpu_metrics_ctx) { + gpu_metrics_ctx = kmalloc(sizeof(*gpu_metrics_ctx), GFP_KERNEL); + + if (gpu_metrics_ctx) { + rt_mutex_lock(&kbdev->csf.scheduler.lock); + kbase_gpu_metrics_ctx_init(kbdev, gpu_metrics_ctx, aid); + rt_mutex_unlock(&kbdev->csf.scheduler.lock); + } else { + dev_err(kbdev->dev, "Allocation for gpu_metrics_ctx failed"); + ret = -ENOMEM; + } + } + + kctx->gpu_metrics_ctx = gpu_metrics_ctx; + mutex_unlock(&kbdev->kctx_list_lock); + + return ret; +} + +/** + * gpu_metrics_ctx_term() - Drop a reference on a GPU metrics context and free it + * if the refcount becomes 0. + * + * @kctx: Pointer to the Kbase context. + */ +static inline void gpu_metrics_ctx_term(struct kbase_context *kctx) +{ + /* Return early if this is not a Userspace created context */ + if (unlikely(!kctx->kfile)) + return; + + /* Serialize against the other threads trying to create/destroy Kbase contexts. */ + mutex_lock(&kctx->kbdev->kctx_list_lock); + rt_mutex_lock(&kctx->kbdev->csf.scheduler.lock); + kbase_gpu_metrics_ctx_put(kctx->kbdev, kctx->gpu_metrics_ctx); + rt_mutex_unlock(&kctx->kbdev->csf.scheduler.lock); + mutex_unlock(&kctx->kbdev->kctx_list_lock); +} + +/** + * struct gpu_metrics_event - A GPU metrics event recorded in trace buffer. + * + * @csg_slot_act: The 32bit data consisting of a GPU metrics event. + * 5 bits[4:0] represents CSG slot number. + * 1 bit [5] represents the transition of the CSG group on the slot. + * '1' means idle->active whilst '0' does active->idle. + * @timestamp: 64bit timestamp consisting of a GPU metrics event. + * + * Note: It's packed and word-aligned as agreed layout with firmware. + */ +struct gpu_metrics_event { + u32 csg_slot_act; + u64 timestamp; +} __packed __aligned(4); +#define GPU_METRICS_EVENT_SIZE sizeof(struct gpu_metrics_event) + +#define GPU_METRICS_ACT_SHIFT 5 +#define GPU_METRICS_ACT_MASK (0x1 << GPU_METRICS_ACT_SHIFT) +#define GPU_METRICS_ACT_GET(val) (((val)&GPU_METRICS_ACT_MASK) >> GPU_METRICS_ACT_SHIFT) + +#define GPU_METRICS_CSG_MASK 0x1f +#define GPU_METRICS_CSG_GET(val) ((val)&GPU_METRICS_CSG_MASK) + +/** + * gpu_metrics_read_event() - Read a GPU metrics trace from trace buffer + * + * @kbdev: Pointer to the device + * @kctx: Kcontext that is derived from CSG slot field of a GPU metrics. + * @prev_act: Previous CSG activity transition in a GPU metrics. + * @cur_act: Current CSG activity transition in a GPU metrics. + * @ts: CSG activity transition timestamp in a GPU metrics. 
+ * + * This function reads the firmware trace buffer named 'gpu_metrics' and + * parses one 12-byte data packet into the following information: + * - The CSG slot number on which the CSG transitioned to active or idle. + * - The activity transition (1: idle->active, 0: active->idle). + * - The timestamp in nanoseconds at which the transition occurred. + * + * Return: true on success. + */ +static bool gpu_metrics_read_event(struct kbase_device *kbdev, struct kbase_context **kctx, + bool *prev_act, bool *cur_act, uint64_t *ts) +{ + struct firmware_trace_buffer *tb = kbdev->csf.scheduler.gpu_metrics_tb; + struct gpu_metrics_event e; + + if (kbase_csf_firmware_trace_buffer_read_data(tb, (u8 *)&e, GPU_METRICS_EVENT_SIZE) == + GPU_METRICS_EVENT_SIZE) { + const u8 slot = GPU_METRICS_CSG_GET(e.csg_slot_act); + struct kbase_queue_group *group = + kbdev->csf.scheduler.csg_slots[slot].resident_group; + + if (unlikely(!group)) { + dev_err(kbdev->dev, "failed to find CSG group from CSG slot(%u)", slot); + return false; + } + + *cur_act = GPU_METRICS_ACT_GET(e.csg_slot_act); + *ts = kbase_backend_time_convert_gpu_to_cpu(kbdev, e.timestamp); + *kctx = group->kctx; + + *prev_act = group->prev_act; + group->prev_act = *cur_act; + + return true; + } + + dev_err(kbdev->dev, "failed to read a GPU metrics from trace buffer"); + + return false; +} + +/** + * emit_gpu_metrics_to_frontend() - Emit GPU metrics events to the frontend. + * + * @kbdev: Pointer to the device + * + * This function emits GPU metrics data to the frontend whenever required. + * Calls to this function are serialized by the scheduler lock. + * + * Kbase reports invalid activity traces when they are detected. + */ +static void emit_gpu_metrics_to_frontend(struct kbase_device *kbdev) +{ + u64 system_time = 0; + u64 ts_before_drain; + u64 ts = 0; + + lockdep_assert_held(&kbdev->csf.scheduler.lock); + +#if IS_ENABLED(CONFIG_MALI_NO_MALI) + return; +#endif + + if (WARN_ON_ONCE(kbdev->csf.scheduler.state == SCHED_SUSPENDED)) + return; + + kbase_backend_get_gpu_time_norequest(kbdev, NULL, &system_time, NULL); + ts_before_drain = kbase_backend_time_convert_gpu_to_cpu(kbdev, system_time); + + while (!kbase_csf_firmware_trace_buffer_is_empty(kbdev->csf.scheduler.gpu_metrics_tb)) { + struct kbase_context *kctx; + bool prev_act; + bool cur_act; + + if (gpu_metrics_read_event(kbdev, &kctx, &prev_act, &cur_act, &ts)) { + if (prev_act == cur_act) { + /* Error handling + * + * For an active CSG, Kbase will try to recover the + * lost event by ending the previously active event and + * starting a new one. + * + * For an inactive CSG, the event is dropped as Kbase + * cannot recover. + */ + dev_err(kbdev->dev, + "Invalid activity state transition. (prev_act = %u, cur_act = %u)", + prev_act, cur_act); + if (cur_act) { + kbase_gpu_metrics_ctx_end_activity(kctx, ts); + kbase_gpu_metrics_ctx_start_activity(kctx, ts); + } + } else { + /* Normal handling */ + if (cur_act) + kbase_gpu_metrics_ctx_start_activity(kctx, ts); + else + kbase_gpu_metrics_ctx_end_activity(kctx, ts); + } + } else + break; + } + + kbase_gpu_metrics_emit_tracepoint(kbdev, ts >= ts_before_drain ? ts + 1 : ts_before_drain); +} +#endif /* CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD */ + /** * wait_for_dump_complete_on_group_deschedule() - Wait for dump on fault and * scheduling tick/tock to complete before the group deschedule.
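As an aside, a minimal userspace sketch of how one of these packed 12-byte gpu_metrics_event packets decodes, using the field layout fixed above (bits [4:0] CSG slot, bit 5 transition direction, then a 64-bit timestamp); the struct and function names here are illustrative and not part of the patch:

#include <inttypes.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Same 12-byte layout as struct gpu_metrics_event above. */
struct gpu_metrics_event_sketch {
	uint32_t csg_slot_act;
	uint64_t timestamp;
} __attribute__((__packed__, __aligned__(4)));

static void decode_gpu_metrics_event(const struct gpu_metrics_event_sketch *e)
{
	const unsigned int slot = e->csg_slot_act & 0x1f; /* GPU_METRICS_CSG_GET() */
	const bool active = (e->csg_slot_act >> 5) & 0x1; /* GPU_METRICS_ACT_GET() */

	printf("CSG slot %u: %s at %" PRIu64 " ns\n", slot,
	       active ? "idle->active" : "active->idle", e->timestamp);
}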
@@ -424,79 +647,20 @@ out: * * @timer: Pointer to the scheduling tick hrtimer * - * This function will enqueue the scheduling tick work item for immediate - * execution, if it has not been queued already. + * This function will wake up kbase_csf_scheduler_kthread() to process a + * pending scheduling tick. It will be restarted manually once a tick has been + * processed if appropriate. * * Return: enum value to indicate that timer should not be restarted. */ static enum hrtimer_restart tick_timer_callback(struct hrtimer *timer) { - struct kbase_device *kbdev = container_of(timer, struct kbase_device, - csf.scheduler.tick_timer); - - kbase_csf_scheduler_tick_advance(kbdev); - return HRTIMER_NORESTART; -} - -/** - * start_tick_timer() - Start the scheduling tick hrtimer. - * - * @kbdev: Pointer to the device - * - * This function will start the scheduling tick hrtimer and is supposed to - * be called only from the tick work item function. The tick hrtimer should - * not be active already. - */ -static void start_tick_timer(struct kbase_device *kbdev) -{ - struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; - unsigned long flags; - - lockdep_assert_held(&scheduler->lock); - - spin_lock_irqsave(&scheduler->interrupt_lock, flags); - if (likely(!scheduler->tick_timer_active)) { - scheduler->tick_timer_active = true; - - hrtimer_start(&scheduler->tick_timer, - HR_TIMER_DELAY_MSEC(scheduler->csg_scheduling_period_ms), - HRTIMER_MODE_REL); - } - spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); -} - -/** - * cancel_tick_timer() - Cancel the scheduling tick hrtimer - * - * @kbdev: Pointer to the device - */ -static void cancel_tick_timer(struct kbase_device *kbdev) -{ - struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; - unsigned long flags; - - spin_lock_irqsave(&scheduler->interrupt_lock, flags); - scheduler->tick_timer_active = false; - spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); - hrtimer_cancel(&scheduler->tick_timer); -} - -/** - * enqueue_tick_work() - Enqueue the scheduling tick work item - * - * @kbdev: Pointer to the device - * - * This function will queue the scheduling tick work item for immediate - * execution. This shall only be called when both the tick hrtimer and tick - * work item are not active/pending. - */ -static void enqueue_tick_work(struct kbase_device *kbdev) -{ - struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; - - lockdep_assert_held(&scheduler->lock); + struct kbase_device *kbdev = + container_of(timer, struct kbase_device, csf.scheduler.tick_timer); kbase_csf_scheduler_invoke_tick(kbdev); + + return HRTIMER_NORESTART; } static void release_doorbell(struct kbase_device *kbdev, int doorbell_nr) @@ -642,8 +806,14 @@ static void update_on_slot_queues_offsets(struct kbase_device *kbdev) if (queue && queue->user_io_addr) { u64 const *const output_addr = - (u64 const *)(queue->user_io_addr + PAGE_SIZE); + (u64 const *)(queue->user_io_addr + + PAGE_SIZE / sizeof(u64)); + /* + * This 64-bit read will be atomic on a 64-bit kernel but may not + * be atomic on 32-bit kernels. Support for 32-bit kernels is + * limited to build-only. + */ queue->extract_ofs = output_addr[CS_EXTRACT_LO / sizeof(u64)]; } } @@ -698,7 +868,7 @@ bool kbase_csf_scheduler_process_gpu_idle_event(struct kbase_device *kbdev) * updated whilst gpu_idle_worker() is executing. 
*/ scheduler->fast_gpu_idle_handling = - (kbdev->csf.gpu_idle_hysteresis_us == 0) || + (kbdev->csf.gpu_idle_hysteresis_ns == 0) || !kbase_csf_scheduler_all_csgs_idle(kbdev); /* The GPU idle worker relies on update_on_slot_queues_offsets() to have @@ -713,8 +883,8 @@ bool kbase_csf_scheduler_process_gpu_idle_event(struct kbase_device *kbdev) } #endif } else { - /* Advance the scheduling tick to get the non-idle suspended groups loaded soon */ - kbase_csf_scheduler_tick_advance_nolock(kbdev); + /* Invoke the scheduling tick to get the non-idle suspended groups loaded soon */ + kbase_csf_scheduler_invoke_tick(kbdev); } return ack_gpu_idle_event; @@ -806,6 +976,14 @@ static bool queue_group_scheduled_locked(struct kbase_queue_group *group) return queue_group_scheduled(group); } +static void update_idle_protm_group_state_to_runnable(struct kbase_queue_group *group) +{ + lockdep_assert_held(&group->kctx->kbdev->csf.scheduler.lock); + + group->run_state = KBASE_CSF_GROUP_RUNNABLE; + KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, CSF_GROUP_RUNNABLE, group, group->run_state); +} + /** * scheduler_protm_wait_quit() - Wait for GPU to exit protected mode. * @@ -889,24 +1067,6 @@ static void scheduler_force_protm_exit(struct kbase_device *kbdev) } /** - * scheduler_timer_is_enabled_nolock() - Check if the scheduler wakes up - * automatically for periodic tasks. - * - * @kbdev: Pointer to the device - * - * This is a variant of kbase_csf_scheduler_timer_is_enabled() that assumes the - * CSF scheduler lock to already have been held. - * - * Return: true if the scheduler is configured to wake up periodically - */ -static bool scheduler_timer_is_enabled_nolock(struct kbase_device *kbdev) -{ - lockdep_assert_held(&kbdev->csf.scheduler.lock); - - return kbdev->csf.scheduler.timer_enabled; -} - -/** * scheduler_pm_active_handle_suspend() - Acquire the PM reference count for * Scheduler * @@ -1694,9 +1854,9 @@ static void update_hw_active(struct kbase_queue *queue, bool active) { #if IS_ENABLED(CONFIG_MALI_NO_MALI) if (queue && queue->enabled) { - u32 *output_addr = (u32 *)(queue->user_io_addr + PAGE_SIZE); + u64 *output_addr = queue->user_io_addr + PAGE_SIZE / sizeof(u64); - output_addr[CS_ACTIVE / sizeof(u32)] = active; + output_addr[CS_ACTIVE / sizeof(*output_addr)] = active; } #else CSTD_UNUSED(queue); @@ -1706,11 +1866,16 @@ static void update_hw_active(struct kbase_queue *queue, bool active) static void program_cs_extract_init(struct kbase_queue *queue) { - u64 *input_addr = (u64 *)queue->user_io_addr; - u64 *output_addr = (u64 *)(queue->user_io_addr + PAGE_SIZE); + u64 *input_addr = queue->user_io_addr; + u64 *output_addr = queue->user_io_addr + PAGE_SIZE / sizeof(u64); - input_addr[CS_EXTRACT_INIT_LO / sizeof(u64)] = - output_addr[CS_EXTRACT_LO / sizeof(u64)]; + /* + * These 64-bit reads and writes will be atomic on a 64-bit kernel but may + * not be atomic on 32-bit kernels. Support for 32-bit kernels is limited to + * build-only. 
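+ * CS_EXTRACT_INIT is seeded from the firmware's current CS_EXTRACT so that the
+ * stream resumes from the last extracted offset when it is re-enabled.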
+ */ + input_addr[CS_EXTRACT_INIT_LO / sizeof(*input_addr)] = + output_addr[CS_EXTRACT_LO / sizeof(*output_addr)]; } static void program_cs_trace_cfg(struct kbase_csf_cmd_stream_info *stream, @@ -1930,7 +2095,7 @@ int kbase_csf_scheduler_queue_start(struct kbase_queue *queue) kbase_reset_gpu_assert_prevented(kbdev); lockdep_assert_held(&queue->kctx->csf.lock); - if (WARN_ON(!group || queue->bind_state != KBASE_CSF_QUEUE_BOUND)) + if (WARN_ON_ONCE(!group || queue->bind_state != KBASE_CSF_QUEUE_BOUND)) return -EINVAL; rt_mutex_lock(&kbdev->csf.scheduler.lock); @@ -2402,7 +2567,7 @@ static void schedule_in_cycle(struct kbase_queue_group *group, bool force) * of work needs to be enforced in situation such as entering into * protected mode). */ - if (likely(scheduler_timer_is_enabled_nolock(kbdev)) || force) { + if (likely(kbase_csf_scheduler_timer_is_enabled(kbdev)) || force) { dev_dbg(kbdev->dev, "Kicking async for group %d\n", group->handle); kbase_csf_scheduler_invoke_tock(kbdev); @@ -2485,13 +2650,12 @@ void insert_group_to_runnable(struct kbase_csf_scheduler *const scheduler, scheduler->total_runnable_grps++; - if (likely(scheduler_timer_is_enabled_nolock(kbdev)) && - (scheduler->total_runnable_grps == 1 || - scheduler->state == SCHED_SUSPENDED || + if (likely(kbase_csf_scheduler_timer_is_enabled(kbdev)) && + (scheduler->total_runnable_grps == 1 || scheduler->state == SCHED_SUSPENDED || scheduler->state == SCHED_SLEEPING)) { dev_dbg(kbdev->dev, "Kicking scheduler on first runnable group\n"); /* Fire a scheduling to start the time-slice */ - enqueue_tick_work(kbdev); + kbase_csf_scheduler_invoke_tick(kbdev); } else schedule_in_cycle(group, false); @@ -2501,6 +2665,17 @@ void insert_group_to_runnable(struct kbase_csf_scheduler *const scheduler, scheduler_wakeup(kbdev, false); } +static void cancel_tick_work(struct kbase_csf_scheduler *const scheduler) +{ + hrtimer_cancel(&scheduler->tick_timer); + atomic_set(&scheduler->pending_tick_work, false); +} + +static void cancel_tock_work(struct kbase_csf_scheduler *const scheduler) +{ + atomic_set(&scheduler->pending_tock_work, false); +} + static void remove_group_from_runnable(struct kbase_csf_scheduler *const scheduler, struct kbase_queue_group *group, @@ -2595,7 +2770,7 @@ void remove_group_from_runnable(struct kbase_csf_scheduler *const scheduler, scheduler->total_runnable_grps--; if (!scheduler->total_runnable_grps) { dev_dbg(kctx->kbdev->dev, "Scheduler idle has no runnable groups"); - cancel_tick_timer(kctx->kbdev); + cancel_tick_work(scheduler); WARN_ON(atomic_read(&scheduler->non_idle_offslot_grps)); if (scheduler->state != SCHED_SUSPENDED) enqueue_gpu_idle_work(scheduler, 0); @@ -2741,7 +2916,7 @@ static bool confirm_cmd_buf_empty(struct kbase_queue const *queue) u32 glb_version = iface->version; u64 const *input_addr = (u64 const *)queue->user_io_addr; - u64 const *output_addr = (u64 const *)(queue->user_io_addr + PAGE_SIZE); + u64 const *output_addr = (u64 const *)(queue->user_io_addr + PAGE_SIZE / sizeof(u64)); if (glb_version >= kbase_csf_interface_version(1, 0, 0)) { /* CS_STATUS_SCOREBOARD supported from CSF 1.0 */ @@ -2755,6 +2930,11 @@ static bool confirm_cmd_buf_empty(struct kbase_queue const *queue) CS_STATUS_SCOREBOARDS)); } + /* + * These 64-bit reads and writes will be atomic on a 64-bit kernel but may + * not be atomic on 32-bit kernels. Support for 32-bit kernels is limited to + * build-only. 
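+	 *
+	 * (Editor's illustration of the tearing hazard referred to above: a
+	 * 32-bit kernel may implement a u64 load as two 32-bit loads, so a
+	 * concurrent firmware update of CS_INSERT/CS_EXTRACT could be observed
+	 * half-old, half-new, whereas on a 64-bit kernel an aligned load such as
+	 *
+	 *     u64 insert = input_addr[CS_INSERT_LO / sizeof(u64)];
+	 *
+	 * is a single access and cannot tear.)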
+ */ cs_empty = (input_addr[CS_INSERT_LO / sizeof(u64)] == output_addr[CS_EXTRACT_LO / sizeof(u64)]); cs_idle = cs_empty && (!sb_status); @@ -2858,7 +3038,7 @@ static bool cleanup_csg_slot(struct kbase_queue_group *group) s8 slot; struct kbase_csf_csg_slot *csg_slot; unsigned long flags; - u32 i; + u32 csg_req, csg_ack, i; bool as_fault = false; lockdep_assert_held(&kbdev->csf.scheduler.lock); @@ -2898,8 +3078,16 @@ static bool cleanup_csg_slot(struct kbase_queue_group *group) as_fault = true; spin_unlock_irqrestore(&kctx->kbdev->hwaccess_lock, flags); +#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) + emit_gpu_metrics_to_frontend(kbdev); +#endif /* CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD */ + /* now marking the slot is vacant */ spin_lock_irqsave(&kbdev->csf.scheduler.interrupt_lock, flags); + /* Process pending SYNC_UPDATE, if any */ + csg_req = kbase_csf_firmware_csg_input_read(ginfo, CSG_REQ); + csg_ack = kbase_csf_firmware_csg_output(ginfo, CSG_ACK); + kbase_csf_handle_csg_sync_update(kbdev, ginfo, group, csg_req, csg_ack); kbdev->csf.scheduler.csg_slots[slot].resident_group = NULL; clear_bit(slot, kbdev->csf.scheduler.csg_slots_idle_mask); @@ -2962,10 +3150,10 @@ static void update_csg_slot_priority(struct kbase_queue_group *group, u8 prio) return; /* Read the csg_ep_cfg back for updating the priority field */ - ep_cfg = kbase_csf_firmware_csg_input_read(ginfo, CSG_EP_REQ); + ep_cfg = kbase_csf_firmware_csg_input_read(ginfo, CSG_EP_REQ_LO); prev_prio = CSG_EP_REQ_PRIORITY_GET(ep_cfg); ep_cfg = CSG_EP_REQ_PRIORITY_SET(ep_cfg, prio); - kbase_csf_firmware_csg_input(ginfo, CSG_EP_REQ, ep_cfg); + kbase_csf_firmware_csg_input(ginfo, CSG_EP_REQ_LO, ep_cfg); spin_lock_irqsave(&kbdev->csf.scheduler.interrupt_lock, flags); csg_req = kbase_csf_firmware_csg_output(ginfo, CSG_ACK); @@ -2999,12 +3187,11 @@ static void program_csg_slot(struct kbase_queue_group *group, s8 slot, const u64 compute_mask = shader_core_mask & group->compute_mask; const u64 fragment_mask = shader_core_mask & group->fragment_mask; const u64 tiler_mask = tiler_core_mask & group->tiler_mask; - const u8 num_cores = kbdev->gpu_props.num_cores; - const u8 compute_max = min(num_cores, group->compute_max); - const u8 fragment_max = min(num_cores, group->fragment_max); + const u8 compute_max = min(kbdev->gpu_props.num_cores, group->compute_max); + const u8 fragment_max = min(kbdev->gpu_props.num_cores, group->fragment_max); const u8 tiler_max = min(CSG_TILER_MAX, group->tiler_max); struct kbase_csf_cmd_stream_group_info *ginfo; - u32 ep_cfg = 0; + u64 ep_cfg = 0; u32 csg_req; u32 state; int i; @@ -3078,6 +3265,7 @@ static void program_csg_slot(struct kbase_queue_group *group, s8 slot, fragment_mask & U32_MAX); kbase_csf_firmware_csg_input(ginfo, CSG_ALLOW_FRAGMENT_HI, fragment_mask >> 32); + kbase_csf_firmware_csg_input(ginfo, CSG_ALLOW_OTHER, tiler_mask & U32_MAX); @@ -3089,7 +3277,7 @@ static void program_csg_slot(struct kbase_queue_group *group, s8 slot, ep_cfg = CSG_EP_REQ_FRAGMENT_EP_SET(ep_cfg, fragment_max); ep_cfg = CSG_EP_REQ_TILER_EP_SET(ep_cfg, tiler_max); ep_cfg = CSG_EP_REQ_PRIORITY_SET(ep_cfg, prio); - kbase_csf_firmware_csg_input(ginfo, CSG_EP_REQ, ep_cfg); + kbase_csf_firmware_csg_input(ginfo, CSG_EP_REQ_LO, ep_cfg & U32_MAX); /* Program the address space number assigned to the context */ kbase_csf_firmware_csg_input(ginfo, CSG_CONFIG, kctx->as_nr); @@ -3719,7 +3907,6 @@ static void program_suspending_csg_slots(struct kbase_device *kbdev) DECLARE_BITMAP(slot_mask, MAX_SUPPORTED_CSGS); 
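	/*
	 * (Editor's note, not part of the patch: the suspend timeout is now
	 * re-derived on every iteration of the wait loop below instead of once
	 * up front, so each remaining batch of slots gets a fresh budget; a
	 * minimal sketch of the pattern, using only helpers named in this diff:
	 *
	 *     while (!bitmap_empty(slot_mask, MAX_SUPPORTED_CSGS)) {
	 *             long remaining = kbase_csf_timeout_in_jiffies(
	 *                     kbase_get_timeout_ms(kbdev, CSF_CSG_SUSPEND_TIMEOUT));
	 *             remaining = wait_event_timeout(kbdev->csf.event_wait, ...,
	 *                                            remaining);
	 *     }
	 * )
	 */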
DECLARE_BITMAP(evicted_mask, MAX_SUPPORTED_CSGS) = {0}; bool suspend_wait_failed = false; - long remaining = kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms); lockdep_assert_held(&kbdev->csf.scheduler.lock); @@ -3731,6 +3918,7 @@ static void program_suspending_csg_slots(struct kbase_device *kbdev) while (!bitmap_empty(slot_mask, MAX_SUPPORTED_CSGS)) { DECLARE_BITMAP(changed, MAX_SUPPORTED_CSGS); + long remaining = kbase_csf_timeout_in_jiffies(kbase_get_timeout_ms(kbdev, CSF_CSG_SUSPEND_TIMEOUT)); bitmap_copy(changed, slot_mask, MAX_SUPPORTED_CSGS); @@ -3752,15 +3940,18 @@ static void program_suspending_csg_slots(struct kbase_device *kbdev) /* The on slot csg is now stopped */ clear_bit(i, slot_mask); - KBASE_TLSTREAM_TL_KBASE_DEVICE_SUSPEND_CSG( - kbdev, kbdev->gpu_props.props.raw_props.gpu_id, i); - if (likely(group)) { bool as_fault; /* Only do save/cleanup if the * group is not terminated during * the sleep. */ + + /* Only emit suspend, if there was no AS fault */ + if (kctx_as_enabled(group->kctx) && !group->faulted) + KBASE_TLSTREAM_TL_KBASE_DEVICE_SUSPEND_CSG( + kbdev, + kbdev->gpu_props.props.raw_props.gpu_id, i); save_csg_slot(group); as_fault = cleanup_csg_slot(group); /* If AS fault detected, evict it */ @@ -4258,16 +4449,13 @@ static void protm_enter_set_next_pending_seq(struct kbase_device *const kbdev) struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; u32 num_groups = kbdev->csf.global_iface.group_num; u32 num_csis = kbdev->csf.global_iface.groups[0].stream_num; - DECLARE_BITMAP(active_csgs, MAX_SUPPORTED_CSGS) = { 0 }; u32 i; kbase_csf_scheduler_spin_lock_assert_held(kbdev); - bitmap_xor(active_csgs, scheduler->csg_slots_idle_mask, scheduler->csg_inuse_bitmap, - num_groups); /* Reset the tick's pending protm seq number to invalid initially */ scheduler->tick_protm_pending_seq = KBASEP_TICK_PROTM_PEND_SCAN_SEQ_NR_INVALID; - for_each_set_bit(i, active_csgs, num_groups) { + for_each_set_bit(i, scheduler->csg_inuse_bitmap, num_groups) { struct kbase_queue_group *group = scheduler->csg_slots[i].resident_group; /* Set to the next pending protm group's scan_seq_number */ @@ -4508,8 +4696,9 @@ static void scheduler_apply(struct kbase_device *kbdev) program_suspending_csg_slots(kbdev); } -static void scheduler_ctx_scan_groups(struct kbase_device *kbdev, - struct kbase_context *kctx, int priority) +static void scheduler_ctx_scan_groups(struct kbase_device *kbdev, struct kbase_context *kctx, + int priority, struct list_head *privileged_groups, + struct list_head *active_groups) { struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; struct kbase_queue_group *group; @@ -4523,8 +4712,9 @@ static void scheduler_ctx_scan_groups(struct kbase_device *kbdev, if (!kctx_as_enabled(kctx)) return; - list_for_each_entry(group, &kctx->csf.sched.runnable_groups[priority], - link) { + list_for_each_entry(group, &kctx->csf.sched.runnable_groups[priority], link) { + bool protm_req; + if (WARN_ON(!list_empty(&group->link_to_schedule))) /* This would be a bug */ list_del_init(&group->link_to_schedule); @@ -4535,33 +4725,30 @@ static void scheduler_ctx_scan_groups(struct kbase_device *kbdev, /* Set the scanout sequence number, starting from 0 */ group->scan_seq_num = scheduler->csg_scan_count_for_tick++; + protm_req = !bitmap_empty(group->protm_pending_bitmap, + kbdev->csf.global_iface.groups[0].stream_num); + if (scheduler->tick_protm_pending_seq == - KBASEP_TICK_PROTM_PEND_SCAN_SEQ_NR_INVALID) { - if (!bitmap_empty(group->protm_pending_bitmap, - 
kbdev->csf.global_iface.groups[0].stream_num))
-				scheduler->tick_protm_pending_seq =
-					group->scan_seq_num;
+		    KBASEP_TICK_PROTM_PEND_SCAN_SEQ_NR_INVALID) {
+			if (protm_req)
+				scheduler->tick_protm_pending_seq = group->scan_seq_num;
 		}
-		if (queue_group_idle_locked(group)) {
+		if (protm_req && on_slot_group_idle_locked(group))
+			update_idle_protm_group_state_to_runnable(group);
+		else if (queue_group_idle_locked(group)) {
 			if (can_schedule_idle_group(group))
 				list_add_tail(&group->link_to_schedule,
 					&scheduler->idle_groups_to_schedule);
 			continue;
 		}
-		if (!scheduler->ngrp_to_schedule) {
-			/* keep the top csg's origin */
-			scheduler->top_ctx = kctx;
-			scheduler->top_grp = group;
+		if (protm_req && (group->priority == KBASE_QUEUE_GROUP_PRIORITY_REALTIME)) {
+			list_add_tail(&group->link_to_schedule, privileged_groups);
+			continue;
 		}
-		list_add_tail(&group->link_to_schedule,
-			&scheduler->groups_to_schedule);
-		group->prepared_seq_num = scheduler->ngrp_to_schedule++;
-
-		kctx->csf.sched.ngrp_to_schedule++;
-		count_active_address_space(kbdev, kctx);
+		list_add_tail(&group->link_to_schedule, active_groups);
 	}
 }
@@ -4891,18 +5078,15 @@ static void scheduler_handle_idle_slots(struct kbase_device *kbdev)
 	spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
 }
-static void scheduler_scan_idle_groups(struct kbase_device *kbdev)
+static void scheduler_scan_group_list(struct kbase_device *kbdev, struct list_head *groups)
 {
 	struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
 	struct kbase_queue_group *group, *n;
-	list_for_each_entry_safe(group, n, &scheduler->idle_groups_to_schedule,
-				 link_to_schedule) {
-		WARN_ON(!can_schedule_idle_group(group));
-
+	list_for_each_entry_safe(group, n, groups, link_to_schedule) {
 		if (!scheduler->ngrp_to_schedule) {
 			/* keep the top csg's origin */
 			scheduler->top_ctx = group->kctx;
 			scheduler->top_grp = group;
 		}
@@ -5049,7 +5234,12 @@ static bool all_on_slot_groups_remained_idle(struct kbase_device *kbdev)
 		if (!queue || !queue->user_io_addr)
 			continue;
-		output_addr = (u64 const *)(queue->user_io_addr + PAGE_SIZE);
+		output_addr = (u64 const *)(queue->user_io_addr + PAGE_SIZE / sizeof(u64));
+		/*
+		 * These 64-bit reads and writes will be atomic on a 64-bit kernel
+		 * but may not be atomic on 32-bit kernels. Support for 32-bit
+		 * kernels is limited to build-only.
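+		 *
+		 * (Editor's note: idleness is re-confirmed by comparing this
+		 * freshly read extract offset with the snapshot recorded when
+		 * the idle event was originally seen, e.g.
+		 *
+		 *     if (output_addr[CS_EXTRACT_LO / sizeof(u64)] != queue->extract_ofs)
+		 *             the group has made progress and is not idle,
+		 *
+		 * which is exactly the check performed just below.)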
+ */ cur_extract_ofs = output_addr[CS_EXTRACT_LO / sizeof(u64)]; if (cur_extract_ofs != queue->extract_ofs) { /* More work has been executed since the idle @@ -5141,10 +5331,13 @@ static void scheduler_sleep_on_idle(struct kbase_device *kbdev) dev_dbg(kbdev->dev, "Scheduler to be put to sleep on GPU becoming idle"); - cancel_tick_timer(kbdev); + cancel_tick_work(scheduler); scheduler_pm_idle_before_sleep(kbdev); scheduler->state = SCHED_SLEEPING; KBASE_KTRACE_ADD(kbdev, SCHED_SLEEPING, NULL, scheduler->state); +#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) + emit_gpu_metrics_to_frontend(kbdev); +#endif /* CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD */ } #endif @@ -5162,6 +5355,7 @@ static void scheduler_sleep_on_idle(struct kbase_device *kbdev) */ static bool scheduler_suspend_on_idle(struct kbase_device *kbdev) { + struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; int ret = suspend_active_groups_on_powerdown(kbdev, false); if (ret) { @@ -5169,7 +5363,7 @@ static bool scheduler_suspend_on_idle(struct kbase_device *kbdev) atomic_read( &kbdev->csf.scheduler.non_idle_offslot_grps)); /* Bring forward the next tick */ - kbase_csf_scheduler_tick_advance(kbdev); + kbase_csf_scheduler_invoke_tick(kbdev); return false; } @@ -5180,7 +5374,7 @@ static bool scheduler_suspend_on_idle(struct kbase_device *kbdev) dev_dbg(kbdev->dev, "Scheduler to be suspended on GPU becoming idle"); scheduler_suspend(kbdev); - cancel_tick_timer(kbdev); + cancel_tick_work(scheduler); return true; } @@ -5514,6 +5708,7 @@ static void sc_rails_off_worker(struct work_struct *work) static int scheduler_prepare(struct kbase_device *kbdev) { struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; + struct list_head privileged_groups, active_groups; unsigned long flags; int i; @@ -5539,6 +5734,8 @@ static int scheduler_prepare(struct kbase_device *kbdev) scheduler->num_active_address_spaces = 0; scheduler->num_csg_slots_for_tick = 0; bitmap_zero(scheduler->csg_slots_prio_update, MAX_SUPPORTED_CSGS); + INIT_LIST_HEAD(&privileged_groups); + INIT_LIST_HEAD(&active_groups); spin_lock_irqsave(&scheduler->interrupt_lock, flags); scheduler->tick_protm_pending_seq = @@ -5548,10 +5745,17 @@ static int scheduler_prepare(struct kbase_device *kbdev) struct kbase_context *kctx; list_for_each_entry(kctx, &scheduler->runnable_kctxs, csf.link) - scheduler_ctx_scan_groups(kbdev, kctx, i); + scheduler_ctx_scan_groups(kbdev, kctx, i, &privileged_groups, + &active_groups); } spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); + /* Adds privileged (RT + p.mode) groups to the scanout list */ + scheduler_scan_group_list(kbdev, &privileged_groups); + + /* Adds remainder of active groups to the scanout list */ + scheduler_scan_group_list(kbdev, &active_groups); + /* Update this tick's non-idle groups */ scheduler->non_idle_scanout_grps = scheduler->ngrp_to_schedule; @@ -5566,7 +5770,7 @@ static int scheduler_prepare(struct kbase_device *kbdev) scheduler->non_idle_scanout_grps); /* Adds those idle but runnable groups to the scanout list */ - scheduler_scan_idle_groups(kbdev); + scheduler_scan_group_list(kbdev, &scheduler->idle_groups_to_schedule); WARN_ON(scheduler->csg_scan_count_for_tick < scheduler->ngrp_to_schedule); @@ -5668,11 +5872,9 @@ static int prepare_fast_local_tock(struct kbase_device *kbdev) return bitmap_weight(csg_bitmap, num_groups); } -static int wait_csg_slots_suspend(struct kbase_device *kbdev, unsigned long *slot_mask, - unsigned int timeout_ms) +static int wait_csg_slots_suspend(struct 
kbase_device *kbdev, unsigned long *slot_mask) { struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; - long remaining = kbase_csf_timeout_in_jiffies(timeout_ms); u32 num_groups = kbdev->csf.global_iface.group_num; int err = 0; DECLARE_BITMAP(slot_mask_local, MAX_SUPPORTED_CSGS); @@ -5681,11 +5883,11 @@ static int wait_csg_slots_suspend(struct kbase_device *kbdev, unsigned long *slo bitmap_copy(slot_mask_local, slot_mask, MAX_SUPPORTED_CSGS); - while (!bitmap_empty(slot_mask_local, MAX_SUPPORTED_CSGS) && remaining) { + while (!bitmap_empty(slot_mask_local, MAX_SUPPORTED_CSGS)) { + long remaining = kbase_csf_timeout_in_jiffies(kbase_get_timeout_ms(kbdev, CSF_CSG_SUSPEND_TIMEOUT)); DECLARE_BITMAP(changed, MAX_SUPPORTED_CSGS); bitmap_copy(changed, slot_mask_local, MAX_SUPPORTED_CSGS); - remaining = wait_event_timeout( kbdev->csf.event_wait, slots_state_changed(kbdev, changed, csg_slot_stopped_locked), remaining); @@ -5702,18 +5904,23 @@ static int wait_csg_slots_suspend(struct kbase_device *kbdev, unsigned long *slo /* The on slot csg is now stopped */ clear_bit(i, slot_mask_local); - KBASE_TLSTREAM_TL_KBASE_DEVICE_SUSPEND_CSG( - kbdev, kbdev->gpu_props.props.raw_props.gpu_id, i); - group = scheduler->csg_slots[i].resident_group; if (likely(group)) { /* Only do save/cleanup if the * group is not terminated during * the sleep. */ + + /* Only emit suspend, if there was no AS fault */ + if (kctx_as_enabled(group->kctx) && !group->faulted) + KBASE_TLSTREAM_TL_KBASE_DEVICE_SUSPEND_CSG( + kbdev, + kbdev->gpu_props.props.raw_props.gpu_id, i); + save_csg_slot(group); - if (cleanup_csg_slot(group)) + if (cleanup_csg_slot(group)) { sched_evict_group(group, true, true); + } } } } else { @@ -5724,8 +5931,8 @@ static int wait_csg_slots_suspend(struct kbase_device *kbdev, unsigned long *slo slot_mask_local[0]); /* Return the bitmask of the timed out slots to the caller */ bitmap_copy(slot_mask, slot_mask_local, MAX_SUPPORTED_CSGS); - err = -ETIMEDOUT; + break; } } @@ -5787,7 +5994,7 @@ static void evict_lru_or_blocked_csg(struct kbase_device *kbdev) * idle. 
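 *
 * (Editor's note: prepared_seq_num is assigned in scan-out order, so among
 * idle, non-realtime groups the one with the largest value was ranked last
 * in the most recent scan and is treated as least recently used:
 *
 *     if (group->run_state == KBASE_CSF_GROUP_IDLE &&
 *         group->priority != KBASE_QUEUE_GROUP_PRIORITY_REALTIME &&
 *         (!lru_idle_group ||
 *          lru_idle_group->prepared_seq_num < group->prepared_seq_num))
 *             lru_idle_group = group;
 *
 * mirroring the selection performed in the code that follows.)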
*/ if ((group->run_state == KBASE_CSF_GROUP_IDLE) && - (group->priority != BASE_QUEUE_GROUP_PRIORITY_REALTIME) && + (group->priority != KBASE_QUEUE_GROUP_PRIORITY_REALTIME) && ((lru_idle_group == NULL) || (lru_idle_group->prepared_seq_num < group->prepared_seq_num))) { if (WARN_ON(group->kctx->as_nr < 0)) @@ -5809,7 +6016,7 @@ static void evict_lru_or_blocked_csg(struct kbase_device *kbdev) lru_idle_group->handle, lru_idle_group->kctx->tgid, lru_idle_group->kctx->id, lru_idle_group->csg_nr); suspend_queue_group(lru_idle_group); - if (wait_csg_slots_suspend(kbdev, &slot_mask, kbdev->csf.fw_timeout_ms)) { + if (wait_csg_slots_suspend(kbdev, &slot_mask)) { enum dumpfault_error_type error_type = DF_CSG_SUSPEND_TIMEOUT; dev_warn( @@ -6033,10 +6240,8 @@ static bool can_skip_scheduling(struct kbase_device *kbdev) return false; } -static void schedule_on_tock(struct kthread_work *work) +static void schedule_on_tock(struct kbase_device *kbdev) { - struct kbase_device *kbdev = - container_of(work, struct kbase_device, csf.scheduler.tock_work.work); struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; int err; @@ -6071,12 +6276,12 @@ static void schedule_on_tock(struct kthread_work *work) KBASE_KTRACE_ADD(kbdev, SCHED_INACTIVE, NULL, scheduler->state); if (!scheduler->total_runnable_grps) enqueue_gpu_idle_work(scheduler, 0); +#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) + emit_gpu_metrics_to_frontend(kbdev); +#endif /* CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD */ rt_mutex_unlock(&scheduler->lock); kbase_reset_gpu_allow(kbdev); - dev_dbg(kbdev->dev, - "Waking up for event after schedule-on-tock completes."); - wake_up_all(&kbdev->csf.event_wait); KBASE_KTRACE_ADD(kbdev, SCHEDULER_TOCK_END, NULL, 0u); return; @@ -6085,10 +6290,8 @@ exit_no_schedule_unlock: kbase_reset_gpu_allow(kbdev); } -static void schedule_on_tick(struct kthread_work *work) +static void schedule_on_tick(struct kbase_device *kbdev) { - struct kbase_device *kbdev = - container_of(work, struct kbase_device, csf.scheduler.tick_work); struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; int err = kbase_reset_gpu_try_prevent(kbdev); @@ -6115,23 +6318,25 @@ static void schedule_on_tick(struct kthread_work *work) scheduler->last_schedule = jiffies; /* Kicking next scheduling if needed */ - if (likely(scheduler_timer_is_enabled_nolock(kbdev)) && - (scheduler->total_runnable_grps > 0)) { - start_tick_timer(kbdev); - dev_dbg(kbdev->dev, - "scheduling for next tick, num_runnable_groups:%u\n", + if (likely(kbase_csf_scheduler_timer_is_enabled(kbdev)) && + (scheduler->total_runnable_grps > 0)) { + hrtimer_start(&scheduler->tick_timer, + HR_TIMER_DELAY_MSEC(scheduler->csg_scheduling_period_ms), + HRTIMER_MODE_REL); + dev_dbg(kbdev->dev, "scheduling for next tick, num_runnable_groups:%u\n", scheduler->total_runnable_grps); } else if (!scheduler->total_runnable_grps) { enqueue_gpu_idle_work(scheduler, 0); } scheduler->state = SCHED_INACTIVE; +#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) + emit_gpu_metrics_to_frontend(kbdev); +#endif /* CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD */ rt_mutex_unlock(&scheduler->lock); KBASE_KTRACE_ADD(kbdev, SCHED_INACTIVE, NULL, scheduler->state); kbase_reset_gpu_allow(kbdev); - dev_dbg(kbdev->dev, "Waking up for event after schedule-on-tick completes."); - wake_up_all(&kbdev->csf.event_wait); KBASE_KTRACE_ADD(kbdev, SCHEDULER_TICK_END, NULL, scheduler->total_runnable_grps); return; @@ -6161,7 +6366,7 @@ static int suspend_active_queue_groups(struct kbase_device *kbdev, 
} } - ret = wait_csg_slots_suspend(kbdev, slot_mask, kbdev->reset_timeout_ms); + ret = wait_csg_slots_suspend(kbdev, slot_mask); return ret; } @@ -6180,7 +6385,7 @@ static int suspend_active_queue_groups_on_reset(struct kbase_device *kbdev) dev_warn(kbdev->dev, "Timeout waiting for CSG slots to suspend before reset, slot_mask: 0x%*pb\n", kbdev->csf.global_iface.group_num, slot_mask); //TODO: should introduce SSCD report if this happens. - kbase_gpu_timeout_debug_message(kbdev); + kbase_gpu_timeout_debug_message(kbdev, ""); dev_warn(kbdev->dev, "[%llu] Firmware ping %d", kbase_backend_get_cycle_cnt(kbdev), kbase_csf_firmware_ping_wait(kbdev, 0)); @@ -6201,11 +6406,10 @@ static int suspend_active_queue_groups_on_reset(struct kbase_device *kbdev) * overflow. */ kbase_gpu_start_cache_clean(kbdev, GPU_COMMAND_CACHE_CLN_INV_L2_LSC); - ret2 = kbase_gpu_wait_cache_clean_timeout(kbdev, - kbdev->reset_timeout_ms); + ret2 = kbase_gpu_wait_cache_clean_timeout(kbdev, kbdev->mmu_or_gpu_cache_op_wait_time_ms); if (ret2) { - dev_warn(kbdev->dev, "[%llu] Timeout waiting for cache clean to complete before reset", - kbase_backend_get_cycle_cnt(kbdev)); + dev_err(kbdev->dev, "[%llu] Timeout waiting for CACHE_CLN_INV_L2_LSC", + kbase_backend_get_cycle_cnt(kbdev)); if (!ret) ret = ret2; } @@ -6323,17 +6527,6 @@ unlock: return suspend_on_slot_groups; } -static void cancel_tick_work(struct kbase_csf_scheduler *const scheduler) -{ - kthread_cancel_work_sync(&scheduler->tick_work); -} - -static void cancel_tock_work(struct kbase_csf_scheduler *const scheduler) -{ - atomic_set(&scheduler->pending_tock_work, false); - kthread_cancel_delayed_work_sync(&scheduler->tock_work); -} - static void scheduler_inner_reset(struct kbase_device *kbdev) { u32 const num_groups = kbdev->csf.global_iface.group_num; @@ -6348,7 +6541,6 @@ static void scheduler_inner_reset(struct kbase_device *kbdev) #else cancel_work_sync(&kbdev->csf.scheduler.gpu_idle_work); #endif - cancel_tick_timer(kbdev); cancel_tick_work(scheduler); cancel_tock_work(scheduler); cancel_delayed_work_sync(&scheduler->ping_work); @@ -6547,8 +6739,7 @@ int kbase_csf_scheduler_group_copy_suspend_buf(struct kbase_queue_group *group, if (!WARN_ON(scheduler->state == SCHED_SUSPENDED)) suspend_queue_group(group); - err = wait_csg_slots_suspend(kbdev, slot_mask, - kbdev->csf.fw_timeout_ms); + err = wait_csg_slots_suspend(kbdev, slot_mask); if (err) { const struct gpu_uevent evt = { .type = GPU_UEVENT_TYPE_KMD_ERROR, @@ -6593,7 +6784,7 @@ int kbase_csf_scheduler_group_copy_suspend_buf(struct kbase_queue_group *group, target_page_nr < sus_buf->nr_pages; i++) { struct page *pg = as_page(group->normal_suspend_buf.phy[i]); - void *sus_page = kmap(pg); + void *sus_page = kbase_kmap(pg); if (sus_page) { kbase_sync_single_for_cpu(kbdev, @@ -6604,7 +6795,7 @@ int kbase_csf_scheduler_group_copy_suspend_buf(struct kbase_queue_group *group, sus_buf->pages, sus_page, &to_copy, sus_buf->nr_pages, &target_page_nr, offset); - kunmap(pg); + kbase_kunmap(pg, sus_page); if (err) break; } else { @@ -6720,12 +6911,21 @@ static struct kbase_queue_group *scheduler_get_protm_enter_async_group( spin_lock_irqsave(&scheduler->interrupt_lock, flags); - if (kbase_csf_scheduler_protected_mode_in_use(kbdev) || - bitmap_empty(pending, ginfo->stream_num)) + if (bitmap_empty(pending, ginfo->stream_num)) { + dev_dbg(kbdev->dev, + "Pmode requested for group %d of ctx %d_%d with no pending queues", + input_grp->handle, input_grp->kctx->tgid, input_grp->kctx->id); + input_grp = NULL; + } else if 
(kbase_csf_scheduler_protected_mode_in_use(kbdev)) { + kbase_csf_scheduler_invoke_tock(kbdev); input_grp = NULL; + } spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); } else { + if (group && (group->priority == KBASE_QUEUE_GROUP_PRIORITY_REALTIME)) + kbase_csf_scheduler_invoke_tock(kbdev); + input_grp = NULL; } @@ -6753,11 +6953,8 @@ void kbase_csf_scheduler_group_protm_enter(struct kbase_queue_group *group) rt_mutex_lock(&scheduler->lock); - if (group->run_state == KBASE_CSF_GROUP_IDLE) { - group->run_state = KBASE_CSF_GROUP_RUNNABLE; - KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSF_GROUP_RUNNABLE, group, - group->run_state); - } + if (on_slot_group_idle_locked(group)) + update_idle_protm_group_state_to_runnable(group); /* Check if the group is now eligible for execution in protected mode. */ if (scheduler_get_protm_enter_async_group(kbdev, group)) scheduler_group_check_protm_enter(kbdev, group); @@ -7084,6 +7281,13 @@ int kbase_csf_scheduler_context_init(struct kbase_context *kctx) { int priority; int err; + struct kbase_device *kbdev = kctx->kbdev; + +#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) + err = gpu_metrics_ctx_init(kctx); + if (err) + return err; +#endif /* CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD */ kbase_ctx_sched_init_ctx(kctx); @@ -7115,8 +7319,7 @@ int kbase_csf_scheduler_context_init(struct kbase_context *kctx) err = kbase_csf_event_wait_add(kctx, check_group_sync_update_cb, kctx); if (err) { - dev_err(kctx->kbdev->dev, - "Failed to register a sync update callback"); + dev_err(kbdev->dev, "Failed to register a sync update callback"); goto event_wait_add_failed; } @@ -7126,6 +7329,9 @@ event_wait_add_failed: kbase_destroy_kworker_stack(&kctx->csf.sched.sync_update_worker); alloc_wq_failed: kbase_ctx_sched_remove_ctx(kctx); +#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) + gpu_metrics_ctx_term(kctx); +#endif /* CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD */ return err; } @@ -7136,6 +7342,74 @@ void kbase_csf_scheduler_context_term(struct kbase_context *kctx) kbase_destroy_kworker_stack(&kctx->csf.sched.sync_update_worker); kbase_ctx_sched_remove_ctx(kctx); +#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) + gpu_metrics_ctx_term(kctx); +#endif /* CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD */ +} + +static int kbase_csf_scheduler_kthread(void *data) +{ + struct kbase_device *const kbdev = data; + struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; + + while (scheduler->kthread_running) { + struct kbase_queue *queue; + + if (wait_for_completion_interruptible(&scheduler->kthread_signal) != 0) + continue; + reinit_completion(&scheduler->kthread_signal); + + /* Iterate through queues with pending kicks */ + do { + u8 prio; + + spin_lock(&kbdev->csf.pending_gpuq_kicks_lock); + queue = NULL; + for (prio = 0; prio != KBASE_QUEUE_GROUP_PRIORITY_COUNT; ++prio) { + if (!list_empty(&kbdev->csf.pending_gpuq_kicks[prio])) { + queue = list_first_entry( + &kbdev->csf.pending_gpuq_kicks[prio], + struct kbase_queue, pending_kick_link); + list_del_init(&queue->pending_kick_link); + break; + } + } + spin_unlock(&kbdev->csf.pending_gpuq_kicks_lock); + + if (queue != NULL) { + WARN_ONCE( + prio != queue->group_priority, + "Queue %pK has priority %hhu but instead its kick was handled at priority %hhu", + (void *)queue, queue->group_priority, prio); + + kbase_csf_process_queue_kick(queue); + + /* Perform a scheduling tock for high-priority queue groups if + * required. 
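+				 *
+				 * (Editor's sketch of the wakeup protocol, not part
+				 * of the patch: tick/tock producers perform
+				 *
+				 *     if (atomic_cmpxchg(&scheduler->pending_tock_work,
+				 *                        false, true) == false)
+				 *             complete(&scheduler->kthread_signal);
+				 *
+				 * so only the first request after a drain signals the
+				 * completion, which this thread consumes with
+				 * wait_for_completion_interruptible() followed by
+				 * reinit_completion(). The BUILD_BUG_ONs below pin the
+				 * assumption that REALTIME and HIGH are priority levels
+				 * 0 and 1, which the prio <= KBASE_QUEUE_GROUP_PRIORITY_HIGH
+				 * test relies on.)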
+ */ + BUILD_BUG_ON(KBASE_QUEUE_GROUP_PRIORITY_REALTIME != 0); + BUILD_BUG_ON(KBASE_QUEUE_GROUP_PRIORITY_HIGH != 1); + if ((prio <= KBASE_QUEUE_GROUP_PRIORITY_HIGH) && + atomic_read(&scheduler->pending_tock_work)) + schedule_on_tock(kbdev); + } + } while (queue != NULL); + + /* Check if we need to perform a scheduling tick/tock. A tick + * event shall override a tock event but not vice-versa. + */ + if (atomic_cmpxchg(&scheduler->pending_tick_work, true, false) == true) { + atomic_set(&scheduler->pending_tock_work, false); + schedule_on_tick(kbdev); + } else if (atomic_read(&scheduler->pending_tock_work)) { + schedule_on_tock(kbdev); + } + + dev_dbg(kbdev->dev, "Waking up for event after a scheduling iteration."); + wake_up_all(&kbdev->csf.event_wait); + } + + return 0; } int kbase_csf_scheduler_init(struct kbase_device *kbdev) @@ -7154,33 +7428,51 @@ int kbase_csf_scheduler_init(struct kbase_device *kbdev) return -ENOMEM; } + init_completion(&scheduler->kthread_signal); + scheduler->kthread_running = true; + scheduler->gpuq_kthread = + kthread_run(&kbase_csf_scheduler_kthread, kbdev, "mali-gpuq-kthread"); + if (!scheduler->gpuq_kthread) { + kfree(scheduler->csg_slots); + scheduler->csg_slots = NULL; + + dev_err(kbdev->dev, "Failed to spawn the GPU queue submission worker thread"); + return -ENOMEM; + } +#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) && !IS_ENABLED(CONFIG_MALI_NO_MALI) + scheduler->gpu_metrics_tb = + kbase_csf_firmware_get_trace_buffer(kbdev, KBASE_CSFFW_GPU_METRICS_BUF_NAME); + if (!scheduler->gpu_metrics_tb) { + scheduler->kthread_running = false; + complete(&scheduler->kthread_signal); + kthread_stop(scheduler->gpuq_kthread); + scheduler->gpuq_kthread = NULL; + + kfree(scheduler->csg_slots); + scheduler->csg_slots = NULL; + + dev_err(kbdev->dev, "Failed to get the handler of gpu_metrics from trace buffer"); + return -ENOENT; + } +#endif /* CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD */ + return kbase_csf_mcu_shared_regs_data_init(kbdev); } int kbase_csf_scheduler_early_init(struct kbase_device *kbdev) { - int err; struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; - scheduler->timer_enabled = true; + atomic_set(&scheduler->timer_enabled, true); - err = kbase_create_realtime_thread(kbdev, kthread_worker_fn, &scheduler->csf_worker, - "csf_scheduler"); - if (err) { - dev_err(kbdev->dev, "Failed to allocate scheduler kworker\n"); - return -ENOMEM; - } scheduler->idle_wq = alloc_ordered_workqueue( "csf_scheduler_gpu_idle_wq", WQ_HIGHPRI); if (!scheduler->idle_wq) { - dev_err(kbdev->dev, - "Failed to allocate GPU idle scheduler workqueue\n"); - kbase_destroy_kworker_stack(&kbdev->csf.scheduler.csf_worker); + dev_err(kbdev->dev, "Failed to allocate GPU idle scheduler workqueue\n"); return -ENOMEM; } - kthread_init_work(&scheduler->tick_work, schedule_on_tick); - kthread_init_delayed_work(&scheduler->tock_work, schedule_on_tock); + atomic_set(&scheduler->pending_tick_work, false); atomic_set(&scheduler->pending_tock_work, false); INIT_DEFERRABLE_WORK(&scheduler->ping_work, firmware_aliveness_monitor); @@ -7223,7 +7515,6 @@ int kbase_csf_scheduler_early_init(struct kbase_device *kbdev) hrtimer_init(&scheduler->tick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); scheduler->tick_timer.function = tick_timer_callback; - scheduler->tick_timer_active = false; kbase_csf_tiler_heap_reclaim_mgr_init(kbdev); @@ -7232,6 +7523,14 @@ int kbase_csf_scheduler_early_init(struct kbase_device *kbdev) void kbase_csf_scheduler_term(struct kbase_device *kbdev) { + struct 
kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; + + if (scheduler->gpuq_kthread) { + scheduler->kthread_running = false; + complete(&scheduler->kthread_signal); + kthread_stop(scheduler->gpuq_kthread); + } + if (kbdev->csf.scheduler.csg_slots) { WARN_ON(atomic_read(&kbdev->csf.scheduler.non_idle_offslot_grps)); /* The unload of Driver can take place only when all contexts have @@ -7261,9 +7560,6 @@ void kbase_csf_scheduler_term(struct kbase_device *kbdev) rt_mutex_unlock(&kbdev->csf.scheduler.lock); cancel_delayed_work_sync(&kbdev->csf.scheduler.ping_work); - cancel_tick_timer(kbdev); - cancel_tick_work(&kbdev->csf.scheduler); - cancel_tock_work(&kbdev->csf.scheduler); kfree(kbdev->csf.scheduler.csg_slots); kbdev->csf.scheduler.csg_slots = NULL; } @@ -7277,8 +7573,6 @@ void kbase_csf_scheduler_early_term(struct kbase_device *kbdev) { if (kbdev->csf.scheduler.idle_wq) destroy_workqueue(kbdev->csf.scheduler.idle_wq); - if (kbdev->csf.scheduler.csf_worker.task) - kbase_destroy_kworker_stack(&kbdev->csf.scheduler.csf_worker); kbase_csf_tiler_heap_reclaim_mgr_term(kbdev); } @@ -7299,7 +7593,7 @@ static void scheduler_enable_tick_timer_nolock(struct kbase_device *kbdev) lockdep_assert_held(&kbdev->csf.scheduler.lock); - if (unlikely(!scheduler_timer_is_enabled_nolock(kbdev))) + if (unlikely(!kbase_csf_scheduler_timer_is_enabled(kbdev))) return; WARN_ON((scheduler->state != SCHED_INACTIVE) && @@ -7307,7 +7601,7 @@ static void scheduler_enable_tick_timer_nolock(struct kbase_device *kbdev) (scheduler->state != SCHED_SLEEPING)); if (scheduler->total_runnable_grps > 0) { - enqueue_tick_work(kbdev); + kbase_csf_scheduler_invoke_tick(kbdev); dev_dbg(kbdev->dev, "Re-enabling the scheduler timer\n"); } else if (scheduler->state != SCHED_SUSPENDED) { enqueue_gpu_idle_work(scheduler, 0); @@ -7321,43 +7615,24 @@ void kbase_csf_scheduler_enable_tick_timer(struct kbase_device *kbdev) rt_mutex_unlock(&kbdev->csf.scheduler.lock); } -bool kbase_csf_scheduler_timer_is_enabled(struct kbase_device *kbdev) -{ - struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; - bool enabled; - - rt_mutex_lock(&scheduler->lock); - enabled = scheduler_timer_is_enabled_nolock(kbdev); - rt_mutex_unlock(&scheduler->lock); - - return enabled; -} - void kbase_csf_scheduler_timer_set_enabled(struct kbase_device *kbdev, bool enable) { struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; bool currently_enabled; + /* This lock is taken to prevent this code being executed concurrently + * by userspace. + */ rt_mutex_lock(&scheduler->lock); - currently_enabled = scheduler_timer_is_enabled_nolock(kbdev); + currently_enabled = kbase_csf_scheduler_timer_is_enabled(kbdev); if (currently_enabled && !enable) { - scheduler->timer_enabled = false; - cancel_tick_timer(kbdev); - rt_mutex_unlock(&scheduler->lock); - /* The non-sync version to cancel the normal work item is not - * available, so need to drop the lock before cancellation. 
- */ + atomic_set(&scheduler->timer_enabled, false); cancel_tick_work(scheduler); - cancel_tock_work(scheduler); - return; - } - - if (!currently_enabled && enable) { - scheduler->timer_enabled = true; - - scheduler_enable_tick_timer_nolock(kbdev); + } else if (!currently_enabled && enable) { + atomic_set(&scheduler->timer_enabled, true); + kbase_csf_scheduler_invoke_tick(kbdev); } rt_mutex_unlock(&scheduler->lock); @@ -7367,17 +7642,17 @@ void kbase_csf_scheduler_kick(struct kbase_device *kbdev) { struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; - rt_mutex_lock(&scheduler->lock); + if (unlikely(kbase_csf_scheduler_timer_is_enabled(kbdev))) + return; - if (unlikely(scheduler_timer_is_enabled_nolock(kbdev))) - goto out; + /* This lock is taken to prevent this code being executed concurrently + * by userspace. + */ + rt_mutex_lock(&scheduler->lock); - if (scheduler->total_runnable_grps > 0) { - enqueue_tick_work(kbdev); - dev_dbg(kbdev->dev, "Kicking the scheduler manually\n"); - } + kbase_csf_scheduler_invoke_tick(kbdev); + dev_dbg(kbdev->dev, "Kicking the scheduler manually\n"); -out: rt_mutex_unlock(&scheduler->lock); } @@ -7414,7 +7689,7 @@ int kbase_csf_scheduler_pm_suspend_no_lock(struct kbase_device *kbdev) } else { dev_dbg(kbdev->dev, "Scheduler PM suspend"); scheduler_suspend(kbdev); - cancel_tick_timer(kbdev); + cancel_tick_work(scheduler); } } @@ -7492,7 +7767,7 @@ void kbase_csf_scheduler_pm_idle(struct kbase_device *kbdev) } KBASE_EXPORT_TEST_API(kbase_csf_scheduler_pm_idle); -int kbase_csf_scheduler_wait_mcu_active(struct kbase_device *kbdev) +static int scheduler_wait_mcu_active(struct kbase_device *kbdev, bool killable_wait) { struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; unsigned long flags; @@ -7505,9 +7780,17 @@ int kbase_csf_scheduler_wait_mcu_active(struct kbase_device *kbdev) spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); kbase_pm_unlock(kbdev); - kbase_pm_wait_for_poweroff_work_complete(kbdev); + if (killable_wait) + err = kbase_pm_killable_wait_for_poweroff_work_complete(kbdev); + else + err = kbase_pm_wait_for_poweroff_work_complete(kbdev); + if (err) + return err; - err = kbase_pm_wait_for_desired_state(kbdev); + if (killable_wait) + err = kbase_pm_killable_wait_for_desired_state(kbdev); + else + err = kbase_pm_wait_for_desired_state(kbdev); if (!err) { spin_lock_irqsave(&kbdev->hwaccess_lock, flags); WARN_ON(kbdev->pm.backend.mcu_state != KBASE_MCU_ON); @@ -7516,6 +7799,17 @@ int kbase_csf_scheduler_wait_mcu_active(struct kbase_device *kbdev) return err; } + +int kbase_csf_scheduler_killable_wait_mcu_active(struct kbase_device *kbdev) +{ + return scheduler_wait_mcu_active(kbdev, true); +} + +int kbase_csf_scheduler_wait_mcu_active(struct kbase_device *kbdev) +{ + return scheduler_wait_mcu_active(kbdev, false); +} + KBASE_EXPORT_TEST_API(kbase_csf_scheduler_wait_mcu_active); #ifdef KBASE_PM_RUNTIME @@ -7594,8 +7888,7 @@ void kbase_csf_scheduler_force_sleep(struct kbase_device *kbdev) struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; rt_mutex_lock(&scheduler->lock); - if (kbase_pm_gpu_sleep_allowed(kbdev) && - (scheduler->state == SCHED_INACTIVE)) + if (kbase_pm_gpu_sleep_allowed(kbdev) && (scheduler->state == SCHED_INACTIVE)) scheduler_sleep_on_idle(kbdev); rt_mutex_unlock(&scheduler->lock); } diff --git a/mali_kbase/csf/mali_kbase_csf_scheduler.h b/mali_kbase/csf/mali_kbase_csf_scheduler.h index 4062d78..88521f0 100644 --- a/mali_kbase/csf/mali_kbase_csf_scheduler.h +++ 
b/mali_kbase/csf/mali_kbase_csf_scheduler.h
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 /*
 *
- * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved.
 *
 * This program is free software and is provided to you under the terms of the
 * GNU General Public License version 2 as published by the Free Software
@@ -338,7 +338,10 @@ kbase_csf_scheduler_spin_lock_assert_held(struct kbase_device *kbdev)
 *
 * Return: true if the scheduler is configured to wake up periodically
 */
-bool kbase_csf_scheduler_timer_is_enabled(struct kbase_device *kbdev);
+static inline bool kbase_csf_scheduler_timer_is_enabled(struct kbase_device *kbdev)
+{
+	return atomic_read(&kbdev->csf.scheduler.timer_enabled);
+}
 /**
 * kbase_csf_scheduler_timer_set_enabled() - Enable/disable periodic
@@ -412,6 +415,22 @@ void kbase_csf_scheduler_pm_idle(struct kbase_device *kbdev);
 int kbase_csf_scheduler_wait_mcu_active(struct kbase_device *kbdev);
 /**
+ * kbase_csf_scheduler_killable_wait_mcu_active - Wait for the MCU to actually become
+ *                                                active in killable state.
+ *
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
+ *
+ * This function is the same as kbase_csf_scheduler_wait_mcu_active(), except that
+ * it would allow the SIGKILL signal to interrupt the wait.
+ * This function is supposed to be called from the code that is executed in ioctl or
+ * Userspace context, wherever it is safe to do so.
+ *
+ * Return: 0 if the MCU was successfully activated, or -ETIMEDOUT code on timeout error or
+ * -ERESTARTSYS if the wait was interrupted.
+ */
+int kbase_csf_scheduler_killable_wait_mcu_active(struct kbase_device *kbdev);
+
+/**
 * kbase_csf_scheduler_pm_resume_no_lock - Reactivate the scheduler on system resume
 *
 * @kbdev: Instance of a GPU platform device that implements a CSF interface.
@@ -474,69 +493,24 @@ static inline bool kbase_csf_scheduler_all_csgs_idle(struct kbase_device *kbdev)
 }
 /**
- * kbase_csf_scheduler_tick_advance_nolock() - Advance the scheduling tick
- *
- * @kbdev: Pointer to the device
- *
- * This function advances the scheduling tick by enqueing the tick work item for
- * immediate execution, but only if the tick hrtimer is active. If the timer
- * is inactive then the tick work item is already in flight.
- * The caller must hold the interrupt lock.
- */
-static inline void
-kbase_csf_scheduler_tick_advance_nolock(struct kbase_device *kbdev)
-{
-	struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
-
-	lockdep_assert_held(&scheduler->interrupt_lock);
-
-	if (scheduler->tick_timer_active) {
-		KBASE_KTRACE_ADD(kbdev, SCHEDULER_TICK_ADVANCE, NULL, 0u);
-		scheduler->tick_timer_active = false;
-		kthread_queue_work(&scheduler->csf_worker, &scheduler->tick_work);
-	} else {
-		KBASE_KTRACE_ADD(kbdev, SCHEDULER_TICK_NOADVANCE, NULL, 0u);
-	}
-}
-
-/**
- * kbase_csf_scheduler_tick_advance() - Advance the scheduling tick
- *
- * @kbdev: Pointer to the device
- *
- * This function advances the scheduling tick by enqueing the tick work item for
- * immediate execution, but only if the tick hrtimer is active. If the timer
- * is inactive then the tick work item is already in flight.
- */ -static inline void kbase_csf_scheduler_tick_advance(struct kbase_device *kbdev) -{ - struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; - unsigned long flags; - - spin_lock_irqsave(&scheduler->interrupt_lock, flags); - kbase_csf_scheduler_tick_advance_nolock(kbdev); - spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); -} - -/** * kbase_csf_scheduler_invoke_tick() - Invoke the scheduling tick * * @kbdev: Pointer to the device * - * This function will queue the scheduling tick work item for immediate - * execution if tick timer is not active. This can be called from interrupt - * context to resume the scheduling after GPU was put to sleep. + * This function wakes up kbase_csf_scheduler_kthread() to perform a scheduling + * tick regardless of whether the tick timer is enabled. This can be called + * from interrupt context to resume the scheduling after GPU was put to sleep. + * + * Caller is expected to check kbase_csf_scheduler.timer_enabled as required + * to see whether it is appropriate before calling this function. */ static inline void kbase_csf_scheduler_invoke_tick(struct kbase_device *kbdev) { struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; - unsigned long flags; KBASE_KTRACE_ADD(kbdev, SCHEDULER_TICK_INVOKE, NULL, 0u); - spin_lock_irqsave(&scheduler->interrupt_lock, flags); - if (!scheduler->tick_timer_active) - kthread_queue_work(&scheduler->csf_worker, &scheduler->tick_work); - spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); + if (atomic_cmpxchg(&scheduler->pending_tick_work, false, true) == false) + complete(&scheduler->kthread_signal); } /** @@ -544,8 +518,11 @@ static inline void kbase_csf_scheduler_invoke_tick(struct kbase_device *kbdev) * * @kbdev: Pointer to the device * - * This function will queue the scheduling tock work item for immediate - * execution. + * This function wakes up kbase_csf_scheduler_kthread() to perform a scheduling + * tock. + * + * Caller is expected to check kbase_csf_scheduler.timer_enabled as required + * to see whether it is appropriate before calling this function. */ static inline void kbase_csf_scheduler_invoke_tock(struct kbase_device *kbdev) { @@ -553,7 +530,7 @@ static inline void kbase_csf_scheduler_invoke_tock(struct kbase_device *kbdev) KBASE_KTRACE_ADD(kbdev, SCHEDULER_TOCK_INVOKE, NULL, 0u); if (atomic_cmpxchg(&scheduler->pending_tock_work, false, true) == false) - kthread_mod_delayed_work(&scheduler->csf_worker, &scheduler->tock_work, 0); + complete(&scheduler->kthread_signal); } /** diff --git a/mali_kbase/csf/mali_kbase_csf_sync_debugfs.c b/mali_kbase/csf/mali_kbase_csf_sync_debugfs.c index a5e0ab5..72c0b6f 100644 --- a/mali_kbase/csf/mali_kbase_csf_sync_debugfs.c +++ b/mali_kbase/csf/mali_kbase_csf_sync_debugfs.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2022-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -23,49 +23,46 @@ #include "mali_kbase_csf_csg_debugfs.h" #include <mali_kbase.h> #include <linux/seq_file.h> +#include <linux/version_compat_defs.h> #if IS_ENABLED(CONFIG_SYNC_FILE) #include "mali_kbase_sync.h" #endif -#if IS_ENABLED(CONFIG_DEBUG_FS) - #define CQS_UNREADABLE_LIVE_VALUE "(unavailable)" -/* GPU queue related values */ -#define GPU_CSF_MOVE_OPCODE ((u64)0x1) -#define GPU_CSF_MOVE32_OPCODE ((u64)0x2) -#define GPU_CSF_SYNC_ADD_OPCODE ((u64)0x25) -#define GPU_CSF_SYNC_SET_OPCODE ((u64)0x26) -#define GPU_CSF_SYNC_WAIT_OPCODE ((u64)0x27) -#define GPU_CSF_SYNC_ADD64_OPCODE ((u64)0x33) -#define GPU_CSF_SYNC_SET64_OPCODE ((u64)0x34) -#define GPU_CSF_SYNC_WAIT64_OPCODE ((u64)0x35) -#define GPU_CSF_CALL_OPCODE ((u64)0x20) +#define CSF_SYNC_DUMP_SIZE 256 -#define MAX_NR_GPU_CALLS (5) -#define INSTR_OPCODE_MASK ((u64)0xFF << 56) -#define INSTR_OPCODE_GET(value) ((value & INSTR_OPCODE_MASK) >> 56) -#define MOVE32_IMM_MASK ((u64)0xFFFFFFFFFUL) -#define MOVE_DEST_MASK ((u64)0xFF << 48) -#define MOVE_DEST_GET(value) ((value & MOVE_DEST_MASK) >> 48) -#define MOVE_IMM_MASK ((u64)0xFFFFFFFFFFFFUL) -#define SYNC_SRC0_MASK ((u64)0xFF << 40) -#define SYNC_SRC1_MASK ((u64)0xFF << 32) -#define SYNC_SRC0_GET(value) (u8)((value & SYNC_SRC0_MASK) >> 40) -#define SYNC_SRC1_GET(value) (u8)((value & SYNC_SRC1_MASK) >> 32) -#define SYNC_WAIT_CONDITION_MASK ((u64)0xF << 28) -#define SYNC_WAIT_CONDITION_GET(value) (u8)((value & SYNC_WAIT_CONDITION_MASK) >> 28) - -/* Enumeration for types of GPU queue sync events for - * the purpose of dumping them through debugfs. +/** + * kbasep_print() - Helper function to print to either debugfs file or dmesg. + * + * @kctx: The kbase context + * @file: The seq_file for printing to. This is NULL if printing to dmesg. + * @fmt: The message to print. + * @...: Arguments to format the message. */ -enum debugfs_gpu_sync_type { - DEBUGFS_GPU_SYNC_WAIT, - DEBUGFS_GPU_SYNC_SET, - DEBUGFS_GPU_SYNC_ADD, - NUM_DEBUGFS_GPU_SYNC_TYPES -}; +__attribute__((format(__printf__, 3, 4))) static void +kbasep_print(struct kbase_context *kctx, struct seq_file *file, const char *fmt, ...) +{ + int len = 0; + char buffer[CSF_SYNC_DUMP_SIZE]; + va_list arglist; + + va_start(arglist, fmt); + len = vsnprintf(buffer, CSF_SYNC_DUMP_SIZE, fmt, arglist); + if (len <= 0) { + pr_err("message write to the buffer failed"); + goto exit; + } + + if (file) + seq_printf(file, buffer); + else + dev_warn(kctx->kbdev->dev, buffer); + +exit: + va_end(arglist); +} /** * kbasep_csf_debugfs_get_cqs_live_u32() - Obtain live (u32) value for a CQS object. @@ -120,11 +117,12 @@ static int kbasep_csf_debugfs_get_cqs_live_u64(struct kbase_context *kctx, u64 o * or Fence Signal command, contained in a * KCPU queue. * - * @file: The seq_file for printing to. + * @buffer: The buffer to write to. + * @length: The length of text in the buffer. * @cmd: The KCPU Command to be printed. * @cmd_name: The name of the command: indicates either a fence SIGNAL or WAIT. 
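 *
 * (Editor's note on the pattern shared by the print helpers below: output is
 * staged into a fixed-size line buffer and appended piecewise with
 *
 *     *length += snprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length, ...);
 *
 * where snprintf caps each write at the space remaining and returns the
 * untruncated length; the assembled line is finally routed by kbasep_print()
 * to either the debugfs seq_file or dmesg.)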
*/ -static void kbasep_csf_sync_print_kcpu_fence_wait_or_signal(struct seq_file *file, +static void kbasep_csf_sync_print_kcpu_fence_wait_or_signal(char *buffer, int *length, struct kbase_kcpu_command *cmd, const char *cmd_name) { @@ -133,38 +131,46 @@ static void kbasep_csf_sync_print_kcpu_fence_wait_or_signal(struct seq_file *fil #else struct dma_fence *fence = NULL; #endif /* LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0) */ - + struct kbase_kcpu_command_fence_info *fence_info; struct kbase_sync_fence_info info; const char *timeline_name = NULL; bool is_signaled = false; - fence = cmd->info.fence.fence; + fence_info = &cmd->info.fence; + if (kbase_kcpu_command_fence_has_force_signaled(fence_info)) + return; + + fence = kbase_fence_get(fence_info); if (WARN_ON(!fence)) return; - kbase_sync_fence_info_get(cmd->info.fence.fence, &info); + kbase_sync_fence_info_get(fence, &info); timeline_name = fence->ops->get_timeline_name(fence); is_signaled = info.status > 0; - seq_printf(file, "cmd:%s obj:0x%pK live_value:0x%.8x | ", cmd_name, cmd->info.fence.fence, - is_signaled); + *length += snprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length, + "cmd:%s obj:0x%pK live_value:0x%.8x | ", cmd_name, fence, is_signaled); /* Note: fence->seqno was u32 until 5.1 kernel, then u64 */ - seq_printf(file, "timeline_name:%s timeline_context:0x%.16llx fence_seqno:0x%.16llx", - timeline_name, fence->context, (u64)fence->seqno); + *length += snprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length, + "timeline_name:%s timeline_context:0x%.16llx fence_seqno:0x%.16llx", + timeline_name, fence->context, (u64)fence->seqno); + + kbase_fence_put(fence); } /** * kbasep_csf_sync_print_kcpu_cqs_wait() - Print details of a CSF SYNC CQS Wait command, * contained in a KCPU queue. * - * @file: The seq_file for printing to. - * @cmd: The KCPU Command to be printed. + * @kctx: The kbase context. + * @buffer: The buffer to write to. + * @length: The length of text in the buffer. + * @cmd: The KCPU Command to be printed. */ -static void kbasep_csf_sync_print_kcpu_cqs_wait(struct seq_file *file, - struct kbase_kcpu_command *cmd) +static void kbasep_csf_sync_print_kcpu_cqs_wait(struct kbase_context *kctx, char *buffer, + int *length, struct kbase_kcpu_command *cmd) { - struct kbase_context *kctx = file->private; size_t i; for (i = 0; i < cmd->info.cqs_wait.nr_objs; i++) { @@ -174,14 +180,19 @@ static void kbasep_csf_sync_print_kcpu_cqs_wait(struct seq_file *file, int ret = kbasep_csf_debugfs_get_cqs_live_u32(kctx, cqs_obj->addr, &live_val); bool live_val_valid = (ret >= 0); - seq_printf(file, "cmd:CQS_WAIT_OPERATION obj:0x%.16llx live_value:", cqs_obj->addr); + *length += + snprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length, + "cmd:CQS_WAIT_OPERATION obj:0x%.16llx live_value:", cqs_obj->addr); if (live_val_valid) - seq_printf(file, "0x%.16llx", (u64)live_val); + *length += snprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length, + "0x%.16llx", (u64)live_val); else - seq_puts(file, CQS_UNREADABLE_LIVE_VALUE); + *length += snprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length, + CQS_UNREADABLE_LIVE_VALUE); - seq_printf(file, " | op:gt arg_value:0x%.8x", cqs_obj->val); + *length += snprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length, + " | op:gt arg_value:0x%.8x", cqs_obj->val); } } @@ -189,13 +200,14 @@ static void kbasep_csf_sync_print_kcpu_cqs_wait(struct seq_file *file, * kbasep_csf_sync_print_kcpu_cqs_set() - Print details of a CSF SYNC CQS * Set command, contained in a KCPU queue. 
* - * @file: The seq_file for printing to. - * @cmd: The KCPU Command to be printed. + * @kctx: The kbase context. + * @buffer: The buffer to write to. + * @length: The length of text in the buffer. + * @cmd: The KCPU Command to be printed. */ -static void kbasep_csf_sync_print_kcpu_cqs_set(struct seq_file *file, - struct kbase_kcpu_command *cmd) +static void kbasep_csf_sync_print_kcpu_cqs_set(struct kbase_context *kctx, char *buffer, + int *length, struct kbase_kcpu_command *cmd) { - struct kbase_context *kctx = file->private; size_t i; for (i = 0; i < cmd->info.cqs_set.nr_objs; i++) { @@ -205,14 +217,19 @@ static void kbasep_csf_sync_print_kcpu_cqs_set(struct seq_file *file, int ret = kbasep_csf_debugfs_get_cqs_live_u32(kctx, cqs_obj->addr, &live_val); bool live_val_valid = (ret >= 0); - seq_printf(file, "cmd:CQS_SET_OPERATION obj:0x%.16llx live_value:", cqs_obj->addr); + *length += + snprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length, + "cmd:CQS_SET_OPERATION obj:0x%.16llx live_value:", cqs_obj->addr); if (live_val_valid) - seq_printf(file, "0x%.16llx", (u64)live_val); + *length += snprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length, + "0x%.16llx", (u64)live_val); else - seq_puts(file, CQS_UNREADABLE_LIVE_VALUE); + *length += snprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length, + CQS_UNREADABLE_LIVE_VALUE); - seq_printf(file, " | op:add arg_value:0x%.8x", 1); + *length += snprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length, + " | op:add arg_value:0x%.8x", 1); } } @@ -271,14 +288,15 @@ static const char *kbasep_csf_sync_get_set_op_name(basep_cqs_set_operation_op op * Wait Operation command, contained * in a KCPU queue. * - * @file: The seq_file for printing to. - * @cmd: The KCPU Command to be printed. + * @kctx: The kbase context. + * @buffer: The buffer to write to. + * @length: The length of text in the buffer. + * @cmd: The KCPU Command to be printed. */ -static void kbasep_csf_sync_print_kcpu_cqs_wait_op(struct seq_file *file, - struct kbase_kcpu_command *cmd) +static void kbasep_csf_sync_print_kcpu_cqs_wait_op(struct kbase_context *kctx, char *buffer, + int *length, struct kbase_kcpu_command *cmd) { size_t i; - struct kbase_context *kctx = file->private; for (i = 0; i < cmd->info.cqs_wait.nr_objs; i++) { struct base_cqs_wait_operation_info *wait_op = @@ -290,14 +308,19 @@ static void kbasep_csf_sync_print_kcpu_cqs_wait_op(struct seq_file *file, bool live_val_valid = (ret >= 0); - seq_printf(file, "cmd:CQS_WAIT_OPERATION obj:0x%.16llx live_value:", wait_op->addr); + *length += + snprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length, + "cmd:CQS_WAIT_OPERATION obj:0x%.16llx live_value:", wait_op->addr); if (live_val_valid) - seq_printf(file, "0x%.16llx", live_val); + *length += snprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length, + "0x%.16llx", live_val); else - seq_puts(file, CQS_UNREADABLE_LIVE_VALUE); + *length += snprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length, + CQS_UNREADABLE_LIVE_VALUE); - seq_printf(file, " | op:%s arg_value:0x%.16llx", op_name, wait_op->val); + *length += snprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length, + " | op:%s arg_value:0x%.16llx", op_name, wait_op->val); } } @@ -306,14 +329,15 @@ static void kbasep_csf_sync_print_kcpu_cqs_wait_op(struct seq_file *file, * Set Operation command, contained * in a KCPU queue. * - * @file: The seq_file for printing to. - * @cmd: The KCPU Command to be printed. + * @kctx: The kbase context. + * @buffer: The buffer to write to. + * @length: The length of text in the buffer. 
+ * @cmd: The KCPU Command to be printed. */ -static void kbasep_csf_sync_print_kcpu_cqs_set_op(struct seq_file *file, - struct kbase_kcpu_command *cmd) +static void kbasep_csf_sync_print_kcpu_cqs_set_op(struct kbase_context *kctx, char *buffer, + int *length, struct kbase_kcpu_command *cmd) { size_t i; - struct kbase_context *kctx = file->private; for (i = 0; i < cmd->info.cqs_set_operation.nr_objs; i++) { struct base_cqs_set_operation_info *set_op = &cmd->info.cqs_set_operation.objs[i]; @@ -325,29 +349,35 @@ static void kbasep_csf_sync_print_kcpu_cqs_set_op(struct seq_file *file, bool live_val_valid = (ret >= 0); - seq_printf(file, "cmd:CQS_SET_OPERATION obj:0x%.16llx live_value:", set_op->addr); + *length += + snprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length, + "cmd:CQS_SET_OPERATION obj:0x%.16llx live_value:", set_op->addr); if (live_val_valid) - seq_printf(file, "0x%.16llx", live_val); + *length += snprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length, + "0x%.16llx", live_val); else - seq_puts(file, CQS_UNREADABLE_LIVE_VALUE); + *length += snprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length, + CQS_UNREADABLE_LIVE_VALUE); - seq_printf(file, " | op:%s arg_value:0x%.16llx", op_name, set_op->val); + *length += snprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length, + " | op:%s arg_value:0x%.16llx", op_name, set_op->val); } } /** * kbasep_csf_kcpu_debugfs_print_queue() - Print debug data for a KCPU queue * + * @kctx: The kbase context. * @file: The seq_file to print to. * @queue: Pointer to the KCPU queue. */ -static void kbasep_csf_sync_kcpu_debugfs_print_queue(struct seq_file *file, +static void kbasep_csf_sync_kcpu_debugfs_print_queue(struct kbase_context *kctx, + struct seq_file *file, struct kbase_kcpu_command_queue *queue) { char started_or_pending; struct kbase_kcpu_command *cmd; - struct kbase_context *kctx = file->private; size_t i; if (WARN_ON(!queue)) @@ -357,72 +387,115 @@ static void kbasep_csf_sync_kcpu_debugfs_print_queue(struct seq_file *file, mutex_lock(&queue->lock); for (i = 0; i != queue->num_pending_cmds; ++i) { + char buffer[CSF_SYNC_DUMP_SIZE]; + int length = 0; started_or_pending = ((i == 0) && queue->command_started) ? 
'S' : 'P'; - seq_printf(file, "queue:KCPU-%u-%u exec:%c ", kctx->id, queue->id, - started_or_pending); + length += snprintf(buffer, CSF_SYNC_DUMP_SIZE, "queue:KCPU-%d-%d exec:%c ", + kctx->id, queue->id, started_or_pending); - cmd = &queue->commands[queue->start_offset + i]; + cmd = &queue->commands[(u8)(queue->start_offset + i)]; switch (cmd->type) { #if IS_ENABLED(CONFIG_SYNC_FILE) case BASE_KCPU_COMMAND_TYPE_FENCE_SIGNAL: - kbasep_csf_sync_print_kcpu_fence_wait_or_signal(file, cmd, "FENCE_SIGNAL"); + kbasep_csf_sync_print_kcpu_fence_wait_or_signal(buffer, &length, cmd, + "FENCE_SIGNAL"); break; case BASE_KCPU_COMMAND_TYPE_FENCE_WAIT: - kbasep_csf_sync_print_kcpu_fence_wait_or_signal(file, cmd, "FENCE_WAIT"); + kbasep_csf_sync_print_kcpu_fence_wait_or_signal(buffer, &length, cmd, + "FENCE_WAIT"); break; #endif case BASE_KCPU_COMMAND_TYPE_CQS_WAIT: - kbasep_csf_sync_print_kcpu_cqs_wait(file, cmd); + kbasep_csf_sync_print_kcpu_cqs_wait(kctx, buffer, &length, cmd); break; case BASE_KCPU_COMMAND_TYPE_CQS_SET: - kbasep_csf_sync_print_kcpu_cqs_set(file, cmd); + kbasep_csf_sync_print_kcpu_cqs_set(kctx, buffer, &length, cmd); break; case BASE_KCPU_COMMAND_TYPE_CQS_WAIT_OPERATION: - kbasep_csf_sync_print_kcpu_cqs_wait_op(file, cmd); + kbasep_csf_sync_print_kcpu_cqs_wait_op(kctx, buffer, &length, cmd); break; case BASE_KCPU_COMMAND_TYPE_CQS_SET_OPERATION: - kbasep_csf_sync_print_kcpu_cqs_set_op(file, cmd); + kbasep_csf_sync_print_kcpu_cqs_set_op(kctx, buffer, &length, cmd); break; default: - seq_puts(file, ", U, Unknown blocking command"); + length += snprintf(buffer + length, CSF_SYNC_DUMP_SIZE - length, + ", U, Unknown blocking command"); break; } - seq_puts(file, "\n"); + length += snprintf(buffer + length, CSF_SYNC_DUMP_SIZE - length, "\n"); + kbasep_print(kctx, file, buffer); } mutex_unlock(&queue->lock); } -/** - * kbasep_csf_sync_kcpu_debugfs_show() - Print CSF KCPU queue sync info - * - * @file: The seq_file for printing to. - * - * Return: Negative error code or 0 on success. 
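(Aside, illustrative only: the refactor above moves from seq_printf() to a fixed-size line buffer that each helper appends to via *length += snprintf(...). A minimal userspace sketch of that bounded-append pattern, where BUF_SIZE stands in for CSF_SYNC_DUMP_SIZE and buf_appendf() is an invented helper; note that snprintf() returns the length that would have been written, so the cursor must be guarded once truncation has occurred:)

#include <stdarg.h>
#include <stdio.h>

#define BUF_SIZE 256 /* stands in for CSF_SYNC_DUMP_SIZE */

static void buf_appendf(char *buf, int *len, const char *fmt, ...)
{
    va_list ap;
    int ret;

    /* After truncation *len may already exceed BUF_SIZE; stop appending. */
    if (*len < 0 || *len >= BUF_SIZE)
        return;

    va_start(ap, fmt);
    ret = vsnprintf(buf + *len, BUF_SIZE - *len, fmt, ap);
    va_end(ap);

    if (ret > 0)
        *len += ret;
}

int main(void)
{
    char line[BUF_SIZE];
    int len = 0;

    buf_appendf(line, &len, "queue:KCPU-%u-%u ", 0u, 1u);
    buf_appendf(line, &len, "live_value:0x%.16llx\n", 0x1ULL);
    fputs(line, stdout);
    return 0;
}

(The driver sizes CSF_SYNC_DUMP_SIZE so that one dump line fits; the guard above is only needed in the general case.)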
- */
-static int kbasep_csf_sync_kcpu_debugfs_show(struct seq_file *file)
+int kbasep_csf_sync_kcpu_dump_locked(struct kbase_context *kctx, struct seq_file *file)
 {
-    struct kbase_context *kctx = file->private;
     unsigned long queue_idx;
 
-    mutex_lock(&kctx->csf.kcpu_queues.lock);
-    seq_printf(file, "KCPU queues for ctx %u:\n", kctx->id);
+    lockdep_assert_held(&kctx->csf.kcpu_queues.lock);
+
+    kbasep_print(kctx, file, "KCPU queues for ctx %d:\n", kctx->id);
 
     queue_idx = find_first_bit(kctx->csf.kcpu_queues.in_use, KBASEP_MAX_KCPU_QUEUES);
 
     while (queue_idx < KBASEP_MAX_KCPU_QUEUES) {
-        kbasep_csf_sync_kcpu_debugfs_print_queue(file,
+        kbasep_csf_sync_kcpu_debugfs_print_queue(kctx, file,
                              kctx->csf.kcpu_queues.array[queue_idx]);
 
         queue_idx = find_next_bit(kctx->csf.kcpu_queues.in_use, KBASEP_MAX_KCPU_QUEUES,
                       queue_idx + 1);
     }
 
+    return 0;
+}
+
+int kbasep_csf_sync_kcpu_dump(struct kbase_context *kctx, struct seq_file *file)
+{
+    mutex_lock(&kctx->csf.kcpu_queues.lock);
+    kbasep_csf_sync_kcpu_dump_locked(kctx, file);
     mutex_unlock(&kctx->csf.kcpu_queues.lock);
     return 0;
 }
 
+#if IS_ENABLED(CONFIG_DEBUG_FS)
+
+/* GPU queue related values */
+#define GPU_CSF_MOVE_OPCODE ((u64)0x1)
+#define GPU_CSF_MOVE32_OPCODE ((u64)0x2)
+#define GPU_CSF_SYNC_ADD_OPCODE ((u64)0x25)
+#define GPU_CSF_SYNC_SET_OPCODE ((u64)0x26)
+#define GPU_CSF_SYNC_WAIT_OPCODE ((u64)0x27)
+#define GPU_CSF_SYNC_ADD64_OPCODE ((u64)0x33)
+#define GPU_CSF_SYNC_SET64_OPCODE ((u64)0x34)
+#define GPU_CSF_SYNC_WAIT64_OPCODE ((u64)0x35)
+#define GPU_CSF_CALL_OPCODE ((u64)0x20)
+
+#define MAX_NR_GPU_CALLS (5)
+#define INSTR_OPCODE_MASK ((u64)0xFF << 56)
+#define INSTR_OPCODE_GET(value) ((value & INSTR_OPCODE_MASK) >> 56)
+#define MOVE32_IMM_MASK ((u64)0xFFFFFFFFUL)
+#define MOVE_DEST_MASK ((u64)0xFF << 48)
+#define MOVE_DEST_GET(value) ((value & MOVE_DEST_MASK) >> 48)
+#define MOVE_IMM_MASK ((u64)0xFFFFFFFFFFFFUL)
+#define SYNC_SRC0_MASK ((u64)0xFF << 40)
+#define SYNC_SRC1_MASK ((u64)0xFF << 32)
+#define SYNC_SRC0_GET(value) (u8)((value & SYNC_SRC0_MASK) >> 40)
+#define SYNC_SRC1_GET(value) (u8)((value & SYNC_SRC1_MASK) >> 32)
+#define SYNC_WAIT_CONDITION_MASK ((u64)0xF << 28)
+#define SYNC_WAIT_CONDITION_GET(value) (u8)((value & SYNC_WAIT_CONDITION_MASK) >> 28)
+
+/* Enumeration for types of GPU queue sync events for
+ * the purpose of dumping them through debugfs.
+ */
+enum debugfs_gpu_sync_type {
+    DEBUGFS_GPU_SYNC_WAIT,
+    DEBUGFS_GPU_SYNC_SET,
+    DEBUGFS_GPU_SYNC_ADD,
+    NUM_DEBUGFS_GPU_SYNC_TYPES
+};
+
 /**
 * kbasep_csf_get_move_immediate_value() - Get the immediate values for sync operations
 * from a MOVE instruction.
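(Aside, illustrative only: the masks defined above carve fixed-width fields out of a 64-bit CSF instruction word, with the opcode in bits 63..56. A self-contained sketch of the decode, using a fabricated SYNC_WAIT word; the macro definitions are copied from above with u64 spelled as uint64_t:)

#include <stdint.h>
#include <stdio.h>

#define INSTR_OPCODE_MASK ((uint64_t)0xFF << 56)
#define INSTR_OPCODE_GET(v) (((v) & INSTR_OPCODE_MASK) >> 56)
#define SYNC_SRC0_MASK ((uint64_t)0xFF << 40)
#define SYNC_SRC0_GET(v) (uint8_t)(((v) & SYNC_SRC0_MASK) >> 40)
#define SYNC_WAIT_CONDITION_MASK ((uint64_t)0xF << 28)
#define SYNC_WAIT_CONDITION_GET(v) (uint8_t)(((v) & SYNC_WAIT_CONDITION_MASK) >> 28)

int main(void)
{
    /* Fabricated SYNC_WAIT instruction: opcode 0x27, src0 reg 0x10, condition 0x1 */
    uint64_t instr = ((uint64_t)0x27 << 56) | ((uint64_t)0x10 << 40) | ((uint64_t)0x1 << 28);

    printf("opcode:0x%llx src0:%u cond:%u\n",
           (unsigned long long)INSTR_OPCODE_GET(instr),
           SYNC_SRC0_GET(instr), SYNC_WAIT_CONDITION_GET(instr));
    return 0;
}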
@@ -476,10 +549,21 @@ static u64 kbasep_csf_read_ringbuffer_value(struct kbase_queue *queue, u32 ringb
     u64 page_off = ringbuff_offset >> PAGE_SHIFT;
     u64 offset_within_page = ringbuff_offset & ~PAGE_MASK;
     struct page *page = as_page(queue->queue_reg->gpu_alloc->pages[page_off]);
-    u64 *ringbuffer = kmap_atomic(page);
-    u64 value = ringbuffer[offset_within_page / sizeof(u64)];
+    u64 *ringbuffer = vmap(&page, 1, VM_MAP, pgprot_noncached(PAGE_KERNEL));
+    u64 value;
+
+    if (!ringbuffer) {
+        struct kbase_context *kctx = queue->kctx;
+
+        dev_err(kctx->kbdev->dev, "%s failed to map the buffer page to read a command!",
+            __func__);
+        /* Return an alternative 0 for the dumping operation */
+        value = 0;
+    } else {
+        value = ringbuffer[offset_within_page / sizeof(u64)];
+        vunmap(ringbuffer);
+    }
 
-    kunmap_atomic(ringbuffer);
     return value;
 }
 
@@ -559,24 +643,25 @@ static void kbasep_csf_print_gpu_sync_op(struct seq_file *file, struct kbase_con
         return;
 
     /* 5. Print info */
-    seq_printf(file, "queue:GPU-%u-%u-%u exec:%c cmd:%s ", kctx->id, queue->group->handle,
-           queue->csi_index, queue->enabled && !follows_wait ? 'S' : 'P',
-           gpu_sync_type_name[type]);
+    kbasep_print(kctx, file, "queue:GPU-%u-%u-%u exec:%c cmd:%s ", kctx->id,
+             queue->group->handle, queue->csi_index,
+             queue->enabled && !follows_wait ? 'S' : 'P', gpu_sync_type_name[type]);
 
     if (queue->group->csg_nr == KBASEP_CSG_NR_INVALID)
-        seq_puts(file, "slot:-");
+        kbasep_print(kctx, file, "slot:-");
     else
-        seq_printf(file, "slot:%d", (int)queue->group->csg_nr);
+        kbasep_print(kctx, file, "slot:%d", (int)queue->group->csg_nr);
 
-    seq_printf(file, " obj:0x%.16llx live_value:0x%.16llx | ", sync_addr, live_val);
+    kbasep_print(kctx, file, " obj:0x%.16llx live_value:0x%.16llx | ", sync_addr, live_val);
 
     if (type == DEBUGFS_GPU_SYNC_WAIT) {
         wait_condition = SYNC_WAIT_CONDITION_GET(sync_cmd);
-        seq_printf(file, "op:%s ", kbasep_csf_sync_get_wait_op_name(wait_condition));
+        kbasep_print(kctx, file, "op:%s ",
+                 kbasep_csf_sync_get_wait_op_name(wait_condition));
     } else
-        seq_printf(file, "op:%s ", gpu_sync_type_op[type]);
+        kbasep_print(kctx, file, "op:%s ", gpu_sync_type_op[type]);
 
-    seq_printf(file, "arg_value:0x%.16llx\n", compare_val);
+    kbasep_print(kctx, file, "arg_value:0x%.16llx\n", compare_val);
 }
 
 /**
@@ -595,7 +680,7 @@ static void kbasep_csf_print_gpu_sync_op(struct seq_file *file, struct kbase_con
 static void kbasep_csf_dump_active_queue_sync_info(struct seq_file *file, struct kbase_queue *queue)
 {
     struct kbase_context *kctx;
-    u32 *addr;
+    u64 *addr;
     u64 cs_extract, cs_insert, instr, cursor;
     bool follows_wait = false;
     int nr_calls = 0;
@@ -605,11 +690,11 @@ static void kbasep_csf_dump_active_queue_sync_info(struct seq_file *file, struct
     kctx = queue->kctx;
 
-    addr = (u32 *)queue->user_io_addr;
-    cs_insert = addr[CS_INSERT_LO / 4] | ((u64)addr[CS_INSERT_HI / 4] << 32);
+    addr = queue->user_io_addr;
+    cs_insert = addr[CS_INSERT_LO / sizeof(*addr)];
 
-    addr = (u32 *)(queue->user_io_addr + PAGE_SIZE);
-    cs_extract = addr[CS_EXTRACT_LO / 4] | ((u64)addr[CS_EXTRACT_HI / 4] << 32);
+    addr = queue->user_io_addr + PAGE_SIZE / sizeof(*addr);
+    cs_extract = addr[CS_EXTRACT_LO / sizeof(*addr)];
 
     cursor = cs_extract;
 
@@ -637,6 +722,7 @@ static void kbasep_csf_dump_active_queue_sync_info(struct seq_file *file, struct
         case GPU_CSF_SYNC_SET64_OPCODE:
         case GPU_CSF_SYNC_WAIT64_OPCODE:
             instr_is_64_bit = true;
+            break;
         default:
             break;
         }
@@ -663,7 +749,7 @@ static void kbasep_csf_dump_active_queue_sync_info(struct seq_file *file, struct
             break;
         case GPU_CSF_CALL_OPCODE:
nr_calls++; - /* Fallthrough */ + break; default: /* Unrecognized command, skip past it */ break; @@ -677,36 +763,37 @@ static void kbasep_csf_dump_active_queue_sync_info(struct seq_file *file, struct * kbasep_csf_dump_active_group_sync_state() - Prints SYNC commands in all GPU queues of * the provided queue group. * + * @kctx: The kbase context * @file: seq_file for printing to. * @group: Address of a GPU command group to iterate through. * * This function will iterate through each queue in the provided GPU queue group and * print its SYNC related commands. */ -static void kbasep_csf_dump_active_group_sync_state(struct seq_file *file, +static void kbasep_csf_dump_active_group_sync_state(struct kbase_context *kctx, + struct seq_file *file, struct kbase_queue_group *const group) { - struct kbase_context *kctx = file->private; unsigned int i; - seq_printf(file, "GPU queues for group %u (slot %d) of ctx %d_%d\n", group->handle, - group->csg_nr, kctx->tgid, kctx->id); + kbasep_print(kctx, file, "GPU queues for group %u (slot %d) of ctx %d_%d\n", group->handle, + group->csg_nr, kctx->tgid, kctx->id); for (i = 0; i < MAX_SUPPORTED_STREAMS_PER_GROUP; i++) kbasep_csf_dump_active_queue_sync_info(file, group->bound_queues[i]); } /** - * kbasep_csf_sync_gpu_debugfs_show() - Print CSF GPU queue sync info + * kbasep_csf_sync_gpu_dump() - Print CSF GPU queue sync info * + * @kctx: The kbase context * @file: The seq_file for printing to. * * Return: Negative error code or 0 on success. */ -static int kbasep_csf_sync_gpu_debugfs_show(struct seq_file *file) +static int kbasep_csf_sync_gpu_dump(struct kbase_context *kctx, struct seq_file *file) { u32 gr; - struct kbase_context *kctx = file->private; struct kbase_device *kbdev; if (WARN_ON(!kctx)) @@ -721,7 +808,7 @@ static int kbasep_csf_sync_gpu_debugfs_show(struct seq_file *file) kbdev->csf.scheduler.csg_slots[gr].resident_group; if (!group || group->kctx != kctx) continue; - kbasep_csf_dump_active_group_sync_state(file, group); + kbasep_csf_dump_active_group_sync_state(kctx, file, group); } kbase_csf_scheduler_unlock(kbdev); @@ -738,10 +825,13 @@ static int kbasep_csf_sync_gpu_debugfs_show(struct seq_file *file) */ static int kbasep_csf_sync_debugfs_show(struct seq_file *file, void *data) { - seq_printf(file, "MALI_CSF_SYNC_DEBUGFS_VERSION: v%u\n", MALI_CSF_SYNC_DEBUGFS_VERSION); + struct kbase_context *kctx = file->private; + + kbasep_print(kctx, file, "MALI_CSF_SYNC_DEBUGFS_VERSION: v%u\n", + MALI_CSF_SYNC_DEBUGFS_VERSION); - kbasep_csf_sync_kcpu_debugfs_show(file); - kbasep_csf_sync_gpu_debugfs_show(file); + kbasep_csf_sync_kcpu_dump(kctx, file); + kbasep_csf_sync_gpu_dump(kctx, file); return 0; } diff --git a/mali_kbase/csf/mali_kbase_csf_sync_debugfs.h b/mali_kbase/csf/mali_kbase_csf_sync_debugfs.h index 177e15d..2fe5060 100644 --- a/mali_kbase/csf/mali_kbase_csf_sync_debugfs.h +++ b/mali_kbase/csf/mali_kbase_csf_sync_debugfs.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2022-2023 ARM Limited. All rights reserved. 
 *
 * This program is free software and is provided to you under the terms of the
 * GNU General Public License version 2 as published by the Free Software
@@ -22,6 +22,8 @@
 #ifndef _KBASE_CSF_SYNC_DEBUGFS_H_
 #define _KBASE_CSF_SYNC_DEBUGFS_H_
 
+#include <linux/seq_file.h>
+
 /* Forward declaration */
 struct kbase_context;
 
@@ -34,4 +36,27 @@ struct kbase_context;
  */
 void kbase_csf_sync_debugfs_init(struct kbase_context *kctx);
 
+/**
+ * kbasep_csf_sync_kcpu_dump() - Print CSF KCPU queue sync info
+ *
+ * @kctx: The kbase context.
+ * @file: The seq_file for printing to.
+ *
+ * Return: Negative error code or 0 on success.
+ *
+ * Note: This function should not be used if kcpu_queues.lock is held. Use
+ * kbasep_csf_sync_kcpu_dump_locked() instead.
+ */
+int kbasep_csf_sync_kcpu_dump(struct kbase_context *kctx, struct seq_file *file);
+
+/**
+ * kbasep_csf_sync_kcpu_dump_locked() - Print CSF KCPU queue sync info (caller holds kcpu_queues.lock)
+ *
+ * @kctx: The kbase context.
+ * @file: The seq_file for printing to.
+ *
+ * Return: Negative error code or 0 on success.
+ */
+int kbasep_csf_sync_kcpu_dump_locked(struct kbase_context *kctx, struct seq_file *file);
+
 #endif /* _KBASE_CSF_SYNC_DEBUGFS_H_ */
diff --git a/mali_kbase/csf/mali_kbase_csf_tiler_heap.c b/mali_kbase/csf/mali_kbase_csf_tiler_heap.c
index 8072a8b..85d8018 100644
--- a/mali_kbase/csf/mali_kbase_csf_tiler_heap.c
+++ b/mali_kbase/csf/mali_kbase_csf_tiler_heap.c
@@ -362,7 +362,7 @@ static struct kbase_csf_tiler_heap_chunk *alloc_new_chunk(struct kbase_context *
     /* If page migration is enabled, we don't want to migrate tiler heap pages.
      * This does not change if the constituent pages are already marked as isolated.
      */
-    if (kbase_page_migration_enabled)
+    if (kbase_is_page_migration_enabled())
         kbase_set_phy_alloc_page_status(chunk->region->gpu_alloc, NOT_MOVABLE);
 
     return chunk;
@@ -748,7 +748,7 @@ int kbase_csf_tiler_heap_init(struct kbase_context *const kctx, u32 const chunk_
                      KBASE_REG_CPU_RD, &heap->buf_desc_map,
                      KBASE_VMAP_FLAG_PERMANENT_MAP_ACCOUNTING);
 
-        if (kbase_page_migration_enabled)
+        if (kbase_is_page_migration_enabled())
             kbase_set_phy_alloc_page_status(buf_desc_reg->gpu_alloc, NOT_MOVABLE);
 
         kbase_gpu_vm_unlock(kctx);
diff --git a/mali_kbase/csf/mali_kbase_csf_timeout.c b/mali_kbase/csf/mali_kbase_csf_timeout.c
index ea6c116..f7fcbb1 100644
--- a/mali_kbase/csf/mali_kbase_csf_timeout.c
+++ b/mali_kbase/csf/mali_kbase_csf_timeout.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 /*
  *
- * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved.
 *
 * This program is free software and is provided to you under the terms of the
 * GNU General Public License version 2 as published by the Free Software
@@ -52,6 +52,7 @@ static int set_timeout(struct kbase_device *const kbdev, u64 const timeout)
     dev_dbg(kbdev->dev, "New progress timeout: %llu cycles\n", timeout);
 
     atomic64_set(&kbdev->csf.progress_timeout, timeout);
+    kbase_device_set_timeout(kbdev, CSF_SCHED_PROTM_PROGRESS_TIMEOUT, timeout, 1);
 
     return 0;
 }
@@ -100,7 +101,7 @@ static ssize_t progress_timeout_store(struct device * const dev,
     if (!err) {
         kbase_csf_scheduler_pm_active(kbdev);
-        err = kbase_csf_scheduler_wait_mcu_active(kbdev);
+        err = kbase_csf_scheduler_killable_wait_mcu_active(kbdev);
         if (!err)
             err = kbase_csf_firmware_set_timeout(kbdev, timeout);
 
@@ -147,8 +148,14 @@ int kbase_csf_timeout_init(struct kbase_device *const kbdev)
     int err;
 
 #if IS_ENABLED(CONFIG_OF)
-    err = of_property_read_u64(kbdev->dev->of_node,
-                   "progress_timeout", &timeout);
+    /* Read "progress-timeout" property and fall back to "progress_timeout"
+     * if not found.
+     */
+    err = of_property_read_u64(kbdev->dev->of_node, "progress-timeout", &timeout);
+
+    if (err == -EINVAL)
+        err = of_property_read_u64(kbdev->dev->of_node, "progress_timeout", &timeout);
+
     if (!err)
         dev_info(kbdev->dev, "Found progress_timeout = %llu in Devicetree\n", timeout);
diff --git a/mali_kbase/csf/mali_kbase_csf_tl_reader.c b/mali_kbase/csf/mali_kbase_csf_tl_reader.c
index 910ba22..ce50683 100644
--- a/mali_kbase/csf/mali_kbase_csf_tl_reader.c
+++ b/mali_kbase/csf/mali_kbase_csf_tl_reader.c
@@ -39,8 +39,6 @@
 #include <linux/version_compat_defs.h>
 #endif
 
-/* Name of the CSFFW timeline tracebuffer. */
-#define KBASE_CSFFW_TRACEBUFFER_NAME "timeline"
 /* Name of the timeline header metadata */
 #define KBASE_CSFFW_TIMELINE_HEADER_NAME "timeline_header"
 
@@ -299,16 +297,13 @@ static int tl_reader_init_late(
     if (self->kbdev)
         return 0;
 
-    tb = kbase_csf_firmware_get_trace_buffer(
-        kbdev, KBASE_CSFFW_TRACEBUFFER_NAME);
+    tb = kbase_csf_firmware_get_trace_buffer(kbdev, KBASE_CSFFW_TIMELINE_BUF_NAME);
     hdr = kbase_csf_firmware_get_timeline_metadata(
         kbdev, KBASE_CSFFW_TIMELINE_HEADER_NAME, &hdr_size);
 
     if (!tb) {
-        dev_warn(
-            kbdev->dev,
-            "'%s' tracebuffer is not present in the firmware image.",
-            KBASE_CSFFW_TRACEBUFFER_NAME);
+        dev_warn(kbdev->dev, "'%s' tracebuffer is not present in the firmware image.",
+             KBASE_CSFFW_TIMELINE_BUF_NAME);
         return -1;
     }
diff --git a/mali_kbase/csf/mali_kbase_csf_trace_buffer.c b/mali_kbase/csf/mali_kbase_csf_trace_buffer.c
index 9ce6776..2b63f19 100644
--- a/mali_kbase/csf/mali_kbase_csf_trace_buffer.c
+++ b/mali_kbase/csf/mali_kbase_csf_trace_buffer.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 /*
  *
- * (C) COPYRIGHT 2018-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved.
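(Aside, illustrative only: the hyphen/underscore handling above is a common devicetree-compatibility shape: try the preferred "progress-timeout" name first and, only on -EINVAL (property absent), retry the legacy "progress_timeout". A kernel-style sketch of the same logic, not a standalone program; read_timeout_with_fallback() is an invented name:)

#include <linux/of.h>

/* Prefer the hyphenated property name; fall back to the legacy
 * underscore form only when the preferred one does not exist.
 */
static int read_timeout_with_fallback(const struct device_node *np, u64 *timeout)
{
    int err = of_property_read_u64(np, "progress-timeout", timeout);

    if (err == -EINVAL) /* property not found: try the legacy name */
        err = of_property_read_u64(np, "progress_timeout", timeout);

    return err;
}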
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -89,7 +89,7 @@ struct firmware_trace_buffer { } cpu_va; u32 num_pages; u32 trace_enable_init_mask[CSF_FIRMWARE_TRACE_ENABLE_INIT_MASK_MAX]; - char name[1]; /* this field must be last */ + char name[]; /* this field must be last */ }; /** @@ -118,16 +118,19 @@ struct firmware_trace_buffer_data { */ static const struct firmware_trace_buffer_data trace_buffer_data[] = { #if MALI_UNIT_TEST - { "fwutf", { 0 }, 1 }, + { KBASE_CSFFW_UTF_BUF_NAME, { 0 }, 1 }, #endif #ifdef CONFIG_MALI_PIXEL_GPU_SSCD /* Enable all the logs */ - { FIRMWARE_LOG_BUF_NAME, { 0xFFFFFFFF }, FW_TRACE_BUF_NR_PAGES }, + { KBASE_CSFFW_LOG_BUF_NAME, { 0xFFFFFFFF }, FW_TRACE_BUF_NR_PAGES }, #else - { FIRMWARE_LOG_BUF_NAME, { 0 }, FW_TRACE_BUF_NR_PAGES }, + { KBASE_CSFFW_LOG_BUF_NAME, { 0 }, FW_TRACE_BUF_NR_PAGES }, #endif /* CONFIG_MALI_PIXEL_GPU_SSCD */ - { "benchmark", { 0 }, 2 }, - { "timeline", { 0 }, KBASE_CSF_TL_BUFFER_NR_PAGES }, + { KBASE_CSFFW_BENCHMARK_BUF_NAME, { 0 }, 2 }, + { KBASE_CSFFW_TIMELINE_BUF_NAME, { 0 }, KBASE_CSF_TL_BUFFER_NR_PAGES }, +#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) + { KBASE_CSFFW_GPU_METRICS_BUF_NAME, { 0 }, 8 }, +#endif /* CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD */ }; int kbase_csf_firmware_trace_buffers_init(struct kbase_device *kbdev) @@ -265,7 +268,7 @@ int kbase_csf_firmware_parse_trace_buffer_entry(struct kbase_device *kbdev, * trace buffer name (with NULL termination). */ trace_buffer = - kmalloc(sizeof(*trace_buffer) + name_len + 1, GFP_KERNEL); + kmalloc(struct_size(trace_buffer, name, name_len + 1), GFP_KERNEL); if (!trace_buffer) return -ENOMEM; @@ -512,6 +515,37 @@ unsigned int kbase_csf_firmware_trace_buffer_read_data( } EXPORT_SYMBOL(kbase_csf_firmware_trace_buffer_read_data); +void kbase_csf_firmware_trace_buffer_discard(struct firmware_trace_buffer *trace_buffer) +{ + unsigned int bytes_discarded; + u32 buffer_size = trace_buffer->num_pages << PAGE_SHIFT; + u32 extract_offset = *(trace_buffer->cpu_va.extract_cpu_va); + u32 insert_offset = *(trace_buffer->cpu_va.insert_cpu_va); + unsigned int trace_size; + + if (insert_offset >= extract_offset) { + trace_size = insert_offset - extract_offset; + if (trace_size > buffer_size / 2) { + bytes_discarded = trace_size - buffer_size / 2; + extract_offset += bytes_discarded; + *(trace_buffer->cpu_va.extract_cpu_va) = extract_offset; + } + } else { + unsigned int bytes_tail; + + bytes_tail = buffer_size - extract_offset; + trace_size = bytes_tail + insert_offset; + if (trace_size > buffer_size / 2) { + bytes_discarded = trace_size - buffer_size / 2; + extract_offset += bytes_discarded; + if (extract_offset >= buffer_size) + extract_offset = extract_offset - buffer_size; + *(trace_buffer->cpu_va.extract_cpu_va) = extract_offset; + } + } +} +EXPORT_SYMBOL(kbase_csf_firmware_trace_buffer_discard); + static void update_trace_buffer_active_mask64(struct firmware_trace_buffer *tb, u64 mask) { unsigned int i; diff --git a/mali_kbase/csf/mali_kbase_csf_trace_buffer.h b/mali_kbase/csf/mali_kbase_csf_trace_buffer.h index 037dc22..c0a42ca 100644 --- a/mali_kbase/csf/mali_kbase_csf_trace_buffer.h +++ b/mali_kbase/csf/mali_kbase_csf_trace_buffer.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2018-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved. 
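(Aside, illustrative only: kbase_csf_firmware_trace_buffer_discard() above advances the extract offset so that at most half of the ring buffer stays unread, including the wrapped case where insert sits behind extract. The same arithmetic modelled in plain C; discard_to_half() is an invented name and offsets are byte positions within a ring of buffer_size bytes:)

#include <stdint.h>
#include <stdio.h>

static uint32_t discard_to_half(uint32_t extract, uint32_t insert, uint32_t buffer_size)
{
    /* Bytes currently unread, accounting for wrap-around. */
    uint32_t used = (insert >= extract) ? insert - extract
                        : (buffer_size - extract) + insert;

    if (used > buffer_size / 2) {
        extract += used - buffer_size / 2;
        if (extract >= buffer_size)
            extract -= buffer_size; /* wrap the extract pointer */
    }
    return extract;
}

int main(void)
{
    /* Linear case: 800 unread bytes in a 1024-byte ring -> keep 512. */
    printf("%u\n", discard_to_half(100, 900, 1024)); /* prints 388 */
    /* Wrapped case: 824 unread bytes -> keep 512, extract wraps. */
    printf("%u\n", discard_to_half(900, 700, 1024)); /* prints 188 */
    return 0;
}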
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -25,8 +25,16 @@ #include <linux/types.h> #define CSF_FIRMWARE_TRACE_ENABLE_INIT_MASK_MAX (4) -#define FIRMWARE_LOG_BUF_NAME "fwlog" #define FW_TRACE_BUF_NR_PAGES 4 +#if MALI_UNIT_TEST +#define KBASE_CSFFW_UTF_BUF_NAME "fwutf" +#endif +#define KBASE_CSFFW_LOG_BUF_NAME "fwlog" +#define KBASE_CSFFW_BENCHMARK_BUF_NAME "benchmark" +#define KBASE_CSFFW_TIMELINE_BUF_NAME "timeline" +#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) +#define KBASE_CSFFW_GPU_METRICS_BUF_NAME "gpu_metrics" +#endif /* CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD */ /* Forward declarations */ struct firmware_trace_buffer; @@ -117,7 +125,8 @@ struct firmware_trace_buffer *kbase_csf_firmware_get_trace_buffer( struct kbase_device *kbdev, const char *name); /** - * kbase_csf_firmware_trace_buffer_get_trace_enable_bits_count - Get number of trace enable bits for a trace buffer + * kbase_csf_firmware_trace_buffer_get_trace_enable_bits_count - Get number of trace enable bits + * for a trace buffer * * @trace_buffer: Trace buffer handle * @@ -167,6 +176,15 @@ unsigned int kbase_csf_firmware_trace_buffer_read_data( struct firmware_trace_buffer *trace_buffer, u8 *data, unsigned int num_bytes); /** + * kbase_csf_firmware_trace_buffer_discard - Discard data from a trace buffer + * + * @trace_buffer: Trace buffer handle + * + * Discard part of the data in the trace buffer to reduce its utilization to half of its size. + */ +void kbase_csf_firmware_trace_buffer_discard(struct firmware_trace_buffer *trace_buffer); + +/** * kbase_csf_firmware_trace_buffer_get_active_mask64 - Get trace buffer active mask * * @tb: Trace buffer handle diff --git a/mali_kbase/device/backend/mali_kbase_device_csf.c b/mali_kbase/device/backend/mali_kbase_device_csf.c index 492684f..571761f 100644 --- a/mali_kbase/device/backend/mali_kbase_device_csf.c +++ b/mali_kbase/device/backend/mali_kbase_device_csf.c @@ -34,13 +34,16 @@ #include <mali_kbase.h> #include <backend/gpu/mali_kbase_irq_internal.h> #include <backend/gpu/mali_kbase_pm_internal.h> -#include <backend/gpu/mali_kbase_js_internal.h> #include <backend/gpu/mali_kbase_clk_rate_trace_mgr.h> #include <csf/mali_kbase_csf_csg_debugfs.h> +#include <csf/mali_kbase_csf_kcpu_fence_debugfs.h> #include <hwcnt/mali_kbase_hwcnt_virtualizer.h> #include <mali_kbase_kinstr_prfcnt.h> #include <mali_kbase_vinstr.h> #include <tl/mali_kbase_timeline.h> +#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) +#include <mali_kbase_gpu_metrics.h> +#endif /** * kbase_device_firmware_hwcnt_term - Terminate CSF firmware and HWC @@ -84,10 +87,6 @@ static int kbase_backend_late_init(struct kbase_device *kbdev) if (err) goto fail_pm_powerup; - err = kbase_backend_timer_init(kbdev); - if (err) - goto fail_timer; - #ifdef CONFIG_MALI_DEBUG #if IS_ENABLED(CONFIG_MALI_REAL_HW) if (kbasep_common_test_interrupt_handlers(kbdev) != 0) { @@ -123,10 +122,6 @@ static int kbase_backend_late_init(struct kbase_device *kbdev) if (err) goto fail_update_l2_features; - err = kbase_backend_time_init(kbdev); - if (err) - goto fail_update_l2_features; - init_waitqueue_head(&kbdev->hwaccess.backend.reset_wait); kbase_pm_context_idle(kbdev); @@ -148,8 +143,6 @@ fail_interrupt_test: #endif /* IS_ENABLED(CONFIG_MALI_REAL_HW) */ #endif /* CONFIG_MALI_DEBUG */ - kbase_backend_timer_term(kbdev); -fail_timer: kbase_pm_context_idle(kbdev); kbase_hwaccess_pm_halt(kbdev); fail_pm_powerup: @@ -285,20 
+278,21 @@ static void kbase_device_hwcnt_backend_csf_term(struct kbase_device *kbdev) static const struct kbase_device_init dev_init[] = { #if !IS_ENABLED(CONFIG_MALI_REAL_HW) - { kbase_gpu_device_create, kbase_gpu_device_destroy, - "Dummy model initialization failed" }, + { kbase_gpu_device_create, kbase_gpu_device_destroy, "Dummy model initialization failed" }, #else /* !IS_ENABLED(CONFIG_MALI_REAL_HW) */ { assign_irqs, NULL, "IRQ search failed" }, #endif /* !IS_ENABLED(CONFIG_MALI_REAL_HW) */ #if !IS_ENABLED(CONFIG_MALI_NO_MALI) { registers_map, registers_unmap, "Register map failed" }, #endif /* !IS_ENABLED(CONFIG_MALI_NO_MALI) */ +#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) + { kbase_gpu_metrics_init, kbase_gpu_metrics_term, "GPU metrics initialization failed" }, +#endif /* IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) */ { power_control_init, power_control_term, "Power control initialization failed" }, { kbase_device_io_history_init, kbase_device_io_history_term, "Register access history initialization failed" }, { kbase_device_early_init, kbase_device_early_term, "Early device initialization failed" }, - { kbase_device_populate_max_freq, NULL, "Populating max frequency failed" }, - { kbase_pm_lowest_gpu_freq_init, NULL, "Lowest freq initialization failed" }, + { kbase_backend_time_init, NULL, "Time backend initialization failed" }, { kbase_device_misc_init, kbase_device_misc_term, "Miscellaneous device initialization failed" }, { kbase_device_pcm_dev_init, kbase_device_pcm_dev_term, @@ -330,6 +324,8 @@ static const struct kbase_device_init dev_init[] = { { kbase_debug_csf_fault_init, kbase_debug_csf_fault_term, "CSF fault debug initialization failed" }, { kbase_device_debugfs_init, kbase_device_debugfs_term, "DebugFS initialization failed" }, + { kbase_csf_fence_timer_debugfs_init, kbase_csf_fence_timer_debugfs_term, + "Fence timeout DebugFS initialization failed" }, /* Sysfs init needs to happen before registering the device with * misc_register(), otherwise it causes a race condition between * registering the device and a uevent event being generated for @@ -522,4 +518,4 @@ out: return ret; } -KBASE_EXPORT_TEST_API(kbase_device_firmware_init_once); +KBASE_EXPORT_TEST_API(kbase_device_firmware_init_once);
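(Aside, illustrative only: dev_init[] above is kbase's table-driven bring-up: each entry pairs an init function with its matching term function and an error message, the entries run in order, and on failure everything already initialised is torn down in reverse. A compact model of that control flow with invented step names:)

#include <stdio.h>

struct init_step {
    int (*init)(void);
    void (*term)(void);
    const char *err_mes;
};

static int ok_init(void) { return 0; }
static void ok_term(void) { puts("rolled back"); }
static int bad_init(void) { return -1; }

static const struct init_step steps[] = {
    { ok_init, ok_term, "step 0 failed" },
    { bad_init, NULL, "step 1 failed" },
};

int main(void)
{
    int i, err = 0;
    const int n = (int)(sizeof(steps) / sizeof(steps[0]));

    for (i = 0; i < n; i++) {
        err = steps[i].init();
        if (err) {
            printf("%s\n", steps[i].err_mes);
            break;
        }
    }
    /* Unwind the steps that did initialise, in reverse order. */
    for (i--; err && i >= 0; i--)
        if (steps[i].term)
            steps[i].term();
    return err ? 1 : 0;
}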
\ No newline at end of file diff --git a/mali_kbase/device/backend/mali_kbase_device_hw_csf.c b/mali_kbase/device/backend/mali_kbase_device_hw_csf.c index 5e27094..c837f5a 100644 --- a/mali_kbase/device/backend/mali_kbase_device_hw_csf.c +++ b/mali_kbase/device/backend/mali_kbase_device_hw_csf.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2020-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -58,7 +58,7 @@ static void kbase_gpu_fault_interrupt(struct kbase_device *kbdev) { const u32 status = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_FAULTSTATUS)); - const bool as_valid = status & GPU_FAULTSTATUS_JASID_VALID_FLAG; + const bool as_valid = status & GPU_FAULTSTATUS_JASID_VALID_MASK; const u32 as_nr = (status & GPU_FAULTSTATUS_JASID_MASK) >> GPU_FAULTSTATUS_JASID_SHIFT; bool bus_fault = (status & GPU_FAULTSTATUS_EXCEPTION_TYPE_MASK) == diff --git a/mali_kbase/device/backend/mali_kbase_device_hw_jm.c b/mali_kbase/device/backend/mali_kbase_device_hw_jm.c index 38223af..8f7b39b 100644 --- a/mali_kbase/device/backend/mali_kbase_device_hw_jm.c +++ b/mali_kbase/device/backend/mali_kbase_device_hw_jm.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2020-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -124,9 +124,10 @@ KBASE_EXPORT_TEST_API(kbase_reg_write); u32 kbase_reg_read(struct kbase_device *kbdev, u32 offset) { - u32 val; + u32 val = 0; - WARN_ON(!kbdev->pm.backend.gpu_powered); + if (WARN_ON(!kbdev->pm.backend.gpu_powered)) + return val; val = readl(kbdev->reg + offset); diff --git a/mali_kbase/device/backend/mali_kbase_device_jm.c b/mali_kbase/device/backend/mali_kbase_device_jm.c index 14b5602..89635b5 100644 --- a/mali_kbase/device/backend/mali_kbase_device_jm.c +++ b/mali_kbase/device/backend/mali_kbase_device_jm.c @@ -45,6 +45,9 @@ #include <backend/gpu/mali_kbase_pm_internal.h> #include <mali_kbase_dummy_job_wa.h> #include <backend/gpu/mali_kbase_clk_rate_trace_mgr.h> +#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) +#include <mali_kbase_gpu_metrics.h> +#endif /** * kbase_backend_late_init - Perform any backend-specific initialization. 
@@ -102,10 +105,6 @@ static int kbase_backend_late_init(struct kbase_device *kbdev) if (err) goto fail_update_l2_features; - err = kbase_backend_time_init(kbdev); - if (err) - goto fail_update_l2_features; - init_waitqueue_head(&kbdev->hwaccess.backend.reset_wait); /* Idle the GPU and/or cores, if the policy wants it to */ @@ -224,12 +223,14 @@ static const struct kbase_device_init dev_init[] = { #if !IS_ENABLED(CONFIG_MALI_NO_MALI) { registers_map, registers_unmap, "Register map failed" }, #endif /* !IS_ENABLED(CONFIG_MALI_NO_MALI) */ +#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) + { kbase_gpu_metrics_init, kbase_gpu_metrics_term, "GPU metrics initialization failed" }, +#endif /* IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) */ { kbase_device_io_history_init, kbase_device_io_history_term, "Register access history initialization failed" }, { kbase_device_pm_init, kbase_device_pm_term, "Power management initialization failed" }, { kbase_device_early_init, kbase_device_early_term, "Early device initialization failed" }, - { kbase_device_populate_max_freq, NULL, "Populating max frequency failed" }, - { kbase_pm_lowest_gpu_freq_init, NULL, "Lowest freq initialization failed" }, + { kbase_backend_time_init, NULL, "Time backend initialization failed" }, { kbase_device_misc_init, kbase_device_misc_term, "Miscellaneous device initialization failed" }, { kbase_device_pcm_dev_init, kbase_device_pcm_dev_term, @@ -363,4 +364,4 @@ int kbase_device_firmware_init_once(struct kbase_device *kbdev) mutex_unlock(&kbdev->fw_load_lock); return ret; -} +}
\ No newline at end of file
diff --git a/mali_kbase/device/mali_kbase_device.c b/mali_kbase/device/mali_kbase_device.c
index e90e791..e5b3e2b 100644
--- a/mali_kbase/device/mali_kbase_device.c
+++ b/mali_kbase/device/mali_kbase_device.c
@@ -230,11 +230,14 @@ int kbase_device_misc_init(struct kbase_device * const kbdev)
     kbdev->cci_snoop_enabled = false;
     np = kbdev->dev->of_node;
     if (np != NULL) {
-        if (of_property_read_u32(np, "snoop_enable_smc",
-                     &kbdev->snoop_enable_smc))
+        /* Read "-" versions of the properties and fall back to "_"
+         * if these are not found
+         */
+        if (of_property_read_u32(np, "snoop-enable-smc", &kbdev->snoop_enable_smc) &&
+            of_property_read_u32(np, "snoop_enable_smc", &kbdev->snoop_enable_smc))
             kbdev->snoop_enable_smc = 0;
-        if (of_property_read_u32(np, "snoop_disable_smc",
-                     &kbdev->snoop_disable_smc))
+        if (of_property_read_u32(np, "snoop-disable-smc", &kbdev->snoop_disable_smc) &&
+            of_property_read_u32(np, "snoop_disable_smc", &kbdev->snoop_disable_smc))
             kbdev->snoop_disable_smc = 0;
         /* Either both or none of the calls should be provided. */
         if (!((kbdev->snoop_disable_smc == 0
@@ -306,13 +309,13 @@ int kbase_device_misc_init(struct kbase_device * const kbdev)
     kbdev->pm.dvfs_period = DEFAULT_PM_DVFS_PERIOD;
 
 #if MALI_USE_CSF
-    kbdev->reset_timeout_ms = kbase_get_timeout_ms(kbdev, CSF_CSG_SUSPEND_TIMEOUT);
-#else
+    kbdev->reset_timeout_ms = kbase_get_timeout_ms(kbdev, CSF_GPU_RESET_TIMEOUT);
+#else /* MALI_USE_CSF */
     kbdev->reset_timeout_ms = JM_DEFAULT_RESET_TIMEOUT_MS;
-#endif /* MALI_USE_CSF */
+#endif /* !MALI_USE_CSF */
 
     kbdev->mmu_mode = kbase_mmu_mode_get_aarch64();
-    kbdev->mmu_as_inactive_wait_time_ms =
+    kbdev->mmu_or_gpu_cache_op_wait_time_ms =
         kbase_get_timeout_ms(kbdev, MMU_AS_INACTIVE_WAIT_TIMEOUT);
     mutex_init(&kbdev->kctx_list_lock);
     INIT_LIST_HEAD(&kbdev->kctx_list);
@@ -327,9 +330,13 @@ int kbase_device_misc_init(struct kbase_device * const kbdev)
         kbdev->oom_notifier_block.notifier_call = NULL;
     }
 
-#if MALI_USE_CSF && IS_ENABLED(CONFIG_SYNC_FILE)
+#if MALI_USE_CSF
+#if IS_ENABLED(CONFIG_SYNC_FILE)
     atomic_set(&kbdev->live_fence_metadata, 0);
+#endif /* IS_ENABLED(CONFIG_SYNC_FILE) */
+    atomic_set(&kbdev->fence_signal_timeout_enabled, 1);
 #endif
+
     return 0;
 
 term_as:
@@ -367,8 +374,7 @@ void kbase_device_free(struct kbase_device *kbdev)
 
 void kbase_device_id_init(struct kbase_device *kbdev)
 {
-    scnprintf(kbdev->devname, DEVNAME_SIZE, "%s%d", kbase_drv_name,
-          kbase_dev_nr);
+    scnprintf(kbdev->devname, DEVNAME_SIZE, "%s%d", KBASE_DRV_NAME, kbase_dev_nr);
     kbdev->id = kbase_dev_nr;
 }
 
diff --git a/mali_kbase/device/mali_kbase_device.h b/mali_kbase/device/mali_kbase_device.h
index f025011..e9cb5c2 100644
--- a/mali_kbase/device/mali_kbase_device.h
+++ b/mali_kbase/device/mali_kbase_device.h
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 /*
  *
- * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved.
 *
 * This program is free software and is provided to you under the terms of the
 * GNU General Public License version 2 as published by the Free Software
@@ -191,6 +191,7 @@ void kbase_gpu_wait_cache_clean(struct kbase_device *kbdev);
 * called from paths (like GPU reset) where an indefinite wait for the
 * completion of cache clean operation can cause deadlock, as the operation may
 * never complete.
+ * If the cache clean times out, the GPU is reset to recover.
 *
 * Return: 0 if successful or a negative error code on failure.
 */
diff --git a/mali_kbase/device/mali_kbase_device_hw.c b/mali_kbase/device/mali_kbase_device_hw.c
index 8b4588e..8126b9b 100644
--- a/mali_kbase/device/mali_kbase_device_hw.c
+++ b/mali_kbase/device/mali_kbase_device_hw.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 /*
  *
- * (C) COPYRIGHT 2014-2016, 2018-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved.
 *
 * This program is free software and is provided to you under the terms of the
 * GNU General Public License version 2 as published by the Free Software
@@ -27,34 +27,47 @@
 #include <mali_kbase_reset_gpu.h>
 #include <mmu/mali_kbase_mmu.h>
 
-#if !IS_ENABLED(CONFIG_MALI_NO_MALI)
 bool kbase_is_gpu_removed(struct kbase_device *kbdev)
 {
-    u32 val;
+    if (!IS_ENABLED(CONFIG_MALI_ARBITER_SUPPORT))
+        return false;
 
-    val = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_ID));
-
-    return val == 0;
+    return (kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_ID)) == 0);
 }
-#endif /* !IS_ENABLED(CONFIG_MALI_NO_MALI) */
 
-static int busy_wait_on_irq(struct kbase_device *kbdev, u32 irq_bit)
+/**
+ * busy_wait_cache_operation - Wait for a pending cache flush to complete
+ *
+ * @kbdev: Pointer to kbase device.
+ * @irq_bit: IRQ bit of the cache flush operation to wait on.
+ *
+ * It will reset the GPU if the wait fails.
+ *
+ * Return: 0 on success, error code otherwise.
+ */
+static int busy_wait_cache_operation(struct kbase_device *kbdev, u32 irq_bit)
 {
-    char *irq_flag_name;
-    /* Previously MMU-AS command was used for L2 cache flush on page-table update.
-     * And we're using the same max-loops count for GPU command, because amount of
-     * L2 cache flush overhead are same between them.
-     */
-    unsigned int max_loops = KBASE_AS_INACTIVE_MAX_LOOPS;
+    const ktime_t wait_loop_start = ktime_get_raw();
+    const u32 wait_time_ms = kbdev->mmu_or_gpu_cache_op_wait_time_ms;
+    bool completed = false;
+    s64 diff;
+
+    do {
+        unsigned int i;
+
+        for (i = 0; i < 1000; i++) {
+            if (kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT)) & irq_bit) {
+                completed = true;
+                break;
+            }
+        }
 
-    /* Wait for the GPU cache clean operation to complete */
-    while (--max_loops &&
-           !(kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT)) & irq_bit)) {
-        ;
-    }
+        diff = ktime_to_ms(ktime_sub(ktime_get_raw(), wait_loop_start));
+    } while ((diff < wait_time_ms) && !completed);
+
+    if (!completed) {
+        char *irq_flag_name;
 
-    /* reset gpu if time-out occurred */
-    if (max_loops == 0) {
         switch (irq_bit) {
         case CLEAN_CACHES_COMPLETED:
             irq_flag_name = "CLEAN_CACHES_COMPLETED";
@@ -68,15 +81,15 @@ static int busy_wait_on_irq(struct kbase_device *kbdev, u32 irq_bit)
         }
 
         dev_err(kbdev->dev,
-            "Stuck waiting on %s bit, might be caused by slow/unstable GPU clock or possible faulty FPGA connector\n",
+            "Stuck waiting on %s bit, might be due to unstable GPU clk/pwr or possible faulty FPGA connector\n",
             irq_flag_name);
 
         if (kbase_prepare_to_reset_gpu_locked(kbdev, RESET_FLAGS_NONE))
             kbase_reset_gpu_locked(kbdev);
+        return -EBUSY;
     }
 
-    /* Clear the interrupt bit. */
     KBASE_KTRACE_ADD(kbdev, CORE_GPU_IRQ_CLEAR, NULL, irq_bit);
     kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), irq_bit);
 
@@ -110,7 +123,7 @@ int kbase_gpu_cache_flush_pa_range_and_busy_wait(struct kbase_device *kbdev, phy
     kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), flush_op);
 
     /* 3. Busy-wait irq status to be enabled.
     */
-    ret = busy_wait_on_irq(kbdev, (u32)FLUSH_PA_RANGE_COMPLETED);
+    ret = busy_wait_cache_operation(kbdev, (u32)FLUSH_PA_RANGE_COMPLETED);
 
     return ret;
 }
@@ -143,7 +156,7 @@ int kbase_gpu_cache_flush_and_busy_wait(struct kbase_device *kbdev,
             irq_mask & ~CLEAN_CACHES_COMPLETED);
 
     /* busy wait irq status to be enabled */
-    ret = busy_wait_on_irq(kbdev, (u32)CLEAN_CACHES_COMPLETED);
+    ret = busy_wait_cache_operation(kbdev, (u32)CLEAN_CACHES_COMPLETED);
     if (ret)
         return ret;
 
@@ -164,7 +177,7 @@ int kbase_gpu_cache_flush_and_busy_wait(struct kbase_device *kbdev,
     kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), flush_op);
 
     /* 3. Busy-wait irq status to be enabled. */
-    ret = busy_wait_on_irq(kbdev, (u32)CLEAN_CACHES_COMPLETED);
+    ret = busy_wait_cache_operation(kbdev, (u32)CLEAN_CACHES_COMPLETED);
     if (ret)
         return ret;
 
@@ -271,8 +284,9 @@ static inline bool get_cache_clean_flag(struct kbase_device *kbdev)
 void kbase_gpu_wait_cache_clean(struct kbase_device *kbdev)
 {
     while (get_cache_clean_flag(kbdev)) {
-        wait_event_interruptible(kbdev->cache_clean_wait,
-                     !kbdev->cache_clean_in_progress);
+        if (wait_event_interruptible(kbdev->cache_clean_wait,
+                         !kbdev->cache_clean_in_progress))
+            dev_warn(kbdev->dev, "Wait for cache clean was interrupted");
     }
 }
 
@@ -280,6 +294,7 @@ int kbase_gpu_wait_cache_clean_timeout(struct kbase_device *kbdev,
                        unsigned int wait_timeout_ms)
 {
     long remaining = msecs_to_jiffies(wait_timeout_ms);
+    int result = 0;
 
     while (remaining && get_cache_clean_flag(kbdev)) {
         remaining = wait_event_timeout(kbdev->cache_clean_wait,
@@ -287,5 +302,15 @@ int kbase_gpu_wait_cache_clean_timeout(struct kbase_device *kbdev,
                            remaining);
     }
 
-    return (remaining ? 0 : -ETIMEDOUT);
+    if (!remaining) {
+        dev_err(kbdev->dev,
+            "Cache clean timed out. Might be caused by unstable GPU clk/pwr or faulty system");
+
+        if (kbase_prepare_to_reset_gpu_locked(kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR))
+            kbase_reset_gpu_locked(kbdev);
+
+        result = -ETIMEDOUT;
+    }
+
+    return result;
 }
diff --git a/mali_kbase/gpu/backend/mali_kbase_gpu_regmap_csf.h b/mali_kbase/gpu/backend/mali_kbase_gpu_regmap_csf.h
index e7457dd..ab989e0 100644
--- a/mali_kbase/gpu/backend/mali_kbase_gpu_regmap_csf.h
+++ b/mali_kbase/gpu/backend/mali_kbase_gpu_regmap_csf.h
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 /*
  *
- * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved.
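(Aside, illustrative only: busy_wait_cache_operation() above replaces a fixed iteration budget with a wall-clock budget: it polls the IRQ status in bursts of 1000 reads and only re-checks elapsed time between bursts. The same shape in portable C; poll_until(), never_done() and the 100 ms budget are invented:)

#include <stdbool.h>
#include <stdio.h>
#include <time.h>

static long long now_ms(void)
{
    struct timespec ts;

    clock_gettime(CLOCK_MONOTONIC, &ts);
    return (long long)ts.tv_sec * 1000 + ts.tv_nsec / 1000000;
}

static bool poll_until(bool (*done)(void), long long budget_ms)
{
    const long long start = now_ms();

    do {
        int i;

        /* Poll in bursts so the clock is read once per 1000 checks. */
        for (i = 0; i < 1000; i++)
            if (done())
                return true;
    } while (now_ms() - start < budget_ms);

    return false; /* caller decides how to recover, e.g. reset */
}

static bool never_done(void) { return false; }

int main(void)
{
    printf("%s\n", poll_until(never_done, 100) ? "completed" : "timed out");
    return 0;
}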
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -28,6 +28,17 @@ #error "Cannot be compiled with JM" #endif +/* GPU control registers */ +#define MCU_CONTROL 0x700 + +#define L2_CONFIG_PBHA_HWU_SHIFT GPU_U(12) +#define L2_CONFIG_PBHA_HWU_MASK (GPU_U(0xF) << L2_CONFIG_PBHA_HWU_SHIFT) +#define L2_CONFIG_PBHA_HWU_GET(reg_val) \ + (((reg_val)&L2_CONFIG_PBHA_HWU_MASK) >> L2_CONFIG_PBHA_HWU_SHIFT) +#define L2_CONFIG_PBHA_HWU_SET(reg_val, value) \ + (((reg_val) & ~L2_CONFIG_PBHA_HWU_MASK) | \ + (((value) << L2_CONFIG_PBHA_HWU_SHIFT) & L2_CONFIG_PBHA_HWU_MASK)) + /* GPU_CONTROL_MCU base address */ #define GPU_CONTROL_MCU_BASE 0x3000 @@ -35,35 +46,39 @@ #define MCU_SUBSYSTEM_BASE 0x20000 /* IPA control registers */ -#define COMMAND 0x000 /* (WO) Command register */ -#define TIMER 0x008 /* (RW) Timer control register */ - -#define SELECT_CSHW_LO 0x010 /* (RW) Counter select for CS hardware, low word */ -#define SELECT_CSHW_HI 0x014 /* (RW) Counter select for CS hardware, high word */ -#define SELECT_MEMSYS_LO 0x018 /* (RW) Counter select for Memory system, low word */ -#define SELECT_MEMSYS_HI 0x01C /* (RW) Counter select for Memory system, high word */ -#define SELECT_TILER_LO 0x020 /* (RW) Counter select for Tiler cores, low word */ -#define SELECT_TILER_HI 0x024 /* (RW) Counter select for Tiler cores, high word */ -#define SELECT_SHADER_LO 0x028 /* (RW) Counter select for Shader cores, low word */ -#define SELECT_SHADER_HI 0x02C /* (RW) Counter select for Shader cores, high word */ +#define IPA_CONTROL_BASE 0x40000 +#define IPA_CONTROL_REG(r) (IPA_CONTROL_BASE + (r)) + +#define COMMAND 0x000 /* (WO) Command register */ +#define STATUS 0x004 /* (RO) Status register */ +#define TIMER 0x008 /* (RW) Timer control register */ + +#define SELECT_CSHW_LO 0x010 /* (RW) Counter select for CS hardware, low word */ +#define SELECT_CSHW_HI 0x014 /* (RW) Counter select for CS hardware, high word */ +#define SELECT_MEMSYS_LO 0x018 /* (RW) Counter select for Memory system, low word */ +#define SELECT_MEMSYS_HI 0x01C /* (RW) Counter select for Memory system, high word */ +#define SELECT_TILER_LO 0x020 /* (RW) Counter select for Tiler cores, low word */ +#define SELECT_TILER_HI 0x024 /* (RW) Counter select for Tiler cores, high word */ +#define SELECT_SHADER_LO 0x028 /* (RW) Counter select for Shader cores, low word */ +#define SELECT_SHADER_HI 0x02C /* (RW) Counter select for Shader cores, high word */ /* Accumulated counter values for CS hardware */ -#define VALUE_CSHW_BASE 0x100 -#define VALUE_CSHW_REG_LO(n) (VALUE_CSHW_BASE + ((n) << 3)) /* (RO) Counter value #n, low word */ -#define VALUE_CSHW_REG_HI(n) (VALUE_CSHW_BASE + ((n) << 3) + 4) /* (RO) Counter value #n, high word */ +#define VALUE_CSHW_BASE 0x100 +#define VALUE_CSHW_REG_LO(n) (VALUE_CSHW_BASE + ((n) << 3)) /* (RO) Counter value #n, low word */ +#define VALUE_CSHW_REG_HI(n) (VALUE_CSHW_BASE + ((n) << 3) + 4) /* (RO) Counter value #n, high word */ /* Accumulated counter values for memory system */ -#define VALUE_MEMSYS_BASE 0x140 -#define VALUE_MEMSYS_REG_LO(n) (VALUE_MEMSYS_BASE + ((n) << 3)) /* (RO) Counter value #n, low word */ -#define VALUE_MEMSYS_REG_HI(n) (VALUE_MEMSYS_BASE + ((n) << 3) + 4) /* (RO) Counter value #n, high word */ +#define VALUE_MEMSYS_BASE 0x140 +#define VALUE_MEMSYS_REG_LO(n) (VALUE_MEMSYS_BASE + ((n) << 3)) /* (RO) Counter value #n, low word */ +#define VALUE_MEMSYS_REG_HI(n) (VALUE_MEMSYS_BASE + ((n) << 3) + 4) /* (RO) 
Counter value #n, high word */ -#define VALUE_TILER_BASE 0x180 -#define VALUE_TILER_REG_LO(n) (VALUE_TILER_BASE + ((n) << 3)) /* (RO) Counter value #n, low word */ -#define VALUE_TILER_REG_HI(n) (VALUE_TILER_BASE + ((n) << 3) + 4) /* (RO) Counter value #n, high word */ +#define VALUE_TILER_BASE 0x180 +#define VALUE_TILER_REG_LO(n) (VALUE_TILER_BASE + ((n) << 3)) /* (RO) Counter value #n, low word */ +#define VALUE_TILER_REG_HI(n) (VALUE_TILER_BASE + ((n) << 3) + 4) /* (RO) Counter value #n, high word */ -#define VALUE_SHADER_BASE 0x1C0 -#define VALUE_SHADER_REG_LO(n) (VALUE_SHADER_BASE + ((n) << 3)) /* (RO) Counter value #n, low word */ -#define VALUE_SHADER_REG_HI(n) (VALUE_SHADER_BASE + ((n) << 3) + 4) /* (RO) Counter value #n, high word */ +#define VALUE_SHADER_BASE 0x1C0 +#define VALUE_SHADER_REG_LO(n) (VALUE_SHADER_BASE + ((n) << 3)) /* (RO) Counter value #n, low word */ +#define VALUE_SHADER_REG_HI(n) (VALUE_SHADER_BASE + ((n) << 3) + 4) /* (RO) Counter value #n, high word */ #define AS_STATUS_AS_ACTIVE_INT 0x2 @@ -112,7 +127,6 @@ /* GPU control registers */ #define CORE_FEATURES 0x008 /* () Shader Core Features */ -#define MCU_CONTROL 0x700 #define MCU_STATUS 0x704 #define MCU_CNTRL_ENABLE (1 << 0) @@ -122,15 +136,7 @@ #define MCU_CNTRL_DOORBELL_DISABLE_SHIFT (31) #define MCU_CNTRL_DOORBELL_DISABLE_MASK (1 << MCU_CNTRL_DOORBELL_DISABLE_SHIFT) -#define MCU_STATUS_HALTED (1 << 1) - -#define L2_CONFIG_PBHA_HWU_SHIFT GPU_U(12) -#define L2_CONFIG_PBHA_HWU_MASK (GPU_U(0xF) << L2_CONFIG_PBHA_HWU_SHIFT) -#define L2_CONFIG_PBHA_HWU_GET(reg_val) \ - (((reg_val)&L2_CONFIG_PBHA_HWU_MASK) >> L2_CONFIG_PBHA_HWU_SHIFT) -#define L2_CONFIG_PBHA_HWU_SET(reg_val, value) \ - (((reg_val) & ~L2_CONFIG_PBHA_HWU_MASK) | \ - (((value) << L2_CONFIG_PBHA_HWU_SHIFT) & L2_CONFIG_PBHA_HWU_MASK)) +#define MCU_STATUS_HALTED (1 << 1) /* JOB IRQ flags */ #define JOB_IRQ_GLOBAL_IF (1u << 31) /* Global interface interrupt received */ @@ -292,13 +298,13 @@ #define GPU_FAULTSTATUS_ACCESS_TYPE_MASK \ (0x3ul << GPU_FAULTSTATUS_ACCESS_TYPE_SHIFT) -#define GPU_FAULTSTATUS_ADDR_VALID_SHIFT 10 -#define GPU_FAULTSTATUS_ADDR_VALID_FLAG \ - (1ul << GPU_FAULTSTATUS_ADDR_VALID_SHIFT) +#define GPU_FAULTSTATUS_ADDRESS_VALID_SHIFT GPU_U(10) +#define GPU_FAULTSTATUS_ADDRESS_VALID_MASK \ + (GPU_U(0x1) << GPU_FAULTSTATUS_ADDRESS_VALID_SHIFT) -#define GPU_FAULTSTATUS_JASID_VALID_SHIFT 11 -#define GPU_FAULTSTATUS_JASID_VALID_FLAG \ - (1ul << GPU_FAULTSTATUS_JASID_VALID_SHIFT) +#define GPU_FAULTSTATUS_JASID_VALID_SHIFT GPU_U(11) +#define GPU_FAULTSTATUS_JASID_VALID_MASK \ + (GPU_U(0x1) << GPU_FAULTSTATUS_JASID_VALID_SHIFT) #define GPU_FAULTSTATUS_JASID_SHIFT 12 #define GPU_FAULTSTATUS_JASID_MASK (0xF << GPU_FAULTSTATUS_JASID_SHIFT) diff --git a/mali_kbase/gpu/backend/mali_kbase_gpu_regmap_jm.h b/mali_kbase/gpu/backend/mali_kbase_gpu_regmap_jm.h index f86f493..387cd50 100644 --- a/mali_kbase/gpu/backend/mali_kbase_gpu_regmap_jm.h +++ b/mali_kbase/gpu/backend/mali_kbase_gpu_regmap_jm.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -59,28 +59,27 @@ #define CORE_FEATURES 0x008 /* (RO) Shader Core Features */ #define JS_PRESENT 0x01C /* (RO) Job slots present */ - -#define PRFCNT_BASE_LO 0x060 /* (RW) Performance counter memory - * region base address, low word - */ -#define PRFCNT_BASE_HI 0x064 /* (RW) Performance counter memory - * region base address, high word - */ -#define PRFCNT_CONFIG 0x068 /* (RW) Performance counter - * configuration - */ -#define PRFCNT_JM_EN 0x06C /* (RW) Performance counter enable - * flags for Job Manager - */ -#define PRFCNT_SHADER_EN 0x070 /* (RW) Performance counter enable - * flags for shader cores - */ -#define PRFCNT_TILER_EN 0x074 /* (RW) Performance counter enable - * flags for tiler - */ -#define PRFCNT_MMU_L2_EN 0x07C /* (RW) Performance counter enable - * flags for MMU/L2 cache - */ +#define LATEST_FLUSH 0x038 /* (RO) Flush ID of latest + * clean-and-invalidate operation + */ +#define PRFCNT_BASE_LO 0x060 /* (RW) Performance counter memory + * region base address, low word + */ +#define PRFCNT_BASE_HI 0x064 /* (RW) Performance counter memory + * region base address, high word + */ +#define PRFCNT_CONFIG 0x068 /* (RW) Performance counter configuration */ +#define PRFCNT_JM_EN 0x06C /* (RW) Performance counter enable + * flags for Job Manager + */ +#define PRFCNT_SHADER_EN 0x070 /* (RW) Performance counter enable + * flags for shader cores */ +#define PRFCNT_TILER_EN 0x074 /* (RW) Performance counter enable + * flags for tiler + */ +#define PRFCNT_MMU_L2_EN 0x07C /* (RW) Performance counter enable + * flags for MMU/L2 cache + */ #define JS0_FEATURES 0x0C0 /* (RO) Features of job slot 0 */ #define JS1_FEATURES 0x0C4 /* (RO) Features of job slot 1 */ @@ -108,6 +107,8 @@ #define JOB_IRQ_JS_STATE 0x010 /* status==active and _next == busy snapshot from last JOB_IRQ_CLEAR */ #define JOB_IRQ_THROTTLE 0x014 /* cycles to delay delivering an interrupt externally. The JOB_IRQ_STATUS is NOT affected by this, just the delivery of the interrupt. 
*/ +#define JOB_SLOT0 0x800 /* Configuration registers for job slot 0 */ +#define JOB_SLOT_REG(n, r) (JOB_CONTROL_REG(JOB_SLOT0 + ((n) << 7)) + (r)) #define JOB_SLOT1 0x880 /* Configuration registers for job slot 1 */ #define JOB_SLOT2 0x900 /* Configuration registers for job slot 2 */ #define JOB_SLOT3 0x980 /* Configuration registers for job slot 3 */ @@ -124,29 +125,41 @@ #define JOB_SLOT14 0xF00 /* Configuration registers for job slot 14 */ #define JOB_SLOT15 0xF80 /* Configuration registers for job slot 15 */ -#define JS_XAFFINITY 0x1C /* (RO) Extended affinity mask for job slot n*/ - -#define JS_COMMAND 0x20 /* (WO) Command register for job slot n */ -#define JS_STATUS 0x24 /* (RO) Status register for job slot n */ - -#define JS_XAFFINITY_NEXT 0x5C /* (RW) Next extended affinity mask for job slot n */ - -#define JS_FLUSH_ID_NEXT 0x70 /* (RW) Next job slot n cache flush ID */ +/* JM Job control register definitions for mali_kbase_debug_job_fault */ +#define JS_HEAD_LO 0x00 /* (RO) Job queue head pointer for job slot n, low word */ +#define JS_HEAD_HI 0x04 /* (RO) Job queue head pointer for job slot n, high word */ +#define JS_TAIL_LO 0x08 /* (RO) Job queue tail pointer for job slot n, low word */ +#define JS_TAIL_HI 0x0C /* (RO) Job queue tail pointer for job slot n, high word */ +#define JS_AFFINITY_LO 0x10 /* (RO) Core affinity mask for job slot n, low word */ +#define JS_AFFINITY_HI 0x14 /* (RO) Core affinity mask for job slot n, high word */ +#define JS_CONFIG 0x18 /* (RO) Configuration settings for job slot n */ +#define JS_XAFFINITY 0x1C /* (RO) Extended affinity mask for job slot n*/ +#define JS_COMMAND 0x20 /* (WO) Command register for job slot n */ +#define JS_STATUS 0x24 /* (RO) Status register for job slot n */ +#define JS_HEAD_NEXT_LO 0x40 /* (RW) Next job queue head pointer for job slot n, low word */ +#define JS_HEAD_NEXT_HI 0x44 /* (RW) Next job queue head pointer for job slot n, high word */ +#define JS_AFFINITY_NEXT_LO 0x50 /* (RW) Next core affinity mask for job slot n, low word */ +#define JS_AFFINITY_NEXT_HI 0x54 /* (RW) Next core affinity mask for job slot n, high word */ +#define JS_CONFIG_NEXT 0x58 /* (RW) Next configuration settings for job slot n */ +#define JS_XAFFINITY_NEXT 0x5C /* (RW) Next extended affinity mask for job slot n */ +#define JS_COMMAND_NEXT 0x60 /* (RW) Next command register for job slot n */ + +#define JS_FLUSH_ID_NEXT 0x70 /* (RW) Next job slot n cache flush ID */ /* No JM-specific MMU control registers */ /* No JM-specific MMU address space control registers */ /* JS_COMMAND register commands */ -#define JS_COMMAND_NOP 0x00 /* NOP Operation. Writing this value is ignored */ -#define JS_COMMAND_START 0x01 /* Start processing a job chain. Writing this value is ignored */ -#define JS_COMMAND_SOFT_STOP 0x02 /* Gently stop processing a job chain */ -#define JS_COMMAND_HARD_STOP 0x03 /* Rudely stop processing a job chain */ -#define JS_COMMAND_SOFT_STOP_0 0x04 /* Execute SOFT_STOP if JOB_CHAIN_FLAG is 0 */ -#define JS_COMMAND_HARD_STOP_0 0x05 /* Execute HARD_STOP if JOB_CHAIN_FLAG is 0 */ -#define JS_COMMAND_SOFT_STOP_1 0x06 /* Execute SOFT_STOP if JOB_CHAIN_FLAG is 1 */ -#define JS_COMMAND_HARD_STOP_1 0x07 /* Execute HARD_STOP if JOB_CHAIN_FLAG is 1 */ - -#define JS_COMMAND_MASK 0x07 /* Mask of bits currently in use by the HW */ +#define JS_COMMAND_NOP 0x00 /* NOP Operation. Writing this value is ignored */ +#define JS_COMMAND_START 0x01 /* Start processing a job chain. 
Writing this value is ignored */ +#define JS_COMMAND_SOFT_STOP 0x02 /* Gently stop processing a job chain */ +#define JS_COMMAND_HARD_STOP 0x03 /* Rudely stop processing a job chain */ +#define JS_COMMAND_SOFT_STOP_0 0x04 /* Execute SOFT_STOP if JOB_CHAIN_FLAG is 0 */ +#define JS_COMMAND_HARD_STOP_0 0x05 /* Execute HARD_STOP if JOB_CHAIN_FLAG is 0 */ +#define JS_COMMAND_SOFT_STOP_1 0x06 /* Execute SOFT_STOP if JOB_CHAIN_FLAG is 1 */ +#define JS_COMMAND_HARD_STOP_1 0x07 /* Execute HARD_STOP if JOB_CHAIN_FLAG is 1 */ + +#define JS_COMMAND_MASK 0x07 /* Mask of bits currently in use by the HW */ /* Possible values of JS_CONFIG and JS_CONFIG_NEXT registers */ #define JS_CONFIG_START_FLUSH_NO_ACTION (0u << 0) diff --git a/mali_kbase/gpu/mali_kbase_gpu.c b/mali_kbase/gpu/mali_kbase_gpu.c index 8a84ef5..eee670f 100644 --- a/mali_kbase/gpu/mali_kbase_gpu.c +++ b/mali_kbase/gpu/mali_kbase_gpu.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -32,7 +32,7 @@ const char *kbase_gpu_access_type_name(u32 fault_status) return "READ"; case AS_FAULTSTATUS_ACCESS_TYPE_WRITE: return "WRITE"; - case AS_FAULTSTATUS_ACCESS_TYPE_EX: + case AS_FAULTSTATUS_ACCESS_TYPE_EXECUTE: return "EXECUTE"; default: WARN_ON(1); diff --git a/mali_kbase/gpu/mali_kbase_gpu_regmap.h b/mali_kbase/gpu/mali_kbase_gpu_regmap.h index e51791f..a92b498 100644 --- a/mali_kbase/gpu/mali_kbase_gpu_regmap.h +++ b/mali_kbase/gpu/mali_kbase_gpu_regmap.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -25,6 +25,7 @@ #include <uapi/gpu/arm/midgard/gpu/mali_kbase_gpu_regmap.h> #include <uapi/gpu/arm/midgard/gpu/mali_kbase_gpu_coherency.h> #include <uapi/gpu/arm/midgard/gpu/mali_kbase_gpu_id.h> + #if MALI_USE_CSF #include "backend/mali_kbase_gpu_regmap_csf.h" #else @@ -42,19 +43,29 @@ #define GPU_ULL(x) x##ull #endif /* __ASSEMBLER__ */ + /* Begin Register Offsets */ /* GPU control registers */ +#define GPU_CONTROL_BASE 0x0000 +#define GPU_CONTROL_REG(r) (GPU_CONTROL_BASE + (r)) + +#define GPU_ID 0x000 /* (RO) GPU and revision identifier */ #define L2_FEATURES 0x004 /* (RO) Level 2 cache features */ #define TILER_FEATURES 0x00C /* (RO) Tiler Features */ #define MEM_FEATURES 0x010 /* (RO) Memory system features */ #define MMU_FEATURES 0x014 /* (RO) MMU features */ #define AS_PRESENT 0x018 /* (RO) Address space slots present */ #define GPU_IRQ_RAWSTAT 0x020 /* (RW) */ +#define GPU_IRQ_CLEAR 0x024 /* (WO) */ #define GPU_IRQ_MASK 0x028 /* (RW) */ - +#define GPU_IRQ_STATUS 0x02C /* (RO) */ #define GPU_COMMAND 0x030 /* (WO) */ + #define GPU_STATUS 0x034 /* (RO) */ +#define GPU_STATUS_PRFCNT_ACTIVE (1 << 2) /* Set if the performance counters are active. */ +#define GPU_STATUS_CYCLE_COUNT_ACTIVE (1 << 6) /* Set if the cycle counter is active. 
*/ +#define GPU_STATUS_PROTECTED_MODE_ACTIVE (1 << 7) /* Set if protected mode is active */ #define GPU_DBGEN (1 << 8) /* DBGEN wire status */ @@ -64,10 +75,9 @@ #define L2_CONFIG 0x048 /* (RW) Level 2 cache configuration */ -#define GROUPS_L2_COHERENT (1 << 0) /* Cores groups are l2 coherent */ -#define SUPER_L2_COHERENT (1 << 1) /* Shader cores within a core - * supergroup are l2 coherent - */ +/* Cores groups are l2 coherent */ +#define MEM_FEATURES_COHERENT_CORE_GROUP_SHIFT GPU_U(0) +#define MEM_FEATURES_COHERENT_CORE_GROUP_MASK (GPU_U(0x1) << MEM_FEATURES_COHERENT_CORE_GROUP_SHIFT) #define PWR_KEY 0x050 /* (WO) Power manager key register */ #define PWR_OVERRIDE0 0x054 /* (RW) Power manager override settings */ @@ -95,10 +105,10 @@ #define TEXTURE_FEATURES_REG(n) GPU_CONTROL_REG(TEXTURE_FEATURES_0 + ((n) << 2)) -#define GPU_COMMAND_ARG0_LO 0x0D0 /* (RW) Additional parameter 0 for GPU commands, low word */ -#define GPU_COMMAND_ARG0_HI 0x0D4 /* (RW) Additional parameter 0 for GPU commands, high word */ -#define GPU_COMMAND_ARG1_LO 0x0D8 /* (RW) Additional parameter 1 for GPU commands, low word */ -#define GPU_COMMAND_ARG1_HI 0x0DC /* (RW) Additional parameter 1 for GPU commands, high word */ +#define GPU_COMMAND_ARG0_LO 0x0D0 /* (RW) Additional parameter 0 for GPU commands, low word */ +#define GPU_COMMAND_ARG0_HI 0x0D4 /* (RW) Additional parameter 0 for GPU commands, high word */ +#define GPU_COMMAND_ARG1_LO 0x0D8 /* (RW) Additional parameter 1 for GPU commands, low word */ +#define GPU_COMMAND_ARG1_HI 0x0DC /* (RW) Additional parameter 1 for GPU commands, high word */ #define SHADER_PRESENT_LO 0x100 /* (RO) Shader core present bitmap, low word */ #define SHADER_PRESENT_HI 0x104 /* (RO) Shader core present bitmap, high word */ @@ -109,14 +119,32 @@ #define L2_PRESENT_LO 0x120 /* (RO) Level 2 cache present bitmap, low word */ #define L2_PRESENT_HI 0x124 /* (RO) Level 2 cache present bitmap, high word */ +#define SHADER_READY_LO 0x140 /* (RO) Shader core ready bitmap, low word */ +#define SHADER_READY_HI 0x144 /* (RO) Shader core ready bitmap, high word */ + +#define TILER_READY_LO 0x150 /* (RO) Tiler core ready bitmap, low word */ +#define TILER_READY_HI 0x154 /* (RO) Tiler core ready bitmap, high word */ + +#define L2_READY_LO 0x160 /* (RO) Level 2 cache ready bitmap, low word */ +#define L2_READY_HI 0x164 /* (RO) Level 2 cache ready bitmap, high word */ + +#define SHADER_PWRON_LO 0x180 /* (WO) Shader core power on bitmap, low word */ +#define SHADER_PWRON_HI 0x184 /* (WO) Shader core power on bitmap, high word */ + +#define SHADER_PWRFEATURES 0x188 /* (RW) Shader core power features */ + +#define TILER_PWRON_LO 0x190 /* (WO) Tiler core power on bitmap, low word */ +#define TILER_PWRON_HI 0x194 /* (WO) Tiler core power on bitmap, high word */ + +#define L2_PWRON_LO 0x1A0 /* (WO) Level 2 cache power on bitmap, low word */ +#define L2_PWRON_HI 0x1A4 /* (WO) Level 2 cache power on bitmap, high word */ + #define STACK_PRESENT_LO 0xE00 /* (RO) Core stack present bitmap, low word */ #define STACK_PRESENT_HI 0xE04 /* (RO) Core stack present bitmap, high word */ #define STACK_READY_LO 0xE10 /* (RO) Core stack ready bitmap, low word */ #define STACK_READY_HI 0xE14 /* (RO) Core stack ready bitmap, high word */ -#define SHADER_PWRFEATURES 0x188 /* (RW) Shader core power features */ - #define STACK_PWRON_LO 0xE20 /* (RO) Core stack power on bitmap, low word */ #define STACK_PWRON_HI 0xE24 /* (RO) Core stack power on bitmap, high word */ @@ -173,10 +201,25 @@ /* Job control registers */ +#define 
JOB_CONTROL_BASE 0x1000 +#define JOB_CONTROL_REG(r) (JOB_CONTROL_BASE + (r)) + #define JOB_IRQ_RAWSTAT 0x000 /* Raw interrupt status register */ +#define JOB_IRQ_CLEAR 0x004 /* Interrupt clear register */ +#define JOB_IRQ_MASK 0x008 /* Interrupt mask register */ +#define JOB_IRQ_STATUS 0x00C /* Interrupt status register */ /* MMU control registers */ +#define MMU_CONTROL_BASE 0x2000 +#define MMU_CONTROL_REG(r) (MMU_CONTROL_BASE + (r)) + +#define MMU_IRQ_RAWSTAT 0x000 /* (RW) Raw interrupt status register */ +#define MMU_IRQ_CLEAR 0x004 /* (WO) Interrupt clear register */ +#define MMU_IRQ_MASK 0x008 /* (RW) Interrupt mask register */ +#define MMU_IRQ_STATUS 0x00C /* (RO) Interrupt status register */ + +#define MMU_AS0 0x400 /* Configuration registers for address space 0 */ #define MMU_AS1 0x440 /* Configuration registers for address space 1 */ #define MMU_AS2 0x480 /* Configuration registers for address space 2 */ #define MMU_AS3 0x4C0 /* Configuration registers for address space 3 */ @@ -194,17 +237,27 @@ #define MMU_AS15 0x7C0 /* Configuration registers for address space 15 */ /* MMU address space control registers */ -#define AS_LOCKADDR_LO 0x10 /* (RW) Lock region address for address space n, low word */ -#define AS_LOCKADDR_HI 0x14 /* (RW) Lock region address for address space n, high word */ -#define AS_FAULTSTATUS 0x1C /* (RO) MMU fault status register for address space n */ -#define AS_FAULTADDRESS_LO 0x20 /* (RO) Fault Address for address space n, low word */ -#define AS_FAULTADDRESS_HI 0x24 /* (RO) Fault Address for address space n, high word */ -#define AS_STATUS 0x28 /* (RO) Status flags for address space n */ - -/* (RO) Secondary fault address for address space n, low word */ -#define AS_FAULTEXTRA_LO 0x38 -/* (RO) Secondary fault address for address space n, high word */ -#define AS_FAULTEXTRA_HI 0x3C + +#define MMU_STAGE1 0x2000 /* () MMU control registers */ +#define MMU_STAGE1_REG(r) (MMU_STAGE1 + (r)) + +#define MMU_AS_REG(n, r) (MMU_AS0 + ((n) << 6) + (r)) + +#define AS_TRANSTAB_LO 0x00 /* (RW) Translation Table Base Address for address space n, low word */ +#define AS_TRANSTAB_HI 0x04 /* (RW) Translation Table Base Address for address space n, high word */ +#define AS_MEMATTR_LO 0x08 /* (RW) Memory attributes for address space n, low word. */ +#define AS_MEMATTR_HI 0x0C /* (RW) Memory attributes for address space n, high word. 
*/ +#define AS_LOCKADDR_LO 0x10 /* (RW) Lock region address for address space n, low word */ +#define AS_LOCKADDR_HI 0x14 /* (RW) Lock region address for address space n, high word */ +#define AS_COMMAND 0x18 /* (WO) MMU command register for address space n */ +#define AS_FAULTSTATUS 0x1C /* (RO) MMU fault status register for address space n */ +#define AS_FAULTADDRESS_LO 0x20 /* (RO) Fault Address for address space n, low word */ +#define AS_FAULTADDRESS_HI 0x24 /* (RO) Fault Address for address space n, high word */ +#define AS_STATUS 0x28 /* (RO) Status flags for address space n */ +#define AS_TRANSCFG_LO 0x30 /* (RW) Translation table configuration for address space n, low word */ +#define AS_TRANSCFG_HI 0x34 /* (RW) Translation table configuration for address space n, high word */ +#define AS_FAULTEXTRA_LO 0x38 /* (RO) Secondary fault address for address space n, low word */ +#define AS_FAULTEXTRA_HI 0x3C /* (RO) Secondary fault address for address space n, high word */ /* End Register Offsets */ @@ -254,7 +307,7 @@ (((reg_val)&AS_FAULTSTATUS_ACCESS_TYPE_MASK) >> AS_FAULTSTATUS_ACCESS_TYPE_SHIFT) #define AS_FAULTSTATUS_ACCESS_TYPE_ATOMIC (0x0) -#define AS_FAULTSTATUS_ACCESS_TYPE_EX (0x1) +#define AS_FAULTSTATUS_ACCESS_TYPE_EXECUTE (0x1) #define AS_FAULTSTATUS_ACCESS_TYPE_READ (0x2) #define AS_FAULTSTATUS_ACCESS_TYPE_WRITE (0x3) @@ -336,11 +389,6 @@ (((reg_val) & ~AS_LOCKADDR_FLUSH_SKIP_LEVELS_MASK) | \ ((value << AS_LOCKADDR_FLUSH_SKIP_LEVELS_SHIFT) & AS_LOCKADDR_FLUSH_SKIP_LEVELS_MASK)) -/* GPU_STATUS values */ -#define GPU_STATUS_PRFCNT_ACTIVE (1 << 2) /* Set if the performance counters are active. */ -#define GPU_STATUS_CYCLE_COUNT_ACTIVE (1 << 6) /* Set if the cycle counter is active. */ -#define GPU_STATUS_PROTECTED_MODE_ACTIVE (1 << 7) /* Set if protected mode is active */ - /* PRFCNT_CONFIG register values */ #define PRFCNT_CONFIG_MODE_SHIFT 0 /* Counter mode position. */ #define PRFCNT_CONFIG_AS_SHIFT 4 /* Address space bitmap position. 
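Expanding the MMU address-space macros above for one concrete case may help: each address space occupies a 0x40-byte window starting at MMU_AS0 (0x400), and the whole block sits at MMU_CONTROL_BASE (0x2000). A worked example (not code from the patch):

/* AS 3, STATUS register:
 *   MMU_AS_REG(3, AS_STATUS) = 0x400 + (3 << 6) + 0x28 = 0x4E8
 * which matches MMU_AS3 (0x4C0) + AS_STATUS (0x28); the absolute GPU
 * register offset is then
 *   MMU_CONTROL_REG(MMU_AS_REG(3, AS_STATUS)) = 0x2000 + 0x4E8 = 0x24E8
 */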
*/ @@ -452,16 +500,6 @@ (((reg_val) & ~AMBA_FEATURES_MEMORY_CACHE_SUPPORT_MASK) | \ (((value) << AMBA_FEATURES_MEMORY_CACHE_SUPPORT_SHIFT) & \ AMBA_FEATURES_MEMORY_CACHE_SUPPORT_MASK)) -#define AMBA_FEATURES_INVALIDATE_HINT_SHIFT GPU_U(6) -#define AMBA_FEATURES_INVALIDATE_HINT_MASK \ - (GPU_U(0x1) << AMBA_FEATURES_INVALIDATE_HINT_SHIFT) -#define AMBA_FEATURES_INVALIDATE_HINT_GET(reg_val) \ - (((reg_val)&AMBA_FEATURES_INVALIDATE_HINT_MASK) >> \ - AMBA_FEATURES_INVALIDATE_HINT_SHIFT) -#define AMBA_FEATURES_INVALIDATE_HINT_SET(reg_val, value) \ - (((reg_val) & ~AMBA_FEATURES_INVALIDATE_HINT_MASK) | \ - (((value) << AMBA_FEATURES_INVALIDATE_HINT_SHIFT) & \ - AMBA_FEATURES_INVALIDATE_HINT_MASK)) /* AMBA_ENABLE register */ #define AMBA_ENABLE_COHERENCY_PROTOCOL_SHIFT GPU_U(0) @@ -489,16 +527,6 @@ (((reg_val) & ~AMBA_ENABLE_MEMORY_CACHE_SUPPORT_MASK) | \ (((value) << AMBA_ENABLE_MEMORY_CACHE_SUPPORT_SHIFT) & \ AMBA_ENABLE_MEMORY_CACHE_SUPPORT_MASK)) -#define AMBA_ENABLE_INVALIDATE_HINT_SHIFT GPU_U(6) -#define AMBA_ENABLE_INVALIDATE_HINT_MASK \ - (GPU_U(0x1) << AMBA_ENABLE_INVALIDATE_HINT_SHIFT) -#define AMBA_ENABLE_INVALIDATE_HINT_GET(reg_val) \ - (((reg_val)&AMBA_ENABLE_INVALIDATE_HINT_MASK) >> \ - AMBA_ENABLE_INVALIDATE_HINT_SHIFT) -#define AMBA_ENABLE_INVALIDATE_HINT_SET(reg_val, value) \ - (((reg_val) & ~AMBA_ENABLE_INVALIDATE_HINT_MASK) | \ - (((value) << AMBA_ENABLE_INVALIDATE_HINT_SHIFT) & \ - AMBA_ENABLE_INVALIDATE_HINT_MASK)) /* IDVS_GROUP register */ #define IDVS_GROUP_SIZE_SHIFT (16) diff --git a/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.c b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.c index 9a409f6..c8cf934 100644 --- a/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.c +++ b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.c @@ -347,7 +347,7 @@ static int kbasep_hwcnt_backend_csf_if_fw_ring_buf_alloc( /* Update MMU table */ ret = kbase_mmu_insert_pages(kbdev, &kbdev->csf.mcu_mmu, gpu_va_base >> PAGE_SHIFT, phys, num_pages, flags, MCU_AS_NR, KBASE_MEM_GROUP_CSF_FW, - mmu_sync_info, NULL, false); + mmu_sync_info, NULL); if (ret) goto mmu_insert_failed; @@ -480,10 +480,10 @@ kbasep_hwcnt_backend_csf_if_fw_ring_buf_free(struct kbase_hwcnt_backend_csf_if_c if (fw_ring_buf->phys) { u64 gpu_va_base = KBASE_HWC_CSF_RING_BUFFER_VA_START; - WARN_ON(kbase_mmu_teardown_pages(fw_ctx->kbdev, &fw_ctx->kbdev->csf.mcu_mmu, - gpu_va_base >> PAGE_SHIFT, fw_ring_buf->phys, - fw_ring_buf->num_pages, fw_ring_buf->num_pages, - MCU_AS_NR, true)); + WARN_ON(kbase_mmu_teardown_firmware_pages( + fw_ctx->kbdev, &fw_ctx->kbdev->csf.mcu_mmu, gpu_va_base >> PAGE_SHIFT, + fw_ring_buf->phys, fw_ring_buf->num_pages, fw_ring_buf->num_pages, + MCU_AS_NR)); vunmap(fw_ring_buf->cpu_dump_base); diff --git a/mali_kbase/jm/mali_kbase_jm_defs.h b/mali_kbase/jm/mali_kbase_jm_defs.h index 639b35f..e694f9f 100644 --- a/mali_kbase/jm/mali_kbase_jm_defs.h +++ b/mali_kbase/jm/mali_kbase_jm_defs.h @@ -140,15 +140,17 @@ * @JM_DEFAULT_JS_FREE_TIMEOUT: Maximum timeout to wait for JS_COMMAND_NEXT * to be updated on HW side so a Job Slot is * considered free. - * @KBASE_TIMEOUT_SELECTOR_COUNT: Number of timeout selectors. Must be last in - * the enum. + * @KBASE_TIMEOUT_SELECTOR_COUNT: Number of timeout selectors. + * @KBASE_DEFAULT_TIMEOUT: Fallthrough in case an invalid timeout is + * passed. 
*/ enum kbase_timeout_selector { MMU_AS_INACTIVE_WAIT_TIMEOUT, JM_DEFAULT_JS_FREE_TIMEOUT, /* Must be the last in the enum */ - KBASE_TIMEOUT_SELECTOR_COUNT + KBASE_TIMEOUT_SELECTOR_COUNT, + KBASE_DEFAULT_TIMEOUT = JM_DEFAULT_JS_FREE_TIMEOUT }; #if IS_ENABLED(CONFIG_DEBUG_FS) @@ -862,10 +864,6 @@ struct jsctx_queue { * @pf_data: Data relating to Page fault. * @bf_data: Data relating to Bus fault. * @current_setup: Stores the MMU configuration for this address space. - * @is_unresponsive: Flag to indicate MMU is not responding. - * Set if a MMU command isn't completed within - * &kbase_device:mmu_as_inactive_wait_time_ms. - * Clear by kbase_ctx_sched_restore_all_as() after GPU reset completes. */ struct kbase_as { int number; @@ -875,7 +873,6 @@ struct kbase_as { struct kbase_fault pf_data; struct kbase_fault bf_data; struct kbase_mmu_setup current_setup; - bool is_unresponsive; }; #endif /* _KBASE_JM_DEFS_H_ */ diff --git a/mali_kbase/jm/mali_kbase_js_defs.h b/mali_kbase/jm/mali_kbase_js_defs.h index 5023eaa..009ff02 100644 --- a/mali_kbase/jm/mali_kbase_js_defs.h +++ b/mali_kbase/jm/mali_kbase_js_defs.h @@ -342,6 +342,30 @@ struct kbasep_js_device_data { * * the kbasep_js_kctx_info::runpool substructure */ struct mutex runpool_mutex; + +#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) + /** + * @gpu_metrics_timer: High-resolution timer used to periodically emit the GPU metrics + * tracepoints for applications that are using the GPU. The timer is + * needed for the long duration handling so that the length of work + * period is within the allowed limit. + */ + struct hrtimer gpu_metrics_timer; + + /** + * @gpu_metrics_timer_needed: Flag to indicate if the @gpu_metrics_timer is needed. + * The timer won't be started after the expiry if the flag + * isn't set. + */ + bool gpu_metrics_timer_needed; + + /** + * @gpu_metrics_timer_running: Flag to indicate if the @gpu_metrics_timer is running. + * The flag is set to false when the timer is cancelled or + * is not restarted after the expiry. + */ + bool gpu_metrics_timer_running; +#endif }; /** diff --git a/mali_kbase/mali_base_hwconfig_features.h b/mali_kbase/mali_base_hwconfig_features.h index 11aedef..724145f 100644 --- a/mali_kbase/mali_base_hwconfig_features.h +++ b/mali_kbase/mali_base_hwconfig_features.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2014-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -21,7 +21,7 @@ /* AUTOMATICALLY GENERATED FILE. If you want to amend the issues/features, * please update base/tools/hwconfig_generator/hwc_{issues,features}.py - * For more information see base/tools/hwconfig_generator/README + * For more information see base/tools/docs/hwconfig_generator.md */ #ifndef _BASE_HWCONFIG_FEATURES_H_ diff --git a/mali_kbase/mali_base_hwconfig_issues.h b/mali_kbase/mali_base_hwconfig_issues.h index 0fbdec0..91b9b83 100644 --- a/mali_kbase/mali_base_hwconfig_issues.h +++ b/mali_kbase/mali_base_hwconfig_issues.h @@ -21,7 +21,7 @@ /* AUTOMATICALLY GENERATED FILE. 
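Stepping back to the kbase_timeout_selector change above: the new KBASE_DEFAULT_TIMEOUT alias lets lookup code degrade gracefully when handed an out-of-range selector instead of indexing past the table. A minimal sketch of that pattern; the table and function are illustrative only, the driver's real accessor being kbase_get_timeout_ms():

static u64 timeout_cycles_sketch(enum kbase_timeout_selector selector)
{
	/* Hypothetical per-selector table, using the cycle counts from
	 * mali_kbase_config_defaults.h.
	 */
	static const u64 timeout_cycles[KBASE_TIMEOUT_SELECTOR_COUNT] = {
		[MMU_AS_INACTIVE_WAIT_TIMEOUT] = MMU_AS_INACTIVE_WAIT_TIMEOUT_CYCLES,
		[JM_DEFAULT_JS_FREE_TIMEOUT] = JM_DEFAULT_JS_FREE_TIMEOUT_CYCLES,
	};

	if (WARN_ON(selector >= KBASE_TIMEOUT_SELECTOR_COUNT))
		selector = KBASE_DEFAULT_TIMEOUT; /* == JM_DEFAULT_JS_FREE_TIMEOUT */

	return timeout_cycles[selector];
}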
If you want to amend the issues/features, * please update base/tools/hwconfig_generator/hwc_{issues,features}.py - * For more information see base/tools/hwconfig_generator/README + * For more information see base/tools/docs/hwconfig_generator.md */ #ifndef _BASE_HWCONFIG_ISSUES_H_ diff --git a/mali_kbase/mali_kbase.h b/mali_kbase/mali_kbase.h index 7de793c..c39ba99 100644 --- a/mali_kbase/mali_kbase.h +++ b/mali_kbase/mali_kbase.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -52,6 +52,7 @@ #include <uapi/gpu/arm/midgard/mali_base_kernel.h> #include <mali_kbase_linux.h> +#include <linux/version_compat_defs.h> /* * Include mali_kbase_defs.h first as this provides types needed by other local @@ -61,9 +62,7 @@ #include "debug/mali_kbase_debug_ktrace.h" #include "context/mali_kbase_context.h" -#include "mali_kbase_strings.h" #include "mali_kbase_mem_lowlevel.h" -#include "mali_kbase_utility.h" #include "mali_kbase_mem.h" #include "mmu/mali_kbase_mmu.h" #include "mali_kbase_gpu_memory_debugfs.h" @@ -87,6 +86,9 @@ #include "mali_linux_trace.h" +#define KBASE_DRV_NAME "mali" +#define KBASE_TIMELINE_NAME KBASE_DRV_NAME ".timeline" + #if MALI_USE_CSF #include "csf/mali_kbase_csf.h" @@ -462,9 +464,9 @@ void kbasep_as_do_poke(struct work_struct *work); * * @kbdev: The kbase device structure for the device * - * The caller should ensure that either kbdev->pm.active_count_lock is held, or - * a dmb was executed recently (to ensure the value is most - * up-to-date). However, without a lock the value could change afterwards. + * The caller should ensure that either kbase_device::kbase_pm_device_data::lock is held, + * or a dmb was executed recently (to ensure the value is most up-to-date). + * However, without a lock the value could change afterwards. * * Return: * * false if a suspend is not in progress @@ -475,6 +477,22 @@ static inline bool kbase_pm_is_suspending(struct kbase_device *kbdev) return kbdev->pm.suspending; } +/** + * kbase_pm_is_resuming - Check whether System resume of GPU device is in progress. + * + * @kbdev: The kbase device structure for the device + * + * The caller should ensure that either kbase_device::kbase_pm_device_data::lock is held, + * or a dmb was executed recently (to ensure the value is most up-to-date). + * However, without a lock the value could change afterwards. + * + * Return: true if System resume is in progress, otherwise false. + */ +static inline bool kbase_pm_is_resuming(struct kbase_device *kbdev) +{ + return kbdev->pm.resuming; +} + #ifdef CONFIG_MALI_ARBITER_SUPPORT /* * Check whether a gpu lost is in progress @@ -528,9 +546,11 @@ static inline bool kbase_pm_is_active(struct kbase_device *kbdev) } /** - * kbase_pm_lowest_gpu_freq_init() - Find the lowest frequency that the GPU can - * run as using the device tree, and save this - * within kbdev. + * kbase_pm_gpu_freq_init() - Find the lowest frequency that the GPU can + * run as using the device tree, then query the + * GPU properties to find out the highest GPU + * frequency and store both of them within the + * @kbase_device. * @kbdev: Pointer to kbase device. 
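A short sketch of the calling convention documented for kbase_pm_is_suspending() and kbase_pm_is_resuming() above, assuming kbase_pm_device_data::lock is the mutex the kernel-doc refers to (the helper is illustrative, not from the patch):

static bool pm_transition_in_flight(struct kbase_device *kbdev)
{
	bool busy;

	mutex_lock(&kbdev->pm.lock);
	busy = kbase_pm_is_suspending(kbdev) || kbase_pm_is_resuming(kbdev);
	mutex_unlock(&kbdev->pm.lock);

	/* Once the lock is dropped this answer can become stale immediately,
	 * exactly as the kernel-doc warns.
	 */
	return busy;
}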
* * This function could be called from kbase_clk_rate_trace_manager_init, @@ -538,9 +558,9 @@ static inline bool kbase_pm_is_active(struct kbase_device *kbdev) * dev_pm_opp_of_add_table() has been called to initialize the OPP table, * which occurs in power_control_init(). * - * Return: 0 in any case. + * Return: 0 on success, negative error code on failure. */ -int kbase_pm_lowest_gpu_freq_init(struct kbase_device *kbdev); +int kbase_pm_gpu_freq_init(struct kbase_device *kbdev); /** * kbase_pm_metrics_start - Start the utilization metrics timer @@ -808,4 +828,108 @@ void kbase_destroy_kworker_stack(struct kthread_worker *worker); #define UINT64_MAX ((uint64_t)0xFFFFFFFFFFFFFFFFULL) #endif +/** + * kbase_file_fops_count() - Get the kfile::fops_count value + * + * @kfile: Pointer to the object representing the mali device file. + * + * The value is read with kfile::lock held. + * + * Return: sampled value of kfile::fops_count. + */ +static inline u32 kbase_file_fops_count(struct kbase_file *kfile) +{ + u32 fops_count; + + spin_lock(&kfile->lock); + fops_count = kfile->fops_count; + spin_unlock(&kfile->lock); + + return fops_count; +} + +/** + * kbase_file_inc_fops_count_unless_closed() - Increment the kfile::fops_count value if the + * kfile::owner is still set. + * + * @kfile: Pointer to the object representing the /dev/malixx device file instance. + * + * Return: true if the increment was done otherwise false. + */ +static inline bool kbase_file_inc_fops_count_unless_closed(struct kbase_file *kfile) +{ + bool count_incremented = false; + + spin_lock(&kfile->lock); + if (kfile->owner) { + kfile->fops_count++; + count_incremented = true; + } + spin_unlock(&kfile->lock); + + return count_incremented; +} + +/** + * kbase_file_dec_fops_count() - Decrement the kfile::fops_count value + * + * @kfile: Pointer to the object representing the /dev/malixx device file instance. + * + * This function shall only be called to decrement kfile::fops_count if a successful call + * to kbase_file_inc_fops_count_unless_closed() was made previously by the current thread. + * + * The function would enqueue the kfile::destroy_kctx_work if the process that originally + * created the file instance has closed its copy and no Kbase handled file operations are + * in progress and no memory mappings are present for the file instance. + */ +static inline void kbase_file_dec_fops_count(struct kbase_file *kfile) +{ + spin_lock(&kfile->lock); + WARN_ON_ONCE(kfile->fops_count <= 0); + kfile->fops_count--; + if (unlikely(!kfile->fops_count && !kfile->owner && !kfile->map_count)) { + queue_work(system_wq, &kfile->destroy_kctx_work); +#if IS_ENABLED(CONFIG_DEBUG_FS) + wake_up(&kfile->zero_fops_count_wait); +#endif + } + spin_unlock(&kfile->lock); +} + +/** + * kbase_file_inc_cpu_mapping_count() - Increment the kfile::map_count value. + * + * @kfile: Pointer to the object representing the /dev/malixx device file instance. + * + * This function shall be called when the memory mapping on /dev/malixx device file + * instance is created. The kbase_file::setup_state shall be KBASE_FILE_COMPLETE. + */ +static inline void kbase_file_inc_cpu_mapping_count(struct kbase_file *kfile) +{ + spin_lock(&kfile->lock); + kfile->map_count++; + spin_unlock(&kfile->lock); +} + +/** + * kbase_file_dec_cpu_mapping_count() - Decrement the kfile::map_count value + * + * @kfile: Pointer to the object representing the /dev/malixx device file instance. 
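Taken together, the fops_count helpers above give every Kbase file operation one guard/unguard shape; the patch applies it to ioctl, read, poll and mmap further down. A stripped-down version of the pattern, with the operation body left as a placeholder:

static long example_guarded_fop(struct kbase_file *kfile)
{
	long ret = 0;

	/* Refuse if the owning process has already closed its copy of the file. */
	if (unlikely(!kbase_file_inc_fops_count_unless_closed(kfile)))
		return -EPERM;

	/* ... the actual file operation runs here ... */

	/* May queue kfile::destroy_kctx_work if this was the last user. */
	kbase_file_dec_fops_count(kfile);
	return ret;
}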
+ * + * This function is called to decrement kfile::map_count value when the memory mapping + * on /dev/malixx device file is closed. + * The function would enqueue the kfile::destroy_kctx_work if the process that originally + * created the file instance has closed its copy and there are no mappings present and no + * Kbase handled file operations are in progress for the file instance. + */ +static inline void kbase_file_dec_cpu_mapping_count(struct kbase_file *kfile) +{ + spin_lock(&kfile->lock); + WARN_ON_ONCE(kfile->map_count <= 0); + kfile->map_count--; + if (unlikely(!kfile->map_count && !kfile->owner && !kfile->fops_count)) + queue_work(system_wq, &kfile->destroy_kctx_work); + spin_unlock(&kfile->lock); +} + #endif diff --git a/mali_kbase/mali_kbase_config_defaults.h b/mali_kbase/mali_kbase_config_defaults.h index c99ad52..fa73612 100644 --- a/mali_kbase/mali_kbase_config_defaults.h +++ b/mali_kbase/mali_kbase_config_defaults.h @@ -183,6 +183,7 @@ enum { * * This is also the default timeout to be used when an invalid timeout * selector is used to retrieve the timeout on CSF GPUs. + * This shouldn't be used as a timeout for the CSG suspend request. * * Based on 75000ms timeout at nominal 100MHz, as is required for Android - based * on scaling from a 50MHz GPU system. @@ -196,17 +197,16 @@ enum { */ #define CSF_PM_TIMEOUT_CYCLES (250000000) -/* Waiting timeout in clock cycles for GPU reset to complete. +/* Waiting timeout in clock cycles for a CSG to be suspended. * - * Based on 2500ms timeout at 100MHz, scaled from a 50MHz GPU system + * Based on 30s timeout at 100MHz, scaled from 5s at 600Mhz GPU frequency. + * More cycles (1s @ 100Mhz = 100000000) are added up to ensure that + * host timeout is always bigger than FW timeout. */ -#define CSF_GPU_RESET_TIMEOUT_CYCLES (250000000) +#define CSF_CSG_SUSPEND_TIMEOUT_CYCLES (3100000000ull) -/* Waiting timeout in clock cycles for all active CSGs to be suspended. - * - * Based on 1500ms timeout at 100MHz, scaled from a 50MHz GPU system. - */ -#define CSF_CSG_SUSPEND_TIMEOUT_CYCLES (150000000) +/* Waiting timeout in clock cycles for GPU reset to complete. */ +#define CSF_GPU_RESET_TIMEOUT_CYCLES (CSF_CSG_SUSPEND_TIMEOUT_CYCLES * 2) /* Waiting timeout in clock cycles for GPU firmware to boot. * @@ -220,6 +220,19 @@ enum { */ #define CSF_FIRMWARE_PING_TIMEOUT_CYCLES (600000000ull) +/* Waiting timeout for a KCPU queue's fence signal blocked to long, in clock cycles. + * + * Based on 10s timeout at 100MHz, scaled from a 50MHz GPU system. + */ +#define KCPU_FENCE_SIGNAL_TIMEOUT_CYCLES (1000000000ull) + +/* Waiting timeout for task execution on an endpoint. Based on the + * DEFAULT_PROGRESS_TIMEOUT. + * + * Based on 25s timeout at 100Mhz, scaled from a 500MHz GPU system. + */ +#define DEFAULT_PROGRESS_TIMEOUT_CYCLES (2500000000ull) + #else /* MALI_USE_CSF */ /* A default timeout in clock cycles to be used when an invalid timeout @@ -242,7 +255,7 @@ enum { */ #define JM_DEFAULT_JS_FREE_TIMEOUT_CYCLES (100000) -#endif /* MALI_USE_CSF */ +#endif /* !MALI_USE_CSF */ /* Default timeslice that a context is scheduled in for, in nanoseconds. * @@ -286,4 +299,10 @@ enum { * It corresponds to 0.5s in GPU @ 100Mhz. */ #define MMU_AS_INACTIVE_WAIT_TIMEOUT_CYCLES ((u64)50 * 1024 * 1024) + +#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) +/* Default value of the time interval at which GPU metrics tracepoints are emitted. 
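Writing out the arithmetic behind the new suspend/reset figures (at the 100 MHz reference clock the comments above use; not code from the patch):

/* CSF_CSG_SUSPEND_TIMEOUT_CYCLES:
 *   30 s * 100,000,000 cycles/s = 3,000,000,000
 * +  1 s * 100,000,000 cycles/s =   100,000,000   (host-over-FW margin)
 *                               = 3,100,000,000   (3100000000ull)
 *
 * CSF_GPU_RESET_TIMEOUT_CYCLES = 2 * 3,100,000,000 = 6,200,000,000 cycles,
 * i.e. 62 s at the 100 MHz reference; actual waits are derived from these
 * cycle counts using the GPU frequency recorded by kbase_pm_gpu_freq_init().
 */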
*/ +#define DEFAULT_GPU_METRICS_TP_EMIT_INTERVAL_NS (500000000u) /* 500 ms */ +#endif + #endif /* _KBASE_CONFIG_DEFAULTS_H_ */ diff --git a/mali_kbase/mali_kbase_core_linux.c b/mali_kbase/mali_kbase_core_linux.c index c31994c..28cbcdb 100644 --- a/mali_kbase/mali_kbase_core_linux.c +++ b/mali_kbase/mali_kbase_core_linux.c @@ -170,6 +170,8 @@ static const struct mali_kbase_capability_def kbase_caps_table[MALI_KBASE_NUM_CA static struct mutex kbase_probe_mutex; #endif +static void kbase_file_destroy_kctx_worker(struct work_struct *work); + /** * mali_kbase_supports_cap - Query whether a kbase capability is supported * @@ -274,6 +276,8 @@ void kbase_destroy_kworker_stack(struct kthread_worker *worker) * * Return: Address of an object representing a simulated device file, or NULL * on failure. + * + * Note: This function always gets called in Userspace context. */ static struct kbase_file *kbase_file_new(struct kbase_device *const kbdev, struct file *const filp) @@ -286,6 +290,16 @@ static struct kbase_file *kbase_file_new(struct kbase_device *const kbdev, kfile->kctx = NULL; kfile->api_version = 0; atomic_set(&kfile->setup_state, KBASE_FILE_NEED_VSN); + /* Store the pointer to the file table structure of current process. */ + kfile->owner = current->files; + INIT_WORK(&kfile->destroy_kctx_work, kbase_file_destroy_kctx_worker); + spin_lock_init(&kfile->lock); + kfile->fops_count = 0; + kfile->map_count = 0; + typecheck(typeof(kfile->map_count), typeof(current->mm->map_count)); +#if IS_ENABLED(CONFIG_DEBUG_FS) + init_waitqueue_head(&kfile->zero_fops_count_wait); +#endif } return kfile; } @@ -366,6 +380,33 @@ static int kbase_file_create_kctx(struct kbase_file *kfile, base_context_create_flags flags); /** + * kbase_file_inc_fops_count_if_allowed - Increment the kfile::fops_count value if the file + * operation is allowed for the current process. + * + * @kfile: Pointer to the object representing the /dev/malixx device file instance. + * + * The function shall be called at the beginning of certain file operation methods + * implemented for @kbase_fops, like ioctl, poll, read and mmap. + * + * kbase_file_dec_fops_count() shall be called if the increment was done. + * + * Return: true if the increment was done otherwise false. + * + * Note: This function shall always be called in Userspace context. + */ +static bool kbase_file_inc_fops_count_if_allowed(struct kbase_file *const kfile) +{ + /* Disallow file operations from the other process that shares the instance + * of /dev/malixx file i.e. 'kfile' or disallow file operations if parent + * process has closed the file instance. + */ + if (unlikely(kfile->owner != current->files)) + return false; + + return kbase_file_inc_fops_count_unless_closed(kfile); +} + +/** * kbase_file_get_kctx_if_setup_complete - Get a kernel base context * pointer from a device file * @@ -377,6 +418,8 @@ static int kbase_file_create_kctx(struct kbase_file *kfile, * * Return: Address of the kernel base context associated with the @kfile, or * NULL if no context exists. + * + * Note: This function shall always be called in Userspace context. */ static struct kbase_context *kbase_file_get_kctx_if_setup_complete( struct kbase_file *const kfile) @@ -390,37 +433,103 @@ static struct kbase_context *kbase_file_get_kctx_if_setup_complete( } /** - * kbase_file_delete - Destroy an object representing a device file + * kbase_file_destroy_kctx - Destroy the Kbase context created for @kfile. 
* * @kfile: A device file created by kbase_file_new() - * - * If any context was created for the @kfile then it is destroyed. */ -static void kbase_file_delete(struct kbase_file *const kfile) +static void kbase_file_destroy_kctx(struct kbase_file *const kfile) { - struct kbase_device *kbdev = NULL; - - if (WARN_ON(!kfile)) + if (atomic_cmpxchg(&kfile->setup_state, KBASE_FILE_COMPLETE, + KBASE_FILE_DESTROY_CTX) != KBASE_FILE_COMPLETE) return; - kfile->filp->private_data = NULL; - kbdev = kfile->kbdev; - - if (atomic_read(&kfile->setup_state) == KBASE_FILE_COMPLETE) { - struct kbase_context *kctx = kfile->kctx; - #if IS_ENABLED(CONFIG_DEBUG_FS) - kbasep_mem_profile_debugfs_remove(kctx); + kbasep_mem_profile_debugfs_remove(kfile->kctx); + kbase_context_debugfs_term(kfile->kctx); #endif - kbase_context_debugfs_term(kctx); - kbase_destroy_context(kctx); + kbase_destroy_context(kfile->kctx); + dev_dbg(kfile->kbdev->dev, "Deleted kbase context"); +} + +/** + * kbase_file_destroy_kctx_worker - Work item to destroy the Kbase context. + * + * @work: Pointer to the kfile::destroy_kctx_work. + * + * The work item shall only be enqueued if the context termination could not + * be done from @kbase_flush(). + */ +static void kbase_file_destroy_kctx_worker(struct work_struct *work) +{ + struct kbase_file *kfile = + container_of(work, struct kbase_file, destroy_kctx_work); + + WARN_ON_ONCE(kfile->owner); + WARN_ON_ONCE(kfile->map_count); + WARN_ON_ONCE(kfile->fops_count); + + kbase_file_destroy_kctx(kfile); +} + +/** + * kbase_file_destroy_kctx_on_flush - Try destroy the Kbase context from the flush() + * method of @kbase_fops. + * + * @kfile: A device file created by kbase_file_new() + */ +static void kbase_file_destroy_kctx_on_flush(struct kbase_file *const kfile) +{ + bool can_destroy_context = false; + + spin_lock(&kfile->lock); + kfile->owner = NULL; + /* To destroy the context from flush() method, unlike the release() + * method, need to synchronize manually against the other threads in + * the current process that could be operating on the /dev/malixx file. + * + * Only destroy the context if all the memory mappings on the + * /dev/malixx file instance have been closed. If there are mappings + * present then the context would be destroyed later when the last + * mapping is closed. + * Also, only destroy the context if no file operations are in progress. + */ + can_destroy_context = !kfile->map_count && !kfile->fops_count; + spin_unlock(&kfile->lock); - dev_dbg(kbdev->dev, "deleted base context\n"); + if (likely(can_destroy_context)) { + WARN_ON_ONCE(work_pending(&kfile->destroy_kctx_work)); + kbase_file_destroy_kctx(kfile); } +} - kbase_release_device(kbdev); +/** + * kbase_file_delete - Destroy an object representing a device file + * + * @kfile: A device file created by kbase_file_new() + * + * If any context was created for the @kfile and is still alive, then it is destroyed. 
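One detail worth spelling out: kbase_file_destroy_kctx() can now be reached from three paths (flush(), the destroy worker, and kbase_file_delete()), and the atomic_cmpxchg() on setup_state is what keeps the teardown single-shot. A minimal model of that guard, using the state values from the patch:

static void destroy_once_sketch(struct kbase_file *kfile)
{
	/* Only the first caller moves COMPLETE -> DESTROY_CTX and tears
	 * down; every later caller observes a different state and returns.
	 */
	if (atomic_cmpxchg(&kfile->setup_state, KBASE_FILE_COMPLETE,
			   KBASE_FILE_DESTROY_CTX) != KBASE_FILE_COMPLETE)
		return;

	/* ... single-shot context teardown runs here ... */
}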
+ */ +static void kbase_file_delete(struct kbase_file *const kfile) +{ + if (WARN_ON(!kfile)) + return; + + /* All the CPU mappings on the device file should have been closed */ + WARN_ON_ONCE(kfile->map_count); +#if IS_ENABLED(CONFIG_DEBUG_FS) + /* There could still be file operations due to the debugfs file (mem_view) */ + wait_event(kfile->zero_fops_count_wait, !kbase_file_fops_count(kfile)); +#else + /* There shall not be any file operations in progress on the device file */ + WARN_ON_ONCE(kfile->fops_count); +#endif + kfile->filp->private_data = NULL; + cancel_work_sync(&kfile->destroy_kctx_work); + /* Destroy the context if it wasn't done earlier from the flush() method. */ + kbase_file_destroy_kctx(kfile); + kbase_release_device(kfile->kbdev); + kfree(kfile); } @@ -676,7 +785,7 @@ static int kbase_file_create_kctx(struct kbase_file *const kfile, kbdev = kfile->kbdev; kctx = kbase_create_context(kbdev, in_compat_syscall(), - flags, kfile->api_version, kfile->filp); + flags, kfile->api_version, kfile); /* if bad flags, will stay stuck in setup mode */ if (!kctx) @@ -762,6 +871,36 @@ static int kbase_release(struct inode *inode, struct file *filp) return 0; } +/** + * kbase_flush - Function implementing the flush() method of @kbase_fops. + * + * @filp: Pointer to the /dev/malixx device file instance. + * @id: Pointer to the file table structure of current process. + * If @filp is being shared by multiple processes then @id can differ + * from kfile::owner. + * + * This function is called every time a copy of @filp is closed. So if 3 processes + * are sharing @filp then this function is called 3 times, and only after + * that is kbase_release() called. + * + * Return: 0 if successful, otherwise a negative error code. + * + * Note: This function always gets called in Userspace context when the + * file is closed. + */ +static int kbase_flush(struct file *filp, fl_owner_t id) +{ + struct kbase_file *const kfile = filp->private_data; + + /* Try to destroy the context if the flush() method has been called for the + * process that created the instance of /dev/malixx file i.e. 'kfile'.
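A timeline makes the flush()/release() split concrete (illustrative ordering): one process opens /dev/malixx and forks, so two file-table references share the same struct file:

/*   child exits  -> kbase_flush(id != kfile->owner) -> context kept
 *   parent exits -> kbase_flush(id == kfile->owner) -> destroy attempted
 *   last fput()  -> kbase_release()                 -> kbase_file_delete()
 */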
+ */ + if (kfile->owner == id) + kbase_file_destroy_kctx_on_flush(kfile); + + return 0; +} + static int kbase_api_set_flags(struct kbase_file *kfile, struct kbase_ioctl_set_flags *flags) { @@ -1485,6 +1624,7 @@ static int kbasep_cs_queue_group_create_1_6( struct kbase_context *kctx, union kbase_ioctl_cs_queue_group_create_1_6 *create) { + int ret, i; union kbase_ioctl_cs_queue_group_create new_create = { .in = { .tiler_mask = create->in.tiler_mask, @@ -1498,16 +1638,61 @@ static int kbasep_cs_queue_group_create_1_6( .compute_max = create->in.compute_max, } }; - int ret = kbase_csf_queue_group_create(kctx, &new_create); + for (i = 0; i < ARRAY_SIZE(create->in.padding); i++) { + if (create->in.padding[i] != 0) { + dev_warn(kctx->kbdev->dev, "Invalid padding not 0 in queue group create\n"); + return -EINVAL; + } + } + + ret = kbase_csf_queue_group_create(kctx, &new_create); + + create->out.group_handle = new_create.out.group_handle; + create->out.group_uid = new_create.out.group_uid; + + return ret; +} + +static int kbasep_cs_queue_group_create_1_18(struct kbase_context *kctx, + union kbase_ioctl_cs_queue_group_create_1_18 *create) +{ + int ret, i; + union kbase_ioctl_cs_queue_group_create + new_create = { .in = { + .tiler_mask = create->in.tiler_mask, + .fragment_mask = create->in.fragment_mask, + .compute_mask = create->in.compute_mask, + .cs_min = create->in.cs_min, + .priority = create->in.priority, + .tiler_max = create->in.tiler_max, + .fragment_max = create->in.fragment_max, + .compute_max = create->in.compute_max, + .csi_handlers = create->in.csi_handlers, + .dvs_buf = create->in.dvs_buf, + } }; + + for (i = 0; i < ARRAY_SIZE(create->in.padding); i++) { + if (create->in.padding[i] != 0) { + dev_warn(kctx->kbdev->dev, "Invalid padding not 0 in queue group create\n"); + return -EINVAL; + } + } + + ret = kbase_csf_queue_group_create(kctx, &new_create); create->out.group_handle = new_create.out.group_handle; create->out.group_uid = new_create.out.group_uid; return ret; } + static int kbasep_cs_queue_group_create(struct kbase_context *kctx, union kbase_ioctl_cs_queue_group_create *create) { + if (create->in.reserved != 0) { + dev_warn(kctx->kbdev->dev, "Invalid reserved field not 0 in queue group create\n"); + return -EINVAL; + } return kbase_csf_queue_group_create(kctx, create); } @@ -1765,9 +1950,8 @@ static int kbasep_ioctl_set_limited_core_count(struct kbase_context *kctx, return 0; } -static long kbase_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) +static long kbase_kfile_ioctl(struct kbase_file *kfile, unsigned int cmd, unsigned long arg) { - struct kbase_file *const kfile = filp->private_data; struct kbase_context *kctx = NULL; struct kbase_device *kbdev = kfile->kbdev; void __user *uarg = (void __user *)arg; @@ -2081,6 +2265,11 @@ static long kbase_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) kbasep_cs_queue_group_create_1_6, union kbase_ioctl_cs_queue_group_create_1_6, kctx); break; + case KBASE_IOCTL_CS_QUEUE_GROUP_CREATE_1_18: + KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_CS_QUEUE_GROUP_CREATE_1_18, + kbasep_cs_queue_group_create_1_18, + union kbase_ioctl_cs_queue_group_create_1_18, kctx); + break; case KBASE_IOCTL_CS_QUEUE_GROUP_CREATE: KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_CS_QUEUE_GROUP_CREATE, kbasep_cs_queue_group_create, @@ -2179,22 +2368,44 @@ static long kbase_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) return -ENOIOCTLCMD; } +static long kbase_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) +{ + struct kbase_file 
*const kfile = filp->private_data; + long ioctl_ret; + + if (unlikely(!kbase_file_inc_fops_count_if_allowed(kfile))) + return -EPERM; + + ioctl_ret = kbase_kfile_ioctl(kfile, cmd, arg); + kbase_file_dec_fops_count(kfile); + + return ioctl_ret; +} + #if MALI_USE_CSF static ssize_t kbase_read(struct file *filp, char __user *buf, size_t count, loff_t *f_pos) { struct kbase_file *const kfile = filp->private_data; - struct kbase_context *const kctx = - kbase_file_get_kctx_if_setup_complete(kfile); + struct kbase_context *kctx; struct base_csf_notification event_data = { .type = BASE_CSF_NOTIFICATION_EVENT }; const size_t data_size = sizeof(event_data); bool read_event = false, read_error = false; + ssize_t err = 0; - if (unlikely(!kctx)) + if (unlikely(!kbase_file_inc_fops_count_if_allowed(kfile))) return -EPERM; - if (count < data_size) - return -ENOBUFS; + kctx = kbase_file_get_kctx_if_setup_complete(kfile); + if (unlikely(!kctx)) { + err = -EPERM; + goto out; + } + + if (count < data_size) { + err = -ENOBUFS; + goto out; + } if (atomic_read(&kctx->event_count)) read_event = true; @@ -2218,28 +2429,39 @@ static ssize_t kbase_read(struct file *filp, char __user *buf, size_t count, lof if (copy_to_user(buf, &event_data, data_size) != 0) { dev_warn(kctx->kbdev->dev, "Failed to copy data\n"); - return -EFAULT; + err = -EFAULT; + goto out; } if (read_event) atomic_set(&kctx->event_count, 0); - return data_size; +out: + kbase_file_dec_fops_count(kfile); + return err ? err : data_size; } #else /* MALI_USE_CSF */ static ssize_t kbase_read(struct file *filp, char __user *buf, size_t count, loff_t *f_pos) { struct kbase_file *const kfile = filp->private_data; - struct kbase_context *const kctx = - kbase_file_get_kctx_if_setup_complete(kfile); + struct kbase_context *kctx; struct base_jd_event_v2 uevent; int out_count = 0; + ssize_t err = 0; - if (unlikely(!kctx)) + if (unlikely(!kbase_file_inc_fops_count_if_allowed(kfile))) return -EPERM; - if (count < sizeof(uevent)) - return -ENOBUFS; + kctx = kbase_file_get_kctx_if_setup_complete(kfile); + if (unlikely(!kctx)) { + err = -EPERM; + goto out; + } + + if (count < sizeof(uevent)) { + err = -ENOBUFS; + goto out; + } memset(&uevent, 0, sizeof(uevent)); @@ -2248,56 +2470,78 @@ static ssize_t kbase_read(struct file *filp, char __user *buf, size_t count, lof if (out_count > 0) goto out; - if (filp->f_flags & O_NONBLOCK) - return -EAGAIN; + if (filp->f_flags & O_NONBLOCK) { + err = -EAGAIN; + goto out; + } if (wait_event_interruptible(kctx->event_queue, - kbase_event_pending(kctx)) != 0) - return -ERESTARTSYS; + kbase_event_pending(kctx)) != 0) { + err = -ERESTARTSYS; + goto out; + } } if (uevent.event_code == BASE_JD_EVENT_DRV_TERMINATED) { - if (out_count == 0) - return -EPIPE; + if (out_count == 0) { + err = -EPIPE; + goto out; + } goto out; } - if (copy_to_user(buf, &uevent, sizeof(uevent)) != 0) - return -EFAULT; + if (copy_to_user(buf, &uevent, sizeof(uevent)) != 0) { + err = -EFAULT; + goto out; + } buf += sizeof(uevent); out_count++; count -= sizeof(uevent); } while (count >= sizeof(uevent)); - out: - return out_count * sizeof(uevent); +out: + kbase_file_dec_fops_count(kfile); + return err ? 
err : (out_count * sizeof(uevent)); } #endif /* MALI_USE_CSF */ static __poll_t kbase_poll(struct file *filp, poll_table *wait) { struct kbase_file *const kfile = filp->private_data; - struct kbase_context *const kctx = - kbase_file_get_kctx_if_setup_complete(kfile); + struct kbase_context *kctx; + __poll_t ret = 0; + + if (unlikely(!kbase_file_inc_fops_count_if_allowed(kfile))) { +#if (KERNEL_VERSION(4, 19, 0) > LINUX_VERSION_CODE) + ret = POLLNVAL; +#else + ret = EPOLLNVAL; +#endif + return ret; + } + kctx = kbase_file_get_kctx_if_setup_complete(kfile); if (unlikely(!kctx)) { #if (KERNEL_VERSION(4, 19, 0) > LINUX_VERSION_CODE) - return POLLERR; + ret = POLLERR; #else - return EPOLLERR; + ret = EPOLLERR; #endif + goto out; } poll_wait(filp, &kctx->event_queue, wait); if (kbase_event_pending(kctx)) { #if (KERNEL_VERSION(4, 19, 0) > LINUX_VERSION_CODE) - return POLLIN | POLLRDNORM; + ret = POLLIN | POLLRDNORM; #else - return EPOLLIN | EPOLLRDNORM; + ret = EPOLLIN | EPOLLRDNORM; #endif } - return 0; +out: + kbase_file_dec_fops_count(kfile); + return ret; } void _kbase_event_wakeup(struct kbase_context *kctx, bool sync) @@ -2347,13 +2591,20 @@ KBASE_EXPORT_TEST_API(kbase_event_pending); static int kbase_mmap(struct file *const filp, struct vm_area_struct *const vma) { struct kbase_file *const kfile = filp->private_data; - struct kbase_context *const kctx = - kbase_file_get_kctx_if_setup_complete(kfile); + struct kbase_context *kctx; + int ret; - if (unlikely(!kctx)) + if (unlikely(!kbase_file_inc_fops_count_if_allowed(kfile))) return -EPERM; - return kbase_context_mmap(kctx, vma); + kctx = kbase_file_get_kctx_if_setup_complete(kfile); + if (likely(kctx)) + ret = kbase_context_mmap(kctx, vma); + else + ret = -EPERM; + + kbase_file_dec_fops_count(kfile); + return ret; } static int kbase_check_flags(int flags) @@ -2372,18 +2623,26 @@ static unsigned long kbase_get_unmapped_area(struct file *const filp, const unsigned long pgoff, const unsigned long flags) { struct kbase_file *const kfile = filp->private_data; - struct kbase_context *const kctx = - kbase_file_get_kctx_if_setup_complete(kfile); + struct kbase_context *kctx; + unsigned long address; - if (unlikely(!kctx)) + if (unlikely(!kbase_file_inc_fops_count_if_allowed(kfile))) return -EPERM; - return kbase_context_get_unmapped_area(kctx, addr, len, pgoff, flags); + kctx = kbase_file_get_kctx_if_setup_complete(kfile); + if (likely(kctx)) + address = kbase_context_get_unmapped_area(kctx, addr, len, pgoff, flags); + else + address = -EPERM; + + kbase_file_dec_fops_count(kfile); + return address; } static const struct file_operations kbase_fops = { .owner = THIS_MODULE, .open = kbase_open, + .flush = kbase_flush, .release = kbase_release, .read = kbase_read, .poll = kbase_poll, @@ -3306,10 +3565,8 @@ static ssize_t gpuinfo_show(struct device *dev, .name = "Mali-G510" }, { .id = GPU_ID2_PRODUCT_TVAX >> KBASE_GPU_ID_VERSION_PRODUCT_ID_SHIFT, .name = "Mali-G310" }, - { .id = GPU_ID2_PRODUCT_TTIX >> KBASE_GPU_ID_VERSION_PRODUCT_ID_SHIFT, - .name = "Mali-TTIX" }, { .id = GPU_ID2_PRODUCT_LTIX >> KBASE_GPU_ID_VERSION_PRODUCT_ID_SHIFT, - .name = "Mali-LTIX" }, + .name = "Mali-G620" }, }; const char *product_name = "(Unknown Mali GPU)"; struct kbase_device *kbdev; @@ -3361,6 +3618,21 @@ static ssize_t gpuinfo_show(struct device *dev, dev_dbg(kbdev->dev, "GPU ID_Name: %s, nr_cores(%u)\n", product_name, nr_cores); } + + if ((product_id & product_id_mask) == + ((GPU_ID2_PRODUCT_TTIX >> KBASE_GPU_ID_VERSION_PRODUCT_ID_SHIFT) & product_id_mask)) { + const bool 
rt_supported = + GPU_FEATURES_RAY_TRACING_GET(gpu_props->props.raw_props.gpu_features); + const u8 nr_cores = gpu_props->num_cores; + + if ((nr_cores >= 10) && rt_supported) + product_name = "Mali-G720-Immortalis"; + else + product_name = (nr_cores >= 6) ? "Mali-G720" : "Mali-G620"; + + dev_dbg(kbdev->dev, "GPU ID_Name: %s (ID: 0x%x), nr_cores(%u)\n", product_name, + nr_cores, product_id & product_id_mask); + } #endif /* MALI_USE_CSF */ return scnprintf(buf, PAGE_SIZE, "%s %d cores r%dp%d 0x%04X\n", product_name, @@ -3435,8 +3707,9 @@ static ssize_t dvfs_period_show(struct device *dev, static DEVICE_ATTR_RW(dvfs_period); -int kbase_pm_lowest_gpu_freq_init(struct kbase_device *kbdev) +int kbase_pm_gpu_freq_init(struct kbase_device *kbdev) { + int err; /* Uses default reference frequency defined in below macro */ u64 lowest_freq_khz = DEFAULT_REF_TIMEOUT_FREQ_KHZ; @@ -3471,7 +3744,16 @@ int kbase_pm_lowest_gpu_freq_init(struct kbase_device *kbdev) #endif kbdev->lowest_gpu_freq_khz = lowest_freq_khz; + + err = kbase_device_populate_max_freq(kbdev); + if (unlikely(err < 0)) + return -1; + dev_dbg(kbdev->dev, "Lowest frequency identified is %llu kHz", kbdev->lowest_gpu_freq_khz); + dev_dbg(kbdev->dev, + "Setting default highest frequency to %u kHz (pending devfreq initialization", + kbdev->gpu_props.props.core_props.gpu_freq_khz_max); + return 0; } @@ -3584,21 +3866,32 @@ static ssize_t reset_timeout_store(struct device *dev, { struct kbase_device *kbdev; int ret; - int reset_timeout; + u32 reset_timeout; + u32 default_reset_timeout; kbdev = to_kbase_device(dev); if (!kbdev) return -ENODEV; - ret = kstrtoint(buf, 0, &reset_timeout); - if (ret || reset_timeout <= 0) { + ret = kstrtou32(buf, 0, &reset_timeout); + if (ret || reset_timeout == 0) { dev_err(kbdev->dev, "Couldn't process reset_timeout write operation.\n" "Use format <reset_timeout_ms>\n"); return -EINVAL; } +#if MALI_USE_CSF + default_reset_timeout = kbase_get_timeout_ms(kbdev, CSF_GPU_RESET_TIMEOUT); +#else /* MALI_USE_CSF */ + default_reset_timeout = JM_DEFAULT_RESET_TIMEOUT_MS; +#endif /* !MALI_USE_CSF */ + + if (reset_timeout < default_reset_timeout) + dev_warn(kbdev->dev, "requested reset_timeout(%u) is smaller than default(%u)", + reset_timeout, default_reset_timeout); + kbdev->reset_timeout_ms = reset_timeout; - dev_dbg(kbdev->dev, "Reset timeout: %dms\n", reset_timeout); + dev_dbg(kbdev->dev, "Reset timeout: %ums\n", reset_timeout); return count; } @@ -4482,8 +4775,10 @@ static bool kbase_is_pm_enabled(const struct device_node *gpu_node) const void *operating_point_node; bool is_pm_enable = false; - power_model_node = of_get_child_by_name(gpu_node, - "power_model"); + power_model_node = of_get_child_by_name(gpu_node, "power-model"); + if (!power_model_node) + power_model_node = of_get_child_by_name(gpu_node, "power_model"); + if (power_model_node) is_pm_enable = true; @@ -4504,8 +4799,9 @@ static bool kbase_is_pv_enabled(const struct device_node *gpu_node) { const void *arbiter_if_node; - arbiter_if_node = of_get_property(gpu_node, - "arbiter_if", NULL); + arbiter_if_node = of_get_property(gpu_node, "arbiter-if", NULL); + if (!arbiter_if_node) + arbiter_if_node = of_get_property(gpu_node, "arbiter_if", NULL); return arbiter_if_node ? true : false; } @@ -5409,7 +5705,10 @@ static ssize_t idle_hysteresis_time_store(struct device *dev, return -EINVAL; } - kbase_csf_firmware_set_gpu_idle_hysteresis_time(kbdev, dur); + /* In sysFs, The unit of the input value of idle_hysteresis_time is us. 
+ * But the unit of the input parameter of this function is ns, so multiply by 1000 + */ + kbase_csf_firmware_set_gpu_idle_hysteresis_time(kbdev, dur * NSEC_PER_USEC); return count; } @@ -5436,7 +5735,8 @@ static ssize_t idle_hysteresis_time_show(struct device *dev, if (!kbdev) return -ENODEV; - dur = kbase_csf_firmware_get_gpu_idle_hysteresis_time(kbdev); + /* The unit of return value of idle_hysteresis_time_show is us, So divide by 1000.*/ + dur = kbase_csf_firmware_get_gpu_idle_hysteresis_time(kbdev) / NSEC_PER_USEC; ret = scnprintf(buf, PAGE_SIZE, "%u\n", dur); return ret; @@ -5445,6 +5745,74 @@ static ssize_t idle_hysteresis_time_show(struct device *dev, static DEVICE_ATTR_RW(idle_hysteresis_time); /** + * idle_hysteresis_time_ns_store - Store callback for CSF + * idle_hysteresis_time_ns sysfs file. + * + * @dev: The device with sysfs file is for + * @attr: The attributes of the sysfs file + * @buf: The value written to the sysfs file + * @count: The number of bytes written to the sysfs file + * + * This function is called when the idle_hysteresis_time_ns sysfs + * file is written to. + * + * This file contains values of the idle hysteresis duration in ns. + * + * Return: @count if the function succeeded. An error code on failure. + */ +static ssize_t idle_hysteresis_time_ns_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct kbase_device *kbdev; + u32 dur = 0; + + kbdev = to_kbase_device(dev); + if (!kbdev) + return -ENODEV; + + if (kstrtou32(buf, 0, &dur)) { + dev_err(kbdev->dev, "Couldn't process idle_hysteresis_time_ns write operation.\n" + "Use format <idle_hysteresis_time_ns>\n"); + return -EINVAL; + } + + kbase_csf_firmware_set_gpu_idle_hysteresis_time(kbdev, dur); + + return count; +} + +/** + * idle_hysteresis_time_ns_show - Show callback for CSF + * idle_hysteresis_time_ns sysfs entry. + * + * @dev: The device this sysfs file is for. + * @attr: The attributes of the sysfs file. + * @buf: The output buffer to receive the GPU information. + * + * This function is called to get the current idle hysteresis duration in ns. + * + * Return: The number of bytes output to @buf. + */ +static ssize_t idle_hysteresis_time_ns_show(struct device *dev, struct device_attribute *attr, + char *const buf) +{ + struct kbase_device *kbdev; + ssize_t ret; + u32 dur; + + kbdev = to_kbase_device(dev); + if (!kbdev) + return -ENODEV; + + dur = kbase_csf_firmware_get_gpu_idle_hysteresis_time(kbdev); + ret = scnprintf(buf, PAGE_SIZE, "%u\n", dur); + + return ret; +} + +static DEVICE_ATTR_RW(idle_hysteresis_time_ns); + +/** * mcu_shader_pwroff_timeout_show - Get the MCU shader Core power-off time value. * * @dev: The device this sysfs file is for. 
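Because the legacy idle_hysteresis_time file keeps microseconds while the new idle_hysteresis_time_ns twin takes nanoseconds, it is easy to be off by a factor of 1000. A usage sketch; the sysfs path is platform-dependent, with /sys/class/misc/mali0/device being typical for kbase:

/* Two equivalent ways to request a 10 ms idle hysteresis:
 *
 *   echo 10000    > .../idle_hysteresis_time      (microseconds)
 *   echo 10000000 > .../idle_hysteresis_time_ns   (nanoseconds)
 *
 * Both reach kbase_csf_firmware_set_gpu_idle_hysteresis_time(), which
 * takes nanoseconds; the legacy path multiplies by NSEC_PER_USEC first.
 * The mcu_shader_pwroff_timeout / *_ns pair added below follows the
 * same convention.
 */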
@@ -5466,7 +5834,8 @@ static ssize_t mcu_shader_pwroff_timeout_show(struct device *dev, struct device_ if (!kbdev) return -ENODEV; - pwroff = kbase_csf_firmware_get_mcu_core_pwroff_time(kbdev); + /* The unit of return value of the function is us, So divide by 1000.*/ + pwroff = kbase_csf_firmware_get_mcu_core_pwroff_time(kbdev) / NSEC_PER_USEC; return scnprintf(buf, PAGE_SIZE, "%u\n", pwroff); } @@ -5490,19 +5859,97 @@ static ssize_t mcu_shader_pwroff_timeout_store(struct device *dev, struct device struct kbase_device *kbdev = dev_get_drvdata(dev); u32 dur; + const struct kbase_pm_policy *current_policy; + bool always_on; + if (!kbdev) return -ENODEV; if (kstrtouint(buf, 0, &dur)) return -EINVAL; - kbase_csf_firmware_set_mcu_core_pwroff_time(kbdev, dur); + current_policy = kbase_pm_get_policy(kbdev); + always_on = current_policy == &kbase_pm_always_on_policy_ops; + if (dur == 0 && !always_on) + return -EINVAL; + + /* In sysFs, The unit of the input value of mcu_shader_pwroff_timeout is us. + * But the unit of the input parameter of this function is ns, so multiply by 1000 + */ + kbase_csf_firmware_set_mcu_core_pwroff_time(kbdev, dur * NSEC_PER_USEC); return count; } static DEVICE_ATTR_RW(mcu_shader_pwroff_timeout); +/** + * mcu_shader_pwroff_timeout_ns_show - Get the MCU shader Core power-off time value. + * + * @dev: The device this sysfs file is for. + * @attr: The attributes of the sysfs file. + * @buf: The output buffer for the sysfs file contents + * + * Get the internally recorded MCU shader Core power-off (nominal) timeout value. + * The unit of the value is in nanoseconds. + * + * Return: The number of bytes output to @buf if the + * function succeeded. A Negative value on failure. + */ +static ssize_t mcu_shader_pwroff_timeout_ns_show(struct device *dev, struct device_attribute *attr, + char *const buf) +{ + struct kbase_device *kbdev = dev_get_drvdata(dev); + u32 pwroff; + + if (!kbdev) + return -ENODEV; + + pwroff = kbase_csf_firmware_get_mcu_core_pwroff_time(kbdev); + return scnprintf(buf, PAGE_SIZE, "%u\n", pwroff); +} + +/** + * mcu_shader_pwroff_timeout_ns_store - Set the MCU shader core power-off time value. + * + * @dev: The device with sysfs file is for + * @attr: The attributes of the sysfs file + * @buf: The value written to the sysfs file + * @count: The number of bytes to write to the sysfs file + * + * The duration value (unit: nanoseconds) for configuring MCU Shader Core + * timer, when the shader cores' power transitions are delegated to the + * MCU (normal operational mode) + * + * Return: @count if the function succeeded. An error code on failure. 
+ */ +static ssize_t mcu_shader_pwroff_timeout_ns_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct kbase_device *kbdev = dev_get_drvdata(dev); + u32 dur; + + const struct kbase_pm_policy *current_policy; + bool always_on; + + if (!kbdev) + return -ENODEV; + + if (kstrtouint(buf, 0, &dur)) + return -EINVAL; + + current_policy = kbase_pm_get_policy(kbdev); + always_on = current_policy == &kbase_pm_always_on_policy_ops; + if (dur == 0 && !always_on) + return -EINVAL; + + kbase_csf_firmware_set_mcu_core_pwroff_time(kbdev, dur); + + return count; +} + +static DEVICE_ATTR_RW(mcu_shader_pwroff_timeout_ns); + #endif /* MALI_USE_CSF */ static struct attribute *kbase_scheduling_attrs[] = { @@ -5563,7 +6010,9 @@ static struct attribute *kbase_attrs[] = { &dev_attr_csg_scheduling_period.attr, &dev_attr_fw_timeout.attr, &dev_attr_idle_hysteresis_time.attr, + &dev_attr_idle_hysteresis_time_ns.attr, &dev_attr_mcu_shader_pwroff_timeout.attr, + &dev_attr_mcu_shader_pwroff_timeout_ns.attr, #endif /* !MALI_USE_CSF */ &dev_attr_power_policy.attr, &dev_attr_core_mask.attr, @@ -5725,6 +6174,14 @@ static int kbase_platform_device_probe(struct platform_device *pdev) mutex_unlock(&kbase_probe_mutex); #endif } else { +#if (KERNEL_VERSION(6, 1, 0) <= LINUX_VERSION_CODE) + /* Since upstream is not exporting mmap_min_addr, kbase at the + * moment is unable to track possible kernel changes via sysfs. + * Flag this out in a device info message. + */ + dev_info(kbdev->dev, KBASE_COMPILED_MMAP_MIN_ADDR_MSG); +#endif + dev_info(kbdev->dev, "Probed as %s\n", dev_name(kbdev->mdev.this_device)); kbase_increment_device_id(); @@ -5950,7 +6407,7 @@ static struct platform_driver kbase_platform_driver = { .probe = kbase_platform_device_probe, .remove = kbase_platform_device_remove, .driver = { - .name = kbase_drv_name, + .name = KBASE_DRV_NAME, .pm = &kbase_pm_ops, .of_match_table = of_match_ptr(kbase_dt_ids), .probe_type = PROBE_PREFER_ASYNCHRONOUS, diff --git a/mali_kbase/mali_kbase_ctx_sched.c b/mali_kbase/mali_kbase_ctx_sched.c index dc6feb9..ea4f300 100644 --- a/mali_kbase/mali_kbase_ctx_sched.c +++ b/mali_kbase/mali_kbase_ctx_sched.c @@ -239,10 +239,11 @@ void kbase_ctx_sched_restore_all_as(struct kbase_device *kbdev) WARN_ON(!kbdev->pm.backend.gpu_powered); + kbdev->mmu_unresponsive = false; + for (i = 0; i != kbdev->nr_hw_address_spaces; ++i) { struct kbase_context *kctx; - kbdev->as[i].is_unresponsive = false; #if MALI_USE_CSF if ((i == MCU_AS_NR) && kbdev->csf.firmware_inited) { kbase_mmu_update(kbdev, &kbdev->csf.mcu_mmu, @@ -292,7 +293,7 @@ struct kbase_context *kbase_ctx_sched_as_to_ctx_refcount( found_kctx = kbdev->as_to_kctx[as_nr]; - if (!WARN_ON(found_kctx == NULL)) + if (found_kctx) kbase_ctx_sched_retain_ctx_refcount(found_kctx); spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); diff --git a/mali_kbase/mali_kbase_debug_mem_allocs.c b/mali_kbase/mali_kbase_debug_mem_allocs.c index 418bb19..0592187 100644 --- a/mali_kbase/mali_kbase_debug_mem_allocs.c +++ b/mali_kbase/mali_kbase_debug_mem_allocs.c @@ -34,8 +34,7 @@ /** * debug_zone_mem_allocs_show - Show information from specific rbtree - * @zone: Name of GPU virtual memory zone - * @rbtree: Pointer to the root of the rbtree associated with @zone + * @zone: The memory zone to be displayed * @sfile: The debugfs entry * * This function is called to show information about all the GPU allocations of a @@ -43,9 +42,10 @@ * The information like the start virtual address and size (in bytes) is shown for * every GPU 
allocation mapped in the zone. */ -static void debug_zone_mem_allocs_show(char *zone, struct rb_root *rbtree, struct seq_file *sfile) +static void debug_zone_mem_allocs_show(struct kbase_reg_zone *zone, struct seq_file *sfile) { struct rb_node *p; + struct rb_root *rbtree = &zone->reg_rbtree; struct kbase_va_region *reg; const char *type_names[5] = { "Native", @@ -57,7 +57,7 @@ static void debug_zone_mem_allocs_show(char *zone, struct rb_root *rbtree, struc #define MEM_ALLOCS_HEADER \ " VA, VA size, Commit size, Flags, Mem type\n" - seq_printf(sfile, "Zone name: %s\n:", zone); + seq_printf(sfile, "Zone name: %s\n:", kbase_reg_zone_get_name(zone->id)); seq_printf(sfile, MEM_ALLOCS_HEADER); for (p = rb_first(rbtree); p; p = rb_next(p)) { reg = rb_entry(p, struct kbase_va_region, rblink); @@ -82,18 +82,15 @@ static void debug_zone_mem_allocs_show(char *zone, struct rb_root *rbtree, struc static int debug_ctx_mem_allocs_show(struct seq_file *sfile, void *data) { struct kbase_context *const kctx = sfile->private; + enum kbase_memory_zone zone_idx; kbase_gpu_vm_lock(kctx); + for (zone_idx = 0; zone_idx < CONTEXT_ZONE_MAX; zone_idx++) { + struct kbase_reg_zone *zone; - debug_zone_mem_allocs_show("SAME_VA:", &kctx->reg_rbtree_same, sfile); - debug_zone_mem_allocs_show("CUSTOM_VA:", &kctx->reg_rbtree_custom, sfile); - debug_zone_mem_allocs_show("EXEC_VA:", &kctx->reg_rbtree_exec, sfile); - -#if MALI_USE_CSF - debug_zone_mem_allocs_show("EXEC_VA_FIXED:", &kctx->reg_rbtree_exec_fixed, sfile); - debug_zone_mem_allocs_show("FIXED_VA:", &kctx->reg_rbtree_fixed, sfile); -#endif /* MALI_USE_CSF */ - + zone = &kctx->reg_zone[zone_idx]; + debug_zone_mem_allocs_show(zone, sfile); + } kbase_gpu_vm_unlock(kctx); return 0; } diff --git a/mali_kbase/mali_kbase_debug_mem_view.c b/mali_kbase/mali_kbase_debug_mem_view.c index ce87a00..7086c6b 100644 --- a/mali_kbase/mali_kbase_debug_mem_view.c +++ b/mali_kbase/mali_kbase_debug_mem_view.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2013-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2013-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -189,13 +189,13 @@ static const struct seq_operations ops = { .show = debug_mem_show, }; -static int debug_mem_zone_open(struct rb_root *rbtree, - struct debug_mem_data *mem_data) +static int debug_mem_zone_open(struct kbase_reg_zone *zone, struct debug_mem_data *mem_data) { int ret = 0; struct rb_node *p; struct kbase_va_region *reg; struct debug_mem_mapping *mapping; + struct rb_root *rbtree = &zone->reg_rbtree; for (p = rb_first(rbtree); p; p = rb_next(p)) { reg = rb_entry(p, struct kbase_va_region, rblink); @@ -233,8 +233,9 @@ static int debug_mem_open(struct inode *i, struct file *file) struct kbase_context *const kctx = i->i_private; struct debug_mem_data *mem_data; int ret; + enum kbase_memory_zone idx; - if (get_file_rcu(kctx->filp) == 0) + if (!kbase_file_inc_fops_count_unless_closed(kctx->kfile)) return -ENOENT; /* Check if file was opened in write mode. 
GPU memory contents @@ -263,37 +264,15 @@ static int debug_mem_open(struct inode *i, struct file *file) mem_data->column_width = kctx->mem_view_column_width; - ret = debug_mem_zone_open(&kctx->reg_rbtree_same, mem_data); - if (ret != 0) { - kbase_gpu_vm_unlock(kctx); - goto out; - } - - ret = debug_mem_zone_open(&kctx->reg_rbtree_custom, mem_data); - if (ret != 0) { - kbase_gpu_vm_unlock(kctx); - goto out; - } - - ret = debug_mem_zone_open(&kctx->reg_rbtree_exec, mem_data); - if (ret != 0) { - kbase_gpu_vm_unlock(kctx); - goto out; - } + for (idx = 0; idx < CONTEXT_ZONE_MAX; idx++) { + struct kbase_reg_zone *zone = &kctx->reg_zone[idx]; -#if MALI_USE_CSF - ret = debug_mem_zone_open(&kctx->reg_rbtree_exec_fixed, mem_data); - if (ret != 0) { - kbase_gpu_vm_unlock(kctx); - goto out; - } - - ret = debug_mem_zone_open(&kctx->reg_rbtree_fixed, mem_data); - if (ret != 0) { - kbase_gpu_vm_unlock(kctx); - goto out; + ret = debug_mem_zone_open(zone, mem_data); + if (ret != 0) { + kbase_gpu_vm_unlock(kctx); + goto out; + } } -#endif kbase_gpu_vm_unlock(kctx); @@ -316,7 +295,7 @@ out: } seq_release(i, file); open_fail: - fput(kctx->filp); + kbase_file_dec_fops_count(kctx->kfile); return ret; } @@ -346,7 +325,7 @@ static int debug_mem_release(struct inode *inode, struct file *file) kfree(mem_data); } - fput(kctx->filp); + kbase_file_dec_fops_count(kctx->kfile); return 0; } diff --git a/mali_kbase/mali_kbase_debug_mem_zones.c b/mali_kbase/mali_kbase_debug_mem_zones.c index 1f8db32..115c9c3 100644 --- a/mali_kbase/mali_kbase_debug_mem_zones.c +++ b/mali_kbase/mali_kbase_debug_mem_zones.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2022-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -47,30 +47,29 @@ static int debug_mem_zones_show(struct seq_file *sfile, void *data) { struct kbase_context *const kctx = sfile->private; - size_t i; - - const char *zone_names[KBASE_REG_ZONE_MAX] = { - "SAME_VA", - "CUSTOM_VA", - "EXEC_VA" -#if MALI_USE_CSF - , - "MCU_SHARED_VA", - "EXEC_FIXED_VA", - "FIXED_VA" -#endif - }; + struct kbase_reg_zone *reg_zone; + enum kbase_memory_zone zone_idx; kbase_gpu_vm_lock(kctx); - for (i = 0; i < KBASE_REG_ZONE_MAX; i++) { - struct kbase_reg_zone *reg_zone = &kctx->reg_zone[i]; + for (zone_idx = 0; zone_idx < CONTEXT_ZONE_MAX; zone_idx++) { + reg_zone = &kctx->reg_zone[zone_idx]; if (reg_zone->base_pfn) { - seq_printf(sfile, "%15s %zu 0x%.16llx 0x%.16llx\n", zone_names[i], i, - reg_zone->base_pfn, reg_zone->va_size_pages); + seq_printf(sfile, "%15s %u 0x%.16llx 0x%.16llx\n", + kbase_reg_zone_get_name(zone_idx), zone_idx, reg_zone->base_pfn, + reg_zone->va_size_pages); } } +#if MALI_USE_CSF + reg_zone = &kctx->kbdev->csf.mcu_shared_zone; + + if (reg_zone && reg_zone->base_pfn) { + seq_printf(sfile, "%15s %u 0x%.16llx 0x%.16llx\n", + kbase_reg_zone_get_name(MCU_SHARED_ZONE), MCU_SHARED_ZONE, + reg_zone->base_pfn, reg_zone->va_size_pages); + } +#endif kbase_gpu_vm_unlock(kctx); return 0; diff --git a/mali_kbase/mali_kbase_defs.h b/mali_kbase/mali_kbase_defs.h index 12e90ac..efe690d 100644 --- a/mali_kbase/mali_kbase_defs.h +++ b/mali_kbase/mali_kbase_defs.h @@ -183,6 +183,60 @@ struct kbase_as; struct kbase_mmu_setup; struct kbase_kinstr_jm; +#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) +/** + * struct kbase_gpu_metrics - Object containing members that are used to emit + * GPU metrics tracepoints for all applications that + * created Kbase context(s) for a GPU. + * + * @active_list: List of applications that did some GPU activity in the recent work period. + * @inactive_list: List of applications that didn't do any GPU activity in the recent work period. + */ +struct kbase_gpu_metrics { + struct list_head active_list; + struct list_head inactive_list; +}; + +/** + * struct kbase_gpu_metrics_ctx - Object created for every application that created + * Kbase context(s), containing members that are used + * to emit GPU metrics tracepoints for the application. + * + * @link: Links the object in kbase_device::gpu_metrics::active_list + * or kbase_device::gpu_metrics::inactive_list. + * @first_active_start_time: Records the time at which the application first became + * active in the current work period. + * @last_active_start_time: Records the time at which the application last became + * active in the current work period. + * @last_active_end_time: Records the time at which the application last became + * inactive in the current work period. + * @total_active: Tracks the time for which the application has been active + * in the current work period. + * @prev_wp_active_end_time: Records the time at which the application last became + * inactive in the previous work period. + * @aid: Unique identifier for an application. + * @kctx_count: Counter to keep track of the number of Kbase contexts + * created for an application. There may be multiple Kbase + * contexts contributing GPU activity data to a single GPU + * metrics context. + * @active_cnt: Counter that is updated every time the GPU activity starts + * and ends in the current work period for an application.
+ * @flags: Flags to track the state of GPU metrics context. + */ +struct kbase_gpu_metrics_ctx { + struct list_head link; + u64 first_active_start_time; + u64 last_active_start_time; + u64 last_active_end_time; + u64 total_active; + u64 prev_wp_active_end_time; + unsigned int aid; + unsigned int kctx_count; + u8 active_cnt; + u8 flags; +}; +#endif + /** * struct kbase_io_access - holds information about 1 register access * @@ -317,7 +371,7 @@ struct kbase_mmu_table { u64 levels[MIDGARD_MMU_BOTTOMLEVEL][PAGE_SIZE / sizeof(u64)]; } teardown_pages; /** - * @free_pgds: Scratch memory user for insertion, update and teardown + * @free_pgds: Scratch memory used for insertion, update and teardown * operations to store a temporary list of PGDs to be freed * at the end of the operation. */ @@ -331,18 +385,69 @@ struct kbase_mmu_table { }; /** - * struct kbase_reg_zone - Information about GPU memory region zones + * enum kbase_memory_zone - Kbase memory zone identifier + * @SAME_VA_ZONE: Memory zone for allocations where the GPU and CPU VA coincide. + * @CUSTOM_VA_ZONE: When operating in compatibility mode, this zone is used to + * allow 32-bit userspace (either on a 32-bit device or a + * 32-bit application on a 64-bit device) to address the entirety + * of the GPU address space. The @CUSTOM_VA_ZONE is also used + * for JIT allocations: on 64-bit systems, the zone is created + * by reducing the size of the SAME_VA zone by a user-controlled + * amount, whereas on 32-bit systems, it is created as part of + * the existing CUSTOM_VA_ZONE + * @EXEC_VA_ZONE: Memory zone used to track GPU-executable memory. The start + * and end of this zone depend on the individual platform, + * and it is initialized upon user process request. + * @EXEC_FIXED_VA_ZONE: Memory zone used to contain GPU-executable memory + * that also permits FIXED/FIXABLE allocations. + * @FIXED_VA_ZONE: Memory zone used to allocate memory at userspace-supplied + * addresses. + * @MCU_SHARED_ZONE: Memory zone created for mappings shared between the MCU + * and Kbase. Currently this is the only zone type that is + * created on a per-device, rather than a per-context + * basis. + * @MEMORY_ZONE_MAX: Sentinel value used for iterating over all the memory zone + * identifiers. + * @CONTEXT_ZONE_MAX: Sentinel value used to keep track of the last per-context + * zone for iteration. + */ +enum kbase_memory_zone { + SAME_VA_ZONE, + CUSTOM_VA_ZONE, + EXEC_VA_ZONE, +#if IS_ENABLED(MALI_USE_CSF) + EXEC_FIXED_VA_ZONE, + FIXED_VA_ZONE, + MCU_SHARED_ZONE, +#endif + MEMORY_ZONE_MAX, +#if IS_ENABLED(MALI_USE_CSF) + CONTEXT_ZONE_MAX = FIXED_VA_ZONE + 1 +#else + CONTEXT_ZONE_MAX = EXEC_VA_ZONE + 1 +#endif +}; + +/** + * struct kbase_reg_zone - GPU memory zone information and region tracking + * @reg_rbtree: RB tree used to track kbase memory regions. * @base_pfn: Page Frame Number in GPU virtual address space for the start of * the Zone * @va_size_pages: Size of the Zone in pages + * @id: Memory zone identifier + * @cache: Pointer to a per-device slab allocator to allow for quickly allocating + * new regions * * Track information about a zone KBASE_REG_ZONE() and related macros. * In future, this could also store the &rb_root that are currently in * &kbase_context and &kbase_csf_device. 
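
As a concrete note on the two sentinels above: per-context code iterates zones with CONTEXT_ZONE_MAX, while MEMORY_ZONE_MAX is only meaningful for walks that would also cover the per-device MCU_SHARED_ZONE. A minimal sketch of the per-context iteration this patch adopts (the sketch_ helper is illustrative; kctx->reg_zone[] is introduced below):

static void sketch_for_each_context_zone(struct kbase_context *kctx,
					 void (*fn)(struct kbase_reg_zone *zone))
{
	enum kbase_memory_zone zone_idx;

	/* Stop at CONTEXT_ZONE_MAX: MCU_SHARED_ZONE is per-device on CSF
	 * builds and is therefore not part of kctx->reg_zone[].
	 */
	for (zone_idx = 0; zone_idx < CONTEXT_ZONE_MAX; zone_idx++)
		fn(&kctx->reg_zone[zone_idx]);
}

The same loop shape appears in debug_mem_zones_show() and debug_mem_open() earlier in this merge.
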
*/ struct kbase_reg_zone { + struct rb_root reg_rbtree; u64 base_pfn; u64 va_size_pages; + enum kbase_memory_zone id; + struct kmem_cache *cache; }; #if MALI_USE_CSF @@ -439,7 +544,15 @@ struct kbase_clk_rate_trace_manager { * Note that some code paths keep shaders/the tiler * powered whilst this is 0. * Use kbase_pm_is_active() instead to check for such cases. - * @suspending: Flag indicating suspending/suspended + * @suspending: Flag set to true when System suspend of GPU device begins and + * set to false only when System resume of GPU device starts. + * So GPU device could be in suspended state while the flag is set. + * The flag is updated with @lock held. + * @resuming: Flag set to true when System resume of GPU device starts and is set + * to false when resume ends. The flag is set to true at the same time + * when @suspending is set to false with @lock held. + * The flag is currently used only to prevent Kbase context termination + * during System resume of GPU device. * @runtime_active: Flag to track if the GPU is in runtime suspended or active * state. This ensures that runtime_put and runtime_get * functions are called in pairs. For example if runtime_get @@ -450,7 +563,7 @@ struct kbase_clk_rate_trace_manager { * This structure contains data for the power management framework. * There is one instance of this structure per device in the system. * @zero_active_count_wait: Wait queue set when active_count == 0 - * @resume_wait: system resume of GPU device. + * @resume_wait: Wait queue to wait for the System suspend/resume of GPU device. * @debug_core_mask: Bit masks identifying the available shader cores that are * specified via sysfs. One mask per job slot. * @debug_core_mask_all: Bit masks identifying the available shader cores that @@ -471,6 +584,7 @@ struct kbase_pm_device_data { struct rt_mutex lock; int active_count; bool suspending; + bool resuming; #if MALI_USE_CSF bool runtime_active; #endif @@ -823,10 +937,14 @@ struct kbase_mem_migrate { * to the GPU device. This points to an internal memory * group manager if no platform-specific memory group * manager was retrieved through device tree. + * @mmu_unresponsive: Flag to indicate MMU is not responding. + * Set if a MMU command isn't completed within + * &kbase_device:mmu_or_gpu_cache_op_wait_time_ms. + * Clear by kbase_ctx_sched_restore_all_as() after GPU reset completes. * @as: Array of objects representing address spaces of GPU. - * @as_free: Bitpattern of free/available GPU address spaces. * @as_to_kctx: Array of pointers to struct kbase_context, having * GPU adrress spaces assigned to them. + * @as_free: Bitpattern of free/available GPU address spaces. * @mmu_mask_change: Lock to serialize the access to MMU interrupt mask * register used in the handling of Bus & Page faults. * @pagesize_2mb: Boolean to determine whether 2MiB page sizes are @@ -1082,9 +1200,11 @@ struct kbase_mem_migrate { * KCPU queue. These structures may outlive kbase module * itself. Therefore, in such a case, a warning should be * be produced. - * @mmu_as_inactive_wait_time_ms: Maximum waiting time in ms for the completion of - * a MMU operation + * @mmu_or_gpu_cache_op_wait_time_ms: Maximum waiting time in ms for the completion of + * a cache operation via MMU_AS_CONTROL or GPU_CONTROL. * @va_region_slab: kmem_cache (slab) for allocated kbase_va_region structures. + * @fence_signal_timeout_enabled: Global flag for whether fence signal timeout tracking + * is enabled. 
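
With @reg_rbtree and @id embedded in struct kbase_reg_zone above, finding the zone for a GPU address reduces to one bounds check per zone. A minimal sketch of that check, assuming kbase_reg_zone_end_pfn() (used later in this patch) is simply base plus size:

/* A GPU PFN belongs to a zone iff it lies in [base_pfn, base_pfn + va_size_pages). */
static bool sketch_zone_contains_pfn(const struct kbase_reg_zone *zone, u64 gpu_pfn)
{
	const u64 end_pfn = zone->base_pfn + zone->va_size_pages;

	return (gpu_pfn >= zone->base_pfn) && (gpu_pfn < end_pfn);
}

This is the test kbase_gpu_pfn_to_rbtree() performs further down in mali_kbase_mem.c.
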
*/ struct kbase_device { u32 hw_quirks_sc; @@ -1135,9 +1255,10 @@ struct kbase_device { struct memory_group_manager_device *mgm_dev; + bool mmu_unresponsive; struct kbase_as as[BASE_MAX_NR_AS]; - u16 as_free; struct kbase_context *as_to_kctx[BASE_MAX_NR_AS]; + u16 as_free; spinlock_t mmu_mask_change; @@ -1196,9 +1317,7 @@ struct kbase_device { u64 lowest_gpu_freq_khz; -#if MALI_USE_CSF struct kbase_backend_time backend_time; -#endif bool cache_clean_in_progress; u32 cache_clean_queued; @@ -1396,8 +1515,18 @@ struct kbase_device { #if MALI_USE_CSF && IS_ENABLED(CONFIG_SYNC_FILE) atomic_t live_fence_metadata; #endif - u32 mmu_as_inactive_wait_time_ms; + u32 mmu_or_gpu_cache_op_wait_time_ms; struct kmem_cache *va_region_slab; + +#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) + /** + * @gpu_metrics: GPU device wide structure used for emitting GPU metrics tracepoints. + */ + struct kbase_gpu_metrics gpu_metrics; +#endif +#if MALI_USE_CSF + atomic_t fence_signal_timeout_enabled; +#endif }; /** @@ -1414,6 +1543,9 @@ struct kbase_device { * @KBASE_FILE_COMPLETE: Indicates if the setup for context has * completed, i.e. flags have been set for the * context. + * @KBASE_FILE_DESTROY_CTX: Indicates that destroying of context has begun or + * is complete. This state can only be reached after + * @KBASE_FILE_COMPLETE. * * The driver allows only limited interaction with user-space until setup * is complete. @@ -1423,7 +1555,8 @@ enum kbase_file_state { KBASE_FILE_VSN_IN_PROGRESS, KBASE_FILE_NEED_CTX, KBASE_FILE_CTX_IN_PROGRESS, - KBASE_FILE_COMPLETE + KBASE_FILE_COMPLETE, + KBASE_FILE_DESTROY_CTX }; /** @@ -1433,6 +1566,12 @@ enum kbase_file_state { * allocated from the probe method of the Mali driver. * @filp: Pointer to the struct file corresponding to device file * /dev/malixx instance, passed to the file's open method. + * @owner: Pointer to the file table structure of a process that + * created the instance of /dev/malixx device file. Set to + * NULL when that process closes the file instance. No more + * file operations would be allowed once set to NULL. + * It would be updated only in the Userspace context, i.e. + * when @kbase_open or @kbase_flush is called. * @kctx: Object representing an entity, among which GPU is * scheduled and which gets its own GPU address space. * Invalid until @setup_state is KBASE_FILE_COMPLETE. @@ -1441,13 +1580,40 @@ enum kbase_file_state { * @setup_state is KBASE_FILE_NEED_CTX. * @setup_state: Initialization state of the file. Values come from * the kbase_file_state enumeration. + * @destroy_kctx_work: Work item for destroying the @kctx, enqueued only when + * @fops_count and @map_count become zero after /dev/malixx + * file was previously closed by the @owner. + * @lock: Lock to serialize the access to members like @owner, @fops_count, + * @map_count. + * @fops_count: Counter that is incremented at the beginning of a method + * defined for @kbase_fops and is decremented at the end. + * So the counter keeps track of the file operations in progress + * for /dev/malixx file, that are being handled by the Kbase. + * The counter is needed to defer the context termination as + * Userspace can close the /dev/malixx file and flush() method + * can get called when some other file operation is in progress. + * @map_count: Counter to keep track of the memory mappings present on + * /dev/malixx file instance.
The counter is needed to defer the + * context termination as Userspace can close the /dev/malixx + * file and flush() method can get called when mappings are still + * present. + * @zero_fops_count_wait: Waitqueue used to wait for the @fops_count to become 0. + * Currently needed only for the "mem_view" debugfs file. */ struct kbase_file { struct kbase_device *kbdev; struct file *filp; + fl_owner_t owner; struct kbase_context *kctx; unsigned long api_version; atomic_t setup_state; + struct work_struct destroy_kctx_work; + spinlock_t lock; + int fops_count; + int map_count; +#if IS_ENABLED(CONFIG_DEBUG_FS) + wait_queue_head_t zero_fops_count_wait; +#endif }; #if MALI_JIT_PRESSURE_LIMIT_BASE /** @@ -1617,8 +1783,8 @@ struct kbase_sub_alloc { /** * struct kbase_context - Kernel base context * - * @filp: Pointer to the struct file corresponding to device file - * /dev/malixx instance, passed to the file's open method. + * @kfile: Pointer to the object representing the /dev/malixx device + * file instance. * @kbdev: Pointer to the Kbase device for which the context is created. * @kctx_list_link: Node into Kbase device list of contexts. * @mmu: Structure holding details of the MMU tables for this @@ -1653,22 +1819,6 @@ struct kbase_sub_alloc { * for the allocations >= 2 MB in size. * @reg_lock: Lock used for GPU virtual address space management operations, * like adding/freeing a memory region in the address space. - * Can be converted to a rwlock ?. - * @reg_rbtree_same: RB tree of the memory regions allocated from the SAME_VA - * zone of the GPU virtual address space. Used for allocations - * having the same value for GPU & CPU virtual address. - * @reg_rbtree_custom: RB tree of the memory regions allocated from the CUSTOM_VA - * zone of the GPU virtual address space. - * @reg_rbtree_exec: RB tree of the memory regions allocated from the EXEC_VA - * zone of the GPU virtual address space. Used for GPU-executable - * allocations which don't need the SAME_VA property. - * @reg_rbtree_exec_fixed: RB tree of the memory regions allocated from the - * EXEC_FIXED_VA zone of the GPU virtual address space. Used for - * GPU-executable allocations with FIXED/FIXABLE GPU virtual - * addresses. - * @reg_rbtree_fixed: RB tree of the memory regions allocated from the FIXED_VA zone - * of the GPU virtual address space. Used for allocations with - * FIXED/FIXABLE GPU virtual addresses. * @num_fixable_allocs: A count for the number of memory allocations with the * BASE_MEM_FIXABLE property. * @num_fixed_allocs: A count for the number of memory allocations with the @@ -1881,6 +2031,7 @@ struct kbase_sub_alloc { * that created the Kbase context. It would be set only for the * contexts created by the Userspace and not for the contexts * created internally by the Kbase. + * @comm: Record the process name * * A kernel base context is an entity among which the GPU is scheduled. * Each context has its own GPU address space. @@ -1889,7 +2040,7 @@ struct kbase_sub_alloc { * is made on the device file. 
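
The @fops_count/@map_count scheme documented above is a guarded-entry pattern around every file operation. The bodies of kbase_file_inc_fops_count_unless_closed() and kbase_file_dec_fops_count() are not part of this hunk, so the following is only a sketch of their likely shape, inferred from the member documentation (the lock choice and wake-up details are assumptions):

static bool sketch_inc_fops_count_unless_closed(struct kbase_file *kfile)
{
	bool allowed;

	spin_lock(&kfile->lock);
	/* Refuse new file operations once the owner closed /dev/malixx. */
	allowed = (kfile->owner != NULL);
	if (allowed)
		kfile->fops_count++;
	spin_unlock(&kfile->lock);

	return allowed;
}

static void sketch_dec_fops_count(struct kbase_file *kfile)
{
	spin_lock(&kfile->lock);
	/* Defer context destruction until no fops or mappings remain;
	 * the real code presumably also wakes @zero_fops_count_wait here.
	 */
	if (--kfile->fops_count == 0 && kfile->map_count == 0 && !kfile->owner)
		queue_work(system_wq, &kfile->destroy_kctx_work);
	spin_unlock(&kfile->lock);
}

This pairing is why debug_mem_open() above moved from get_file_rcu() to the inc/dec helpers: an in-flight mem_view operation defers context termination instead of racing with it.
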
*/ struct kbase_context { - struct file *filp; + struct kbase_file *kfile; struct kbase_device *kbdev; struct list_head kctx_list_link; struct kbase_mmu_table mmu; @@ -1914,17 +2065,11 @@ struct kbase_context { struct list_head mem_partials; struct mutex reg_lock; - - struct rb_root reg_rbtree_same; - struct rb_root reg_rbtree_custom; - struct rb_root reg_rbtree_exec; #if MALI_USE_CSF - struct rb_root reg_rbtree_exec_fixed; - struct rb_root reg_rbtree_fixed; atomic64_t num_fixable_allocs; atomic64_t num_fixed_allocs; #endif - struct kbase_reg_zone reg_zone[KBASE_REG_ZONE_MAX]; + struct kbase_reg_zone reg_zone[CONTEXT_ZONE_MAX]; #if MALI_USE_CSF struct kbase_csf_context csf; @@ -2031,6 +2176,16 @@ struct kbase_context { void *platform_data; struct task_struct *task; + +#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) + /** + * @gpu_metrics_ctx: Pointer to the GPU metrics context corresponding to the + * application that created the Kbase context. + */ + struct kbase_gpu_metrics_ctx *gpu_metrics_ctx; +#endif + + char comm[TASK_COMM_LEN]; }; #ifdef CONFIG_MALI_CINSTR_GWT diff --git a/mali_kbase/mali_kbase_dummy_job_wa.c b/mali_kbase/mali_kbase_dummy_job_wa.c index 35934b9..c3c6046 100644 --- a/mali_kbase/mali_kbase_dummy_job_wa.c +++ b/mali_kbase/mali_kbase_dummy_job_wa.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -183,9 +183,9 @@ int kbase_dummy_job_wa_execute(struct kbase_device *kbdev, u64 cores) if (kbdev->dummy_job_wa.flags & KBASE_DUMMY_JOB_WA_FLAG_WAIT_POWERUP) { /* wait for power-ups */ - wait(kbdev, SHADER_READY_LO, (cores & U32_MAX), true); + wait(kbdev, GPU_CONTROL_REG(SHADER_READY_LO), (cores & U32_MAX), true); if (cores >> 32) - wait(kbdev, SHADER_READY_HI, (cores >> 32), true); + wait(kbdev, GPU_CONTROL_REG(SHADER_READY_HI), (cores >> 32), true); } if (kbdev->dummy_job_wa.flags & KBASE_DUMMY_JOB_WA_FLAG_SERIALIZE) { @@ -218,11 +218,11 @@ int kbase_dummy_job_wa_execute(struct kbase_device *kbdev, u64 cores) kbase_reg_write(kbdev, SHADER_PWROFF_HI, (cores >> 32)); /* wait for power off complete */ - wait(kbdev, SHADER_READY_LO, (cores & U32_MAX), false); - wait(kbdev, SHADER_PWRTRANS_LO, (cores & U32_MAX), false); + wait(kbdev, GPU_CONTROL_REG(SHADER_READY_LO), (cores & U32_MAX), false); + wait(kbdev, GPU_CONTROL_REG(SHADER_PWRTRANS_LO), (cores & U32_MAX), false); if (cores >> 32) { - wait(kbdev, SHADER_READY_HI, (cores >> 32), false); - wait(kbdev, SHADER_PWRTRANS_HI, (cores >> 32), false); + wait(kbdev, GPU_CONTROL_REG(SHADER_READY_HI), (cores >> 32), false); + wait(kbdev, GPU_CONTROL_REG(SHADER_PWRTRANS_HI), (cores >> 32), false); } kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), U32_MAX); } diff --git a/mali_kbase/mali_kbase_fence.h b/mali_kbase/mali_kbase_fence.h index f4507ac..ea2ac34 100644 --- a/mali_kbase/mali_kbase_fence.h +++ b/mali_kbase/mali_kbase_fence.h @@ -33,6 +33,7 @@ #include "mali_kbase_fence_defs.h" #include "mali_kbase.h" #include "mali_kbase_refcount_defs.h" +#include <linux/version_compat_defs.h> #if MALI_USE_CSF /* Maximum number of characters in DMA fence timeline name. 
*/ @@ -160,16 +161,8 @@ static inline bool kbase_fence_out_is_ours(struct kbase_jd_atom *katom) static inline int kbase_fence_out_signal(struct kbase_jd_atom *katom, int status) { - if (status) { -#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE && \ - KERNEL_VERSION(4, 9, 68) <= LINUX_VERSION_CODE) - fence_set_error(katom->dma_fence.fence, status); -#elif (KERNEL_VERSION(4, 11, 0) <= LINUX_VERSION_CODE) - dma_fence_set_error(katom->dma_fence.fence, status); -#else - katom->dma_fence.fence->status = status; -#endif - } + if (status) + dma_fence_set_error_helper(katom->dma_fence.fence, status); return dma_fence_signal(katom->dma_fence.fence); } diff --git a/mali_kbase/mali_kbase_fence_ops.c b/mali_kbase/mali_kbase_fence_ops.c index 25b4c9c..f14a55e 100644 --- a/mali_kbase/mali_kbase_fence_ops.c +++ b/mali_kbase/mali_kbase_fence_ops.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2020-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -31,7 +31,7 @@ kbase_fence_get_driver_name(struct fence *fence) kbase_fence_get_driver_name(struct dma_fence *fence) #endif { - return kbase_drv_name; + return KBASE_DRV_NAME; } static const char * @@ -46,7 +46,7 @@ kbase_fence_get_timeline_name(struct dma_fence *fence) return kcpu_fence->metadata->timeline_name; #else - return kbase_timeline_name; + return KBASE_TIMELINE_NAME; #endif /* MALI_USE_CSF */ } diff --git a/mali_kbase/mali_kbase_gpu_metrics.c b/mali_kbase/mali_kbase_gpu_metrics.c new file mode 100644 index 0000000..af3a08d --- /dev/null +++ b/mali_kbase/mali_kbase_gpu_metrics.c @@ -0,0 +1,260 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * + * (C) COPYRIGHT 2023 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) +#include "mali_power_gpu_work_period_trace.h" +#include <mali_kbase_gpu_metrics.h> + +/** + * enum gpu_metrics_ctx_flags - Flags for the GPU metrics context + * + * @ACTIVE_INTERVAL_IN_WP: Flag set when the application first becomes active in + * the current work period. + * + * @INSIDE_ACTIVE_LIST: Flag to track if object is in kbase_device::gpu_metrics::active_list + * + * All members need to be separate bits. This enum is intended for use in a + * bitmask where multiple values get OR-ed together. 
+ */ +enum gpu_metrics_ctx_flags { + ACTIVE_INTERVAL_IN_WP = 1 << 0, + INSIDE_ACTIVE_LIST = 1 << 1, +}; + +static inline bool gpu_metrics_ctx_flag(struct kbase_gpu_metrics_ctx *gpu_metrics_ctx, + enum gpu_metrics_ctx_flags flag) +{ + return (gpu_metrics_ctx->flags & flag); +} + +static inline void gpu_metrics_ctx_flag_set(struct kbase_gpu_metrics_ctx *gpu_metrics_ctx, + enum gpu_metrics_ctx_flags flag) +{ + gpu_metrics_ctx->flags |= flag; +} + +static inline void gpu_metrics_ctx_flag_clear(struct kbase_gpu_metrics_ctx *gpu_metrics_ctx, + enum gpu_metrics_ctx_flags flag) +{ + gpu_metrics_ctx->flags &= ~flag; +} + +static inline void validate_tracepoint_data(struct kbase_gpu_metrics_ctx *gpu_metrics_ctx, + u64 start_time, u64 end_time, u64 total_active) +{ +#ifdef CONFIG_MALI_DEBUG + WARN(total_active > NSEC_PER_SEC, + "total_active %llu > 1 second for aid %u active_cnt %u", + total_active, gpu_metrics_ctx->aid, gpu_metrics_ctx->active_cnt); + + WARN(start_time >= end_time, + "start_time %llu >= end_time %llu for aid %u active_cnt %u", + start_time, end_time, gpu_metrics_ctx->aid, gpu_metrics_ctx->active_cnt); + + WARN(total_active > (end_time - start_time), + "total_active %llu > end_time %llu - start_time %llu for aid %u active_cnt %u", + total_active, end_time, start_time, + gpu_metrics_ctx->aid, gpu_metrics_ctx->active_cnt); + + WARN(gpu_metrics_ctx->prev_wp_active_end_time > start_time, + "prev_wp_active_end_time %llu > start_time %llu for aid %u active_cnt %u", + gpu_metrics_ctx->prev_wp_active_end_time, start_time, + gpu_metrics_ctx->aid, gpu_metrics_ctx->active_cnt); +#endif +} + +static void emit_tracepoint_for_active_gpu_metrics_ctx(struct kbase_device *kbdev, + struct kbase_gpu_metrics_ctx *gpu_metrics_ctx, u64 current_time) +{ + const u64 start_time = gpu_metrics_ctx->first_active_start_time; + u64 total_active = gpu_metrics_ctx->total_active; + u64 end_time; + + /* Check if the GPU activity is currently ongoing */ + if (gpu_metrics_ctx->active_cnt) { + end_time = current_time; + total_active += + end_time - gpu_metrics_ctx->last_active_start_time; + + gpu_metrics_ctx->first_active_start_time = current_time; + gpu_metrics_ctx->last_active_start_time = current_time; + } else { + end_time = gpu_metrics_ctx->last_active_end_time; + gpu_metrics_ctx_flag_clear(gpu_metrics_ctx, ACTIVE_INTERVAL_IN_WP); + } + + trace_gpu_work_period(kbdev->id, gpu_metrics_ctx->aid, + start_time, end_time, total_active); + + validate_tracepoint_data(gpu_metrics_ctx, start_time, end_time, total_active); + gpu_metrics_ctx->prev_wp_active_end_time = end_time; + gpu_metrics_ctx->total_active = 0; +} + +void kbase_gpu_metrics_ctx_put(struct kbase_device *kbdev, + struct kbase_gpu_metrics_ctx *gpu_metrics_ctx) +{ + WARN_ON(list_empty(&gpu_metrics_ctx->link)); + WARN_ON(!gpu_metrics_ctx->kctx_count); + + gpu_metrics_ctx->kctx_count--; + if (gpu_metrics_ctx->kctx_count) + return; + + if (gpu_metrics_ctx_flag(gpu_metrics_ctx, ACTIVE_INTERVAL_IN_WP)) + emit_tracepoint_for_active_gpu_metrics_ctx(kbdev, + gpu_metrics_ctx, ktime_get_raw_ns()); + + list_del_init(&gpu_metrics_ctx->link); + kfree(gpu_metrics_ctx); +} + +struct kbase_gpu_metrics_ctx *kbase_gpu_metrics_ctx_get(struct kbase_device *kbdev, u32 aid) +{ + struct kbase_gpu_metrics *gpu_metrics = &kbdev->gpu_metrics; + struct kbase_gpu_metrics_ctx *gpu_metrics_ctx; + + list_for_each_entry(gpu_metrics_ctx, &gpu_metrics->active_list, link) { + if (gpu_metrics_ctx->aid == aid) { + WARN_ON(!gpu_metrics_ctx->kctx_count); + gpu_metrics_ctx->kctx_count++; + return 
gpu_metrics_ctx; + } + } + + list_for_each_entry(gpu_metrics_ctx, &gpu_metrics->inactive_list, link) { + if (gpu_metrics_ctx->aid == aid) { + WARN_ON(!gpu_metrics_ctx->kctx_count); + gpu_metrics_ctx->kctx_count++; + return gpu_metrics_ctx; + } + } + + return NULL; +} + +void kbase_gpu_metrics_ctx_init(struct kbase_device *kbdev, + struct kbase_gpu_metrics_ctx *gpu_metrics_ctx, unsigned int aid) +{ + gpu_metrics_ctx->aid = aid; + gpu_metrics_ctx->total_active = 0; + gpu_metrics_ctx->kctx_count = 1; + gpu_metrics_ctx->active_cnt = 0; + gpu_metrics_ctx->prev_wp_active_end_time = 0; + gpu_metrics_ctx->flags = 0; + list_add_tail(&gpu_metrics_ctx->link, &kbdev->gpu_metrics.inactive_list); +} + +void kbase_gpu_metrics_ctx_start_activity(struct kbase_context *kctx, u64 timestamp_ns) +{ + struct kbase_gpu_metrics_ctx *gpu_metrics_ctx = kctx->gpu_metrics_ctx; + + gpu_metrics_ctx->active_cnt++; + if (gpu_metrics_ctx->active_cnt == 1) + gpu_metrics_ctx->last_active_start_time = timestamp_ns; + + if (!gpu_metrics_ctx_flag(gpu_metrics_ctx, ACTIVE_INTERVAL_IN_WP)) { + gpu_metrics_ctx->first_active_start_time = timestamp_ns; + gpu_metrics_ctx_flag_set(gpu_metrics_ctx, ACTIVE_INTERVAL_IN_WP); + } + + if (!gpu_metrics_ctx_flag(gpu_metrics_ctx, INSIDE_ACTIVE_LIST)) { + list_move_tail(&gpu_metrics_ctx->link, &kctx->kbdev->gpu_metrics.active_list); + gpu_metrics_ctx_flag_set(gpu_metrics_ctx, INSIDE_ACTIVE_LIST); + } +} + +void kbase_gpu_metrics_ctx_end_activity(struct kbase_context *kctx, u64 timestamp_ns) +{ + struct kbase_gpu_metrics_ctx *gpu_metrics_ctx = kctx->gpu_metrics_ctx; + + if (WARN_ON_ONCE(!gpu_metrics_ctx->active_cnt)) + return; + + if (--gpu_metrics_ctx->active_cnt) + return; + + if (likely(timestamp_ns > gpu_metrics_ctx->last_active_start_time)) { + gpu_metrics_ctx->last_active_end_time = timestamp_ns; + gpu_metrics_ctx->total_active += + timestamp_ns - gpu_metrics_ctx->last_active_start_time; + return; + } + + /* Due to conversion from system timestamp to CPU timestamp (which involves rounding) + * the value for start and end timestamp could come as same. + */ + if (timestamp_ns == gpu_metrics_ctx->last_active_start_time) { + gpu_metrics_ctx->last_active_end_time = timestamp_ns + 1; + gpu_metrics_ctx->total_active += 1; + return; + } + + /* The following check is to detect the situation where 'ACT=0' event was not visible to + * the Kbase even though the system timestamp value sampled by FW was less than the system + * timestamp value sampled by Kbase just before the draining of trace buffer. 
+ */ + if (gpu_metrics_ctx->last_active_start_time == gpu_metrics_ctx->first_active_start_time && + gpu_metrics_ctx->prev_wp_active_end_time == gpu_metrics_ctx->first_active_start_time) { + WARN_ON_ONCE(gpu_metrics_ctx->total_active); + gpu_metrics_ctx->last_active_end_time = + gpu_metrics_ctx->prev_wp_active_end_time + 1; + gpu_metrics_ctx->total_active = 1; + return; + } + + WARN_ON_ONCE(1); +} + +void kbase_gpu_metrics_emit_tracepoint(struct kbase_device *kbdev, u64 ts) +{ + struct kbase_gpu_metrics *gpu_metrics = &kbdev->gpu_metrics; + struct kbase_gpu_metrics_ctx *gpu_metrics_ctx, *tmp; + + list_for_each_entry_safe(gpu_metrics_ctx, tmp, &gpu_metrics->active_list, link) { + if (!gpu_metrics_ctx_flag(gpu_metrics_ctx, ACTIVE_INTERVAL_IN_WP)) { + WARN_ON(!gpu_metrics_ctx_flag(gpu_metrics_ctx, INSIDE_ACTIVE_LIST)); + WARN_ON(gpu_metrics_ctx->active_cnt); + list_move_tail(&gpu_metrics_ctx->link, &gpu_metrics->inactive_list); + gpu_metrics_ctx_flag_clear(gpu_metrics_ctx, INSIDE_ACTIVE_LIST); + continue; + } + + emit_tracepoint_for_active_gpu_metrics_ctx(kbdev, gpu_metrics_ctx, ts); + } +} + +int kbase_gpu_metrics_init(struct kbase_device *kbdev) +{ + INIT_LIST_HEAD(&kbdev->gpu_metrics.active_list); + INIT_LIST_HEAD(&kbdev->gpu_metrics.inactive_list); + + dev_info(kbdev->dev, "GPU metrics tracepoint support enabled"); + return 0; +} + +void kbase_gpu_metrics_term(struct kbase_device *kbdev) +{ + WARN_ON_ONCE(!list_empty(&kbdev->gpu_metrics.active_list)); + WARN_ON_ONCE(!list_empty(&kbdev->gpu_metrics.inactive_list)); +} + +#endif diff --git a/mali_kbase/mali_kbase_gpu_metrics.h b/mali_kbase/mali_kbase_gpu_metrics.h new file mode 100644 index 0000000..adc8816 --- /dev/null +++ b/mali_kbase/mali_kbase_gpu_metrics.h @@ -0,0 +1,167 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * + * (C) COPYRIGHT 2023 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +/** + * DOC: GPU metrics frontend APIs + */ + +#ifndef _KBASE_GPU_METRICS_H_ +#define _KBASE_GPU_METRICS_H_ + +#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) +#include <mali_kbase.h> + +/** + * kbase_gpu_metrics_get_emit_interval() - Return the trace point emission interval. + * + * Return: The time interval in nanosecond for GPU metrics trace point emission. + */ +unsigned long kbase_gpu_metrics_get_emit_interval(void); + +/** + * kbase_gpu_metrics_ctx_put() - Decrement the Kbase context count for the GPU metrics + * context and free it if the count becomes 0. + * + * @kbdev: Pointer to the GPU device. + * @gpu_metrics_ctx: Pointer to the GPU metrics context. + * + * This function must be called when a Kbase context is destroyed. + * The function would decrement the Kbase context count for the GPU metrics context and + * free the memory if the count becomes 0. 
+ * The function would emit a power/gpu_work_period tracepoint for the GPU metrics context + * if there was some GPU activity done for it since the last tracepoint was emitted. + * + * Note: The caller must appropriately serialize the call to this function with the + * call to other GPU metrics functions declared in this file. + */ +void kbase_gpu_metrics_ctx_put(struct kbase_device *kbdev, + struct kbase_gpu_metrics_ctx *gpu_metrics_ctx); + +/** + * kbase_gpu_metrics_ctx_get() - Increment the Kbase context count for the GPU metrics + * context if it exists. + * + * @kbdev: Pointer to the GPU device. + * @aid: Unique identifier of the Application that is creating the Kbase context. + * + * This function must be called when a Kbase context is created. + * The function would increment the Kbase context count for the GPU metrics context, + * corresponding to the @aid, if it exists. + * + * Return: Pointer to the GPU metrics context corresponding to the @aid if it already + * exists otherwise NULL. + * + * Note: The caller must appropriately serialize the call to this function with the + * call to other GPU metrics functions declared in this file. + * The caller shall allocate memory for GPU metrics context structure if the + * function returns NULL. + */ +struct kbase_gpu_metrics_ctx *kbase_gpu_metrics_ctx_get(struct kbase_device *kbdev, u32 aid); + +/** + * kbase_gpu_metrics_ctx_init() - Initialise the GPU metrics context + * + * @kbdev: Pointer to the GPU device. + * @gpu_metrics_ctx: Pointer to the GPU metrics context. + * @aid: Unique identifier of the Application for which GPU metrics + * context needs to be initialized. + * + * This function must be called when a Kbase context is created, after the call to + * kbase_gpu_metrics_ctx_get() returned NULL and memory for the GPU metrics context + * structure was allocated. + * + * Note: The caller must appropriately serialize the call to this function with the + * call to other GPU metrics functions declared in this file. + */ +void kbase_gpu_metrics_ctx_init(struct kbase_device *kbdev, + struct kbase_gpu_metrics_ctx *gpu_metrics_ctx, u32 aid); + +/** + * kbase_gpu_metrics_ctx_start_activity() - Report the start of some GPU activity + * for GPU metrics context. + * + * @kctx: Pointer to the Kbase context contributing data to the GPU metrics context. + * @timestamp_ns: CPU timestamp at which the GPU activity started. + * + * The provided timestamp would be later used as the "start_time_ns" for the + * power/gpu_work_period tracepoint if this is the first GPU activity for the GPU + * metrics context in the current work period. + * + * Note: The caller must appropriately serialize the call to this function with the + * call to other GPU metrics functions declared in this file. + */ +void kbase_gpu_metrics_ctx_start_activity(struct kbase_context *kctx, u64 timestamp_ns); + +/** + * kbase_gpu_metrics_ctx_end_activity() - Report the end of some GPU activity + * for GPU metrics context. + * + * @kctx: Pointer to the Kbase context contributing data to the GPU metrics context. + * @timestamp_ns: CPU timestamp at which the GPU activity ended. + * + * The provided timestamp would be later used as the "end_time_ns" for the + * power/gpu_work_period tracepoint if this is the last GPU activity for the GPU + * metrics context in the current work period. + * + * Note: The caller must appropriately serialize the call to this function with the + * call to other GPU metrics functions declared in this file. 
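
A usage sketch tying the two hooks above together (illustrative only: the real call sites live in the job manager/CSF backends and derive timestamps from the GPU trace stream rather than calling ktime_get_raw_ns() directly):

static void sketch_bracket_gpu_work(struct kbase_context *kctx)
{
	kbase_gpu_metrics_ctx_start_activity(kctx, ktime_get_raw_ns());

	/* ... the GPU executes work on behalf of this context ... */

	kbase_gpu_metrics_ctx_end_activity(kctx, ktime_get_raw_ns());
}
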
+ */ +void kbase_gpu_metrics_ctx_end_activity(struct kbase_context *kctx, u64 timestamp_ns); + +/** + * kbase_gpu_metrics_emit_tracepoint() - Emit power/gpu_work_period tracepoint + * for active GPU metrics contexts. + * + * @kbdev: Pointer to the GPU device. + * @ts: Timestamp at which the tracepoint is being emitted. + * + * This function would loop through all the active GPU metrics contexts and emit a + * power/gpu_work_period tracepoint for them. + * The GPU metrics context that is found to be inactive since the last tracepoint + * was emitted would be moved to the inactive list. + * The current work period would be considered as over and a new work period would + * begin whenever any application does the GPU activity. + * + * Note: The caller must appropriately serialize the call to this function with the + * call to other GPU metrics functions declared in this file. + */ +void kbase_gpu_metrics_emit_tracepoint(struct kbase_device *kbdev, u64 ts); + +/** + * kbase_gpu_metrics_init() - Initialise a gpu_metrics instance for a GPU + * + * @kbdev: Pointer to the GPU device. + * + * This function is called once for each @kbdev. + * + * Return: 0 on success, or negative on failure. + */ +int kbase_gpu_metrics_init(struct kbase_device *kbdev); + +/** + * kbase_gpu_metrics_term() - Terminate a gpu_metrics instance + * + * @kbdev: Pointer to the GPU device. + */ +void kbase_gpu_metrics_term(struct kbase_device *kbdev); + +#endif +#endif /* _KBASE_GPU_METRICS_H_ */ diff --git a/mali_kbase/mali_kbase_gpuprops.c b/mali_kbase/mali_kbase_gpuprops.c index afbba3d..02d6bb2 100644 --- a/mali_kbase/mali_kbase_gpuprops.c +++ b/mali_kbase/mali_kbase_gpuprops.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2011-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -49,7 +49,7 @@ static void kbase_gpuprops_construct_coherent_groups( props->coherency_info.coherency = props->raw_props.mem_features; props->coherency_info.num_core_groups = hweight64(props->raw_props.l2_present); - if (props->coherency_info.coherency & GROUPS_L2_COHERENT) { + if (props->coherency_info.coherency & MEM_FEATURES_COHERENT_CORE_GROUP_MASK) { /* Group is l2 coherent */ group_present = props->raw_props.l2_present; } else { diff --git a/mali_kbase/mali_kbase_gwt.c b/mali_kbase/mali_kbase_gwt.c index 0eba889..32c9241 100644 --- a/mali_kbase/mali_kbase_gwt.c +++ b/mali_kbase/mali_kbase_gwt.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -53,9 +53,9 @@ static void kbase_gpu_gwt_setup_pages(struct kbase_context *kctx, unsigned long flag) { kbase_gpu_gwt_setup_page_permission(kctx, flag, - rb_first(&(kctx->reg_rbtree_same))); + rb_first(&kctx->reg_zone[SAME_VA_ZONE].reg_rbtree)); kbase_gpu_gwt_setup_page_permission(kctx, flag, - rb_first(&(kctx->reg_rbtree_custom))); + rb_first(&kctx->reg_zone[CUSTOM_VA_ZONE].reg_rbtree)); } diff --git a/mali_kbase/mali_kbase_hwaccess_time.h b/mali_kbase/mali_kbase_hwaccess_time.h index ac2a26d..f16348f 100644 --- a/mali_kbase/mali_kbase_hwaccess_time.h +++ b/mali_kbase/mali_kbase_hwaccess_time.h @@ -22,13 +22,16 @@ #ifndef _KBASE_BACKEND_TIME_H_ #define _KBASE_BACKEND_TIME_H_ -#if MALI_USE_CSF /** * struct kbase_backend_time - System timestamp attributes. * * @multiplier: Numerator of the converter's fraction. * @divisor: Denominator of the converter's fraction. * @offset: Converter's offset term. + * @device_scaled_timeouts: Timeouts in milliseconds that were scaled to be + * consistent with the minimum MCU frequency. This + * array caches the results of all of the conversions + * for ease of use later on. * * According to Generic timer spec, system timer: * - Increments at a fixed frequency @@ -49,11 +52,15 @@ * */ struct kbase_backend_time { +#if MALI_USE_CSF u64 multiplier; u64 divisor; s64 offset; +#endif + unsigned int device_scaled_timeouts[KBASE_TIMEOUT_SELECTOR_COUNT]; }; +#if MALI_USE_CSF /** * kbase_backend_time_convert_gpu_to_cpu() - Convert GPU timestamp to CPU timestamp. * @@ -89,6 +96,40 @@ void kbase_backend_get_gpu_time_norequest(struct kbase_device *kbdev, u64 *cycle_counter, u64 *system_time, struct timespec64 *ts); + +/** + * kbase_device_set_timeout_ms - Set an unscaled device timeout in milliseconds, + * subject to the maximum timeout constraint. + * + * @kbdev: KBase device pointer. + * @selector: The specific timeout that should be scaled. + * @timeout_ms: The timeout in milliseconds which should be scaled. + * + * This function writes the absolute timeout in milliseconds to the table of + * precomputed device timeouts, while establishing an upper bound on the individual + * timeout of UINT_MAX milliseconds. + */ +void kbase_device_set_timeout_ms(struct kbase_device *kbdev, enum kbase_timeout_selector selector, + unsigned int timeout_ms); + +/** + * kbase_device_set_timeout - Calculate the given timeout using the provided + * timeout cycles and multiplier. + * + * @kbdev: KBase device pointer. + * @selector: The specific timeout that should be scaled. + * @timeout_cycles: The timeout in cycles which should be scaled. + * @cycle_multiplier: A multiplier applied to the number of cycles, allowing + * the callsite to scale the minimum timeout based on the + * host device. + * + * This function writes the scaled timeout to the per-device table to avoid + having to recompute the timeouts every single time that the related methods + are called.
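
To make the scaling relationship concrete: dividing cycles by a frequency in kHz yields milliseconds. A plausible reduction of kbase_device_set_timeout(), assumed from the parameter names (the real implementation would read a frequency floor such as lowest_gpu_freq_khz from the device and clamp via kbase_device_set_timeout_ms()):

/* e.g. 100000 cycles with multiplier 5 at a 100000 kHz floor -> 5 ms */
static unsigned int sketch_scaled_timeout_ms(u64 timeout_cycles, u32 cycle_multiplier,
					     u64 freq_khz)
{
	const u64 ms = div64_u64(timeout_cycles * cycle_multiplier, freq_khz);

	return (ms > UINT_MAX) ? UINT_MAX : (unsigned int)ms;
}
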
+ */ +void kbase_device_set_timeout(struct kbase_device *kbdev, enum kbase_timeout_selector selector, + u64 timeout_cycles, u32 cycle_multiplier); + /** * kbase_get_timeout_ms - Choose a timeout value to get a timeout scaled * GPU frequency, using a choice from diff --git a/mali_kbase/mali_kbase_js.c b/mali_kbase/mali_kbase_js.c index 5dd7813..d7facb9 100644 --- a/mali_kbase/mali_kbase_js.c +++ b/mali_kbase/mali_kbase_js.c @@ -36,6 +36,20 @@ #include "mali_kbase_hwaccess_jm.h" #include <mali_kbase_hwaccess_time.h> #include <linux/priority_control_manager.h> +#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) +#include <mali_kbase_gpu_metrics.h> + +static unsigned long gpu_metrics_tp_emit_interval_ns = DEFAULT_GPU_METRICS_TP_EMIT_INTERVAL_NS; + +module_param(gpu_metrics_tp_emit_interval_ns, ulong, 0444); +MODULE_PARM_DESC(gpu_metrics_tp_emit_interval_ns, + "Time interval in nano seconds at which GPU metrics tracepoints are emitted"); + +unsigned long kbase_gpu_metrics_get_emit_interval(void) +{ + return gpu_metrics_tp_emit_interval_ns; +} +#endif /* * Private types @@ -101,6 +115,118 @@ static int kbase_ktrace_get_ctx_refcnt(struct kbase_context *kctx) * Private functions */ +#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) +/** + * gpu_metrics_timer_callback() - Callback function for the GPU metrics hrtimer + * + * @timer: Pointer to the GPU metrics hrtimer + * + * This function will emit power/gpu_work_period tracepoint for all the active + * GPU metrics contexts. The timer will be restarted if needed. + * + * Return: enum value to indicate that timer should not be restarted. + */ +static enum hrtimer_restart gpu_metrics_timer_callback(struct hrtimer *timer) +{ + struct kbasep_js_device_data *js_devdata = + container_of(timer, struct kbasep_js_device_data, gpu_metrics_timer); + struct kbase_device *kbdev = + container_of(js_devdata, struct kbase_device, js_data); + unsigned long flags; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kbase_gpu_metrics_emit_tracepoint(kbdev, ktime_get_raw_ns()); + WARN_ON_ONCE(!js_devdata->gpu_metrics_timer_running); + if (js_devdata->gpu_metrics_timer_needed) { + hrtimer_start(&js_devdata->gpu_metrics_timer, + HR_TIMER_DELAY_NSEC(gpu_metrics_tp_emit_interval_ns), + HRTIMER_MODE_REL); + } else + js_devdata->gpu_metrics_timer_running = false; + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + return HRTIMER_NORESTART; +} + +/** + * gpu_metrics_ctx_init() - Take a reference on GPU metrics context if it exists, + * otherwise allocate and initialise one. + * + * @kctx: Pointer to the Kbase context. + * + * The GPU metrics context represents an "Application" for the purposes of GPU metrics + * reporting. There may be multiple kbase_contexts contributing data to a single GPU + * metrics context. + * This function takes a reference on GPU metrics context if it already exists + * corresponding to the Application that is creating the Kbase context, otherwise + * memory is allocated for it and initialised. + * + * Return: 0 on success, or negative on failure. 
+ */ +static inline int gpu_metrics_ctx_init(struct kbase_context *kctx) +{ + struct kbase_gpu_metrics_ctx *gpu_metrics_ctx; + struct kbase_device *kbdev = kctx->kbdev; + unsigned long flags; + int ret = 0; + + const struct cred *cred = get_current_cred(); + const unsigned int aid = cred->euid.val; + + put_cred(cred); + + /* Return early if this is not a Userspace created context */ + if (unlikely(!kctx->kfile)) + return 0; + + /* Serialize against the other threads trying to create/destroy Kbase contexts. */ + mutex_lock(&kbdev->kctx_list_lock); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + gpu_metrics_ctx = kbase_gpu_metrics_ctx_get(kbdev, aid); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + if (!gpu_metrics_ctx) { + gpu_metrics_ctx = kmalloc(sizeof(*gpu_metrics_ctx), GFP_KERNEL); + + if (gpu_metrics_ctx) { + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kbase_gpu_metrics_ctx_init(kbdev, gpu_metrics_ctx, aid); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + } else { + dev_err(kbdev->dev, "Allocation for gpu_metrics_ctx failed"); + ret = -ENOMEM; + } + } + + kctx->gpu_metrics_ctx = gpu_metrics_ctx; + mutex_unlock(&kbdev->kctx_list_lock); + + return ret; +} + +/** + * gpu_metrics_ctx_term() - Drop a reference on a GPU metrics context and free it + * if the refcount becomes 0. + * + * @kctx: Pointer to the Kbase context. + */ +static inline void gpu_metrics_ctx_term(struct kbase_context *kctx) +{ + unsigned long flags; + + /* Return early if this is not a Userspace created context */ + if (unlikely(!kctx->kfile)) + return; + + /* Serialize against the other threads trying to create/destroy Kbase contexts. */ + mutex_lock(&kctx->kbdev->kctx_list_lock); + spin_lock_irqsave(&kctx->kbdev->hwaccess_lock, flags); + kbase_gpu_metrics_ctx_put(kctx->kbdev, kctx->gpu_metrics_ctx); + spin_unlock_irqrestore(&kctx->kbdev->hwaccess_lock, flags); + mutex_unlock(&kctx->kbdev->kctx_list_lock); +} +#endif + /** * core_reqs_from_jsn_features - Convert JSn_FEATURES to core requirements * @features: JSn_FEATURE register value @@ -602,6 +728,21 @@ int kbasep_js_devdata_init(struct kbase_device * const kbdev) } } +#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) + if (!gpu_metrics_tp_emit_interval_ns || (gpu_metrics_tp_emit_interval_ns > NSEC_PER_SEC)) { + dev_warn( + kbdev->dev, + "Invalid value (%lu ns) for module param gpu_metrics_tp_emit_interval_ns. 
Using default value: %u ns", + gpu_metrics_tp_emit_interval_ns, DEFAULT_GPU_METRICS_TP_EMIT_INTERVAL_NS); + gpu_metrics_tp_emit_interval_ns = DEFAULT_GPU_METRICS_TP_EMIT_INTERVAL_NS; + } + + hrtimer_init(&jsdd->gpu_metrics_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + jsdd->gpu_metrics_timer.function = gpu_metrics_timer_callback; + jsdd->gpu_metrics_timer_needed = false; + jsdd->gpu_metrics_timer_running = false; +#endif + return 0; } @@ -626,16 +767,29 @@ void kbasep_js_devdata_term(struct kbase_device *kbdev) zero_ctx_attr_ref_count, sizeof(zero_ctx_attr_ref_count)) == 0); CSTD_UNUSED(zero_ctx_attr_ref_count); + +#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) + js_devdata->gpu_metrics_timer_needed = false; + hrtimer_cancel(&js_devdata->gpu_metrics_timer); +#endif } int kbasep_js_kctx_init(struct kbase_context *const kctx) { struct kbasep_js_kctx_info *js_kctx_info; int i, j; + int ret; CSTD_UNUSED(js_kctx_info); KBASE_DEBUG_ASSERT(kctx != NULL); + CSTD_UNUSED(ret); +#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) + ret = gpu_metrics_ctx_init(kctx); + if (ret) + return ret; +#endif + kbase_ctx_sched_init_ctx(kctx); for (i = 0; i < BASE_JM_MAX_NR_SLOTS; ++i) @@ -715,6 +869,9 @@ void kbasep_js_kctx_term(struct kbase_context *kctx) } kbase_ctx_sched_remove_ctx(kctx); +#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) + gpu_metrics_ctx_term(kctx); +#endif } /* diff --git a/mali_kbase/mali_kbase_kinstr_prfcnt.c b/mali_kbase/mali_kbase_kinstr_prfcnt.c index cfafd11..f0c4da7 100644 --- a/mali_kbase/mali_kbase_kinstr_prfcnt.c +++ b/mali_kbase/mali_kbase_kinstr_prfcnt.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2021-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -36,7 +36,6 @@ #include <linux/mutex.h> #include <linux/poll.h> #include <linux/slab.h> -#include <linux/overflow.h> #include <linux/version_compat_defs.h> #include <linux/workqueue.h> @@ -1267,8 +1266,10 @@ void kbase_kinstr_prfcnt_term(struct kbase_kinstr_prfcnt_context *kinstr_ctx) void kbase_kinstr_prfcnt_suspend(struct kbase_kinstr_prfcnt_context *kinstr_ctx) { - if (WARN_ON(!kinstr_ctx)) + if (!kinstr_ctx) { + pr_warn("%s: kinstr_ctx is NULL\n", __func__); return; + } mutex_lock(&kinstr_ctx->lock); @@ -1297,8 +1298,10 @@ void kbase_kinstr_prfcnt_suspend(struct kbase_kinstr_prfcnt_context *kinstr_ctx) void kbase_kinstr_prfcnt_resume(struct kbase_kinstr_prfcnt_context *kinstr_ctx) { - if (WARN_ON(!kinstr_ctx)) + if (!kinstr_ctx) { + pr_warn("%s: kinstr_ctx is NULL\n", __func__); return; + } mutex_lock(&kinstr_ctx->lock); diff --git a/mali_kbase/mali_kbase_mem.c b/mali_kbase/mali_kbase_mem.c index 8912783..c07d520 100644 --- a/mali_kbase/mali_kbase_mem.c +++ b/mali_kbase/mali_kbase_mem.c @@ -43,7 +43,7 @@ #include <mmu/mali_kbase_mmu.h> #include <mali_kbase_config_defaults.h> #include <mali_kbase_trace_gpu_mem.h> - +#include <linux/version_compat_defs.h> #define VA_REGION_SLAB_NAME_PREFIX "va-region-slab-" #define VA_REGION_SLAB_NAME_SIZE (DEVNAME_SIZE + sizeof(VA_REGION_SLAB_NAME_PREFIX) + 1) @@ -101,56 +101,66 @@ static size_t kbase_get_num_cpu_va_bits(struct kbase_context *kctx) return cpu_va_bits; } -/* This function finds out which RB tree the given pfn from the GPU VA belongs - * to based on the memory zone the pfn refers to - */ -static struct rb_root *kbase_gpu_va_to_rbtree(struct kbase_context *kctx, - u64 gpu_pfn) +unsigned long kbase_zone_to_bits(enum kbase_memory_zone zone) { - struct rb_root *rbtree = NULL; + return ((((unsigned long)zone) & ((1 << KBASE_REG_ZONE_BITS) - 1ul)) + << KBASE_REG_ZONE_SHIFT); +} - struct kbase_reg_zone *exec_va_zone = kbase_ctx_reg_zone_get(kctx, KBASE_REG_ZONE_EXEC_VA); +enum kbase_memory_zone kbase_bits_to_zone(unsigned long zone_bits) +{ + return (enum kbase_memory_zone)(((zone_bits) & KBASE_REG_ZONE_MASK) + >> KBASE_REG_ZONE_SHIFT); +} +char *kbase_reg_zone_get_name(enum kbase_memory_zone zone) +{ + switch (zone) { + case SAME_VA_ZONE: + return "SAME_VA"; + case CUSTOM_VA_ZONE: + return "CUSTOM_VA"; + case EXEC_VA_ZONE: + return "EXEC_VA"; #if MALI_USE_CSF - struct kbase_reg_zone *fixed_va_zone = - kbase_ctx_reg_zone_get(kctx, KBASE_REG_ZONE_FIXED_VA); - - struct kbase_reg_zone *exec_fixed_va_zone = - kbase_ctx_reg_zone_get(kctx, KBASE_REG_ZONE_EXEC_FIXED_VA); - - if (gpu_pfn >= fixed_va_zone->base_pfn) { - rbtree = &kctx->reg_rbtree_fixed; - return rbtree; - } else if (gpu_pfn >= exec_fixed_va_zone->base_pfn) { - rbtree = &kctx->reg_rbtree_exec_fixed; - return rbtree; - } + case MCU_SHARED_ZONE: + return "MCU_SHARED"; + case EXEC_FIXED_VA_ZONE: + return "EXEC_FIXED_VA"; + case FIXED_VA_ZONE: + return "FIXED_VA"; #endif - if (gpu_pfn >= exec_va_zone->base_pfn) - rbtree = &kctx->reg_rbtree_exec; - else { - u64 same_va_end; + default: + return NULL; + } +} - if (kbase_ctx_compat_mode(kctx)) { - same_va_end = KBASE_REG_ZONE_CUSTOM_VA_BASE; - } else { - struct kbase_reg_zone *same_va_zone = - kbase_ctx_reg_zone_get(kctx, - KBASE_REG_ZONE_SAME_VA); - same_va_end = kbase_reg_zone_end_pfn(same_va_zone); - } +/** + * kbase_gpu_pfn_to_rbtree - find the rb-tree tracking the region with the 
indicated GPU + * page frame number + * @kctx: kbase context + * @gpu_pfn: GPU PFN address + * + * Context: any context. + * + * Return: reference to the rb-tree root, NULL if not found + */ +static struct rb_root *kbase_gpu_pfn_to_rbtree(struct kbase_context *kctx, u64 gpu_pfn) +{ + enum kbase_memory_zone zone_idx; + struct kbase_reg_zone *zone; - if (gpu_pfn >= same_va_end) - rbtree = &kctx->reg_rbtree_custom; - else - rbtree = &kctx->reg_rbtree_same; + for (zone_idx = 0; zone_idx < CONTEXT_ZONE_MAX; zone_idx++) { + zone = &kctx->reg_zone[zone_idx]; + if ((gpu_pfn >= zone->base_pfn) && (gpu_pfn < kbase_reg_zone_end_pfn(zone))) + return &zone->reg_rbtree; } - return rbtree; + return NULL; } /* This function inserts a region into the tree. */ -static void kbase_region_tracker_insert(struct kbase_va_region *new_reg) +void kbase_region_tracker_insert(struct kbase_va_region *new_reg) { u64 start_pfn = new_reg->start_pfn; struct rb_node **link = NULL; @@ -251,7 +261,9 @@ struct kbase_va_region *kbase_region_tracker_find_region_enclosing_address( lockdep_assert_held(&kctx->reg_lock); - rbtree = kbase_gpu_va_to_rbtree(kctx, gpu_pfn); + rbtree = kbase_gpu_pfn_to_rbtree(kctx, gpu_pfn); + if (unlikely(!rbtree)) + return NULL; return kbase_find_region_enclosing_address(rbtree, gpu_addr); } @@ -289,7 +301,9 @@ struct kbase_va_region *kbase_region_tracker_find_region_base_address( lockdep_assert_held(&kctx->reg_lock); - rbtree = kbase_gpu_va_to_rbtree(kctx, gpu_pfn); + rbtree = kbase_gpu_pfn_to_rbtree(kctx, gpu_pfn); + if (unlikely(!rbtree)) + return NULL; return kbase_find_region_base_address(rbtree, gpu_addr); } @@ -376,6 +390,7 @@ void kbase_remove_va_region(struct kbase_device *kbdev, struct kbase_va_region *reg) { struct rb_node *rbprev; + struct kbase_reg_zone *zone = container_of(reg->rbtree, struct kbase_reg_zone, reg_rbtree); struct kbase_va_region *prev = NULL; struct rb_node *rbnext; struct kbase_va_region *next = NULL; @@ -400,8 +415,8 @@ void kbase_remove_va_region(struct kbase_device *kbdev, */ u64 prev_end_pfn = prev->start_pfn + prev->nr_pages; - WARN_ON((prev->flags & KBASE_REG_ZONE_MASK) != - (reg->flags & KBASE_REG_ZONE_MASK)); + WARN_ON((kbase_bits_to_zone(prev->flags)) != + (kbase_bits_to_zone(reg->flags))); if (!WARN_ON(reg->start_pfn < prev_end_pfn)) prev->nr_pages += reg->start_pfn - prev_end_pfn; prev->nr_pages += reg->nr_pages; @@ -422,8 +437,8 @@ void kbase_remove_va_region(struct kbase_device *kbdev, */ u64 reg_end_pfn = reg->start_pfn + reg->nr_pages; - WARN_ON((next->flags & KBASE_REG_ZONE_MASK) != - (reg->flags & KBASE_REG_ZONE_MASK)); + WARN_ON((kbase_bits_to_zone(next->flags)) != + (kbase_bits_to_zone(reg->flags))); if (!WARN_ON(next->start_pfn < reg_end_pfn)) next->nr_pages += next->start_pfn - reg_end_pfn; next->start_pfn = reg->start_pfn; @@ -445,8 +460,7 @@ void kbase_remove_va_region(struct kbase_device *kbdev, */ struct kbase_va_region *free_reg; - free_reg = kbase_alloc_free_region(kbdev, reg_rbtree, reg->start_pfn, reg->nr_pages, - reg->flags & KBASE_REG_ZONE_MASK); + free_reg = kbase_alloc_free_region(zone, reg->start_pfn, reg->nr_pages); if (!free_reg) { /* In case of failure, we cannot allocate a replacement * free region, so we will be left with a 'gap' in the @@ -507,6 +521,8 @@ static int kbase_insert_va_region_nolock(struct kbase_device *kbdev, size_t nr_pages) { struct rb_root *reg_rbtree = NULL; + struct kbase_reg_zone *zone = + container_of(at_reg->rbtree, struct kbase_reg_zone, reg_rbtree); int err = 0; reg_rbtree = at_reg->rbtree; @@ -548,9 +564,8 
@@ static int kbase_insert_va_region_nolock(struct kbase_device *kbdev, else { struct kbase_va_region *new_front_reg; - new_front_reg = kbase_alloc_free_region(kbdev, reg_rbtree, at_reg->start_pfn, - start_pfn - at_reg->start_pfn, - at_reg->flags & KBASE_REG_ZONE_MASK); + new_front_reg = kbase_alloc_free_region(zone, at_reg->start_pfn, + start_pfn - at_reg->start_pfn); if (new_front_reg) { at_reg->nr_pages -= nr_pages + new_front_reg->nr_pages; @@ -603,9 +618,9 @@ int kbase_add_va_region(struct kbase_context *kctx, #endif if (!(reg->flags & KBASE_REG_GPU_NX) && !addr && #if MALI_USE_CSF - ((reg->flags & KBASE_REG_ZONE_MASK) != KBASE_REG_ZONE_EXEC_FIXED_VA) && + ((kbase_bits_to_zone(reg->flags)) != EXEC_FIXED_VA_ZONE) && #endif - ((reg->flags & KBASE_REG_ZONE_MASK) != KBASE_REG_ZONE_EXEC_VA)) { + ((kbase_bits_to_zone(reg->flags)) != EXEC_VA_ZONE)) { if (cpu_va_bits > gpu_pc_bits) { align = max(align, (size_t)((1ULL << gpu_pc_bits) >> PAGE_SHIFT)); @@ -623,8 +638,7 @@ int kbase_add_va_region(struct kbase_context *kctx, * then don't retry, we're out of VA and there is * nothing which can be done about it. */ - if ((reg->flags & KBASE_REG_ZONE_MASK) != - KBASE_REG_ZONE_CUSTOM_VA) + if ((kbase_bits_to_zone(reg->flags)) != CUSTOM_VA_ZONE) break; } while (kbase_jit_evict(kctx)); @@ -728,119 +742,27 @@ exit: return err; } -/* - * @brief Initialize the internal region tracker data structure. +/** + * kbase_reg_to_kctx - Obtain the kbase context tracking a VA region. + * @reg: VA region + * + * Return: + * * pointer to kbase context of the memory allocation + * * NULL if the region does not belong to a kbase context (for instance, + * if the allocation corresponds to a shared MCU region on CSF). */ -#if MALI_USE_CSF -static void kbase_region_tracker_ds_init(struct kbase_context *kctx, - struct kbase_va_region *same_va_reg, - struct kbase_va_region *custom_va_reg, - struct kbase_va_region *exec_va_reg, - struct kbase_va_region *exec_fixed_va_reg, - struct kbase_va_region *fixed_va_reg) -{ - u64 last_zone_end_pfn; - - kctx->reg_rbtree_same = RB_ROOT; - kbase_region_tracker_insert(same_va_reg); - - last_zone_end_pfn = same_va_reg->start_pfn + same_va_reg->nr_pages; - - /* Although custom_va_reg doesn't always exist, initialize - * unconditionally because of the mem_view debugfs - * implementation which relies on it being empty. - */ - kctx->reg_rbtree_custom = RB_ROOT; - kctx->reg_rbtree_exec = RB_ROOT; - - if (custom_va_reg) { - WARN_ON(custom_va_reg->start_pfn < last_zone_end_pfn); - kbase_region_tracker_insert(custom_va_reg); - last_zone_end_pfn = custom_va_reg->start_pfn + custom_va_reg->nr_pages; - } - - /* Initialize exec, fixed and exec_fixed. These are always - * initialized at this stage, if they will exist at all. 
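
The kbase_zone_to_bits()/kbase_bits_to_zone() helpers used in the hunks above replace the old KBASE_REG_ZONE_* flag constants with an enum packed into reg->flags. A round-trip sketch (illustrative; KBASE_REG_ZONE_SHIFT/BITS/MASK come from elsewhere in the tree):

static void sketch_zone_bits_round_trip(void)
{
	/* Pack the zone identifier into the region-flags bitfield... */
	const unsigned long bits = kbase_zone_to_bits(CUSTOM_VA_ZONE);

	/* ...and recover it with the inverse mapping. */
	WARN_ON(kbase_bits_to_zone(bits) != CUSTOM_VA_ZONE);
}
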
- */ - kctx->reg_rbtree_fixed = RB_ROOT; - kctx->reg_rbtree_exec_fixed = RB_ROOT; - - if (exec_va_reg) { - WARN_ON(exec_va_reg->start_pfn < last_zone_end_pfn); - kbase_region_tracker_insert(exec_va_reg); - last_zone_end_pfn = exec_va_reg->start_pfn + exec_va_reg->nr_pages; - } - - if (exec_fixed_va_reg) { - WARN_ON(exec_fixed_va_reg->start_pfn < last_zone_end_pfn); - kbase_region_tracker_insert(exec_fixed_va_reg); - last_zone_end_pfn = exec_fixed_va_reg->start_pfn + exec_fixed_va_reg->nr_pages; - } - - if (fixed_va_reg) { - WARN_ON(fixed_va_reg->start_pfn < last_zone_end_pfn); - kbase_region_tracker_insert(fixed_va_reg); - last_zone_end_pfn = fixed_va_reg->start_pfn + fixed_va_reg->nr_pages; - } -} -#else -static void kbase_region_tracker_ds_init(struct kbase_context *kctx, - struct kbase_va_region *same_va_reg, - struct kbase_va_region *custom_va_reg) -{ - kctx->reg_rbtree_same = RB_ROOT; - kbase_region_tracker_insert(same_va_reg); - - /* Although custom_va_reg and exec_va_reg don't always exist, - * initialize unconditionally because of the mem_view debugfs - * implementation which relies on them being empty. - * - * The difference between the two is that the EXEC_VA region - * is never initialized at this stage. - */ - kctx->reg_rbtree_custom = RB_ROOT; - kctx->reg_rbtree_exec = RB_ROOT; - - if (custom_va_reg) - kbase_region_tracker_insert(custom_va_reg); -} -#endif /* MALI_USE_CSF */ - -static struct kbase_context *kbase_reg_flags_to_kctx(struct kbase_va_region *reg) +static struct kbase_context *kbase_reg_to_kctx(struct kbase_va_region *reg) { - struct kbase_context *kctx = NULL; struct rb_root *rbtree = reg->rbtree; + struct kbase_reg_zone *zone = container_of(rbtree, struct kbase_reg_zone, reg_rbtree); - switch (reg->flags & KBASE_REG_ZONE_MASK) { - case KBASE_REG_ZONE_CUSTOM_VA: - kctx = container_of(rbtree, struct kbase_context, reg_rbtree_custom); - break; - case KBASE_REG_ZONE_SAME_VA: - kctx = container_of(rbtree, struct kbase_context, reg_rbtree_same); - break; - case KBASE_REG_ZONE_EXEC_VA: - kctx = container_of(rbtree, struct kbase_context, reg_rbtree_exec); - break; -#if MALI_USE_CSF - case KBASE_REG_ZONE_EXEC_FIXED_VA: - kctx = container_of(rbtree, struct kbase_context, reg_rbtree_exec_fixed); - break; - case KBASE_REG_ZONE_FIXED_VA: - kctx = container_of(rbtree, struct kbase_context, reg_rbtree_fixed); - break; - case KBASE_REG_ZONE_MCU_SHARED: - /* This is only expected to be called on driver unload. */ - break; -#endif - default: - WARN(1, "Unknown zone in region: flags=0x%lx\n", reg->flags); - break; - } + if (!kbase_is_ctx_reg_zone(zone->id)) + return NULL; - return kctx; + return container_of(zone - zone->id, struct kbase_context, reg_zone[0]); } -static void kbase_region_tracker_erase_rbtree(struct rb_root *rbtree) +void kbase_region_tracker_erase_rbtree(struct rb_root *rbtree) { struct rb_node *rbnode; struct kbase_va_region *reg; @@ -851,8 +773,12 @@ static void kbase_region_tracker_erase_rbtree(struct rb_root *rbtree) rb_erase(rbnode, rbtree); reg = rb_entry(rbnode, struct kbase_va_region, rblink); WARN_ON(kbase_refcount_read(®->va_refcnt) != 1); - if (kbase_page_migration_enabled) - kbase_gpu_munmap(kbase_reg_flags_to_kctx(reg), reg); + if (kbase_is_page_migration_enabled()) { + struct kbase_context *kctx = kbase_reg_to_kctx(reg); + + if (kctx) + kbase_gpu_munmap(kctx, reg); + } /* Reset the start_pfn - as the rbtree is being * destroyed and we've already erased this region, there * is no further need to attempt to remove it. 
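The return statement in kbase_reg_to_kctx() above leans on a subtle piece of pointer arithmetic: because a context keeps its zones in the reg_zone[] array indexed by zone ID, zone - zone->id always points back at reg_zone[0], from which container_of() recovers the enclosing context. A minimal standalone sketch of the same idiom, with toy types rather than the driver's:

#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct toy_zone { int id; };
struct toy_ctx { int tgid; struct toy_zone reg_zone[3]; };

int main(void)
{
	struct toy_ctx c = { .tgid = 42 };
	int i;

	for (i = 0; i < 3; i++)
		c.reg_zone[i].id = i;

	/* Pick any zone: subtracting its id lands on reg_zone[0], and
	 * container_of() then recovers the enclosing context.
	 */
	struct toy_zone *z = &c.reg_zone[2];
	struct toy_ctx *back = container_of(z - z->id, struct toy_ctx, reg_zone[0]);

	printf("%d\n", back->tgid); /* prints 42 */
	return 0;
}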
@@ -867,209 +793,261 @@ static void kbase_region_tracker_erase_rbtree(struct rb_root *rbtree) } while (rbnode); } -void kbase_region_tracker_term(struct kbase_context *kctx) -{ - WARN(kctx->as_nr != KBASEP_AS_NR_INVALID, - "kctx-%d_%d must first be scheduled out to flush GPU caches+tlbs before erasing remaining regions", - kctx->tgid, kctx->id); - - kbase_gpu_vm_lock(kctx); - kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_same); - kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_custom); - kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_exec); -#if MALI_USE_CSF - WARN_ON(!list_empty(&kctx->csf.event_pages_head)); - kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_exec_fixed); - kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_fixed); - -#endif - kbase_gpu_vm_unlock(kctx); -} - -void kbase_region_tracker_term_rbtree(struct rb_root *rbtree) -{ - kbase_region_tracker_erase_rbtree(rbtree); -} - static size_t kbase_get_same_va_bits(struct kbase_context *kctx) { return min_t(size_t, kbase_get_num_cpu_va_bits(kctx), kctx->kbdev->gpu_props.mmu.va_bits); } -int kbase_region_tracker_init(struct kbase_context *kctx) +static int kbase_reg_zone_same_va_init(struct kbase_context *kctx, u64 gpu_va_limit) { - struct kbase_va_region *same_va_reg; - struct kbase_va_region *custom_va_reg = NULL; - size_t same_va_bits = kbase_get_same_va_bits(kctx); - u64 custom_va_size = KBASE_REG_ZONE_CUSTOM_VA_SIZE; - u64 gpu_va_bits = kctx->kbdev->gpu_props.mmu.va_bits; - u64 gpu_va_limit = (1ULL << gpu_va_bits) >> PAGE_SHIFT; - u64 same_va_pages; - u64 same_va_base = 1u; int err; -#if MALI_USE_CSF - struct kbase_va_region *exec_va_reg; - struct kbase_va_region *exec_fixed_va_reg; - struct kbase_va_region *fixed_va_reg; - - u64 exec_va_base; - u64 fixed_va_end; - u64 exec_fixed_va_base; - u64 fixed_va_base; - u64 fixed_va_pages; -#endif - - /* Take the lock as kbase_free_alloced_region requires it */ - kbase_gpu_vm_lock(kctx); + struct kbase_reg_zone *zone = kbase_ctx_reg_zone_get(kctx, SAME_VA_ZONE); + const size_t same_va_bits = kbase_get_same_va_bits(kctx); + const u64 base_pfn = 1u; + u64 nr_pages = (1ULL << (same_va_bits - PAGE_SHIFT)) - base_pfn; - same_va_pages = (1ULL << (same_va_bits - PAGE_SHIFT)) - same_va_base; + lockdep_assert_held(&kctx->reg_lock); #if MALI_USE_CSF - if ((same_va_base + same_va_pages) > KBASE_REG_ZONE_EXEC_VA_BASE_64) { + if ((base_pfn + nr_pages) > KBASE_REG_ZONE_EXEC_VA_BASE_64) { /* Depending on how the kernel is configured, it's possible (eg on aarch64) for * same_va_bits to reach 48 bits. Cap same_va_pages so that the same_va zone * doesn't cross into the exec_va zone. 
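For concreteness, here is the SAME_VA sizing above worked through with assumed numbers (4 KiB pages, 48 VA bits, CSF build); the EXEC_VA base below mirrors the KBASE_REG_ZONE_EXEC_VA_BASE_64 definition appearing later in this patch:

#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT 12 /* assumed 4 KiB pages */
#define EXEC_VA_BASE_64 ((UINT64_C(1) << 47) >> PAGE_SHIFT)

int main(void)
{
	unsigned int same_va_bits = 48; /* e.g. aarch64 with 48-bit VA */
	uint64_t base_pfn = 1;
	uint64_t nr_pages = (UINT64_C(1) << (same_va_bits - PAGE_SHIFT)) - base_pfn;

	/* CSF: cap SAME_VA so it cannot cross into the EXEC_VA zone. */
	if (base_pfn + nr_pages > EXEC_VA_BASE_64)
		nr_pages = EXEC_VA_BASE_64 - base_pfn;

	printf("SAME_VA: pfn [%llu, %llu)\n", (unsigned long long)base_pfn,
	       (unsigned long long)(base_pfn + nr_pages));
	return 0;
}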
*/ - same_va_pages = KBASE_REG_ZONE_EXEC_VA_BASE_64 - same_va_base; + nr_pages = KBASE_REG_ZONE_EXEC_VA_BASE_64 - base_pfn; } #endif + err = kbase_reg_zone_init(kctx->kbdev, zone, SAME_VA_ZONE, base_pfn, nr_pages); + if (err) + return -ENOMEM; - /* all have SAME_VA */ - same_va_reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_same, same_va_base, - same_va_pages, KBASE_REG_ZONE_SAME_VA); + kctx->gpu_va_end = base_pfn + nr_pages; - if (!same_va_reg) { - err = -ENOMEM; - goto fail_unlock; - } - kbase_ctx_reg_zone_init(kctx, KBASE_REG_ZONE_SAME_VA, same_va_base, - same_va_pages); + return 0; +} - if (kbase_ctx_compat_mode(kctx)) { - if (gpu_va_limit <= KBASE_REG_ZONE_CUSTOM_VA_BASE) { - err = -EINVAL; - goto fail_free_same_va; - } - /* If the current size of TMEM is out of range of the - * virtual address space addressable by the MMU then - * we should shrink it to fit - */ - if ((KBASE_REG_ZONE_CUSTOM_VA_BASE + KBASE_REG_ZONE_CUSTOM_VA_SIZE) >= gpu_va_limit) - custom_va_size = gpu_va_limit - KBASE_REG_ZONE_CUSTOM_VA_BASE; +static void kbase_reg_zone_same_va_term(struct kbase_context *kctx) +{ + struct kbase_reg_zone *zone = kbase_ctx_reg_zone_get(kctx, SAME_VA_ZONE); - custom_va_reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_custom, - KBASE_REG_ZONE_CUSTOM_VA_BASE, - custom_va_size, KBASE_REG_ZONE_CUSTOM_VA); + kbase_reg_zone_term(zone); +} - if (!custom_va_reg) { - err = -ENOMEM; - goto fail_free_same_va; - } - kbase_ctx_reg_zone_init(kctx, KBASE_REG_ZONE_CUSTOM_VA, - KBASE_REG_ZONE_CUSTOM_VA_BASE, - custom_va_size); - } else { - custom_va_size = 0; - } +static int kbase_reg_zone_custom_va_init(struct kbase_context *kctx, u64 gpu_va_limit) +{ + struct kbase_reg_zone *zone = kbase_ctx_reg_zone_get(kctx, CUSTOM_VA_ZONE); + u64 nr_pages = KBASE_REG_ZONE_CUSTOM_VA_SIZE; -#if MALI_USE_CSF - /* The position of EXEC_VA depends on whether the client is 32-bit or 64-bit. */ - exec_va_base = KBASE_REG_ZONE_EXEC_VA_BASE_64; + /* If the context does not support CUSTOM_VA zones, then we don't need to + * proceed past this point, and can pretend that it was initialized properly. + * In practice, this will mean that the zone metadata structure will be zero + * initialized and not contain a valid zone ID. + */ + if (!kbase_ctx_compat_mode(kctx)) + return 0; + + if (gpu_va_limit <= KBASE_REG_ZONE_CUSTOM_VA_BASE) + return -EINVAL; - /* Similarly the end of the FIXED_VA zone also depends on whether the client - * is 32 or 64-bits. + /* If the current size of TMEM is out of range of the + * virtual address space addressable by the MMU then + * we should shrink it to fit */ - fixed_va_end = KBASE_REG_ZONE_FIXED_VA_END_64; + if ((KBASE_REG_ZONE_CUSTOM_VA_BASE + KBASE_REG_ZONE_CUSTOM_VA_SIZE) >= gpu_va_limit) + nr_pages = gpu_va_limit - KBASE_REG_ZONE_CUSTOM_VA_BASE; - if (kbase_ctx_compat_mode(kctx)) { - exec_va_base = KBASE_REG_ZONE_EXEC_VA_BASE_32; - fixed_va_end = KBASE_REG_ZONE_FIXED_VA_END_32; - } + if (kbase_reg_zone_init(kctx->kbdev, zone, CUSTOM_VA_ZONE, KBASE_REG_ZONE_CUSTOM_VA_BASE, + nr_pages)) + return -ENOMEM; - kbase_ctx_reg_zone_init(kctx, KBASE_REG_ZONE_EXEC_VA, exec_va_base, - KBASE_REG_ZONE_EXEC_VA_SIZE); + /* On JM systems, this is the last memory zone that gets initialized, + * so the GPU VA ends right after the end of the CUSTOM_VA zone. 
On CSF, + * setting here is harmless, as the FIXED_VA initializer will overwrite + * it + */ + kctx->gpu_va_end += nr_pages; - exec_va_reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_exec, exec_va_base, - KBASE_REG_ZONE_EXEC_VA_SIZE, KBASE_REG_ZONE_EXEC_VA); + return 0; +} - if (!exec_va_reg) { - err = -ENOMEM; - goto fail_free_custom_va; - } +static void kbase_reg_zone_custom_va_term(struct kbase_context *kctx) +{ + struct kbase_reg_zone *zone = kbase_ctx_reg_zone_get(kctx, CUSTOM_VA_ZONE); - exec_fixed_va_base = exec_va_base + KBASE_REG_ZONE_EXEC_VA_SIZE; + kbase_reg_zone_term(zone); +} - kbase_ctx_reg_zone_init(kctx, KBASE_REG_ZONE_EXEC_FIXED_VA, exec_fixed_va_base, - KBASE_REG_ZONE_EXEC_FIXED_VA_SIZE); +static inline u64 kbase_get_exec_va_zone_base(struct kbase_context *kctx) +{ + u64 base_pfn; - exec_fixed_va_reg = - kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_exec_fixed, - exec_fixed_va_base, KBASE_REG_ZONE_EXEC_FIXED_VA_SIZE, - KBASE_REG_ZONE_EXEC_FIXED_VA); +#if MALI_USE_CSF + base_pfn = KBASE_REG_ZONE_EXEC_VA_BASE_64; + if (kbase_ctx_compat_mode(kctx)) + base_pfn = KBASE_REG_ZONE_EXEC_VA_BASE_32; +#else + /* EXEC_VA zone's codepaths are slightly easier when its base_pfn is + * initially U64_MAX + */ + base_pfn = U64_MAX; +#endif - if (!exec_fixed_va_reg) { - err = -ENOMEM; - goto fail_free_exec_va; - } + return base_pfn; +} + +static inline int kbase_reg_zone_exec_va_init(struct kbase_context *kctx, u64 gpu_va_limit) +{ + struct kbase_reg_zone *zone = kbase_ctx_reg_zone_get(kctx, EXEC_VA_ZONE); + const u64 base_pfn = kbase_get_exec_va_zone_base(kctx); + u64 nr_pages = KBASE_REG_ZONE_EXEC_VA_SIZE; + +#if !MALI_USE_CSF + nr_pages = 0; +#endif - fixed_va_base = exec_fixed_va_base + KBASE_REG_ZONE_EXEC_FIXED_VA_SIZE; - fixed_va_pages = fixed_va_end - fixed_va_base; + return kbase_reg_zone_init(kctx->kbdev, zone, EXEC_VA_ZONE, base_pfn, nr_pages); +} - kbase_ctx_reg_zone_init(kctx, KBASE_REG_ZONE_FIXED_VA, fixed_va_base, fixed_va_pages); +static void kbase_reg_zone_exec_va_term(struct kbase_context *kctx) +{ + struct kbase_reg_zone *zone = kbase_ctx_reg_zone_get(kctx, EXEC_VA_ZONE); - fixed_va_reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_fixed, fixed_va_base, - fixed_va_pages, KBASE_REG_ZONE_FIXED_VA); + kbase_reg_zone_term(zone); +} + +#if MALI_USE_CSF +static inline u64 kbase_get_exec_fixed_va_zone_base(struct kbase_context *kctx) +{ + return kbase_get_exec_va_zone_base(kctx) + KBASE_REG_ZONE_EXEC_VA_SIZE; +} + +static int kbase_reg_zone_exec_fixed_va_init(struct kbase_context *kctx, u64 gpu_va_limit) +{ + struct kbase_reg_zone *zone = kbase_ctx_reg_zone_get(kctx, EXEC_FIXED_VA_ZONE); + const u64 base_pfn = kbase_get_exec_fixed_va_zone_base(kctx); + + return kbase_reg_zone_init(kctx->kbdev, zone, EXEC_FIXED_VA_ZONE, base_pfn, + KBASE_REG_ZONE_EXEC_FIXED_VA_SIZE); +} + +static void kbase_reg_zone_exec_fixed_va_term(struct kbase_context *kctx) +{ + struct kbase_reg_zone *zone = kbase_ctx_reg_zone_get(kctx, EXEC_FIXED_VA_ZONE); + + WARN_ON(!list_empty(&kctx->csf.event_pages_head)); + kbase_reg_zone_term(zone); +} + +static int kbase_reg_zone_fixed_va_init(struct kbase_context *kctx, u64 gpu_va_limit) +{ + struct kbase_reg_zone *zone = kbase_ctx_reg_zone_get(kctx, FIXED_VA_ZONE); + const u64 base_pfn = + kbase_get_exec_fixed_va_zone_base(kctx) + KBASE_REG_ZONE_EXEC_FIXED_VA_SIZE; + u64 fixed_va_end = KBASE_REG_ZONE_FIXED_VA_END_64; + u64 nr_pages; + + if (kbase_ctx_compat_mode(kctx)) + fixed_va_end = KBASE_REG_ZONE_FIXED_VA_END_32; + + nr_pages = 
fixed_va_end - base_pfn; + + if (kbase_reg_zone_init(kctx->kbdev, zone, FIXED_VA_ZONE, base_pfn, nr_pages)) + return -ENOMEM; kctx->gpu_va_end = fixed_va_end; - if (!fixed_va_reg) { - err = -ENOMEM; - goto fail_free_exec_fixed_va; - } + return 0; +} - kbase_region_tracker_ds_init(kctx, same_va_reg, custom_va_reg, exec_va_reg, - exec_fixed_va_reg, fixed_va_reg); +static void kbase_reg_zone_fixed_va_term(struct kbase_context *kctx) +{ + struct kbase_reg_zone *zone = kbase_ctx_reg_zone_get(kctx, FIXED_VA_ZONE); - INIT_LIST_HEAD(&kctx->csf.event_pages_head); -#else - /* EXEC_VA zone's codepaths are slightly easier when its base_pfn is - * initially U64_MAX - */ - kbase_ctx_reg_zone_init(kctx, KBASE_REG_ZONE_EXEC_VA, U64_MAX, 0u); - /* Other zones are 0: kbase_create_context() uses vzalloc */ + kbase_reg_zone_term(zone); +} +#endif + +typedef int kbase_memory_zone_init(struct kbase_context *kctx, u64 gpu_va_limit); +typedef void kbase_memory_zone_term(struct kbase_context *kctx); - kbase_region_tracker_ds_init(kctx, same_va_reg, custom_va_reg); - kctx->gpu_va_end = same_va_base + same_va_pages + custom_va_size; +struct kbase_memory_zone_init_meta { + kbase_memory_zone_init *init; + kbase_memory_zone_term *term; + char *error_msg; +}; + +static const struct kbase_memory_zone_init_meta zones_init[] = { + [SAME_VA_ZONE] = { kbase_reg_zone_same_va_init, kbase_reg_zone_same_va_term, + "Could not initialize SAME_VA zone" }, + [CUSTOM_VA_ZONE] = { kbase_reg_zone_custom_va_init, kbase_reg_zone_custom_va_term, + "Could not initialize CUSTOM_VA zone" }, + [EXEC_VA_ZONE] = { kbase_reg_zone_exec_va_init, kbase_reg_zone_exec_va_term, + "Could not initialize EXEC_VA zone" }, +#if MALI_USE_CSF + [EXEC_FIXED_VA_ZONE] = { kbase_reg_zone_exec_fixed_va_init, + kbase_reg_zone_exec_fixed_va_term, + "Could not initialize EXEC_FIXED_VA zone" }, + [FIXED_VA_ZONE] = { kbase_reg_zone_fixed_va_init, kbase_reg_zone_fixed_va_term, + "Could not initialize FIXED_VA zone" }, #endif - kctx->jit_va = false; +}; - kbase_gpu_vm_unlock(kctx); - return 0; +int kbase_region_tracker_init(struct kbase_context *kctx) +{ + const u64 gpu_va_bits = kctx->kbdev->gpu_props.mmu.va_bits; + const u64 gpu_va_limit = (1ULL << gpu_va_bits) >> PAGE_SHIFT; + int err; + unsigned int i; + /* Take the lock as kbase_free_alloced_region requires it */ + kbase_gpu_vm_lock(kctx); + + for (i = 0; i < ARRAY_SIZE(zones_init); i++) { + err = zones_init[i].init(kctx, gpu_va_limit); + if (unlikely(err)) { + dev_err(kctx->kbdev->dev, "%s, err = %d\n", zones_init[i].error_msg, err); + goto term; + } + } #if MALI_USE_CSF -fail_free_exec_fixed_va: - kbase_free_alloced_region(exec_fixed_va_reg); -fail_free_exec_va: - kbase_free_alloced_region(exec_va_reg); -fail_free_custom_va: - if (custom_va_reg) - kbase_free_alloced_region(custom_va_reg); + INIT_LIST_HEAD(&kctx->csf.event_pages_head); #endif + kctx->jit_va = false; + + kbase_gpu_vm_unlock(kctx); + + return 0; +term: + while (i-- > 0) + zones_init[i].term(kctx); -fail_free_same_va: - kbase_free_alloced_region(same_va_reg); -fail_unlock: kbase_gpu_vm_unlock(kctx); return err; } +void kbase_region_tracker_term(struct kbase_context *kctx) +{ + unsigned int i; + + WARN(kctx->as_nr != KBASEP_AS_NR_INVALID, + "kctx-%d_%d must first be scheduled out to flush GPU caches+tlbs before erasing remaining regions", + kctx->tgid, kctx->id); + + kbase_gpu_vm_lock(kctx); + + for (i = 0; i < ARRAY_SIZE(zones_init); i++) + zones_init[i].term(kctx); + + kbase_gpu_vm_unlock(kctx); +} + static bool kbase_has_exec_va_zone_locked(struct 
kbase_context *kctx) { struct kbase_reg_zone *exec_va_zone; lockdep_assert_held(&kctx->reg_lock); - exec_va_zone = kbase_ctx_reg_zone_get(kctx, KBASE_REG_ZONE_EXEC_VA); + exec_va_zone = kbase_ctx_reg_zone_get(kctx, EXEC_VA_ZONE); return (exec_va_zone->base_pfn != U64_MAX); } @@ -1109,16 +1087,16 @@ static bool kbase_region_tracker_has_allocs(struct kbase_context *kctx) lockdep_assert_held(&kctx->reg_lock); - for (zone_idx = 0; zone_idx < KBASE_REG_ZONE_MAX; ++zone_idx) { + for (zone_idx = 0; zone_idx < MEMORY_ZONE_MAX; zone_idx++) { struct kbase_reg_zone *zone; struct kbase_va_region *reg; u64 zone_base_addr; - unsigned long zone_bits = KBASE_REG_ZONE(zone_idx); - unsigned long reg_zone; + enum kbase_memory_zone reg_zone; - if (!kbase_is_ctx_reg_zone(zone_bits)) + if (!kbase_is_ctx_reg_zone(zone_idx)) continue; - zone = kbase_ctx_reg_zone_get(kctx, zone_bits); + + zone = kbase_ctx_reg_zone_get(kctx, zone_idx); zone_base_addr = zone->base_pfn << PAGE_SHIFT; reg = kbase_region_tracker_find_region_base_address( @@ -1126,21 +1104,21 @@ static bool kbase_region_tracker_has_allocs(struct kbase_context *kctx) if (!zone->va_size_pages) { WARN(reg, - "Should not have found a region that starts at 0x%.16llx for zone 0x%lx", - (unsigned long long)zone_base_addr, zone_bits); + "Should not have found a region that starts at 0x%.16llx for zone %s", + (unsigned long long)zone_base_addr, kbase_reg_zone_get_name(zone_idx)); continue; } if (WARN(!reg, - "There should always be a region that starts at 0x%.16llx for zone 0x%lx, couldn't find it", - (unsigned long long)zone_base_addr, zone_bits)) + "There should always be a region that starts at 0x%.16llx for zone %s, couldn't find it", + (unsigned long long)zone_base_addr, kbase_reg_zone_get_name(zone_idx))) return true; /* Safest return value */ - reg_zone = reg->flags & KBASE_REG_ZONE_MASK; - if (WARN(reg_zone != zone_bits, - "The region that starts at 0x%.16llx should be in zone 0x%lx but was found in the wrong zone 0x%lx", - (unsigned long long)zone_base_addr, zone_bits, - reg_zone)) + reg_zone = kbase_bits_to_zone(reg->flags); + if (WARN(reg_zone != zone_idx, + "The region that starts at 0x%.16llx should be in zone %s but was found in the wrong zone %s", + (unsigned long long)zone_base_addr, kbase_reg_zone_get_name(zone_idx), + kbase_reg_zone_get_name(reg_zone))) return true; /* Safest return value */ /* Unless the region is completely free, of the same size as @@ -1161,10 +1139,8 @@ static int kbase_region_tracker_init_jit_64(struct kbase_context *kctx, u64 jit_va_pages) { struct kbase_va_region *same_va_reg; - struct kbase_reg_zone *same_va_zone; + struct kbase_reg_zone *same_va_zone, *custom_va_zone; u64 same_va_zone_base_addr; - const unsigned long same_va_zone_bits = KBASE_REG_ZONE_SAME_VA; - struct kbase_va_region *custom_va_reg; u64 jit_va_start; lockdep_assert_held(&kctx->reg_lock); @@ -1175,14 +1151,14 @@ static int kbase_region_tracker_init_jit_64(struct kbase_context *kctx, * cause an overlap to happen with existing same VA allocations and the * custom VA zone. 
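The zones_init[] table above drives both kbase_region_tracker_init() and its failure unwind; the shape of that pattern, reduced to toy step functions:

#include <stdio.h>

typedef int zone_init_fn(void);
typedef void zone_term_fn(void);

static int ok_init(void) { return 0; }
static int failing_init(void) { return -12; /* -ENOMEM */ }
static void zone_term(void) { printf("unwound one zone\n"); }

static const struct { zone_init_fn *init; zone_term_fn *term; } zones_init[] = {
	{ ok_init, zone_term },
	{ ok_init, zone_term },
	{ failing_init, zone_term }, /* fails; the two successes unwind */
};

int main(void)
{
	unsigned int i;
	int err = 0;

	for (i = 0; i < sizeof(zones_init) / sizeof(zones_init[0]); i++) {
		err = zones_init[i].init();
		if (err)
			goto term;
	}
	return 0;
term:
	/* Tear down only what succeeded, in reverse order. */
	while (i-- > 0)
		zones_init[i].term();
	return 1;
}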
*/ - same_va_zone = kbase_ctx_reg_zone_get(kctx, same_va_zone_bits); + same_va_zone = kbase_ctx_reg_zone_get(kctx, SAME_VA_ZONE); same_va_zone_base_addr = same_va_zone->base_pfn << PAGE_SHIFT; same_va_reg = kbase_region_tracker_find_region_base_address( kctx, same_va_zone_base_addr); if (WARN(!same_va_reg, - "Already found a free region at the start of every zone, but now cannot find any region for zone base 0x%.16llx zone 0x%lx", - (unsigned long long)same_va_zone_base_addr, same_va_zone_bits)) + "Already found a free region at the start of every zone, but now cannot find any region for zone SAME_VA base 0x%.16llx", + (unsigned long long)same_va_zone_base_addr)) return -ENOMEM; /* kbase_region_tracker_has_allocs() in the caller has already ensured @@ -1203,24 +1179,15 @@ static int kbase_region_tracker_init_jit_64(struct kbase_context *kctx, /* * Create a custom VA zone at the end of the VA for allocations which - * JIT can use so it doesn't have to allocate VA from the kernel. + * JIT can use so it doesn't have to allocate VA from the kernel. Note + * that while the zone has already been zero-initialized during the + * region tracker initialization, we can just overwrite it. */ - custom_va_reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_custom, jit_va_start, - jit_va_pages, KBASE_REG_ZONE_CUSTOM_VA); - - /* - * The context will be destroyed if we fail here so no point - * reverting the change we made to same_va. - */ - if (!custom_va_reg) + custom_va_zone = kbase_ctx_reg_zone_get(kctx, CUSTOM_VA_ZONE); + if (kbase_reg_zone_init(kctx->kbdev, custom_va_zone, CUSTOM_VA_ZONE, jit_va_start, + jit_va_pages)) return -ENOMEM; - /* Since this is 64-bit, the custom zone will not have been - * initialized, so initialize it now - */ - kbase_ctx_reg_zone_init(kctx, KBASE_REG_ZONE_CUSTOM_VA, jit_va_start, - jit_va_pages); - kbase_region_tracker_insert(custom_va_reg); return 0; } @@ -1291,12 +1258,11 @@ exit_unlock: int kbase_region_tracker_init_exec(struct kbase_context *kctx, u64 exec_va_pages) { #if !MALI_USE_CSF - struct kbase_va_region *exec_va_reg; struct kbase_reg_zone *exec_va_zone; struct kbase_reg_zone *target_zone; struct kbase_va_region *target_reg; u64 target_zone_base_addr; - unsigned long target_zone_bits; + enum kbase_memory_zone target_zone_id; u64 exec_va_start; int err; #endif @@ -1342,20 +1308,21 @@ int kbase_region_tracker_init_exec(struct kbase_context *kctx, u64 exec_va_pages if (kbase_ctx_compat_mode(kctx)) { /* 32-bit client: take from CUSTOM_VA zone */ - target_zone_bits = KBASE_REG_ZONE_CUSTOM_VA; + target_zone_id = CUSTOM_VA_ZONE; } else { /* 64-bit client: take from SAME_VA zone */ - target_zone_bits = KBASE_REG_ZONE_SAME_VA; + target_zone_id = SAME_VA_ZONE; } - target_zone = kbase_ctx_reg_zone_get(kctx, target_zone_bits); + target_zone = kbase_ctx_reg_zone_get(kctx, target_zone_id); target_zone_base_addr = target_zone->base_pfn << PAGE_SHIFT; target_reg = kbase_region_tracker_find_region_base_address( kctx, target_zone_base_addr); if (WARN(!target_reg, - "Already found a free region at the start of every zone, but now cannot find any region for zone base 0x%.16llx zone 0x%lx", - (unsigned long long)target_zone_base_addr, target_zone_bits)) { + "Already found a free region at the start of every zone, but now cannot find any region for zone base 0x%.16llx zone %s", + (unsigned long long)target_zone_base_addr, + kbase_reg_zone_get_name(target_zone_id))) { err = -ENOMEM; goto exit_unlock; } @@ -1374,26 +1341,14 @@ int kbase_region_tracker_init_exec(struct 
kbase_context *kctx, u64 exec_va_pages /* Taken from the end of the target zone */ exec_va_start = kbase_reg_zone_end_pfn(target_zone) - exec_va_pages; - - exec_va_reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_exec, exec_va_start, - exec_va_pages, KBASE_REG_ZONE_EXEC_VA); - if (!exec_va_reg) { - err = -ENOMEM; - goto exit_unlock; - } - /* Update EXEC_VA zone - * - * not using kbase_ctx_reg_zone_init() - it was already initialized - */ - exec_va_zone = kbase_ctx_reg_zone_get(kctx, KBASE_REG_ZONE_EXEC_VA); - exec_va_zone->base_pfn = exec_va_start; - exec_va_zone->va_size_pages = exec_va_pages; + exec_va_zone = kbase_ctx_reg_zone_get(kctx, EXEC_VA_ZONE); + if (kbase_reg_zone_init(kctx->kbdev, exec_va_zone, EXEC_VA_ZONE, exec_va_start, + exec_va_pages)) { + err = -ENOMEM; + goto exit_unlock; + } /* Update target zone and corresponding region */ target_reg->nr_pages -= exec_va_pages; target_zone->va_size_pages -= exec_va_pages; - - kbase_region_tracker_insert(exec_va_reg); err = 0; exit_unlock: @@ -1405,28 +1360,13 @@ exit_unlock: #if MALI_USE_CSF void kbase_mcu_shared_interface_region_tracker_term(struct kbase_device *kbdev) { - kbase_region_tracker_term_rbtree(&kbdev->csf.shared_reg_rbtree); + kbase_reg_zone_term(&kbdev->csf.mcu_shared_zone); } int kbase_mcu_shared_interface_region_tracker_init(struct kbase_device *kbdev) { - struct kbase_va_region *shared_reg; - u64 shared_reg_start_pfn; - u64 shared_reg_size; - - shared_reg_start_pfn = KBASE_REG_ZONE_MCU_SHARED_BASE; - shared_reg_size = KBASE_REG_ZONE_MCU_SHARED_SIZE; - - kbdev->csf.shared_reg_rbtree = RB_ROOT; - - shared_reg = - kbase_alloc_free_region(kbdev, &kbdev->csf.shared_reg_rbtree, shared_reg_start_pfn, - shared_reg_size, KBASE_REG_ZONE_MCU_SHARED); - if (!shared_reg) - return -ENOMEM; - - kbase_region_tracker_insert(shared_reg); - return 0; + return kbase_reg_zone_init(kbdev, &kbdev->csf.mcu_shared_zone, MCU_SHARED_ZONE, + KBASE_REG_ZONE_MCU_SHARED_BASE, MCU_SHARED_ZONE_SIZE); } #endif @@ -1583,33 +1523,31 @@ KBASE_EXPORT_TEST_API(kbase_mem_term); /** * kbase_alloc_free_region - Allocate a free region object. * - * @kbdev: kbase device - * @rbtree: Backlink to the red-black tree of memory regions. + * @zone: CUSTOM_VA_ZONE or SAME_VA_ZONE * @start_pfn: The Page Frame Number in GPU virtual address space. * @nr_pages: The size of the region in pages. - * @zone: KBASE_REG_ZONE_CUSTOM_VA or KBASE_REG_ZONE_SAME_VA * * The allocated object is not part of any list yet, and is flagged as * KBASE_REG_FREE. No mapping is allocated yet. * - * zone is KBASE_REG_ZONE_CUSTOM_VA or KBASE_REG_ZONE_SAME_VA. - * * Return: pointer to the allocated region object on success, NULL otherwise. 
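The EXEC_VA carve-out above is pure interval arithmetic: the new zone is taken from the tail of the target zone, which shrinks by the same amount. A toy model with assumed numbers:

#include <stdint.h>
#include <stdio.h>

struct toy_zone { uint64_t base_pfn, va_size_pages; };

static uint64_t zone_end_pfn(const struct toy_zone *z)
{
	return z->base_pfn + z->va_size_pages;
}

int main(void)
{
	struct toy_zone target = { .base_pfn = 1, .va_size_pages = 1ull << 20 };
	uint64_t exec_va_pages = 4096;

	/* EXEC_VA is taken from the end of the target zone... */
	uint64_t exec_va_start = zone_end_pfn(&target) - exec_va_pages;

	/* ...and the target zone (and its free region) shrink to match. */
	target.va_size_pages -= exec_va_pages;

	printf("EXEC_VA starts at pfn %llu; target now ends at pfn %llu\n",
	       (unsigned long long)exec_va_start,
	       (unsigned long long)zone_end_pfn(&target));
	return 0;
}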
*/ -struct kbase_va_region *kbase_alloc_free_region(struct kbase_device *kbdev, struct rb_root *rbtree, - u64 start_pfn, size_t nr_pages, int zone) +struct kbase_va_region *kbase_alloc_free_region(struct kbase_reg_zone *zone, u64 start_pfn, + size_t nr_pages) { struct kbase_va_region *new_reg; - KBASE_DEBUG_ASSERT(rbtree != NULL); - - /* zone argument should only contain zone related region flags */ - KBASE_DEBUG_ASSERT((zone & ~KBASE_REG_ZONE_MASK) == 0); KBASE_DEBUG_ASSERT(nr_pages > 0); /* 64-bit address range is the max */ KBASE_DEBUG_ASSERT(start_pfn + nr_pages <= (U64_MAX / PAGE_SIZE)); - new_reg = kmem_cache_zalloc(kbdev->va_region_slab, GFP_KERNEL); + if (WARN_ON(!zone)) + return NULL; + + if (unlikely(!zone->base_pfn || !zone->va_size_pages)) + return NULL; + + new_reg = kmem_cache_zalloc(zone->cache, GFP_KERNEL); if (!new_reg) return NULL; @@ -1618,8 +1556,8 @@ struct kbase_va_region *kbase_alloc_free_region(struct kbase_device *kbdev, stru atomic_set(&new_reg->no_user_free_count, 0); new_reg->cpu_alloc = NULL; /* no alloc bound yet */ new_reg->gpu_alloc = NULL; /* no alloc bound yet */ - new_reg->rbtree = rbtree; - new_reg->flags = zone | KBASE_REG_FREE; + new_reg->rbtree = &zone->reg_rbtree; + new_reg->flags = kbase_zone_to_bits(zone->id) | KBASE_REG_FREE; new_reg->flags |= KBASE_REG_GROWABLE; @@ -1631,9 +1569,17 @@ struct kbase_va_region *kbase_alloc_free_region(struct kbase_device *kbdev, stru return new_reg; } - KBASE_EXPORT_TEST_API(kbase_alloc_free_region); +struct kbase_va_region *kbase_ctx_alloc_free_region(struct kbase_context *kctx, + enum kbase_memory_zone id, u64 start_pfn, + size_t nr_pages) +{ + struct kbase_reg_zone *zone = kbase_ctx_reg_zone_get_nolock(kctx, id); + + return kbase_alloc_free_region(zone, start_pfn, nr_pages); +} + /** * kbase_free_alloced_region - Free a region object. * @@ -1645,19 +1591,18 @@ KBASE_EXPORT_TEST_API(kbase_alloc_free_region); * alloc object will be released. * It is a bug if no alloc object exists for non-free regions. 
* - * If region is KBASE_REG_ZONE_MCU_SHARED it is freed + * If region is MCU_SHARED_ZONE it is freed */ void kbase_free_alloced_region(struct kbase_va_region *reg) { #if MALI_USE_CSF - if ((reg->flags & KBASE_REG_ZONE_MASK) == - KBASE_REG_ZONE_MCU_SHARED) { + if (kbase_bits_to_zone(reg->flags) == MCU_SHARED_ZONE) { kfree(reg); return; } #endif if (!(reg->flags & KBASE_REG_FREE)) { - struct kbase_context *kctx = kbase_reg_flags_to_kctx(reg); + struct kbase_context *kctx = kbase_reg_to_kctx(reg); if (WARN_ON(!kctx)) return; @@ -1665,8 +1610,8 @@ void kbase_free_alloced_region(struct kbase_va_region *reg) if (WARN_ON(kbase_is_region_invalid(reg))) return; - dev_dbg(kctx->kbdev->dev, "Freeing memory region %pK\n", - (void *)reg); + dev_dbg(kctx->kbdev->dev, "Freeing memory region %pK of zone %s\n", (void *)reg, + kbase_reg_zone_get_name(kbase_bits_to_zone(reg->flags))); #if MALI_USE_CSF if (reg->flags & KBASE_REG_CSF_EVENT) /* @@ -1802,8 +1747,7 @@ int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg, } else { if (reg->gpu_alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM || reg->gpu_alloc->type == KBASE_MEM_TYPE_IMPORTED_USER_BUF) { - - err = kbase_mmu_insert_imported_pages( + err = kbase_mmu_insert_pages_skip_status_update( kctx->kbdev, &kctx->mmu, reg->start_pfn, kbase_get_gpu_phy_pages(reg), kbase_reg_current_backed_size(reg), reg->flags & gwt_mask, kctx->as_nr, group_id, mmu_sync_info, reg); @@ -1812,7 +1756,7 @@ int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg, kbase_get_gpu_phy_pages(reg), kbase_reg_current_backed_size(reg), reg->flags & gwt_mask, kctx->as_nr, group_id, - mmu_sync_info, reg, true); + mmu_sync_info, reg); } if (err) @@ -1856,8 +1800,7 @@ bad_aliased_insert: kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn + (i * stride), phys_alloc, alloc->imported.alias.aliased[i].length, - alloc->imported.alias.aliased[i].length, kctx->as_nr, - false); + alloc->imported.alias.aliased[i].length, kctx->as_nr); } bad_insert: kbase_remove_va_region(kctx->kbdev, reg); @@ -1868,7 +1811,7 @@ bad_insert: KBASE_EXPORT_TEST_API(kbase_gpu_mmap); static void kbase_jd_user_buf_unmap(struct kbase_context *kctx, struct kbase_mem_phy_alloc *alloc, - struct kbase_va_region *reg, bool writeable); + struct kbase_va_region *reg); int kbase_gpu_munmap(struct kbase_context *kctx, struct kbase_va_region *reg) { @@ -1889,9 +1832,8 @@ int kbase_gpu_munmap(struct kbase_context *kctx, struct kbase_va_region *reg) size_t i = 0; /* Due to the way the number of valid PTEs and ATEs are tracked * currently, only the GPU virtual range that is backed & mapped - * should be passed to the kbase_mmu_teardown_pages() function, - * hence individual aliased regions needs to be unmapped - * separately. + * should be passed to the page teardown function, hence individual + * aliased regions need to be unmapped separately. 
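A toy illustration of the per-alias teardown walk this comment describes: each backed sub-range at start_pfn + i * stride is torn down on its own, and unbacked entries are skipped (stand-in types, no real MMU calls):

#include <stdint.h>
#include <stdio.h>

struct toy_aliased { uint64_t length; int backed; };

int main(void)
{
	struct toy_aliased aliased[3] = { { 16, 1 }, { 16, 0 }, { 8, 1 } };
	uint64_t start_pfn = 0x1000, stride = 16;
	unsigned int i;

	for (i = 0; i < 3; i++) {
		if (!aliased[i].backed) /* stand-in for a NULL phys alloc */
			continue;
		printf("teardown pfn 0x%llx..0x%llx\n",
		       (unsigned long long)(start_pfn + i * stride),
		       (unsigned long long)(start_pfn + i * stride + aliased[i].length));
	}
	return 0;
}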
*/ for (i = 0; i < alloc->imported.alias.nents; i++) { struct tagged_addr *phys_alloc = NULL; @@ -1905,8 +1847,7 @@ int kbase_gpu_munmap(struct kbase_context *kctx, struct kbase_va_region *reg) kctx->kbdev, &kctx->mmu, reg->start_pfn + (i * alloc->imported.alias.stride), phys_alloc, alloc->imported.alias.aliased[i].length, - alloc->imported.alias.aliased[i].length, kctx->as_nr, - false); + alloc->imported.alias.aliased[i].length, kctx->as_nr); if (WARN_ON_ONCE(err_loop)) err = err_loop; @@ -1928,17 +1869,19 @@ int kbase_gpu_munmap(struct kbase_context *kctx, struct kbase_va_region *reg) if (reg->flags & KBASE_REG_IMPORT_PAD) nr_phys_pages = alloc->nents + 1; - err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, - alloc->pages, nr_phys_pages, nr_virt_pages, - kctx->as_nr, true); + err = kbase_mmu_teardown_imported_pages(kctx->kbdev, &kctx->mmu, + reg->start_pfn, alloc->pages, + nr_phys_pages, nr_virt_pages, + kctx->as_nr); } break; case KBASE_MEM_TYPE_IMPORTED_USER_BUF: { size_t nr_reg_pages = kbase_reg_current_backed_size(reg); - err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, - alloc->pages, nr_reg_pages, nr_reg_pages, - kctx->as_nr, true); + err = kbase_mmu_teardown_imported_pages(kctx->kbdev, &kctx->mmu, + reg->start_pfn, alloc->pages, + nr_reg_pages, nr_reg_pages, + kctx->as_nr); } break; default: { @@ -1946,7 +1889,7 @@ int kbase_gpu_munmap(struct kbase_context *kctx, struct kbase_va_region *reg) err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, alloc->pages, nr_reg_pages, nr_reg_pages, - kctx->as_nr, false); + kctx->as_nr); } break; } @@ -1966,9 +1909,7 @@ int kbase_gpu_munmap(struct kbase_context *kctx, struct kbase_va_region *reg) /* The allocation could still have active mappings. 
*/ if (user_buf->current_mapping_usage_count == 0) { - kbase_jd_user_buf_unmap(kctx, alloc, reg, - (reg->flags & - (KBASE_REG_CPU_WR | KBASE_REG_GPU_WR))); + kbase_jd_user_buf_unmap(kctx, alloc, reg); } } } @@ -2112,18 +2053,18 @@ void kbase_sync_single(struct kbase_context *kctx, dma_addr = kbase_dma_addr_from_tagged(t_gpu_pa) + offset; if (sync_fn == KBASE_SYNC_TO_DEVICE) { - src = ((unsigned char *)kmap(cpu_page)) + offset; - dst = ((unsigned char *)kmap(gpu_page)) + offset; + src = ((unsigned char *)kbase_kmap(cpu_page)) + offset; + dst = ((unsigned char *)kbase_kmap(gpu_page)) + offset; } else if (sync_fn == KBASE_SYNC_TO_CPU) { dma_sync_single_for_cpu(kctx->kbdev->dev, dma_addr, size, DMA_BIDIRECTIONAL); - src = ((unsigned char *)kmap(gpu_page)) + offset; - dst = ((unsigned char *)kmap(cpu_page)) + offset; + src = ((unsigned char *)kbase_kmap(gpu_page)) + offset; + dst = ((unsigned char *)kbase_kmap(cpu_page)) + offset; } memcpy(dst, src, size); - kunmap(gpu_page); - kunmap(cpu_page); + kbase_kunmap(gpu_page, src); + kbase_kunmap(cpu_page, dst); if (sync_fn == KBASE_SYNC_TO_DEVICE) dma_sync_single_for_device(kctx->kbdev->dev, dma_addr, size, DMA_BIDIRECTIONAL); @@ -2303,8 +2244,8 @@ int kbase_mem_free_region(struct kbase_context *kctx, struct kbase_va_region *re } #if MALI_USE_CSF - if (((reg->flags & KBASE_REG_ZONE_MASK) == KBASE_REG_ZONE_FIXED_VA) || - ((reg->flags & KBASE_REG_ZONE_MASK) == KBASE_REG_ZONE_EXEC_FIXED_VA)) { + if (((kbase_bits_to_zone(reg->flags)) == FIXED_VA_ZONE) || + ((kbase_bits_to_zone(reg->flags)) == EXEC_FIXED_VA_ZONE)) { if (reg->flags & KBASE_REG_FIXED_ADDRESS) atomic64_dec(&kctx->num_fixed_allocs); else @@ -2381,7 +2322,7 @@ int kbase_mem_free(struct kbase_context *kctx, u64 gpu_addr) goto out_unlock; } - if ((reg->flags & KBASE_REG_ZONE_MASK) == KBASE_REG_ZONE_SAME_VA) { + if ((kbase_bits_to_zone(reg->flags)) == SAME_VA_ZONE) { /* SAME_VA must be freed through munmap */ dev_warn(kctx->kbdev->dev, "%s called on SAME_VA memory 0x%llX", __func__, gpu_addr); @@ -2544,6 +2485,7 @@ int kbase_alloc_phy_pages_helper(struct kbase_mem_phy_alloc *alloc, * allocation is visible to the OOM killer */ kbase_process_page_usage_inc(kctx, nr_pages_requested); + kbase_trace_gpu_mem_usage_inc(kctx->kbdev, kctx, nr_pages_requested); tp = alloc->pages + alloc->nents; @@ -2665,8 +2607,6 @@ no_new_partial: alloc->nents += nr_pages_requested; - kbase_trace_gpu_mem_usage_inc(kctx->kbdev, kctx, nr_pages_requested); - done: return 0; @@ -2676,19 +2616,13 @@ alloc_failed: size_t nr_pages_to_free = nr_pages_requested - nr_left; alloc->nents += nr_pages_to_free; - - kbase_process_page_usage_inc(kctx, nr_pages_to_free); - atomic_add(nr_pages_to_free, &kctx->used_pages); - atomic_add(nr_pages_to_free, - &kctx->kbdev->memdev.used_pages); - kbase_free_phy_pages_helper(alloc, nr_pages_to_free); } - kbase_process_page_usage_dec(kctx, nr_pages_requested); - atomic_sub(nr_pages_requested, &kctx->used_pages); - atomic_sub(nr_pages_requested, - &kctx->kbdev->memdev.used_pages); + kbase_trace_gpu_mem_usage_dec(kctx->kbdev, kctx, nr_left); + kbase_process_page_usage_dec(kctx, nr_left); + atomic_sub(nr_left, &kctx->used_pages); + atomic_sub(nr_left, &kctx->kbdev->memdev.used_pages); invalid_request: return -ENOMEM; @@ -2737,6 +2671,7 @@ struct tagged_addr *kbase_alloc_phy_pages_helper_locked( * allocation is visible to the OOM killer */ kbase_process_page_usage_inc(kctx, nr_pages_requested); + kbase_trace_gpu_mem_usage_inc(kctx->kbdev, kctx, nr_pages_requested); tp = alloc->pages + alloc->nents; 
new_pages = tp; @@ -2839,8 +2774,6 @@ struct tagged_addr *kbase_alloc_phy_pages_helper_locked( alloc->nents += nr_pages_requested; - kbase_trace_gpu_mem_usage_inc(kctx->kbdev, kctx, nr_pages_requested); - done: return new_pages; @@ -2877,6 +2810,7 @@ alloc_failed: } } + kbase_trace_gpu_mem_usage_dec(kctx->kbdev, kctx, nr_pages_requested); kbase_process_page_usage_dec(kctx, nr_pages_requested); atomic_sub(nr_pages_requested, &kctx->used_pages); atomic_sub(nr_pages_requested, &kctx->kbdev->memdev.used_pages); @@ -4539,7 +4473,7 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx, /* A suitable JIT allocation existed on the evict list, so we need * to make sure that the NOT_MOVABLE property is cleared. */ - if (kbase_page_migration_enabled) { + if (kbase_is_page_migration_enabled()) { kbase_gpu_vm_lock(kctx); mutex_lock(&kctx->jit_evict_lock); kbase_set_phy_alloc_page_status(reg->gpu_alloc, ALLOCATED_MAPPED); @@ -4717,14 +4651,14 @@ void kbase_jit_free(struct kbase_context *kctx, struct kbase_va_region *reg) * by page migration. Once freed, they will enter into the page migration * state machine via the mempools. */ - if (kbase_page_migration_enabled) + if (kbase_is_page_migration_enabled()) kbase_set_phy_alloc_page_status(reg->gpu_alloc, NOT_MOVABLE); mutex_unlock(&kctx->jit_evict_lock); } void kbase_jit_backing_lost(struct kbase_va_region *reg) { - struct kbase_context *kctx = kbase_reg_flags_to_kctx(reg); + struct kbase_context *kctx = kbase_reg_to_kctx(reg); if (WARN_ON(!kctx)) return; @@ -5035,6 +4969,15 @@ static int kbase_jd_user_buf_map(struct kbase_context *kctx, * MMU operations. */ const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC; + bool write; + enum dma_data_direction dma_dir; + + /* If neither the CPU nor the GPU needs write access, use DMA_TO_DEVICE + * to avoid potentially-destructive CPU cache invalidates that could + * cause corruption of user data. + */ + write = reg->flags & (KBASE_REG_CPU_WR | KBASE_REG_GPU_WR); + dma_dir = write ? 
DMA_BIDIRECTIONAL : DMA_TO_DEVICE; lockdep_assert_held(&kctx->reg_lock); @@ -5068,9 +5011,9 @@ static int kbase_jd_user_buf_map(struct kbase_context *kctx, for (i = 0; i < pinned_pages; i++) { dma_addr_t dma_addr; #if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) - dma_addr = dma_map_page(dev, pages[i], 0, PAGE_SIZE, DMA_BIDIRECTIONAL); + dma_addr = dma_map_page(dev, pages[i], 0, PAGE_SIZE, dma_dir); #else - dma_addr = dma_map_page_attrs(dev, pages[i], 0, PAGE_SIZE, DMA_BIDIRECTIONAL, + dma_addr = dma_map_page_attrs(dev, pages[i], 0, PAGE_SIZE, dma_dir, DMA_ATTR_SKIP_CPU_SYNC); #endif err = dma_mapping_error(dev, dma_addr); @@ -5080,7 +5023,7 @@ static int kbase_jd_user_buf_map(struct kbase_context *kctx, alloc->imported.user_buf.dma_addrs[i] = dma_addr; pa[i] = as_tagged(page_to_phys(pages[i])); - dma_sync_single_for_device(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); + dma_sync_single_for_device(dev, dma_addr, PAGE_SIZE, dma_dir); } #ifdef CONFIG_MALI_CINSTR_GWT @@ -5088,10 +5031,10 @@ static int kbase_jd_user_buf_map(struct kbase_context *kctx, gwt_mask = ~KBASE_REG_GPU_WR; #endif - err = kbase_mmu_insert_imported_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, pa, - kbase_reg_current_backed_size(reg), - reg->flags & gwt_mask, kctx->as_nr, alloc->group_id, - mmu_sync_info, NULL); + err = kbase_mmu_insert_pages_skip_status_update(kctx->kbdev, &kctx->mmu, reg->start_pfn, pa, + kbase_reg_current_backed_size(reg), - reg->flags & gwt_mask, kctx->as_nr, + reg->flags & gwt_mask, kctx->as_nr, + alloc->group_id, mmu_sync_info, NULL); if (err == 0) return 0; @@ -5111,12 +5054,11 @@ unwind: for (i = 0; i < dma_mapped_pages; i++) { dma_addr_t dma_addr = alloc->imported.user_buf.dma_addrs[i]; - dma_sync_single_for_device(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); + dma_sync_single_for_device(dev, dma_addr, PAGE_SIZE, dma_dir); #if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) - dma_unmap_page(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); + dma_unmap_page(dev, dma_addr, PAGE_SIZE, dma_dir); #else - dma_unmap_page_attrs(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL, - DMA_ATTR_SKIP_CPU_SYNC); + dma_unmap_page_attrs(dev, dma_addr, PAGE_SIZE, dma_dir, DMA_ATTR_SKIP_CPU_SYNC); #endif } @@ -5134,17 +5076,113 @@ unwind: return err; } +/* user_buf_sync_read_only_page - This function handles syncing a single page that has read-only + * access on both the CPU and GPU, so it is ready to be unmapped. + * @kctx: kbase context + * @imported_size: the number of bytes to sync + * @dma_addr: DMA address of the bytes to be sync'd + * @offset_within_page: (unused) offset of the bytes within the page. Passed so that the calling + * signature is identical to user_buf_sync_writable_page(). + */ +static void user_buf_sync_read_only_page(struct kbase_context *kctx, unsigned long imported_size, + dma_addr_t dma_addr, unsigned long offset_within_page) +{ + /* Manual cache synchronization. + * + * Writes from neither the CPU nor GPU are possible via this mapping, + * so we just sync the entire page to the device. + */ + dma_sync_single_for_device(kctx->kbdev->dev, dma_addr, imported_size, DMA_TO_DEVICE); +} + +/* user_buf_sync_writable_page - This function handles syncing a single page that has read + * and write access, from either (or both of) the CPU and GPU, + * so it is ready to be unmapped. + * @kctx: kbase context + * @imported_size: the number of bytes to unmap + * @dma_addr: DMA address of the bytes to be unmapped + * @offset_within_page: offset of the bytes within the page. 
This is the offset to the subrange of + * the memory that is "imported" and so is intended for GPU access. Areas of + * the page outside of this - whilst still GPU accessible - are not intended + * for use by GPU work, and should also not be modified as the userspace CPU + * threads may be modifying them. + */ +static void user_buf_sync_writable_page(struct kbase_context *kctx, unsigned long imported_size, + dma_addr_t dma_addr, unsigned long offset_within_page) +{ + /* Manual CPU cache synchronization. + * + * When the GPU returns ownership of the buffer to the CPU, the driver + * needs to treat imported and non-imported memory differently. + * + * The first case to consider is non-imported sub-regions at the + * beginning of the first page and at the end of the last page. For these + * sub-regions: CPU cache shall be committed with a clean+invalidate, + * in order to keep the last CPU write. + * + * Imported region prefers the opposite treatment: this memory has been + * legitimately mapped and used by the GPU, hence GPU writes shall be + * committed to memory, while CPU cache shall be invalidated to make + * sure that CPU reads the correct memory content. + * + * The following diagram shows the expected value of the variables + * used in this loop in the corner case of an imported region enclosed + * by a single memory page: + * + * page boundary ->|---------- | <- dma_addr (initial value) + * | | + * | - - - - - | <- offset_within_page + * |XXXXXXXXXXX|\ + * |XXXXXXXXXXX| \ + * |XXXXXXXXXXX| }- imported_size + * |XXXXXXXXXXX| / + * |XXXXXXXXXXX|/ + * | - - - - - | <- offset_within_page + imported_size + * | |\ + * | | }- PAGE_SIZE - imported_size - + * | |/ offset_within_page + * | | + * page boundary ->|-----------| + * + * If the imported region is enclosed by more than one page, then + * offset_within_page = 0 for any page after the first. + */ + + /* Only for first page: handle non-imported range at the beginning. */ + if (offset_within_page > 0) { + dma_sync_single_for_device(kctx->kbdev->dev, dma_addr, offset_within_page, + DMA_BIDIRECTIONAL); + dma_addr += offset_within_page; + } + + /* For every page: handle imported range. */ + if (imported_size > 0) + dma_sync_single_for_cpu(kctx->kbdev->dev, dma_addr, imported_size, + DMA_BIDIRECTIONAL); + + /* Only for last page (that may coincide with first page): + * handle non-imported range at the end. + */ + if ((imported_size + offset_within_page) < PAGE_SIZE) { + dma_addr += imported_size; + dma_sync_single_for_device(kctx->kbdev->dev, dma_addr, + PAGE_SIZE - imported_size - offset_within_page, + DMA_BIDIRECTIONAL); + } +} + /* This function would also perform the work of unpinning pages on Job Manager * GPUs, which implies that a call to kbase_jd_user_buf_pin_pages() will NOT * have a corresponding call to kbase_jd_user_buf_unpin_pages(). 
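Worked numbers for the three sync ranges computed by user_buf_sync_writable_page() above, assuming a 4 KiB page that encloses the whole imported range:

#include <stdio.h>

#define PAGE_SIZE 4096ul /* assumed */

int main(void)
{
	unsigned long offset_within_page = 512, imported_size = 1024;

	/* Head (first page only): clean+invalidate the non-imported prefix. */
	printf("sync_for_device [0, %lu)\n", offset_within_page);

	/* Body (every page): invalidate so the CPU sees the GPU's writes. */
	printf("sync_for_cpu    [%lu, %lu)\n", offset_within_page,
	       offset_within_page + imported_size);

	/* Tail (last page only): clean+invalidate the non-imported suffix. */
	printf("sync_for_device [%lu, %lu)\n",
	       offset_within_page + imported_size, PAGE_SIZE);
	return 0;
}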
*/ static void kbase_jd_user_buf_unmap(struct kbase_context *kctx, struct kbase_mem_phy_alloc *alloc, - struct kbase_va_region *reg, bool writeable) + struct kbase_va_region *reg) { long i; struct page **pages; unsigned long offset_within_page = alloc->imported.user_buf.address & ~PAGE_MASK; unsigned long remaining_size = alloc->imported.user_buf.size; + bool writable = (reg->flags & (KBASE_REG_CPU_WR | KBASE_REG_GPU_WR)); lockdep_assert_held(&kctx->reg_lock); @@ -5153,8 +5191,6 @@ static void kbase_jd_user_buf_unmap(struct kbase_context *kctx, struct kbase_mem #if !MALI_USE_CSF kbase_mem_shrink_cpu_mapping(kctx, reg, 0, alloc->nents); -#else - CSTD_UNUSED(reg); #endif for (i = 0; i < alloc->imported.user_buf.nr_pages; i++) { @@ -5173,75 +5209,24 @@ static void kbase_jd_user_buf_unmap(struct kbase_context *kctx, struct kbase_mem * whole memory page. */ dma_addr_t dma_addr = alloc->imported.user_buf.dma_addrs[i]; + enum dma_data_direction dma_dir = writable ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE; - /* Manual CPU cache synchronization. - * - * When the GPU returns ownership of the buffer to the CPU, the driver - * needs to treat imported and non-imported memory differently. - * - * The first case to consider is non-imported sub-regions at the - * beginning of the first page and at the end of last page. For these - * sub-regions: CPU cache shall be committed with a clean+invalidate, - * in order to keep the last CPU write. - * - * Imported region prefers the opposite treatment: this memory has been - * legitimately mapped and used by the GPU, hence GPU writes shall be - * committed to memory, while CPU cache shall be invalidated to make - * sure that CPU reads the correct memory content. - * - * The following diagram shows the expect value of the variables - * used in this loop in the corner case of an imported region encloed - * by a single memory page: - * - * page boundary ->|---------- | <- dma_addr (initial value) - * | | - * | - - - - - | <- offset_within_page - * |XXXXXXXXXXX|\ - * |XXXXXXXXXXX| \ - * |XXXXXXXXXXX| }- imported_size - * |XXXXXXXXXXX| / - * |XXXXXXXXXXX|/ - * | - - - - - | <- offset_within_page + imported_size - * | |\ - * | | }- PAGE_SIZE - imported_size - offset_within_page - * | |/ - * page boundary ->|-----------| - * - * If the imported region is enclosed by more than one page, then - * offset_within_page = 0 for any page after the first. - */ - - /* Only for first page: handle non-imported range at the beginning. */ - if (offset_within_page > 0) { - dma_sync_single_for_device(kctx->kbdev->dev, dma_addr, offset_within_page, - DMA_BIDIRECTIONAL); - dma_addr += offset_within_page; - } - - /* For every page: handle imported range. */ - if (imported_size > 0) - dma_sync_single_for_cpu(kctx->kbdev->dev, dma_addr, imported_size, - DMA_BIDIRECTIONAL); - - /* Only for last page (that may coincide with first page): - * handle non-imported range at the end. - */ - if ((imported_size + offset_within_page) < PAGE_SIZE) { - dma_addr += imported_size; - dma_sync_single_for_device(kctx->kbdev->dev, dma_addr, - PAGE_SIZE - imported_size - offset_within_page, - DMA_BIDIRECTIONAL); - } + if (writable) + user_buf_sync_writable_page(kctx, imported_size, dma_addr, + offset_within_page); + else + user_buf_sync_read_only_page(kctx, imported_size, dma_addr, + offset_within_page); - /* Notice: use the original DMA address to unmap the whole memory page. */ + /* Notice: use the original DMA address to unmap the whole memory page. 
*/ #if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) dma_unmap_page(kctx->kbdev->dev, alloc->imported.user_buf.dma_addrs[i], PAGE_SIZE, - DMA_BIDIRECTIONAL); + dma_dir); #else dma_unmap_page_attrs(kctx->kbdev->dev, alloc->imported.user_buf.dma_addrs[i], - PAGE_SIZE, DMA_BIDIRECTIONAL, DMA_ATTR_SKIP_CPU_SYNC); + PAGE_SIZE, dma_dir, DMA_ATTR_SKIP_CPU_SYNC); #endif - if (writeable) + if (writable) set_page_dirty_lock(pages[i]); #if !MALI_USE_CSF kbase_unpin_user_buf_page(pages[i]); @@ -5260,7 +5245,8 @@ int kbase_mem_copy_to_pinned_user_pages(struct page **dest_pages, void *src_page, size_t *to_copy, unsigned int nr_pages, unsigned int *target_page_nr, size_t offset) { - void *target_page = kmap(dest_pages[*target_page_nr]); + void *target_page = kbase_kmap(dest_pages[*target_page_nr]); + size_t chunk = PAGE_SIZE-offset; if (!target_page) { @@ -5273,13 +5259,13 @@ int kbase_mem_copy_to_pinned_user_pages(struct page **dest_pages, memcpy(target_page + offset, src_page, chunk); *to_copy -= chunk; - kunmap(dest_pages[*target_page_nr]); + kbase_kunmap(dest_pages[*target_page_nr], target_page); *target_page_nr += 1; if (*target_page_nr >= nr_pages || *to_copy == 0) return 0; - target_page = kmap(dest_pages[*target_page_nr]); + target_page = kbase_kmap(dest_pages[*target_page_nr]); if (!target_page) { pr_err("%s: kmap failure", __func__); return -ENOMEM; @@ -5291,7 +5277,7 @@ int kbase_mem_copy_to_pinned_user_pages(struct page **dest_pages, memcpy(target_page, src_page + PAGE_SIZE-offset, chunk); *to_copy -= chunk; - kunmap(dest_pages[*target_page_nr]); + kbase_kunmap(dest_pages[*target_page_nr], target_page); return 0; } @@ -5358,20 +5344,14 @@ void kbase_unmap_external_resource(struct kbase_context *kctx, struct kbase_va_r alloc->imported.user_buf.current_mapping_usage_count--; if (alloc->imported.user_buf.current_mapping_usage_count == 0) { - bool writeable = true; - if (!kbase_is_region_invalid_or_free(reg)) { - kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, - alloc->pages, - kbase_reg_current_backed_size(reg), - kbase_reg_current_backed_size(reg), - kctx->as_nr, true); + kbase_mmu_teardown_imported_pages( + kctx->kbdev, &kctx->mmu, reg->start_pfn, alloc->pages, + kbase_reg_current_backed_size(reg), + kbase_reg_current_backed_size(reg), kctx->as_nr); } - if ((reg->flags & (KBASE_REG_CPU_WR | KBASE_REG_GPU_WR)) == 0) - writeable = false; - - kbase_jd_user_buf_unmap(kctx, alloc, reg, writeable); + kbase_jd_user_buf_unmap(kctx, alloc, reg); } } break; diff --git a/mali_kbase/mali_kbase_mem.h b/mali_kbase/mali_kbase_mem.h index 02e5509..aa67717 100644 --- a/mali_kbase/mali_kbase_mem.h +++ b/mali_kbase/mali_kbase_mem.h @@ -62,6 +62,186 @@ static inline void kbase_process_page_usage_inc(struct kbase_context *kctx, #define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_HW_ISSUE_8316 (1u << KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2_HW_ISSUE_8316) #define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_HW_ISSUE_9630 (1u << KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2_HW_ISSUE_9630) +/* Free region */ +#define KBASE_REG_FREE (1ul << 0) +/* CPU write access */ +#define KBASE_REG_CPU_WR (1ul << 1) +/* GPU write access */ +#define KBASE_REG_GPU_WR (1ul << 2) +/* No eXecute flag */ +#define KBASE_REG_GPU_NX (1ul << 3) +/* Is CPU cached? */ +#define KBASE_REG_CPU_CACHED (1ul << 4) +/* Is GPU cached? + * Some components within the GPU might only be able to access memory that is + * GPU cacheable. Refer to the specific GPU implementation for more details. 
+ */ +#define KBASE_REG_GPU_CACHED (1ul << 5) + +#define KBASE_REG_GROWABLE (1ul << 6) +/* Can grow on pf? */ +#define KBASE_REG_PF_GROW (1ul << 7) + +/* Allocation doesn't straddle the 4GB boundary in GPU virtual space */ +#define KBASE_REG_GPU_VA_SAME_4GB_PAGE (1ul << 8) + +/* inner shareable coherency */ +#define KBASE_REG_SHARE_IN (1ul << 9) +/* inner & outer shareable coherency */ +#define KBASE_REG_SHARE_BOTH (1ul << 10) + +#if MALI_USE_CSF +/* Space for 8 different zones */ +#define KBASE_REG_ZONE_BITS 3 +#else +/* Space for 4 different zones */ +#define KBASE_REG_ZONE_BITS 2 +#endif + +/* The bits 11-13 (inclusive) of the kbase_va_region flags are reserved + * for information about the zone in which it was allocated. + */ +#define KBASE_REG_ZONE_SHIFT (11ul) +#define KBASE_REG_ZONE_MASK (((1 << KBASE_REG_ZONE_BITS) - 1ul) << KBASE_REG_ZONE_SHIFT) + +#if KBASE_REG_ZONE_MAX > (1 << KBASE_REG_ZONE_BITS) +#error "Too many zones for the number of zone bits defined" +#endif + +/* GPU read access */ +#define KBASE_REG_GPU_RD (1ul << 14) +/* CPU read access */ +#define KBASE_REG_CPU_RD (1ul << 15) + +/* Index of chosen MEMATTR for this region (0..7) */ +#define KBASE_REG_MEMATTR_MASK (7ul << 16) +#define KBASE_REG_MEMATTR_INDEX(x) (((x)&7) << 16) +#define KBASE_REG_MEMATTR_VALUE(x) (((x)&KBASE_REG_MEMATTR_MASK) >> 16) + +#define KBASE_REG_PROTECTED (1ul << 19) + +/* Region belongs to a shrinker. + * + * This can either mean that it is part of the JIT/Ephemeral or tiler heap + * shrinker paths. Should be removed only after making sure that there are + * no references remaining to it in these paths, as it may cause the physical + * backing of the region to disappear during use. + */ +#define KBASE_REG_DONT_NEED (1ul << 20) + +/* Imported buffer is padded? */ +#define KBASE_REG_IMPORT_PAD (1ul << 21) + +#if MALI_USE_CSF +/* CSF event memory */ +#define KBASE_REG_CSF_EVENT (1ul << 22) +/* Bit 23 is reserved. + * + * Do not remove, use the next unreserved bit for new flags + */ +#define KBASE_REG_RESERVED_BIT_23 (1ul << 23) +#else +/* Bit 22 is reserved. + * + * Do not remove, use the next unreserved bit for new flags + */ +#define KBASE_REG_RESERVED_BIT_22 (1ul << 22) +/* The top of the initial commit is aligned to extension pages. + * Extent must be a power of 2 + */ +#define KBASE_REG_TILER_ALIGN_TOP (1ul << 23) +#endif /* MALI_USE_CSF */ + +/* Bit 24 is currently unused and is available for use for a new flag */ + +/* Memory has permanent kernel side mapping */ +#define KBASE_REG_PERMANENT_KERNEL_MAPPING (1ul << 25) + +/* GPU VA region has been freed by the userspace, but still remains allocated + * due to the reference held by CPU mappings created on the GPU VA region. + * + * A region with this flag set has had kbase_gpu_munmap() called on it, but can + * still be looked-up in the region tracker as a non-free region. Hence must + * not create or update any more GPU mappings on such regions because they will + * not be unmapped when the region is finally destroyed. + * + * Since such regions are still present in the region tracker, new allocations + * attempted with BASE_MEM_SAME_VA might fail if their address intersects with + * a region with this flag set. + * + * In addition, this flag indicates the gpu_alloc member might no longer be valid + * e.g. in infinite cache simulation. + */ +#define KBASE_REG_VA_FREED (1ul << 26) + +/* If set, the heap info address points to a u32 holding the used size in bytes; + * otherwise it points to a u64 holding the lowest address of unused memory. 
+ */ +#define KBASE_REG_HEAP_INFO_IS_SIZE (1ul << 27) + +/* Allocation is actively used for JIT memory */ +#define KBASE_REG_ACTIVE_JIT_ALLOC (1ul << 28) + +#if MALI_USE_CSF +/* This flag only applies to allocations in the EXEC_FIXED_VA and FIXED_VA + * memory zones, and it determines whether they were created with a fixed + * GPU VA address requested by the user. + */ +#define KBASE_REG_FIXED_ADDRESS (1ul << 29) +#else +#define KBASE_REG_RESERVED_BIT_29 (1ul << 29) +#endif + +#define KBASE_REG_ZONE_CUSTOM_VA_BASE (0x100000000ULL >> PAGE_SHIFT) + +#if MALI_USE_CSF +/* only used with 32-bit clients */ +/* On a 32bit platform, custom VA should be wired from 4GB to 2^(43). + */ +#define KBASE_REG_ZONE_CUSTOM_VA_SIZE (((1ULL << 43) >> PAGE_SHIFT) - KBASE_REG_ZONE_CUSTOM_VA_BASE) +#else +/* only used with 32-bit clients */ +/* On a 32bit platform, custom VA should be wired from 4GB to the VA limit of the + * GPU. Unfortunately, the Linux mmap() interface limits us to 2^32 pages (2^44 + * bytes, see mmap64 man page for reference). So we put the default limit to the + * maximum possible on Linux and shrink it down, if required by the GPU, during + * initialization. + */ +#define KBASE_REG_ZONE_CUSTOM_VA_SIZE (((1ULL << 44) >> PAGE_SHIFT) - KBASE_REG_ZONE_CUSTOM_VA_BASE) +/* end 32-bit clients only */ +#endif + +/* The starting address and size of the GPU-executable zone are dynamic + * and depend on the platform and the number of pages requested by the + * user process, with an upper limit of 4 GB. + */ +#define KBASE_REG_ZONE_EXEC_VA_MAX_PAGES ((1ULL << 32) >> PAGE_SHIFT) /* 4 GB */ +#define KBASE_REG_ZONE_EXEC_VA_SIZE KBASE_REG_ZONE_EXEC_VA_MAX_PAGES + +#if MALI_USE_CSF +#define KBASE_REG_ZONE_MCU_SHARED_BASE (0x04000000ULL >> PAGE_SHIFT) +#define MCU_SHARED_ZONE_SIZE (((0x08000000ULL) >> PAGE_SHIFT) - KBASE_REG_ZONE_MCU_SHARED_BASE) + +/* For CSF GPUs, the EXEC_VA zone is always 4GB in size, and starts at 2^47 for 64-bit + * clients, and 2^43 for 32-bit clients. + */ +#define KBASE_REG_ZONE_EXEC_VA_BASE_64 ((1ULL << 47) >> PAGE_SHIFT) +#define KBASE_REG_ZONE_EXEC_VA_BASE_32 ((1ULL << 43) >> PAGE_SHIFT) +/* Executable zone supporting FIXED/FIXABLE allocations. + * It is always 4GB in size. + */ +#define KBASE_REG_ZONE_EXEC_FIXED_VA_SIZE KBASE_REG_ZONE_EXEC_VA_MAX_PAGES + +/* Non-executable zone supporting FIXED/FIXABLE allocations. + * It extends from (2^47) up to (2^48)-1, for 64-bit userspace clients, and from + * (2^43) up to (2^44)-1 for 32-bit userspace clients. For the same reason, + * the end of the FIXED_VA zone for 64-bit clients is (2^48)-1. + */ +#define KBASE_REG_ZONE_FIXED_VA_END_64 ((1ULL << 48) >> PAGE_SHIFT) +#define KBASE_REG_ZONE_FIXED_VA_END_32 ((1ULL << 44) >> PAGE_SHIFT) + +#endif + /* * A CPU mapping */ @@ -307,6 +487,32 @@ enum kbase_jit_report_flags { }; /** + * kbase_zone_to_bits - Convert a memory zone @zone to the corresponding + * bitpattern, for ORing together with other flags. + * @zone: Memory zone + * + * Return: Bitpattern with the appropriate bits set. 
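Plausible implementations of the two converters documented here, inferred from the KBASE_REG_ZONE_SHIFT and KBASE_REG_ZONE_MASK definitions above; the driver's own definitions live in the .c file and may differ in detail:

#include <stdio.h>

#define KBASE_REG_ZONE_BITS 3 /* CSF build assumed */
#define KBASE_REG_ZONE_SHIFT 11
#define KBASE_REG_ZONE_MASK (((1ul << KBASE_REG_ZONE_BITS) - 1ul) << KBASE_REG_ZONE_SHIFT)

enum kbase_memory_zone { SAME_VA_ZONE, CUSTOM_VA_ZONE, EXEC_VA_ZONE };

static unsigned long kbase_zone_to_bits(enum kbase_memory_zone zone)
{
	return ((unsigned long)zone << KBASE_REG_ZONE_SHIFT) & KBASE_REG_ZONE_MASK;
}

static enum kbase_memory_zone kbase_bits_to_zone(unsigned long zone_bits)
{
	return (enum kbase_memory_zone)((zone_bits & KBASE_REG_ZONE_MASK) >>
					KBASE_REG_ZONE_SHIFT);
}

int main(void)
{
	unsigned long flags = kbase_zone_to_bits(CUSTOM_VA_ZONE) | 1ul /* KBASE_REG_FREE */;

	printf("zone id %d\n", (int)kbase_bits_to_zone(flags)); /* prints 1 */
	return 0;
}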
+ */
+unsigned long kbase_zone_to_bits(enum kbase_memory_zone zone);
+
+/**
+ * kbase_bits_to_zone - Convert the bitpattern @zone_bits to the corresponding
+ * zone identifier
+ * @zone_bits: Memory allocation flag containing a zone pattern
+ *
+ * Return: Zone identifier for valid zone bitpatterns.
+ */
+enum kbase_memory_zone kbase_bits_to_zone(unsigned long zone_bits);
+
+/**
+ * kbase_reg_zone_get_name - Get the string name for a given memory zone
+ * @zone: Memory zone identifier
+ *
+ * Return: string for valid memory zone, NULL otherwise
+ */
+char *kbase_reg_zone_get_name(enum kbase_memory_zone zone);
+
+/**
 * kbase_set_phy_alloc_page_status - Set the page migration status of the underlying
 * physical allocation.
 * @alloc: the physical allocation containing the pages whose metadata is going
@@ -449,204 +655,6 @@ struct kbase_va_region {
 size_t nr_pages;
 size_t initial_commit;
 size_t threshold_pages;
-
-/* Free region */
-#define KBASE_REG_FREE (1ul << 0)
-/* CPU write access */
-#define KBASE_REG_CPU_WR (1ul << 1)
-/* GPU write access */
-#define KBASE_REG_GPU_WR (1ul << 2)
-/* No eXecute flag */
-#define KBASE_REG_GPU_NX (1ul << 3)
-/* Is CPU cached? */
-#define KBASE_REG_CPU_CACHED (1ul << 4)
-/* Is GPU cached?
- * Some components within the GPU might only be able to access memory that is
- * GPU cacheable. Refer to the specific GPU implementation for more details.
- */
-#define KBASE_REG_GPU_CACHED (1ul << 5)
-
-#define KBASE_REG_GROWABLE (1ul << 6)
-/* Can grow on pf? */
-#define KBASE_REG_PF_GROW (1ul << 7)
-
-/* Allocation doesn't straddle the 4GB boundary in GPU virtual space */
-#define KBASE_REG_GPU_VA_SAME_4GB_PAGE (1ul << 8)
-
-/* inner shareable coherency */
-#define KBASE_REG_SHARE_IN (1ul << 9)
-/* inner & outer shareable coherency */
-#define KBASE_REG_SHARE_BOTH (1ul << 10)
-
-#if MALI_USE_CSF
-/* Space for 8 different zones */
-#define KBASE_REG_ZONE_BITS 3
-#else
-/* Space for 4 different zones */
-#define KBASE_REG_ZONE_BITS 2
-#endif
-
-#define KBASE_REG_ZONE_MASK (((1 << KBASE_REG_ZONE_BITS) - 1ul) << 11)
-#define KBASE_REG_ZONE(x) (((x) & ((1 << KBASE_REG_ZONE_BITS) - 1ul)) << 11)
-#define KBASE_REG_ZONE_IDX(x) (((x) & KBASE_REG_ZONE_MASK) >> 11)
-
-#if KBASE_REG_ZONE_MAX > (1 << KBASE_REG_ZONE_BITS)
-#error "Too many zones for the number of zone bits defined"
-#endif
-
-/* GPU read access */
-#define KBASE_REG_GPU_RD (1ul << 14)
-/* CPU read access */
-#define KBASE_REG_CPU_RD (1ul << 15)
-
-/* Index of chosen MEMATTR for this region (0..7) */
-#define KBASE_REG_MEMATTR_MASK (7ul << 16)
-#define KBASE_REG_MEMATTR_INDEX(x) (((x) & 7) << 16)
-#define KBASE_REG_MEMATTR_VALUE(x) (((x) & KBASE_REG_MEMATTR_MASK) >> 16)
-
-#define KBASE_REG_PROTECTED (1ul << 19)
-
-/* Region belongs to a shrinker.
- *
- * This can either mean that it is part of the JIT/Ephemeral or tiler heap
- * shrinker paths. Should be removed only after making sure that there are
- * no references remaining to it in these paths, as it may cause the physical
- * backing of the region to disappear during use.
- */
-#define KBASE_REG_DONT_NEED (1ul << 20)
-
-/* Imported buffer is padded? */
-#define KBASE_REG_IMPORT_PAD (1ul << 21)
-
-#if MALI_USE_CSF
-/* CSF event memory */
-#define KBASE_REG_CSF_EVENT (1ul << 22)
-#else
-/* Bit 22 is reserved.
- *
- * Do not remove, use the next unreserved bit for new flags
- */
-#define KBASE_REG_RESERVED_BIT_22 (1ul << 22)
-#endif
-
-#if !MALI_USE_CSF
-/* The top of the initial commit is aligned to extension pages.
- * Extent must be a power of 2 - */ -#define KBASE_REG_TILER_ALIGN_TOP (1ul << 23) -#else -/* Bit 23 is reserved. - * - * Do not remove, use the next unreserved bit for new flags - */ -#define KBASE_REG_RESERVED_BIT_23 (1ul << 23) -#endif /* !MALI_USE_CSF */ - -/* Bit 24 is currently unused and is available for use for a new flag */ - -/* Memory has permanent kernel side mapping */ -#define KBASE_REG_PERMANENT_KERNEL_MAPPING (1ul << 25) - -/* GPU VA region has been freed by the userspace, but still remains allocated - * due to the reference held by CPU mappings created on the GPU VA region. - * - * A region with this flag set has had kbase_gpu_munmap() called on it, but can - * still be looked-up in the region tracker as a non-free region. Hence must - * not create or update any more GPU mappings on such regions because they will - * not be unmapped when the region is finally destroyed. - * - * Since such regions are still present in the region tracker, new allocations - * attempted with BASE_MEM_SAME_VA might fail if their address intersects with - * a region with this flag set. - * - * In addition, this flag indicates the gpu_alloc member might no longer valid - * e.g. in infinite cache simulation. - */ -#define KBASE_REG_VA_FREED (1ul << 26) - -/* If set, the heap info address points to a u32 holding the used size in bytes; - * otherwise it points to a u64 holding the lowest address of unused memory. - */ -#define KBASE_REG_HEAP_INFO_IS_SIZE (1ul << 27) - -/* Allocation is actively used for JIT memory */ -#define KBASE_REG_ACTIVE_JIT_ALLOC (1ul << 28) - -#if MALI_USE_CSF -/* This flag only applies to allocations in the EXEC_FIXED_VA and FIXED_VA - * memory zones, and it determines whether they were created with a fixed - * GPU VA address requested by the user. - */ -#define KBASE_REG_FIXED_ADDRESS (1ul << 29) -#else -#define KBASE_REG_RESERVED_BIT_29 (1ul << 29) -#endif - -#define KBASE_REG_ZONE_SAME_VA KBASE_REG_ZONE(0) - -#define KBASE_REG_ZONE_CUSTOM_VA KBASE_REG_ZONE(1) -#define KBASE_REG_ZONE_CUSTOM_VA_BASE (0x100000000ULL >> PAGE_SHIFT) - -#if MALI_USE_CSF -/* only used with 32-bit clients */ -/* On a 32bit platform, custom VA should be wired from 4GB to 2^(43). - */ -#define KBASE_REG_ZONE_CUSTOM_VA_SIZE \ - (((1ULL << 43) >> PAGE_SHIFT) - KBASE_REG_ZONE_CUSTOM_VA_BASE) -#else -/* only used with 32-bit clients */ -/* On a 32bit platform, custom VA should be wired from 4GB to the VA limit of the - * GPU. Unfortunately, the Linux mmap() interface limits us to 2^32 pages (2^44 - * bytes, see mmap64 man page for reference). So we put the default limit to the - * maximum possible on Linux and shrink it down, if required by the GPU, during - * initialization. - */ -#define KBASE_REG_ZONE_CUSTOM_VA_SIZE \ - (((1ULL << 44) >> PAGE_SHIFT) - KBASE_REG_ZONE_CUSTOM_VA_BASE) -/* end 32-bit clients only */ -#endif - -/* The starting address and size of the GPU-executable zone are dynamic - * and depend on the platform and the number of pages requested by the - * user process, with an upper limit of 4 GB. 
- */ -#define KBASE_REG_ZONE_EXEC_VA KBASE_REG_ZONE(2) -#define KBASE_REG_ZONE_EXEC_VA_MAX_PAGES ((1ULL << 32) >> PAGE_SHIFT) /* 4 GB */ - -#if MALI_USE_CSF -#define KBASE_REG_ZONE_MCU_SHARED KBASE_REG_ZONE(3) -#define KBASE_REG_ZONE_MCU_SHARED_BASE (0x04000000ULL >> PAGE_SHIFT) -#define KBASE_REG_ZONE_MCU_SHARED_SIZE (((0x08000000ULL) >> PAGE_SHIFT) - \ - KBASE_REG_ZONE_MCU_SHARED_BASE) - -/* For CSF GPUs, the EXEC_VA zone is always 4GB in size, and starts at 2^47 for 64-bit - * clients, and 2^43 for 32-bit clients. - */ -#define KBASE_REG_ZONE_EXEC_VA_BASE_64 ((1ULL << 47) >> PAGE_SHIFT) -#define KBASE_REG_ZONE_EXEC_VA_BASE_32 ((1ULL << 43) >> PAGE_SHIFT) -#define KBASE_REG_ZONE_EXEC_VA_SIZE KBASE_REG_ZONE_EXEC_VA_MAX_PAGES - -/* Executable zone supporting FIXED/FIXABLE allocations. - * It is always 4GB in size. - */ - -#define KBASE_REG_ZONE_EXEC_FIXED_VA KBASE_REG_ZONE(4) -#define KBASE_REG_ZONE_EXEC_FIXED_VA_SIZE KBASE_REG_ZONE_EXEC_VA_MAX_PAGES - -/* Non-executable zone supporting FIXED/FIXABLE allocations. - * It extends from (2^47) up to (2^48)-1, for 64-bit userspace clients, and from - * (2^43) up to (2^44)-1 for 32-bit userspace clients. - */ -#define KBASE_REG_ZONE_FIXED_VA KBASE_REG_ZONE(5) - -/* Again - 32-bit userspace cannot map addresses beyond 2^44, but 64-bit can - and so - * the end of the FIXED_VA zone for 64-bit clients is (2^48)-1. - */ -#define KBASE_REG_ZONE_FIXED_VA_END_64 ((1ULL << 48) >> PAGE_SHIFT) -#define KBASE_REG_ZONE_FIXED_VA_END_32 ((1ULL << 44) >> PAGE_SHIFT) - -#endif - unsigned long flags; size_t extension; struct kbase_mem_phy_alloc *cpu_alloc; @@ -687,20 +695,19 @@ struct kbase_va_region { }; /** - * kbase_is_ctx_reg_zone - determine whether a KBASE_REG_ZONE_<...> is for a - * context or for a device - * @zone_bits: A KBASE_REG_ZONE_<...> to query + * kbase_is_ctx_reg_zone - Determine whether a zone is associated with a + * context or with the device + * @zone: Zone identifier * - * Return: True if the zone for @zone_bits is a context zone, False otherwise + * Return: True if @zone is a context zone, False otherwise */ -static inline bool kbase_is_ctx_reg_zone(unsigned long zone_bits) +static inline bool kbase_is_ctx_reg_zone(enum kbase_memory_zone zone) { - WARN_ON((zone_bits & KBASE_REG_ZONE_MASK) != zone_bits); - return (zone_bits == KBASE_REG_ZONE_SAME_VA || #if MALI_USE_CSF - zone_bits == KBASE_REG_ZONE_EXEC_FIXED_VA || zone_bits == KBASE_REG_ZONE_FIXED_VA || + return !(zone == MCU_SHARED_ZONE); +#else + return true; #endif - zone_bits == KBASE_REG_ZONE_CUSTOM_VA || zone_bits == KBASE_REG_ZONE_EXEC_VA); } /* Special marker for failed JIT allocations that still must be marked as @@ -1359,18 +1366,19 @@ int kbase_region_tracker_init_exec(struct kbase_context *kctx, u64 exec_va_pages void kbase_region_tracker_term(struct kbase_context *kctx); /** - * kbase_region_tracker_term_rbtree - Free memory for a region tracker + * kbase_region_tracker_erase_rbtree - Free memory for a region tracker * * @rbtree: Region tracker tree root * * This will free all the regions within the region tracker */ -void kbase_region_tracker_term_rbtree(struct rb_root *rbtree); +void kbase_region_tracker_erase_rbtree(struct rb_root *rbtree); struct kbase_va_region *kbase_region_tracker_find_region_enclosing_address( struct kbase_context *kctx, u64 gpu_addr); struct kbase_va_region *kbase_find_region_enclosing_address( struct rb_root *rbtree, u64 gpu_addr); +void kbase_region_tracker_insert(struct kbase_va_region *new_reg); /** * kbase_region_tracker_find_region_base_address 
- Check that a pointer is @@ -1387,8 +1395,11 @@ struct kbase_va_region *kbase_region_tracker_find_region_base_address( struct kbase_va_region *kbase_find_region_base_address(struct rb_root *rbtree, u64 gpu_addr); -struct kbase_va_region *kbase_alloc_free_region(struct kbase_device *kbdev, struct rb_root *rbtree, - u64 start_pfn, size_t nr_pages, int zone); +struct kbase_va_region *kbase_alloc_free_region(struct kbase_reg_zone *zone, u64 start_pfn, + size_t nr_pages); +struct kbase_va_region *kbase_ctx_alloc_free_region(struct kbase_context *kctx, + enum kbase_memory_zone id, u64 start_pfn, + size_t nr_pages); void kbase_free_alloced_region(struct kbase_va_region *reg); int kbase_add_va_region(struct kbase_context *kctx, struct kbase_va_region *reg, u64 addr, size_t nr_pages, size_t align); @@ -1866,7 +1877,7 @@ static inline struct kbase_page_metadata *kbase_page_private(struct page *p) static inline dma_addr_t kbase_dma_addr(struct page *p) { - if (kbase_page_migration_enabled) + if (kbase_is_page_migration_enabled()) return kbase_page_private(p)->dma_addr; return kbase_dma_addr_as_priv(p); @@ -1876,8 +1887,9 @@ static inline dma_addr_t kbase_dma_addr_from_tagged(struct tagged_addr tagged_pa { phys_addr_t pa = as_phys_addr_t(tagged_pa); struct page *page = pfn_to_page(PFN_DOWN(pa)); - dma_addr_t dma_addr = - is_huge(tagged_pa) ? kbase_dma_addr_as_priv(page) : kbase_dma_addr(page); + dma_addr_t dma_addr = (is_huge(tagged_pa) || is_partial(tagged_pa)) ? + kbase_dma_addr_as_priv(page) : + kbase_dma_addr(page); return dma_addr; } @@ -2434,75 +2446,95 @@ int kbase_mem_copy_to_pinned_user_pages(struct page **dest_pages, unsigned int *target_page_nr, size_t offset); /** - * kbase_reg_zone_end_pfn - return the end Page Frame Number of @zone - * @zone: zone to query + * kbase_ctx_reg_zone_get_nolock - Get a zone from @kctx where the caller does + * not have @kctx 's region lock + * @kctx: Pointer to kbase context + * @zone: Zone identifier * - * Return: The end of the zone corresponding to @zone + * This should only be used in performance-critical paths where the code is + * resilient to a race with the zone changing, and only when the zone is tracked + * by the @kctx. + * + * Return: The zone corresponding to @zone */ -static inline u64 kbase_reg_zone_end_pfn(struct kbase_reg_zone *zone) +static inline struct kbase_reg_zone *kbase_ctx_reg_zone_get_nolock(struct kbase_context *kctx, + enum kbase_memory_zone zone) { - return zone->base_pfn + zone->va_size_pages; + WARN_ON(!kbase_is_ctx_reg_zone(zone)); + return &kctx->reg_zone[zone]; } /** - * kbase_ctx_reg_zone_init - initialize a zone in @kctx + * kbase_ctx_reg_zone_get - Get a memory zone from @kctx * @kctx: Pointer to kbase context - * @zone_bits: A KBASE_REG_ZONE_<...> to initialize + * @zone: Zone identifier + * + * Note that the zone is not refcounted, so there is no corresponding operation to + * put the zone back. 
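+ *
+ * Illustrative use (hypothetical caller holding @kctx 's region lock):
+ *   struct kbase_reg_zone *same_va = kbase_ctx_reg_zone_get(kctx, SAME_VA_ZONE);
+ *   u64 end_pfn = kbase_reg_zone_end_pfn(same_va);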
+ *
+ * Return: The zone corresponding to @zone
+ */
+static inline struct kbase_reg_zone *kbase_ctx_reg_zone_get(struct kbase_context *kctx,
+ enum kbase_memory_zone zone)
+{
+ lockdep_assert_held(&kctx->reg_lock);
+ return kbase_ctx_reg_zone_get_nolock(kctx, zone);
+}
+
+/**
+ * kbase_reg_zone_init - Initialize a memory zone
+ * @kbdev: Pointer to kbase device in order to initialize the VA region cache
+ * @zone: Memory zone
+ * @id: Memory zone identifier to facilitate lookups
+ * @base_pfn: Page Frame Number in GPU virtual address space for the start of
+ * the Zone
+ * @va_size_pages: Size of the Zone in pages
+ *
+ * Return:
+ * * 0 on success
+ * * -ENOMEM on error
+ */
-static inline void kbase_ctx_reg_zone_init(struct kbase_context *kctx,
- unsigned long zone_bits,
- u64 base_pfn, u64 va_size_pages)
+static inline int kbase_reg_zone_init(struct kbase_device *kbdev, struct kbase_reg_zone *zone,
+ enum kbase_memory_zone id, u64 base_pfn, u64 va_size_pages)
 {
- struct kbase_reg_zone *zone;
+ struct kbase_va_region *reg;
 
- lockdep_assert_held(&kctx->reg_lock);
- WARN_ON(!kbase_is_ctx_reg_zone(zone_bits));
+ *zone = (struct kbase_reg_zone){ .reg_rbtree = RB_ROOT,
+ .base_pfn = base_pfn,
+ .va_size_pages = va_size_pages,
+ .id = id,
+ .cache = kbdev->va_region_slab };
+
+ if (unlikely(!va_size_pages))
+ return 0;
+
+ reg = kbase_alloc_free_region(zone, base_pfn, va_size_pages);
+ if (unlikely(!reg))
+ return -ENOMEM;
+
+ kbase_region_tracker_insert(reg);
 
- zone = &kctx->reg_zone[KBASE_REG_ZONE_IDX(zone_bits)];
- *zone = (struct kbase_reg_zone){
- .base_pfn = base_pfn, .va_size_pages = va_size_pages,
- };
+ return 0;
 }
 
 /**
- * kbase_ctx_reg_zone_get_nolock - get a zone from @kctx where the caller does
- * not have @kctx 's region lock
- * @kctx: Pointer to kbase context
- * @zone_bits: A KBASE_REG_ZONE_<...> to retrieve
- *
- * This should only be used in performance-critical paths where the code is
- * resilient to a race with the zone changing.
+ * kbase_reg_zone_end_pfn - return the end Page Frame Number of @zone + * @zone: zone to query * - * Return: The zone corresponding to @zone_bits + * Return: The end of the zone corresponding to @zone */ -static inline struct kbase_reg_zone * -kbase_ctx_reg_zone_get_nolock(struct kbase_context *kctx, - unsigned long zone_bits) +static inline u64 kbase_reg_zone_end_pfn(struct kbase_reg_zone *zone) { - WARN_ON(!kbase_is_ctx_reg_zone(zone_bits)); - - return &kctx->reg_zone[KBASE_REG_ZONE_IDX(zone_bits)]; + return zone->base_pfn + zone->va_size_pages; } /** - * kbase_ctx_reg_zone_get - get a zone from @kctx - * @kctx: Pointer to kbase context - * @zone_bits: A KBASE_REG_ZONE_<...> to retrieve - * - * The get is not refcounted - there is no corresponding 'put' operation - * - * Return: The zone corresponding to @zone_bits + * kbase_reg_zone_term - Terminate the memory zone tracker + * @zone: Memory zone */ -static inline struct kbase_reg_zone * -kbase_ctx_reg_zone_get(struct kbase_context *kctx, unsigned long zone_bits) +static inline void kbase_reg_zone_term(struct kbase_reg_zone *zone) { - lockdep_assert_held(&kctx->reg_lock); - WARN_ON(!kbase_is_ctx_reg_zone(zone_bits)); - - return &kctx->reg_zone[KBASE_REG_ZONE_IDX(zone_bits)]; + kbase_region_tracker_erase_rbtree(&zone->reg_rbtree); } /** diff --git a/mali_kbase/mali_kbase_mem_linux.c b/mali_kbase/mali_kbase_mem_linux.c index e8df130..1af833d 100644 --- a/mali_kbase/mali_kbase_mem_linux.c +++ b/mali_kbase/mali_kbase_mem_linux.c @@ -287,9 +287,8 @@ struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, u64 va_pages u64 extension, u64 *flags, u64 *gpu_va, enum kbase_caller_mmu_sync_info mmu_sync_info) { - int zone; struct kbase_va_region *reg; - struct rb_root *rbtree; + enum kbase_memory_zone zone; struct device *dev; KBASE_DEBUG_ASSERT(kctx); @@ -359,31 +358,25 @@ struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, u64 va_pages #endif /* find out which VA zone to use */ - if (*flags & BASE_MEM_SAME_VA) { - rbtree = &kctx->reg_rbtree_same; - zone = KBASE_REG_ZONE_SAME_VA; - } + if (*flags & BASE_MEM_SAME_VA) + zone = SAME_VA_ZONE; #if MALI_USE_CSF /* fixed va_zone always exists */ else if (*flags & (BASE_MEM_FIXED | BASE_MEM_FIXABLE)) { if (*flags & BASE_MEM_PROT_GPU_EX) { - rbtree = &kctx->reg_rbtree_exec_fixed; - zone = KBASE_REG_ZONE_EXEC_FIXED_VA; + zone = EXEC_FIXED_VA_ZONE; } else { - rbtree = &kctx->reg_rbtree_fixed; - zone = KBASE_REG_ZONE_FIXED_VA; + zone = FIXED_VA_ZONE; } } #endif else if ((*flags & BASE_MEM_PROT_GPU_EX) && kbase_has_exec_va_zone(kctx)) { - rbtree = &kctx->reg_rbtree_exec; - zone = KBASE_REG_ZONE_EXEC_VA; + zone = EXEC_VA_ZONE; } else { - rbtree = &kctx->reg_rbtree_custom; - zone = KBASE_REG_ZONE_CUSTOM_VA; + zone = CUSTOM_VA_ZONE; } - reg = kbase_alloc_free_region(kctx->kbdev, rbtree, PFN_DOWN(*gpu_va), va_pages, zone); + reg = kbase_ctx_alloc_free_region(kctx, zone, PFN_DOWN(*gpu_va), va_pages); if (!reg) { dev_err(dev, "Failed to allocate free region"); @@ -634,8 +627,8 @@ int kbase_mem_query(struct kbase_context *kctx, #if MALI_USE_CSF if (KBASE_REG_CSF_EVENT & reg->flags) *out |= BASE_MEM_CSF_EVENT; - if (((KBASE_REG_ZONE_MASK & reg->flags) == KBASE_REG_ZONE_FIXED_VA) || - ((KBASE_REG_ZONE_MASK & reg->flags) == KBASE_REG_ZONE_EXEC_FIXED_VA)) { + if ((kbase_bits_to_zone(reg->flags) == FIXED_VA_ZONE) || + (kbase_bits_to_zone(reg->flags) == EXEC_FIXED_VA_ZONE)) { if (KBASE_REG_FIXED_ADDRESS & reg->flags) *out |= BASE_MEM_FIXED; else @@ -680,9 +673,6 @@ unsigned long 
kbase_mem_evictable_reclaim_count_objects(struct shrinker *s, int evict_nents = atomic_read(&kctx->evict_nents); unsigned long nr_freeable_items; - WARN((sc->gfp_mask & __GFP_ATOMIC), - "Shrinkers cannot be called for GFP_ATOMIC allocations. Check kernel mm for problems. gfp_mask==%x\n", - sc->gfp_mask); WARN(in_atomic(), "Shrinker called in atomic context. The caller must use GFP_ATOMIC or similar, then Shrinkers must not be called. gfp_mask==%x\n", sc->gfp_mask); @@ -875,7 +865,7 @@ int kbase_mem_evictable_make(struct kbase_mem_phy_alloc *gpu_alloc) /* Indicate to page migration that the memory can be reclaimed by the shrinker. */ - if (kbase_page_migration_enabled) + if (kbase_is_page_migration_enabled()) kbase_set_phy_alloc_page_status(gpu_alloc, NOT_MOVABLE); mutex_unlock(&kctx->jit_evict_lock); @@ -936,7 +926,7 @@ bool kbase_mem_evictable_unmake(struct kbase_mem_phy_alloc *gpu_alloc) * in which a physical allocation could transition to NOT_MOVABLE * from. */ - if (kbase_page_migration_enabled) + if (kbase_is_page_migration_enabled()) kbase_set_phy_alloc_page_status(gpu_alloc, ALLOCATED_MAPPED); } } @@ -1316,11 +1306,11 @@ int kbase_mem_umm_map(struct kbase_context *kctx, gwt_mask = ~KBASE_REG_GPU_WR; #endif - err = kbase_mmu_insert_imported_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, - kbase_get_gpu_phy_pages(reg), - kbase_reg_current_backed_size(reg), - reg->flags & gwt_mask, kctx->as_nr, alloc->group_id, - mmu_sync_info, NULL); + err = kbase_mmu_insert_pages_skip_status_update(kctx->kbdev, &kctx->mmu, reg->start_pfn, + kbase_get_gpu_phy_pages(reg), + kbase_reg_current_backed_size(reg), + reg->flags & gwt_mask, kctx->as_nr, + alloc->group_id, mmu_sync_info, NULL); if (err) goto bad_insert; @@ -1345,8 +1335,8 @@ int kbase_mem_umm_map(struct kbase_context *kctx, return 0; bad_pad_insert: - kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, alloc->pages, - alloc->nents, alloc->nents, kctx->as_nr, true); + kbase_mmu_teardown_imported_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, alloc->pages, + alloc->nents, alloc->nents, kctx->as_nr); bad_insert: kbase_mem_umm_unmap_attachment(kctx, alloc); bad_map_attachment: @@ -1374,9 +1364,9 @@ void kbase_mem_umm_unmap(struct kbase_context *kctx, if (!kbase_is_region_invalid_or_free(reg) && reg->gpu_alloc == alloc) { int err; - err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, - alloc->pages, reg->nr_pages, reg->nr_pages, - kctx->as_nr, true); + err = kbase_mmu_teardown_imported_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, + alloc->pages, reg->nr_pages, reg->nr_pages, + kctx->as_nr); WARN_ON(err); } @@ -1423,6 +1413,7 @@ static struct kbase_va_region *kbase_mem_from_umm(struct kbase_context *kctx, struct kbase_va_region *reg; struct dma_buf *dma_buf; struct dma_buf_attachment *dma_attachment; + enum kbase_memory_zone zone; bool shared_zone = false; bool need_sync = false; int group_id; @@ -1481,12 +1472,11 @@ static struct kbase_va_region *kbase_mem_from_umm(struct kbase_context *kctx, if (shared_zone) { *flags |= BASE_MEM_NEED_MMAP; - reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_same, 0, *va_pages, - KBASE_REG_ZONE_SAME_VA); - } else { - reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_custom, 0, *va_pages, - KBASE_REG_ZONE_CUSTOM_VA); - } + zone = SAME_VA_ZONE; + } else + zone = CUSTOM_VA_ZONE; + + reg = kbase_ctx_alloc_free_region(kctx, zone, 0, *va_pages); if (!reg) { dma_buf_detach(dma_buf, dma_attachment); @@ -1572,9 +1562,8 @@ static struct kbase_va_region 
*kbase_mem_from_user_buffer( { long i, dma_mapped_pages; struct kbase_va_region *reg; - struct rb_root *rbtree; long faulted_pages; - int zone = KBASE_REG_ZONE_CUSTOM_VA; + enum kbase_memory_zone zone = CUSTOM_VA_ZONE; bool shared_zone = false; u32 cache_line_alignment = kbase_get_cache_line_alignment(kctx->kbdev); struct kbase_alloc_import_user_buf *user_buf; @@ -1582,6 +1571,7 @@ static struct kbase_va_region *kbase_mem_from_user_buffer( struct tagged_addr *pa; struct device *dev; int write; + enum dma_data_direction dma_dir; /* Flag supported only for dma-buf imported memory */ if (*flags & BASE_MEM_IMPORT_SYNC_ON_MAP_UNMAP) @@ -1637,13 +1627,10 @@ static struct kbase_va_region *kbase_mem_from_user_buffer( if (shared_zone) { *flags |= BASE_MEM_NEED_MMAP; - zone = KBASE_REG_ZONE_SAME_VA; - rbtree = &kctx->reg_rbtree_same; - } else - rbtree = &kctx->reg_rbtree_custom; - - reg = kbase_alloc_free_region(kctx->kbdev, rbtree, 0, *va_pages, zone); + zone = SAME_VA_ZONE; + } + reg = kbase_ctx_alloc_free_region(kctx, zone, 0, *va_pages); if (!reg) goto no_region; @@ -1693,6 +1680,7 @@ static struct kbase_va_region *kbase_mem_from_user_buffer( down_read(kbase_mem_get_process_mmap_lock()); write = reg->flags & (KBASE_REG_CPU_WR | KBASE_REG_GPU_WR); + dma_dir = write ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE; #if KERNEL_VERSION(5, 9, 0) > LINUX_VERSION_CODE faulted_pages = get_user_pages(address, *va_pages, @@ -1751,10 +1739,10 @@ static struct kbase_va_region *kbase_mem_from_user_buffer( for (i = 0; i < faulted_pages; i++) { dma_addr_t dma_addr; #if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) - dma_addr = dma_map_page(dev, pages[i], 0, PAGE_SIZE, DMA_BIDIRECTIONAL); + dma_addr = dma_map_page(dev, pages[i], 0, PAGE_SIZE, dma_dir); #else - dma_addr = dma_map_page_attrs(dev, pages[i], 0, PAGE_SIZE, - DMA_BIDIRECTIONAL, DMA_ATTR_SKIP_CPU_SYNC); + dma_addr = dma_map_page_attrs(dev, pages[i], 0, PAGE_SIZE, dma_dir, + DMA_ATTR_SKIP_CPU_SYNC); #endif if (dma_mapping_error(dev, dma_addr)) goto unwind_dma_map; @@ -1762,7 +1750,7 @@ static struct kbase_va_region *kbase_mem_from_user_buffer( user_buf->dma_addrs[i] = dma_addr; pa[i] = as_tagged(page_to_phys(pages[i])); - dma_sync_single_for_device(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); + dma_sync_single_for_device(dev, dma_addr, PAGE_SIZE, dma_dir); } reg->gpu_alloc->nents = faulted_pages; @@ -1781,12 +1769,11 @@ unwind_dma_map: for (i = 0; i < dma_mapped_pages; i++) { dma_addr_t dma_addr = user_buf->dma_addrs[i]; - dma_sync_single_for_device(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); + dma_sync_single_for_device(dev, dma_addr, PAGE_SIZE, dma_dir); #if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) - dma_unmap_page(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); + dma_unmap_page(dev, dma_addr, PAGE_SIZE, dma_dir); #else - dma_unmap_page_attrs(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL, - DMA_ATTR_SKIP_CPU_SYNC); + dma_unmap_page_attrs(dev, dma_addr, PAGE_SIZE, dma_dir, DMA_ATTR_SKIP_CPU_SYNC); #endif } fault_mismatch: @@ -1819,6 +1806,7 @@ u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride, size_t i; bool coherent; uint64_t max_stride; + enum kbase_memory_zone zone; /* Calls to this function are inherently asynchronous, with respect to * MMU operations. 
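
The user-buffer import hunks above stop mapping imported pages DMA_BIDIRECTIONAL unconditionally; the direction now follows the region's access flags. A minimal sketch of that selection, mirroring the added code (illustrative only; reg stands for the region being imported):

	int write = reg->flags & (KBASE_REG_CPU_WR | KBASE_REG_GPU_WR);
	enum dma_data_direction dma_dir = write ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE;

Read-only imports are then mapped DMA_TO_DEVICE, which avoids needless CPU cache invalidation when the pages are unmapped.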
@@ -1870,13 +1858,12 @@ u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride, /* 64-bit tasks must MMAP anyway, but not expose this address to * clients */ + zone = SAME_VA_ZONE; *flags |= BASE_MEM_NEED_MMAP; - reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_same, 0, *num_pages, - KBASE_REG_ZONE_SAME_VA); - } else { - reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_custom, 0, *num_pages, - KBASE_REG_ZONE_CUSTOM_VA); - } + } else + zone = CUSTOM_VA_ZONE; + + reg = kbase_ctx_alloc_free_region(kctx, zone, 0, *num_pages); if (!reg) goto no_reg; @@ -2200,7 +2187,7 @@ int kbase_mem_grow_gpu_mapping(struct kbase_context *kctx, phy_pages = kbase_get_gpu_phy_pages(reg); ret = kbase_mmu_insert_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn + old_pages, phy_pages + old_pages, delta, reg->flags, kctx->as_nr, - reg->gpu_alloc->group_id, mmu_sync_info, reg, false); + reg->gpu_alloc->group_id, mmu_sync_info, reg); return ret; } @@ -2215,7 +2202,7 @@ void kbase_mem_shrink_cpu_mapping(struct kbase_context *kctx, /* Nothing to do */ return; - unmap_mapping_range(kctx->filp->f_inode->i_mapping, + unmap_mapping_range(kctx->kfile->filp->f_inode->i_mapping, (gpu_va_start + new_pages)<<PAGE_SHIFT, (old_pages - new_pages)<<PAGE_SHIFT, 1); } @@ -2229,7 +2216,7 @@ int kbase_mem_shrink_gpu_mapping(struct kbase_context *const kctx, int ret = 0; ret = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn + new_pages, - alloc->pages + new_pages, delta, delta, kctx->as_nr, false); + alloc->pages + new_pages, delta, delta, kctx->as_nr); return ret; } @@ -2388,6 +2375,21 @@ int kbase_mem_shrink(struct kbase_context *const kctx, return -EINVAL; delta = old_pages - new_pages; + if (kctx->kbdev->pagesize_2mb) { + struct tagged_addr *start_free = reg->gpu_alloc->pages + new_pages; + + /* Move the end of new committed range to a valid location. + * This mirrors the adjustment done inside kbase_free_phy_pages_helper(). + */ + while (delta && is_huge(*start_free) && !is_huge_head(*start_free)) { + start_free++; + new_pages++; + delta--; + } + + if (!delta) + return 0; + } /* Update the GPU mapping */ err = kbase_mem_shrink_gpu_mapping(kctx, reg, @@ -2400,19 +2402,6 @@ int kbase_mem_shrink(struct kbase_context *const kctx, kbase_free_phy_pages_helper(reg->cpu_alloc, delta); if (reg->cpu_alloc != reg->gpu_alloc) kbase_free_phy_pages_helper(reg->gpu_alloc, delta); - - if (kctx->kbdev->pagesize_2mb) { - if (kbase_reg_current_backed_size(reg) > new_pages) { - old_pages = new_pages; - new_pages = kbase_reg_current_backed_size(reg); - - /* Update GPU mapping. */ - err = kbase_mem_grow_gpu_mapping(kctx, reg, new_pages, old_pages, - CALLER_MMU_ASYNC); - } - } else { - WARN_ON(kbase_reg_current_backed_size(reg) != new_pages); - } } return err; @@ -2446,8 +2435,7 @@ static void kbase_cpu_vm_close(struct vm_area_struct *vma) kbase_gpu_vm_lock(map->kctx); if (map->free_on_close) { - KBASE_DEBUG_ASSERT((map->region->flags & KBASE_REG_ZONE_MASK) == - KBASE_REG_ZONE_SAME_VA); + KBASE_DEBUG_ASSERT(kbase_bits_to_zone(map->region->flags) == SAME_VA_ZONE); /* Avoid freeing memory on the process death which results in * GPU Page Fault. 
Memory will be freed in kbase_destroy_context */ @@ -2461,6 +2449,7 @@ static void kbase_cpu_vm_close(struct vm_area_struct *vma) kbase_gpu_vm_unlock(map->kctx); kbase_mem_phy_alloc_put(map->alloc); + kbase_file_dec_cpu_mapping_count(map->kctx->kfile); kfree(map); } @@ -2519,9 +2508,17 @@ static vm_fault_t kbase_cpu_vm_fault(struct vm_fault *vmf) KBASE_DEBUG_ASSERT(map->kctx); KBASE_DEBUG_ASSERT(map->alloc); + kbase_gpu_vm_lock(map->kctx); + + /* Reject faults for SAME_VA mapping of UMM allocations */ + if ((map->alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM) && map->free_on_close) { + dev_warn(map->kctx->kbdev->dev, "Invalid CPU access to UMM memory for ctx %d_%d", + map->kctx->tgid, map->kctx->id); + goto exit; + } + map_start_pgoff = vma->vm_pgoff - map->region->start_pfn; - kbase_gpu_vm_lock(map->kctx); if (unlikely(map->region->cpu_alloc->type == KBASE_MEM_TYPE_ALIAS)) { struct kbase_aliased *aliased = get_aliased_alloc(vma, map->region, &map_start_pgoff, 1); @@ -2608,7 +2605,7 @@ static int kbase_cpu_mmap(struct kbase_context *kctx, * See MIDBASE-1057 */ - vma->vm_flags |= VM_DONTCOPY | VM_DONTDUMP | VM_DONTEXPAND | VM_IO; + vm_flags_set(vma, VM_DONTCOPY | VM_DONTDUMP | VM_DONTEXPAND | VM_IO); vma->vm_ops = &kbase_vm_ops; vma->vm_private_data = map; @@ -2636,12 +2633,12 @@ static int kbase_cpu_mmap(struct kbase_context *kctx, vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); } - if (!kaddr) { - vma->vm_flags |= VM_PFNMAP; - } else { + if (!kaddr) + vm_flags_set(vma, VM_PFNMAP); + else { WARN_ON(aligned_offset); /* MIXEDMAP so we can vfree the kaddr early and not track it after map time */ - vma->vm_flags |= VM_MIXEDMAP; + vm_flags_set(vma, VM_MIXEDMAP); /* vmalloc remaping is easy... */ err = remap_vmalloc_range(vma, kaddr, 0); WARN_ON(err); @@ -2662,6 +2659,7 @@ static int kbase_cpu_mmap(struct kbase_context *kctx, map->alloc->properties |= KBASE_MEM_PHY_ALLOC_ACCESSED_CACHED; list_add(&map->mappings_list, &map->alloc->mappings); + kbase_file_inc_cpu_mapping_count(kctx->kfile); out: return err; @@ -2710,8 +2708,7 @@ static int kbase_mmu_dump_mmap(struct kbase_context *kctx, goto out; } - new_reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_same, 0, nr_pages, - KBASE_REG_ZONE_SAME_VA); + new_reg = kbase_ctx_alloc_free_region(kctx, SAME_VA_ZONE, 0, nr_pages); if (!new_reg) { err = -ENOMEM; WARN_ON(1); @@ -2855,9 +2852,9 @@ int kbase_context_mmap(struct kbase_context *const kctx, dev_dbg(dev, "kbase_mmap\n"); if (!(vma->vm_flags & VM_READ)) - vma->vm_flags &= ~VM_MAYREAD; + vm_flags_clear(vma, VM_MAYREAD); if (!(vma->vm_flags & VM_WRITE)) - vma->vm_flags &= ~VM_MAYWRITE; + vm_flags_clear(vma, VM_MAYWRITE); if (nr_pages == 0) { err = -EINVAL; @@ -3070,6 +3067,9 @@ static void kbase_vmap_phy_pages_migrate_count_increment(struct tagged_addr *pag { size_t i; + if (!IS_ENABLED(CONFIG_PAGE_MIGRATION_SUPPORT)) + return; + for (i = 0; i < page_count; i++) { struct page *p = as_page(pages[i]); struct kbase_page_metadata *page_md = kbase_page_private(p); @@ -3119,6 +3119,9 @@ static void kbase_vunmap_phy_pages_migrate_count_decrement(struct tagged_addr *p { size_t i; + if (!IS_ENABLED(CONFIG_PAGE_MIGRATION_SUPPORT)) + return; + for (i = 0; i < page_count; i++) { struct page *p = as_page(pages[i]); struct kbase_page_metadata *page_md = kbase_page_private(p); @@ -3219,7 +3222,7 @@ static int kbase_vmap_phy_pages(struct kbase_context *kctx, struct kbase_va_regi * of all physical pages. In case of errors, e.g. too many mappings, * make the page not movable to prevent trouble. 
*/ - if (kbase_page_migration_enabled && !kbase_mem_is_imported(reg->gpu_alloc->type)) + if (kbase_is_page_migration_enabled() && !kbase_mem_is_imported(reg->gpu_alloc->type)) kbase_vmap_phy_pages_migrate_count_increment(page_array, page_count, reg->flags); kfree(pages); @@ -3331,7 +3334,7 @@ static void kbase_vunmap_phy_pages(struct kbase_context *kctx, * for all physical pages. Now is a good time to do it because references * haven't been released yet. */ - if (kbase_page_migration_enabled && !kbase_mem_is_imported(map->gpu_alloc->type)) { + if (kbase_is_page_migration_enabled() && !kbase_mem_is_imported(map->gpu_alloc->type)) { const size_t page_count = PFN_UP(map->offset_in_page + map->size); struct tagged_addr *pages_array = map->cpu_pages; @@ -3367,11 +3370,14 @@ KBASE_EXPORT_TEST_API(kbase_vunmap); static void kbasep_add_mm_counter(struct mm_struct *mm, int member, long value) { -#if (KERNEL_VERSION(4, 19, 0) <= LINUX_VERSION_CODE) - /* To avoid the build breakage due to an unexported kernel symbol - * 'mm_trace_rss_stat' from later kernels, i.e. from V4.19.0 onwards, - * we inline here the equivalent of 'add_mm_counter()' from linux - * kernel V5.4.0~8. +#if (KERNEL_VERSION(6, 2, 0) <= LINUX_VERSION_CODE) + /* To avoid the build breakage due to the type change in rss_stat, + * we inline here the equivalent of 'add_mm_counter()' from linux kernel V6.2. + */ + percpu_counter_add(&mm->rss_stat[member], value); +#elif (KERNEL_VERSION(5, 5, 0) <= LINUX_VERSION_CODE) + /* To avoid the build breakage due to an unexported kernel symbol 'mm_trace_rss_stat', + * we inline here the equivalent of 'add_mm_counter()' from linux kernel V5.5. */ atomic_long_add(value, &mm->rss_stat.count[member]); #else @@ -3396,15 +3402,37 @@ void kbasep_os_process_page_usage_update(struct kbase_context *kctx, int pages) #endif } +static void kbase_special_vm_open(struct vm_area_struct *vma) +{ + struct kbase_context *kctx = vma->vm_private_data; + + kbase_file_inc_cpu_mapping_count(kctx->kfile); +} + +static void kbase_special_vm_close(struct vm_area_struct *vma) +{ + struct kbase_context *kctx = vma->vm_private_data; + + kbase_file_dec_cpu_mapping_count(kctx->kfile); +} + +static const struct vm_operations_struct kbase_vm_special_ops = { + .open = kbase_special_vm_open, + .close = kbase_special_vm_close, +}; + static int kbase_tracking_page_setup(struct kbase_context *kctx, struct vm_area_struct *vma) { if (vma_pages(vma) != 1) return -EINVAL; /* no real access */ - vma->vm_flags &= ~(VM_READ | VM_MAYREAD | VM_WRITE | VM_MAYWRITE | VM_EXEC | VM_MAYEXEC); - vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_DONTDUMP | VM_IO; + vm_flags_clear(vma, VM_READ | VM_MAYREAD | VM_WRITE | VM_MAYWRITE | VM_EXEC | VM_MAYEXEC); + vm_flags_set(vma, VM_DONTCOPY | VM_DONTEXPAND | VM_DONTDUMP | VM_IO); + vma->vm_ops = &kbase_vm_special_ops; + vma->vm_private_data = kctx; + kbase_file_inc_cpu_mapping_count(kctx->kfile); return 0; } @@ -3459,6 +3487,7 @@ static void kbase_csf_user_io_pages_vm_close(struct vm_area_struct *vma) struct kbase_device *kbdev; int err; bool reset_prevented = false; + struct kbase_file *kfile; if (!queue) { pr_debug("Close method called for the new User IO pages mapping vma\n"); @@ -3467,6 +3496,7 @@ static void kbase_csf_user_io_pages_vm_close(struct vm_area_struct *vma) kctx = queue->kctx; kbdev = kctx->kbdev; + kfile = kctx->kfile; err = kbase_reset_gpu_prevent_and_wait(kbdev); if (err) @@ -3484,8 +3514,9 @@ static void kbase_csf_user_io_pages_vm_close(struct vm_area_struct *vma) if (reset_prevented) 
kbase_reset_gpu_allow(kbdev); + kbase_file_dec_cpu_mapping_count(kfile); /* Now as the vma is closed, drop the reference on mali device file */ - fput(kctx->filp); + fput(kfile->filp); } #if (KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE) @@ -3618,13 +3649,13 @@ static int kbase_csf_cpu_mmap_user_io_pages(struct kbase_context *kctx, if (err) goto map_failed; - vma->vm_flags |= VM_DONTCOPY | VM_DONTDUMP | VM_DONTEXPAND | VM_IO; + vm_flags_set(vma, VM_DONTCOPY | VM_DONTDUMP | VM_DONTEXPAND | VM_IO); /* TODO use VM_MIXEDMAP, since it is more appropriate as both types of * memory with and without "struct page" backing are being inserted here. * Hw Doorbell pages comes from the device register area so kernel does * not use "struct page" for them. */ - vma->vm_flags |= VM_PFNMAP; + vm_flags_set(vma, VM_PFNMAP); vma->vm_ops = &kbase_csf_user_io_pages_vm_ops; vma->vm_private_data = queue; @@ -3638,6 +3669,7 @@ static int kbase_csf_cpu_mmap_user_io_pages(struct kbase_context *kctx, /* Also adjust the vm_pgoff */ vma->vm_pgoff = queue->db_file_offset; + kbase_file_inc_cpu_mapping_count(kctx->kfile); return 0; map_failed: @@ -3677,6 +3709,7 @@ static void kbase_csf_user_reg_vm_close(struct vm_area_struct *vma) { struct kbase_context *kctx = vma->vm_private_data; struct kbase_device *kbdev; + struct kbase_file *kfile; if (unlikely(!kctx)) { pr_debug("Close function called for the unexpected mapping"); @@ -3684,6 +3717,7 @@ static void kbase_csf_user_reg_vm_close(struct vm_area_struct *vma) } kbdev = kctx->kbdev; + kfile = kctx->kfile; if (unlikely(!kctx->csf.user_reg.vma)) dev_warn(kbdev->dev, "user_reg VMA pointer unexpectedly NULL for ctx %d_%d", @@ -3695,8 +3729,9 @@ static void kbase_csf_user_reg_vm_close(struct vm_area_struct *vma) kctx->csf.user_reg.vma = NULL; + kbase_file_dec_cpu_mapping_count(kfile); /* Now as the VMA is closed, drop the reference on mali device file */ - fput(kctx->filp); + fput(kfile->filp); } /** @@ -3801,12 +3836,12 @@ static int kbase_csf_cpu_mmap_user_reg_page(struct kbase_context *kctx, /* Map uncached */ vma->vm_page_prot = pgprot_device(vma->vm_page_prot); - vma->vm_flags |= VM_DONTCOPY | VM_DONTDUMP | VM_DONTEXPAND | VM_IO; + vm_flags_set(vma, VM_DONTCOPY | VM_DONTDUMP | VM_DONTEXPAND | VM_IO); /* User register page comes from the device register area so * "struct page" isn't available for it. */ - vma->vm_flags |= VM_PFNMAP; + vm_flags_set(vma, VM_PFNMAP); kctx->csf.user_reg.vma = vma; @@ -3826,6 +3861,7 @@ static int kbase_csf_cpu_mmap_user_reg_page(struct kbase_context *kctx, vma->vm_ops = &kbase_csf_user_reg_vm_ops; vma->vm_private_data = kctx; + kbase_file_inc_cpu_mapping_count(kctx->kfile); return 0; } diff --git a/mali_kbase/mali_kbase_mem_migrate.c b/mali_kbase/mali_kbase_mem_migrate.c index 1dc76d0..f2014f6 100644 --- a/mali_kbase/mali_kbase_mem_migrate.c +++ b/mali_kbase/mali_kbase_mem_migrate.c @@ -32,10 +32,33 @@ * provided and if page migration feature is enabled. * Feature is disabled on all platforms by default. */ -int kbase_page_migration_enabled; +#if !IS_ENABLED(CONFIG_PAGE_MIGRATION_SUPPORT) +/* If page migration support is explicitly compiled out, there should be no way to change + * this int. Its value is automatically 0 as a global. + */ +const int kbase_page_migration_enabled; +/* module_param is not called so this value cannot be changed at insmod when compiled + * without support for page migration. 
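+ * When support is compiled in (see below), the parameter can instead be set
+ * at insmod time, e.g. "insmod mali_kbase.ko kbase_page_migration_enabled=0"
+ * (module name illustrative).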
+ */ +#else +/* -1 as default, 0 when manually set as off and 1 when manually set as on */ +int kbase_page_migration_enabled = -1; module_param(kbase_page_migration_enabled, int, 0444); +MODULE_PARM_DESC(kbase_page_migration_enabled, + "Explicitly enable or disable page migration with 1 or 0 respectively."); +#endif /* !IS_ENABLED(CONFIG_PAGE_MIGRATION_SUPPORT) */ + KBASE_EXPORT_TEST_API(kbase_page_migration_enabled); +bool kbase_is_page_migration_enabled(void) +{ + /* Handle uninitialised int case */ + if (kbase_page_migration_enabled < 0) + return false; + return IS_ENABLED(CONFIG_PAGE_MIGRATION_SUPPORT) && kbase_page_migration_enabled; +} +KBASE_EXPORT_SYMBOL(kbase_is_page_migration_enabled); + #if (KERNEL_VERSION(6, 0, 0) <= LINUX_VERSION_CODE) static const struct movable_operations movable_ops; #endif @@ -43,9 +66,15 @@ static const struct movable_operations movable_ops; bool kbase_alloc_page_metadata(struct kbase_device *kbdev, struct page *p, dma_addr_t dma_addr, u8 group_id) { - struct kbase_page_metadata *page_md = - kzalloc(sizeof(struct kbase_page_metadata), GFP_KERNEL); + struct kbase_page_metadata *page_md; + + /* A check for kbase_page_migration_enabled would help here too but it's already being + * checked in the only caller of this function. + */ + if (!IS_ENABLED(CONFIG_PAGE_MIGRATION_SUPPORT)) + return false; + page_md = kzalloc(sizeof(struct kbase_page_metadata), GFP_KERNEL); if (!page_md) return false; @@ -95,6 +124,8 @@ static void kbase_free_page_metadata(struct kbase_device *kbdev, struct page *p, struct kbase_page_metadata *page_md; dma_addr_t dma_addr; + if (!IS_ENABLED(CONFIG_PAGE_MIGRATION_SUPPORT)) + return; page_md = kbase_page_private(p); if (!page_md) return; @@ -109,6 +140,10 @@ static void kbase_free_page_metadata(struct kbase_device *kbdev, struct page *p, ClearPagePrivate(p); } +#if IS_ENABLED(CONFIG_PAGE_MIGRATION_SUPPORT) +/* This function is only called when page migration + * support is not explicitly compiled out. + */ static void kbase_free_pages_worker(struct work_struct *work) { struct kbase_mem_migrate *mem_migrate = @@ -121,14 +156,13 @@ static void kbase_free_pages_worker(struct work_struct *work) spin_lock(&mem_migrate->free_pages_lock); list_splice_init(&mem_migrate->free_pages_list, &free_list); spin_unlock(&mem_migrate->free_pages_lock); - list_for_each_entry_safe(p, tmp, &free_list, lru) { u8 group_id = 0; list_del_init(&p->lru); lock_page(p); page_md = kbase_page_private(p); - if (IS_PAGE_MOVABLE(page_md->status)) { + if (page_md && IS_PAGE_MOVABLE(page_md->status)) { __ClearPageMovable(p); page_md->status = PAGE_MOVABLE_CLEAR(page_md->status); } @@ -138,11 +172,14 @@ static void kbase_free_pages_worker(struct work_struct *work) kbdev->mgm_dev->ops.mgm_free_page(kbdev->mgm_dev, group_id, p, 0); } } +#endif void kbase_free_page_later(struct kbase_device *kbdev, struct page *p) { struct kbase_mem_migrate *mem_migrate = &kbdev->mem_migrate; + if (!IS_ENABLED(CONFIG_PAGE_MIGRATION_SUPPORT)) + return; spin_lock(&mem_migrate->free_pages_lock); list_add(&p->lru, &mem_migrate->free_pages_list); spin_unlock(&mem_migrate->free_pages_lock); @@ -161,6 +198,9 @@ void kbase_free_page_later(struct kbase_device *kbdev, struct page *p) * the movable property. The meta data attached to the PGD page is transferred to the * new (replacement) page. * + * This function returns early with an error if called when not compiled with + * CONFIG_PAGE_MIGRATION_SUPPORT. + * * Return: 0 on migration success, or -EAGAIN for a later retry. 
Otherwise it's a failure * and the migration is aborted. */ @@ -173,6 +213,9 @@ static int kbasep_migrate_page_pt_mapped(struct page *old_page, struct page *new dma_addr_t new_dma_addr; int ret; + if (!IS_ENABLED(CONFIG_PAGE_MIGRATION_SUPPORT)) + return -EINVAL; + /* Create a new dma map for the new page */ new_dma_addr = dma_map_page(kbdev->dev, new_page, 0, PAGE_SIZE, DMA_BIDIRECTIONAL); if (dma_mapping_error(kbdev->dev, new_dma_addr)) @@ -227,6 +270,9 @@ static int kbasep_migrate_page_pt_mapped(struct page *old_page, struct page *new * allocation, which is used to create CPU mappings. Before returning, the new * page shall be set as movable and not isolated, while the old page shall lose * the movable property. + * + * This function returns early with an error if called when not compiled with + * CONFIG_PAGE_MIGRATION_SUPPORT. */ static int kbasep_migrate_page_allocated_mapped(struct page *old_page, struct page *new_page) { @@ -235,6 +281,8 @@ static int kbasep_migrate_page_allocated_mapped(struct page *old_page, struct pa dma_addr_t old_dma_addr, new_dma_addr; int ret; + if (!IS_ENABLED(CONFIG_PAGE_MIGRATION_SUPPORT)) + return -EINVAL; old_dma_addr = page_md->dma_addr; new_dma_addr = dma_map_page(kctx->kbdev->dev, new_page, 0, PAGE_SIZE, DMA_BIDIRECTIONAL); if (dma_mapping_error(kctx->kbdev->dev, new_dma_addr)) @@ -246,7 +294,8 @@ static int kbasep_migrate_page_allocated_mapped(struct page *old_page, struct pa kbase_gpu_vm_lock(kctx); /* Unmap the old physical range. */ - unmap_mapping_range(kctx->filp->f_inode->i_mapping, page_md->data.mapped.vpfn << PAGE_SHIFT, + unmap_mapping_range(kctx->kfile->filp->f_inode->i_mapping, + page_md->data.mapped.vpfn << PAGE_SHIFT, PAGE_SIZE, 1); ret = kbase_mmu_migrate_page(as_tagged(page_to_phys(old_page)), @@ -290,6 +339,7 @@ static int kbasep_migrate_page_allocated_mapped(struct page *old_page, struct pa * @mode: LRU Isolation modes. * * Callback function for Linux to isolate a page and prepare it for migration. + * This callback is not registered if compiled without CONFIG_PAGE_MIGRATION_SUPPORT. * * Return: true on success, false otherwise. */ @@ -299,6 +349,8 @@ static bool kbase_page_isolate(struct page *p, isolate_mode_t mode) struct kbase_mem_pool *mem_pool = NULL; struct kbase_page_metadata *page_md = kbase_page_private(p); + if (!IS_ENABLED(CONFIG_PAGE_MIGRATION_SUPPORT)) + return false; CSTD_UNUSED(mode); if (!page_md || !IS_PAGE_MOVABLE(page_md->status)) @@ -390,6 +442,7 @@ static bool kbase_page_isolate(struct page *p, isolate_mode_t mode) * * Callback function for Linux to migrate the content of the old page to the * new page provided. + * This callback is not registered if compiled without CONFIG_PAGE_MIGRATION_SUPPORT. * * Return: 0 on success, error code otherwise. */ @@ -415,7 +468,7 @@ static int kbase_page_migrate(struct page *new_page, struct page *old_page, enum #endif CSTD_UNUSED(mode); - if (!page_md || !IS_PAGE_MOVABLE(page_md->status)) + if (!kbase_is_page_migration_enabled() || !page_md || !IS_PAGE_MOVABLE(page_md->status)) return -EINVAL; if (!spin_trylock(&page_md->migrate_lock)) @@ -500,6 +553,7 @@ static int kbase_page_migrate(struct page *new_page, struct page *old_page, enum * will only be called for a page that has been isolated but failed to * migrate. This function will put back the given page to the state it was * in before it was isolated. + * This callback is not registered if compiled without CONFIG_PAGE_MIGRATION_SUPPORT. 
*/ static void kbase_page_putback(struct page *p) { @@ -509,6 +563,8 @@ static void kbase_page_putback(struct page *p) struct kbase_page_metadata *page_md = kbase_page_private(p); struct kbase_device *kbdev = NULL; + if (!IS_ENABLED(CONFIG_PAGE_MIGRATION_SUPPORT)) + return; /* If we don't have page metadata, the page may not belong to the * driver or may already have been freed, and there's nothing we can do */ @@ -585,6 +641,9 @@ static const struct address_space_operations kbase_address_space_ops = { #if (KERNEL_VERSION(6, 0, 0) > LINUX_VERSION_CODE) void kbase_mem_migrate_set_address_space_ops(struct kbase_device *kbdev, struct file *const filp) { + if (!kbase_is_page_migration_enabled()) + return; + mutex_lock(&kbdev->fw_load_lock); if (filp) { @@ -607,10 +666,23 @@ void kbase_mem_migrate_set_address_space_ops(struct kbase_device *kbdev, struct void kbase_mem_migrate_init(struct kbase_device *kbdev) { +#if !IS_ENABLED(CONFIG_PAGE_MIGRATION_SUPPORT) + /* Page migration explicitly disabled at compile time - do nothing */ + return; +#else struct kbase_mem_migrate *mem_migrate = &kbdev->mem_migrate; + /* Page migration support compiled in, either explicitly or + * by default, so the default behaviour is to follow the choice + * of large pages if not selected at insmod. Check insmod parameter + * integer for a negative value to see if insmod parameter was + * passed in at all (it will override the default negative value). + */ if (kbase_page_migration_enabled < 0) - kbase_page_migration_enabled = 0; + kbase_page_migration_enabled = kbdev->pagesize_2mb ? 1 : 0; + else + dev_info(kbdev->dev, "Page migration support explicitly %s at insmod.", + kbase_page_migration_enabled ? "enabled" : "disabled"); spin_lock_init(&mem_migrate->free_pages_lock); INIT_LIST_HEAD(&mem_migrate->free_pages_list); @@ -621,12 +693,17 @@ void kbase_mem_migrate_init(struct kbase_device *kbdev) mem_migrate->free_pages_workq = alloc_workqueue("free_pages_workq", WQ_UNBOUND | WQ_MEM_RECLAIM, 1); INIT_WORK(&mem_migrate->free_pages_work, kbase_free_pages_worker); +#endif } void kbase_mem_migrate_term(struct kbase_device *kbdev) { struct kbase_mem_migrate *mem_migrate = &kbdev->mem_migrate; +#if !IS_ENABLED(CONFIG_PAGE_MIGRATION_SUPPORT) + /* Page migration explicitly disabled at compile time - do nothing */ + return; +#endif if (mem_migrate->free_pages_workq) destroy_workqueue(mem_migrate->free_pages_workq); #if (KERNEL_VERSION(6, 0, 0) > LINUX_VERSION_CODE) diff --git a/mali_kbase/mali_kbase_mem_migrate.h b/mali_kbase/mali_kbase_mem_migrate.h index 76bbc99..e9f3fc4 100644 --- a/mali_kbase/mali_kbase_mem_migrate.h +++ b/mali_kbase/mali_kbase_mem_migrate.h @@ -18,6 +18,8 @@ * http://www.gnu.org/licenses/gpl-2.0.html. * */ +#ifndef _KBASE_MEM_MIGRATE_H +#define _KBASE_MEM_MIGRATE_H /** * DOC: Base kernel page migration implementation. @@ -43,7 +45,11 @@ /* Global integer used to determine if module parameter value has been * provided and if page migration feature is enabled. */ +#if !IS_ENABLED(CONFIG_PAGE_MIGRATION_SUPPORT) +extern const int kbase_page_migration_enabled; +#else extern int kbase_page_migration_enabled; +#endif /** * kbase_alloc_page_metadata - Allocate and initialize page metadata @@ -63,6 +69,8 @@ extern int kbase_page_migration_enabled; bool kbase_alloc_page_metadata(struct kbase_device *kbdev, struct page *p, dma_addr_t dma_addr, u8 group_id); +bool kbase_is_page_migration_enabled(void); + /** * kbase_free_page_later - Defer freeing of given page. 
 * @kbdev: Pointer to kbase device
@@ -106,3 +114,5 @@ void kbase_mem_migrate_init(struct kbase_device *kbdev);
 * and destroy workqueue associated.
 */
 void kbase_mem_migrate_term(struct kbase_device *kbdev);
+
+#endif /* _KBASE_MEM_MIGRATE_H */
diff --git a/mali_kbase/mali_kbase_mem_pool.c b/mali_kbase/mali_kbase_mem_pool.c
index 58716be..d942ff5 100644
--- a/mali_kbase/mali_kbase_mem_pool.c
+++ b/mali_kbase/mali_kbase_mem_pool.c
@@ -141,17 +141,21 @@ static bool set_pool_new_page_metadata(struct kbase_mem_pool *pool, struct page
 * Only update page status and add the page to the memory pool if
 * it is not isolated.
 */
- spin_lock(&page_md->migrate_lock);
- if (PAGE_STATUS_GET(page_md->status) == (u8)NOT_MOVABLE) {
+ if (!IS_ENABLED(CONFIG_PAGE_MIGRATION_SUPPORT))
 not_movable = true;
- } else if (!WARN_ON_ONCE(IS_PAGE_ISOLATED(page_md->status))) {
- page_md->status = PAGE_STATUS_SET(page_md->status, (u8)MEM_POOL);
- page_md->data.mem_pool.pool = pool;
- page_md->data.mem_pool.kbdev = pool->kbdev;
- list_add(&p->lru, page_list);
- (*list_size)++;
+ else {
+ spin_lock(&page_md->migrate_lock);
+ if (PAGE_STATUS_GET(page_md->status) == (u8)NOT_MOVABLE) {
+ not_movable = true;
+ } else if (!WARN_ON_ONCE(IS_PAGE_ISOLATED(page_md->status))) {
+ page_md->status = PAGE_STATUS_SET(page_md->status, (u8)MEM_POOL);
+ page_md->data.mem_pool.pool = pool;
+ page_md->data.mem_pool.kbdev = pool->kbdev;
+ list_add(&p->lru, page_list);
+ (*list_size)++;
+ }
+ spin_unlock(&page_md->migrate_lock);
 }
- spin_unlock(&page_md->migrate_lock);
 
 if (not_movable) {
 kbase_free_page_later(pool->kbdev, p);
@@ -173,7 +177,7 @@ static void kbase_mem_pool_add_locked(struct kbase_mem_pool *pool,
 
 lockdep_assert_held(&pool->pool_lock);
 
- if (!pool->order && kbase_page_migration_enabled) {
+ if (!pool->order && kbase_is_page_migration_enabled()) {
 if (set_pool_new_page_metadata(pool, p, &pool->page_list, &pool->cur_size))
 queue_work_to_free = true;
 } else {
@@ -204,7 +208,7 @@ static void kbase_mem_pool_add_list_locked(struct kbase_mem_pool *pool,
 
 lockdep_assert_held(&pool->pool_lock);
 
- if (!pool->order && kbase_page_migration_enabled) {
+ if (!pool->order && kbase_is_page_migration_enabled()) {
 struct page *p, *tmp;
 
 list_for_each_entry_safe(p, tmp, page_list, lru) {
@@ -246,7 +250,7 @@ static struct page *kbase_mem_pool_remove_locked(struct kbase_mem_pool *pool,
 
 p = list_first_entry(&pool->page_list, struct page, lru);
 
- if (!pool->order && kbase_page_migration_enabled) {
+ if (!pool->order && kbase_is_page_migration_enabled()) {
 struct kbase_page_metadata *page_md = kbase_page_private(p);
 
 spin_lock(&page_md->migrate_lock);
@@ -322,7 +326,7 @@ struct page *kbase_mem_alloc_page(struct kbase_mem_pool *pool)
 if (pool->order)
 gfp |= GFP_HIGHUSER | __GFP_NOWARN;
 else
- gfp |= kbase_page_migration_enabled ? GFP_HIGHUSER_MOVABLE : GFP_HIGHUSER;
+ gfp |= kbase_is_page_migration_enabled() ?
GFP_HIGHUSER_MOVABLE : GFP_HIGHUSER; p = kbdev->mgm_dev->ops.mgm_alloc_page(kbdev->mgm_dev, pool->group_id, gfp, pool->order); @@ -339,7 +343,7 @@ struct page *kbase_mem_alloc_page(struct kbase_mem_pool *pool) } /* Setup page metadata for 4KB pages when page migration is enabled */ - if (!pool->order && kbase_page_migration_enabled) { + if (!pool->order && kbase_is_page_migration_enabled()) { INIT_LIST_HEAD(&p->lru); if (!kbase_alloc_page_metadata(kbdev, p, dma_addr, pool->group_id)) { dma_unmap_page(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); @@ -360,7 +364,7 @@ static void enqueue_free_pool_pages_work(struct kbase_mem_pool *pool) { struct kbase_mem_migrate *mem_migrate = &pool->kbdev->mem_migrate; - if (!pool->order && kbase_page_migration_enabled) + if (!pool->order && kbase_is_page_migration_enabled()) queue_work(mem_migrate->free_pages_workq, &mem_migrate->free_pages_work); } @@ -375,7 +379,7 @@ void kbase_mem_pool_free_page(struct kbase_mem_pool *pool, struct page *p) kbdev = pool->kbdev; - if (!pool->order && kbase_page_migration_enabled) { + if (!pool->order && kbase_is_page_migration_enabled()) { kbase_free_page_later(kbdev, p); pool_dbg(pool, "page to be freed to kernel later\n"); } else { @@ -677,9 +681,10 @@ void kbase_mem_pool_term(struct kbase_mem_pool *pool) /* Before returning wait to make sure there are no pages undergoing page isolation * which will require reference to this pool. */ - while (atomic_read(&pool->isolation_in_progress_cnt)) - cpu_relax(); - + if (kbase_is_page_migration_enabled()) { + while (atomic_read(&pool->isolation_in_progress_cnt)) + cpu_relax(); + } pool_dbg(pool, "terminated\n"); } KBASE_EXPORT_TEST_API(kbase_mem_pool_term); diff --git a/mali_kbase/mali_kbase_pbha.c b/mali_kbase/mali_kbase_pbha.c index b65f9e7..b446bd5 100644 --- a/mali_kbase/mali_kbase_pbha.c +++ b/mali_kbase/mali_kbase_pbha.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2021-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -23,7 +23,10 @@ #include <device/mali_kbase_device.h> #include <mali_kbase.h> + +#if MALI_USE_CSF #define DTB_SET_SIZE 2 +#endif static bool read_setting_valid(unsigned int id, unsigned int read_setting) { @@ -209,6 +212,7 @@ void kbase_pbha_write_settings(struct kbase_device *kbdev) } } +#if MALI_USE_CSF static int kbase_pbha_read_int_id_override_property(struct kbase_device *kbdev, const struct device_node *pbha_node) { @@ -216,17 +220,28 @@ static int kbase_pbha_read_int_id_override_property(struct kbase_device *kbdev, int sz, i; bool valid = true; - sz = of_property_count_elems_of_size(pbha_node, "int_id_override", - sizeof(u32)); + sz = of_property_count_elems_of_size(pbha_node, "int-id-override", sizeof(u32)); + + if (sz == -EINVAL) { + /* There is no int-id-override field. Fallback to int_id_override instead */ + sz = of_property_count_elems_of_size(pbha_node, "int_id_override", sizeof(u32)); + } + if (sz == -EINVAL) { + /* There is no int_id_override field. This is valid - but there's nothing further + * to do here. 
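+ *
+ * For reference, a DT node that does carry the property might look like
+ * (values illustrative): pbha { int-id-override = <2 0x32>, <9 0x05>; };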
+ */ + return 0; + } if (sz <= 0 || (sz % DTB_SET_SIZE != 0)) { dev_err(kbdev->dev, "Bad DTB format: pbha.int_id_override\n"); return -EINVAL; } - if (of_property_read_u32_array(pbha_node, "int_id_override", dtb_data, - sz) != 0) { - dev_err(kbdev->dev, - "Failed to read DTB pbha.int_id_override\n"); - return -EINVAL; + if (of_property_read_u32_array(pbha_node, "int-id-override", dtb_data, sz) != 0) { + /* There may be no int-id-override field. Fallback to int_id_override instead */ + if (of_property_read_u32_array(pbha_node, "int_id_override", dtb_data, sz) != 0) { + dev_err(kbdev->dev, "Failed to read DTB pbha.int_id_override\n"); + return -EINVAL; + } } for (i = 0; valid && i < sz; i = i + DTB_SET_SIZE) { @@ -250,17 +265,20 @@ static int kbase_pbha_read_int_id_override_property(struct kbase_device *kbdev, return 0; } -#if MALI_USE_CSF static int kbase_pbha_read_propagate_bits_property(struct kbase_device *kbdev, const struct device_node *pbha_node) { - u32 bits; + u32 bits = 0; int err; if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PBHA_HWU)) return 0; - err = of_property_read_u32(pbha_node, "propagate_bits", &bits); + err = of_property_read_u32(pbha_node, "propagate-bits", &bits); + + if (err == -EINVAL) { + err = of_property_read_u32(pbha_node, "propagate_bits", &bits); + } if (err < 0) { if (err != -EINVAL) { @@ -268,6 +286,10 @@ static int kbase_pbha_read_propagate_bits_property(struct kbase_device *kbdev, "DTB value for propagate_bits is improperly formed (err=%d)\n", err); return err; + } else { + /* Property does not exist */ + kbdev->pbha_propagate_bits = 0; + return 0; } } @@ -279,10 +301,11 @@ static int kbase_pbha_read_propagate_bits_property(struct kbase_device *kbdev, kbdev->pbha_propagate_bits = bits; return 0; } -#endif +#endif /* MALI_USE_CSF */ int kbase_pbha_read_dtb(struct kbase_device *kbdev) { +#if MALI_USE_CSF const struct device_node *pbha_node; int err; @@ -295,12 +318,12 @@ int kbase_pbha_read_dtb(struct kbase_device *kbdev) err = kbase_pbha_read_int_id_override_property(kbdev, pbha_node); -#if MALI_USE_CSF if (err < 0) return err; err = kbase_pbha_read_propagate_bits_property(kbdev, pbha_node); -#endif - return err; +#else + return 0; +#endif } diff --git a/mali_kbase/mali_kbase_pm.c b/mali_kbase/mali_kbase_pm.c index bfd5b7e..40278a8 100644 --- a/mali_kbase/mali_kbase_pm.c +++ b/mali_kbase/mali_kbase_pm.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -211,10 +211,28 @@ int kbase_pm_driver_suspend(struct kbase_device *kbdev) kbdev->pm.active_count == 0); dev_dbg(kbdev->dev, ">wait_event - waiting done\n"); +#if MALI_USE_CSF + /* At this point, any kbase context termination should either have run to + * completion and any further context termination can only begin after + * the system resumes. Therefore, it is now safe to skip taking the context + * list lock when traversing the context list. 
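+ *
+ * The halt is paired with kbase_csf_kcpu_queue_resume_timers() on resume,
+ * and also on suspend failure below.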
+ */ + if (kbase_csf_kcpu_queue_halt_timers(kbdev)) { + rt_mutex_lock(&kbdev->pm.lock); + kbdev->pm.suspending = false; + rt_mutex_unlock(&kbdev->pm.lock); + return -1; + } +#endif + /* NOTE: We synchronize with anything that was just finishing a * kbase_pm_context_idle() call by locking the pm.lock below */ if (kbase_hwaccess_pm_suspend(kbdev)) { +#if MALI_USE_CSF + /* Resume the timers in case of suspend failure. */ + kbase_csf_kcpu_queue_resume_timers(kbdev); +#endif rt_mutex_lock(&kbdev->pm.lock); kbdev->pm.suspending = false; rt_mutex_unlock(&kbdev->pm.lock); @@ -262,6 +280,8 @@ void kbase_pm_driver_resume(struct kbase_device *kbdev, bool arb_gpu_start) kbasep_js_resume(kbdev); #else kbase_csf_scheduler_pm_resume(kbdev); + + kbase_csf_kcpu_queue_resume_timers(kbdev); #endif /* Matching idle call, to power off the GPU/cores if we didn't actually @@ -283,6 +303,10 @@ void kbase_pm_driver_resume(struct kbase_device *kbdev, bool arb_gpu_start) /* Resume HW counters intermediaries. */ kbase_vinstr_resume(kbdev->vinstr_ctx); kbase_kinstr_prfcnt_resume(kbdev->kinstr_prfcnt_ctx); + /* System resume callback is complete */ + kbdev->pm.resuming = false; + /* Unblock the threads waiting for the completion of System suspend/resume */ + wake_up_all(&kbdev->pm.resume_wait); } int kbase_pm_suspend(struct kbase_device *kbdev) diff --git a/mali_kbase/mali_kbase_pm.h b/mali_kbase/mali_kbase_pm.h index 0639762..4ff3699 100644 --- a/mali_kbase/mali_kbase_pm.h +++ b/mali_kbase/mali_kbase_pm.h @@ -292,13 +292,14 @@ void kbase_pm_apc_term(struct kbase_device *kbdev); */ void kbase_pm_apc_request(struct kbase_device *kbdev, u32 dur_usec); -/* - * Print debug message indicating power state of GPU. +/** + * Print debug message indicating power state of GPU * @kbdev: The kbase device structure for the device (must be a valid pointer) + * @timeout_msg: A message to print. * * Prerequisite: GPU is powered. - * Takes and releases kbdev->hwaccess_lock + * Takes and releases kbdev->hwaccess_lock on CSF GPUs. */ -void kbase_gpu_timeout_debug_message(struct kbase_device *kbdev); +void kbase_gpu_timeout_debug_message(struct kbase_device *kbdev, const char *timeout_msg); #endif /* _KBASE_PM_H_ */ diff --git a/mali_kbase/mali_kbase_softjobs.c b/mali_kbase/mali_kbase_softjobs.c index d65ff2d..0ad2bf8 100644 --- a/mali_kbase/mali_kbase_softjobs.c +++ b/mali_kbase/mali_kbase_softjobs.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2011-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -41,6 +41,7 @@ #include <linux/kernel.h> #include <linux/cache.h> #include <linux/file.h> +#include <linux/version_compat_defs.h> #if !MALI_USE_CSF /** @@ -751,7 +752,7 @@ static void *dma_buf_kmap_page(struct kbase_mem_phy_alloc *gpu_alloc, if (page_index == page_num) { *page = sg_page_iter_page(&sg_iter); - return kmap(*page); + return kbase_kmap(*page); } page_index++; } @@ -797,14 +798,13 @@ static int kbase_mem_copy_from_extres(struct kbase_context *kctx, for (i = 0; i < buf_data->nr_extres_pages && target_page_nr < buf_data->nr_pages; i++) { struct page *pg = buf_data->extres_pages[i]; - void *extres_page = kmap(pg); - + void *extres_page = kbase_kmap(pg); if (extres_page) { ret = kbase_mem_copy_to_pinned_user_pages( pages, extres_page, &to_copy, buf_data->nr_pages, &target_page_nr, offset); - kunmap(pg); + kbase_kunmap(pg, extres_page); if (ret) goto out_unlock; } @@ -839,7 +839,7 @@ static int kbase_mem_copy_from_extres(struct kbase_context *kctx, &target_page_nr, offset); #if KERNEL_VERSION(5, 6, 0) <= LINUX_VERSION_CODE - kunmap(pg); + kbase_kunmap(pg, extres_page); #else dma_buf_kunmap(dma_buf, i, extres_page); #endif diff --git a/mali_kbase/mali_kbase_strings.h b/mali_kbase/mali_kbase_strings.h deleted file mode 100644 index c3f94f9..0000000 --- a/mali_kbase/mali_kbase_strings.h +++ /dev/null @@ -1,23 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -/* - * - * (C) COPYRIGHT 2010-2016, 2020-2021 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU license. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - */ - -extern const char kbase_drv_name[]; -extern const char kbase_timeline_name[]; diff --git a/mali_kbase/mali_kbase_utility.h b/mali_kbase/mali_kbase_utility.h deleted file mode 100644 index 2dad49b..0000000 --- a/mali_kbase/mali_kbase_utility.h +++ /dev/null @@ -1,52 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -/* - * - * (C) COPYRIGHT 2012-2013, 2015, 2018, 2020-2021 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU license. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. 
- * - */ - -#ifndef _KBASE_UTILITY_H -#define _KBASE_UTILITY_H - -#ifndef _KBASE_H_ -#error "Don't include this file directly, use mali_kbase.h instead" -#endif - -static inline void kbase_timer_setup(struct timer_list *timer, - void (*callback)(struct timer_list *timer)) -{ -#if KERNEL_VERSION(4, 14, 0) > LINUX_VERSION_CODE - setup_timer(timer, (void (*)(unsigned long)) callback, - (unsigned long) timer); -#else - timer_setup(timer, callback, 0); -#endif -} - -#ifndef WRITE_ONCE - #ifdef ASSIGN_ONCE - #define WRITE_ONCE(x, val) ASSIGN_ONCE(val, x) - #else - #define WRITE_ONCE(x, val) (ACCESS_ONCE(x) = (val)) - #endif -#endif - -#ifndef READ_ONCE - #define READ_ONCE(x) ACCESS_ONCE(x) -#endif - -#endif /* _KBASE_UTILITY_H */ diff --git a/mali_kbase/mali_kbase_vinstr.c b/mali_kbase/mali_kbase_vinstr.c index 5f3dabd..3fce09c 100644 --- a/mali_kbase/mali_kbase_vinstr.c +++ b/mali_kbase/mali_kbase_vinstr.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2011-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -541,8 +541,10 @@ void kbase_vinstr_term(struct kbase_vinstr_context *vctx) void kbase_vinstr_suspend(struct kbase_vinstr_context *vctx) { - if (WARN_ON(!vctx)) + if (!vctx) { + pr_warn("%s: vctx is NULL\n", __func__); return; + } mutex_lock(&vctx->lock); @@ -571,8 +573,10 @@ void kbase_vinstr_suspend(struct kbase_vinstr_context *vctx) void kbase_vinstr_resume(struct kbase_vinstr_context *vctx) { - if (WARN_ON(!vctx)) + if (!vctx) { + pr_warn("%s: vctx is NULL\n", __func__); return; + } mutex_lock(&vctx->lock); diff --git a/mali_kbase/mali_linux_trace.h b/mali_kbase/mali_linux_trace.h index 49058d3..1293a0b 100644 --- a/mali_kbase/mali_linux_trace.h +++ b/mali_kbase/mali_linux_trace.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2011-2016, 2018-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -173,7 +173,7 @@ TRACE_EVENT(mali_total_alloc_pages_change, ((status) & AS_FAULTSTATUS_ACCESS_TYPE_MASK) #define KBASE_MMU_FAULT_ACCESS_SYMBOLIC_STRINGS _ENSURE_PARENTHESIS(\ {AS_FAULTSTATUS_ACCESS_TYPE_ATOMIC, "ATOMIC" }, \ - {AS_FAULTSTATUS_ACCESS_TYPE_EX, "EXECUTE"}, \ + {AS_FAULTSTATUS_ACCESS_TYPE_EXECUTE, "EXECUTE"}, \ {AS_FAULTSTATUS_ACCESS_TYPE_READ, "READ" }, \ {AS_FAULTSTATUS_ACCESS_TYPE_WRITE, "WRITE" }) #define KBASE_MMU_FAULT_STATUS_ACCESS_PRINT(status) \ diff --git a/mali_kbase/mali_kbase_strings.c b/mali_kbase/mali_power_gpu_work_period_trace.c index 84784be..8e7bf6f 100644 --- a/mali_kbase/mali_kbase_strings.c +++ b/mali_kbase/mali_power_gpu_work_period_trace.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2010-2016, 2020-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2023 ARM Limited. All rights reserved.
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -19,10 +19,10 @@ * */ -#include "mali_kbase_strings.h" - -#define KBASE_DRV_NAME "mali" -#define KBASE_TIMELINE_NAME KBASE_DRV_NAME ".timeline" - -const char kbase_drv_name[] = KBASE_DRV_NAME; -const char kbase_timeline_name[] = KBASE_TIMELINE_NAME; +/* Create the trace point if not configured in kernel */ +#ifndef CONFIG_TRACE_POWER_GPU_WORK_PERIOD +#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) +#define CREATE_TRACE_POINTS +#include "mali_power_gpu_work_period_trace.h" +#endif /* CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD */ +#endif diff --git a/mali_kbase/mali_power_gpu_work_period_trace.h b/mali_kbase/mali_power_gpu_work_period_trace.h new file mode 100644 index 0000000..46e86ad --- /dev/null +++ b/mali_kbase/mali_power_gpu_work_period_trace.h @@ -0,0 +1,88 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * + * (C) COPYRIGHT 2023 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +#ifndef _TRACE_POWER_GPU_WORK_PERIOD_MALI +#define _TRACE_POWER_GPU_WORK_PERIOD_MALI +#endif + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM power +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_FILE mali_power_gpu_work_period_trace +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH . + +#if !defined(_TRACE_POWER_GPU_WORK_PERIOD_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_POWER_GPU_WORK_PERIOD_H + +#include <linux/tracepoint.h> + +/** + * gpu_work_period - Reports GPU work period metrics + * + * @gpu_id: Unique GPU Identifier + * @uid: UID of an application + * @start_time_ns: Start time of a GPU work period in nanoseconds + * @end_time_ns: End time of a GPU work period in nanoseconds + * @total_active_duration_ns: Total amount of time the GPU was running GPU work for given + * UID during the GPU work period, in nanoseconds. This duration does + * not double-account parallel GPU work for the same UID. 
+ */ +TRACE_EVENT(gpu_work_period, + + TP_PROTO( + u32 gpu_id, + u32 uid, + u64 start_time_ns, + u64 end_time_ns, + u64 total_active_duration_ns + ), + + TP_ARGS(gpu_id, uid, start_time_ns, end_time_ns, total_active_duration_ns), + + TP_STRUCT__entry( + __field(u32, gpu_id) + __field(u32, uid) + __field(u64, start_time_ns) + __field(u64, end_time_ns) + __field(u64, total_active_duration_ns) + ), + + TP_fast_assign( + __entry->gpu_id = gpu_id; + __entry->uid = uid; + __entry->start_time_ns = start_time_ns; + __entry->end_time_ns = end_time_ns; + __entry->total_active_duration_ns = total_active_duration_ns; + ), + + TP_printk("gpu_id=%u uid=%u start_time_ns=%llu end_time_ns=%llu total_active_duration_ns=%llu", + __entry->gpu_id, + __entry->uid, + __entry->start_time_ns, + __entry->end_time_ns, + __entry->total_active_duration_ns) +); + +#endif /* _TRACE_POWER_GPU_WORK_PERIOD_H */ + +/* This part must be outside protection */ +#include <trace/define_trace.h> diff --git a/mali_kbase/mmu/backend/mali_kbase_mmu_csf.c b/mali_kbase/mmu/backend/mali_kbase_mmu_csf.c index 4cac787..a057d3c 100644 --- a/mali_kbase/mmu/backend/mali_kbase_mmu_csf.c +++ b/mali_kbase/mmu/backend/mali_kbase_mmu_csf.c @@ -146,8 +146,7 @@ void kbase_gpu_report_bus_fault_and_kill(struct kbase_context *kctx, GPU_FAULTSTATUS_ACCESS_TYPE_SHIFT; int source_id = (status & GPU_FAULTSTATUS_SOURCE_ID_MASK) >> GPU_FAULTSTATUS_SOURCE_ID_SHIFT; - const char *addr_valid = (status & GPU_FAULTSTATUS_ADDR_VALID_FLAG) ? - "true" : "false"; + const char *addr_valid = (status & GPU_FAULTSTATUS_ADDRESS_VALID_MASK) ? "true" : "false"; int as_no = as->number; unsigned long flags; const uintptr_t fault_addr = fault->addr; @@ -247,12 +246,13 @@ void kbase_mmu_report_fault_and_kill(struct kbase_context *kctx, spin_lock_irqsave(&kbdev->hwaccess_lock, flags); kbase_mmu_disable(kctx); kbase_ctx_flag_set(kctx, KCTX_AS_DISABLED_ON_FAULT); + kbase_debug_csf_fault_notify(kbdev, kctx, DF_GPU_PAGE_FAULT); + kbase_csf_ctx_report_page_fault_for_active_groups(kctx, fault); spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); mutex_unlock(&kbdev->mmu_hw_mutex); /* AS transaction end */ - kbase_debug_csf_fault_notify(kbdev, kctx, DF_GPU_PAGE_FAULT); /* Switching to UNMAPPED mode above would have enabled the firmware to * recover from the fault (if the memory access was made by firmware) * and it can then respond to CSG termination requests to be sent now. 
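The gpu_work_period event declared in the new mali_power_gpu_work_period_trace.h above uses the kernel's standard TRACE_EVENT() machinery: exactly one translation unit (here, conditionally, the renamed mali_power_gpu_work_period_trace.c) defines CREATE_TRACE_POINTS before including the header, and callers then emit the event through the generated trace_gpu_work_period() helper. A minimal sketch of a call site, where report_work_period() is a hypothetical wrapper rather than a function from this patch:

#include "mali_power_gpu_work_period_trace.h"

static void report_work_period(u32 gpu_id, u32 uid, u64 start_time_ns, u64 end_time_ns,
			       u64 total_active_duration_ns)
{
	/* Emits the power:gpu_work_period event whenever the tracepoint is enabled */
	trace_gpu_work_period(gpu_id, uid, start_time_ns, end_time_ns,
			      total_active_duration_ns);
}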
@@ -368,9 +368,9 @@ void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat) /* remember current mask */ spin_lock_irqsave(&kbdev->mmu_mask_change, flags); - new_mask = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK)); + new_mask = kbase_reg_read(kbdev, MMU_CONTROL_REG(MMU_IRQ_MASK)); /* mask interrupts for now */ - kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), 0); + kbase_reg_write(kbdev, MMU_CONTROL_REG(MMU_IRQ_MASK), 0); spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags); while (pf_bits) { @@ -380,11 +380,11 @@ void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat) struct kbase_fault *fault = &as->pf_data; /* find faulting address */ - fault->addr = kbase_reg_read(kbdev, MMU_AS_REG(as_no, - AS_FAULTADDRESS_HI)); + fault->addr = kbase_reg_read(kbdev, + MMU_STAGE1_REG(MMU_AS_REG(as_no, AS_FAULTADDRESS_HI))); fault->addr <<= 32; - fault->addr |= kbase_reg_read(kbdev, MMU_AS_REG(as_no, - AS_FAULTADDRESS_LO)); + fault->addr |= kbase_reg_read( + kbdev, MMU_STAGE1_REG(MMU_AS_REG(as_no, AS_FAULTADDRESS_LO))); /* Mark the fault protected or not */ fault->protected_mode = false; @@ -393,14 +393,14 @@ void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat) kbase_as_fault_debugfs_new(kbdev, as_no); /* record the fault status */ - fault->status = kbase_reg_read(kbdev, MMU_AS_REG(as_no, - AS_FAULTSTATUS)); + fault->status = + kbase_reg_read(kbdev, MMU_STAGE1_REG(MMU_AS_REG(as_no, AS_FAULTSTATUS))); - fault->extra_addr = kbase_reg_read(kbdev, - MMU_AS_REG(as_no, AS_FAULTEXTRA_HI)); + fault->extra_addr = + kbase_reg_read(kbdev, MMU_STAGE1_REG(MMU_AS_REG(as_no, AS_FAULTEXTRA_HI))); fault->extra_addr <<= 32; - fault->extra_addr |= kbase_reg_read(kbdev, - MMU_AS_REG(as_no, AS_FAULTEXTRA_LO)); + fault->extra_addr |= + kbase_reg_read(kbdev, MMU_STAGE1_REG(MMU_AS_REG(as_no, AS_FAULTEXTRA_LO))); /* Mark page fault as handled */ pf_bits &= ~(1UL << as_no); @@ -432,9 +432,9 @@ void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat) /* reenable interrupts */ spin_lock_irqsave(&kbdev->mmu_mask_change, flags); - tmp = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK)); + tmp = kbase_reg_read(kbdev, MMU_CONTROL_REG(MMU_IRQ_MASK)); new_mask |= tmp; - kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), new_mask); + kbase_reg_write(kbdev, MMU_CONTROL_REG(MMU_IRQ_MASK), new_mask); spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags); } @@ -470,19 +470,16 @@ static void kbase_mmu_gpu_fault_worker(struct work_struct *data) spin_lock_irqsave(&kbdev->hwaccess_lock, flags); fault = &faulting_as->gf_data; status = fault->status; - as_valid = status & GPU_FAULTSTATUS_JASID_VALID_FLAG; + as_valid = status & GPU_FAULTSTATUS_JASID_VALID_MASK; address = fault->addr; spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); dev_warn(kbdev->dev, "GPU Fault 0x%08x (%s) in AS%u at 0x%016llx\n" "ASID_VALID: %s, ADDRESS_VALID: %s\n", - status, - kbase_gpu_exception_name( - GPU_FAULTSTATUS_EXCEPTION_TYPE_GET(status)), - as_nr, address, - as_valid ? "true" : "false", - status & GPU_FAULTSTATUS_ADDR_VALID_FLAG ? "true" : "false"); + status, kbase_gpu_exception_name(GPU_FAULTSTATUS_EXCEPTION_TYPE_GET(status)), + as_nr, address, as_valid ? "true" : "false", + status & GPU_FAULTSTATUS_ADDRESS_VALID_MASK ? 
"true" : "false"); kctx = kbase_ctx_sched_as_to_ctx(kbdev, as_nr); kbase_csf_ctx_handle_fault(kctx, fault); @@ -558,9 +555,8 @@ int kbase_mmu_as_init(struct kbase_device *kbdev, unsigned int i) kbdev->as[i].bf_data.addr = 0ULL; kbdev->as[i].pf_data.addr = 0ULL; kbdev->as[i].gf_data.addr = 0ULL; - kbdev->as[i].is_unresponsive = false; - kbdev->as[i].pf_wq = alloc_workqueue("mali_mmu%d", WQ_UNBOUND, 1, i); + kbdev->as[i].pf_wq = alloc_workqueue("mali_mmu%d", WQ_UNBOUND, 0, i); if (!kbdev->as[i].pf_wq) return -ENOMEM; diff --git a/mali_kbase/mmu/backend/mali_kbase_mmu_jm.c b/mali_kbase/mmu/backend/mali_kbase_mmu_jm.c index d716ce0..5c774c2 100644 --- a/mali_kbase/mmu/backend/mali_kbase_mmu_jm.c +++ b/mali_kbase/mmu/backend/mali_kbase_mmu_jm.c @@ -322,9 +322,9 @@ void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat) /* remember current mask */ spin_lock_irqsave(&kbdev->mmu_mask_change, flags); - new_mask = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK)); + new_mask = kbase_reg_read(kbdev, MMU_CONTROL_REG(MMU_IRQ_MASK)); /* mask interrupts for now */ - kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), 0); + kbase_reg_write(kbdev, MMU_CONTROL_REG(MMU_IRQ_MASK), 0); spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags); while (bf_bits | pf_bits) { @@ -355,11 +355,11 @@ void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat) kctx = kbase_ctx_sched_as_to_ctx_refcount(kbdev, as_no); /* find faulting address */ - fault->addr = kbase_reg_read(kbdev, MMU_AS_REG(as_no, - AS_FAULTADDRESS_HI)); + fault->addr = kbase_reg_read(kbdev, + MMU_STAGE1_REG(MMU_AS_REG(as_no, AS_FAULTADDRESS_HI))); fault->addr <<= 32; - fault->addr |= kbase_reg_read(kbdev, MMU_AS_REG(as_no, - AS_FAULTADDRESS_LO)); + fault->addr |= kbase_reg_read( + kbdev, MMU_STAGE1_REG(MMU_AS_REG(as_no, AS_FAULTADDRESS_LO))); /* Mark the fault protected or not */ fault->protected_mode = kbdev->protected_mode; @@ -372,13 +372,13 @@ void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat) kbase_as_fault_debugfs_new(kbdev, as_no); /* record the fault status */ - fault->status = kbase_reg_read(kbdev, MMU_AS_REG(as_no, - AS_FAULTSTATUS)); - fault->extra_addr = kbase_reg_read(kbdev, - MMU_AS_REG(as_no, AS_FAULTEXTRA_HI)); + fault->status = + kbase_reg_read(kbdev, MMU_STAGE1_REG(MMU_AS_REG(as_no, AS_FAULTSTATUS))); + fault->extra_addr = + kbase_reg_read(kbdev, MMU_STAGE1_REG(MMU_AS_REG(as_no, AS_FAULTEXTRA_HI))); fault->extra_addr <<= 32; - fault->extra_addr |= kbase_reg_read(kbdev, - MMU_AS_REG(as_no, AS_FAULTEXTRA_LO)); + fault->extra_addr |= + kbase_reg_read(kbdev, MMU_STAGE1_REG(MMU_AS_REG(as_no, AS_FAULTEXTRA_LO))); if (kbase_as_has_bus_fault(as, fault)) { /* Mark bus fault as handled. 
@@ -406,9 +406,9 @@ void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat) /* reenable interrupts */ spin_lock_irqsave(&kbdev->mmu_mask_change, flags); - tmp = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK)); + tmp = kbase_reg_read(kbdev, MMU_CONTROL_REG(MMU_IRQ_MASK)); new_mask |= tmp; - kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), new_mask); + kbase_reg_write(kbdev, MMU_CONTROL_REG(MMU_IRQ_MASK), new_mask); spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags); dev_dbg(kbdev->dev, "Leaving %s irq_stat %u\n", @@ -429,9 +429,8 @@ int kbase_mmu_as_init(struct kbase_device *kbdev, unsigned int i) kbdev->as[i].number = i; kbdev->as[i].bf_data.addr = 0ULL; kbdev->as[i].pf_data.addr = 0ULL; - kbdev->as[i].is_unresponsive = false; - kbdev->as[i].pf_wq = alloc_workqueue("mali_mmu%u", 0, 1, i); + kbdev->as[i].pf_wq = alloc_workqueue("mali_mmu%u", 0, 0, i); if (!kbdev->as[i].pf_wq) return -ENOMEM; diff --git a/mali_kbase/mmu/mali_kbase_mmu.c b/mali_kbase/mmu/mali_kbase_mmu.c index ccbd9c3..f8641a6 100644 --- a/mali_kbase/mmu/mali_kbase_mmu.c +++ b/mali_kbase/mmu/mali_kbase_mmu.c @@ -46,6 +46,7 @@ #if !MALI_USE_CSF #include <mali_kbase_hwaccess_jm.h> #endif +#include <linux/version_compat_defs.h> #include <mali_kbase_trace_gpu_mem.h> #include <backend/gpu/mali_kbase_pm_internal.h> @@ -57,6 +58,11 @@ /* Macro to convert updated PDGs to flags indicating levels skip in flush */ #define pgd_level_to_skip_flush(dirty_pgds) (~(dirty_pgds) & 0xF) +static int mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, + const u64 start_vpfn, struct tagged_addr *phys, size_t nr, + unsigned long flags, int const group_id, u64 *dirty_pgds, + struct kbase_va_region *reg, bool ignore_page_migration); + /* Small wrapper function to factor out GPU-dependent context releasing */ static void release_ctx(struct kbase_device *kbdev, struct kbase_context *kctx) @@ -201,7 +207,7 @@ static void mmu_flush_invalidate_as(struct kbase_device *kbdev, struct kbase_as mutex_lock(&kbdev->mmu_hw_mutex); spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - if (kbdev->pm.backend.gpu_powered && (kbase_mmu_hw_do_flush_locked(kbdev, as, op_param))) + if (kbdev->pm.backend.gpu_ready && (kbase_mmu_hw_do_flush_locked(kbdev, as, op_param))) dev_err(kbdev->dev, "Flush for GPU page table update did not complete"); spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); @@ -389,7 +395,7 @@ static bool kbase_mmu_handle_isolated_pgd_page(struct kbase_device *kbdev, lockdep_assert_held(&mmut->mmu_lock); - if (!kbase_page_migration_enabled) + if (!kbase_is_page_migration_enabled()) return false; spin_lock(&page_md->migrate_lock); @@ -404,8 +410,10 @@ static bool kbase_mmu_handle_isolated_pgd_page(struct kbase_device *kbdev, page_md->status = PAGE_STATUS_SET(page_md->status, FREE_IN_PROGRESS); } + } else if ((PAGE_STATUS_GET(page_md->status) == FREE_IN_PROGRESS) || + (PAGE_STATUS_GET(page_md->status) == ALLOCATE_IN_PROGRESS)) { + /* Nothing to do - fall through */ } else { - WARN_ON_ONCE(mmut->kctx); WARN_ON_ONCE(PAGE_STATUS_GET(page_md->status) != NOT_MOVABLE); } spin_unlock(&page_md->migrate_lock); @@ -431,7 +439,7 @@ static bool kbase_mmu_handle_isolated_pgd_page(struct kbase_device *kbdev, * @pgd: Physical address of page directory to be freed. * * This function is supposed to be called with mmu_lock held and after - * ensuring that GPU won't be able to access the page. + * ensuring that the GPU won't be able to access the page. 
*/ static void kbase_mmu_free_pgd(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, phys_addr_t pgd) @@ -727,7 +735,7 @@ static void kbase_gpu_mmu_handle_permission_fault(struct kbase_context *kctx, case AS_FAULTSTATUS_ACCESS_TYPE_WRITE: kbase_gpu_mmu_handle_write_fault(kctx, faulting_as); break; - case AS_FAULTSTATUS_ACCESS_TYPE_EX: + case AS_FAULTSTATUS_ACCESS_TYPE_EXECUTE: kbase_mmu_report_fault_and_kill(kctx, faulting_as, "Execute Permission fault", fault); break; @@ -1293,10 +1301,11 @@ page_fault_retry: * so the no_flush version of insert_pages is used which allows * us to unlock the MMU as we see fit. */ - err = kbase_mmu_insert_pages_no_flush( - kbdev, &kctx->mmu, region->start_pfn + pfn_offset, - &kbase_get_gpu_phy_pages(region)[pfn_offset], new_pages, region->flags, - region->gpu_alloc->group_id, &dirty_pgds, region, false); + err = mmu_insert_pages_no_flush(kbdev, &kctx->mmu, region->start_pfn + pfn_offset, + &kbase_get_gpu_phy_pages(region)[pfn_offset], + new_pages, region->flags, + region->gpu_alloc->group_id, &dirty_pgds, region, + false); if (err) { kbase_free_phy_pages_helper(region->gpu_alloc, new_pages); @@ -1480,7 +1489,8 @@ static phys_addr_t kbase_mmu_alloc_pgd(struct kbase_device *kbdev, if (!p) return KBASE_MMU_INVALID_PGD_ADDRESS; - page = kmap(p); + page = kbase_kmap(p); + if (page == NULL) goto alloc_free; @@ -1513,7 +1523,7 @@ static phys_addr_t kbase_mmu_alloc_pgd(struct kbase_device *kbdev, */ kbase_mmu_sync_pgd_cpu(kbdev, kbase_dma_addr(p), PAGE_SIZE); - kunmap(p); + kbase_kunmap(p, page); return pgd; alloc_free: @@ -1553,7 +1563,7 @@ static int mmu_get_next_pgd(struct kbase_device *kbdev, struct kbase_mmu_table * vpfn &= 0x1FF; p = pfn_to_page(PFN_DOWN(*pgd)); - page = kmap(p); + page = kbase_kmap(p); if (page == NULL) { dev_err(kbdev->dev, "%s: kmap failure", __func__); return -EINVAL; @@ -1562,7 +1572,7 @@ static int mmu_get_next_pgd(struct kbase_device *kbdev, struct kbase_mmu_table * if (!kbdev->mmu_mode->pte_is_valid(page[vpfn], level)) { dev_dbg(kbdev->dev, "%s: invalid PTE at level %d vpfn 0x%llx", __func__, level, vpfn); - kunmap(p); + kbase_kunmap(p, page); return -EFAULT; } else { target_pgd = kbdev->mmu_mode->pte_to_phy_addr( @@ -1570,7 +1580,7 @@ static int mmu_get_next_pgd(struct kbase_device *kbdev, struct kbase_mmu_table * kbdev->mgm_dev, MGM_DEFAULT_PTE_GROUP, level, page[vpfn])); } - kunmap(p); + kbase_kunmap(p, page); *pgd = target_pgd; return 0; @@ -1700,10 +1710,10 @@ static void mmu_insert_pages_failure_recovery(struct kbase_device *kbdev, level <= MIDGARD_MMU_BOTTOMLEVEL; level++) { idx = (vpfn >> ((3 - level) * 9)) & 0x1FF; pgds[level] = pgd; - page = kmap(p); + page = kbase_kmap(p); if (mmu_mode->ate_is_valid(page[idx], level)) break; /* keep the mapping */ - kunmap(p); + kbase_kunmap(p, page); pgd = mmu_mode->pte_to_phy_addr(kbdev->mgm_dev->ops.mgm_pte_to_original_pte( kbdev->mgm_dev, MGM_DEFAULT_PTE_GROUP, level, page[idx])); p = phys_to_page(pgd); @@ -1736,7 +1746,7 @@ static void mmu_insert_pages_failure_recovery(struct kbase_device *kbdev, mmu_mode->entries_invalidate(&page[idx], pcount); if (!num_of_valid_entries) { - kunmap(p); + kbase_kunmap(p, page); kbase_mmu_add_to_free_pgds_list(mmut, p); @@ -1754,7 +1764,7 @@ static void mmu_insert_pages_failure_recovery(struct kbase_device *kbdev, kbase_mmu_sync_pgd(kbdev, mmut->kctx, pgd + (idx * sizeof(u64)), kbase_dma_addr(p) + sizeof(u64) * idx, sizeof(u64) * pcount, KBASE_MMU_OP_NONE); - kunmap(p); + kbase_kunmap(p, page); next: vpfn += count; } @@ -1764,7 +1774,7 @@ next: * 
going to happen to these pages at this stage. They might return * movable once they are returned to a memory pool. */ - if (kbase_page_migration_enabled && !ignore_page_migration && phys) { + if (kbase_is_page_migration_enabled() && !ignore_page_migration && phys) { const u64 num_pages = to_vpfn - from_vpfn + 1; u64 i; @@ -1831,7 +1841,6 @@ static void mmu_flush_invalidate_insert_pages(struct kbase_device *kbdev, * The bottom PGD level. * @insert_level: The level of MMU page table where the chain of newly allocated * PGDs needs to be linked-in/inserted. - * The top-most PDG level to be updated. * @insert_vpfn: The virtual page frame number for the ATE. * @pgds_to_insert: Ptr to an array (size MIDGARD_MMU_BOTTOMLEVEL+1) that contains * the physical addresses of newly allocated PGDs from index @@ -1839,7 +1848,7 @@ static void mmu_flush_invalidate_insert_pages(struct kbase_device *kbdev, * insert_level. * * The newly allocated PGDs are linked from the bottom level up and inserted into the PGD - * at insert_level which already exists in the MMU Page Tables.Migration status is also + * at insert_level which already exists in the MMU Page Tables. Migration status is also * updated for all the newly allocated PGD pages. * * Return: @@ -1873,7 +1882,8 @@ static int update_parent_pgds(struct kbase_device *kbdev, struct kbase_mmu_table goto failure_recovery; } - parent_page_va = kmap(parent_page); + parent_page_va = kbase_kmap(parent_page); + if (unlikely(parent_page_va == NULL)) { dev_err(kbdev->dev, "%s: kmap failure", __func__); err = -EINVAL; @@ -1886,7 +1896,7 @@ static int update_parent_pgds(struct kbase_device *kbdev, struct kbase_mmu_table parent_page_va[parent_vpfn] = kbdev->mgm_dev->ops.mgm_update_gpu_pte( kbdev->mgm_dev, MGM_DEFAULT_PTE_GROUP, parent_index, pte); kbdev->mmu_mode->set_num_valid_entries(parent_page_va, current_valid_entries + 1); - kunmap(parent_page); + kbase_kunmap(parent_page, parent_page_va); if (parent_index != insert_level) { /* Newly allocated PGDs */ @@ -1905,7 +1915,7 @@ static int update_parent_pgds(struct kbase_device *kbdev, struct kbase_mmu_table } /* Update the new target_pgd page to its stable state */ - if (kbase_page_migration_enabled) { + if (kbase_is_page_migration_enabled()) { struct kbase_page_metadata *page_md = kbase_page_private(phys_to_page(target_pgd)); @@ -1934,11 +1944,11 @@ failure_recovery: for (; pgd_index < cur_level; pgd_index++) { phys_addr_t pgd = pgds_to_insert[pgd_index]; struct page *pgd_page = pfn_to_page(PFN_DOWN(pgd)); - u64 *pgd_page_va = kmap(pgd_page); + u64 *pgd_page_va = kbase_kmap(pgd_page); u64 vpfn = (insert_vpfn >> ((3 - pgd_index) * 9)) & 0x1FF; kbdev->mmu_mode->entries_invalidate(&pgd_page_va[vpfn], 1); - kunmap(pgd_page); + kbase_kunmap(pgd_page, pgd_page_va); } return err; @@ -2001,10 +2011,11 @@ static int mmu_insert_alloc_pgds(struct kbase_device *kbdev, struct kbase_mmu_ta return 0; } -int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 start_vpfn, - struct tagged_addr phys, size_t nr, unsigned long flags, - int const group_id, enum kbase_caller_mmu_sync_info mmu_sync_info, - bool ignore_page_migration) +static int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 start_vpfn, + struct tagged_addr phys, size_t nr, unsigned long flags, + int const group_id, + enum kbase_caller_mmu_sync_info mmu_sync_info, + bool ignore_page_migration) { phys_addr_t pgd; u64 *pgd_page; @@ -2034,7 +2045,7 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 start_vpfn, /* If page migration is 
enabled, pages involved in multiple GPU mappings * are always treated as not movable. */ - if (kbase_page_migration_enabled && !ignore_page_migration) { + if (kbase_is_page_migration_enabled() && !ignore_page_migration) { struct page *phys_page = as_page(phys); struct kbase_page_metadata *page_md = kbase_page_private(phys_page); @@ -2099,7 +2110,8 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 start_vpfn, } p = pfn_to_page(PFN_DOWN(pgd)); - pgd_page = kmap(p); + + pgd_page = kbase_kmap(p); if (!pgd_page) { dev_err(kbdev->dev, "%s: kmap failure", __func__); err = -ENOMEM; @@ -2147,14 +2159,14 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 start_vpfn, kbdev->mmu_mode->entries_invalidate(&pgd_page[vindex], count); - kunmap(p); + kbase_kunmap(p, pgd_page); goto fail_unlock_free_pgds; } } insert_vpfn += count; remain -= count; - kunmap(p); + kbase_kunmap(p, pgd_page); } rt_mutex_unlock(&mmut->mmu_lock); @@ -2211,6 +2223,9 @@ static void kbase_mmu_progress_migration_on_insert(struct tagged_addr phys, struct page *phys_page = as_page(phys); struct kbase_page_metadata *page_md = kbase_page_private(phys_page); + if (!IS_ENABLED(CONFIG_PAGE_MIGRATION_SUPPORT)) + return; + spin_lock(&page_md->migrate_lock); /* If no GPU va region is given: the metadata provided are @@ -2245,6 +2260,9 @@ static void kbase_mmu_progress_migration_on_teardown(struct kbase_device *kbdev, { size_t i; + if (!IS_ENABLED(CONFIG_PAGE_MIGRATION_SUPPORT)) + return; + for (i = 0; i < requested_nr; i++) { struct page *phys_page = as_page(phys[i]); struct kbase_page_metadata *page_md = kbase_page_private(phys_page); @@ -2294,10 +2312,10 @@ u64 kbase_mmu_create_ate(struct kbase_device *const kbdev, group_id, level, entry); } -int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, - const u64 start_vpfn, struct tagged_addr *phys, size_t nr, - unsigned long flags, int const group_id, u64 *dirty_pgds, - struct kbase_va_region *reg, bool ignore_page_migration) +static int mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, + const u64 start_vpfn, struct tagged_addr *phys, size_t nr, + unsigned long flags, int const group_id, u64 *dirty_pgds, + struct kbase_va_region *reg, bool ignore_page_migration) { phys_addr_t pgd; u64 *pgd_page; @@ -2378,7 +2396,8 @@ int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu } p = pfn_to_page(PFN_DOWN(pgd)); - pgd_page = kmap(p); + pgd_page = kbase_kmap(p); + if (!pgd_page) { dev_err(kbdev->dev, "%s: kmap failure", __func__); err = -ENOMEM; @@ -2415,7 +2434,7 @@ int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu /* If page migration is enabled, this is the right time * to update the status of the page. 
*/ - if (kbase_page_migration_enabled && !ignore_page_migration && + if (kbase_is_page_migration_enabled() && !ignore_page_migration && !is_huge(phys[i]) && !is_partial(phys[i])) kbase_mmu_progress_migration_on_insert(phys[i], reg, mmut, insert_vpfn + i); @@ -2450,7 +2469,7 @@ int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu kbdev->mmu_mode->entries_invalidate(&pgd_page[vindex], count); - kunmap(p); + kbase_kunmap(p, pgd_page); goto fail_unlock_free_pgds; } } @@ -2458,7 +2477,7 @@ int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu phys += count; insert_vpfn += count; remain -= count; - kunmap(p); + kbase_kunmap(p, pgd_page); } rt_mutex_unlock(&mmut->mmu_lock); @@ -2485,6 +2504,23 @@ fail_unlock: return err; } +int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, + const u64 start_vpfn, struct tagged_addr *phys, size_t nr, + unsigned long flags, int const group_id, u64 *dirty_pgds, + struct kbase_va_region *reg) +{ + int err; + + /* Early out if there is nothing to do */ + if (nr == 0) + return 0; + + err = mmu_insert_pages_no_flush(kbdev, mmut, start_vpfn, phys, nr, flags, group_id, + dirty_pgds, reg, false); + + return err; +} + /* * Map 'nr' pages pointed to by 'phys' at GPU PFN 'vpfn' for GPU address space * number 'as_nr'. @@ -2492,7 +2528,7 @@ fail_unlock: int kbase_mmu_insert_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, u64 vpfn, struct tagged_addr *phys, size_t nr, unsigned long flags, int as_nr, int const group_id, enum kbase_caller_mmu_sync_info mmu_sync_info, - struct kbase_va_region *reg, bool ignore_page_migration) + struct kbase_va_region *reg) { int err; u64 dirty_pgds = 0; @@ -2501,8 +2537,8 @@ int kbase_mmu_insert_pages(struct kbase_device *kbdev, struct kbase_mmu_table *m if (nr == 0) return 0; - err = kbase_mmu_insert_pages_no_flush(kbdev, mmut, vpfn, phys, nr, flags, group_id, - &dirty_pgds, reg, ignore_page_migration); + err = mmu_insert_pages_no_flush(kbdev, mmut, vpfn, phys, nr, flags, group_id, &dirty_pgds, + reg, false); if (err) return err; @@ -2513,11 +2549,12 @@ int kbase_mmu_insert_pages(struct kbase_device *kbdev, struct kbase_mmu_table *m KBASE_EXPORT_TEST_API(kbase_mmu_insert_pages); -int kbase_mmu_insert_imported_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, - u64 vpfn, struct tagged_addr *phys, size_t nr, - unsigned long flags, int as_nr, int const group_id, - enum kbase_caller_mmu_sync_info mmu_sync_info, - struct kbase_va_region *reg) +int kbase_mmu_insert_pages_skip_status_update(struct kbase_device *kbdev, + struct kbase_mmu_table *mmut, u64 vpfn, + struct tagged_addr *phys, size_t nr, + unsigned long flags, int as_nr, int const group_id, + enum kbase_caller_mmu_sync_info mmu_sync_info, + struct kbase_va_region *reg) { int err; u64 dirty_pgds = 0; @@ -2529,8 +2566,8 @@ int kbase_mmu_insert_imported_pages(struct kbase_device *kbdev, struct kbase_mmu /* Imported allocations don't have metadata and therefore always ignore the * page migration logic. */ - err = kbase_mmu_insert_pages_no_flush(kbdev, mmut, vpfn, phys, nr, flags, group_id, - &dirty_pgds, reg, true); + err = mmu_insert_pages_no_flush(kbdev, mmut, vpfn, phys, nr, flags, group_id, &dirty_pgds, + reg, true); if (err) return err; @@ -2555,8 +2592,8 @@ int kbase_mmu_insert_aliased_pages(struct kbase_device *kbdev, struct kbase_mmu_ /* Memory aliases are always built on top of existing allocations, * therefore the state of physical pages shall be updated. 
*/ - err = kbase_mmu_insert_pages_no_flush(kbdev, mmut, vpfn, phys, nr, flags, group_id, - &dirty_pgds, reg, false); + err = mmu_insert_pages_no_flush(kbdev, mmut, vpfn, phys, nr, flags, group_id, &dirty_pgds, + reg, false); if (err) return err; @@ -2771,7 +2808,8 @@ static void kbase_mmu_update_and_free_parent_pgds(struct kbase_device *kbdev, current_level--) { phys_addr_t current_pgd = pgds[current_level]; struct page *p = phys_to_page(current_pgd); - u64 *current_page = kmap(p); + + u64 *current_page = kbase_kmap(p); unsigned int current_valid_entries = kbdev->mmu_mode->get_num_valid_entries(current_page); int index = (vpfn >> ((3 - current_level) * 9)) & 0x1FF; @@ -2783,7 +2821,7 @@ static void kbase_mmu_update_and_free_parent_pgds(struct kbase_device *kbdev, kbdev->mmu_mode->entries_invalidate(¤t_page[index], 1); if (current_valid_entries == 1 && current_level != MIDGARD_MMU_LEVEL(0)) { - kunmap(p); + kbase_kunmap(p, current_page); /* Ensure the cacheline containing the last valid entry * of PGD is invalidated from the GPU cache, before the @@ -2800,7 +2838,7 @@ static void kbase_mmu_update_and_free_parent_pgds(struct kbase_device *kbdev, kbdev->mmu_mode->set_num_valid_entries( current_page, current_valid_entries); - kunmap(p); + kbase_kunmap(p, current_page); kbase_mmu_sync_pgd(kbdev, mmut->kctx, current_pgd + (index * sizeof(u64)), kbase_dma_addr(p) + (index * sizeof(u64)), sizeof(u64), @@ -2856,7 +2894,7 @@ static void mmu_flush_invalidate_teardown_pages(struct kbase_device *kbdev, for (i = 0; !flush_done && i < phys_page_nr; i++) { spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags); - if (kbdev->pm.backend.gpu_powered && (!kctx || kctx->as_nr >= 0)) + if (kbdev->pm.backend.gpu_ready && (!kctx || kctx->as_nr >= 0)) mmu_flush_pa_range(kbdev, as_phys_addr_t(phys[i]), PAGE_SIZE, KBASE_MMU_OP_FLUSH_MEM); else @@ -2897,7 +2935,7 @@ static int kbase_mmu_teardown_pgd_pages(struct kbase_device *kbdev, struct kbase phys_addr_t next_pgd; index = (vpfn >> ((3 - level) * 9)) & 0x1FF; - page = kmap(p); + page = kbase_kmap(p); if (mmu_mode->ate_is_valid(page[index], level)) break; /* keep the mapping */ else if (!mmu_mode->pte_is_valid(page[index], level)) { @@ -2923,7 +2961,7 @@ static int kbase_mmu_teardown_pgd_pages(struct kbase_device *kbdev, struct kbase next_pgd = mmu_mode->pte_to_phy_addr( kbdev->mgm_dev->ops.mgm_pte_to_original_pte( kbdev->mgm_dev, MGM_DEFAULT_PTE_GROUP, level, page[index])); - kunmap(p); + kbase_kunmap(p, page); pgds[level] = pgd; pgd = next_pgd; p = phys_to_page(pgd); @@ -2934,7 +2972,7 @@ static int kbase_mmu_teardown_pgd_pages(struct kbase_device *kbdev, struct kbase case MIDGARD_MMU_LEVEL(1): dev_warn(kbdev->dev, "%s: No support for ATEs at level %d", __func__, level); - kunmap(p); + kbase_kunmap(p, page); goto out; case MIDGARD_MMU_LEVEL(2): /* can only teardown if count >= 512 */ @@ -2972,7 +3010,7 @@ static int kbase_mmu_teardown_pgd_pages(struct kbase_device *kbdev, struct kbase mmu_mode->entries_invalidate(&page[index], pcount); if (!num_of_valid_entries) { - kunmap(p); + kbase_kunmap(p, page); /* Ensure the cacheline(s) containing the last valid entries * of PGD is invalidated from the GPU cache, before the @@ -2998,17 +3036,48 @@ static int kbase_mmu_teardown_pgd_pages(struct kbase_device *kbdev, struct kbase kbase_dma_addr(p) + (index * sizeof(u64)), pcount * sizeof(u64), flush_op); next: - kunmap(p); - vpfn += count; - nr -= count; + kbase_kunmap(p, page); + vpfn += count; + nr -= count; } out: return 0; } -int kbase_mmu_teardown_pages(struct kbase_device 
*kbdev, struct kbase_mmu_table *mmut, u64 vpfn, - struct tagged_addr *phys, size_t nr_phys_pages, size_t nr_virt_pages, - int as_nr, bool ignore_page_migration) +/** + * mmu_teardown_pages - Remove GPU virtual addresses from the MMU page table + * + * @kbdev: Pointer to kbase device. + * @mmut: Pointer to GPU MMU page table. + * @vpfn: Start page frame number of the GPU virtual pages to unmap. + * @phys: Array of physical pages currently mapped to the virtual + * pages to unmap, or NULL. This is used for GPU cache maintenance + * and page migration support. + * @nr_phys_pages: Number of physical pages to flush. + * @nr_virt_pages: Number of virtual pages whose PTEs should be destroyed. + * @as_nr: Address space number, for GPU cache maintenance operations + * that happen outside a specific kbase context. + * @ignore_page_migration: Whether page migration metadata should be ignored. + * + * We actually discard the ATE and free the page table pages if no valid entries + * exist in the PGD. + * + * IMPORTANT: This uses kbasep_js_runpool_release_ctx() when the context is + * currently scheduled into the runpool, and so potentially uses a lot of locks. + * These locks must be taken in the correct order with respect to others + * already held by the caller. Refer to kbasep_js_runpool_release_ctx() for more + * information. + * + * The @p phys pointer to physical pages is not necessary for unmapping virtual memory, + * but it is used for fine-grained GPU cache maintenance. If @p phys is NULL, + * GPU cache maintenance will be done as usual; that is, invalidating the whole GPU caches + * instead of specific physical address ranges. + * + * Return: 0 on success, otherwise an error code. + */ +static int mmu_teardown_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, u64 vpfn, + struct tagged_addr *phys, size_t nr_phys_pages, size_t nr_virt_pages, + int as_nr, bool ignore_page_migration) { u64 start_vpfn = vpfn; enum kbase_mmu_op_type flush_op = KBASE_MMU_OP_NONE; @@ -3089,7 +3158,7 @@ int kbase_mmu_teardown_pages(struct kbase_device *kbdev, struct kbase_mmu_table * updated before releasing the lock to protect against concurrent * requests to migrate the pages, if they have been isolated. 
*/ - if (kbase_page_migration_enabled && phys && !ignore_page_migration) + if (kbase_is_page_migration_enabled() && phys && !ignore_page_migration) kbase_mmu_progress_migration_on_teardown(kbdev, phys, nr_phys_pages); kbase_mmu_free_pgds_list(kbdev, mmut); @@ -3098,7 +3167,22 @@ int kbase_mmu_teardown_pages(struct kbase_device *kbdev, struct kbase_mmu_table return err; } -KBASE_EXPORT_TEST_API(kbase_mmu_teardown_pages); + +int kbase_mmu_teardown_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, u64 vpfn, + struct tagged_addr *phys, size_t nr_phys_pages, size_t nr_virt_pages, + int as_nr) +{ + return mmu_teardown_pages(kbdev, mmut, vpfn, phys, nr_phys_pages, nr_virt_pages, as_nr, + false); +} + +int kbase_mmu_teardown_imported_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, + u64 vpfn, struct tagged_addr *phys, size_t nr_phys_pages, + size_t nr_virt_pages, int as_nr) +{ + return mmu_teardown_pages(kbdev, mmut, vpfn, phys, nr_phys_pages, nr_virt_pages, as_nr, + true); +} /** * kbase_mmu_update_pages_no_flush() - Update phy pages and attributes data in GPU @@ -3162,7 +3246,7 @@ int kbase_mmu_update_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu goto fail_unlock; p = pfn_to_page(PFN_DOWN(pgd)); - pgd_page = kmap(p); + pgd_page = kbase_kmap(p); if (!pgd_page) { dev_warn(kbdev->dev, "kmap failure on update_pages"); err = -ENOMEM; @@ -3217,7 +3301,7 @@ int kbase_mmu_update_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu vpfn += count; nr -= count; - kunmap(p); + kbase_kunmap(p, pgd_page); } rt_mutex_unlock(&mmut->mmu_lock); @@ -3339,6 +3423,9 @@ int kbase_mmu_migrate_page(struct tagged_addr old_phys, struct tagged_addr new_p unsigned int num_of_valid_entries; u8 vmap_count = 0; + /* If page migration support is not compiled in, return with fault */ + if (!IS_ENABLED(CONFIG_PAGE_MIGRATION_SUPPORT)) + return -EINVAL; /* Due to the hard binding of mmu_command_instr with kctx_id via kbase_mmu_hw_op_param, * here we skip the no kctx case, which is only used with MCU's mmut. */ @@ -3356,21 +3443,21 @@ int kbase_mmu_migrate_page(struct tagged_addr old_phys, struct tagged_addr new_p index = (vpfn >> ((3 - level) * 9)) & 0x1FF; /* Create all mappings before copying content. - * This is done as early as possible because is the only operation that may + * This is done as early as possible because it is the only operation that may * fail. It is possible to do this before taking any locks because the * pages to migrate are not going to change and even the parent PGD is not * going to be affected by any other concurrent operation, since the page * has been isolated before migration and therefore it cannot disappear in * the middle of this function. 
*/ - old_page = kmap(as_page(old_phys)); + old_page = kbase_kmap(as_page(old_phys)); if (!old_page) { dev_warn(kbdev->dev, "%s: kmap failure for old page.", __func__); ret = -EINVAL; goto old_page_map_error; } - new_page = kmap(as_page(new_phys)); + new_page = kbase_kmap(as_page(new_phys)); if (!new_page) { dev_warn(kbdev->dev, "%s: kmap failure for new page.", __func__); ret = -EINVAL; @@ -3457,14 +3544,13 @@ int kbase_mmu_migrate_page(struct tagged_addr old_phys, struct tagged_addr new_p goto get_pgd_at_level_error; } - pgd_page = kmap(phys_to_page(pgd)); + pgd_page = kbase_kmap(phys_to_page(pgd)); if (!pgd_page) { dev_warn(kbdev->dev, "%s: kmap failure for PGD page.", __func__); ret = -EINVAL; goto pgd_page_map_error; } - rt_mutex_lock(&kbdev->pm.lock); mutex_lock(&kbdev->mmu_hw_mutex); /* Lock MMU region and flush GPU cache by using GPU control, @@ -3475,14 +3561,13 @@ /* Defer the migration as L2 is in a transitional phase */ spin_unlock_irqrestore(&kbdev->hwaccess_lock, hwaccess_flags); mutex_unlock(&kbdev->mmu_hw_mutex); - rt_mutex_unlock(&kbdev->pm.lock); dev_dbg(kbdev->dev, "%s: L2 in transition, abort PGD page migration", __func__); ret = -EAGAIN; goto l2_state_defer_out; } /* Prevent transitional phases in L2 by starting the transaction */ mmu_page_migration_transaction_begin(kbdev); - if (kbdev->pm.backend.gpu_powered && mmut->kctx->as_nr >= 0) { + if (kbdev->pm.backend.gpu_ready && mmut->kctx->as_nr >= 0) { int as_nr = mmut->kctx->as_nr; struct kbase_as *as = &kbdev->as[as_nr]; @@ -3498,7 +3583,6 @@ if (ret < 0) { mutex_unlock(&kbdev->mmu_hw_mutex); - rt_mutex_unlock(&kbdev->pm.lock); dev_err(kbdev->dev, "%s: failed to lock MMU region or flush GPU cache", __func__); goto undo_mappings; } @@ -3574,7 +3658,7 @@ int kbase_mmu_migrate_page(struct tagged_addr old_phys, struct tagged_addr new_p * won't have any effect on them. */ spin_lock_irqsave(&kbdev->hwaccess_lock, hwaccess_flags); - if (kbdev->pm.backend.gpu_powered && mmut->kctx->as_nr >= 0) { + if (kbdev->pm.backend.gpu_ready && mmut->kctx->as_nr >= 0) { int as_nr = mmut->kctx->as_nr; struct kbase_as *as = &kbdev->as[as_nr]; @@ -3590,7 +3674,6 @@ spin_unlock_irqrestore(&kbdev->hwaccess_lock, hwaccess_flags); /* Releasing locks before checking the migration transaction error state */ mutex_unlock(&kbdev->mmu_hw_mutex); - rt_mutex_unlock(&kbdev->pm.lock); spin_lock_irqsave(&kbdev->hwaccess_lock, hwaccess_flags); /* Release the transition prevention in L2 by ending the transaction */ @@ -3623,24 +3706,24 @@ set_page_private(as_page(old_phys), 0); l2_state_defer_out: - kunmap(phys_to_page(pgd)); + kbase_kunmap(phys_to_page(pgd), pgd_page); pgd_page_map_error: get_pgd_at_level_error: page_state_change_out: rt_mutex_unlock(&mmut->mmu_lock); - kunmap(as_page(new_phys)); + kbase_kunmap(as_page(new_phys), new_page); new_page_map_error: - kunmap(as_page(old_phys)); + kbase_kunmap(as_page(old_phys), old_page); old_page_map_error: return ret; undo_mappings: /* Unlock the MMU table and undo mappings.
*/ rt_mutex_unlock(&mmut->mmu_lock); - kunmap(phys_to_page(pgd)); - kunmap(as_page(new_phys)); - kunmap(as_page(old_phys)); + kbase_kunmap(phys_to_page(pgd), pgd_page); + kbase_kunmap(as_page(new_phys), new_page); + kbase_kunmap(as_page(old_phys), old_page); return ret; } @@ -3657,7 +3740,7 @@ static void mmu_teardown_level(struct kbase_device *kbdev, struct kbase_mmu_tabl lockdep_assert_held(&mmut->mmu_lock); - pgd_page = kmap_atomic(p); + pgd_page = kbase_kmap_atomic(p); /* kmap_atomic should NEVER fail. */ if (WARN_ON_ONCE(pgd_page == NULL)) return; @@ -3673,11 +3756,11 @@ static void mmu_teardown_level(struct kbase_device *kbdev, struct kbase_mmu_tabl * there are no pages left mapped on the GPU for a context. Hence the count * of valid entries is expected to be zero here. */ - if (kbase_page_migration_enabled && mmut->kctx) + if (kbase_is_page_migration_enabled() && mmut->kctx) WARN_ON_ONCE(kbdev->mmu_mode->get_num_valid_entries(pgd_page)); /* Invalidate page after copying */ mmu_mode->entries_invalidate(pgd_page, KBASE_MMU_PAGE_ENTRIES); - kunmap_atomic(pgd_page); + kbase_kunmap_atomic(pgd_page); pgd_page = pgd_page_buffer; if (level < MIDGARD_MMU_BOTTOMLEVEL) { @@ -3696,6 +3779,24 @@ static void mmu_teardown_level(struct kbase_device *kbdev, struct kbase_mmu_tabl kbase_mmu_free_pgd(kbdev, mmut, pgd); } +static void kbase_mmu_mark_non_movable(struct page *page) +{ + struct kbase_page_metadata *page_md; + + if (!kbase_is_page_migration_enabled()) + return; + + page_md = kbase_page_private(page); + + spin_lock(&page_md->migrate_lock); + page_md->status = PAGE_STATUS_SET(page_md->status, NOT_MOVABLE); + + if (IS_PAGE_MOVABLE(page_md->status)) + page_md->status = PAGE_MOVABLE_CLEAR(page_md->status); + + spin_unlock(&page_md->migrate_lock); +} + int kbase_mmu_init(struct kbase_device *const kbdev, struct kbase_mmu_table *const mmut, struct kbase_context *const kctx, int const group_id) @@ -3729,11 +3830,10 @@ int kbase_mmu_init(struct kbase_device *const kbdev, return -ENOMEM; } - rt_mutex_lock(&mmut->mmu_lock); mmut->pgd = kbase_mmu_alloc_pgd(kbdev, mmut); - rt_mutex_unlock(&mmut->mmu_lock); } + kbase_mmu_mark_non_movable(pfn_to_page(PFN_DOWN(mmut->pgd))); return 0; } @@ -3769,7 +3869,7 @@ void kbase_mmu_flush_pa_range(struct kbase_device *kbdev, struct kbase_context * spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags); if (mmu_flush_cache_on_gpu_ctrl(kbdev) && (flush_op != KBASE_MMU_OP_NONE) && - kbdev->pm.backend.gpu_powered && (!kctx || kctx->as_nr >= 0)) + kbdev->pm.backend.gpu_ready && (!kctx || kctx->as_nr >= 0)) mmu_flush_pa_range(kbdev, phys, size, KBASE_MMU_OP_FLUSH_PT); spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags); #endif @@ -3794,7 +3894,7 @@ static size_t kbasep_mmu_dump_level(struct kbase_context *kctx, phys_addr_t pgd, kbdev = kctx->kbdev; mmu_mode = kbdev->mmu_mode; - pgd_page = kmap(pfn_to_page(PFN_DOWN(pgd))); + pgd_page = kbase_kmap(pfn_to_page(PFN_DOWN(pgd))); if (!pgd_page) { dev_warn(kbdev->dev, "%s: kmap failure", __func__); return 0; @@ -3829,7 +3929,7 @@ static size_t kbasep_mmu_dump_level(struct kbase_context *kctx, phys_addr_t pgd, target_pgd, level + 1, buffer, size_left); if (!dump_size) { - kunmap(pfn_to_page(PFN_DOWN(pgd))); + kbase_kunmap(pfn_to_page(PFN_DOWN(pgd)), pgd_page); return 0; } size += dump_size; @@ -3837,7 +3937,7 @@ static size_t kbasep_mmu_dump_level(struct kbase_context *kctx, phys_addr_t pgd, } } - kunmap(pfn_to_page(PFN_DOWN(pgd))); + kbase_kunmap(pfn_to_page(PFN_DOWN(pgd)), pgd_page); return size; } diff --git 
a/mali_kbase/mmu/mali_kbase_mmu.h b/mali_kbase/mmu/mali_kbase_mmu.h index 861a5f4..e13e9b9 100644 --- a/mali_kbase/mmu/mali_kbase_mmu.h +++ b/mali_kbase/mmu/mali_kbase_mmu.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -36,8 +36,8 @@ struct kbase_va_region; * A pointer to this type is passed down from the outer-most callers in the kbase * module - where the information resides as to the synchronous / asynchronous * nature of the call flow, with respect to MMU operations. ie - does the call flow relate to - * existing GPU work does it come from requests (like ioctl) from user-space, power management, - * etc. + * existing GPU work or does it come from requests (like ioctl) from user-space, power + * management, etc. * * @CALLER_MMU_UNSET_SYNCHRONICITY: default value must be invalid to avoid accidental choice * of a 'valid' value @@ -154,25 +154,43 @@ u64 kbase_mmu_create_ate(struct kbase_device *kbdev, int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, u64 vpfn, struct tagged_addr *phys, size_t nr, unsigned long flags, int group_id, u64 *dirty_pgds, - struct kbase_va_region *reg, bool ignore_page_migration); + struct kbase_va_region *reg); int kbase_mmu_insert_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, u64 vpfn, struct tagged_addr *phys, size_t nr, unsigned long flags, int as_nr, int group_id, enum kbase_caller_mmu_sync_info mmu_sync_info, - struct kbase_va_region *reg, bool ignore_page_migration); -int kbase_mmu_insert_imported_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, - u64 vpfn, struct tagged_addr *phys, size_t nr, - unsigned long flags, int as_nr, int group_id, - enum kbase_caller_mmu_sync_info mmu_sync_info, - struct kbase_va_region *reg); + struct kbase_va_region *reg); + +/** + * kbase_mmu_insert_pages_skip_status_update - Map 'nr' pages pointed to by 'phys' + * at GPU PFN 'vpfn' for GPU address space number 'as_nr'. + * + * @kbdev: Instance of GPU platform device, allocated from the probe method. + * @mmut: GPU page tables. + * @vpfn: Start page frame number of the GPU virtual pages to map. + * @phys: Physical address of the page to be mapped. + * @nr: The number of pages to map. + * @flags: Bitmask of attributes of the GPU memory region being mapped. + * @as_nr: The GPU address space number. + * @group_id: The physical memory group in which the page was allocated. + * @mmu_sync_info: MMU-synchronous caller info. + * @reg: The region whose physical allocation is to be mapped. + * + * Similar to kbase_mmu_insert_pages() but skips updating each page's metadata + * for page migration. + * + * Return: 0 if successful, otherwise a negative error code.
+ */ +int kbase_mmu_insert_pages_skip_status_update(struct kbase_device *kbdev, + struct kbase_mmu_table *mmut, u64 vpfn, + struct tagged_addr *phys, size_t nr, + unsigned long flags, int as_nr, int group_id, + enum kbase_caller_mmu_sync_info mmu_sync_info, + struct kbase_va_region *reg); int kbase_mmu_insert_aliased_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, u64 vpfn, struct tagged_addr *phys, size_t nr, unsigned long flags, int as_nr, int group_id, enum kbase_caller_mmu_sync_info mmu_sync_info, struct kbase_va_region *reg); -int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn, struct tagged_addr phys, - size_t nr, unsigned long flags, int group_id, - enum kbase_caller_mmu_sync_info mmu_sync_info, - bool ignore_page_migration); int kbase_mmu_insert_single_imported_page(struct kbase_context *kctx, u64 vpfn, struct tagged_addr phys, size_t nr, unsigned long flags, int group_id, @@ -182,40 +200,16 @@ int kbase_mmu_insert_single_aliased_page(struct kbase_context *kctx, u64 vpfn, int group_id, enum kbase_caller_mmu_sync_info mmu_sync_info); -/** - * kbase_mmu_teardown_pages - Remove GPU virtual addresses from the MMU page table - * - * @kbdev: Pointer to kbase device. - * @mmut: Pointer to GPU MMU page table. - * @vpfn: Start page frame number of the GPU virtual pages to unmap. - * @phys: Array of physical pages currently mapped to the virtual - * pages to unmap, or NULL. This is used for GPU cache maintenance - * and page migration support. - * @nr_phys_pages: Number of physical pages to flush. - * @nr_virt_pages: Number of virtual pages whose PTEs should be destroyed. - * @as_nr: Address space number, for GPU cache maintenance operations - * that happen outside a specific kbase context. - * @ignore_page_migration: Whether page migration metadata should be ignored. - * - * We actually discard the ATE and free the page table pages if no valid entries - * exist in PGD. - * - * IMPORTANT: This uses kbasep_js_runpool_release_ctx() when the context is - * currently scheduled into the runpool, and so potentially uses a lot of locks. - * These locks must be taken in the correct order with respect to others - * already held by the caller. Refer to kbasep_js_runpool_release_ctx() for more - * information. - * - * The @p phys pointer to physical pages is not necessary for unmapping virtual memory, - * but it is used for fine-grained GPU cache maintenance. If @p phys is NULL, - * GPU cache maintenance will be done as usual, that is invalidating the whole GPU caches - * instead of specific physical address ranges. - * - * Return: 0 on success, otherwise an error code. 
diff --git a/mali_kbase/mmu/mali_kbase_mmu_hw.h b/mali_kbase/mmu/mali_kbase_mmu_hw.h
index d53f928..49e050e 100644
--- a/mali_kbase/mmu/mali_kbase_mmu_hw.h
+++ b/mali_kbase/mmu/mali_kbase_mmu_hw.h
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 /*
  *
- * (C) COPYRIGHT 2014-2015, 2018-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -58,7 +58,7 @@ enum kbase_mmu_fault_type {
  * struct kbase_mmu_hw_op_param - parameters for kbase_mmu_hw_do_* functions
  * @vpfn: MMU Virtual Page Frame Number to start the operation on.
  * @nr: Number of pages to work on.
- * @op: Operation type (written to ASn_COMMAND).
+ * @op: Operation type (written to AS_COMMAND).
  * @kctx_id: Kernel context ID for MMU command tracepoint.
  * @mmu_sync_info: Indicates whether this call is synchronous wrt MMU ops.
  * @flush_skip_levels: Page table levels to skip flushing. (Only
diff --git a/mali_kbase/mmu/mali_kbase_mmu_hw_direct.c b/mali_kbase/mmu/mali_kbase_mmu_hw_direct.c
index ecfa23d..ca9f060 100644
--- a/mali_kbase/mmu/mali_kbase_mmu_hw_direct.c
+++ b/mali_kbase/mmu/mali_kbase_mmu_hw_direct.c
@@ -170,10 +170,10 @@ static int lock_region(struct kbase_gpu_props const *gpu_props, u64 *lockaddr,
 static int wait_ready(struct kbase_device *kbdev, unsigned int as_nr)
 {
 	const ktime_t wait_loop_start = ktime_get_raw();
-	const u32 mmu_as_inactive_wait_time_ms = kbdev->mmu_as_inactive_wait_time_ms;
+	const u32 mmu_as_inactive_wait_time_ms = kbdev->mmu_or_gpu_cache_op_wait_time_ms;
 	s64 diff;
 
-	if (unlikely(kbdev->as[as_nr].is_unresponsive))
+	if (unlikely(kbdev->mmu_unresponsive))
 		return -EBUSY;
 
 	do {
@@ -181,7 +181,7 @@ static int wait_ready(struct kbase_device *kbdev, unsigned int as_nr)
 
 		for (i = 0; i < 1000; i++) {
 			/* Wait for the MMU status to indicate there is no active command */
-			if (!(kbase_reg_read(kbdev, MMU_AS_REG(as_nr, AS_STATUS)) &
+			if (!(kbase_reg_read(kbdev, MMU_STAGE1_REG(MMU_AS_REG(as_nr, AS_STATUS))) &
 			      AS_STATUS_AS_ACTIVE))
 				return 0;
 		}
@@ -192,7 +192,7 @@ static int wait_ready(struct kbase_device *kbdev, unsigned int as_nr)
 
 	dev_err(kbdev->dev,
 		"AS_ACTIVE bit stuck for as %u. Might be caused by unstable GPU clk/pwr or faulty system",
 		as_nr);
-	kbdev->as[as_nr].is_unresponsive = true;
+	kbdev->mmu_unresponsive = true;
 	if (kbase_prepare_to_reset_gpu_locked(kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR))
 		kbase_reset_gpu_locked(kbdev);
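wait_ready() above polls AS_STATUS in bursts of 1000 reads before re-checking a millisecond deadline, and gives up by marking the MMU unresponsive. A standalone sketch of that bounded-poll pattern (simulated register and hypothetical names, not kbase code):

/* Standalone sketch of the burst-poll-with-deadline pattern used by
 * wait_ready(). All names are hypothetical; not kbase code. */
#include <stdint.h>
#include <time.h>

#define SKETCH_ACTIVE_BIT (1u << 0)
#define SKETCH_TIMEOUT_MS 100

static uint32_t sketch_read_status(void)
{
	/* Stand-in for a hardware register read: busy for a while, then idle. */
	static int countdown = 5000;
	return (--countdown > 0) ? SKETCH_ACTIVE_BIT : 0;
}

static int64_t sketch_ms_since(const struct timespec *start)
{
	struct timespec now;

	clock_gettime(CLOCK_MONOTONIC, &now);
	return (int64_t)(now.tv_sec - start->tv_sec) * 1000 +
	       (now.tv_nsec - start->tv_nsec) / 1000000;
}

static int sketch_wait_ready(void)
{
	struct timespec start;

	clock_gettime(CLOCK_MONOTONIC, &start);
	do {
		/* Burst of polls before re-checking the deadline, mirroring
		 * the driver's inner "for (i = 0; i < 1000; i++)" loop. */
		for (int i = 0; i < 1000; i++) {
			if (!(sketch_read_status() & SKETCH_ACTIVE_BIT))
				return 0;
		}
	} while (sketch_ms_since(&start) < SKETCH_TIMEOUT_MS);

	return -1; /* stuck: the driver would mark the MMU unresponsive */
}

int main(void)
{
	return sketch_wait_ready();
}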
@@ -205,7 +205,7 @@ static int write_cmd(struct kbase_device *kbdev, int as_nr, u32 cmd)
 	const int status = wait_ready(kbdev, as_nr);
 
 	if (likely(status == 0))
-		kbase_reg_write(kbdev, MMU_AS_REG(as_nr, AS_COMMAND), cmd);
+		kbase_reg_write(kbdev, MMU_STAGE1_REG(MMU_AS_REG(as_nr, AS_COMMAND)), cmd);
 	else if (status == -EBUSY) {
 		dev_dbg(kbdev->dev,
 			"Skipped the wait for AS_ACTIVE bit for as %u, before sending MMU command %u",
@@ -277,9 +277,8 @@ static int apply_hw_issue_GPU2019_3901_wa(struct kbase_device *kbdev, u32 *mmu_c
 	 * the workaround can be safely skipped.
 	 */
 	if (kbdev->pm.backend.l2_state != KBASE_L2_OFF) {
-		if (*mmu_cmd != AS_COMMAND_FLUSH_MEM) {
-			dev_warn(kbdev->dev,
-				 "Unexpected mmu command received");
+		if (unlikely(*mmu_cmd != AS_COMMAND_FLUSH_MEM)) {
+			dev_warn(kbdev->dev, "Unexpected MMU command(%u) received", *mmu_cmd);
 			return -EINVAL;
 		}
 
@@ -341,19 +340,18 @@ void kbase_mmu_hw_configure(struct kbase_device *kbdev, struct kbase_as *as)
 			transcfg = (transcfg | AS_TRANSCFG_PTW_SH_OS);
 	}
 
-	kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSCFG_LO),
-			transcfg);
-	kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSCFG_HI),
+	kbase_reg_write(kbdev, MMU_STAGE1_REG(MMU_AS_REG(as->number, AS_TRANSCFG_LO)), transcfg);
+	kbase_reg_write(kbdev, MMU_STAGE1_REG(MMU_AS_REG(as->number, AS_TRANSCFG_HI)),
 			(transcfg >> 32) & 0xFFFFFFFFUL);
-	kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSTAB_LO),
+	kbase_reg_write(kbdev, MMU_STAGE1_REG(MMU_AS_REG(as->number, AS_TRANSTAB_LO)),
 			current_setup->transtab & 0xFFFFFFFFUL);
-	kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSTAB_HI),
+	kbase_reg_write(kbdev, MMU_STAGE1_REG(MMU_AS_REG(as->number, AS_TRANSTAB_HI)),
 			(current_setup->transtab >> 32) & 0xFFFFFFFFUL);
-	kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_MEMATTR_LO),
+	kbase_reg_write(kbdev, MMU_STAGE1_REG(MMU_AS_REG(as->number, AS_MEMATTR_LO)),
 			current_setup->memattr & 0xFFFFFFFFUL);
-	kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_MEMATTR_HI),
+	kbase_reg_write(kbdev, MMU_STAGE1_REG(MMU_AS_REG(as->number, AS_MEMATTR_HI)),
 			(current_setup->memattr >> 32) & 0xFFFFFFFFUL);
 
 	KBASE_TLSTREAM_TL_ATTRIB_AS_CONFIG(kbdev, as,
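kbase_mmu_hw_configure() above programs 64-bit TRANSTAB/MEMATTR/TRANSCFG values through pairs of 32-bit _LO/_HI registers. A standalone sketch of that split (printf stands in for kbase_reg_write(); not kbase code):

/* Standalone sketch of writing a 64-bit value through two 32-bit registers,
 * as done for the AS_* setup registers above. Not kbase code. */
#include <stdint.h>
#include <stdio.h>

static void sketch_write_reg(const char *name, uint32_t val)
{
	printf("%s <- 0x%08x\n", name, val); /* stand-in for a register write */
}

static void sketch_write64(const char *lo, const char *hi, uint64_t val)
{
	sketch_write_reg(lo, (uint32_t)(val & 0xFFFFFFFFUL));
	sketch_write_reg(hi, (uint32_t)((val >> 32) & 0xFFFFFFFFUL));
}

int main(void)
{
	uint64_t transtab = 0x0000000123456000ULL; /* hypothetical value */

	sketch_write64("AS_TRANSTAB_LO", "AS_TRANSTAB_HI", transtab);
	return 0;
}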
@@ -401,9 +399,9 @@ static int mmu_hw_set_lock_addr(struct kbase_device *kbdev, int as_nr, u64 *lock
 
 	if (!ret) {
 		/* Set the region that needs to be updated */
-		kbase_reg_write(kbdev, MMU_AS_REG(as_nr, AS_LOCKADDR_LO),
+		kbase_reg_write(kbdev, MMU_STAGE1_REG(MMU_AS_REG(as_nr, AS_LOCKADDR_LO)),
 				*lock_addr & 0xFFFFFFFFUL);
-		kbase_reg_write(kbdev, MMU_AS_REG(as_nr, AS_LOCKADDR_HI),
+		kbase_reg_write(kbdev, MMU_STAGE1_REG(MMU_AS_REG(as_nr, AS_LOCKADDR_HI)),
 				(*lock_addr >> 32) & 0xFFFFFFFFUL);
 	}
 	return ret;
@@ -490,9 +488,11 @@ int kbase_mmu_hw_do_unlock_no_addr(struct kbase_device *kbdev, struct kbase_as *
 	if (likely(!ret)) {
 		u64 lock_addr = 0x0;
 		/* read MMU_AS_CONTROL.LOCKADDR register */
-		lock_addr |= (u64)kbase_reg_read(kbdev, MMU_AS_REG(as->number, AS_LOCKADDR_HI))
+		lock_addr |= (u64)kbase_reg_read(
+				     kbdev, MMU_STAGE1_REG(MMU_AS_REG(as->number, AS_LOCKADDR_HI)))
 			     << 32;
-		lock_addr |= (u64)kbase_reg_read(kbdev, MMU_AS_REG(as->number, AS_LOCKADDR_LO));
+		lock_addr |= (u64)kbase_reg_read(
+			kbdev, MMU_STAGE1_REG(MMU_AS_REG(as->number, AS_LOCKADDR_LO)));
 
 		mmu_command_instr(kbdev, op_param->kctx_id, AS_COMMAND_UNLOCK, lock_addr,
 				  op_param->mmu_sync_info);
@@ -572,8 +572,14 @@ static int mmu_hw_do_flush(struct kbase_device *kbdev, struct kbase_as *as,
 			ret = apply_hw_issue_GPU2019_3901_wa(kbdev, &mmu_cmd, as->number);
 		}
 
-		if (ret)
-			return ret;
+		if (ret) {
+			dev_warn(
+				kbdev->dev,
+				"Failed to apply WA for HW issue when doing MMU flush op on VA range %llx-%llx for AS %u",
+				op_param->vpfn << PAGE_SHIFT,
+				((op_param->vpfn + op_param->nr) << PAGE_SHIFT) - 1, as->number);
+			/* Continue with the MMU flush operation */
+		}
 	}
 #endif
@@ -664,7 +670,7 @@ void kbase_mmu_hw_clear_fault(struct kbase_device *kbdev, struct kbase_as *as,
 		    type == KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED)
 			pf_bf_mask |= MMU_BUS_ERROR(as->number);
 #endif
-		kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), pf_bf_mask);
+		kbase_reg_write(kbdev, MMU_CONTROL_REG(MMU_IRQ_CLEAR), pf_bf_mask);
 
 unlock:
 	spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags);
@@ -688,15 +694,15 @@ void kbase_mmu_hw_enable_fault(struct kbase_device *kbdev, struct kbase_as *as,
 	if (kbdev->irq_reset_flush)
 		goto unlock;
 
-	irq_mask = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK)) |
-		   MMU_PAGE_FAULT(as->number);
+	irq_mask =
+		kbase_reg_read(kbdev, MMU_CONTROL_REG(MMU_IRQ_MASK)) | MMU_PAGE_FAULT(as->number);
 
 #if !MALI_USE_CSF
 	if (type == KBASE_MMU_FAULT_TYPE_BUS ||
 	    type == KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED)
 		irq_mask |= MMU_BUS_ERROR(as->number);
 #endif
-	kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), irq_mask);
+	kbase_reg_write(kbdev, MMU_CONTROL_REG(MMU_IRQ_MASK), irq_mask);
 
 unlock:
 	spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags);
diff --git a/mali_kbase/platform/Kconfig b/mali_kbase/platform/Kconfig
index de4203c..b190e26 100644
--- a/mali_kbase/platform/Kconfig
+++ b/mali_kbase/platform/Kconfig
@@ -1,6 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 #
-# (C) COPYRIGHT 2012-2013, 2017, 2021 ARM Limited. All rights reserved.
+# (C) COPYRIGHT 2012-2023 ARM Limited. All rights reserved.
 #
 # This program is free software and is provided to you under the terms of the
 # GNU General Public License version 2 as published by the Free Software
@@ -20,7 +20,7 @@
 # Add your platform specific Kconfig file here
 #
-# "drivers/gpu/arm/midgard/platform/xxx/Kconfig"
+# "$(MALI_KCONFIG_EXT_PREFIX)drivers/gpu/arm/midgard/platform/xxx/Kconfig"
 #
 # Where xxx is the platform name set in MALI_PLATFORM_NAME
 #
diff --git a/mali_kbase/platform/meson/mali_kbase_config_platform.h b/mali_kbase/platform/meson/mali_kbase_config_platform.h
index 06279e2..866a7de 100644
--- a/mali_kbase/platform/meson/mali_kbase_config_platform.h
+++ b/mali_kbase/platform/meson/mali_kbase_config_platform.h
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 /*
  *
- * (C) COPYRIGHT 2014-2017, 2019-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2017, 2019-2023 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -20,7 +20,7 @@
  */
 
 /**
- * Power management configuration
+ * POWER_MANAGEMENT_CALLBACKS - Power management configuration
  *
  * Attached value: pointer to @ref kbase_pm_callback_conf
  * Default value: See @ref kbase_pm_callback_conf
@@ -28,7 +28,7 @@
 #define POWER_MANAGEMENT_CALLBACKS (&pm_callbacks)
 
 /**
- * Platform specific configuration functions
+ * PLATFORM_FUNCS - Platform specific configuration functions
  *
  * Attached value: pointer to @ref kbase_platform_funcs_conf
 * Default value: See @ref kbase_platform_funcs_conf
@@ -38,7 +38,7 @@ extern struct kbase_pm_callback_conf pm_callbacks;
 
 /**
- * Autosuspend delay
+ * AUTO_SUSPEND_DELAY - Autosuspend delay
  *
  * The delay time (in milliseconds) to be used for autosuspend
  */
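POWER_MANAGEMENT_CALLBACKS above attaches a pointer to a struct kbase_pm_callback_conf named pm_callbacks, which the platform code defines elsewhere. A standalone sketch of that callback-table pattern (the struct below is a stand-in; the real struct kbase_pm_callback_conf has more fields and its field names here are assumptions):

/* Standalone sketch of a power-management callback table. Not kbase code;
 * the real struct kbase_pm_callback_conf may differ. */
#include <stdio.h>

struct sketch_device {
	const char *name;
};

struct sketch_pm_callback_conf {
	int (*power_on_callback)(struct sketch_device *dev);
	void (*power_off_callback)(struct sketch_device *dev);
};

static int sketch_power_on(struct sketch_device *dev)
{
	printf("%s: power on\n", dev->name); /* placeholder body */
	return 0;
}

static void sketch_power_off(struct sketch_device *dev)
{
	printf("%s: power off\n", dev->name); /* placeholder body */
}

static const struct sketch_pm_callback_conf pm_callbacks_sketch = {
	.power_on_callback = sketch_power_on,
	.power_off_callback = sketch_power_off,
};

int main(void)
{
	struct sketch_device dev = { .name = "mali" };

	pm_callbacks_sketch.power_on_callback(&dev);
	pm_callbacks_sketch.power_off_callback(&dev);
	return 0;
}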
diff --git a/mali_kbase/platform/pixel/pixel_gpu_sscd.c b/mali_kbase/platform/pixel/pixel_gpu_sscd.c
index c65e6ce..75f3c2a 100644
--- a/mali_kbase/platform/pixel/pixel_gpu_sscd.c
+++ b/mali_kbase/platform/pixel/pixel_gpu_sscd.c
@@ -119,7 +119,7 @@ static void get_fw_trace(struct kbase_device *kbdev, struct sscd_segment *seg)
 		.version = 1,
 	};
 
-	tb = kbase_csf_firmware_get_trace_buffer(kbdev, FIRMWARE_LOG_BUF_NAME);
+	tb = kbase_csf_firmware_get_trace_buffer(kbdev, KBASE_CSFFW_LOG_BUF_NAME);
 
 	if (tb == NULL) {
 		dev_err(kbdev->dev, "pixel: failed to open firmware trace buffer");
diff --git a/mali_kbase/tests/Kbuild b/mali_kbase/tests/Kbuild
index 38e4dd4..72ca70a 100644
--- a/mali_kbase/tests/Kbuild
+++ b/mali_kbase/tests/Kbuild
@@ -1,6 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 #
-# (C) COPYRIGHT 2017, 2020-2022 ARM Limited. All rights reserved.
+# (C) COPYRIGHT 2017-2023 ARM Limited. All rights reserved.
 #
 # This program is free software and is provided to you under the terms of the
 # GNU General Public License version 2 as published by the Free Software
@@ -17,6 +17,7 @@
 # http://www.gnu.org/licenses/gpl-2.0.html.
 #
 #
+src:=$(if $(patsubst /%,,$(src)),$(srctree)/$(src),$(src))
 
 ccflags-y += -I$(src)/include \
 	     -I$(src)
@@ -29,3 +30,4 @@
 obj-$(CONFIG_MALI_KUTF_IRQ_TEST) += mali_kutf_irq_test/
 obj-$(CONFIG_MALI_KUTF_CLK_RATE_TRACE) += mali_kutf_clk_rate_trace/kernel/
 obj-$(CONFIG_MALI_KUTF_MGM_INTEGRATION) += mali_kutf_mgm_integration_test/
+
diff --git a/mali_kbase/tests/Kconfig b/mali_kbase/tests/Kconfig
index e6f0376..f100901 100644
--- a/mali_kbase/tests/Kconfig
+++ b/mali_kbase/tests/Kconfig
@@ -1,6 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 #
-# (C) COPYRIGHT 2017, 2020-2022 ARM Limited. All rights reserved.
+# (C) COPYRIGHT 2017, 2020-2023 ARM Limited. All rights reserved.
 #
 # This program is free software and is provided to you under the terms of the
 # GNU General Public License version 2 as published by the Free Software
@@ -65,5 +65,6 @@ config MALI_KUTF_MGM_INTEGRATION_TEST
 	  - mali_kutf_mgm_integration_test.ko
 
+
 comment "Enable MALI_DEBUG for KUTF modules support"
 	depends on MALI_MIDGARD && !MALI_DEBUG && MALI_KUTF
diff --git a/mali_kbase/tests/Mconfig b/mali_kbase/tests/Mconfig
index d81c639..aa09274 100644
--- a/mali_kbase/tests/Mconfig
+++ b/mali_kbase/tests/Mconfig
@@ -1,6 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 #
-# (C) COPYRIGHT 2018-2022 ARM Limited. All rights reserved.
+# (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved.
 #
 # This program is free software and is provided to you under the terms of the
 # GNU General Public License version 2 as published by the Free Software
@@ -65,6 +65,7 @@ config MALI_KUTF_MGM_INTEGRATION_TEST
 	  - mali_kutf_mgm_integration_test.ko
 
+
 # Enable MALI_DEBUG for KUTF modules support
 config UNIT_TEST_KERNEL_MODULES
diff --git a/mali_kbase/tests/mali_kutf_clk_rate_trace/kernel/mali_kutf_clk_rate_trace_test.c b/mali_kbase/tests/mali_kutf_clk_rate_trace/kernel/mali_kutf_clk_rate_trace_test.c
index a6f54b6..8b86fb0 100644
--- a/mali_kbase/tests/mali_kutf_clk_rate_trace/kernel/mali_kutf_clk_rate_trace_test.c
+++ b/mali_kbase/tests/mali_kutf_clk_rate_trace/kernel/mali_kutf_clk_rate_trace_test.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 /*
  *
- * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2020-2023 ARM Limited. All rights reserved.
 *
 * This program is free software and is provided to you under the terms of the
 * GNU General Public License version 2 as published by the Free Software
@@ -442,8 +442,9 @@ static const char *kutf_clk_trace_do_get_platform(
 #if defined(CONFIG_MALI_ARBITER_SUPPORT) && defined(CONFIG_OF)
 	struct kutf_clk_rate_trace_fixture_data *data = context->fixture;
 
-	arbiter_if_node =
-		of_get_property(data->kbdev->dev->of_node, "arbiter_if", NULL);
+	arbiter_if_node = of_get_property(data->kbdev->dev->of_node, "arbiter-if", NULL);
+	if (!arbiter_if_node)
+		arbiter_if_node = of_get_property(data->kbdev->dev->of_node, "arbiter_if", NULL);
 #endif
 	if (arbiter_if_node) {
 		power_node = of_find_compatible_node(NULL, NULL,
diff --git a/mali_kbase/thirdparty/mali_kbase_mmap.c b/mali_kbase/thirdparty/mali_kbase_mmap.c
index 1e636b9..20f7496 100644
--- a/mali_kbase/thirdparty/mali_kbase_mmap.c
+++ b/mali_kbase/thirdparty/mali_kbase_mmap.c
@@ -303,8 +303,7 @@ unsigned long kbase_context_get_unmapped_area(struct kbase_context *const kctx,
 	 * is no free region at the address found originally by too large a
 	 * same_va_end_addr here, and will fail the allocation gracefully.
 	 */
-	struct kbase_reg_zone *zone =
-		kbase_ctx_reg_zone_get_nolock(kctx, KBASE_REG_ZONE_SAME_VA);
+	struct kbase_reg_zone *zone = kbase_ctx_reg_zone_get_nolock(kctx, SAME_VA_ZONE);
 	u64 same_va_end_addr = kbase_reg_zone_end_pfn(zone) << PAGE_SHIFT;
 #if (KERNEL_VERSION(6, 1, 0) <= LINUX_VERSION_CODE)
 	const unsigned long mmap_end = arch_get_mmap_end(addr, len, flags);
@@ -386,7 +385,7 @@ unsigned long kbase_context_get_unmapped_area(struct kbase_context *const kctx,
 #ifndef CONFIG_64BIT
 	} else {
 		return current->mm->get_unmapped_area(
-			kctx->filp, addr, len, pgoff, flags);
+			kctx->kfile->filp, addr, len, pgoff, flags);
 #endif
 	}
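The clock-rate-trace change above tries the preferred "arbiter-if" device-tree property first and only falls back to the legacy "arbiter_if" spelling when it is absent. A standalone sketch of that preferred-then-legacy lookup (the lookup function is a stand-in, not the kernel's of_get_property()):

/* Standalone sketch of a preferred-then-legacy property lookup. Names and
 * the lookup function are hypothetical; not kernel code. */
#include <stdio.h>
#include <string.h>

static const char *sketch_get_property(const char *name)
{
	/* Stand-in: simulate a device tree where only the legacy spelling exists. */
	return strcmp(name, "arbiter_if") == 0 ? "phandle" : NULL;
}

int main(void)
{
	const char *prop = sketch_get_property("arbiter-if"); /* preferred */

	if (!prop)
		prop = sketch_get_property("arbiter_if"); /* legacy fallback */

	printf("found: %s\n", prop ? prop : "(none)");
	return 0;
}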