author    Jörg Wagner <jorwag@google.com>  2023-08-31 19:15:13 +0000
committer Jörg Wagner <jorwag@google.com>  2023-09-01 09:13:55 +0000
commit    b6fd708b3a4da86a196a61592ea3585f1aca7313 (patch)
tree      1cbe3029a45bf9869c17a5b6954e5ae074b44ac8
parent    46edf1b5965d872c5f8a09c6dc3dcbff58f78a92 (diff)
parent    e61eb93296e9f940b32d4ad4b0c3a5557cbeaf17 (diff)
download  gpu-b6fd708b3a4da86a196a61592ea3585f1aca7313.tar.gz
Merge r44p1-00dev3 from partner/upstream into android13-gs-pixel-5.10-udc-qpr1
Bug: 290882327
Change-Id: I90723cbaa3f294431087587fd8025f0688e51bf2
-rw-r--r-- common/include/linux/version_compat_defs.h | 170
-rw-r--r-- common/include/uapi/gpu/arm/midgard/csf/mali_base_csf_kernel.h | 2
-rw-r--r-- common/include/uapi/gpu/arm/midgard/csf/mali_kbase_csf_ioctl.h | 66
-rw-r--r-- common/include/uapi/gpu/arm/midgard/gpu/backend/mali_kbase_gpu_regmap_csf.h | 7
-rw-r--r-- common/include/uapi/gpu/arm/midgard/gpu/backend/mali_kbase_gpu_regmap_jm.h | 27
-rw-r--r-- common/include/uapi/gpu/arm/midgard/gpu/mali_kbase_gpu_regmap.h | 69
-rw-r--r-- common/include/uapi/gpu/arm/midgard/jm/mali_kbase_jm_ioctl.h | 7
-rw-r--r-- mali_kbase/BUILD.bazel | 55
-rw-r--r-- mali_kbase/Kbuild | 10
-rw-r--r-- mali_kbase/Kconfig | 36
-rw-r--r-- mali_kbase/Makefile | 330
-rw-r--r-- mali_kbase/Mconfig | 52
-rw-r--r-- mali_kbase/backend/gpu/Kbuild | 4
-rw-r--r-- mali_kbase/backend/gpu/mali_kbase_cache_policy_backend.c | 24
-rw-r--r-- mali_kbase/backend/gpu/mali_kbase_cache_policy_backend.h | 11
-rw-r--r-- mali_kbase/backend/gpu/mali_kbase_clk_rate_trace_mgr.c | 12
-rw-r--r-- mali_kbase/backend/gpu/mali_kbase_debug_job_fault_backend.c | 9
-rw-r--r-- mali_kbase/backend/gpu/mali_kbase_irq_linux.c | 12
-rw-r--r-- mali_kbase/backend/gpu/mali_kbase_jm_hw.c | 14
-rw-r--r-- mali_kbase/backend/gpu/mali_kbase_jm_internal.h | 2
-rw-r--r-- mali_kbase/backend/gpu/mali_kbase_jm_rb.c | 129
-rw-r--r-- mali_kbase/backend/gpu/mali_kbase_js_backend.c | 78
-rw-r--r-- mali_kbase/backend/gpu/mali_kbase_l2_mmu_config.c | 5
-rw-r--r-- mali_kbase/backend/gpu/mali_kbase_model_dummy.c | 65
-rw-r--r-- mali_kbase/backend/gpu/mali_kbase_model_linux.c | 7
-rw-r--r-- mali_kbase/backend/gpu/mali_kbase_pm_backend.c | 74
-rw-r--r-- mali_kbase/backend/gpu/mali_kbase_pm_driver.c | 220
-rw-r--r-- mali_kbase/backend/gpu/mali_kbase_pm_internal.h | 43
-rw-r--r-- mali_kbase/backend/gpu/mali_kbase_pm_policy.c | 18
-rw-r--r-- mali_kbase/backend/gpu/mali_kbase_time.c | 235
-rw-r--r-- mali_kbase/build.bp | 15
-rw-r--r-- mali_kbase/context/backend/mali_kbase_context_csf.c | 18
-rw-r--r-- mali_kbase/context/backend/mali_kbase_context_jm.c | 17
-rw-r--r-- mali_kbase/context/mali_kbase_context.c | 6
-rw-r--r-- mali_kbase/context/mali_kbase_context.h | 7
-rw-r--r-- mali_kbase/csf/Kbuild | 3
-rw-r--r-- mali_kbase/csf/ipa_control/mali_kbase_csf_ipa_control.c | 134
-rw-r--r-- mali_kbase/csf/mali_kbase_csf.c | 527
-rw-r--r-- mali_kbase/csf/mali_kbase_csf.h | 58
-rw-r--r-- mali_kbase/csf/mali_kbase_csf_cpu_queue_debugfs.c | 27
-rw-r--r-- mali_kbase/csf/mali_kbase_csf_csg_debugfs.c | 45
-rw-r--r-- mali_kbase/csf/mali_kbase_csf_defs.h | 140
-rw-r--r-- mali_kbase/csf/mali_kbase_csf_firmware.c | 321
-rw-r--r-- mali_kbase/csf/mali_kbase_csf_firmware.h | 20
-rw-r--r-- mali_kbase/csf/mali_kbase_csf_firmware_cfg.c | 99
-rw-r--r-- mali_kbase/csf/mali_kbase_csf_firmware_cfg.h | 34
-rw-r--r-- mali_kbase/csf/mali_kbase_csf_firmware_core_dump.c | 9
-rw-r--r-- mali_kbase/csf/mali_kbase_csf_firmware_log.c | 149
-rw-r--r-- mali_kbase/csf/mali_kbase_csf_firmware_no_mali.c | 69
-rw-r--r-- mali_kbase/csf/mali_kbase_csf_kcpu.c | 419
-rw-r--r-- mali_kbase/csf/mali_kbase_csf_kcpu.h | 41
-rw-r--r-- mali_kbase/csf/mali_kbase_csf_kcpu_fence_debugfs.c | 151
-rw-r--r-- mali_kbase/csf/mali_kbase_csf_kcpu_fence_debugfs.h (renamed from mali_kbase/mali_kbase_bits.h) | 29
-rw-r--r-- mali_kbase/csf/mali_kbase_csf_mcu_shared_reg.c | 37
-rw-r--r-- mali_kbase/csf/mali_kbase_csf_registers.h | 86
-rw-r--r-- mali_kbase/csf/mali_kbase_csf_reset_gpu.c | 13
-rw-r--r-- mali_kbase/csf/mali_kbase_csf_scheduler.c | 833
-rw-r--r-- mali_kbase/csf/mali_kbase_csf_scheduler.h | 93
-rw-r--r-- mali_kbase/csf/mali_kbase_csf_sync_debugfs.c | 358
-rw-r--r-- mali_kbase/csf/mali_kbase_csf_sync_debugfs.h | 27
-rw-r--r-- mali_kbase/csf/mali_kbase_csf_tiler_heap.c | 4
-rw-r--r-- mali_kbase/csf/mali_kbase_csf_timeout.c | 15
-rw-r--r-- mali_kbase/csf/mali_kbase_csf_tl_reader.c | 11
-rw-r--r-- mali_kbase/csf/mali_kbase_csf_trace_buffer.c | 50
-rw-r--r-- mali_kbase/csf/mali_kbase_csf_trace_buffer.h | 24
-rw-r--r-- mali_kbase/device/backend/mali_kbase_device_csf.c | 28
-rw-r--r-- mali_kbase/device/backend/mali_kbase_device_hw_csf.c | 4
-rw-r--r-- mali_kbase/device/backend/mali_kbase_device_hw_jm.c | 7
-rw-r--r-- mali_kbase/device/backend/mali_kbase_device_jm.c | 15
-rw-r--r-- mali_kbase/device/mali_kbase_device.c | 28
-rw-r--r-- mali_kbase/device/mali_kbase_device.h | 3
-rw-r--r-- mali_kbase/device/mali_kbase_device_hw.c | 83
-rw-r--r-- mali_kbase/gpu/backend/mali_kbase_gpu_regmap_csf.h | 86
-rw-r--r-- mali_kbase/gpu/backend/mali_kbase_gpu_regmap_jm.h | 95
-rw-r--r-- mali_kbase/gpu/mali_kbase_gpu.c | 4
-rw-r--r-- mali_kbase/gpu/mali_kbase_gpu_regmap.h | 126
-rw-r--r-- mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.c | 10
-rw-r--r-- mali_kbase/jm/mali_kbase_jm_defs.h | 13
-rw-r--r-- mali_kbase/jm/mali_kbase_js_defs.h | 24
-rw-r--r-- mali_kbase/mali_base_hwconfig_features.h | 4
-rw-r--r-- mali_kbase/mali_base_hwconfig_issues.h | 2
-rw-r--r-- mali_kbase/mali_kbase.h | 146
-rw-r--r-- mali_kbase/mali_kbase_config_defaults.h | 37
-rw-r--r-- mali_kbase/mali_kbase_core_linux.c | 609
-rw-r--r-- mali_kbase/mali_kbase_ctx_sched.c | 5
-rw-r--r-- mali_kbase/mali_kbase_debug_mem_allocs.c | 23
-rw-r--r-- mali_kbase/mali_kbase_debug_mem_view.c | 49
-rw-r--r-- mali_kbase/mali_kbase_debug_mem_zones.c | 35
-rw-r--r-- mali_kbase/mali_kbase_defs.h | 231
-rw-r--r-- mali_kbase/mali_kbase_dummy_job_wa.c | 14
-rw-r--r-- mali_kbase/mali_kbase_fence.h | 13
-rw-r--r-- mali_kbase/mali_kbase_fence_ops.c | 6
-rw-r--r-- mali_kbase/mali_kbase_gpu_metrics.c | 260
-rw-r--r-- mali_kbase/mali_kbase_gpu_metrics.h | 167
-rw-r--r-- mali_kbase/mali_kbase_gpuprops.c | 4
-rw-r--r-- mali_kbase/mali_kbase_gwt.c | 6
-rw-r--r-- mali_kbase/mali_kbase_hwaccess_time.h | 43
-rw-r--r-- mali_kbase/mali_kbase_js.c | 157
-rw-r--r-- mali_kbase/mali_kbase_kinstr_prfcnt.c | 11
-rw-r--r-- mali_kbase/mali_kbase_mem.c | 1066
-rw-r--r-- mali_kbase/mali_kbase_mem.h | 548
-rw-r--r-- mali_kbase/mali_kbase_mem_linux.c | 242
-rw-r--r-- mali_kbase/mali_kbase_mem_migrate.c | 93
-rw-r--r-- mali_kbase/mali_kbase_mem_migrate.h | 10
-rw-r--r-- mali_kbase/mali_kbase_mem_pool.c | 43
-rw-r--r-- mali_kbase/mali_kbase_pbha.c | 53
-rw-r--r-- mali_kbase/mali_kbase_pm.c | 26
-rw-r--r-- mali_kbase/mali_kbase_pm.h | 9
-rw-r--r-- mali_kbase/mali_kbase_softjobs.c | 12
-rw-r--r-- mali_kbase/mali_kbase_strings.h | 23
-rw-r--r-- mali_kbase/mali_kbase_utility.h | 52
-rw-r--r-- mali_kbase/mali_kbase_vinstr.c | 10
-rw-r--r-- mali_kbase/mali_linux_trace.h | 4
-rw-r--r-- mali_kbase/mali_power_gpu_work_period_trace.c (renamed from mali_kbase/mali_kbase_strings.c) | 16
-rw-r--r-- mali_kbase/mali_power_gpu_work_period_trace.h | 88
-rw-r--r-- mali_kbase/mmu/backend/mali_kbase_mmu_csf.c | 48
-rw-r--r-- mali_kbase/mmu/backend/mali_kbase_mmu_jm.c | 31
-rw-r--r-- mali_kbase/mmu/mali_kbase_mmu.c | 294
-rw-r--r-- mali_kbase/mmu/mali_kbase_mmu.h | 86
-rw-r--r-- mali_kbase/mmu/mali_kbase_mmu_hw.h | 4
-rw-r--r-- mali_kbase/mmu/mali_kbase_mmu_hw_direct.c | 56
-rw-r--r-- mali_kbase/platform/Kconfig | 4
-rw-r--r-- mali_kbase/platform/meson/mali_kbase_config_platform.h | 8
-rw-r--r-- mali_kbase/platform/pixel/pixel_gpu_sscd.c | 2
-rw-r--r-- mali_kbase/tests/Kbuild | 4
-rw-r--r-- mali_kbase/tests/Kconfig | 3
-rw-r--r-- mali_kbase/tests/Mconfig | 3
-rw-r--r-- mali_kbase/tests/mali_kutf_clk_rate_trace/kernel/mali_kutf_clk_rate_trace_test.c | 7
-rw-r--r-- mali_kbase/thirdparty/mali_kbase_mmap.c | 5
129 files changed, 7422 insertions, 3458 deletions
diff --git a/common/include/linux/version_compat_defs.h b/common/include/linux/version_compat_defs.h
index c9b1f62..47551f2 100644
--- a/common/include/linux/version_compat_defs.h
+++ b/common/include/linux/version_compat_defs.h
@@ -23,6 +23,21 @@
#define _VERSION_COMPAT_DEFS_H_
#include <linux/version.h>
+#include <linux/highmem.h>
+#include <linux/timer.h>
+
+#if (KERNEL_VERSION(4, 4, 267) < LINUX_VERSION_CODE)
+#include <linux/overflow.h>
+#endif
+
+#include <linux/bitops.h>
+#if (KERNEL_VERSION(4, 19, 0) <= LINUX_VERSION_CODE)
+#include <linux/bits.h>
+#endif
+
+#ifndef BITS_PER_TYPE
+#define BITS_PER_TYPE(type) (sizeof(type) * BITS_PER_BYTE)
+#endif
#if KERNEL_VERSION(4, 16, 0) > LINUX_VERSION_CODE
typedef unsigned int __poll_t;
@@ -62,18 +77,167 @@ typedef unsigned int __poll_t;
/* Replace the default definition with CONFIG_LSM_MMAP_MIN_ADDR */
#undef kbase_mmap_min_addr
#define kbase_mmap_min_addr CONFIG_LSM_MMAP_MIN_ADDR
-#pragma message "kbase_mmap_min_addr compiled to CONFIG_LSM_MMAP_MIN_ADDR, no runtime update!"
+#define KBASE_COMPILED_MMAP_MIN_ADDR_MSG \
+ "* MALI kbase_mmap_min_addr compiled to CONFIG_LSM_MMAP_MIN_ADDR, no runtime update possible! *"
#endif /* (CONFIG_LSM_MMAP_MIN_ADDR > CONFIG_DEFAULT_MMAP_MIN_ADDR) */
#endif /* CONFIG_LSM_MMAP_MIN_ADDR */
#if (kbase_mmap_min_addr == CONFIG_DEFAULT_MMAP_MIN_ADDR)
-#pragma message "kbase_mmap_min_addr compiled to CONFIG_DEFAULT_MMAP_MIN_ADDR, no runtime update!"
+#define KBASE_COMPILED_MMAP_MIN_ADDR_MSG \
+ "* MALI kbase_mmap_min_addr compiled to CONFIG_DEFAULT_MMAP_MIN_ADDR, no runtime update possible! *"
#endif
#else /* CONFIG_MMU */
#define kbase_mmap_min_addr (0UL)
-#pragma message "kbase_mmap_min_addr compiled to (0UL), no runtime update!"
+#define KBASE_COMPILED_MMAP_MIN_ADDR_MSG \
+ "* MALI kbase_mmap_min_addr compiled to (0UL), no runtime update possible! *"
#endif /* CONFIG_MMU */
#endif /* KERNEL_VERSION(6, 1, 0) <= LINUX_VERSION_CODE */
+static inline void kbase_timer_setup(struct timer_list *timer,
+ void (*callback)(struct timer_list *timer))
+{
+#if KERNEL_VERSION(4, 14, 0) > LINUX_VERSION_CODE
+ setup_timer(timer, (void (*)(unsigned long))callback, (unsigned long)timer);
+#else
+ timer_setup(timer, callback, 0);
+#endif
+}
+
+#ifndef WRITE_ONCE
+#ifdef ASSIGN_ONCE
+#define WRITE_ONCE(x, val) ASSIGN_ONCE(val, x)
+#else
+#define WRITE_ONCE(x, val) (ACCESS_ONCE(x) = (val))
+#endif
+#endif
+
+#ifndef READ_ONCE
+#define READ_ONCE(x) ACCESS_ONCE(x)
+#endif
+
+static inline void *kbase_kmap(struct page *p)
+{
+#if KERNEL_VERSION(5, 11, 0) <= LINUX_VERSION_CODE
+ return kmap_local_page(p);
+#else
+ return kmap(p);
+#endif /* KERNEL_VERSION(5, 11, 0) */
+}
+
+static inline void *kbase_kmap_atomic(struct page *p)
+{
+#if KERNEL_VERSION(5, 11, 0) <= LINUX_VERSION_CODE
+ return kmap_local_page(p);
+#else
+ return kmap_atomic(p);
+#endif /* KERNEL_VERSION(5, 11, 0) */
+}
+
+static inline void kbase_kunmap(struct page *p, void *address)
+{
+#if KERNEL_VERSION(5, 11, 0) <= LINUX_VERSION_CODE
+ kunmap_local(address);
+#else
+ kunmap(p);
+#endif /* KERNEL_VERSION(5, 11, 0) */
+}
+
+static inline void kbase_kunmap_atomic(void *address)
+{
+#if KERNEL_VERSION(5, 11, 0) <= LINUX_VERSION_CODE
+ kunmap_local(address);
+#else
+ kunmap_atomic(address);
+#endif /* KERNEL_VERSION(5, 11, 0) */
+}
+
+#if (KERNEL_VERSION(4, 4, 267) >= LINUX_VERSION_CODE)
+/* Some of the older 4.4 kernel patch versions do
+ * not contain the overflow check functions. However,
+ * they are based on compiler intrinsics, so they
+ * are simple to reproduce.
+ */
+#define check_mul_overflow(a, b, d) __builtin_mul_overflow(a, b, d)
+#endif
+
+/*
+ * There was a big rename in the 4.10 kernel (fence* -> dma_fence*),
+ * with most of the related functions keeping the same signatures.
+ */
+
+#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
+
+#include <linux/fence.h>
+
+#define dma_fence fence
+#define dma_fence_ops fence_ops
+#define dma_fence_context_alloc(a) fence_context_alloc(a)
+#define dma_fence_init(a, b, c, d, e) fence_init(a, b, c, d, e)
+#define dma_fence_get(a) fence_get(a)
+#define dma_fence_put(a) fence_put(a)
+#define dma_fence_signal(a) fence_signal(a)
+#define dma_fence_is_signaled(a) fence_is_signaled(a)
+#define dma_fence_add_callback(a, b, c) fence_add_callback(a, b, c)
+#define dma_fence_remove_callback(a, b) fence_remove_callback(a, b)
+#define dma_fence_default_wait fence_default_wait
+
+#if (KERNEL_VERSION(4, 9, 68) <= LINUX_VERSION_CODE)
+#define dma_fence_get_status(a) (fence_is_signaled(a) ? (a)->error ?: 1 : 0)
+#else
+#define dma_fence_get_status(a) (fence_is_signaled(a) ? (a)->status ?: 1 : 0)
+#endif
+
+#else
+
+#include <linux/dma-fence.h>
+
+#if (KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE)
+#define dma_fence_get_status(a) (dma_fence_is_signaled(a) ? (a)->status ?: 1 : 0)
+#endif
+
+#endif /* < 4.10.0 */
+
+static inline void dma_fence_set_error_helper(
+#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
+ struct fence *fence,
+#else
+ struct dma_fence *fence,
+#endif
+ int error)
+{
+#if (KERNEL_VERSION(4, 11, 0) <= LINUX_VERSION_CODE)
+ dma_fence_set_error(fence, error);
+#elif (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE && \
+ KERNEL_VERSION(4, 9, 68) <= LINUX_VERSION_CODE)
+ fence_set_error(fence, error);
+#else
+ fence->status = error;
+#endif
+}
+
+#include <linux/mm.h>
+#if !((KERNEL_VERSION(6, 3, 0) <= LINUX_VERSION_CODE) || \
+ ((KERNEL_VERSION(6, 1, 25) <= LINUX_VERSION_CODE) && defined(__ANDROID_COMMON_KERNEL__)))
+static inline void vm_flags_set(struct vm_area_struct *vma, vm_flags_t flags)
+{
+ vma->vm_flags |= flags;
+}
+static inline void vm_flags_clear(struct vm_area_struct *vma, vm_flags_t flags)
+{
+ vma->vm_flags &= ~flags;
+}
+#endif
+
+#if (KERNEL_VERSION(6, 4, 0) <= LINUX_VERSION_CODE)
+#define KBASE_CLASS_CREATE(owner, name) class_create(name)
+#else
+#define KBASE_CLASS_CREATE(owner, name) class_create(owner, name)
+#endif
+
#endif /* _VERSION_COMPAT_DEFS_H_ */
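The helpers above exist so that kbase call sites stay identical across kernel versions. A minimal usage sketch (illustrative only, not part of this patch; the function name and include set are assumptions):

#include <linux/mm.h>
#include <linux/string.h>
#include <linux/version_compat_defs.h>

/* Zero one page through the compat wrappers: this resolves to
 * kmap_local_page()/kunmap_local() on kernels >= 5.11 and to
 * kmap_atomic()/kunmap_atomic() on older ones, per the definitions above.
 */
static void example_zero_page(struct page *p)
{
	void *va = kbase_kmap_atomic(p);

	memset(va, 0, PAGE_SIZE);
	kbase_kunmap_atomic(va);
}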
diff --git a/common/include/uapi/gpu/arm/midgard/csf/mali_base_csf_kernel.h b/common/include/uapi/gpu/arm/midgard/csf/mali_base_csf_kernel.h
index c6f6ff1..a8e5802 100644
--- a/common/include/uapi/gpu/arm/midgard/csf/mali_base_csf_kernel.h
+++ b/common/include/uapi/gpu/arm/midgard/csf/mali_base_csf_kernel.h
@@ -177,7 +177,7 @@ enum base_kcpu_command_type {
BASE_KCPU_COMMAND_TYPE_JIT_ALLOC,
BASE_KCPU_COMMAND_TYPE_JIT_FREE,
BASE_KCPU_COMMAND_TYPE_GROUP_SUSPEND,
- BASE_KCPU_COMMAND_TYPE_ERROR_BARRIER
+ BASE_KCPU_COMMAND_TYPE_ERROR_BARRIER,
};
/**
diff --git a/common/include/uapi/gpu/arm/midgard/csf/mali_kbase_csf_ioctl.h b/common/include/uapi/gpu/arm/midgard/csf/mali_kbase_csf_ioctl.h
index 7c37cfc..c9de5fd 100644
--- a/common/include/uapi/gpu/arm/midgard/csf/mali_kbase_csf_ioctl.h
+++ b/common/include/uapi/gpu/arm/midgard/csf/mali_kbase_csf_ioctl.h
@@ -82,10 +82,18 @@
* - Relax the requirement to create a mapping with BASE_MEM_MAP_TRACKING_HANDLE
* before allocating GPU memory for the context.
* - CPU mappings of USER_BUFFER imported memory handles must be cached.
+ * 1.19:
+ * - Add NE support in queue_group_create IOCTL fields
+ * - Previous version retained as KBASE_IOCTL_CS_QUEUE_GROUP_CREATE_1_18 for
+ * backward compatibility.
+ * 1.20:
+ * - Restrict child process from doing supported file operations (like mmap, ioctl,
+ * read, poll) on the file descriptor of mali device file that was inherited
+ * from the parent process.
*/
#define BASE_UK_VERSION_MAJOR 1
-#define BASE_UK_VERSION_MINOR 18
+#define BASE_UK_VERSION_MINOR 20
/**
* struct kbase_ioctl_version_check - Check version compatibility between
@@ -258,6 +266,56 @@ union kbase_ioctl_cs_queue_group_create_1_6 {
_IOWR(KBASE_IOCTL_TYPE, 42, union kbase_ioctl_cs_queue_group_create_1_6)
/**
+ * union kbase_ioctl_cs_queue_group_create_1_18 - Create a GPU command queue group
+ * @in: Input parameters
+ * @in.tiler_mask: Mask of tiler endpoints the group is allowed to use.
+ * @in.fragment_mask: Mask of fragment endpoints the group is allowed to use.
+ * @in.compute_mask: Mask of compute endpoints the group is allowed to use.
+ * @in.cs_min: Minimum number of CSs required.
+ * @in.priority: Queue group's priority within a process.
+ * @in.tiler_max: Maximum number of tiler endpoints the group is allowed
+ * to use.
+ * @in.fragment_max: Maximum number of fragment endpoints the group is
+ * allowed to use.
+ * @in.compute_max: Maximum number of compute endpoints the group is allowed
+ * to use.
+ * @in.csi_handlers: Flags to signal that the application intends to use CSI
+ * exception handlers in some linear buffers to deal with
+ * the given exception types.
+ * @in.padding: Currently unused, must be zero
+ * @out: Output parameters
+ * @out.group_handle: Handle of a newly created queue group.
+ * @out.padding: Currently unused, must be zero
+ * @out.group_uid: UID of the queue group available to base.
+ */
+union kbase_ioctl_cs_queue_group_create_1_18 {
+ struct {
+ __u64 tiler_mask;
+ __u64 fragment_mask;
+ __u64 compute_mask;
+ __u8 cs_min;
+ __u8 priority;
+ __u8 tiler_max;
+ __u8 fragment_max;
+ __u8 compute_max;
+ __u8 csi_handlers;
+ __u8 padding[2];
+ /**
+ * @in.dvs_buf: buffer for deferred vertex shader
+ */
+ __u64 dvs_buf;
+ } in;
+ struct {
+ __u8 group_handle;
+ __u8 padding[3];
+ __u32 group_uid;
+ } out;
+};
+
+#define KBASE_IOCTL_CS_QUEUE_GROUP_CREATE_1_18 \
+ _IOWR(KBASE_IOCTL_TYPE, 58, union kbase_ioctl_cs_queue_group_create_1_18)
+
+/**
* union kbase_ioctl_cs_queue_group_create - Create a GPU command queue group
* @in: Input parameters
* @in.tiler_mask: Mask of tiler endpoints the group is allowed to use.
@@ -291,11 +349,15 @@ union kbase_ioctl_cs_queue_group_create {
__u8 fragment_max;
__u8 compute_max;
__u8 csi_handlers;
- __u8 padding[2];
+ /**
+ * @in.reserved: Reserved, currently unused, must be zero.
+ */
+ __u16 reserved;
/**
* @in.dvs_buf: buffer for deferred vertex shader
*/
__u64 dvs_buf;
+ __u64 padding[9];
} in;
struct {
__u8 group_handle;
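The 1.18 layout above is what userspace built against UK minor version 18 or earlier keeps passing. A hypothetical userspace sketch of issuing the retained ioctl (the endpoint masks, cs_min/priority values, device node and include are illustrative assumptions, not taken from this patch):

#include <string.h>
#include <sys/ioctl.h>
#include "mali_kbase_csf_ioctl.h" /* assumed local copy of this UAPI header */

/* Create a queue group through the legacy 1.18 ioctl on an already-open
 * kbase file descriptor (typically the Mali device node, e.g. /dev/mali0).
 */
static int example_create_group_1_18(int kbase_fd)
{
	union kbase_ioctl_cs_queue_group_create_1_18 req;

	memset(&req, 0, sizeof(req));
	req.in.tiler_mask = ~0ULL;    /* allow every tiler endpoint */
	req.in.fragment_mask = ~0ULL; /* allow every fragment endpoint */
	req.in.compute_mask = ~0ULL;  /* allow every compute endpoint */
	req.in.cs_min = 1;            /* need at least one command stream */
	req.in.priority = 0;

	if (ioctl(kbase_fd, KBASE_IOCTL_CS_QUEUE_GROUP_CREATE_1_18, &req) < 0)
		return -1;

	return req.out.group_handle;  /* handle identifying the new group */
}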
diff --git a/common/include/uapi/gpu/arm/midgard/gpu/backend/mali_kbase_gpu_regmap_csf.h b/common/include/uapi/gpu/arm/midgard/gpu/backend/mali_kbase_gpu_regmap_csf.h
index 0ca5d90..eaa4b2d 100644
--- a/common/include/uapi/gpu/arm/midgard/gpu/backend/mali_kbase_gpu_regmap_csf.h
+++ b/common/include/uapi/gpu/arm/midgard/gpu/backend/mali_kbase_gpu_regmap_csf.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2021-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -22,11 +22,6 @@
#ifndef _UAPI_KBASE_GPU_REGMAP_CSF_H_
#define _UAPI_KBASE_GPU_REGMAP_CSF_H_
-/* IPA control registers */
-#define IPA_CONTROL_BASE 0x40000
-#define IPA_CONTROL_REG(r) (IPA_CONTROL_BASE + (r))
-#define STATUS 0x004 /* (RO) Status register */
-
/* USER base address */
#define USER_BASE 0x0010000
#define USER_REG(r) (USER_BASE + (r))
diff --git a/common/include/uapi/gpu/arm/midgard/gpu/backend/mali_kbase_gpu_regmap_jm.h b/common/include/uapi/gpu/arm/midgard/gpu/backend/mali_kbase_gpu_regmap_jm.h
index 9bfd6d2..d24afcc 100644
--- a/common/include/uapi/gpu/arm/midgard/gpu/backend/mali_kbase_gpu_regmap_jm.h
+++ b/common/include/uapi/gpu/arm/midgard/gpu/backend/mali_kbase_gpu_regmap_jm.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -22,29 +22,4 @@
#ifndef _UAPI_KBASE_GPU_REGMAP_JM_H_
#define _UAPI_KBASE_GPU_REGMAP_JM_H_
-/* GPU control registers */
-
-#define LATEST_FLUSH 0x038 /* (RO) Flush ID of latest clean-and-invalidate operation */
-
-/* Job control registers */
-
-#define JS_HEAD_LO 0x00 /* (RO) Job queue head pointer for job slot n, low word */
-#define JS_HEAD_HI 0x04 /* (RO) Job queue head pointer for job slot n, high word */
-#define JS_TAIL_LO 0x08 /* (RO) Job queue tail pointer for job slot n, low word */
-#define JS_TAIL_HI 0x0C /* (RO) Job queue tail pointer for job slot n, high word */
-#define JS_AFFINITY_LO 0x10 /* (RO) Core affinity mask for job slot n, low word */
-#define JS_AFFINITY_HI 0x14 /* (RO) Core affinity mask for job slot n, high word */
-#define JS_CONFIG 0x18 /* (RO) Configuration settings for job slot n */
-
-#define JS_HEAD_NEXT_LO 0x40 /* (RW) Next job queue head pointer for job slot n, low word */
-#define JS_HEAD_NEXT_HI 0x44 /* (RW) Next job queue head pointer for job slot n, high word */
-#define JS_AFFINITY_NEXT_LO 0x50 /* (RW) Next core affinity mask for job slot n, low word */
-#define JS_AFFINITY_NEXT_HI 0x54 /* (RW) Next core affinity mask for job slot n, high word */
-#define JS_CONFIG_NEXT 0x58 /* (RW) Next configuration settings for job slot n */
-#define JS_COMMAND_NEXT 0x60 /* (RW) Next command register for job slot n */
-
-#define JOB_SLOT0 0x800 /* Configuration registers for job slot 0 */
-
-#define JOB_SLOT_REG(n, r) (JOB_CONTROL_REG(JOB_SLOT0 + ((n) << 7)) + (r))
-
#endif /* _UAPI_KBASE_GPU_REGMAP_JM_H_ */
diff --git a/common/include/uapi/gpu/arm/midgard/gpu/mali_kbase_gpu_regmap.h b/common/include/uapi/gpu/arm/midgard/gpu/mali_kbase_gpu_regmap.h
index 1f33167..8256191 100644
--- a/common/include/uapi/gpu/arm/midgard/gpu/mali_kbase_gpu_regmap.h
+++ b/common/include/uapi/gpu/arm/midgard/gpu/mali_kbase_gpu_regmap.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -28,71 +28,4 @@
#include "backend/mali_kbase_gpu_regmap_jm.h"
#endif /* !MALI_USE_CSF */
-/* Begin Register Offsets */
-/* GPU control registers */
-
-#define GPU_CONTROL_BASE 0x0000
-#define GPU_CONTROL_REG(r) (GPU_CONTROL_BASE + (r))
-
-#define GPU_ID 0x000 /* (RO) GPU and revision identifier */
-
-#define GPU_IRQ_CLEAR 0x024 /* (WO) */
-#define GPU_IRQ_STATUS 0x02C /* (RO) */
-
-#define SHADER_READY_LO 0x140 /* (RO) Shader core ready bitmap, low word */
-#define SHADER_READY_HI 0x144 /* (RO) Shader core ready bitmap, high word */
-
-#define TILER_READY_LO 0x150 /* (RO) Tiler core ready bitmap, low word */
-#define TILER_READY_HI 0x154 /* (RO) Tiler core ready bitmap, high word */
-
-#define L2_READY_LO 0x160 /* (RO) Level 2 cache ready bitmap, low word */
-#define L2_READY_HI 0x164 /* (RO) Level 2 cache ready bitmap, high word */
-
-#define SHADER_PWRON_LO 0x180 /* (WO) Shader core power on bitmap, low word */
-#define SHADER_PWRON_HI 0x184 /* (WO) Shader core power on bitmap, high word */
-
-#define TILER_PWRON_LO 0x190 /* (WO) Tiler core power on bitmap, low word */
-#define TILER_PWRON_HI 0x194 /* (WO) Tiler core power on bitmap, high word */
-
-#define L2_PWRON_LO 0x1A0 /* (WO) Level 2 cache power on bitmap, low word */
-#define L2_PWRON_HI 0x1A4 /* (WO) Level 2 cache power on bitmap, high word */
-
-/* Job control registers */
-
-#define JOB_CONTROL_BASE 0x1000
-
-#define JOB_CONTROL_REG(r) (JOB_CONTROL_BASE + (r))
-
-#define JOB_IRQ_CLEAR 0x004 /* Interrupt clear register */
-#define JOB_IRQ_MASK 0x008 /* Interrupt mask register */
-#define JOB_IRQ_STATUS 0x00C /* Interrupt status register */
-
-/* MMU control registers */
-
-#define MEMORY_MANAGEMENT_BASE 0x2000
-
-#define MMU_REG(r) (MEMORY_MANAGEMENT_BASE + (r))
-
-#define MMU_IRQ_RAWSTAT 0x000 /* (RW) Raw interrupt status register */
-#define MMU_IRQ_CLEAR 0x004 /* (WO) Interrupt clear register */
-#define MMU_IRQ_MASK 0x008 /* (RW) Interrupt mask register */
-#define MMU_IRQ_STATUS 0x00C /* (RO) Interrupt status register */
-
-#define MMU_AS0 0x400 /* Configuration registers for address space 0 */
-
-/* MMU address space control registers */
-
-#define MMU_AS_REG(n, r) (MMU_REG(MMU_AS0 + ((n) << 6)) + (r))
-
-#define AS_TRANSTAB_LO 0x00 /* (RW) Translation Table Base Address for address space n, low word */
-#define AS_TRANSTAB_HI 0x04 /* (RW) Translation Table Base Address for address space n, high word */
-#define AS_MEMATTR_LO 0x08 /* (RW) Memory attributes for address space n, low word. */
-#define AS_MEMATTR_HI 0x0C /* (RW) Memory attributes for address space n, high word. */
-#define AS_COMMAND 0x18 /* (WO) MMU command register for address space n */
-
-/* (RW) Translation table configuration for address space n, low word */
-#define AS_TRANSCFG_LO 0x30
-/* (RW) Translation table configuration for address space n, high word */
-#define AS_TRANSCFG_HI 0x34
-
#endif /* _UAPI_KBASE_GPU_REGMAP_H_ */
diff --git a/common/include/uapi/gpu/arm/midgard/jm/mali_kbase_jm_ioctl.h b/common/include/uapi/gpu/arm/midgard/jm/mali_kbase_jm_ioctl.h
index ac6affe..f2329f9 100644
--- a/common/include/uapi/gpu/arm/midgard/jm/mali_kbase_jm_ioctl.h
+++ b/common/include/uapi/gpu/arm/midgard/jm/mali_kbase_jm_ioctl.h
@@ -143,9 +143,14 @@
* - Relax the requirement to create a mapping with BASE_MEM_MAP_TRACKING_HANDLE
* before allocating GPU memory for the context.
* - CPU mappings of USER_BUFFER imported memory handles must be cached.
+ * 11.39:
+ * - Restrict child process from doing supported file operations (like mmap, ioctl,
+ * read, poll) on the file descriptor of mali device file that was inherited
+ * from the parent process.
*/
+
#define BASE_UK_VERSION_MAJOR 11
-#define BASE_UK_VERSION_MINOR 38
+#define BASE_UK_VERSION_MINOR 39
/**
* struct kbase_ioctl_version_check - Check version compatibility between
diff --git a/mali_kbase/BUILD.bazel b/mali_kbase/BUILD.bazel
index e38f617..54dd437 100644
--- a/mali_kbase/BUILD.bazel
+++ b/mali_kbase/BUILD.bazel
@@ -1,27 +1,45 @@
-# NOTE: THIS FILE IS EXPERIMENTAL FOR THE BAZEL MIGRATION AND NOT USED FOR
-# YOUR BUILDS CURRENTLY.
+# This program is free software and is provided to you under the terms of the
+# GNU General Public License version 2 as published by the Free Software
+# Foundation, and any use by you of this program is subject to the terms
+# of such GNU license.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
#
-# It is not yet the source of truth for your build. If you're looking to modify
-# the build file, modify the Android.bp file instead. Do *not* modify this file
-# unless you have coordinated with the team managing the Soong to Bazel
-# migration.
-load("//build/kleaf:kernel.bzl", "kernel_module")
+load(
+ "//build/kernel/kleaf:kernel.bzl",
+ "kernel_module",
+)
+
+_midgard_modules = [
+ "mali_kbase.ko",
+ "tests/kutf/mali_kutf.ko",
+ "tests/mali_kutf_clk_rate_trace/kernel/mali_kutf_clk_rate_trace_test_portal.ko",
+]
kernel_module(
name = "mali_kbase.cloudripper",
srcs = glob([
"**/*.c",
"**/*.h",
- "**/Kbuild",
+ "**/*Kbuild",
+ "**/*Makefile",
]) + [
+ "//common:kernel_headers",
+ "//common-modules/mali:headers",
+ "//common-modules/mali/drivers/gpu/arm/arbitration",
+ "//common-modules/mali/drivers/xen/arm:xen",
"//private/google-modules/gpu/common:headers",
],
- outs = [
- "mali_kbase.ko",
- "tests/kutf/mali_kutf.ko",
- "tests/mali_kutf_clk_rate_trace/kernel/mali_kutf_clk_rate_trace_test_portal.ko",
- ],
+ outs = _midgard_modules,
kernel_build = "//private/gs-google:cloudripper",
visibility = [
"//private/gs-google:__pkg__",
@@ -30,3 +48,14 @@ kernel_module(
"//private/google-modules/gpu/mali_pixel",
],
)
+
+filegroup(
+ name = "midgard_kconfig.cloudripper",
+ srcs = glob([
+ "**/*Kconfig",
+ ]),
+ visibility = [
+ "//common:__pkg__",
+ "//common-modules/mali:__subpackages__",
+ ],
+)
diff --git a/mali_kbase/Kbuild b/mali_kbase/Kbuild
index 9da4141..ff0a0de 100644
--- a/mali_kbase/Kbuild
+++ b/mali_kbase/Kbuild
@@ -68,12 +68,11 @@ endif
# Configurations
#
-# Driver version string which is returned to userspace via an ioctl
-MALI_RELEASE_NAME ?= '"r43p0-01eac0"'
-
# We are building for Pixel
CONFIG_MALI_PLATFORM_NAME="pixel"
+# Driver version string which is returned to userspace via an ioctl
+MALI_RELEASE_NAME ?= '"r44p1-00dev3"'
# Set up defaults if not defined by build system
ifeq ($(CONFIG_MALI_DEBUG), y)
MALI_UNIT_TEST = 1
@@ -191,7 +190,6 @@ mali_kbase-y := \
mali_kbase_mem_pool.o \
mali_kbase_mem_pool_debugfs.o \
mali_kbase_debugfs_helper.o \
- mali_kbase_strings.o \
mali_kbase_as_fault_debugfs.o \
mali_kbase_regs_history_debugfs.o \
mali_kbase_dvfs_debugfs.o \
@@ -208,6 +206,10 @@ mali_kbase-$(CONFIG_SYNC_FILE) += \
mali_kbase_sync_file.o \
mali_kbase_sync_common.o
+mali_kbase-$(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) += \
+ mali_power_gpu_work_period_trace.o \
+ mali_kbase_gpu_metrics.o
+
ifneq ($(CONFIG_MALI_CSF_SUPPORT),y)
mali_kbase-y += \
mali_kbase_jm.o \
diff --git a/mali_kbase/Kconfig b/mali_kbase/Kconfig
index 46e3546..bb25ef4 100644
--- a/mali_kbase/Kconfig
+++ b/mali_kbase/Kconfig
@@ -65,11 +65,18 @@ config MALI_NO_MALI
All calls to the simulated hardware will complete immediately as if the hardware
completed the task.
+config MALI_NO_MALI_DEFAULT_GPU
+ string "Default GPU for No Mali"
+ depends on MALI_NO_MALI
+ default "tMIx"
+ help
+ This option sets the default GPU to identify as for No Mali builds.
+
endchoice
menu "Platform specific options"
-source "drivers/gpu/arm/midgard/platform/Kconfig"
+source "$(MALI_KCONFIG_EXT_PREFIX)drivers/gpu/arm/midgard/platform/Kconfig"
endmenu
config MALI_CSF_SUPPORT
@@ -193,6 +200,22 @@ config LARGE_PAGE_ALLOC
If in doubt, say N
+config PAGE_MIGRATION_SUPPORT
+ bool "Enable support for page migration"
+ depends on MALI_MIDGARD && MALI_EXPERT
+ default y
+ default n if ANDROID
+ help
+ Compile in support for page migration.
+ If set to disabled ('n') then page migration cannot
+ be enabled at all, and related symbols are not compiled in.
+ If not set, page migration is compiled in by default, and
+ if not explicitly enabled or disabled with the insmod parameter,
+ page migration becomes automatically enabled with large pages.
+
+ If in doubt, say Y. To strip out page migration symbols and support,
+ say N.
+
config MALI_MEMORY_FULLY_BACKED
bool "Enable memory fully physically-backed"
depends on MALI_MIDGARD && MALI_EXPERT
@@ -395,7 +418,16 @@ config MALI_ARBITRATION
virtualization setup for Mali
If unsure, say N.
+config MALI_TRACE_POWER_GPU_WORK_PERIOD
+ bool "Enable per-application GPU metrics tracepoints"
+ depends on MALI_MIDGARD
+ default y
+ help
+ This option enables per-application GPU metrics tracepoints.
+
+ If unsure, say N.
+
-source "drivers/gpu/arm/midgard/tests/Kconfig"
+source "$(MALI_KCONFIG_EXT_PREFIX)drivers/gpu/arm/midgard/tests/Kconfig"
endif
diff --git a/mali_kbase/Makefile b/mali_kbase/Makefile
index d851653..59b306b 100644
--- a/mali_kbase/Makefile
+++ b/mali_kbase/Makefile
@@ -20,8 +20,6 @@
KERNEL_SRC ?= /lib/modules/$(shell uname -r)/build
KDIR ?= $(KERNEL_SRC)
-
-# Ensure build intermediates are in OUT_DIR instead of alongside the source
M ?= $(shell pwd)
ifeq ($(KDIR),)
@@ -39,6 +37,7 @@ CONFIG_MALI_SYSTEM_TRACE=y
# Core kbase configuration options
CONFIG_MALI_EXPERT=y
CONFIG_MALI_MIDGARD_DVFS=y
+CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD = y
# Pixel integration specific configuration options
CONFIG_MALI_PLATFORM_NAME="pixel"
@@ -54,164 +53,176 @@ CONFIG_MALI_PIXEL_GPU_SLC ?= y
# Dependency resolution is done through statements as Kconfig
# is not supported for out-of-tree builds.
#
+CONFIGS :=
+ifeq ($(MALI_KCONFIG_EXT_PREFIX),)
+ CONFIG_MALI_MIDGARD ?= m
+ ifeq ($(CONFIG_MALI_MIDGARD),m)
+ CONFIG_MALI_PLATFORM_NAME ?= "devicetree"
+ CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD ?= y
+ CONFIG_MALI_GATOR_SUPPORT ?= y
+ CONFIG_MALI_ARBITRATION ?= n
+ CONFIG_MALI_PARTITION_MANAGER ?= n
+
+ ifneq ($(CONFIG_MALI_NO_MALI),y)
+ # Prevent misuse when CONFIG_MALI_NO_MALI=y
+ CONFIG_MALI_REAL_HW ?= y
+ CONFIG_MALI_CORESIGHT = n
+ endif
-CONFIG_MALI_MIDGARD ?= m
-ifeq ($(CONFIG_MALI_MIDGARD),m)
- CONFIG_MALI_PLATFORM_NAME ?= "devicetree"
- CONFIG_MALI_GATOR_SUPPORT ?= y
- CONFIG_MALI_ARBITRATION ?= n
- CONFIG_MALI_PARTITION_MANAGER ?= n
-
- ifneq ($(CONFIG_MALI_NO_MALI),y)
- # Prevent misuse when CONFIG_MALI_NO_MALI=y
- CONFIG_MALI_REAL_HW ?= y
- CONFIG_MALI_CORESIGHT = n
- endif
-
- ifeq ($(CONFIG_MALI_MIDGARD_DVFS),y)
- # Prevent misuse when CONFIG_MALI_MIDGARD_DVFS=y
- CONFIG_MALI_DEVFREQ ?= n
- else
- CONFIG_MALI_DEVFREQ ?= y
- endif
+ ifeq ($(CONFIG_MALI_MIDGARD_DVFS),y)
+ # Prevent misuse when CONFIG_MALI_MIDGARD_DVFS=y
+ CONFIG_MALI_DEVFREQ ?= n
+ else
+ CONFIG_MALI_DEVFREQ ?= y
+ endif
- ifeq ($(CONFIG_MALI_DMA_BUF_MAP_ON_DEMAND), y)
- # Prevent misuse when CONFIG_MALI_DMA_BUF_MAP_ON_DEMAND=y
- CONFIG_MALI_DMA_BUF_LEGACY_COMPAT = n
- endif
+ ifeq ($(CONFIG_MALI_DMA_BUF_MAP_ON_DEMAND), y)
+ # Prevent misuse when CONFIG_MALI_DMA_BUF_MAP_ON_DEMAND=y
+ CONFIG_MALI_DMA_BUF_LEGACY_COMPAT = n
+ endif
- ifeq ($(CONFIG_MALI_CSF_SUPPORT), y)
- CONFIG_MALI_CORESIGHT ?= n
- endif
+ ifeq ($(CONFIG_MALI_CSF_SUPPORT), y)
+ CONFIG_MALI_CORESIGHT ?= n
+ endif
- #
- # Expert/Debug/Test released configurations
- #
- ifeq ($(CONFIG_MALI_EXPERT), y)
- ifeq ($(CONFIG_MALI_NO_MALI), y)
- CONFIG_MALI_REAL_HW = n
+ #
+ # Expert/Debug/Test released configurations
+ #
+ ifeq ($(CONFIG_MALI_EXPERT), y)
+ ifeq ($(CONFIG_MALI_NO_MALI), y)
+ CONFIG_MALI_REAL_HW = n
+ CONFIG_MALI_NO_MALI_DEFAULT_GPU ?= "tMIx"
- else
- # Prevent misuse when CONFIG_MALI_NO_MALI=n
- CONFIG_MALI_REAL_HW = y
- CONFIG_MALI_ERROR_INJECT = n
- endif
+ else
+ # Prevent misuse when CONFIG_MALI_NO_MALI=n
+ CONFIG_MALI_REAL_HW = y
+ CONFIG_MALI_ERROR_INJECT = n
+ endif
- ifeq ($(CONFIG_MALI_HW_ERRATA_1485982_NOT_AFFECTED), y)
- # Prevent misuse when CONFIG_MALI_HW_ERRATA_1485982_NOT_AFFECTED=y
- CONFIG_MALI_HW_ERRATA_1485982_USE_CLOCK_ALTERNATIVE = n
- endif
+ ifeq ($(CONFIG_MALI_HW_ERRATA_1485982_NOT_AFFECTED), y)
+ # Prevent misuse when CONFIG_MALI_HW_ERRATA_1485982_NOT_AFFECTED=y
+ CONFIG_MALI_HW_ERRATA_1485982_USE_CLOCK_ALTERNATIVE = n
+ endif
- ifeq ($(CONFIG_MALI_DEBUG), y)
- CONFIG_MALI_MIDGARD_ENABLE_TRACE ?= y
- CONFIG_MALI_SYSTEM_TRACE ?= y
+ ifeq ($(CONFIG_MALI_DEBUG), y)
+ CONFIG_MALI_MIDGARD_ENABLE_TRACE ?= y
+ CONFIG_MALI_SYSTEM_TRACE ?= y
- ifeq ($(CONFIG_SYNC_FILE), y)
- CONFIG_MALI_FENCE_DEBUG ?= y
+ ifeq ($(CONFIG_SYNC_FILE), y)
+ CONFIG_MALI_FENCE_DEBUG ?= y
+ else
+ CONFIG_MALI_FENCE_DEBUG = n
+ endif
else
+ # Prevent misuse when CONFIG_MALI_DEBUG=n
+ CONFIG_MALI_MIDGARD_ENABLE_TRACE = n
+ CONFIG_MALI_SYSTEM_TRACE = n
CONFIG_MALI_FENCE_DEBUG = n
endif
else
- # Prevent misuse when CONFIG_MALI_DEBUG=n
+ # Prevent misuse when CONFIG_MALI_EXPERT=n
+ CONFIG_MALI_CORESTACK = n
+ CONFIG_LARGE_PAGE_ALLOC_OVERRIDE = n
+ CONFIG_LARGE_PAGE_ALLOC = n
+ CONFIG_MALI_PWRSOFT_765 = n
+ CONFIG_MALI_MEMORY_FULLY_BACKED = n
+ CONFIG_MALI_JOB_DUMP = n
+ CONFIG_MALI_NO_MALI = n
+ CONFIG_MALI_REAL_HW = y
+ CONFIG_MALI_ERROR_INJECT = n
+ CONFIG_MALI_HW_ERRATA_1485982_NOT_AFFECTED = n
+ CONFIG_MALI_HW_ERRATA_1485982_USE_CLOCK_ALTERNATIVE = n
+ CONFIG_MALI_HOST_CONTROLS_SC_RAILS = n
+ CONFIG_MALI_PRFCNT_SET_SELECT_VIA_DEBUG_FS = n
+ CONFIG_MALI_DEBUG = n
CONFIG_MALI_MIDGARD_ENABLE_TRACE = n
CONFIG_MALI_FENCE_DEBUG = n
endif
- else
- # Prevent misuse when CONFIG_MALI_EXPERT=n
- CONFIG_MALI_CORESTACK = n
- CONFIG_LARGE_PAGE_ALLOC_OVERRIDE = n
- CONFIG_LARGE_PAGE_ALLOC = n
- CONFIG_MALI_PWRSOFT_765 = n
- CONFIG_MALI_MEMORY_FULLY_BACKED = n
- CONFIG_MALI_JOB_DUMP = n
- CONFIG_MALI_NO_MALI = n
- CONFIG_MALI_REAL_HW = y
- CONFIG_MALI_ERROR_INJECT = n
- CONFIG_MALI_HW_ERRATA_1485982_NOT_AFFECTED = n
- CONFIG_MALI_HW_ERRATA_1485982_USE_CLOCK_ALTERNATIVE = n
- CONFIG_MALI_HOST_CONTROLS_SC_RAILS = n
- CONFIG_MALI_PRFCNT_SET_SELECT_VIA_DEBUG_FS = n
- CONFIG_MALI_DEBUG = n
- CONFIG_MALI_MIDGARD_ENABLE_TRACE = n
- CONFIG_MALI_FENCE_DEBUG = n
- endif
- ifeq ($(CONFIG_MALI_DEBUG), y)
- CONFIG_MALI_KUTF ?= y
- ifeq ($(CONFIG_MALI_KUTF), y)
- CONFIG_MALI_KUTF_IRQ_TEST ?= y
- CONFIG_MALI_KUTF_CLK_RATE_TRACE ?= y
- CONFIG_MALI_KUTF_MGM_INTEGRATION_TEST ?= y
+ ifeq ($(CONFIG_MALI_DEBUG), y)
+ CONFIG_MALI_KUTF ?= y
+ ifeq ($(CONFIG_MALI_KUTF), y)
+ CONFIG_MALI_KUTF_IRQ_TEST ?= y
+ CONFIG_MALI_KUTF_CLK_RATE_TRACE ?= y
+ CONFIG_MALI_KUTF_MGM_INTEGRATION_TEST ?= y
+ ifeq ($(CONFIG_MALI_DEVFREQ), y)
+ ifeq ($(CONFIG_MALI_NO_MALI), y)
+ CONFIG_MALI_KUTF_IPA_UNIT_TEST ?= y
+ endif
+ endif
+
+ else
+ # Prevent misuse when CONFIG_MALI_KUTF=n
+ CONFIG_MALI_KUTF_IRQ_TEST = n
+ CONFIG_MALI_KUTF_CLK_RATE_TRACE = n
+ CONFIG_MALI_KUTF_MGM_INTEGRATION_TEST = n
+ endif
else
- # Prevent misuse when CONFIG_MALI_KUTF=n
+ # Prevent misuse when CONFIG_MALI_DEBUG=n
+ CONFIG_MALI_KUTF = y
CONFIG_MALI_KUTF_IRQ_TEST = n
CONFIG_MALI_KUTF_CLK_RATE_TRACE = n
CONFIG_MALI_KUTF_MGM_INTEGRATION_TEST = n
endif
else
- # Prevent misuse when CONFIG_MALI_DEBUG=n
- CONFIG_MALI_KUTF = y
+ # Prevent misuse when CONFIG_MALI_MIDGARD=n
+ CONFIG_MALI_ARBITRATION = n
+ CONFIG_MALI_KUTF = n
CONFIG_MALI_KUTF_IRQ_TEST = n
CONFIG_MALI_KUTF_CLK_RATE_TRACE = y
CONFIG_MALI_KUTF_MGM_INTEGRATION_TEST = n
endif
-else
- # Prevent misuse when CONFIG_MALI_MIDGARD=n
- CONFIG_MALI_ARBITRATION = n
- CONFIG_MALI_KUTF = n
- CONFIG_MALI_KUTF_IRQ_TEST = n
- CONFIG_MALI_KUTF_CLK_RATE_TRACE = n
- CONFIG_MALI_KUTF_MGM_INTEGRATION_TEST = n
-endif
-# All Mali CONFIG should be listed here
-CONFIGS := \
- CONFIG_MALI_MIDGARD \
- CONFIG_MALI_GATOR_SUPPORT \
- CONFIG_MALI_ARBITER_SUPPORT \
- CONFIG_MALI_ARBITRATION \
- CONFIG_MALI_PARTITION_MANAGER \
- CONFIG_MALI_REAL_HW \
- CONFIG_MALI_DEVFREQ \
- CONFIG_MALI_MIDGARD_DVFS \
- CONFIG_MALI_DMA_BUF_MAP_ON_DEMAND \
- CONFIG_MALI_DMA_BUF_LEGACY_COMPAT \
- CONFIG_MALI_EXPERT \
- CONFIG_MALI_CORESTACK \
- CONFIG_LARGE_PAGE_ALLOC_OVERRIDE \
- CONFIG_LARGE_PAGE_ALLOC \
- CONFIG_MALI_PWRSOFT_765 \
- CONFIG_MALI_MEMORY_FULLY_BACKED \
- CONFIG_MALI_JOB_DUMP \
- CONFIG_MALI_NO_MALI \
- CONFIG_MALI_ERROR_INJECT \
- CONFIG_MALI_HW_ERRATA_1485982_NOT_AFFECTED \
- CONFIG_MALI_HW_ERRATA_1485982_USE_CLOCK_ALTERNATIVE \
- CONFIG_MALI_HOST_CONTROLS_SC_RAILS \
- CONFIG_MALI_PRFCNT_SET_PRIMARY \
- CONFIG_MALI_PRFCNT_SET_SECONDARY \
- CONFIG_MALI_PRFCNT_SET_TERTIARY \
- CONFIG_MALI_PRFCNT_SET_SELECT_VIA_DEBUG_FS \
- CONFIG_MALI_DEBUG \
- CONFIG_MALI_MIDGARD_ENABLE_TRACE \
- CONFIG_MALI_SYSTEM_TRACE \
- CONFIG_MALI_FENCE_DEBUG \
- CONFIG_MALI_KUTF \
- CONFIG_MALI_KUTF_IRQ_TEST \
- CONFIG_MALI_KUTF_CLK_RATE_TRACE \
- CONFIG_MALI_KUTF_MGM_INTEGRATION_TEST \
- CONFIG_MALI_XEN \
- CONFIG_MALI_CORESIGHT
-
-# Pixel integration CONFIG options
-CONFIGS += \
- CONFIG_MALI_PIXEL_GPU_QOS \
- CONFIG_MALI_PIXEL_GPU_BTS \
- CONFIG_MALI_PIXEL_GPU_THERMAL \
- CONFIG_MALI_PIXEL_GPU_SECURE_RENDERING \
- CONFIG_MALI_HOST_CONTROLS_SC_RAILS \
- CONFIG_MALI_PIXEL_GPU_SLC
+ # All Mali CONFIG should be listed here
+ CONFIGS := \
+ CONFIG_MALI_MIDGARD \
+ CONFIG_MALI_GATOR_SUPPORT \
+ CONFIG_MALI_ARBITER_SUPPORT \
+ CONFIG_MALI_ARBITRATION \
+ CONFIG_MALI_PARTITION_MANAGER \
+ CONFIG_MALI_REAL_HW \
+ CONFIG_MALI_DEVFREQ \
+ CONFIG_MALI_MIDGARD_DVFS \
+ CONFIG_MALI_DMA_BUF_MAP_ON_DEMAND \
+ CONFIG_MALI_DMA_BUF_LEGACY_COMPAT \
+ CONFIG_MALI_EXPERT \
+ CONFIG_MALI_CORESTACK \
+ CONFIG_LARGE_PAGE_ALLOC_OVERRIDE \
+ CONFIG_LARGE_PAGE_ALLOC \
+ CONFIG_MALI_PWRSOFT_765 \
+ CONFIG_MALI_MEMORY_FULLY_BACKED \
+ CONFIG_MALI_JOB_DUMP \
+ CONFIG_MALI_NO_MALI \
+ CONFIG_MALI_ERROR_INJECT \
+ CONFIG_MALI_HW_ERRATA_1485982_NOT_AFFECTED \
+ CONFIG_MALI_HW_ERRATA_1485982_USE_CLOCK_ALTERNATIVE \
+ CONFIG_MALI_HOST_CONTROLS_SC_RAILS \
+ CONFIG_MALI_PRFCNT_SET_PRIMARY \
+ CONFIG_MALI_PRFCNT_SET_SECONDARY \
+ CONFIG_MALI_PRFCNT_SET_TERTIARY \
+ CONFIG_MALI_PRFCNT_SET_SELECT_VIA_DEBUG_FS \
+ CONFIG_MALI_DEBUG \
+ CONFIG_MALI_MIDGARD_ENABLE_TRACE \
+ CONFIG_MALI_SYSTEM_TRACE \
+ CONFIG_MALI_FENCE_DEBUG \
+ CONFIG_MALI_KUTF \
+ CONFIG_MALI_KUTF_IRQ_TEST \
+ CONFIG_MALI_KUTF_CLK_RATE_TRACE \
+ CONFIG_MALI_KUTF_MGM_INTEGRATION_TEST \
+ CONFIG_MALI_XEN \
+ CONFIG_MALI_CORESIGHT \
+ CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD
+
+ # Pixel integration CONFIG options
+ CONFIGS += \
+ CONFIG_MALI_PIXEL_GPU_QOS \
+ CONFIG_MALI_PIXEL_GPU_BTS \
+ CONFIG_MALI_PIXEL_GPU_THERMAL \
+ CONFIG_MALI_PIXEL_GPU_SECURE_RENDERING \
+ CONFIG_MALI_PIXEL_GPU_SLC
+
+endif
THIS_DIR := $(dir $(lastword $(MAKEFILE_LIST)))
-include $(THIS_DIR)/../arbitration/Makefile
@@ -227,7 +238,9 @@ MAKE_ARGS := $(foreach config,$(CONFIGS), \
$(value config)=$(value $(value config)), \
$(value config)=n))
-MAKE_ARGS += CONFIG_MALI_PLATFORM_NAME=$(CONFIG_MALI_PLATFORM_NAME)
+ifeq ($(MALI_KCONFIG_EXT_PREFIX),)
+ MAKE_ARGS += CONFIG_MALI_PLATFORM_NAME=$(CONFIG_MALI_PLATFORM_NAME)
+endif
#
# EXTRA_CFLAGS to define the custom CONFIGs on out-of-tree build
@@ -239,63 +252,66 @@ EXTRA_CFLAGS := $(foreach config,$(CONFIGS), \
$(if $(filter y m,$(value $(value config))), \
-D$(value config)=1))
-EXTRA_CFLAGS += -DCONFIG_MALI_PLATFORM_NAME=$(CONFIG_MALI_PLATFORM_NAME)
+ifeq ($(MALI_KCONFIG_EXT_PREFIX),)
+ EXTRA_CFLAGS += -DCONFIG_MALI_PLATFORM_NAME='\"$(CONFIG_MALI_PLATFORM_NAME)\"'
+ EXTRA_CFLAGS += -DCONFIG_MALI_NO_MALI_DEFAULT_GPU='\"$(CONFIG_MALI_NO_MALI_DEFAULT_GPU)\"'
+endif
#
# KBUILD_EXTRA_SYMBOLS to prevent warnings about unknown functions
#
EXTRA_SYMBOLS += $(OUT_DIR)/../google-modules/gpu/mali_pixel/Module.symvers
-KBUILD_CFLAGS += -Wall -Werror
+CFLAGS_MODULE += -Wall -Werror
# The following were added to align with W=1 in scripts/Makefile.extrawarn
# from the Linux source tree (v5.18.14)
-KBUILD_CFLAGS += -Wextra -Wunused -Wno-unused-parameter
-KBUILD_CFLAGS += -Wmissing-declarations
-KBUILD_CFLAGS += -Wmissing-format-attribute
-KBUILD_CFLAGS += -Wmissing-prototypes
-KBUILD_CFLAGS += -Wold-style-definition
+CFLAGS_MODULE += -Wextra -Wunused -Wno-unused-parameter
+CFLAGS_MODULE += -Wmissing-declarations
+CFLAGS_MODULE += -Wmissing-format-attribute
+CFLAGS_MODULE += -Wmissing-prototypes
+CFLAGS_MODULE += -Wold-style-definition
# The -Wmissing-include-dirs cannot be enabled as the path to some of the
# included directories change depending on whether it is an in-tree or
# out-of-tree build.
-KBUILD_CFLAGS += $(call cc-option, -Wunused-but-set-variable)
-KBUILD_CFLAGS += $(call cc-option, -Wunused-const-variable)
-KBUILD_CFLAGS += $(call cc-option, -Wpacked-not-aligned)
-KBUILD_CFLAGS += $(call cc-option, -Wstringop-truncation)
+CFLAGS_MODULE += $(call cc-option, -Wunused-but-set-variable)
+CFLAGS_MODULE += $(call cc-option, -Wunused-const-variable)
+CFLAGS_MODULE += $(call cc-option, -Wpacked-not-aligned)
+CFLAGS_MODULE += $(call cc-option, -Wstringop-truncation)
# The following turn off the warnings enabled by -Wextra
-KBUILD_CFLAGS += -Wno-sign-compare
-KBUILD_CFLAGS += -Wno-shift-negative-value
+CFLAGS_MODULE += -Wno-sign-compare
+CFLAGS_MODULE += -Wno-shift-negative-value
# This flag is needed to avoid build errors on older kernels
-KBUILD_CFLAGS += $(call cc-option, -Wno-cast-function-type)
+CFLAGS_MODULE += $(call cc-option, -Wno-cast-function-type)
KBUILD_CPPFLAGS += -DKBUILD_EXTRA_WARN1
# The following were added to align with W=2 in scripts/Makefile.extrawarn
# from the Linux source tree (v5.18.14)
-KBUILD_CFLAGS += -Wdisabled-optimization
+CFLAGS_MODULE += -Wdisabled-optimization
# The -Wshadow flag cannot be enabled unless upstream kernels are
# patched to fix redefinitions of certain built-in functions and
# global variables.
-KBUILD_CFLAGS += $(call cc-option, -Wlogical-op)
-KBUILD_CFLAGS += -Wmissing-field-initializers
+CFLAGS_MODULE += $(call cc-option, -Wlogical-op)
+CFLAGS_MODULE += -Wmissing-field-initializers
# -Wtype-limits must be disabled due to build failures on kernel 5.x
-KBUILD_CFLAGS += -Wno-type-limits
-KBUILD_CFLAGS += $(call cc-option, -Wmaybe-uninitialized)
-KBUILD_CFLAGS += $(call cc-option, -Wunused-macros)
+CFLAGS_MODULE += -Wno-type-limits
+CFLAGS_MODULE += $(call cc-option, -Wmaybe-uninitialized)
+CFLAGS_MODULE += $(call cc-option, -Wunused-macros)
KBUILD_CPPFLAGS += -DKBUILD_EXTRA_WARN2
# This warning is disabled to avoid build failures in some kernel versions
-KBUILD_CFLAGS += -Wno-ignored-qualifiers
+CFLAGS_MODULE += -Wno-ignored-qualifiers
ifeq ($(CONFIG_GCOV_KERNEL),y)
- KBUILD_CFLAGS += $(call cc-option, -ftest-coverage)
- KBUILD_CFLAGS += $(call cc-option, -fprofile-arcs)
+ CFLAGS_MODULE += $(call cc-option, -ftest-coverage)
+ CFLAGS_MODULE += $(call cc-option, -fprofile-arcs)
EXTRA_CFLAGS += -DGCOV_PROFILE=1
endif
ifeq ($(CONFIG_MALI_KCOV),y)
- KBUILD_CFLAGS += $(call cc-option, -fsanitize-coverage=trace-cmp)
+ CFLAGS_MODULE += $(call cc-option, -fsanitize-coverage=trace-cmp)
EXTRA_CFLAGS += -DKCOV=1
EXTRA_CFLAGS += -DKCOV_ENABLE_COMPARISONS=1
endif
diff --git a/mali_kbase/Mconfig b/mali_kbase/Mconfig
index 77a528f..2d6fca0 100644
--- a/mali_kbase/Mconfig
+++ b/mali_kbase/Mconfig
@@ -196,6 +196,18 @@ config MALI_CORESTACK
If unsure, say N.
+config PAGE_MIGRATION_SUPPORT
+ bool "Compile with page migration support"
+ depends on BACKEND_KERNEL
+ default y
+ default n if ANDROID
+ help
+ Compile in support for page migration.
+ If set to disabled ('n') then page migration cannot
+ be enabled at all. If set to enabled, then page migration
+ support is explicitly compiled in. This has no effect when
+ PAGE_MIGRATION_OVERRIDE is disabled.
+
choice
prompt "Error injection level"
depends on MALI_MIDGARD && MALI_EXPERT
@@ -352,5 +364,45 @@ config MALI_HOST_CONTROLS_SC_RAILS
Adapter) inside the GPU to handshake with SoC PMU to control the
power of cores.
+config MALI_TRACE_POWER_GPU_WORK_PERIOD
+ bool "Enable per-application GPU metrics tracepoints"
+ depends on MALI_MIDGARD
+ default y
+ help
+ This option enables per-application GPU metrics tracepoints.
+
+ If unsure, say N.
+
+choice
+ prompt "CSF Firmware trace mode"
+ depends on MALI_MIDGARD
+ default MALI_FW_TRACE_MODE_MANUAL
+ help
+ CSF Firmware log operating mode.
+
+config MALI_FW_TRACE_MODE_MANUAL
+ bool "manual mode"
+ depends on MALI_MIDGARD
+ help
+ firmware log can be read manually by the userspace (and it will
+ also be dumped automatically into dmesg on GPU reset).
+
+config MALI_FW_TRACE_MODE_AUTO_PRINT
+ bool "automatic printing mode"
+ depends on MALI_MIDGARD
+ help
+ firmware log will be periodically emptied into dmesg, manual
+ reading through debugfs is disabled.
+
+config MALI_FW_TRACE_MODE_AUTO_DISCARD
+ bool "automatic discarding mode"
+ depends on MALI_MIDGARD
+ help
+ firmware log will be periodically discarded, the remaining log can be
+ read manually by the userspace (and it will also be dumped
+ automatically into dmesg on GPU reset).
+
+endchoice
+
source "kernel/drivers/gpu/arm/arbitration/Mconfig"
source "kernel/drivers/gpu/arm/midgard/tests/Mconfig"
diff --git a/mali_kbase/backend/gpu/Kbuild b/mali_kbase/backend/gpu/Kbuild
index 7df24c3..c37cc59 100644
--- a/mali_kbase/backend/gpu/Kbuild
+++ b/mali_kbase/backend/gpu/Kbuild
@@ -22,7 +22,6 @@ mali_kbase-y += \
backend/gpu/mali_kbase_cache_policy_backend.o \
backend/gpu/mali_kbase_gpuprops_backend.o \
backend/gpu/mali_kbase_irq_linux.o \
- backend/gpu/mali_kbase_js_backend.o \
backend/gpu/mali_kbase_pm_backend.o \
backend/gpu/mali_kbase_pm_driver.o \
backend/gpu/mali_kbase_pm_metrics.o \
@@ -42,7 +41,8 @@ ifeq ($(MALI_USE_CSF),0)
backend/gpu/mali_kbase_jm_as.o \
backend/gpu/mali_kbase_debug_job_fault_backend.o \
backend/gpu/mali_kbase_jm_hw.o \
- backend/gpu/mali_kbase_jm_rb.o
+ backend/gpu/mali_kbase_jm_rb.o \
+ backend/gpu/mali_kbase_js_backend.o
endif
diff --git a/mali_kbase/backend/gpu/mali_kbase_cache_policy_backend.c b/mali_kbase/backend/gpu/mali_kbase_cache_policy_backend.c
index 7c0abba..86539d5 100644
--- a/mali_kbase/backend/gpu/mali_kbase_cache_policy_backend.c
+++ b/mali_kbase/backend/gpu/mali_kbase_cache_policy_backend.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2014-2016, 2018, 2020-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -43,12 +43,12 @@ void kbase_cache_set_coherency_mode(struct kbase_device *kbdev,
kbdev->current_gpu_coherency_mode = mode;
if (kbasep_amba_register_present(kbdev)) {
- u32 val = kbase_reg_read(kbdev, AMBA_ENABLE);
+ u32 val = kbase_reg_read(kbdev, GPU_CONTROL_REG(AMBA_ENABLE));
val = AMBA_ENABLE_COHERENCY_PROTOCOL_SET(val, mode);
- kbase_reg_write(kbdev, AMBA_ENABLE, val);
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(AMBA_ENABLE), val);
} else
- kbase_reg_write(kbdev, COHERENCY_ENABLE, mode);
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(COHERENCY_ENABLE), mode);
}
u32 kbase_cache_get_coherency_features(struct kbase_device *kbdev)
@@ -69,24 +69,12 @@ void kbase_amba_set_memory_cache_support(struct kbase_device *kbdev,
bool enable)
{
if (kbasep_amba_register_present(kbdev)) {
- u32 val = kbase_reg_read(kbdev, AMBA_ENABLE);
+ u32 val = kbase_reg_read(kbdev, GPU_CONTROL_REG(AMBA_ENABLE));
val = AMBA_ENABLE_MEMORY_CACHE_SUPPORT_SET(val, enable);
- kbase_reg_write(kbdev, AMBA_ENABLE, val);
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(AMBA_ENABLE), val);
} else {
WARN(1, "memory_cache_support not supported");
}
}
-
-void kbase_amba_set_invalidate_hint(struct kbase_device *kbdev, bool enable)
-{
- if (kbasep_amba_register_present(kbdev)) {
- u32 val = kbase_reg_read(kbdev, AMBA_ENABLE);
-
- val = AMBA_ENABLE_INVALIDATE_HINT_SET(val, enable);
- kbase_reg_write(kbdev, AMBA_ENABLE, val);
- } else {
- WARN(1, "invalidate_hint not supported");
- }
-}
diff --git a/mali_kbase/backend/gpu/mali_kbase_cache_policy_backend.h b/mali_kbase/backend/gpu/mali_kbase_cache_policy_backend.h
index 8cd8090..0103695 100644
--- a/mali_kbase/backend/gpu/mali_kbase_cache_policy_backend.h
+++ b/mali_kbase/backend/gpu/mali_kbase_cache_policy_backend.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2014-2016, 2020-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -53,13 +53,4 @@ u32 kbase_cache_get_coherency_features(struct kbase_device *kbdev);
*/
void kbase_amba_set_memory_cache_support(struct kbase_device *kbdev,
bool enable);
-/**
- * kbase_amba_set_invalidate_hint() - Sets AMBA invalidate hint
- * in the GPU.
- * @kbdev: Device pointer
- * @enable: true for enable.
- *
- * Note: Only for arch version 12.x.1 onwards.
- */
-void kbase_amba_set_invalidate_hint(struct kbase_device *kbdev, bool enable);
#endif /* _KBASE_CACHE_POLICY_BACKEND_H_ */
diff --git a/mali_kbase/backend/gpu/mali_kbase_clk_rate_trace_mgr.c b/mali_kbase/backend/gpu/mali_kbase_clk_rate_trace_mgr.c
index 8d09347..cca4f74 100644
--- a/mali_kbase/backend/gpu/mali_kbase_clk_rate_trace_mgr.c
+++ b/mali_kbase/backend/gpu/mali_kbase_clk_rate_trace_mgr.c
@@ -58,8 +58,10 @@ get_clk_rate_trace_callbacks(__maybe_unused struct kbase_device *kbdev)
if (WARN_ON(!kbdev) || WARN_ON(!kbdev->dev))
return callbacks;
- arbiter_if_node =
- of_get_property(kbdev->dev->of_node, "arbiter_if", NULL);
+ arbiter_if_node = of_get_property(kbdev->dev->of_node, "arbiter-if", NULL);
+ if (!arbiter_if_node)
+ arbiter_if_node = of_get_property(kbdev->dev->of_node, "arbiter_if", NULL);
+
/* Arbitration enabled, override the callback pointer.*/
if (arbiter_if_node)
callbacks = &arb_clk_rate_trace_ops;
@@ -241,8 +243,7 @@ void kbase_clk_rate_trace_manager_gpu_active(struct kbase_device *kbdev)
if (!clk_rtm->clk_rate_trace_ops)
return;
- spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
- spin_lock(&clk_rtm->lock);
+ spin_lock_irqsave(&clk_rtm->lock, flags);
for (i = 0; i < BASE_MAX_NR_CLOCKS_REGULATORS; i++) {
struct kbase_clk_data *clk_data = clk_rtm->clks[i];
@@ -258,8 +259,7 @@ void kbase_clk_rate_trace_manager_gpu_active(struct kbase_device *kbdev)
}
clk_rtm->gpu_idle = false;
- spin_unlock(&clk_rtm->lock);
- spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+ spin_unlock_irqrestore(&clk_rtm->lock, flags);
}
void kbase_clk_rate_trace_manager_gpu_idle(struct kbase_device *kbdev)
diff --git a/mali_kbase/backend/gpu/mali_kbase_debug_job_fault_backend.c b/mali_kbase/backend/gpu/mali_kbase_debug_job_fault_backend.c
index e121b41..cd3b29d 100644
--- a/mali_kbase/backend/gpu/mali_kbase_debug_job_fault_backend.c
+++ b/mali_kbase/backend/gpu/mali_kbase_debug_job_fault_backend.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2012-2015, 2018-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2012-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -59,7 +59,7 @@ static int job_slot_reg_snapshot[] = {
JS_CONFIG_NEXT
};
-/*MMU_REG(r)*/
+/*MMU_CONTROL_REG(r)*/
static int mmu_reg_snapshot[] = {
MMU_IRQ_MASK,
MMU_IRQ_STATUS
@@ -118,15 +118,14 @@ bool kbase_debug_job_fault_reg_snapshot_init(struct kbase_context *kctx,
/* get the MMU registers*/
for (i = 0; i < sizeof(mmu_reg_snapshot)/4; i++) {
- kctx->reg_dump[offset] = MMU_REG(mmu_reg_snapshot[i]);
+ kctx->reg_dump[offset] = MMU_CONTROL_REG(mmu_reg_snapshot[i]);
offset += 2;
}
/* get the Address space registers*/
for (j = 0; j < as_number; j++) {
for (i = 0; i < sizeof(as_reg_snapshot)/4; i++) {
- kctx->reg_dump[offset] =
- MMU_AS_REG(j, as_reg_snapshot[i]);
+ kctx->reg_dump[offset] = MMU_STAGE1_REG(MMU_AS_REG(j, as_reg_snapshot[i]));
offset += 2;
}
}
diff --git a/mali_kbase/backend/gpu/mali_kbase_irq_linux.c b/mali_kbase/backend/gpu/mali_kbase_irq_linux.c
index ef09c6b..b95277c 100644
--- a/mali_kbase/backend/gpu/mali_kbase_irq_linux.c
+++ b/mali_kbase/backend/gpu/mali_kbase_irq_linux.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2014-2016, 2018-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -99,7 +99,7 @@ static irqreturn_t kbase_mmu_irq_handler(int irq, void *data)
atomic_inc(&kbdev->faults_pending);
- val = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_STATUS));
+ val = kbase_reg_read(kbdev, MMU_CONTROL_REG(MMU_IRQ_STATUS));
#ifdef CONFIG_MALI_DEBUG
if (!kbdev->pm.backend.driver_ready_for_irqs)
@@ -298,7 +298,7 @@ static irqreturn_t kbase_mmu_irq_test_handler(int irq, void *data)
return IRQ_NONE;
}
- val = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_STATUS));
+ val = kbase_reg_read(kbdev, MMU_CONTROL_REG(MMU_IRQ_STATUS));
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
@@ -310,7 +310,7 @@ static irqreturn_t kbase_mmu_irq_test_handler(int irq, void *data)
kbasep_irq_test_data.triggered = 1;
wake_up(&kbasep_irq_test_data.wait);
- kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), val);
+ kbase_reg_write(kbdev, MMU_CONTROL_REG(MMU_IRQ_CLEAR), val);
return IRQ_HANDLED;
}
@@ -344,8 +344,8 @@ static int kbasep_common_test_interrupt(
break;
case MMU_IRQ_TAG:
test_handler = kbase_mmu_irq_test_handler;
- rawstat_offset = MMU_REG(MMU_IRQ_RAWSTAT);
- mask_offset = MMU_REG(MMU_IRQ_MASK);
+ rawstat_offset = MMU_CONTROL_REG(MMU_IRQ_RAWSTAT);
+ mask_offset = MMU_CONTROL_REG(MMU_IRQ_MASK);
break;
case GPU_IRQ_TAG:
/* already tested by pm_driver - bail out */
diff --git a/mali_kbase/backend/gpu/mali_kbase_jm_hw.c b/mali_kbase/backend/gpu/mali_kbase_jm_hw.c
index 72926bc..dd8f4d9 100644
--- a/mali_kbase/backend/gpu/mali_kbase_jm_hw.c
+++ b/mali_kbase/backend/gpu/mali_kbase_jm_hw.c
@@ -585,7 +585,7 @@ void kbase_job_done(struct kbase_device *kbdev, u32 done)
count += nr_done;
while (nr_done) {
- if (nr_done == 1) {
+ if (likely(nr_done == 1)) {
kbase_gpu_complete_hw(kbdev, i,
completion_code,
job_tail,
@@ -604,6 +604,14 @@ void kbase_job_done(struct kbase_device *kbdev, u32 done)
BASE_JD_EVENT_DONE,
0,
&end_timestamp);
+#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD)
+ /* Increment the end timestamp value by 1 ns to
+ * avoid having the same value for 'start_time_ns'
+ * and 'end_time_ns' for the 2nd atom whose job
+ * completion IRQ got merged with the 1st atom.
+ */
+ end_timestamp = ktime_add(end_timestamp, ns_to_ktime(1));
+#endif
}
nr_done--;
}
@@ -1061,12 +1069,12 @@ static void kbase_debug_dump_registers(struct kbase_device *kbdev)
i, kbase_reg_read(kbdev, JOB_SLOT_REG(i, JS_HEAD_LO)));
}
dev_err(kbdev->dev, " MMU_IRQ_RAWSTAT=0x%08x GPU_FAULTSTATUS=0x%08x",
- kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_RAWSTAT)),
+ kbase_reg_read(kbdev, MMU_CONTROL_REG(MMU_IRQ_RAWSTAT)),
kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_FAULTSTATUS)));
dev_err(kbdev->dev, " GPU_IRQ_MASK=0x%08x JOB_IRQ_MASK=0x%08x MMU_IRQ_MASK=0x%08x",
kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK)),
kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK)),
- kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK)));
+ kbase_reg_read(kbdev, MMU_CONTROL_REG(MMU_IRQ_MASK)));
dev_err(kbdev->dev, " PWR_OVERRIDE0=0x%08x PWR_OVERRIDE1=0x%08x",
kbase_reg_read(kbdev, GPU_CONTROL_REG(PWR_OVERRIDE0)),
kbase_reg_read(kbdev, GPU_CONTROL_REG(PWR_OVERRIDE1)));
diff --git a/mali_kbase/backend/gpu/mali_kbase_jm_internal.h b/mali_kbase/backend/gpu/mali_kbase_jm_internal.h
index bfd55a6..380a530 100644
--- a/mali_kbase/backend/gpu/mali_kbase_jm_internal.h
+++ b/mali_kbase/backend/gpu/mali_kbase_jm_internal.h
@@ -47,7 +47,7 @@ void kbase_job_done_slot(struct kbase_device *kbdev, int s, u32 completion_code,
#if IS_ENABLED(CONFIG_GPU_TRACEPOINTS)
static inline char *kbasep_make_job_slot_string(unsigned int js, char *js_string, size_t js_size)
{
- snprintf(js_string, js_size, "job_slot_%u", js);
+ (void)scnprintf(js_string, js_size, "job_slot_%u", js);
return js_string;
}
#endif
diff --git a/mali_kbase/backend/gpu/mali_kbase_jm_rb.c b/mali_kbase/backend/gpu/mali_kbase_jm_rb.c
index f4094a3..66f068a 100644
--- a/mali_kbase/backend/gpu/mali_kbase_jm_rb.c
+++ b/mali_kbase/backend/gpu/mali_kbase_jm_rb.c
@@ -32,6 +32,9 @@
#include <hwcnt/mali_kbase_hwcnt_context.h>
#include <mali_kbase_reset_gpu.h>
#include <mali_kbase_kinstr_jm.h>
+#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD)
+#include <mali_kbase_gpu_metrics.h>
+#endif
#include <backend/gpu/mali_kbase_cache_policy_backend.h>
#include <device/mali_kbase_device.h>
#include <backend/gpu/mali_kbase_jm_internal.h>
@@ -274,6 +277,59 @@ int kbase_backend_slot_free(struct kbase_device *kbdev, unsigned int js)
return SLOT_RB_SIZE - kbase_backend_nr_atoms_on_slot(kbdev, js);
}
+/**
+ * trace_atom_completion_for_gpu_metrics - Report the completion of an atom for the
+ * purpose of emitting the power/gpu_work_period
+ * tracepoint.
+ *
+ * @katom: Pointer to the atom that completed execution on GPU.
+ * @end_timestamp: Pointer to the timestamp of atom completion. May be NULL, in
+ * which case current time will be used.
+ *
+ * The function also reports the start of execution for the atom that was waiting
+ * in the HEAD_NEXT register.
+ *
+ * Note: Caller must hold the HW access lock.
+ */
+static inline void trace_atom_completion_for_gpu_metrics(
+ struct kbase_jd_atom *const katom,
+ ktime_t *end_timestamp)
+{
+#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD)
+ u64 complete_ns;
+ struct kbase_context *kctx = katom->kctx;
+ struct kbase_jd_atom *queued =
+ kbase_gpu_inspect(kctx->kbdev, katom->slot_nr, 1);
+
+#ifdef CONFIG_MALI_DEBUG
+ WARN_ON(!kbase_gpu_inspect(kctx->kbdev, katom->slot_nr, 0));
+#endif
+
+ lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
+
+ if (unlikely(queued == katom))
+ return;
+
+ /* A protected atom and a non-protected atom cannot be in the RB_SUBMITTED
+ * state at the same time in the job slot ringbuffer. The atom submission
+ * state machine prevents the submission of a non-protected atom until all
+ * protected atoms have completed and the GPU has exited protected mode.
+ * This implies that if the queued atom is in the RB_SUBMITTED state, it
+ * must be a protected atom, so we can return early.
+ */
+ if (unlikely(kbase_jd_katom_is_protected(katom)))
+ return;
+
+ if (likely(end_timestamp))
+ complete_ns = ktime_to_ns(*end_timestamp);
+ else
+ complete_ns = ktime_get_raw_ns();
+
+ kbase_gpu_metrics_ctx_end_activity(kctx, complete_ns);
+ if (queued && queued->gpu_rb_state == KBASE_ATOM_GPU_RB_SUBMITTED)
+ kbase_gpu_metrics_ctx_start_activity(queued->kctx, complete_ns);
+#endif
+}
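
For example, if an atom from context A completes at time t while an atom from context B is already submitted behind it in the slot ringbuffer (i.e. sitting in the HEAD_NEXT register), the helper above ends A's GPU activity at t and starts B's activity at the same t, so the emitted power/gpu_work_period intervals of the two contexts meet without a gap or an overlap.
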
static void kbase_gpu_release_atom(struct kbase_device *kbdev,
struct kbase_jd_atom *katom,
@@ -290,6 +346,7 @@ static void kbase_gpu_release_atom(struct kbase_device *kbdev,
break;
case KBASE_ATOM_GPU_RB_SUBMITTED:
+ trace_atom_completion_for_gpu_metrics(katom, end_timestamp);
kbase_kinstr_jm_atom_hw_release(katom);
/* Inform power management at start/finish of atom so it can
* update its GPU utilisation metrics. Mark atom as not
@@ -865,6 +922,9 @@ void kbase_backend_slot_update(struct kbase_device *kbdev)
for (idx = 0; idx < SLOT_RB_SIZE; idx++) {
bool cores_ready;
+#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD)
+ bool trace_atom_submit_for_gpu_metrics = true;
+#endif
int ret;
if (!katom[idx])
@@ -975,12 +1035,21 @@ void kbase_backend_slot_update(struct kbase_device *kbdev)
case KBASE_ATOM_GPU_RB_READY:
if (idx == 1) {
+ enum kbase_atom_gpu_rb_state atom_0_gpu_rb_state =
+ katom[0]->gpu_rb_state;
+
+#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD)
+ trace_atom_submit_for_gpu_metrics =
+ (atom_0_gpu_rb_state ==
+ KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB);
+#endif
+
/* Only submit if head atom or previous
* atom already submitted
*/
- if ((katom[0]->gpu_rb_state !=
+ if ((atom_0_gpu_rb_state !=
KBASE_ATOM_GPU_RB_SUBMITTED &&
- katom[0]->gpu_rb_state !=
+ atom_0_gpu_rb_state !=
KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB))
break;
@@ -1017,7 +1086,15 @@ void kbase_backend_slot_update(struct kbase_device *kbdev)
&katom[idx]->start_timestamp);
/* Inform platform at start/finish of atom */
+
kbasep_platform_event_work_begin(katom[idx]);
+#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD)
+ if (likely(trace_atom_submit_for_gpu_metrics &&
+ !kbase_jd_katom_is_protected(katom[idx])))
+ kbase_gpu_metrics_ctx_start_activity(
+ katom[idx]->kctx,
+ ktime_to_ns(katom[idx]->start_timestamp));
+#endif
} else {
if (katom[idx]->core_req & BASE_JD_REQ_PERMON)
kbase_pm_release_gpu_cycle_counter_nolock(kbdev);
@@ -1079,6 +1156,25 @@ kbase_rb_atom_might_depend(const struct kbase_jd_atom *katom_a,
KBASE_KATOM_FLAG_FAIL_BLOCKER)));
}
+static inline void kbase_gpu_remove_atom(struct kbase_device *kbdev,
+ struct kbase_jd_atom *katom,
+ u32 action,
+ bool disjoint)
+{
+ struct kbase_context *kctx = katom->kctx;
+
+ lockdep_assert_held(&kbdev->hwaccess_lock);
+
+ katom->event_code = BASE_JD_EVENT_REMOVED_FROM_NEXT;
+ kbase_gpu_mark_atom_for_return(kbdev, katom);
+ kbase_jsctx_slot_prio_blocked_set(kctx, katom->slot_nr,
+ katom->sched_priority);
+
+ if (disjoint)
+ kbase_job_check_enter_disjoint(kbdev, action, katom->core_req,
+ katom);
+}
+
/**
* kbase_gpu_irq_evict - evict a slot's JSn_HEAD_NEXT atom from the HW if it is
* related to a failed JSn_HEAD atom
@@ -1129,9 +1225,9 @@ bool kbase_gpu_irq_evict(struct kbase_device *kbdev, unsigned int js, u32 comple
kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_HI)) != 0)) {
kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_COMMAND_NEXT),
JS_COMMAND_NOP);
- next_katom->gpu_rb_state = KBASE_ATOM_GPU_RB_READY;
if (completion_code == BASE_JD_EVENT_STOPPED) {
+ kbase_gpu_remove_atom(kbdev, next_katom, JS_COMMAND_SOFT_STOP, false);
KBASE_TLSTREAM_TL_NRET_ATOM_LPU(kbdev, next_katom,
&kbdev->gpu_props.props.raw_props.js_features
[next_katom->slot_nr]);
@@ -1140,10 +1236,12 @@ bool kbase_gpu_irq_evict(struct kbase_device *kbdev, unsigned int js, u32 comple
KBASE_TLSTREAM_TL_NRET_CTX_LPU(kbdev, next_katom->kctx,
&kbdev->gpu_props.props.raw_props.js_features
[next_katom->slot_nr]);
- }
+ } else {
+ next_katom->gpu_rb_state = KBASE_ATOM_GPU_RB_READY;
- if (next_katom->core_req & BASE_JD_REQ_PERMON)
- kbase_pm_release_gpu_cycle_counter_nolock(kbdev);
+ if (next_katom->core_req & BASE_JD_REQ_PERMON)
+ kbase_pm_release_gpu_cycle_counter_nolock(kbdev);
+ }
/* On evicting the next_katom, the last submission kctx on the
* given job slot then reverts back to the one that owns katom.
@@ -1528,25 +1626,6 @@ static inline void kbase_gpu_stop_atom(struct kbase_device *kbdev, unsigned int
kbase_jsctx_slot_prio_blocked_set(kctx, js, katom->sched_priority);
}
-static inline void kbase_gpu_remove_atom(struct kbase_device *kbdev,
- struct kbase_jd_atom *katom,
- u32 action,
- bool disjoint)
-{
- struct kbase_context *kctx = katom->kctx;
-
- lockdep_assert_held(&kbdev->hwaccess_lock);
-
- katom->event_code = BASE_JD_EVENT_REMOVED_FROM_NEXT;
- kbase_gpu_mark_atom_for_return(kbdev, katom);
- kbase_jsctx_slot_prio_blocked_set(kctx, katom->slot_nr,
- katom->sched_priority);
-
- if (disjoint)
- kbase_job_check_enter_disjoint(kbdev, action, katom->core_req,
- katom);
-}
-
static int should_stop_x_dep_slot(struct kbase_jd_atom *katom)
{
if (katom->x_post_dep) {
diff --git a/mali_kbase/backend/gpu/mali_kbase_js_backend.c b/mali_kbase/backend/gpu/mali_kbase_js_backend.c
index 0ed04bb..ff4e114 100644
--- a/mali_kbase/backend/gpu/mali_kbase_js_backend.c
+++ b/mali_kbase/backend/gpu/mali_kbase_js_backend.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2014-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -28,28 +28,18 @@
#include <mali_kbase_reset_gpu.h>
#include <backend/gpu/mali_kbase_jm_internal.h>
#include <backend/gpu/mali_kbase_js_internal.h>
+#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD)
+#include <mali_kbase_gpu_metrics.h>
+
+#endif
-#if !MALI_USE_CSF
/*
* Hold the runpool_mutex for this
*/
-static inline bool timer_callback_should_run(struct kbase_device *kbdev)
+static inline bool timer_callback_should_run(struct kbase_device *kbdev, int nr_running_ctxs)
{
- struct kbase_backend_data *backend = &kbdev->hwaccess.backend;
- int nr_running_ctxs;
-
lockdep_assert_held(&kbdev->js_data.runpool_mutex);
- /* Timer must stop if we are suspending */
- if (backend->suspend_timer)
- return false;
-
- /* nr_contexts_pullable is updated with the runpool_mutex. However, the
- * locking in the caller gives us a barrier that ensures
- * nr_contexts_pullable is up-to-date for reading
- */
- nr_running_ctxs = atomic_read(&kbdev->js_data.nr_contexts_runnable);
-
#ifdef CONFIG_MALI_DEBUG
if (kbdev->js_data.softstop_always) {
/* Debug support for allowing soft-stop on a single context */
@@ -273,18 +263,20 @@ static enum hrtimer_restart timer_callback(struct hrtimer *timer)
return HRTIMER_NORESTART;
}
-#endif /* !MALI_USE_CSF */
void kbase_backend_ctx_count_changed(struct kbase_device *kbdev)
{
-#if !MALI_USE_CSF
struct kbasep_js_device_data *js_devdata = &kbdev->js_data;
struct kbase_backend_data *backend = &kbdev->hwaccess.backend;
unsigned long flags;
+ /* Timer must stop if we are suspending */
+ const bool suspend_timer = backend->suspend_timer;
+ const int nr_running_ctxs =
+ atomic_read(&kbdev->js_data.nr_contexts_runnable);
lockdep_assert_held(&js_devdata->runpool_mutex);
- if (!timer_callback_should_run(kbdev)) {
+ if (suspend_timer || !timer_callback_should_run(kbdev, nr_running_ctxs)) {
/* Take spinlock to force synchronisation with timer */
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
backend->timer_running = false;
@@ -298,7 +290,8 @@ void kbase_backend_ctx_count_changed(struct kbase_device *kbdev)
hrtimer_cancel(&backend->scheduling_timer);
}
- if (timer_callback_should_run(kbdev) && !backend->timer_running) {
+ if (!suspend_timer && timer_callback_should_run(kbdev, nr_running_ctxs) &&
+ !backend->timer_running) {
/* Take spinlock to force synchronisation with timer */
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
backend->timer_running = true;
@@ -309,36 +302,59 @@ void kbase_backend_ctx_count_changed(struct kbase_device *kbdev)
KBASE_KTRACE_ADD_JM(kbdev, JS_POLICY_TIMER_START, NULL, NULL, 0u, 0u);
}
-#else /* !MALI_USE_CSF */
- CSTD_UNUSED(kbdev);
-#endif /* !MALI_USE_CSF */
+
+#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD)
+ if (unlikely(suspend_timer)) {
+ js_devdata->gpu_metrics_timer_needed = false;
+ /* Cancel the timer as System suspend is happening */
+ hrtimer_cancel(&js_devdata->gpu_metrics_timer);
+ js_devdata->gpu_metrics_timer_running = false;
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+ /* Explicitly emit the tracepoint on System suspend */
+ kbase_gpu_metrics_emit_tracepoint(kbdev, ktime_get_raw_ns());
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+ return;
+ }
+
+ if (!nr_running_ctxs) {
+ /* Just set the flag to not restart the timer on expiry */
+ js_devdata->gpu_metrics_timer_needed = false;
+ return;
+ }
+
+ /* There are runnable contexts so the timer is needed */
+ if (!js_devdata->gpu_metrics_timer_needed) {
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+ js_devdata->gpu_metrics_timer_needed = true;
+ /* No need to restart the timer if it is already running. */
+ if (!js_devdata->gpu_metrics_timer_running) {
+ hrtimer_start(&js_devdata->gpu_metrics_timer,
+ HR_TIMER_DELAY_NSEC(kbase_gpu_metrics_get_emit_interval()),
+ HRTIMER_MODE_REL);
+ js_devdata->gpu_metrics_timer_running = true;
+ }
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+ }
+#endif
}
int kbase_backend_timer_init(struct kbase_device *kbdev)
{
-#if !MALI_USE_CSF
struct kbase_backend_data *backend = &kbdev->hwaccess.backend;
hrtimer_init(&backend->scheduling_timer, CLOCK_MONOTONIC,
HRTIMER_MODE_REL);
backend->scheduling_timer.function = timer_callback;
backend->timer_running = false;
-#else /* !MALI_USE_CSF */
- CSTD_UNUSED(kbdev);
-#endif /* !MALI_USE_CSF */
return 0;
}
void kbase_backend_timer_term(struct kbase_device *kbdev)
{
-#if !MALI_USE_CSF
struct kbase_backend_data *backend = &kbdev->hwaccess.backend;
hrtimer_cancel(&backend->scheduling_timer);
-#else /* !MALI_USE_CSF */
- CSTD_UNUSED(kbdev);
-#endif /* !MALI_USE_CSF */
}
void kbase_backend_timer_suspend(struct kbase_device *kbdev)
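
The gpu_metrics timer handling above follows the usual self-rearming hrtimer pattern. A compact sketch of that pattern in isolation, with hypothetical names and an assumed emission period (the real interval comes from kbase_gpu_metrics_get_emit_interval()):

	#include <linux/hrtimer.h>
	#include <linux/ktime.h>
	#include <linux/printk.h>

	#define EMIT_INTERVAL_MS 128	/* hypothetical emission period */

	static struct hrtimer metrics_timer;
	static bool metrics_timer_needed;

	static enum hrtimer_restart metrics_timer_cb(struct hrtimer *timer)
	{
		pr_debug("emit power/gpu_work_period tracepoints here\n");

		/* Mirror the "just set the flag" logic: do not re-arm on expiry
		 * once the timer is no longer needed.
		 */
		if (!metrics_timer_needed)
			return HRTIMER_NORESTART;

		hrtimer_forward_now(timer, ms_to_ktime(EMIT_INTERVAL_MS));
		return HRTIMER_RESTART;
	}

	static void metrics_timer_setup(void)
	{
		hrtimer_init(&metrics_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
		metrics_timer.function = metrics_timer_cb;
		metrics_timer_needed = true;
		hrtimer_start(&metrics_timer, ms_to_ktime(EMIT_INTERVAL_MS),
			      HRTIMER_MODE_REL);
	}
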
diff --git a/mali_kbase/backend/gpu/mali_kbase_l2_mmu_config.c b/mali_kbase/backend/gpu/mali_kbase_l2_mmu_config.c
index 9ce5075..6eedc00 100644
--- a/mali_kbase/backend/gpu/mali_kbase_l2_mmu_config.c
+++ b/mali_kbase/backend/gpu/mali_kbase_l2_mmu_config.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -19,8 +19,9 @@
*
*/
+#include <linux/version_compat_defs.h>
+
#include <mali_kbase.h>
-#include <mali_kbase_bits.h>
#include <mali_kbase_config_defaults.h>
#include <device/mali_kbase_device.h>
#include "mali_kbase_l2_mmu_config.h"
diff --git a/mali_kbase/backend/gpu/mali_kbase_model_dummy.c b/mali_kbase/backend/gpu/mali_kbase_model_dummy.c
index dd16fb2..46bcdc7 100644
--- a/mali_kbase/backend/gpu/mali_kbase_model_dummy.c
+++ b/mali_kbase/backend/gpu/mali_kbase_model_dummy.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2014-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -484,13 +484,6 @@ void *gpu_device_get_data(void *model)
#define signal_int(m, s) m->slots[(s)].job_complete_irq_asserted = 1
-/* SCons should pass in a default GPU, but other ways of building (e.g.
- * in-tree) won't, so define one here in case.
- */
-#ifndef CONFIG_MALI_NO_MALI_DEFAULT_GPU
-#define CONFIG_MALI_NO_MALI_DEFAULT_GPU "tMIx"
-#endif
-
static char *no_mali_gpu = CONFIG_MALI_NO_MALI_DEFAULT_GPU;
module_param(no_mali_gpu, charp, 0000);
MODULE_PARM_DESC(no_mali_gpu, "GPU to identify as");
@@ -1378,10 +1371,10 @@ void midgard_model_write_reg(void *h, u32 addr, u32 value)
dummy->l2_config = value;
}
#if MALI_USE_CSF
- else if (addr >= GPU_CONTROL_REG(CSF_HW_DOORBELL_PAGE_OFFSET) &&
- addr < GPU_CONTROL_REG(CSF_HW_DOORBELL_PAGE_OFFSET +
- (CSF_NUM_DOORBELL * CSF_HW_DOORBELL_PAGE_SIZE))) {
- if (addr == GPU_CONTROL_REG(CSF_HW_DOORBELL_PAGE_OFFSET))
+ else if (addr >= CSF_HW_DOORBELL_PAGE_OFFSET &&
+ addr < CSF_HW_DOORBELL_PAGE_OFFSET +
+ (CSF_NUM_DOORBELL * CSF_HW_DOORBELL_PAGE_SIZE)) {
+ if (addr == CSF_HW_DOORBELL_PAGE_OFFSET)
hw_error_status.job_irq_status = JOB_IRQ_GLOBAL_IF;
} else if ((addr >= GPU_CONTROL_REG(SYSC_ALLOC0)) &&
(addr < GPU_CONTROL_REG(SYSC_ALLOC(SYSC_ALLOC_COUNT)))) {
@@ -1409,13 +1402,13 @@ void midgard_model_write_reg(void *h, u32 addr, u32 value)
}
}
#endif
- else if (addr == MMU_REG(MMU_IRQ_MASK)) {
+ else if (addr == MMU_CONTROL_REG(MMU_IRQ_MASK)) {
hw_error_status.mmu_irq_mask = value;
- } else if (addr == MMU_REG(MMU_IRQ_CLEAR)) {
+ } else if (addr == MMU_CONTROL_REG(MMU_IRQ_CLEAR)) {
hw_error_status.mmu_irq_rawstat &= (~value);
- } else if ((addr >= MMU_AS_REG(0, AS_TRANSTAB_LO)) && (addr <= MMU_AS_REG(15, AS_STATUS))) {
- int mem_addr_space = (addr - MMU_AS_REG(0, AS_TRANSTAB_LO))
- >> 6;
+ } else if ((addr >= MMU_STAGE1_REG(MMU_AS_REG(0, AS_TRANSTAB_LO))) &&
+ (addr <= MMU_STAGE1_REG(MMU_AS_REG(15, AS_STATUS)))) {
+ int mem_addr_space = (addr - MMU_STAGE1_REG(MMU_AS_REG(0, AS_TRANSTAB_LO))) >> 6;
switch (addr & 0x3F) {
case AS_COMMAND:
@@ -1926,10 +1919,9 @@ void midgard_model_read_reg(void *h, u32 addr, u32 *const value)
} else if (addr >= GPU_CONTROL_REG(CYCLE_COUNT_LO)
&& addr <= GPU_CONTROL_REG(TIMESTAMP_HI)) {
*value = 0;
- } else if (addr >= MMU_AS_REG(0, AS_TRANSTAB_LO)
- && addr <= MMU_AS_REG(15, AS_STATUS)) {
- int mem_addr_space = (addr - MMU_AS_REG(0, AS_TRANSTAB_LO))
- >> 6;
+ } else if (addr >= MMU_STAGE1_REG(MMU_AS_REG(0, AS_TRANSTAB_LO)) &&
+ addr <= MMU_STAGE1_REG(MMU_AS_REG(15, AS_STATUS))) {
+ int mem_addr_space = (addr - MMU_STAGE1_REG(MMU_AS_REG(0, AS_TRANSTAB_LO))) >> 6;
switch (addr & 0x3F) {
case AS_TRANSTAB_LO:
@@ -1973,11 +1965,11 @@ void midgard_model_read_reg(void *h, u32 addr, u32 *const value)
*value = 0;
break;
}
- } else if (addr == MMU_REG(MMU_IRQ_MASK)) {
+ } else if (addr == MMU_CONTROL_REG(MMU_IRQ_MASK)) {
*value = hw_error_status.mmu_irq_mask;
- } else if (addr == MMU_REG(MMU_IRQ_RAWSTAT)) {
+ } else if (addr == MMU_CONTROL_REG(MMU_IRQ_RAWSTAT)) {
*value = hw_error_status.mmu_irq_rawstat;
- } else if (addr == MMU_REG(MMU_IRQ_STATUS)) {
+ } else if (addr == MMU_CONTROL_REG(MMU_IRQ_STATUS)) {
*value = hw_error_status.mmu_irq_mask &
hw_error_status.mmu_irq_rawstat;
}
@@ -1985,8 +1977,7 @@ void midgard_model_read_reg(void *h, u32 addr, u32 *const value)
else if (addr == IPA_CONTROL_REG(STATUS)) {
*value = (ipa_control_timer_enabled << 31);
} else if ((addr >= IPA_CONTROL_REG(VALUE_CSHW_REG_LO(0))) &&
- (addr <= IPA_CONTROL_REG(VALUE_CSHW_REG_HI(
- IPA_CTL_MAX_VAL_CNT_IDX)))) {
+ (addr <= IPA_CONTROL_REG(VALUE_CSHW_REG_HI(IPA_CTL_MAX_VAL_CNT_IDX)))) {
u32 counter_index =
(addr - IPA_CONTROL_REG(VALUE_CSHW_REG_LO(0))) >> 3;
bool is_low_word =
@@ -1995,8 +1986,7 @@ void midgard_model_read_reg(void *h, u32 addr, u32 *const value)
*value = gpu_model_get_prfcnt_value(KBASE_IPA_CORE_TYPE_CSHW,
counter_index, is_low_word);
} else if ((addr >= IPA_CONTROL_REG(VALUE_MEMSYS_REG_LO(0))) &&
- (addr <= IPA_CONTROL_REG(VALUE_MEMSYS_REG_HI(
- IPA_CTL_MAX_VAL_CNT_IDX)))) {
+ (addr <= IPA_CONTROL_REG(VALUE_MEMSYS_REG_HI(IPA_CTL_MAX_VAL_CNT_IDX)))) {
u32 counter_index =
(addr - IPA_CONTROL_REG(VALUE_MEMSYS_REG_LO(0))) >> 3;
bool is_low_word =
@@ -2005,8 +1995,7 @@ void midgard_model_read_reg(void *h, u32 addr, u32 *const value)
*value = gpu_model_get_prfcnt_value(KBASE_IPA_CORE_TYPE_MEMSYS,
counter_index, is_low_word);
} else if ((addr >= IPA_CONTROL_REG(VALUE_TILER_REG_LO(0))) &&
- (addr <= IPA_CONTROL_REG(VALUE_TILER_REG_HI(
- IPA_CTL_MAX_VAL_CNT_IDX)))) {
+ (addr <= IPA_CONTROL_REG(VALUE_TILER_REG_HI(IPA_CTL_MAX_VAL_CNT_IDX)))) {
u32 counter_index =
(addr - IPA_CONTROL_REG(VALUE_TILER_REG_LO(0))) >> 3;
bool is_low_word =
@@ -2015,8 +2004,7 @@ void midgard_model_read_reg(void *h, u32 addr, u32 *const value)
*value = gpu_model_get_prfcnt_value(KBASE_IPA_CORE_TYPE_TILER,
counter_index, is_low_word);
} else if ((addr >= IPA_CONTROL_REG(VALUE_SHADER_REG_LO(0))) &&
- (addr <= IPA_CONTROL_REG(VALUE_SHADER_REG_HI(
- IPA_CTL_MAX_VAL_CNT_IDX)))) {
+ (addr <= IPA_CONTROL_REG(VALUE_SHADER_REG_HI(IPA_CTL_MAX_VAL_CNT_IDX)))) {
u32 counter_index =
(addr - IPA_CONTROL_REG(VALUE_SHADER_REG_LO(0))) >> 3;
bool is_low_word =
@@ -2214,16 +2202,3 @@ int gpu_model_control(void *model,
return 0;
}
-
-/**
- * kbase_is_gpu_removed - Has the GPU been removed.
- * @kbdev: Kbase device pointer
- *
- * This function would return true if the GPU has been removed.
- * It is stubbed here
- * Return: Always false
- */
-bool kbase_is_gpu_removed(struct kbase_device *kbdev)
-{
- return false;
-}
diff --git a/mali_kbase/backend/gpu/mali_kbase_model_linux.c b/mali_kbase/backend/gpu/mali_kbase_model_linux.c
index e90e4df..67e00e9 100644
--- a/mali_kbase/backend/gpu/mali_kbase_model_linux.c
+++ b/mali_kbase/backend/gpu/mali_kbase_model_linux.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2010, 2012-2015, 2017-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -95,8 +95,7 @@ static void serve_mmu_irq(struct work_struct *work)
if (atomic_cmpxchg(&kbdev->serving_mmu_irq, 1, 0) == 1) {
u32 val;
- while ((val = kbase_reg_read(kbdev,
- MMU_REG(MMU_IRQ_STATUS)))) {
+ while ((val = kbase_reg_read(kbdev, MMU_CONTROL_REG(MMU_IRQ_STATUS)))) {
/* Handle the IRQ */
kbase_mmu_interrupt(kbdev, val);
}
@@ -156,7 +155,7 @@ KBASE_EXPORT_TEST_API(kbase_reg_write);
u32 kbase_reg_read(struct kbase_device *kbdev, u32 offset)
{
unsigned long flags;
- u32 val;
+ u32 val = 0;
spin_lock_irqsave(&kbdev->reg_op_lock, flags);
midgard_model_read_reg(kbdev->model, offset, &val);
diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_backend.c b/mali_kbase/backend/gpu/mali_kbase_pm_backend.c
index abbb9c8..46c5ffd 100644
--- a/mali_kbase/backend/gpu/mali_kbase_pm_backend.c
+++ b/mali_kbase/backend/gpu/mali_kbase_pm_backend.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -169,6 +169,7 @@ int kbase_hwaccess_pm_init(struct kbase_device *kbdev)
kbdev->pm.backend.gpu_powered = false;
kbdev->pm.backend.gpu_ready = false;
kbdev->pm.suspending = false;
+ kbdev->pm.resuming = false;
#ifdef CONFIG_MALI_ARBITER_SUPPORT
kbase_pm_set_gpu_lost(kbdev, false);
#endif
@@ -590,11 +591,13 @@ static int kbase_pm_do_poweroff_sync(struct kbase_device *kbdev)
{
struct kbase_pm_backend_data *backend = &kbdev->pm.backend;
unsigned long flags;
- int ret = 0;
+ int ret;
WARN_ON(kbdev->pm.active_count);
- kbase_pm_wait_for_poweroff_work_complete(kbdev);
+ ret = kbase_pm_wait_for_poweroff_work_complete(kbdev);
+ if (ret)
+ return ret;
kbase_pm_lock(kbdev);
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
@@ -679,60 +682,6 @@ unlock_hwaccess:
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
}
-static bool is_poweroff_in_progress(struct kbase_device *kbdev)
-{
- bool ret;
- unsigned long flags;
-
- spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
- ret = (kbdev->pm.backend.poweroff_wait_in_progress == false);
- spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
-
- return ret;
-}
-
-void kbase_pm_wait_for_poweroff_work_complete(struct kbase_device *kbdev)
-{
-#define POWEROFF_TIMEOUT_MSEC 500
- long remaining = msecs_to_jiffies(POWEROFF_TIMEOUT_MSEC);
- remaining = wait_event_killable_timeout(kbdev->pm.backend.poweroff_wait,
- is_poweroff_in_progress(kbdev), remaining);
- if (!remaining) {
- /* If work is now pending, kbase_pm_gpu_poweroff_wait_wq() will
- * definitely be called, so it's safe to continue waiting for it.
- */
- if (!work_pending(&kbdev->pm.backend.gpu_poweroff_wait_work)) {
- unsigned long flags;
- kbasep_platform_event_core_dump(kbdev, "poweroff work timeout");
- dev_err(kbdev->dev, "failed to wait for poweroff worker after %ims",
- POWEROFF_TIMEOUT_MSEC);
- kbase_gpu_timeout_debug_message(kbdev);
-#if MALI_USE_CSF
- //csf.scheduler.state should be accessed with scheduler lock!
- //callchains go through this function though holding that lock
- //so just print without locking.
- dev_err(kbdev->dev, "scheduler.state %d", kbdev->csf.scheduler.state);
- dev_err(kbdev->dev, "Firmware ping %d", kbase_csf_firmware_ping_wait(kbdev, 0));
-#endif
- //Attempt another state machine transition prompt.
- dev_err(kbdev->dev, "Attempt to prompt state machine");
- spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
- kbase_pm_update_state(kbdev);
- spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
-
- dev_err(kbdev->dev, "GPU state after re-prompt of state machine");
- kbase_gpu_timeout_debug_message(kbdev);
-
- dev_err(kbdev->dev, "retrying wait, this is likely to still hang. %d",
- is_poweroff_in_progress(kbdev));
- }
- wait_event_killable(kbdev->pm.backend.poweroff_wait,
- is_poweroff_in_progress(kbdev));
- }
-#undef POWEROFF_TIMEOUT_MSEC
-}
-KBASE_EXPORT_TEST_API(kbase_pm_wait_for_poweroff_work_complete);
-
/**
* is_gpu_powered_down - Check whether GPU is powered down
*
@@ -986,7 +935,13 @@ int kbase_hwaccess_pm_suspend(struct kbase_device *kbdev)
kbase_pm_unlock(kbdev);
- kbase_pm_wait_for_poweroff_work_complete(kbdev);
+ ret = kbase_pm_wait_for_poweroff_work_complete(kbdev);
+ if (ret) {
+#if !MALI_USE_CSF
+ kbase_backend_timer_resume(kbdev);
+#endif /* !MALI_USE_CSF */
+ return ret;
+ }
#endif
WARN_ON(kbdev->pm.backend.gpu_powered);
@@ -1002,6 +957,8 @@ void kbase_hwaccess_pm_resume(struct kbase_device *kbdev)
{
kbase_pm_lock(kbdev);
+ /* System resume callback has begun */
+ kbdev->pm.resuming = true;
kbdev->pm.suspending = false;
#ifdef CONFIG_MALI_ARBITER_SUPPORT
if (kbase_pm_is_gpu_lost(kbdev)) {
@@ -1016,7 +973,6 @@ void kbase_hwaccess_pm_resume(struct kbase_device *kbdev)
kbase_backend_timer_resume(kbdev);
#endif /* !MALI_USE_CSF */
- wake_up_all(&kbdev->pm.resume_wait);
kbase_pm_unlock(kbdev);
}
diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_driver.c b/mali_kbase/backend/gpu/mali_kbase_pm_driver.c
index 2c69ac9..7c891c1 100644
--- a/mali_kbase/backend/gpu/mali_kbase_pm_driver.c
+++ b/mali_kbase/backend/gpu/mali_kbase_pm_driver.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -51,9 +51,6 @@
#ifdef CONFIG_MALI_ARBITER_SUPPORT
#include <arbiter/mali_kbase_arbiter_pm.h>
#endif /* CONFIG_MALI_ARBITER_SUPPORT */
-#if MALI_USE_CSF
-#include <csf/ipa_control/mali_kbase_csf_ipa_control.h>
-#endif
#if MALI_USE_CSF
#include <linux/delay.h>
@@ -699,8 +696,8 @@ static void wait_mcu_as_inactive(struct kbase_device *kbdev)
/* Wait for the AS_ACTIVE_INT bit to become 0 for the AS used by MCU FW */
while (--max_loops &&
- kbase_reg_read(kbdev, MMU_AS_REG(MCU_AS_NR, AS_STATUS)) &
- AS_STATUS_AS_ACTIVE_INT)
+ kbase_reg_read(kbdev, MMU_STAGE1_REG(MMU_AS_REG(MCU_AS_NR, AS_STATUS))) &
+ AS_STATUS_AS_ACTIVE_INT)
;
if (!WARN_ON_ONCE(max_loops == 0))
@@ -2442,26 +2439,29 @@ void kbase_pm_reset_complete(struct kbase_device *kbdev)
#define PM_TIMEOUT_MS (5000) /* 5s */
#endif
-void kbase_gpu_timeout_debug_message(struct kbase_device *kbdev) {
+void kbase_gpu_timeout_debug_message(struct kbase_device *kbdev, const char *timeout_msg)
+{
unsigned long flags;
+
+ dev_err(kbdev->dev, "%s", timeout_msg);
#if !MALI_USE_CSF
CSTD_UNUSED(flags);
dev_err(kbdev->dev, "Desired state :\n");
- dev_err(kbdev->dev, " Shader=%016llx\n",
+ dev_err(kbdev->dev, "\tShader=%016llx\n",
kbdev->pm.backend.shaders_desired ? kbdev->pm.backend.shaders_avail : 0);
#else
dev_err(kbdev->dev, "GPU pm state :\n");
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
- dev_err(kbdev->dev, " scheduler.pm_active_count = %d", kbdev->csf.scheduler.pm_active_count);
- dev_err(kbdev->dev, " poweron_required %d pm.active_count %d invoke_poweroff_wait_wq_when_l2_off %d",
+ dev_err(kbdev->dev, "\tscheduler.pm_active_count = %d", kbdev->csf.scheduler.pm_active_count);
+ dev_err(kbdev->dev, "\tpoweron_required %d pm.active_count %d invoke_poweroff_wait_wq_when_l2_off %d",
kbdev->pm.backend.poweron_required,
kbdev->pm.active_count,
kbdev->pm.backend.invoke_poweroff_wait_wq_when_l2_off);
- dev_err(kbdev->dev, " gpu_poweroff_wait_work pending %d",
+ dev_err(kbdev->dev, "\tgpu_poweroff_wait_work pending %d",
work_pending(&kbdev->pm.backend.gpu_poweroff_wait_work));
- dev_err(kbdev->dev, " MCU desired = %d\n",
+ dev_err(kbdev->dev, "\tMCU desired = %d\n",
kbase_pm_is_mcu_desired(kbdev));
- dev_err(kbdev->dev, " MCU sw state = %d\n",
+ dev_err(kbdev->dev, "\tMCU sw state = %d\n",
kbdev->pm.backend.mcu_state);
dev_err(kbdev->dev, "\tL2 desired = %d (locked_off: %d)\n",
kbase_pm_is_l2_desired(kbdev), kbdev->pm.backend.policy_change_clamp_state_to_off);
@@ -2474,17 +2474,17 @@ void kbase_gpu_timeout_debug_message(struct kbase_device *kbdev) {
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
#endif
dev_err(kbdev->dev, "Current state :\n");
- dev_err(kbdev->dev, " Shader=%08x%08x\n",
+ dev_err(kbdev->dev, "\tShader=%08x%08x\n",
kbase_reg_read(kbdev,
GPU_CONTROL_REG(SHADER_READY_HI)),
kbase_reg_read(kbdev,
GPU_CONTROL_REG(SHADER_READY_LO)));
- dev_err(kbdev->dev, " Tiler =%08x%08x\n",
+ dev_err(kbdev->dev, "\tTiler =%08x%08x\n",
kbase_reg_read(kbdev,
GPU_CONTROL_REG(TILER_READY_HI)),
kbase_reg_read(kbdev,
GPU_CONTROL_REG(TILER_READY_LO)));
- dev_err(kbdev->dev, " L2 =%08x%08x\n",
+ dev_err(kbdev->dev, "\tL2 =%08x%08x\n",
kbase_reg_read(kbdev,
GPU_CONTROL_REG(L2_READY_HI)),
kbase_reg_read(kbdev,
@@ -2493,17 +2493,17 @@ void kbase_gpu_timeout_debug_message(struct kbase_device *kbdev) {
kbase_csf_debug_dump_registers(kbdev);
#endif
dev_err(kbdev->dev, "Cores transitioning :\n");
- dev_err(kbdev->dev, " Shader=%08x%08x\n",
+ dev_err(kbdev->dev, "\tShader=%08x%08x\n",
kbase_reg_read(kbdev, GPU_CONTROL_REG(
SHADER_PWRTRANS_HI)),
kbase_reg_read(kbdev, GPU_CONTROL_REG(
SHADER_PWRTRANS_LO)));
- dev_err(kbdev->dev, " Tiler =%08x%08x\n",
+ dev_err(kbdev->dev, "\tTiler =%08x%08x\n",
kbase_reg_read(kbdev, GPU_CONTROL_REG(
TILER_PWRTRANS_HI)),
kbase_reg_read(kbdev, GPU_CONTROL_REG(
TILER_PWRTRANS_LO)));
- dev_err(kbdev->dev, " L2 =%08x%08x\n",
+ dev_err(kbdev->dev, "\tL2 =%08x%08x\n",
kbase_reg_read(kbdev, GPU_CONTROL_REG(
L2_PWRTRANS_HI)),
kbase_reg_read(kbdev, GPU_CONTROL_REG(
@@ -2512,12 +2512,9 @@ void kbase_gpu_timeout_debug_message(struct kbase_device *kbdev) {
dump_stack();
}
-static void kbase_pm_timed_out(struct kbase_device *kbdev)
+static void kbase_pm_timed_out(struct kbase_device *kbdev, const char *timeout_msg)
{
- dev_err(kbdev->dev, "Power transition timed out unexpectedly\n");
- kbase_gpu_timeout_debug_message(kbdev);
- dev_err(kbdev->dev, "Sending reset to GPU - all running jobs will be lost\n");
-
+ kbase_gpu_timeout_debug_message(kbdev, timeout_msg);
/* pixel: If either:
* 1. L2/MCU power transition timed out, or,
* 2. kbase state machine fell out of sync with the hw state,
@@ -2530,6 +2527,7 @@ static void kbase_pm_timed_out(struct kbase_device *kbdev)
* We have already lost work if we end up here, so send a powercycle to reset the hw,
* which is more reliable.
*/
+ dev_err(kbdev->dev, "Sending reset to GPU - all running jobs will be lost\n");
if (kbase_prepare_to_reset_gpu(kbdev,
RESET_FLAGS_HWC_UNRECOVERABLE_ERROR |
RESET_FLAGS_FORCE_PM_HW_RESET))
@@ -2570,7 +2568,7 @@ int kbase_pm_wait_for_l2_powered(struct kbase_device *kbdev)
.info = GPU_UEVENT_INFO_L2_PM_TIMEOUT
};
pixel_gpu_uevent_send(kbdev, &evt);
- kbase_pm_timed_out(kbdev);
+ kbase_pm_timed_out(kbdev, "Wait for desired PM state with L2 powered timed out");
err = -ETIMEDOUT;
} else if (remaining < 0) {
dev_info(
@@ -2582,7 +2580,7 @@ int kbase_pm_wait_for_l2_powered(struct kbase_device *kbdev)
return err;
}
-int kbase_pm_wait_for_desired_state(struct kbase_device *kbdev)
+static int pm_wait_for_desired_state(struct kbase_device *kbdev, bool killable_wait)
{
unsigned long flags;
long remaining;
@@ -2600,31 +2598,42 @@ int kbase_pm_wait_for_desired_state(struct kbase_device *kbdev)
/* Wait for cores */
#if KERNEL_VERSION(4, 13, 1) <= LINUX_VERSION_CODE
- remaining = wait_event_killable_timeout(
- kbdev->pm.backend.gpu_in_desired_state_wait,
- kbase_pm_is_in_desired_state(kbdev), timeout);
+ if (killable_wait)
+ remaining = wait_event_killable_timeout(kbdev->pm.backend.gpu_in_desired_state_wait,
+ kbase_pm_is_in_desired_state(kbdev),
+ timeout);
#else
- remaining = wait_event_timeout(
- kbdev->pm.backend.gpu_in_desired_state_wait,
- kbase_pm_is_in_desired_state(kbdev), timeout);
+ killable_wait = false;
#endif
-
+ if (!killable_wait)
+ remaining = wait_event_timeout(kbdev->pm.backend.gpu_in_desired_state_wait,
+ kbase_pm_is_in_desired_state(kbdev), timeout);
if (!remaining) {
const struct gpu_uevent evt = {
.type = GPU_UEVENT_TYPE_KMD_ERROR,
.info = GPU_UEVENT_INFO_PM_TIMEOUT
};
pixel_gpu_uevent_send(kbdev, &evt);
- kbase_pm_timed_out(kbdev);
+ kbase_pm_timed_out(kbdev, "Wait for power transition timed out");
err = -ETIMEDOUT;
} else if (remaining < 0) {
- dev_info(kbdev->dev,
- "Wait for desired PM state got interrupted");
+ WARN_ON_ONCE(!killable_wait);
+ dev_info(kbdev->dev, "Wait for power transition got interrupted");
err = (int)remaining;
}
return err;
}
+
+int kbase_pm_killable_wait_for_desired_state(struct kbase_device *kbdev)
+{
+ return pm_wait_for_desired_state(kbdev, true);
+}
+
+int kbase_pm_wait_for_desired_state(struct kbase_device *kbdev)
+{
+ return pm_wait_for_desired_state(kbdev, false);
+}
KBASE_EXPORT_TEST_API(kbase_pm_wait_for_desired_state);
#if MALI_USE_CSF
@@ -2674,7 +2683,7 @@ int kbase_pm_wait_for_cores_down_scale(struct kbase_device *kbdev)
#endif
if (!remaining) {
- kbase_pm_timed_out(kbdev);
+ kbase_pm_timed_out(kbdev, "Wait for cores down scaling timed out");
err = -ETIMEDOUT;
} else if (remaining < 0) {
dev_info(
@@ -2687,6 +2696,96 @@ int kbase_pm_wait_for_cores_down_scale(struct kbase_device *kbdev)
}
#endif
+static bool is_poweroff_wait_in_progress(struct kbase_device *kbdev)
+{
+ bool ret;
+ unsigned long flags;
+
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+ ret = kbdev->pm.backend.poweroff_wait_in_progress;
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+ return ret;
+}
+
+static int pm_wait_for_poweroff_work_complete(struct kbase_device *kbdev, bool killable_wait)
+{
+ long remaining;
+#if MALI_USE_CSF
+ /* gpu_poweroff_wait_work is subject to kernel scheduling, so the wait
+ * time cannot be a function of the GPU frequency alone.
+ */
+ const unsigned int extra_wait_time_ms = 2000;
+ const long timeout = kbase_csf_timeout_in_jiffies(
+ kbase_get_timeout_ms(kbdev, CSF_PM_TIMEOUT) + extra_wait_time_ms);
+#else
+#ifdef CONFIG_MALI_ARBITER_SUPPORT
+ /* Handling of timeout error isn't supported for arbiter builds */
+ const long timeout = MAX_SCHEDULE_TIMEOUT;
+#else
+ const long timeout = msecs_to_jiffies(PM_TIMEOUT_MS);
+#endif
+#endif
+ int err = 0;
+
+#if KERNEL_VERSION(4, 13, 1) <= LINUX_VERSION_CODE
+ if (killable_wait)
+ remaining = wait_event_killable_timeout(kbdev->pm.backend.poweroff_wait,
+ !is_poweroff_wait_in_progress(kbdev),
+ timeout);
+#else
+ killable_wait = false;
+#endif
+
+ if (!killable_wait)
+ remaining = wait_event_timeout(kbdev->pm.backend.poweroff_wait,
+ !is_poweroff_wait_in_progress(kbdev), timeout);
+ if (!remaining) {
+ /* If work is now pending, kbase_pm_gpu_poweroff_wait_wq() will
+ * definitely be called, so it's safe to continue waiting for it.
+ */
+ if (work_pending(&kbdev->pm.backend.gpu_poweroff_wait_work)) {
+ wait_event_killable(kbdev->pm.backend.poweroff_wait,
+ !is_poweroff_wait_in_progress(kbdev));
+ } else {
+ unsigned long flags;
+ kbasep_platform_event_core_dump(kbdev, "poweroff work timeout");
+ kbase_gpu_timeout_debug_message(kbdev, "failed to wait for poweroff worker");
+#if MALI_USE_CSF
+ // csf.scheduler.state should be accessed with the scheduler lock held;
+ // call chains go through this function while that lock may already be held,
+ // so just print without locking.
+ dev_err(kbdev->dev, "scheduler.state %d", kbdev->csf.scheduler.state);
+ dev_err(kbdev->dev, "Firmware ping %d", kbase_csf_firmware_ping_wait(kbdev, 0));
+#endif
+ //Attempt another state machine transition prompt.
+ dev_err(kbdev->dev, "Attempt to prompt state machine");
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+ kbase_pm_update_state(kbdev);
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+ kbase_gpu_timeout_debug_message(kbdev, "GPU state after re-prompt of state machine");
+ err = -ETIMEDOUT;
+ }
+ } else if (remaining < 0) {
+ WARN_ON_ONCE(!killable_wait);
+ dev_info(kbdev->dev, "Wait for poweroff work got interrupted");
+ err = (int)remaining;
+ }
+ return err;
+}
+
+int kbase_pm_killable_wait_for_poweroff_work_complete(struct kbase_device *kbdev)
+{
+ return pm_wait_for_poweroff_work_complete(kbdev, true);
+}
+
+int kbase_pm_wait_for_poweroff_work_complete(struct kbase_device *kbdev)
+{
+ return pm_wait_for_poweroff_work_complete(kbdev, false);
+}
+KBASE_EXPORT_TEST_API(kbase_pm_wait_for_poweroff_work_complete);
+
void kbase_pm_enable_interrupts(struct kbase_device *kbdev)
{
unsigned long flags;
@@ -2704,12 +2803,12 @@ void kbase_pm_enable_interrupts(struct kbase_device *kbdev)
kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), 0xFFFFFFFF);
kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK), 0xFFFFFFFF);
- kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), 0xFFFFFFFF);
+ kbase_reg_write(kbdev, MMU_CONTROL_REG(MMU_IRQ_CLEAR), 0xFFFFFFFF);
#if MALI_USE_CSF
/* Enable only the Page fault bits part */
- kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), 0xFFFF);
+ kbase_reg_write(kbdev, MMU_CONTROL_REG(MMU_IRQ_MASK), 0xFFFF);
#else
- kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), 0xFFFFFFFF);
+ kbase_reg_write(kbdev, MMU_CONTROL_REG(MMU_IRQ_MASK), 0xFFFFFFFF);
#endif
}
@@ -2729,8 +2828,8 @@ void kbase_pm_disable_interrupts_nolock(struct kbase_device *kbdev)
kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK), 0);
kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), 0xFFFFFFFF);
- kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), 0);
- kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), 0xFFFFFFFF);
+ kbase_reg_write(kbdev, MMU_CONTROL_REG(MMU_IRQ_MASK), 0);
+ kbase_reg_write(kbdev, MMU_CONTROL_REG(MMU_IRQ_CLEAR), 0xFFFFFFFF);
}
void kbase_pm_disable_interrupts(struct kbase_device *kbdev)
@@ -3147,9 +3246,13 @@ static int kbase_pm_hw_issues_detect(struct kbase_device *kbdev)
kbdev->hw_quirks_tiler = 0;
kbdev->hw_quirks_mmu = 0;
- if (!of_property_read_u32(np, "quirks_gpu", &kbdev->hw_quirks_gpu)) {
- dev_info(kbdev->dev,
- "Found quirks_gpu = [0x%x] in Devicetree\n",
+ /* Read the "-" versions of the properties and fall back to
+ * the "_" versions if these are not found
+ */
+
+ if (!of_property_read_u32(np, "quirks-gpu", &kbdev->hw_quirks_gpu) ||
+ !of_property_read_u32(np, "quirks_gpu", &kbdev->hw_quirks_gpu)) {
+ dev_info(kbdev->dev, "Found quirks_gpu = [0x%x] in Devicetree\n",
kbdev->hw_quirks_gpu);
} else {
error = kbase_set_gpu_quirks(kbdev, prod_id);
@@ -3157,33 +3260,30 @@ static int kbase_pm_hw_issues_detect(struct kbase_device *kbdev)
return error;
}
- if (!of_property_read_u32(np, "quirks_sc",
- &kbdev->hw_quirks_sc)) {
- dev_info(kbdev->dev,
- "Found quirks_sc = [0x%x] in Devicetree\n",
- kbdev->hw_quirks_sc);
+ if (!of_property_read_u32(np, "quirks-sc", &kbdev->hw_quirks_sc) ||
+ !of_property_read_u32(np, "quirks_sc", &kbdev->hw_quirks_sc)) {
+ dev_info(kbdev->dev, "Found quirks_sc = [0x%x] in Devicetree\n",
+ kbdev->hw_quirks_sc);
} else {
error = kbase_set_sc_quirks(kbdev, prod_id);
if (error)
return error;
}
- if (!of_property_read_u32(np, "quirks_tiler",
- &kbdev->hw_quirks_tiler)) {
- dev_info(kbdev->dev,
- "Found quirks_tiler = [0x%x] in Devicetree\n",
- kbdev->hw_quirks_tiler);
+ if (!of_property_read_u32(np, "quirks-tiler", &kbdev->hw_quirks_tiler) ||
+ !of_property_read_u32(np, "quirks_tiler", &kbdev->hw_quirks_tiler)) {
+ dev_info(kbdev->dev, "Found quirks_tiler = [0x%x] in Devicetree\n",
+ kbdev->hw_quirks_tiler);
} else {
error = kbase_set_tiler_quirks(kbdev);
if (error)
return error;
}
- if (!of_property_read_u32(np, "quirks_mmu",
- &kbdev->hw_quirks_mmu)) {
- dev_info(kbdev->dev,
- "Found quirks_mmu = [0x%x] in Devicetree\n",
- kbdev->hw_quirks_mmu);
+ if (!of_property_read_u32(np, "quirks-mmu", &kbdev->hw_quirks_mmu) ||
+ !of_property_read_u32(np, "quirks_mmu", &kbdev->hw_quirks_mmu)) {
+ dev_info(kbdev->dev, "Found quirks_mmu = [0x%x] in Devicetree\n",
+ kbdev->hw_quirks_mmu);
} else {
error = kbase_set_mmu_quirks(kbdev);
}
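
The dash/underscore fallback is repeated for each quirk property above. Purely as a sketch (the driver defines no such helper), the pattern could be expressed once as:

	#include <linux/of.h>

	/* Hypothetical helper: try the preferred "-" spelling, then the legacy "_" one. */
	static int read_quirk_u32(const struct device_node *np, const char *dash_name,
				  const char *underscore_name, u32 *out)
	{
		if (!of_property_read_u32(np, dash_name, out))
			return 0;
		return of_property_read_u32(np, underscore_name, out);
	}

A call such as read_quirk_u32(np, "quirks-gpu", "quirks_gpu", &kbdev->hw_quirks_gpu) would then prefer the new "-" spelling and silently accept the legacy "_" one.
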
diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_internal.h b/mali_kbase/backend/gpu/mali_kbase_pm_internal.h
index 9e29236..d7f19fb 100644
--- a/mali_kbase/backend/gpu/mali_kbase_pm_internal.h
+++ b/mali_kbase/backend/gpu/mali_kbase_pm_internal.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -224,7 +224,7 @@ void kbase_pm_reset_done(struct kbase_device *kbdev);
* power off in progress and kbase_pm_context_active() was called instead of
* kbase_csf_scheduler_pm_active().
*
- * Return: 0 on success, error code on error
+ * Return: 0 on success, or -ETIMEDOUT on timeout.
*/
int kbase_pm_wait_for_desired_state(struct kbase_device *kbdev);
#else
@@ -247,12 +247,27 @@ int kbase_pm_wait_for_desired_state(struct kbase_device *kbdev);
* must ensure that this is not the case by, for example, calling
* kbase_pm_wait_for_poweroff_work_complete()
*
- * Return: 0 on success, error code on error
+ * Return: 0 on success, or -ETIMEDOUT on timeout.
*/
int kbase_pm_wait_for_desired_state(struct kbase_device *kbdev);
#endif
/**
+ * kbase_pm_killable_wait_for_desired_state - Wait for the desired power state to be
+ * reached in a killable state.
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * This function is the same as kbase_pm_wait_for_desired_state(), except that it
+ * allows the SIGKILL signal to interrupt the wait.
+ * It is intended to be called from code that executes in ioctl or other userspace
+ * context, wherever it is safe to do so.
+ *
+ * Return: 0 on success, -ETIMEDOUT on timeout, or -ERESTARTSYS if the wait was
+ * interrupted.
+ */
+int kbase_pm_killable_wait_for_desired_state(struct kbase_device *kbdev);
+
+/**
* kbase_pm_wait_for_l2_powered - Wait for the L2 cache to be powered on
*
* @kbdev: The kbase device structure for the device (must be a valid pointer)
@@ -467,8 +482,26 @@ void kbase_pm_release_gpu_cycle_counter_nolock(struct kbase_device *kbdev);
* This function effectively just waits for the @gpu_poweroff_wait_work work
* item to complete, if it was enqueued. GPU may not have been powered down
* before this function returns.
+ *
+ * Return: 0 on success, error code on error
*/
-void kbase_pm_wait_for_poweroff_work_complete(struct kbase_device *kbdev);
+int kbase_pm_wait_for_poweroff_work_complete(struct kbase_device *kbdev);
+
+/**
+ * kbase_pm_killable_wait_for_poweroff_work_complete - Wait for the poweroff workqueue to
+ * complete in killable state.
+ *
+ * @kbdev: The kbase device structure for the device (must be a valid pointer)
+ *
+ * This function is the same as kbase_pm_wait_for_poweroff_work_complete(), except
+ * that it allows the SIGKILL signal to interrupt the wait.
+ * It is intended to be called from code that executes in ioctl or other userspace
+ * context, wherever it is safe to do so.
+ *
+ * Return: 0 on success, -ETIMEDOUT on timeout, or -ERESTARTSYS if the wait was
+ * interrupted.
+ */
+int kbase_pm_killable_wait_for_poweroff_work_complete(struct kbase_device *kbdev);
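
As a usage sketch only (the caller below is hypothetical and not part of this patch), an ioctl-context user of the killable variant would typically propagate an interrupted or timed-out wait back to user space:

	static int example_ioctl_path(struct kbase_device *kbdev)
	{
		int ret = kbase_pm_killable_wait_for_poweroff_work_complete(kbdev);

		if (ret == -ERESTARTSYS)
			return ret;	/* wait interrupted by SIGKILL */
		if (ret == -ETIMEDOUT)
			return ret;	/* poweroff worker did not finish in time */

		/* safe to proceed: the poweroff work item is no longer pending */
		return 0;
	}
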
/**
* kbase_pm_wait_for_gpu_power_down - Wait for the GPU power down to complete
@@ -857,6 +890,8 @@ static inline bool kbase_pm_mcu_is_in_desired_state(struct kbase_device *kbdev)
{
bool in_desired_state = true;
+ lockdep_assert_held(&kbdev->hwaccess_lock);
+
if (kbase_pm_is_mcu_desired(kbdev) && kbdev->pm.backend.mcu_state != KBASE_MCU_ON)
in_desired_state = false;
else if (!kbase_pm_is_mcu_desired(kbdev) &&
diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_policy.c b/mali_kbase/backend/gpu/mali_kbase_pm_policy.c
index f5dc008..7d7650c 100644
--- a/mali_kbase/backend/gpu/mali_kbase_pm_policy.c
+++ b/mali_kbase/backend/gpu/mali_kbase_pm_policy.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -54,7 +54,9 @@ void kbase_pm_policy_init(struct kbase_device *kbdev)
unsigned long flags;
int i;
- if (of_property_read_string(np, "power_policy", &power_policy_name) == 0) {
+ /* Read "power-policy" property and fallback to "power_policy" if not found */
+ if ((of_property_read_string(np, "power-policy", &power_policy_name) == 0) ||
+ (of_property_read_string(np, "power_policy", &power_policy_name) == 0)) {
for (i = 0; i < ARRAY_SIZE(all_policy_list); i++)
if (sysfs_streq(all_policy_list[i]->name, power_policy_name)) {
default_policy = all_policy_list[i];
@@ -298,6 +300,8 @@ void kbase_pm_set_policy(struct kbase_device *kbdev,
bool reset_gpu = false;
bool reset_op_prevented = true;
struct kbase_csf_scheduler *scheduler = NULL;
+ u32 pwroff;
+ bool switching_to_always_on;
#endif
KBASE_DEBUG_ASSERT(kbdev != NULL);
@@ -306,6 +310,16 @@ void kbase_pm_set_policy(struct kbase_device *kbdev,
KBASE_KTRACE_ADD(kbdev, PM_SET_POLICY, NULL, new_policy->id);
#if MALI_USE_CSF
+ pwroff = kbase_csf_firmware_get_mcu_core_pwroff_time(kbdev);
+ switching_to_always_on = new_policy == &kbase_pm_always_on_policy_ops;
+ if (pwroff == 0 && !switching_to_always_on) {
+ dev_warn(kbdev->dev,
+ "power_policy: cannot switch away from always_on with mcu_shader_pwroff_timeout set to 0\n");
+ dev_warn(kbdev->dev,
+ "power_policy: resetting mcu_shader_pwroff_timeout to default value to switch policy from always_on\n");
+ kbase_csf_firmware_reset_mcu_core_pwroff_time(kbdev);
+ }
+
scheduler = &kbdev->csf.scheduler;
KBASE_DEBUG_ASSERT(scheduler != NULL);
diff --git a/mali_kbase/backend/gpu/mali_kbase_time.c b/mali_kbase/backend/gpu/mali_kbase_time.c
index 7a4d662..28365c0 100644
--- a/mali_kbase/backend/gpu/mali_kbase_time.c
+++ b/mali_kbase/backend/gpu/mali_kbase_time.c
@@ -29,6 +29,39 @@
#include <device/mali_kbase_device.h>
#include <backend/gpu/mali_kbase_pm_internal.h>
#include <mali_kbase_config_defaults.h>
+#include <linux/version_compat_defs.h>
+
+struct kbase_timeout_info {
+ char *selector_str;
+ u64 timeout_cycles;
+};
+
+#if MALI_USE_CSF
+static struct kbase_timeout_info timeout_info[KBASE_TIMEOUT_SELECTOR_COUNT] = {
+ [CSF_FIRMWARE_TIMEOUT] = { "CSF_FIRMWARE_TIMEOUT", MIN(CSF_FIRMWARE_TIMEOUT_CYCLES,
+ CSF_FIRMWARE_PING_TIMEOUT_CYCLES) },
+ [CSF_PM_TIMEOUT] = { "CSF_PM_TIMEOUT", CSF_PM_TIMEOUT_CYCLES },
+ [CSF_GPU_RESET_TIMEOUT] = { "CSF_GPU_RESET_TIMEOUT", CSF_GPU_RESET_TIMEOUT_CYCLES },
+ [CSF_CSG_SUSPEND_TIMEOUT] = { "CSF_CSG_SUSPEND_TIMEOUT", CSF_CSG_SUSPEND_TIMEOUT_CYCLES },
+ [CSF_FIRMWARE_BOOT_TIMEOUT] = { "CSF_FIRMWARE_BOOT_TIMEOUT",
+ CSF_FIRMWARE_BOOT_TIMEOUT_CYCLES },
+ [CSF_FIRMWARE_PING_TIMEOUT] = { "CSF_FIRMWARE_PING_TIMEOUT",
+ CSF_FIRMWARE_PING_TIMEOUT_CYCLES },
+ [CSF_SCHED_PROTM_PROGRESS_TIMEOUT] = { "CSF_SCHED_PROTM_PROGRESS_TIMEOUT",
+ DEFAULT_PROGRESS_TIMEOUT_CYCLES },
+ [MMU_AS_INACTIVE_WAIT_TIMEOUT] = { "MMU_AS_INACTIVE_WAIT_TIMEOUT",
+ MMU_AS_INACTIVE_WAIT_TIMEOUT_CYCLES },
+ [KCPU_FENCE_SIGNAL_TIMEOUT] = { "KCPU_FENCE_SIGNAL_TIMEOUT",
+ KCPU_FENCE_SIGNAL_TIMEOUT_CYCLES },
+};
+#else
+static struct kbase_timeout_info timeout_info[KBASE_TIMEOUT_SELECTOR_COUNT] = {
+ [MMU_AS_INACTIVE_WAIT_TIMEOUT] = { "MMU_AS_INACTIVE_WAIT_TIMEOUT",
+ MMU_AS_INACTIVE_WAIT_TIMEOUT_CYCLES },
+ [JM_DEFAULT_JS_FREE_TIMEOUT] = { "JM_DEFAULT_JS_FREE_TIMEOUT",
+ JM_DEFAULT_JS_FREE_TIMEOUT_CYCLES },
+};
+#endif
void kbase_backend_get_gpu_time_norequest(struct kbase_device *kbdev,
u64 *cycle_counter,
@@ -108,94 +141,130 @@ void kbase_backend_get_gpu_time(struct kbase_device *kbdev, u64 *cycle_counter,
#endif
}
-unsigned int kbase_get_timeout_ms(struct kbase_device *kbdev,
- enum kbase_timeout_selector selector)
+static u64 kbase_device_get_scaling_frequency(struct kbase_device *kbdev)
+{
+ u64 freq_khz = kbdev->lowest_gpu_freq_khz;
+
+ if (!freq_khz) {
+ dev_dbg(kbdev->dev,
+ "Lowest frequency uninitialized! Using reference frequency for scaling");
+ return DEFAULT_REF_TIMEOUT_FREQ_KHZ;
+ }
+
+ return freq_khz;
+}
+
+void kbase_device_set_timeout_ms(struct kbase_device *kbdev, enum kbase_timeout_selector selector,
+ unsigned int timeout_ms)
{
+ char *selector_str;
+
+ if (unlikely(selector >= KBASE_TIMEOUT_SELECTOR_COUNT)) {
+ selector = KBASE_DEFAULT_TIMEOUT;
+ selector_str = timeout_info[selector].selector_str;
+ dev_warn(kbdev->dev,
+ "Unknown timeout selector passed, falling back to default: %s\n",
+ timeout_info[selector].selector_str);
+ }
+ selector_str = timeout_info[selector].selector_str;
+
+ kbdev->backend_time.device_scaled_timeouts[selector] = timeout_ms;
+ dev_dbg(kbdev->dev, "\t%-35s: %ums\n", selector_str, timeout_ms);
+}
+
+void kbase_device_set_timeout(struct kbase_device *kbdev, enum kbase_timeout_selector selector,
+ u64 timeout_cycles, u32 cycle_multiplier)
+{
+ u64 final_cycles;
+ u64 timeout;
+ u64 freq_khz = kbase_device_get_scaling_frequency(kbdev);
+
+ if (unlikely(selector >= KBASE_TIMEOUT_SELECTOR_COUNT)) {
+ selector = KBASE_DEFAULT_TIMEOUT;
+ dev_warn(kbdev->dev,
+ "Unknown timeout selector passed, falling back to default: %s\n",
+ timeout_info[selector].selector_str);
+ }
+
+ /* If the multiplication overflows, we will have unsigned wrap-around, and so might
+ * end up with a shorter timeout. In those cases, we then want to have the largest
+ * timeout possible that will not run into these issues. Note that this will not
+ * wait for U64_MAX/frequency ms, as it will be clamped to a max of UINT_MAX
+ * milliseconds by subsequent steps.
+ */
+ if (check_mul_overflow(timeout_cycles, (u64)cycle_multiplier, &final_cycles))
+ final_cycles = U64_MAX;
+
/* Timeout calculation:
* dividing number of cycles by freq in KHz automatically gives value
* in milliseconds. nr_cycles will have to be multiplied by 1e3 to
* get result in microseconds, and 1e6 to get result in nanoseconds.
*/
+ timeout = div_u64(final_cycles, freq_khz);
+
+ if (unlikely(timeout > UINT_MAX)) {
+ dev_dbg(kbdev->dev,
+ "Capping excessive timeout %llums for %s at freq %llukHz to UINT_MAX ms",
+ timeout, timeout_info[selector].selector_str,
+ kbase_device_get_scaling_frequency(kbdev));
+ timeout = UINT_MAX;
+ }
- u64 timeout, nr_cycles = 0;
- u64 freq_khz;
+ kbase_device_set_timeout_ms(kbdev, selector, (unsigned int)timeout);
+}
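
A worked example of the scaling performed above, using hypothetical numbers: with timeout_cycles = 500,000,000, cycle_multiplier = 1 and a lowest scaling frequency of 100,000 kHz (100 MHz), final_cycles is 5 x 10^8 and div_u64(500000000, 100000) yields a timeout of 5000 ms. If the lowest frequency were still uninitialized, DEFAULT_REF_TIMEOUT_FREQ_KHZ would be used for the division instead, and any result above UINT_MAX ms would be clamped.
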
- /* Only for debug messages, safe default in case it's mis-maintained */
- const char *selector_str = "(unknown)";
+/**
+ * kbase_timeout_scaling_init - Initialize the table of scaled timeout
+ * values associated with a @kbase_device.
+ *
+ * @kbdev: KBase device pointer.
+ *
+ * Return: 0 on success, negative error code otherwise.
+ */
+static int kbase_timeout_scaling_init(struct kbase_device *kbdev)
+{
+ int err;
+ enum kbase_timeout_selector selector;
- if (!kbdev->lowest_gpu_freq_khz) {
- dev_dbg(kbdev->dev,
- "Lowest frequency uninitialized! Using reference frequency for scaling");
- freq_khz = DEFAULT_REF_TIMEOUT_FREQ_KHZ;
- } else {
- freq_khz = kbdev->lowest_gpu_freq_khz;
+ /* First, we initialize the minimum and maximum device frequencies, which
+ * are used to compute the timeouts.
+ */
+ err = kbase_pm_gpu_freq_init(kbdev);
+ if (unlikely(err < 0)) {
+ dev_dbg(kbdev->dev, "Could not initialize GPU frequency\n");
+ return err;
}
- switch (selector) {
- case MMU_AS_INACTIVE_WAIT_TIMEOUT:
- selector_str = "MMU_AS_INACTIVE_WAIT_TIMEOUT";
- nr_cycles = MMU_AS_INACTIVE_WAIT_TIMEOUT_CYCLES;
- break;
- case KBASE_TIMEOUT_SELECTOR_COUNT:
- default:
-#if !MALI_USE_CSF
- WARN(1, "Invalid timeout selector used! Using default value");
- nr_cycles = JM_DEFAULT_TIMEOUT_CYCLES;
- break;
- case JM_DEFAULT_JS_FREE_TIMEOUT:
- selector_str = "JM_DEFAULT_JS_FREE_TIMEOUT";
- nr_cycles = JM_DEFAULT_JS_FREE_TIMEOUT_CYCLES;
- break;
-#else
- /* Use Firmware timeout if invalid selection */
- WARN(1,
- "Invalid timeout selector used! Using CSF Firmware timeout");
- fallthrough;
- case CSF_FIRMWARE_TIMEOUT:
- selector_str = "CSF_FIRMWARE_TIMEOUT";
- /* Any FW timeout cannot be longer than the FW ping interval, after which
- * the firmware_aliveness_monitor will be triggered and may restart
- * the GPU if the FW is unresponsive.
+ dev_dbg(kbdev->dev, "Scaling kbase timeouts:\n");
+ for (selector = 0; selector < KBASE_TIMEOUT_SELECTOR_COUNT; selector++) {
+ u32 cycle_multiplier = 1;
+ u64 nr_cycles = timeout_info[selector].timeout_cycles;
+#if MALI_USE_CSF
+ /* Special case: the scheduler progress timeout can be set manually,
+ * and does not have a canonical length defined in the headers. Hence,
+ * we query it once upon startup to get a baseline, and change it upon
+ * every invocation of the appropriate functions
*/
- nr_cycles = min(CSF_FIRMWARE_PING_TIMEOUT_CYCLES, CSF_FIRMWARE_TIMEOUT_CYCLES);
-
- if (nr_cycles == CSF_FIRMWARE_PING_TIMEOUT_CYCLES)
- dev_warn(kbdev->dev, "Capping %s to CSF_FIRMWARE_PING_TIMEOUT\n",
- selector_str);
- break;
- case CSF_PM_TIMEOUT:
- selector_str = "CSF_PM_TIMEOUT";
- nr_cycles = CSF_PM_TIMEOUT_CYCLES;
- break;
- case CSF_GPU_RESET_TIMEOUT:
- selector_str = "CSF_GPU_RESET_TIMEOUT";
- nr_cycles = CSF_GPU_RESET_TIMEOUT_CYCLES;
- break;
- case CSF_CSG_SUSPEND_TIMEOUT:
- selector_str = "CSF_CSG_SUSPEND_TIMEOUT";
- nr_cycles = CSF_CSG_SUSPEND_TIMEOUT_CYCLES;
- break;
- case CSF_FIRMWARE_BOOT_TIMEOUT:
- selector_str = "CSF_FIRMWARE_BOOT_TIMEOUT";
- nr_cycles = CSF_FIRMWARE_BOOT_TIMEOUT_CYCLES;
- break;
- case CSF_FIRMWARE_PING_TIMEOUT:
- selector_str = "CSF_FIRMWARE_PING_TIMEOUT";
- nr_cycles = CSF_FIRMWARE_PING_TIMEOUT_CYCLES;
- break;
- case CSF_SCHED_PROTM_PROGRESS_TIMEOUT:
- selector_str = "CSF_SCHED_PROTM_PROGRESS_TIMEOUT";
- nr_cycles = kbase_csf_timeout_get(kbdev);
- break;
+ if (selector == CSF_SCHED_PROTM_PROGRESS_TIMEOUT)
+ nr_cycles = kbase_csf_timeout_get(kbdev);
#endif
+
+ /* Since we are in control of the iteration bounds for the selector,
+ * we don't have to worry about bounds checking when setting the timeout.
+ */
+ kbase_device_set_timeout(kbdev, selector, nr_cycles, cycle_multiplier);
}
+ return 0;
+}
- timeout = div_u64(nr_cycles, freq_khz);
- if (WARN(timeout > UINT_MAX,
- "Capping excessive timeout %llums for %s at freq %llukHz to UINT_MAX ms",
- (unsigned long long)timeout, selector_str, (unsigned long long)freq_khz))
- timeout = UINT_MAX;
- return (unsigned int)timeout;
+unsigned int kbase_get_timeout_ms(struct kbase_device *kbdev, enum kbase_timeout_selector selector)
+{
+ if (unlikely(selector >= KBASE_TIMEOUT_SELECTOR_COUNT)) {
+ dev_warn(kbdev->dev, "Querying wrong selector, falling back to default\n");
+ selector = KBASE_DEFAULT_TIMEOUT;
+ }
+
+ return kbdev->backend_time.device_scaled_timeouts[selector];
}
KBASE_EXPORT_TEST_API(kbase_get_timeout_ms);
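
With the table-driven lookup above, callers simply read back the pre-scaled value; for instance (a sketch mirroring the CSF poweroff wait earlier in this patch):

	const long timeout_jiffies =
		msecs_to_jiffies(kbase_get_timeout_ms(kbdev, CSF_PM_TIMEOUT));
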
@@ -247,18 +316,21 @@ static void get_cpu_gpu_time(struct kbase_device *kbdev, u64 *cpu_ts, u64 *gpu_t
int kbase_backend_time_init(struct kbase_device *kbdev)
{
+ int err = 0;
#if MALI_USE_CSF
u64 cpu_ts = 0;
u64 gpu_ts = 0;
u64 freq;
u64 common_factor;
+ kbase_pm_register_access_enable(kbdev);
get_cpu_gpu_time(kbdev, &cpu_ts, &gpu_ts, NULL);
freq = arch_timer_get_cntfrq();
if (!freq) {
dev_warn(kbdev->dev, "arch_timer_get_rate() is zero!");
- return -EINVAL;
+ err = -EINVAL;
+ goto disable_registers;
}
common_factor = gcd(NSEC_PER_SEC, freq);
@@ -268,12 +340,23 @@ int kbase_backend_time_init(struct kbase_device *kbdev)
if (!kbdev->backend_time.divisor) {
dev_warn(kbdev->dev, "CPU to GPU divisor is zero!");
- return -EINVAL;
+ err = -EINVAL;
+ goto disable_registers;
}
kbdev->backend_time.offset = cpu_ts - div64_u64(gpu_ts * kbdev->backend_time.multiplier,
kbdev->backend_time.divisor);
#endif
- return 0;
+ if (kbase_timeout_scaling_init(kbdev)) {
+ dev_warn(kbdev->dev, "Could not initialize timeout scaling");
+ err = -EINVAL;
+ }
+
+#if MALI_USE_CSF
+disable_registers:
+ kbase_pm_register_access_disable(kbdev);
+#endif
+
+ return err;
}
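The multiplier/divisor pair computed above reduces NSEC_PER_SEC and the counter frequency by their GCD so that a GPU timestamp can be converted to CPU nanoseconds without an overflow-prone rescale at every call. A minimal sketch of the conversion implied by the offset calculation; the in-tree conversion routine is not part of this hunk and the helper name is illustrative:

/* Illustrative only: converts a raw GPU cycle-counter timestamp to the CPU
 * time base using the fields initialised in kbase_backend_time_init():
 * cpu_ns ~= gpu_ts * multiplier / divisor + offset.
 */
static u64 example_gpu_ts_to_cpu_ns(struct kbase_device *kbdev, u64 gpu_ts)
{
	return div64_u64(gpu_ts * kbdev->backend_time.multiplier,
			 kbdev->backend_time.divisor) +
	       kbdev->backend_time.offset;
}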
diff --git a/mali_kbase/build.bp b/mali_kbase/build.bp
index e82dd12..381b1fe 100644
--- a/mali_kbase/build.bp
+++ b/mali_kbase/build.bp
@@ -68,6 +68,9 @@ bob_defaults {
large_page_alloc: {
kbuild_options: ["CONFIG_LARGE_PAGE_ALLOC=y"],
},
+ page_migration_support: {
+ kbuild_options: ["CONFIG_PAGE_MIGRATION_SUPPORT=y"],
+ },
mali_memory_fully_backed: {
kbuild_options: ["CONFIG_MALI_MEMORY_FULLY_BACKED=y"],
},
@@ -143,6 +146,18 @@ bob_defaults {
mali_coresight: {
kbuild_options: ["CONFIG_MALI_CORESIGHT=y"],
},
+ mali_fw_trace_mode_manual: {
+ kbuild_options: ["CONFIG_MALI_FW_TRACE_MODE_MANUAL=y"],
+ },
+ mali_fw_trace_mode_auto_print: {
+ kbuild_options: ["CONFIG_MALI_FW_TRACE_MODE_AUTO_PRINT=y"],
+ },
+ mali_fw_trace_mode_auto_discard: {
+ kbuild_options: ["CONFIG_MALI_FW_TRACE_MODE_AUTO_DISCARD=y"],
+ },
+ mali_trace_power_gpu_work_period: {
+ kbuild_options: ["CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD=y"],
+ },
kbuild_options: [
"CONFIG_MALI_PLATFORM_NAME={{.mali_platform_name}}",
"MALI_CUSTOMER_RELEASE={{.release}}",
diff --git a/mali_kbase/context/backend/mali_kbase_context_csf.c b/mali_kbase/context/backend/mali_kbase_context_csf.c
index 9aa661a..45a5a6c 100644
--- a/mali_kbase/context/backend/mali_kbase_context_csf.c
+++ b/mali_kbase/context/backend/mali_kbase_context_csf.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -124,7 +124,7 @@ struct kbase_context *kbase_create_context(struct kbase_device *kbdev,
bool is_compat,
base_context_create_flags const flags,
unsigned long const api_version,
- struct file *const filp)
+ struct kbase_file *const kfile)
{
struct kbase_context *kctx;
unsigned int i = 0;
@@ -143,9 +143,11 @@ struct kbase_context *kbase_create_context(struct kbase_device *kbdev,
kctx->kbdev = kbdev;
kctx->api_version = api_version;
- kctx->filp = filp;
+ kctx->kfile = kfile;
kctx->create_flags = flags;
+ memcpy(kctx->comm, current->comm, sizeof(current->comm));
+
if (is_compat)
kbase_ctx_flag_set(kctx, KCTX_COMPAT);
#if defined(CONFIG_64BIT)
@@ -213,6 +215,16 @@ void kbase_destroy_context(struct kbase_context *kctx)
kctx->tgid, kctx->id);
}
+	/* We have synchronized against System suspend and incremented
+	 * pm.active_count, so any subsequent invocation of the System suspend
+	 * callback will be blocked.
+	 * If a System suspend callback was already in progress, the above loop
+	 * will have waited until the System resume callback had begun.
+	 * Wait for the System resume callback to complete as well, since
+	 * context termination must also be avoided during System resume.
+	 */
+ wait_event(kbdev->pm.resume_wait, !kbase_pm_is_resuming(kbdev));
+
kbase_mem_pool_group_mark_dying(&kctx->mem_pools);
kbase_context_term_partial(kctx, ARRAY_SIZE(context_init));
diff --git a/mali_kbase/context/backend/mali_kbase_context_jm.c b/mali_kbase/context/backend/mali_kbase_context_jm.c
index 7acb3f6..39595d9 100644
--- a/mali_kbase/context/backend/mali_kbase_context_jm.c
+++ b/mali_kbase/context/backend/mali_kbase_context_jm.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -179,7 +179,7 @@ struct kbase_context *kbase_create_context(struct kbase_device *kbdev,
bool is_compat,
base_context_create_flags const flags,
unsigned long const api_version,
- struct file *const filp)
+ struct kbase_file *const kfile)
{
struct kbase_context *kctx;
unsigned int i = 0;
@@ -198,7 +198,7 @@ struct kbase_context *kbase_create_context(struct kbase_device *kbdev,
kctx->kbdev = kbdev;
kctx->api_version = api_version;
- kctx->filp = filp;
+ kctx->kfile = kfile;
kctx->create_flags = flags;
if (is_compat)
@@ -258,6 +258,17 @@ void kbase_destroy_context(struct kbase_context *kctx)
wait_event(kbdev->pm.resume_wait,
!kbase_pm_is_suspending(kbdev));
}
+
+	/* We have synchronized against System suspend and incremented
+	 * pm.active_count, so any subsequent invocation of the System suspend
+	 * callback will be blocked.
+	 * If a System suspend callback was already in progress, the above loop
+	 * will have waited until the System resume callback had begun.
+	 * Wait for the System resume callback to complete as well, since
+	 * context termination must also be avoided during System resume.
+	 */
+ wait_event(kbdev->pm.resume_wait, !kbase_pm_is_resuming(kbdev));
+
#ifdef CONFIG_MALI_ARBITER_SUPPORT
atomic_dec(&kbdev->pm.gpu_users_waiting);
#endif /* CONFIG_MALI_ARBITER_SUPPORT */
diff --git a/mali_kbase/context/mali_kbase_context.c b/mali_kbase/context/mali_kbase_context.c
index 84d56f7..70941ef 100644
--- a/mali_kbase/context/mali_kbase_context.c
+++ b/mali_kbase/context/mali_kbase_context.c
@@ -190,7 +190,7 @@ int kbase_context_common_init(struct kbase_context *kctx)
kctx->pid = current->pid;
/* Check if this is a Userspace created context */
- if (likely(kctx->filp)) {
+ if (likely(kctx->kfile)) {
struct pid *pid_struct;
rcu_read_lock();
@@ -264,7 +264,7 @@ int kbase_context_common_init(struct kbase_context *kctx)
if (err) {
dev_err(kctx->kbdev->dev,
"(err:%d) failed to insert kctx to kbase_process", err);
- if (likely(kctx->filp)) {
+ if (likely(kctx->kfile)) {
mmdrop(kctx->process_mm);
put_task_struct(kctx->task);
}
@@ -356,7 +356,7 @@ void kbase_context_common_term(struct kbase_context *kctx)
kbase_remove_kctx_from_process(kctx);
mutex_unlock(&kctx->kbdev->kctx_list_lock);
- if (likely(kctx->filp)) {
+ if (likely(kctx->kfile)) {
mmdrop(kctx->process_mm);
put_task_struct(kctx->task);
}
diff --git a/mali_kbase/context/mali_kbase_context.h b/mali_kbase/context/mali_kbase_context.h
index 7c90e27..22cb00c 100644
--- a/mali_kbase/context/mali_kbase_context.h
+++ b/mali_kbase/context/mali_kbase_context.h
@@ -56,8 +56,9 @@ void kbase_context_debugfs_term(struct kbase_context *const kctx);
* BASEP_CONTEXT_CREATE_KERNEL_FLAGS.
* @api_version: Application program interface version, as encoded in
* a single integer by the KBASE_API_VERSION macro.
- * @filp: Pointer to the struct file corresponding to device file
- * /dev/malixx instance, passed to the file's open method.
+ * @kfile: Pointer to the object representing the /dev/malixx device
+ * file instance. Shall be passed as NULL for internally created
+ * contexts.
*
* Up to one context can be created for each client that opens the device file
* /dev/malixx. Context creation is deferred until a special ioctl() system call
@@ -69,7 +70,7 @@ struct kbase_context *
kbase_create_context(struct kbase_device *kbdev, bool is_compat,
base_context_create_flags const flags,
unsigned long api_version,
- struct file *filp);
+ struct kbase_file *const kfile);
/**
* kbase_destroy_context - Destroy a kernel base context.
diff --git a/mali_kbase/csf/Kbuild b/mali_kbase/csf/Kbuild
index c5438f0..c626092 100644
--- a/mali_kbase/csf/Kbuild
+++ b/mali_kbase/csf/Kbuild
@@ -1,6 +1,6 @@
# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
#
-# (C) COPYRIGHT 2018-2022 ARM Limited. All rights reserved.
+# (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved.
#
# This program is free software and is provided to you under the terms of the
# GNU General Public License version 2 as published by the Free Software
@@ -32,6 +32,7 @@ mali_kbase-y += \
csf/mali_kbase_csf_csg_debugfs.o \
csf/mali_kbase_csf_kcpu_debugfs.o \
csf/mali_kbase_csf_sync_debugfs.o \
+ csf/mali_kbase_csf_kcpu_fence_debugfs.o \
csf/mali_kbase_csf_protected_memory.o \
csf/mali_kbase_csf_tiler_heap_debugfs.o \
csf/mali_kbase_csf_cpu_queue_debugfs.o \
diff --git a/mali_kbase/csf/ipa_control/mali_kbase_csf_ipa_control.c b/mali_kbase/csf/ipa_control/mali_kbase_csf_ipa_control.c
index 4336705..bbf2e4e 100644
--- a/mali_kbase/csf/ipa_control/mali_kbase_csf_ipa_control.c
+++ b/mali_kbase/csf/ipa_control/mali_kbase_csf_ipa_control.c
@@ -64,12 +64,19 @@
* struct kbase_ipa_control_listener_data - Data for the GPU clock frequency
* listener
*
- * @listener: GPU clock frequency listener.
- * @kbdev: Pointer to kbase device.
+ * @listener: GPU clock frequency listener.
+ * @kbdev: Pointer to kbase device.
+ * @clk_chg_wq: Dedicated workqueue to process the work item corresponding to
+ * a clock rate notification.
+ * @clk_chg_work: Work item to process a clock rate change.
+ * @rate: The most recently notified clock rate, in Hz.
*/
struct kbase_ipa_control_listener_data {
struct kbase_clk_rate_listener listener;
struct kbase_device *kbdev;
+ struct workqueue_struct *clk_chg_wq;
+ struct work_struct clk_chg_work;
+ atomic_t rate;
};
static u32 timer_value(u32 gpu_rate)
@@ -271,52 +278,61 @@ kbase_ipa_control_rate_change_notify(struct kbase_clk_rate_listener *listener,
u32 clk_index, u32 clk_rate_hz)
{
if ((clk_index == KBASE_CLOCK_DOMAIN_TOP) && (clk_rate_hz != 0)) {
- size_t i;
struct kbase_ipa_control_listener_data *listener_data =
- container_of(listener,
- struct kbase_ipa_control_listener_data,
- listener);
- struct kbase_device *kbdev = listener_data->kbdev;
- struct kbase_ipa_control *ipa_ctrl = &kbdev->csf.ipa_control;
-
- lockdep_assert_held(&kbdev->hwaccess_lock);
- if (!kbdev->pm.backend.gpu_ready) {
- dev_err(kbdev->dev,
- "%s: GPU frequency cannot change while GPU is off",
- __func__);
- return;
- }
+ container_of(listener, struct kbase_ipa_control_listener_data, listener);
+
+ /* Save the rate and delegate the job to a work item */
+ atomic_set(&listener_data->rate, clk_rate_hz);
+ queue_work(listener_data->clk_chg_wq, &listener_data->clk_chg_work);
+ }
+}
- /* Interrupts are already disabled and interrupt state is also saved */
- spin_lock(&ipa_ctrl->lock);
+static void kbase_ipa_ctrl_rate_change_worker(struct work_struct *data)
+{
+ struct kbase_ipa_control_listener_data *listener_data =
+ container_of(data, struct kbase_ipa_control_listener_data, clk_chg_work);
+ struct kbase_device *kbdev = listener_data->kbdev;
+ struct kbase_ipa_control *ipa_ctrl = &kbdev->csf.ipa_control;
+ unsigned long flags;
+ u32 rate;
+ size_t i;
- for (i = 0; i < KBASE_IPA_CONTROL_MAX_SESSIONS; i++) {
- struct kbase_ipa_control_session *session = &ipa_ctrl->sessions[i];
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
- if (session->active) {
- size_t j;
+ if (!kbdev->pm.backend.gpu_ready) {
+ dev_err(kbdev->dev, "%s: GPU frequency cannot change while GPU is off", __func__);
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+ return;
+ }
- for (j = 0; j < session->num_prfcnts; j++) {
- struct kbase_ipa_control_prfcnt *prfcnt =
- &session->prfcnts[j];
+ spin_lock(&ipa_ctrl->lock);
+ /* Picking up the latest notified rate */
+ rate = (u32)atomic_read(&listener_data->rate);
- if (prfcnt->gpu_norm)
- calc_prfcnt_delta(kbdev, prfcnt, true);
- }
- }
- }
+ for (i = 0; i < KBASE_IPA_CONTROL_MAX_SESSIONS; i++) {
+ struct kbase_ipa_control_session *session = &ipa_ctrl->sessions[i];
- ipa_ctrl->cur_gpu_rate = clk_rate_hz;
+ if (session->active) {
+ size_t j;
- /* Update the timer for automatic sampling if active sessions
- * are present. Counters have already been manually sampled.
- */
- if (ipa_ctrl->num_active_sessions > 0) {
- kbase_reg_write(kbdev, IPA_CONTROL_REG(TIMER),
- timer_value(ipa_ctrl->cur_gpu_rate));
+ for (j = 0; j < session->num_prfcnts; j++) {
+ struct kbase_ipa_control_prfcnt *prfcnt = &session->prfcnts[j];
+
+ if (prfcnt->gpu_norm)
+ calc_prfcnt_delta(kbdev, prfcnt, true);
+ }
}
- spin_unlock(&ipa_ctrl->lock);
}
+
+ ipa_ctrl->cur_gpu_rate = rate;
+ /* Update the timer for automatic sampling if active sessions
+ * are present. Counters have already been manually sampled.
+ */
+ if (ipa_ctrl->num_active_sessions > 0)
+ kbase_reg_write(kbdev, IPA_CONTROL_REG(TIMER), timer_value(rate));
+
+ spin_unlock(&ipa_ctrl->lock);
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
}
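The notify/worker split above is a "latest value wins" mailbox: the notifier, which may run with interrupts disabled, only stores the rate and queues work, while the worker takes the sleep-safe path of acquiring hwaccess_lock with IRQs saved and reprograms the timer from whatever rate was stored last. Repeated notifications overwrite the atomic and coalesce into a single queued work item, which is why a plain atomic_read() of the latest rate suffices in the worker. A generic sketch of the same pattern, with hypothetical names:

/* Hypothetical illustration of the deferral pattern used by
 * kbase_ipa_control_rate_change_notify(): the hot path records the latest
 * value and kicks a worker; the worker consumes only the most recent value.
 */
struct latest_value_mailbox {
	atomic_t value;
	struct workqueue_struct *wq;
	struct work_struct work;
};

static void mailbox_notify(struct latest_value_mailbox *mb, u32 value)
{
	atomic_set(&mb->value, value);	/* overwrite: only the newest matters */
	queue_work(mb->wq, &mb->work);	/* no-op if the work is already queued */
}

static void mailbox_worker(struct work_struct *work)
{
	struct latest_value_mailbox *mb =
		container_of(work, struct latest_value_mailbox, work);
	u32 value = (u32)atomic_read(&mb->value);

	/* ... act on 'value' in sleepable context ... */
	(void)value;
}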
void kbase_ipa_control_init(struct kbase_device *kbdev)
@@ -344,11 +360,27 @@ void kbase_ipa_control_init(struct kbase_device *kbdev)
listener_data = kmalloc(sizeof(struct kbase_ipa_control_listener_data),
GFP_KERNEL);
if (listener_data) {
- listener_data->listener.notify =
- kbase_ipa_control_rate_change_notify;
- listener_data->kbdev = kbdev;
- ipa_ctrl->rtm_listener_data = listener_data;
- }
+ listener_data->clk_chg_wq =
+ alloc_workqueue("ipa_ctrl_wq", WQ_HIGHPRI | WQ_UNBOUND, 1);
+ if (listener_data->clk_chg_wq) {
+ INIT_WORK(&listener_data->clk_chg_work, kbase_ipa_ctrl_rate_change_worker);
+ listener_data->listener.notify = kbase_ipa_control_rate_change_notify;
+ listener_data->kbdev = kbdev;
+ ipa_ctrl->rtm_listener_data = listener_data;
+			/* Initialise to 0, which is outside the range of normally notified rates */
+ atomic_set(&listener_data->rate, 0);
+ } else {
+ dev_warn(kbdev->dev,
+ "%s: failed to allocate workqueue, clock rate update disabled",
+ __func__);
+ kfree(listener_data);
+ listener_data = NULL;
+ }
+	} else {
+		dev_warn(kbdev->dev,
+			 "%s: failed to allocate memory, IPA control clock rate update disabled",
+			 __func__);
+	}
+
spin_lock_irqsave(&clk_rtm->lock, flags);
if (clk_rtm->clks[KBASE_CLOCK_DOMAIN_TOP])
ipa_ctrl->cur_gpu_rate =
@@ -370,8 +402,10 @@ void kbase_ipa_control_term(struct kbase_device *kbdev)
WARN_ON(ipa_ctrl->num_active_sessions);
- if (listener_data)
+ if (listener_data) {
kbase_clk_rate_trace_manager_unsubscribe(clk_rtm, &listener_data->listener);
+ destroy_workqueue(listener_data->clk_chg_wq);
+ }
kfree(ipa_ctrl->rtm_listener_data);
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
@@ -997,14 +1031,11 @@ void kbase_ipa_control_rate_change_notify_test(struct kbase_device *kbdev,
u32 clk_index, u32 clk_rate_hz)
{
struct kbase_ipa_control *ipa_ctrl = &kbdev->csf.ipa_control;
- struct kbase_ipa_control_listener_data *listener_data =
- ipa_ctrl->rtm_listener_data;
- unsigned long flags;
+ struct kbase_ipa_control_listener_data *listener_data = ipa_ctrl->rtm_listener_data;
- spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
- kbase_ipa_control_rate_change_notify(&listener_data->listener,
- clk_index, clk_rate_hz);
- spin_lock_irqrestore(&kbdev->hwaccess_lock, flags);
+ kbase_ipa_control_rate_change_notify(&listener_data->listener, clk_index, clk_rate_hz);
+	/* Ensure the callback has taken effect before returning to the test caller */
+ flush_work(&listener_data->clk_chg_work);
}
KBASE_EXPORT_TEST_API(kbase_ipa_control_rate_change_notify_test);
#endif
@@ -1057,4 +1088,3 @@ void kbase_ipa_control_protm_exited(struct kbase_device *kbdev)
}
}
}
-
diff --git a/mali_kbase/csf/mali_kbase_csf.c b/mali_kbase/csf/mali_kbase_csf.c
index 2e3ced3..8eaedde 100644
--- a/mali_kbase/csf/mali_kbase_csf.c
+++ b/mali_kbase/csf/mali_kbase_csf.c
@@ -38,6 +38,7 @@
#include <linux/protected_memory_allocator.h>
#include <tl/mali_kbase_tracepoints.h>
#include "mali_kbase_csf_mcu_shared_reg.h"
+#include <linux/version_compat_defs.h>
#define CS_REQ_EXCEPTION_MASK (CS_REQ_FAULT_MASK | CS_REQ_FATAL_MASK)
#define CS_ACK_EXCEPTION_MASK (CS_ACK_FAULT_MASK | CS_ACK_FATAL_MASK)
@@ -171,19 +172,19 @@ static int get_user_pages_mmap_handle(struct kbase_context *kctx,
static void init_user_io_pages(struct kbase_queue *queue)
{
- u32 *input_addr = (u32 *)(queue->user_io_addr);
- u32 *output_addr = (u32 *)(queue->user_io_addr + PAGE_SIZE);
+ u64 *input_addr = queue->user_io_addr;
+ u64 *output_addr64 = queue->user_io_addr + PAGE_SIZE / sizeof(u64);
+ u32 *output_addr32 = (u32 *)(queue->user_io_addr + PAGE_SIZE / sizeof(u64));
- input_addr[CS_INSERT_LO/4] = 0;
- input_addr[CS_INSERT_HI/4] = 0;
-
- input_addr[CS_EXTRACT_INIT_LO/4] = 0;
- input_addr[CS_EXTRACT_INIT_HI/4] = 0;
-
- output_addr[CS_EXTRACT_LO/4] = 0;
- output_addr[CS_EXTRACT_HI/4] = 0;
-
- output_addr[CS_ACTIVE/4] = 0;
+ /*
+ * CS_INSERT and CS_EXTRACT registers contain 64-bit memory addresses which
+	 * should be accessed atomically. The plain stores below are not guaranteed
+	 * to be atomic, but as this is initialisation code, non-atomic accesses are safe.
+ */
+ input_addr[CS_INSERT_LO / sizeof(*input_addr)] = 0;
+ input_addr[CS_EXTRACT_INIT_LO / sizeof(*input_addr)] = 0;
+ output_addr64[CS_EXTRACT_LO / sizeof(*output_addr64)] = 0;
+ output_addr32[CS_ACTIVE / sizeof(*output_addr32)] = 0;
}
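Because the register offsets are byte offsets into the input/output pages, dividing by sizeof(u64) selects the 64-bit word holding both the _LO and _HI halves, so each assignment above clears a whole 64-bit register pair in one store. A sketch with assumed offset values (the real values live in the CSF registers header and may differ):

/* Assumed offsets for illustration only: the LO/HI halves of a 64-bit
 * register are 4 bytes apart in the user I/O page.
 */
#define EXAMPLE_CS_INSERT_LO 0x0000
#define EXAMPLE_CS_INSERT_HI 0x0004

static void example_clear_cs_insert(u64 *input_page)
{
	/* One 64-bit store covers bytes 0x0000-0x0007, i.e. both LO and HI. */
	input_page[EXAMPLE_CS_INSERT_LO / sizeof(u64)] = 0;
}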
static void kernel_unmap_user_io_pages(struct kbase_context *kctx,
@@ -205,7 +206,7 @@ static int kernel_map_user_io_pages(struct kbase_context *kctx,
struct page *page_list[2];
pgprot_t cpu_map_prot;
unsigned long flags;
- char *user_io_addr;
+ uint64_t *user_io_addr;
int ret = 0;
size_t i;
@@ -246,7 +247,7 @@ unlock:
static void term_queue_group(struct kbase_queue_group *group);
static void get_queue(struct kbase_queue *queue);
-static void release_queue(struct kbase_queue *queue);
+static bool release_queue(struct kbase_queue *queue);
/**
* kbase_csf_free_command_stream_user_pages() - Free the resources allocated
@@ -400,7 +401,16 @@ static void get_queue(struct kbase_queue *queue)
WARN_ON(!kbase_refcount_inc_not_zero(&queue->refcount));
}
-static void release_queue(struct kbase_queue *queue)
+/**
+ * release_queue() - Release a reference to a GPU command queue
+ *
+ * @queue: The queue to release.
+ *
+ * The queue is freed when its reference count reaches zero.
+ *
+ * Return: true if the queue was freed (i.e. the last reference was dropped).
+ */
+static bool release_queue(struct kbase_queue *queue)
{
lockdep_assert_held(&queue->kctx->csf.lock);
if (kbase_refcount_dec_and_test(&queue->refcount)) {
@@ -410,7 +420,6 @@ static void release_queue(struct kbase_queue *queue)
dev_dbg(queue->kctx->kbdev->dev,
"Remove any pending command queue fatal from ctx %d_%d",
queue->kctx->tgid, queue->kctx->id);
- kbase_csf_event_remove_error(queue->kctx, &queue->error);
/* After this the Userspace would be able to free the
* memory for GPU queue. In case the Userspace missed
@@ -423,7 +432,11 @@ static void release_queue(struct kbase_queue *queue)
kbase_gpu_vm_unlock(queue->kctx);
kfree(queue);
+
+ return true;
}
+
+ return false;
}
static void oom_event_worker(struct work_struct *data);
@@ -531,37 +544,25 @@ static int csf_queue_register_internal(struct kbase_context *kctx,
queue->size = (queue_size << PAGE_SHIFT);
queue->csi_index = KBASEP_IF_NR_INVALID;
- queue->enabled = false;
queue->priority = reg->priority;
+	/* Default to a safe value; this will be updated on binding */
+ queue->group_priority = KBASE_QUEUE_GROUP_PRIORITY_LOW;
kbase_refcount_set(&queue->refcount, 1);
- queue->group = NULL;
queue->bind_state = KBASE_CSF_QUEUE_UNBOUND;
queue->handle = BASEP_MEM_INVALID_HANDLE;
queue->doorbell_nr = KBASEP_USER_DB_NR_INVALID;
- queue->status_wait = 0;
- queue->sync_ptr = 0;
- queue->sync_value = 0;
-
-#if IS_ENABLED(CONFIG_DEBUG_FS)
- queue->saved_cmd_ptr = 0;
-#endif
-
- queue->sb_status = 0;
queue->blocked_reason = CS_STATUS_BLOCKED_REASON_REASON_UNBLOCKED;
- atomic_set(&queue->pending, 0);
-
INIT_LIST_HEAD(&queue->link);
- INIT_LIST_HEAD(&queue->error.link);
+ atomic_set(&queue->pending_kick, 0);
+ INIT_LIST_HEAD(&queue->pending_kick_link);
INIT_WORK(&queue->oom_event_work, oom_event_worker);
INIT_WORK(&queue->cs_error_work, cs_error_worker);
list_add(&queue->link, &kctx->csf.queue_list);
- queue->extract_ofs = 0;
-
region->user_data = queue;
/* Initialize the cs_trace configuration parameters, When buffer_size
@@ -636,6 +637,22 @@ int kbase_csf_queue_register_ex(struct kbase_context *kctx,
static void unbind_queue(struct kbase_context *kctx,
struct kbase_queue *queue);
+static void wait_pending_queue_kick(struct kbase_queue *queue)
+{
+ struct kbase_context *const kctx = queue->kctx;
+
+ /* Drain a pending queue kick if any. It should no longer be
+ * possible to issue further queue kicks at this point: either the
+ * queue has been unbound, or the context is being terminated.
+ *
+ * Signal kbase_csf_scheduler_kthread() to allow for the
+	 * eventual completion of the current iteration. Once that iteration
+	 * completes, the event_wait wait queue will be signalled.
+ */
+ complete(&kctx->kbdev->csf.scheduler.kthread_signal);
+ wait_event(kctx->kbdev->csf.event_wait, atomic_read(&queue->pending_kick) == 0);
+}
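The wait above relies on a matching update from the kick-processing side: whoever consumes the kick must drop pending_kick and then wake event_wait, otherwise the terminating thread would block forever. A hedged sketch of that completion step (where exactly it happens inside the scheduler kthread is not part of this hunk, so this is an assumption):

/* Assumed consumer-side completion of a kick, paired with
 * wait_pending_queue_kick(): decrement first, then wake any waiter.
 */
static void example_complete_queue_kick(struct kbase_device *kbdev,
					struct kbase_queue *queue)
{
	atomic_dec(&queue->pending_kick);
	wake_up(&kbdev->csf.event_wait);
}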
+
void kbase_csf_queue_terminate(struct kbase_context *kctx,
struct kbase_ioctl_cs_queue_terminate *term)
{
@@ -673,6 +690,18 @@ void kbase_csf_queue_terminate(struct kbase_context *kctx,
queue->queue_reg->user_data = NULL;
kbase_gpu_vm_unlock(kctx);
+ rt_mutex_unlock(&kctx->csf.lock);
+ /* The GPU reset can be allowed now as the queue has been unbound. */
+ if (reset_prevented) {
+ kbase_reset_gpu_allow(kbdev);
+ reset_prevented = false;
+ }
+ wait_pending_queue_kick(queue);
+ /* The work items can be cancelled as Userspace is terminating the queue */
+ cancel_work_sync(&queue->oom_event_work);
+ cancel_work_sync(&queue->cs_error_work);
+ rt_mutex_lock(&kctx->csf.lock);
+
release_queue(queue);
}
@@ -717,6 +746,7 @@ int kbase_csf_queue_bind(struct kbase_context *kctx, union kbase_ioctl_cs_queue_
bind->out.mmap_handle = queue->handle;
group->bound_queues[bind->in.csi_index] = queue;
queue->group = group;
+ queue->group_priority = group->priority;
queue->csi_index = bind->in.csi_index;
queue->bind_state = KBASE_CSF_QUEUE_BIND_IN_PROGRESS;
@@ -726,12 +756,20 @@ out:
return ret;
}
-static struct kbase_queue_group *get_bound_queue_group(
- struct kbase_queue *queue)
+/**
+ * get_bound_queue_group - Get the group to which a queue was bound
+ *
+ * @queue: Pointer to the GPU command queue.
+ *
+ * Return: The group to which this queue is bound, or NULL if the queue is not bound.
+ */
+static struct kbase_queue_group *get_bound_queue_group(struct kbase_queue *queue)
{
struct kbase_context *kctx = queue->kctx;
struct kbase_queue_group *group;
+ lockdep_assert_held(&kctx->csf.lock);
+
if (queue->bind_state == KBASE_CSF_QUEUE_UNBOUND)
return NULL;
@@ -753,63 +791,6 @@ static struct kbase_queue_group *get_bound_queue_group(
return group;
}
-static void enqueue_gpu_submission_work(struct kbase_context *const kctx)
-{
- kthread_queue_work(&kctx->csf.pending_submission_worker, &kctx->csf.pending_submission_work);
-}
-
-/**
- * pending_submission_worker() - Work item to process pending kicked GPU command queues.
- *
- * @work: Pointer to pending_submission_work.
- *
- * This function starts all pending queues, for which the work
- * was previously submitted via ioctl call from application thread.
- * If the queue is already scheduled and resident, it will be started
- * right away, otherwise once the group is made resident.
- */
-static void pending_submission_worker(struct kthread_work *work)
-{
- struct kbase_context *kctx =
- container_of(work, struct kbase_context, csf.pending_submission_work);
- struct kbase_device *kbdev = kctx->kbdev;
- struct kbase_queue *queue;
- int err = kbase_reset_gpu_prevent_and_wait(kbdev);
-
- if (err) {
- dev_err(kbdev->dev, "Unsuccessful GPU reset detected when kicking queue ");
- return;
- }
-
- rt_mutex_lock(&kctx->csf.lock);
-
- /* Iterate through the queue list and schedule the pending ones for submission. */
- list_for_each_entry(queue, &kctx->csf.queue_list, link) {
- if (atomic_cmpxchg(&queue->pending, 1, 0) == 1) {
- struct kbase_queue_group *group = get_bound_queue_group(queue);
- int ret;
-
- if (!group || queue->bind_state != KBASE_CSF_QUEUE_BOUND) {
- dev_dbg(kbdev->dev, "queue is not bound to a group");
- continue;
- }
-
- ret = kbase_csf_scheduler_queue_start(queue);
- if (unlikely(ret)) {
- dev_dbg(kbdev->dev, "Failed to start queue");
- if (ret == -EBUSY) {
- atomic_cmpxchg(&queue->pending, 0, 1);
- enqueue_gpu_submission_work(kctx);
- }
- }
- }
- }
-
- rt_mutex_unlock(&kctx->csf.lock);
-
- kbase_reset_gpu_allow(kbdev);
-}
-
void kbase_csf_ring_csg_doorbell(struct kbase_device *kbdev, int slot)
{
if (WARN_ON(slot < 0))
@@ -902,7 +883,6 @@ int kbase_csf_queue_kick(struct kbase_context *kctx,
struct kbase_ioctl_cs_queue_kick *kick)
{
struct kbase_device *kbdev = kctx->kbdev;
- bool trigger_submission = false;
struct kbase_va_region *region;
int err = 0;
@@ -920,9 +900,19 @@ int kbase_csf_queue_kick(struct kbase_context *kctx,
if (!kbase_is_region_invalid_or_free(region)) {
struct kbase_queue *queue = region->user_data;
- if (queue) {
- atomic_cmpxchg(&queue->pending, 0, 1);
- trigger_submission = true;
+ if (queue && (queue->bind_state == KBASE_CSF_QUEUE_BOUND)) {
+ spin_lock(&kbdev->csf.pending_gpuq_kicks_lock);
+ if (list_empty(&queue->pending_kick_link)) {
+ /* Queue termination shall block until this
+ * kick has been handled.
+ */
+ atomic_inc(&queue->pending_kick);
+ list_add_tail(
+ &queue->pending_kick_link,
+ &kbdev->csf.pending_gpuq_kicks[queue->group_priority]);
+ complete(&kbdev->csf.scheduler.kthread_signal);
+ }
+ spin_unlock(&kbdev->csf.pending_gpuq_kicks_lock);
}
} else {
dev_dbg(kbdev->dev,
@@ -931,9 +921,6 @@ int kbase_csf_queue_kick(struct kbase_context *kctx,
}
kbase_gpu_vm_unlock(kctx);
- if (likely(trigger_submission))
- enqueue_gpu_submission_work(kctx);
-
return err;
}
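The kick path above is intentionally idempotent: the list_empty() check on pending_kick_link guarantees a queue is enqueued at most once per unprocessed kick, and pending_kick is only incremented when a new list entry is actually added. The pending kicks are bucketed per group priority; a sketch of the assumed shape of that structure (the array length name is illustrative, the real definition lives in the CSF device structures):

/* Assumed layout of the per-priority pending kick buckets referenced as
 * kbdev->csf.pending_gpuq_kicks[queue->group_priority].
 */
struct example_pending_kicks {
	/* One FIFO of struct kbase_queue::pending_kick_link entries per
	 * queue group priority level.
	 */
	struct list_head buckets[KBASE_QUEUE_GROUP_PRIORITY_COUNT];
	spinlock_t lock;
};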
@@ -1222,6 +1209,9 @@ static int create_queue_group(struct kbase_context *const kctx,
} else {
int err = 0;
+#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD)
+ group->prev_act = false;
+#endif
group->kctx = kctx;
group->handle = group_handle;
group->csg_nr = KBASEP_CSG_NR_INVALID;
@@ -1246,6 +1236,7 @@ static int create_queue_group(struct kbase_context *const kctx,
group->dvs_buf = create->in.dvs_buf;
+
#if IS_ENABLED(CONFIG_DEBUG_FS)
group->deschedule_deferred_cnt = 0;
#endif
@@ -1256,8 +1247,6 @@ static int create_queue_group(struct kbase_context *const kctx,
INIT_LIST_HEAD(&group->link);
INIT_LIST_HEAD(&group->link_to_schedule);
INIT_LIST_HEAD(&group->error_fatal.link);
- INIT_LIST_HEAD(&group->error_timeout.link);
- INIT_LIST_HEAD(&group->error_tiler_oom.link);
INIT_WORK(&group->timer_event_work, timer_event_worker);
kthread_init_work(&group->protm_event_work, protm_event_worker);
bitmap_zero(group->protm_pending_bitmap,
@@ -1307,7 +1296,7 @@ int kbase_csf_queue_group_create(struct kbase_context *const kctx,
const u32 compute_count = hweight64(create->in.compute_mask);
size_t i;
- for (i = 0; i < sizeof(create->in.padding); i++) {
+ for (i = 0; i < ARRAY_SIZE(create->in.padding); i++) {
if (create->in.padding[i] != 0) {
dev_warn(kctx->kbdev->dev, "Invalid padding not 0 in queue group create\n");
return -EINVAL;
@@ -1316,8 +1305,7 @@ int kbase_csf_queue_group_create(struct kbase_context *const kctx,
rt_mutex_lock(&kctx->csf.lock);
- if ((create->in.tiler_max > tiler_count) ||
- (create->in.fragment_max > fragment_count) ||
+ if ((create->in.tiler_max > tiler_count) || (create->in.fragment_max > fragment_count) ||
(create->in.compute_max > compute_count)) {
dev_dbg(kctx->kbdev->dev,
"Invalid maximum number of endpoints for a queue group");
@@ -1335,8 +1323,7 @@ int kbase_csf_queue_group_create(struct kbase_context *const kctx,
dev_warn(kctx->kbdev->dev, "Unknown exception handler flags set: %u",
create->in.csi_handlers & ~BASE_CSF_EXCEPTION_HANDLER_FLAGS_MASK);
err = -EINVAL;
- } else if (!dvs_supported(kctx->kbdev->csf.global_iface.version) &&
- create->in.dvs_buf) {
+ } else if (!dvs_supported(kctx->kbdev->csf.global_iface.version) && create->in.dvs_buf) {
dev_warn(
kctx->kbdev->dev,
"GPU does not support DVS but userspace is trying to use it");
@@ -1512,8 +1499,6 @@ static void remove_pending_group_fatal_error(struct kbase_queue_group *group)
"Remove any pending group fatal error from context %pK\n",
(void *)group->kctx);
- kbase_csf_event_remove_error(kctx, &group->error_tiler_oom);
- kbase_csf_event_remove_error(kctx, &group->error_timeout);
kbase_csf_event_remove_error(kctx, &group->error_fatal);
}
@@ -1681,61 +1666,79 @@ int kbase_csf_ctx_init(struct kbase_context *kctx)
kctx->csf.wq = alloc_workqueue("mali_kbase_csf_wq",
WQ_UNBOUND, 1);
- if (unlikely(!kctx->csf.wq))
- goto out;
- err = kbase_create_realtime_thread(kctx->kbdev, kthread_worker_fn,
- &kctx->csf.pending_submission_worker, "mali_submit");
- if (err) {
- dev_err(kctx->kbdev->dev, "error initializing pending submission worker thread");
- goto out_err_submission_kthread;
- }
+ if (likely(kctx->csf.wq)) {
+ err = kbase_csf_scheduler_context_init(kctx);
- err = kbase_create_realtime_thread(kctx->kbdev, kthread_worker_fn,
- &kctx->csf.protm_event_worker, "mali_protm_event");
- if (err) {
- dev_err(kctx->kbdev->dev, "error initializing protm event worker thread");
- goto out_err_protm_kthread;
- }
+ if (likely(!err)) {
+ err = kbase_csf_kcpu_queue_context_init(kctx);
- err = kbase_csf_scheduler_context_init(kctx);
- if (unlikely(err))
- goto out_err_scheduler_context;
+ if (likely(!err)) {
+ err = kbase_csf_tiler_heap_context_init(kctx);
- err = kbase_csf_kcpu_queue_context_init(kctx);
- if (unlikely(err))
- goto out_err_kcpu_queue_context;
+ if (likely(!err)) {
+ rt_mutex_init(&kctx->csf.lock);
- err = kbase_csf_tiler_heap_context_init(kctx);
- if (unlikely(err))
- goto out_err_tiler_heap_context;
+ err = kbasep_ctx_user_reg_page_mapping_init(kctx);
- rt_mutex_init(&kctx->csf.lock);
- kthread_init_work(&kctx->csf.pending_submission_work,
- pending_submission_worker);
+ if (likely(!err)) {
+ err = kbase_create_realtime_thread(kctx->kbdev, kthread_worker_fn,
+ &kctx->csf.protm_event_worker, "mali_protm_event");
+ if (unlikely(err)) {
+ dev_err(kctx->kbdev->dev, "error initializing protm event worker thread");
+ kbasep_ctx_user_reg_page_mapping_term(kctx);
+ }
+ }
- err = kbasep_ctx_user_reg_page_mapping_init(kctx);
- if (unlikely(err))
- goto out_err_user_reg_page_mapping_init;
+ if (unlikely(err))
+ kbase_csf_tiler_heap_context_term(kctx);
+ }
- return err;
+ if (unlikely(err))
+ kbase_csf_kcpu_queue_context_term(kctx);
+ }
+
+ if (unlikely(err))
+ kbase_csf_scheduler_context_term(kctx);
+ }
+
+ if (unlikely(err))
+ destroy_workqueue(kctx->csf.wq);
+ }
-out_err_user_reg_page_mapping_init:
- kbase_csf_tiler_heap_context_term(kctx);
-out_err_tiler_heap_context:
- kbase_csf_kcpu_queue_context_term(kctx);
-out_err_kcpu_queue_context:
- kbase_csf_scheduler_context_term(kctx);
-out_err_scheduler_context:
- kbase_destroy_kworker_stack(&kctx->csf.protm_event_worker);
-out_err_protm_kthread:
- kbase_destroy_kworker_stack(&kctx->csf.pending_submission_worker);
-out_err_submission_kthread:
- destroy_workqueue(kctx->csf.wq);
-out:
return err;
}
+void kbase_csf_ctx_report_page_fault_for_active_groups(struct kbase_context *kctx,
+ struct kbase_fault *fault)
+{
+ struct base_gpu_queue_group_error err_payload =
+ (struct base_gpu_queue_group_error){ .error_type = BASE_GPU_QUEUE_GROUP_ERROR_FATAL,
+ .payload = { .fatal_group = {
+ .sideband = fault->addr,
+ .status = fault->status,
+ } } };
+ struct kbase_device *kbdev = kctx->kbdev;
+ const u32 num_groups = kbdev->csf.global_iface.group_num;
+ unsigned long flags;
+ int csg_nr;
+
+ lockdep_assert_held(&kbdev->hwaccess_lock);
+
+ kbase_csf_scheduler_spin_lock(kbdev, &flags);
+ for (csg_nr = 0; csg_nr < num_groups; csg_nr++) {
+ struct kbase_queue_group *const group =
+ kbdev->csf.scheduler.csg_slots[csg_nr].resident_group;
+
+ if (!group || (group->kctx != kctx))
+ continue;
+
+ group->faulted = true;
+ kbase_csf_add_group_fatal_error(group, &err_payload);
+ }
+ kbase_csf_scheduler_spin_unlock(kbdev, flags);
+}
+
void kbase_csf_ctx_handle_fault(struct kbase_context *kctx,
struct kbase_fault *fault)
{
@@ -1777,6 +1780,9 @@ void kbase_csf_ctx_handle_fault(struct kbase_context *kctx,
if (group && group->run_state != KBASE_CSF_GROUP_TERMINATED) {
term_queue_group(group);
+ /* This would effectively be a NOP if the fatal error was already added to
+ * the error_list by kbase_csf_ctx_report_page_fault_for_active_groups().
+ */
kbase_csf_add_group_fatal_error(group, &err_payload);
reported = true;
}
@@ -1833,8 +1839,6 @@ void kbase_csf_ctx_term(struct kbase_context *kctx)
if (reset_prevented)
kbase_reset_gpu_allow(kbdev);
- kthread_cancel_work_sync(&kctx->csf.pending_submission_work);
-
/* Now that all queue groups have been terminated, there can be no
* more OoM or timer event interrupts but there can be inflight work
* items. Destroying the wq will implicitly flush those work items.
@@ -1873,6 +1877,12 @@ void kbase_csf_ctx_term(struct kbase_context *kctx)
queue = list_first_entry(&kctx->csf.queue_list,
struct kbase_queue, link);
+ list_del_init(&queue->link);
+
+ rt_mutex_unlock(&kctx->csf.lock);
+ wait_pending_queue_kick(queue);
+ rt_mutex_lock(&kctx->csf.lock);
+
/* The reference held when the IO mapping was created on bind
* would have been dropped otherwise the termination of Kbase
* context itself wouldn't have kicked-in. So there shall be
@@ -1880,15 +1890,13 @@ void kbase_csf_ctx_term(struct kbase_context *kctx)
* registered.
*/
WARN_ON(kbase_refcount_read(&queue->refcount) != 1);
- list_del_init(&queue->link);
+
release_queue(queue);
}
rt_mutex_unlock(&kctx->csf.lock);
- kbase_destroy_kworker_stack(&kctx->csf.pending_submission_worker);
kbase_destroy_kworker_stack(&kctx->csf.protm_event_worker);
-
kbasep_ctx_user_reg_page_mapping_term(kctx);
kbase_csf_tiler_heap_context_term(kctx);
kbase_csf_kcpu_queue_context_term(kctx);
@@ -1992,16 +2000,13 @@ static void report_tiler_oom_error(struct kbase_queue_group *group)
} } } };
kbase_csf_event_add_error(group->kctx,
- &group->error_tiler_oom,
+ &group->error_fatal,
&error);
kbase_event_wakeup_sync(group->kctx);
}
static void flush_gpu_cache_on_fatal_error(struct kbase_device *kbdev)
{
- int err;
- const unsigned int cache_flush_wait_timeout_ms = 2000;
-
kbase_pm_lock(kbdev);
/* With the advent of partial cache flush, dirty cache lines could
* be left in the GPU L2 caches by terminating the queue group here
@@ -2011,17 +2016,12 @@ static void flush_gpu_cache_on_fatal_error(struct kbase_device *kbdev)
*/
if (kbdev->pm.backend.gpu_powered) {
kbase_gpu_start_cache_clean(kbdev, GPU_COMMAND_CACHE_CLN_INV_L2_LSC);
- err = kbase_gpu_wait_cache_clean_timeout(kbdev, cache_flush_wait_timeout_ms);
-
- if (err) {
+ if (kbase_gpu_wait_cache_clean_timeout(kbdev,
+ kbdev->mmu_or_gpu_cache_op_wait_time_ms))
dev_warn(
kbdev->dev,
- "[%llu] Timeout waiting for cache clean to complete after fatal error",
+ "[%llu] Timeout waiting for CACHE_CLN_INV_L2_LSC to complete after fatal error",
kbase_backend_get_cycle_cnt(kbdev));
-
- if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR))
- kbase_reset_gpu(kbdev);
- }
}
kbase_pm_unlock(kbdev);
@@ -2153,7 +2153,6 @@ static void oom_event_worker(struct work_struct *data)
rt_mutex_lock(&kctx->csf.lock);
kbase_queue_oom_event(queue);
- release_queue(queue);
rt_mutex_unlock(&kctx->csf.lock);
kbase_reset_gpu_allow(kbdev);
@@ -2180,7 +2179,7 @@ static void report_group_timeout_error(struct kbase_queue_group *const group)
"Notify the event notification thread, forward progress timeout (%llu cycles)\n",
kbase_csf_timeout_get(group->kctx->kbdev));
- kbase_csf_event_add_error(group->kctx, &group->error_timeout, &error);
+ kbase_csf_event_add_error(group->kctx, &group->error_fatal, &error);
kbase_event_wakeup_sync(group->kctx);
}
@@ -2406,12 +2405,10 @@ handle_fault_event(struct kbase_queue *const queue, const u32 cs_ack)
if ((cs_fault_exception_type != CS_FAULT_EXCEPTION_TYPE_CS_INHERIT_FAULT) &&
(cs_fault_exception_type != CS_FAULT_EXCEPTION_TYPE_CS_RESOURCE_TERMINATED)) {
if (unlikely(kbase_debug_csf_fault_notify(kbdev, queue->kctx, DF_CS_FAULT))) {
- get_queue(queue);
queue->cs_error = cs_fault;
queue->cs_error_info = cs_fault_info;
queue->cs_error_fatal = false;
- if (!queue_work(queue->kctx->csf.wq, &queue->cs_error_work))
- release_queue(queue);
+ queue_work(queue->kctx->csf.wq, &queue->cs_error_work);
return;
}
}
@@ -2422,31 +2419,29 @@ handle_fault_event(struct kbase_queue *const queue, const u32 cs_ack)
kbase_csf_ring_cs_kernel_doorbell(kbdev, queue->csi_index, queue->group->csg_nr, true);
}
-static void report_queue_fatal_error(struct kbase_queue *const queue,
- u32 cs_fatal, u64 cs_fatal_info,
- u8 group_handle)
+static void report_queue_fatal_error(struct kbase_queue *const queue, u32 cs_fatal,
+ u64 cs_fatal_info, struct kbase_queue_group *group)
{
- struct base_csf_notification error = {
- .type = BASE_CSF_NOTIFICATION_GPU_QUEUE_GROUP_ERROR,
- .payload = {
- .csg_error = {
- .handle = group_handle,
- .error = {
- .error_type =
- BASE_GPU_QUEUE_GROUP_QUEUE_ERROR_FATAL,
- .payload = {
- .fatal_queue = {
- .sideband = cs_fatal_info,
- .status = cs_fatal,
- .csi_index = queue->csi_index,
- }
- }
- }
- }
- }
- };
+ struct base_csf_notification
+ error = { .type = BASE_CSF_NOTIFICATION_GPU_QUEUE_GROUP_ERROR,
+ .payload = {
+ .csg_error = {
+ .error = { .error_type =
+ BASE_GPU_QUEUE_GROUP_QUEUE_ERROR_FATAL,
+ .payload = { .fatal_queue = {
+ .sideband = cs_fatal_info,
+ .status = cs_fatal,
+ } } } } } };
+
+ if (!queue)
+ return;
+
+ if (WARN_ON_ONCE(!group))
+ return;
- kbase_csf_event_add_error(queue->kctx, &queue->error, &error);
+ error.payload.csg_error.handle = group->handle;
+ error.payload.csg_error.error.payload.fatal_queue.csi_index = queue->csi_index;
+ kbase_csf_event_add_error(queue->kctx, &group->error_fatal, &error);
kbase_event_wakeup_sync(queue->kctx);
}
@@ -2461,10 +2456,10 @@ static void cs_error_worker(struct work_struct *const data)
{
struct kbase_queue *const queue =
container_of(data, struct kbase_queue, cs_error_work);
+ const u32 cs_fatal_exception_type = CS_FATAL_EXCEPTION_TYPE_GET(queue->cs_error);
struct kbase_context *const kctx = queue->kctx;
struct kbase_device *const kbdev = kctx->kbdev;
struct kbase_queue_group *group;
- u8 group_handle;
bool reset_prevented = false;
int err;
@@ -2511,14 +2506,22 @@ static void cs_error_worker(struct work_struct *const data)
}
#endif
- group_handle = group->handle;
term_queue_group(group);
flush_gpu_cache_on_fatal_error(kbdev);
- report_queue_fatal_error(queue, queue->cs_error, queue->cs_error_info,
- group_handle);
+	/* For an invalid GPU page fault, a CS_BUS_FAULT fatal error is expected after the
+	 * page fault handler disables the AS of the faulty context. Skip reporting that
+	 * CS_BUS_FAULT fatal error to Userspace, as it doesn't carry the full fault info;
+	 * the page fault handler will report the fatal error with the full page fault info.
+	 */
+ if ((cs_fatal_exception_type == CS_FATAL_EXCEPTION_TYPE_CS_BUS_FAULT) && group->faulted) {
+ dev_dbg(kbdev->dev,
+ "Skipped reporting CS_BUS_FAULT for queue %d of group %d of ctx %d_%d",
+ queue->csi_index, group->handle, kctx->tgid, kctx->id);
+ } else {
+ report_queue_fatal_error(queue, queue->cs_error, queue->cs_error_info, group);
+ }
unlock:
- release_queue(queue);
rt_mutex_unlock(&kctx->csf.lock);
if (reset_prevented)
kbase_reset_gpu_allow(kbdev);
@@ -2580,12 +2583,10 @@ handle_fatal_event(struct kbase_queue *const queue,
if (kbase_prepare_to_reset_gpu(queue->kctx->kbdev, RESET_FLAGS_NONE))
kbase_reset_gpu(queue->kctx->kbdev);
}
- get_queue(queue);
queue->cs_error = cs_fatal;
queue->cs_error_info = cs_fatal_info;
queue->cs_error_fatal = true;
- if (!queue_work(queue->kctx->csf.wq, &queue->cs_error_work))
- release_queue(queue);
+ queue_work(queue->kctx->csf.wq, &queue->cs_error_work);
}
kbase_csf_firmware_cs_input_mask(stream, CS_REQ, cs_ack,
@@ -2672,7 +2673,6 @@ static void process_cs_interrupts(struct kbase_queue_group *const group,
if (((cs_req & CS_REQ_TILER_OOM_MASK) ^
(cs_ack & CS_ACK_TILER_OOM_MASK))) {
- get_queue(queue);
KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_INTERRUPT_TILER_OOM,
group, queue, cs_req ^ cs_ack);
if (!queue_work(wq, &queue->oom_event_work)) {
@@ -2686,7 +2686,6 @@ static void process_cs_interrupts(struct kbase_queue_group *const group,
"Tiler OOM work pending: queue %d group %d (ctx %d_%d)",
queue->csi_index, group->handle, queue->kctx->tgid,
queue->kctx->id);
- release_queue(queue);
}
}
@@ -2797,17 +2796,7 @@ static void process_csg_interrupts(struct kbase_device *const kbdev, int const c
KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_INTERRUPT_PROCESS_START, group, csg_nr);
- if ((req ^ ack) & CSG_REQ_SYNC_UPDATE_MASK) {
- kbase_csf_firmware_csg_input_mask(ginfo,
- CSG_REQ, ack, CSG_REQ_SYNC_UPDATE_MASK);
-
- KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_INTERRUPT_SYNC_UPDATE, group, req ^ ack);
-
- /* SYNC_UPDATE events shall invalidate GPU idle event */
- atomic_set(&kbdev->csf.scheduler.gpu_no_longer_idle, true);
-
- kbase_csf_event_signal_cpu_only(group->kctx);
- }
+ kbase_csf_handle_csg_sync_update(kbdev, ginfo, group, req, ack);
if ((req ^ ack) & CSG_REQ_IDLE_MASK) {
struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
@@ -3117,13 +3106,16 @@ void kbase_csf_interrupt(struct kbase_device *kbdev, u32 val)
do {
unsigned long flags;
u32 csg_interrupts = val & ~JOB_IRQ_GLOBAL_IF;
- struct irq_idle_and_protm_track track = { .protm_grp = NULL, .idle_seq = U32_MAX };
bool glb_idle_irq_received = false;
kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), val);
order_job_irq_clear_with_iface_mem_read();
if (csg_interrupts != 0) {
+ struct irq_idle_and_protm_track track = { .protm_grp = NULL,
+ .idle_seq = U32_MAX,
+ .idle_slot = S8_MAX };
+
kbase_csf_scheduler_spin_lock(kbdev, &flags);
/* Looping through and track the highest idle and protm groups */
while (csg_interrupts != 0) {
@@ -3220,6 +3212,24 @@ void kbase_csf_interrupt(struct kbase_device *kbdev, u32 val)
KBASE_KTRACE_ADD(kbdev, CSF_INTERRUPT_END, NULL, val);
}
+void kbase_csf_handle_csg_sync_update(struct kbase_device *const kbdev,
+ struct kbase_csf_cmd_stream_group_info *ginfo,
+ struct kbase_queue_group *group, u32 req, u32 ack)
+{
+ kbase_csf_scheduler_spin_lock_assert_held(kbdev);
+
+ if ((req ^ ack) & CSG_REQ_SYNC_UPDATE_MASK) {
+ kbase_csf_firmware_csg_input_mask(ginfo, CSG_REQ, ack, CSG_REQ_SYNC_UPDATE_MASK);
+
+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_INTERRUPT_SYNC_UPDATE, group, req ^ ack);
+
+ /* SYNC_UPDATE events shall invalidate GPU idle event */
+ atomic_set(&kbdev->csf.scheduler.gpu_no_longer_idle, true);
+
+ kbase_csf_event_signal_cpu_only(group->kctx);
+ }
+}
+
void kbase_csf_doorbell_mapping_term(struct kbase_device *kbdev)
{
if (kbdev->csf.db_filp) {
@@ -3258,6 +3268,28 @@ int kbase_csf_doorbell_mapping_init(struct kbase_device *kbdev)
return 0;
}
+void kbase_csf_pending_gpuq_kicks_init(struct kbase_device *kbdev)
+{
+ size_t i;
+
+ for (i = 0; i != ARRAY_SIZE(kbdev->csf.pending_gpuq_kicks); ++i)
+ INIT_LIST_HEAD(&kbdev->csf.pending_gpuq_kicks[i]);
+ spin_lock_init(&kbdev->csf.pending_gpuq_kicks_lock);
+}
+
+void kbase_csf_pending_gpuq_kicks_term(struct kbase_device *kbdev)
+{
+ size_t i;
+
+ spin_lock(&kbdev->csf.pending_gpuq_kicks_lock);
+ for (i = 0; i != ARRAY_SIZE(kbdev->csf.pending_gpuq_kicks); ++i) {
+ if (!list_empty(&kbdev->csf.pending_gpuq_kicks[i]))
+ dev_warn(kbdev->dev,
+ "Some GPU queue kicks for priority %zu were not handled", i);
+ }
+ spin_unlock(&kbdev->csf.pending_gpuq_kicks_lock);
+}
+
void kbase_csf_free_dummy_user_reg_page(struct kbase_device *kbdev)
{
if (kbdev->csf.user_reg.filp) {
@@ -3290,7 +3322,7 @@ int kbase_csf_setup_dummy_user_reg_page(struct kbase_device *kbdev)
}
page = as_page(phys);
- addr = kmap_atomic(page);
+ addr = kbase_kmap_atomic(page);
/* Write a special value for the latest flush register inside the
* dummy page
@@ -3299,7 +3331,7 @@ int kbase_csf_setup_dummy_user_reg_page(struct kbase_device *kbdev)
kbase_sync_single_for_device(kbdev, kbase_dma_addr(page) + LATEST_FLUSH, sizeof(u32),
DMA_BIDIRECTIONAL);
- kunmap_atomic(addr);
+ kbase_kunmap_atomic(addr);
kbdev->csf.user_reg.filp = filp;
kbdev->csf.user_reg.dummy_page = phys;
@@ -3320,3 +3352,60 @@ u8 kbase_csf_priority_check(struct kbase_device *kbdev, u8 req_priority)
return out_priority;
}
+
+void kbase_csf_process_queue_kick(struct kbase_queue *queue)
+{
+ struct kbase_context *kctx = queue->kctx;
+ struct kbase_device *kbdev = kctx->kbdev;
+ bool retry_kick = false;
+ int err = kbase_reset_gpu_prevent_and_wait(kbdev);
+
+ if (err) {
+ dev_err(kbdev->dev, "Unsuccessful GPU reset detected when kicking queue");
+ goto out_release_queue;
+ }
+
+ rt_mutex_lock(&kctx->csf.lock);
+
+ if (queue->bind_state != KBASE_CSF_QUEUE_BOUND)
+ goto out_allow_gpu_reset;
+
+ err = kbase_csf_scheduler_queue_start(queue);
+ if (unlikely(err)) {
+ dev_dbg(kbdev->dev, "Failed to start queue");
+ if (err == -EBUSY) {
+ retry_kick = true;
+
+ spin_lock(&kbdev->csf.pending_gpuq_kicks_lock);
+ if (list_empty(&queue->pending_kick_link)) {
+				/* A failed queue kick is pushed to the back of its
+				 * priority list so a single queue cannot monopolise kick processing.
+				 */
+ list_add_tail(
+ &queue->pending_kick_link,
+ &kbdev->csf.pending_gpuq_kicks[queue->group_priority]);
+ spin_unlock(&kbdev->csf.pending_gpuq_kicks_lock);
+ } else {
+ spin_unlock(&kbdev->csf.pending_gpuq_kicks_lock);
+ WARN_ON(atomic_read(&queue->pending_kick) == 0);
+ }
+
+ complete(&kbdev->csf.scheduler.kthread_signal);
+ }
+ }
+
+out_allow_gpu_reset:
+ if (likely(!retry_kick)) {
+ WARN_ON(atomic_read(&queue->pending_kick) == 0);
+ atomic_dec(&queue->pending_kick);
+ }
+
+ rt_mutex_unlock(&kctx->csf.lock);
+
+ kbase_reset_gpu_allow(kbdev);
+
+ return;
+out_release_queue:
+ WARN_ON(atomic_read(&queue->pending_kick) == 0);
+ atomic_dec(&queue->pending_kick);
+}
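kbase_csf_process_queue_kick() is written to be driven from kbase_csf_scheduler_kthread(), which is not part of this hunk. A hedged sketch of the consuming loop it implies: pop one queue under pending_gpuq_kicks_lock, detach its pending_kick_link so new kicks can be queued in the meantime, process it, then wake event_wait so wait_pending_queue_kick() can make progress. The scan order below assumes a lower index means a higher priority; the real ordering is defined by the driver.

/* Illustrative consumer, not the in-tree kthread: drains one pending kick. */
static void example_drain_one_pending_kick(struct kbase_device *kbdev)
{
	struct kbase_queue *queue = NULL;
	size_t i;

	spin_lock(&kbdev->csf.pending_gpuq_kicks_lock);
	for (i = 0; i < ARRAY_SIZE(kbdev->csf.pending_gpuq_kicks); i++) {
		if (!list_empty(&kbdev->csf.pending_gpuq_kicks[i])) {
			queue = list_first_entry(&kbdev->csf.pending_gpuq_kicks[i],
						 struct kbase_queue, pending_kick_link);
			/* Detach so that a new kick can be queued while this one
			 * is being handled; pending_kick stays elevated until
			 * kbase_csf_process_queue_kick() drops it (or re-queues
			 * the entry on -EBUSY).
			 */
			list_del_init(&queue->pending_kick_link);
			break;
		}
	}
	spin_unlock(&kbdev->csf.pending_gpuq_kicks_lock);

	if (queue)
		kbase_csf_process_queue_kick(queue);

	/* Allow wait_pending_queue_kick() / context termination to progress. */
	wake_up(&kbdev->csf.event_wait);
}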
diff --git a/mali_kbase/csf/mali_kbase_csf.h b/mali_kbase/csf/mali_kbase_csf.h
index 35d0331..29119e1 100644
--- a/mali_kbase/csf/mali_kbase_csf.h
+++ b/mali_kbase/csf/mali_kbase_csf.h
@@ -49,8 +49,8 @@
#define KBASEP_TICK_PROTM_PEND_SCAN_SEQ_NR_INVALID (U32_MAX)
/* 60ms optimizes power while minimizing latency impact for UI test cases. */
-#define MALI_HOST_CONTROLS_SC_RAILS_IDLE_TIMER_US (600)
-#define FIRMWARE_IDLE_HYSTERESIS_TIME_USEC (60000) /* Default 60 milliseconds */
+#define MALI_HOST_CONTROLS_SC_RAILS_IDLE_TIMER_NS (600 * 1000)
+#define FIRMWARE_IDLE_HYSTERESIS_TIME_NS (60 * 1000 * 1000) /* Default 60 milliseconds */
/* Idle hysteresis time can be scaled down when GPU sleep feature is used */
#define FIRMWARE_IDLE_HYSTERESIS_GPU_SLEEP_SCALER (5)
@@ -78,6 +78,18 @@ void kbase_csf_ctx_handle_fault(struct kbase_context *kctx,
struct kbase_fault *fault);
/**
+ * kbase_csf_ctx_report_page_fault_for_active_groups - Notify Userspace about GPU page fault
+ * for active groups of the faulty context.
+ *
+ * @kctx: Pointer to faulty kbase context.
+ * @fault: Pointer to the fault.
+ *
+ * This function notifies the event notification thread of the GPU page fault.
+ */
+void kbase_csf_ctx_report_page_fault_for_active_groups(struct kbase_context *kctx,
+ struct kbase_fault *fault);
+
+/**
* kbase_csf_ctx_term - Terminate the CSF interface for a GPU address space.
*
* @kctx: Pointer to the kbase context which is being terminated.
@@ -315,6 +327,19 @@ void kbase_csf_add_group_fatal_error(
void kbase_csf_interrupt(struct kbase_device *kbdev, u32 val);
/**
+ * kbase_csf_handle_csg_sync_update - Handle SYNC_UPDATE notification for the group.
+ *
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
+ * @ginfo: Pointer to the CSG interface used by @group.
+ * @group: Pointer to the GPU command queue group.
+ * @req: CSG_REQ register value corresponding to @group.
+ * @ack: CSG_ACK register value corresponding to @group.
+ */
+void kbase_csf_handle_csg_sync_update(struct kbase_device *const kbdev,
+ struct kbase_csf_cmd_stream_group_info *ginfo,
+ struct kbase_queue_group *group, u32 req, u32 ack);
+
+/**
* kbase_csf_doorbell_mapping_init - Initialize the fields that facilitates
* the update of userspace mapping of HW
* doorbell page.
@@ -363,6 +388,22 @@ int kbase_csf_setup_dummy_user_reg_page(struct kbase_device *kbdev);
void kbase_csf_free_dummy_user_reg_page(struct kbase_device *kbdev);
/**
+ * kbase_csf_pending_gpuq_kicks_init - Initialize the data used for handling
+ * GPU queue kicks.
+ *
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
+ */
+void kbase_csf_pending_gpuq_kicks_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_csf_pending_gpuq_kicks_term - De-initialize the data used for handling
+ * GPU queue kicks.
+ *
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
+ */
+void kbase_csf_pending_gpuq_kicks_term(struct kbase_device *kbdev);
+
+/**
* kbase_csf_ring_csg_doorbell - ring the doorbell for a CSG interface.
*
* @kbdev: Instance of a GPU platform device that implements a CSF interface.
@@ -505,4 +546,17 @@ static inline u64 kbase_csf_ktrace_gpu_cycle_cnt(struct kbase_device *kbdev)
#endif
}
+/**
+ * kbase_csf_process_queue_kick() - Process a pending kicked GPU command queue.
+ *
+ * @queue: Pointer to the queue to process.
+ *
+ * This function starts the given queue, whose kick was previously
+ * submitted via an ioctl call from the application thread.
+ * If the queue's group is already scheduled and resident, the queue is
+ * started right away, otherwise it is started once the group is made resident.
+ */
+void kbase_csf_process_queue_kick(struct kbase_queue *queue);
+
+
#endif /* _KBASE_CSF_H_ */
diff --git a/mali_kbase/csf/mali_kbase_csf_cpu_queue_debugfs.c b/mali_kbase/csf/mali_kbase_csf_cpu_queue_debugfs.c
index 44221b0..a319a4a 100644
--- a/mali_kbase/csf/mali_kbase_csf_cpu_queue_debugfs.c
+++ b/mali_kbase/csf/mali_kbase_csf_cpu_queue_debugfs.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2020-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -126,30 +126,24 @@ void kbase_csf_cpu_queue_debugfs_init(struct kbase_context *kctx)
int kbase_csf_cpu_queue_dump(struct kbase_context *kctx,
u64 buffer, size_t buf_size)
{
- int err = 0;
-
size_t alloc_size = buf_size;
char *dump_buffer;
if (!buffer || !alloc_size)
- goto done;
+ return 0;
alloc_size = (alloc_size + PAGE_SIZE) & ~(PAGE_SIZE - 1);
dump_buffer = kzalloc(alloc_size, GFP_KERNEL);
- if (ZERO_OR_NULL_PTR(dump_buffer)) {
- err = -ENOMEM;
- goto done;
- }
+ if (!dump_buffer)
+ return -ENOMEM;
WARN_ON(kctx->csf.cpu_queue.buffer != NULL);
- err = copy_from_user(dump_buffer,
+ if (copy_from_user(dump_buffer,
u64_to_user_ptr(buffer),
- buf_size);
- if (err) {
+ buf_size)) {
kfree(dump_buffer);
- err = -EFAULT;
- goto done;
+ return -EFAULT;
}
rt_mutex_lock(&kctx->csf.lock);
@@ -161,13 +155,12 @@ int kbase_csf_cpu_queue_dump(struct kbase_context *kctx,
kctx->csf.cpu_queue.buffer = dump_buffer;
kctx->csf.cpu_queue.buffer_size = buf_size;
complete_all(&kctx->csf.cpu_queue.dump_cmp);
- } else {
+ } else
kfree(dump_buffer);
- }
rt_mutex_unlock(&kctx->csf.lock);
-done:
- return err;
+
+ return 0;
}
#else
/*
diff --git a/mali_kbase/csf/mali_kbase_csf_csg_debugfs.c b/mali_kbase/csf/mali_kbase_csf_csg_debugfs.c
index a45b588..c94e656 100644
--- a/mali_kbase/csf/mali_kbase_csf_csg_debugfs.c
+++ b/mali_kbase/csf/mali_kbase_csf_csg_debugfs.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -287,7 +287,8 @@ static void kbasep_csf_scheduler_dump_active_cs_trace(struct seq_file *file,
static void kbasep_csf_scheduler_dump_active_queue(struct seq_file *file,
struct kbase_queue *queue)
{
- u32 *addr;
+ u64 *addr;
+ u32 *addr32;
u64 cs_extract;
u64 cs_insert;
u32 cs_active;
@@ -309,12 +310,14 @@ static void kbasep_csf_scheduler_dump_active_queue(struct seq_file *file,
!queue->group))
return;
- addr = (u32 *)queue->user_io_addr;
- cs_insert = addr[CS_INSERT_LO/4] | ((u64)addr[CS_INSERT_HI/4] << 32);
+ addr = queue->user_io_addr;
+ cs_insert = addr[CS_INSERT_LO / sizeof(*addr)];
- addr = (u32 *)(queue->user_io_addr + PAGE_SIZE);
- cs_extract = addr[CS_EXTRACT_LO/4] | ((u64)addr[CS_EXTRACT_HI/4] << 32);
- cs_active = addr[CS_ACTIVE/4];
+ addr = queue->user_io_addr + PAGE_SIZE / sizeof(*addr);
+ cs_extract = addr[CS_EXTRACT_LO / sizeof(*addr)];
+
+ addr32 = (u32 *)(queue->user_io_addr + PAGE_SIZE / sizeof(*addr));
+ cs_active = addr32[CS_ACTIVE / sizeof(*addr32)];
#define KBASEP_CSF_DEBUGFS_CS_HEADER_USER_IO \
"Bind Idx, Ringbuf addr, Size, Prio, Insert offset, Extract offset, Active, Doorbell\n"
@@ -446,22 +449,20 @@ static void kbasep_csf_scheduler_dump_active_group(struct seq_file *file,
group->csg_nr);
seq_puts(file, "*** The following group-record is likely stale\n");
}
+ seq_puts(
+ file,
+ "GroupID, CSG NR, CSG Prio, Run State, Priority, C_EP(Alloc/Req), F_EP(Alloc/Req), T_EP(Alloc/Req), Exclusive, Idle\n");
+ seq_printf(
+ file,
+ "%7d, %6d, %8d, %9d, %8d, %11d/%3d, %11d/%3d, %11d/%3d, %9c, %4c\n",
+ group->handle, group->csg_nr, slot_priority, group->run_state,
+ group->priority, CSG_STATUS_EP_CURRENT_COMPUTE_EP_GET(ep_c),
+ CSG_STATUS_EP_REQ_COMPUTE_EP_GET(ep_r),
+ CSG_STATUS_EP_CURRENT_FRAGMENT_EP_GET(ep_c),
+ CSG_STATUS_EP_REQ_FRAGMENT_EP_GET(ep_r),
+ CSG_STATUS_EP_CURRENT_TILER_EP_GET(ep_c),
+ CSG_STATUS_EP_REQ_TILER_EP_GET(ep_r), exclusive, idle);
- seq_puts(file, "GroupID, CSG NR, CSG Prio, Run State, Priority, C_EP(Alloc/Req), F_EP(Alloc/Req), T_EP(Alloc/Req), Exclusive, Idle\n");
- seq_printf(file, "%7d, %6d, %8d, %9d, %8d, %11d/%3d, %11d/%3d, %11d/%3d, %9c, %4c\n",
- group->handle,
- group->csg_nr,
- slot_priority,
- group->run_state,
- group->priority,
- CSG_STATUS_EP_CURRENT_COMPUTE_EP_GET(ep_c),
- CSG_STATUS_EP_REQ_COMPUTE_EP_GET(ep_r),
- CSG_STATUS_EP_CURRENT_FRAGMENT_EP_GET(ep_c),
- CSG_STATUS_EP_REQ_FRAGMENT_EP_GET(ep_r),
- CSG_STATUS_EP_CURRENT_TILER_EP_GET(ep_c),
- CSG_STATUS_EP_REQ_TILER_EP_GET(ep_r),
- exclusive,
- idle);
} else {
seq_puts(file, "GroupID, CSG NR, Run State, Priority\n");
seq_printf(file, "%7d, %6d, %9d, %8d\n",
diff --git a/mali_kbase/csf/mali_kbase_csf_defs.h b/mali_kbase/csf/mali_kbase_csf_defs.h
index cb4e5eb..ef973b7 100644
--- a/mali_kbase/csf/mali_kbase_csf_defs.h
+++ b/mali_kbase/csf/mali_kbase_csf_defs.h
@@ -265,15 +265,18 @@ enum kbase_queue_group_priority {
* @CSF_PM_TIMEOUT: Timeout for GPU Power Management to reach the desired
* Shader, L2 and MCU state.
* @CSF_GPU_RESET_TIMEOUT: Waiting timeout for GPU reset to complete.
- * @CSF_CSG_SUSPEND_TIMEOUT: Timeout given for all active CSGs to be suspended.
+ * @CSF_CSG_SUSPEND_TIMEOUT: Timeout given for a CSG to be suspended.
* @CSF_FIRMWARE_BOOT_TIMEOUT: Maximum time to wait for firmware to boot.
* @CSF_FIRMWARE_PING_TIMEOUT: Maximum time to wait for firmware to respond
* to a ping from KBase.
* @CSF_SCHED_PROTM_PROGRESS_TIMEOUT: Timeout used to prevent protected mode execution hang.
* @MMU_AS_INACTIVE_WAIT_TIMEOUT: Maximum waiting time in ms for the completion
- * of a MMU operation
+ *                               of an MMU operation.
+ * @KCPU_FENCE_SIGNAL_TIMEOUT: Waiting time in ms before triggering a KCPU queue sync state dump.
* @KBASE_TIMEOUT_SELECTOR_COUNT: Number of timeout selectors. Must be last in
* the enum.
+ * @KBASE_DEFAULT_TIMEOUT: Default timeout used when an invalid selector is passed
+ * to the pre-computed timeout getter.
*/
enum kbase_timeout_selector {
CSF_FIRMWARE_TIMEOUT,
@@ -284,9 +287,11 @@ enum kbase_timeout_selector {
CSF_FIRMWARE_PING_TIMEOUT,
CSF_SCHED_PROTM_PROGRESS_TIMEOUT,
MMU_AS_INACTIVE_WAIT_TIMEOUT,
+ KCPU_FENCE_SIGNAL_TIMEOUT,
/* Must be the last in the enum */
- KBASE_TIMEOUT_SELECTOR_COUNT
+ KBASE_TIMEOUT_SELECTOR_COUNT,
+ KBASE_DEFAULT_TIMEOUT = CSF_FIRMWARE_TIMEOUT
};
/**
@@ -324,6 +329,14 @@ struct kbase_csf_notification {
* It is in page units.
* @link: Link to the linked list of GPU command queues created per
* GPU address space.
+ * @pending_kick: Indicates whether there is a pending kick to be handled.
+ * @pending_kick_link: Link to the linked list of GPU command queues that have
+ * been kicked, but the kick has not yet been processed.
+ *                     This link is deleted right before the kick is
+ *                     handled, to allow further kicks to be issued in the
+ *                     meantime. For this reason, it must not be used to
+ *                     check for the presence of a pending queue kick;
+ *                     @pending_kick should be used instead.
* @refcount: Reference count, stands for the number of times the queue
* has been referenced. The reference is taken when it is
* created, when it is bound to the group and also when the
@@ -336,6 +349,7 @@ struct kbase_csf_notification {
* @base_addr: Base address of the CS buffer.
* @size: Size of the CS buffer.
* @priority: Priority of this queue within the group.
+ * @group_priority: Priority of the group to which this queue has been bound.
* @bind_state: Bind state of the queue as enum @kbase_csf_queue_bind_state
* @csi_index: The ID of the assigned CS hardware interface.
* @enabled: Indicating whether the CS is running, or not.
@@ -363,7 +377,6 @@ struct kbase_csf_notification {
* @trace_offset_ptr: Pointer to the CS trace buffer offset variable.
* @trace_buffer_size: CS trace buffer size for the queue.
* @trace_cfg: CS trace configuration parameters.
- * @error: GPU command queue fatal information to pass to user space.
* @cs_error_work: Work item to handle the CS fatal event reported for this
* queue or the CS fault event if dump on fault is enabled
* and acknowledgment for CS fault event needs to be done
@@ -373,7 +386,6 @@ struct kbase_csf_notification {
* @cs_error: Records information about the CS fatal event or
* about CS fault event if dump on fault is enabled.
* @cs_error_fatal: Flag to track if the CS fault or CS fatal event occurred.
- * @pending: Indicating whether the queue has new submitted work.
* @extract_ofs: The current EXTRACT offset, this is only updated when handling
* the GLB IDLE IRQ if the idle timeout value is non-0 in order
* to help detect a queue's true idle status.
@@ -386,11 +398,13 @@ struct kbase_queue {
struct kbase_context *kctx;
u64 user_io_gpu_va;
struct tagged_addr phys[2];
- char *user_io_addr;
+ u64 *user_io_addr;
u64 handle;
int doorbell_nr;
unsigned long db_file_offset;
struct list_head link;
+ atomic_t pending_kick;
+ struct list_head pending_kick_link;
kbase_refcount_t refcount;
struct kbase_queue_group *group;
struct kbase_va_region *queue_reg;
@@ -398,6 +412,7 @@ struct kbase_queue {
u64 base_addr;
u32 size;
u8 priority;
+ u8 group_priority;
s8 csi_index;
enum kbase_csf_queue_bind_state bind_state;
bool enabled;
@@ -410,12 +425,10 @@ struct kbase_queue {
u64 trace_offset_ptr;
u32 trace_buffer_size;
u32 trace_cfg;
- struct kbase_csf_notification error;
struct work_struct cs_error_work;
u64 cs_error_info;
u32 cs_error;
bool cs_error_fatal;
- atomic_t pending;
u64 extract_ofs;
#if IS_ENABLED(CONFIG_DEBUG_FS)
u64 saved_cmd_ptr;
@@ -514,10 +527,6 @@ struct kbase_protected_suspend_buffer {
* have pending protected mode entry requests.
* @error_fatal: An error of type BASE_GPU_QUEUE_GROUP_ERROR_FATAL to be
* returned to userspace if such an error has occurred.
- * @error_timeout: An error of type BASE_GPU_QUEUE_GROUP_ERROR_TIMEOUT
- * to be returned to userspace if such an error has occurred.
- * @error_tiler_oom: An error of type BASE_GPU_QUEUE_GROUP_ERROR_TILER_HEAP_OOM
- * to be returned to userspace if such an error has occurred.
* @timer_event_work: Work item to handle the progress timeout fatal event
* for the group.
* @deschedule_deferred_cnt: Counter keeping a track of the number of threads
@@ -544,6 +553,7 @@ struct kbase_queue_group {
u8 compute_max;
u8 csi_handlers;
+
u64 tiler_mask;
u64 fragment_mask;
u64 compute_mask;
@@ -566,8 +576,6 @@ struct kbase_queue_group {
DECLARE_BITMAP(protm_pending_bitmap, MAX_SUPPORTED_STREAMS_PER_GROUP);
struct kbase_csf_notification error_fatal;
- struct kbase_csf_notification error_timeout;
- struct kbase_csf_notification error_tiler_oom;
struct work_struct timer_event_work;
@@ -582,6 +590,12 @@ struct kbase_queue_group {
#endif
void *csg_reg;
u8 csg_reg_bind_retries;
+#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD)
+ /**
+	 * @prev_act: Previous CSG activity transition for GPU metrics.
+ */
+ bool prev_act;
+#endif
};
/**
@@ -834,8 +848,6 @@ struct kbase_csf_user_reg_context {
* @link: Link to this csf context in the 'runnable_kctxs' list of
* the scheduler instance
* @sched: Object representing the scheduler's context
- * @pending_submission_worker: Worker for the pending submission work item
- * @pending_submission_work: Work item to process pending kicked GPU command queues.
* @protm_event_worker: Worker to process requests to enter protected mode.
* @cpu_queue: CPU queue information. Only be available when DEBUG_FS
* is enabled.
@@ -855,8 +867,6 @@ struct kbase_csf_context {
struct workqueue_struct *wq;
struct list_head link;
struct kbase_csf_scheduler_context sched;
- struct kthread_worker pending_submission_worker;
- struct kthread_work pending_submission_work;
struct kthread_worker protm_event_worker;
#if IS_ENABLED(CONFIG_DEBUG_FS)
struct kbase_csf_cpu_queue_context cpu_queue;
@@ -1004,21 +1014,19 @@ struct kbase_csf_mcu_shared_regions {
* "tock" schedule operation concluded. Used for
* evaluating the exclusion window for in-cycle
* schedule operation.
+ * @csf_worker: Dedicated kthread_worker to execute the @tick_work.
* @timer_enabled: Whether the CSF scheduler wakes itself up for
* periodic scheduling tasks. If this value is 0
* then it will only perform scheduling under the
* influence of external factors e.g., IRQs, IOCTLs.
- * @csf_worker: Dedicated kthread_worker to execute the @tick_work.
* @tick_timer: High-resolution timer employed to schedule tick
* workqueue items (kernel-provided delayed_work
* items do not use hrtimer and for some reason do
* not provide sufficiently reliable periodicity).
- * @tick_work: Work item that performs the "schedule on tick"
- * operation to implement timeslice-based scheduling.
- * @tock_work: Work item that would perform the schedule on tock
- * operation to implement the asynchronous scheduling.
- * @pending_tock_work: Indicates that the tock work item should re-execute
- * once it's finished instead of going back to sleep.
+ * @pending_tick_work: Indicates that kbase_csf_scheduler_kthread() should perform
+ * a scheduling tick.
+ * @pending_tock_work: Indicates that kbase_csf_scheduler_kthread() should perform
+ * a scheduling tock.
* @ping_work: Work item that would ping the firmware at regular
* intervals, only if there is a single active CSG
* slot, to check if firmware is alive and would
@@ -1064,13 +1072,6 @@ struct kbase_csf_mcu_shared_regions {
* after GPU and L2 cache have been powered up. So when
* this count is zero, MCU will not be powered up.
* @csg_scheduling_period_ms: Duration of Scheduling tick in milliseconds.
- * @tick_timer_active: Indicates whether the @tick_timer is effectively
- * active or not, as the callback function of
- * @tick_timer will enqueue @tick_work only if this
- * flag is true. This is mainly useful for the case
- * when scheduling tick needs to be advanced from
- * interrupt context, without actually deactivating
- * the @tick_timer first and then enqueing @tick_work.
* @tick_protm_pending_seq: Scan out sequence number of the group that has
* protected mode execution pending for the queue(s)
* bound to it and will be considered first for the
@@ -1097,6 +1098,12 @@ struct kbase_csf_mcu_shared_regions {
* @mcu_regs_data: Scheduler MCU shared regions data for managing the
* shared interface mappings for on-slot queues and
* CSG suspend buffers.
+ * @kthread_signal: Used to wake up the GPU queue submission
+ * thread when a queue needs attention.
+ * @kthread_running: Whether the GPU queue submission thread should keep
+ * executing.
+ * @gpuq_kthread: High-priority thread used to handle GPU queue
+ * submissions.
*/
struct kbase_csf_scheduler {
struct rt_mutex lock;
@@ -1118,11 +1125,10 @@ struct kbase_csf_scheduler {
DECLARE_BITMAP(csg_slots_idle_mask, MAX_SUPPORTED_CSGS);
DECLARE_BITMAP(csg_slots_prio_update, MAX_SUPPORTED_CSGS);
unsigned long last_schedule;
- bool timer_enabled;
struct kthread_worker csf_worker;
+ atomic_t timer_enabled;
struct hrtimer tick_timer;
- struct kthread_work tick_work;
- struct kthread_delayed_work tock_work;
+ atomic_t pending_tick_work;
atomic_t pending_tock_work;
struct delayed_work ping_work;
struct kbase_context *top_ctx;
@@ -1140,7 +1146,6 @@ struct kbase_csf_scheduler {
u32 non_idle_scanout_grps;
u32 pm_active_count;
unsigned int csg_scheduling_period_ms;
- bool tick_timer_active;
u32 tick_protm_pending_seq;
#ifdef CONFIG_MALI_HOST_CONTROLS_SC_RAILS
struct work_struct sc_rails_off_work;
@@ -1151,6 +1156,15 @@ struct kbase_csf_scheduler {
ktime_t protm_enter_time;
struct kbase_csf_sched_heap_reclaim_mgr reclaim_mgr;
struct kbase_csf_mcu_shared_regions mcu_regs_data;
+ struct completion kthread_signal;
+ bool kthread_running;
+ struct task_struct *gpuq_kthread;
+#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD)
+ /**
+ * @gpu_metrics_tb: Handler of firmware trace buffer for gpu_metrics
+ */
+ struct firmware_trace_buffer *gpu_metrics_tb;
+#endif /* CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD */
};
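
The new @gpuq_kthread, @kthread_signal, @pending_tick_work and @pending_tock_work fields replace per-item work structs with one thread that is woken through a completion and then checks atomic flags. A simplified sketch of that loop shape under stated assumptions (demo_ names, not the actual kbase_csf_scheduler_kthread(), which does considerably more):

#include <linux/kthread.h>
#include <linux/completion.h>
#include <linux/atomic.h>

struct demo_sched {
	struct completion kthread_signal;
	bool kthread_running;
	atomic_t pending_tick_work;
	atomic_t pending_tock_work;
};

static void demo_do_tick(struct demo_sched *s) { /* timeslice-based scheduling pass */ }
static void demo_do_tock(struct demo_sched *s) { /* asynchronous schedule request */ }

static int demo_scheduler_kthread(void *data)
{
	struct demo_sched *s = data;

	while (s->kthread_running) {
		/* Sleep until a producer calls complete(&s->kthread_signal). */
		wait_for_completion(&s->kthread_signal);
		reinit_completion(&s->kthread_signal);

		/* Consume each pending request exactly once. */
		if (atomic_cmpxchg(&s->pending_tick_work, 1, 0) == 1)
			demo_do_tick(s);
		if (atomic_cmpxchg(&s->pending_tock_work, 1, 0) == 1)
			demo_do_tock(s);
	}
	return 0;
}

/* A producer (e.g. the tick hrtimer callback) would do:
 *	atomic_set(&s->pending_tick_work, 1);
 *	complete(&s->kthread_signal);
 */
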
/*
@@ -1167,9 +1181,9 @@ struct kbase_csf_scheduler {
GLB_PROGRESS_TIMER_TIMEOUT_SCALE)
/*
- * Default GLB_PWROFF_TIMER_TIMEOUT value in unit of micro-seconds.
+ * Default GLB_PWROFF_TIMER_TIMEOUT value in unit of nanoseconds.
*/
-#define DEFAULT_GLB_PWROFF_TIMEOUT_US (800)
+#define DEFAULT_GLB_PWROFF_TIMEOUT_NS (800 * 1000)
/*
* In typical operations, the management of the shader core power transitions
@@ -1389,7 +1403,7 @@ struct kbase_csf_mcu_fw {
/*
* Firmware log polling period.
*/
-#define KBASE_CSF_FIRMWARE_LOG_POLL_PERIOD_MS 25
+#define KBASE_CSF_FIRMWARE_LOG_POLL_PERIOD_MS_DEFAULT 25
/**
* enum kbase_csf_firmware_log_mode - Firmware log operating mode
@@ -1401,10 +1415,16 @@ struct kbase_csf_mcu_fw {
* @KBASE_CSF_FIRMWARE_LOG_MODE_AUTO_PRINT: Automatic printing mode, firmware log
* will be periodically emptied into dmesg, manual reading through debugfs is
* disabled.
+ *
+ * @KBASE_CSF_FIRMWARE_LOG_MODE_AUTO_DISCARD: Automatic discarding mode, firmware
+ * log will be periodically discarded, the remaining log can be read manually by
+ * the userspace (and it will also be dumped automatically into dmesg on GPU
+ * reset).
*/
enum kbase_csf_firmware_log_mode {
KBASE_CSF_FIRMWARE_LOG_MODE_MANUAL,
- KBASE_CSF_FIRMWARE_LOG_MODE_AUTO_PRINT
+ KBASE_CSF_FIRMWARE_LOG_MODE_AUTO_PRINT,
+ KBASE_CSF_FIRMWARE_LOG_MODE_AUTO_DISCARD
};
/**
@@ -1418,6 +1438,7 @@ enum kbase_csf_firmware_log_mode {
* @dump_buf: Buffer used for dumping the log.
* @func_call_list_va_start: Virtual address of the start of the call list of FW log functions.
* @func_call_list_va_end: Virtual address of the end of the call list of FW log functions.
+ * @poll_period_ms: Firmware log polling period in milliseconds.
*/
struct kbase_csf_firmware_log {
enum kbase_csf_firmware_log_mode mode;
@@ -1426,6 +1447,7 @@ struct kbase_csf_firmware_log {
u8 *dump_buf;
u32 func_call_list_va_start;
u32 func_call_list_va_end;
+ atomic_t poll_period_ms;
};
/**
@@ -1521,7 +1543,7 @@ struct kbase_csf_user_reg {
* image.
* @shared_interface: Pointer to the interface object containing info for
* the memory area shared between firmware & host.
- * @shared_reg_rbtree: RB tree of the memory regions allocated from the
+ * @mcu_shared_zone: Memory zone tracking memory regions allocated from the
* shared interface segment in MCU firmware address
* space.
* @db_filp: Pointer to a dummy file, that alongwith
@@ -1584,22 +1606,28 @@ struct kbase_csf_user_reg {
* fatal event.
* @coredump_work: Work item for initiating a platform core dump.
* @ipa_control: IPA Control component manager.
- * @mcu_core_pwroff_dur_us: Sysfs attribute for the glb_pwroff timeout input
- * in unit of micro-seconds. The firmware does not use
+ * @mcu_core_pwroff_dur_ns: Sysfs attribute for the glb_pwroff timeout input
+ * in unit of nanoseconds. The firmware does not use
* it directly.
* @mcu_core_pwroff_dur_count: The counterpart of the glb_pwroff timeout input
* in interface required format, ready to be used
* directly in the firmware.
+ * @mcu_core_pwroff_dur_count_modifier: Update csffw_glb_req_cfg_pwroff_timer
+ * to make the shr(10) modifier conditional
+ *                                 on the new flag in GLB_PWROFF_TIMER_CONFIG.
* @mcu_core_pwroff_reg_shadow: The actual value that has been programed into
* the glb_pwoff register. This is separated from
* the @p mcu_core_pwroff_dur_count as an update
* to the latter is asynchronous.
- * @gpu_idle_hysteresis_us: Sysfs attribute for the idle hysteresis time
- * window in unit of microseconds. The firmware does not
+ * @gpu_idle_hysteresis_ns: Sysfs attribute for the idle hysteresis time
+ * window in unit of nanoseconds. The firmware does not
* use it directly.
* @gpu_idle_dur_count: The counterpart of the hysteresis time window in
* interface required format, ready to be used
* directly in the firmware.
+ * @gpu_idle_dur_count_modifier: Update csffw_glb_req_idle_enable to make the shr(10)
+ * modifier conditional on the new flag
+ * in GLB_IDLE_TIMER_CONFIG.
* @fw_timeout_ms: Timeout value (in milliseconds) used when waiting
* for any request sent to the firmware.
* @hwcnt: Contain members required for handling the dump of
@@ -1611,6 +1639,12 @@ struct kbase_csf_user_reg {
* @dof: Structure for dump on fault.
* @user_reg: Collective information to support the mapping to
* USER Register page for user processes.
+ * @pending_gpuq_kicks: Lists of GPU queues that have been kicked but not
+ * yet processed, categorised by queue group's priority.
+ * @pending_gpuq_kicks_lock: Protect @pending_gpuq_kicks and
+ * kbase_queue.pending_kick_link.
+ * @quirks_ext: Pointer to an allocated buffer containing the firmware
+ * workarounds configuration.
*/
struct kbase_csf_device {
struct kbase_mmu_table mcu_mmu;
@@ -1620,7 +1654,7 @@ struct kbase_csf_device {
struct kobject *fw_cfg_kobj;
struct kbase_csf_trace_buffers firmware_trace_buffers;
void *shared_interface;
- struct rb_root shared_reg_rbtree;
+ struct kbase_reg_zone mcu_shared_zone;
struct file *db_filp;
u32 db_file_offsets;
struct tagged_addr dummy_db_page;
@@ -1642,11 +1676,13 @@ struct kbase_csf_device {
struct work_struct fw_error_work;
struct work_struct coredump_work;
struct kbase_ipa_control ipa_control;
- u32 mcu_core_pwroff_dur_us;
+ u32 mcu_core_pwroff_dur_ns;
u32 mcu_core_pwroff_dur_count;
+ u32 mcu_core_pwroff_dur_count_modifier;
u32 mcu_core_pwroff_reg_shadow;
- u32 gpu_idle_hysteresis_us;
+ u32 gpu_idle_hysteresis_ns;
u32 gpu_idle_dur_count;
+ u32 gpu_idle_dur_count_modifier;
unsigned int fw_timeout_ms;
struct kbase_csf_hwcnt hwcnt;
struct kbase_csf_mcu_fw fw;
@@ -1662,6 +1698,9 @@ struct kbase_csf_device {
struct kbase_debug_coresight_device coresight;
#endif /* IS_ENABLED(CONFIG_MALI_CORESIGHT) */
struct kbase_csf_user_reg user_reg;
+ struct list_head pending_gpuq_kicks[KBASE_QUEUE_GROUP_PRIORITY_COUNT];
+ spinlock_t pending_gpuq_kicks_lock;
+ u32 *quirks_ext;
};
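
The new @pending_gpuq_kicks array and @pending_gpuq_kicks_lock, together with the per-queue @pending_kick/@pending_kick_link fields documented earlier, describe a producer/consumer hand-off in which the consumer delinks a queue before handling its kick. A hedged sketch of one plausible shape of that protocol (demo_ names, not the driver's actual kick path):

#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/atomic.h>

struct demo_queue {
	atomic_t pending_kick;              /* non-zero while a kick awaits handling */
	struct list_head pending_kick_link; /* on a per-priority kick list, or empty */
};

/* Producer: record a kick. The link is only added once; the atomic count is
 * what readers test, since the consumer delinks before handling. */
static void demo_queue_kick(struct demo_queue *q, struct list_head *kick_list,
			    spinlock_t *lock)
{
	unsigned long flags;

	spin_lock_irqsave(lock, flags);
	if (list_empty(&q->pending_kick_link)) {
		atomic_inc(&q->pending_kick);
		list_add_tail(&q->pending_kick_link, kick_list);
	}
	spin_unlock_irqrestore(lock, flags);
}

/* Consumer: delink first so a kick arriving while this one is being handled
 * can re-queue the link, then drop the pending count when done. */
static void demo_process_kick(struct demo_queue *q, spinlock_t *lock)
{
	unsigned long flags;

	spin_lock_irqsave(lock, flags);
	list_del_init(&q->pending_kick_link);
	spin_unlock_irqrestore(lock, flags);

	/* ... submit the queue to the GPU here ... */

	atomic_dec(&q->pending_kick);
}
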
/**
@@ -1678,10 +1717,6 @@ struct kbase_csf_device {
* @bf_data: Data relating to Bus fault.
* @gf_data: Data relating to GPU fault.
* @current_setup: Stores the MMU configuration for this address space.
- * @is_unresponsive: Flag to indicate MMU is not responding.
- * Set if a MMU command isn't completed within
- * &kbase_device:mmu_as_inactive_wait_time_ms.
- * Clear by kbase_ctx_sched_restore_all_as() after GPU reset completes.
*/
struct kbase_as {
int number;
@@ -1693,7 +1728,6 @@ struct kbase_as {
struct kbase_fault bf_data;
struct kbase_fault gf_data;
struct kbase_mmu_setup current_setup;
- bool is_unresponsive;
};
#endif /* _KBASE_CSF_DEFS_H_ */
diff --git a/mali_kbase/csf/mali_kbase_csf_firmware.c b/mali_kbase/csf/mali_kbase_csf_firmware.c
index a4f561b..22f9aeb 100644
--- a/mali_kbase/csf/mali_kbase_csf_firmware.c
+++ b/mali_kbase/csf/mali_kbase_csf_firmware.c
@@ -52,11 +52,12 @@
#include <mmu/mali_kbase_mmu.h>
#include <asm/arch_timer.h>
#include <linux/delay.h>
+#include <linux/version_compat_defs.h>
-#define MALI_MAX_FIRMWARE_NAME_LEN ((size_t)20)
+#define MALI_MAX_DEFAULT_FIRMWARE_NAME_LEN ((size_t)20)
-static char fw_name[MALI_MAX_FIRMWARE_NAME_LEN] = "mali_csffw.bin";
-module_param_string(fw_name, fw_name, sizeof(fw_name), 0644);
+static char default_fw_name[MALI_MAX_DEFAULT_FIRMWARE_NAME_LEN] = "mali_csffw.bin";
+module_param_string(fw_name, default_fw_name, sizeof(default_fw_name), 0644);
MODULE_PARM_DESC(fw_name, "firmware image");
/* The waiting time for firmware to boot */
@@ -78,7 +79,6 @@ MODULE_PARM_DESC(fw_debug,
"Enables effective use of a debugger for debugging firmware code.");
#endif
-
#define FIRMWARE_HEADER_MAGIC (0xC3F13A6Eul)
#define FIRMWARE_HEADER_VERSION_MAJOR (0ul)
#define FIRMWARE_HEADER_VERSION_MINOR (3ul)
@@ -188,7 +188,7 @@ struct firmware_timeline_metadata {
/* The shared interface area, used for communicating with firmware, is managed
* like a virtual memory zone. Reserve the virtual space from that zone
* corresponding to shared interface entry parsed from the firmware image.
- * The shared_reg_rbtree should have been initialized before calling this
+ * The MCU_SHARED_ZONE should have been initialized before calling this
* function.
*/
static int setup_shared_iface_static_region(struct kbase_device *kbdev)
@@ -201,8 +201,7 @@ static int setup_shared_iface_static_region(struct kbase_device *kbdev)
if (!interface)
return -EINVAL;
- reg = kbase_alloc_free_region(kbdev, &kbdev->csf.shared_reg_rbtree, 0,
- interface->num_pages_aligned, KBASE_REG_ZONE_MCU_SHARED);
+ reg = kbase_alloc_free_region(&kbdev->csf.mcu_shared_zone, 0, interface->num_pages_aligned);
if (reg) {
mutex_lock(&kbdev->csf.reg_lock);
ret = kbase_add_va_region_rbtree(kbdev, reg,
@@ -308,7 +307,7 @@ static void boot_csf_firmware(struct kbase_device *kbdev)
static int wait_ready(struct kbase_device *kbdev)
{
const ktime_t wait_loop_start = ktime_get_raw();
- const u32 mmu_as_inactive_wait_time_ms = kbdev->mmu_as_inactive_wait_time_ms;
+ const u32 mmu_as_inactive_wait_time_ms = kbdev->mmu_or_gpu_cache_op_wait_time_ms;
s64 diff;
do {
@@ -316,7 +315,8 @@ static int wait_ready(struct kbase_device *kbdev)
for (i = 0; i < 1000; i++) {
/* Wait for the MMU status to indicate there is no active command */
- if (!(kbase_reg_read(kbdev, MMU_AS_REG(MCU_AS_NR, AS_STATUS)) &
+ if (!(kbase_reg_read(kbdev,
+ MMU_STAGE1_REG(MMU_AS_REG(MCU_AS_NR, AS_STATUS))) &
AS_STATUS_AS_ACTIVE))
return 0;
}
@@ -449,7 +449,7 @@ static void load_fw_image_section(struct kbase_device *kbdev, const u8 *data,
for (page_num = 0; page_num < page_limit; ++page_num) {
struct page *const page = as_page(phys[page_num]);
- char *const p = kmap_atomic(page);
+ char *const p = kbase_kmap_atomic(page);
u32 const copy_len = min_t(u32, PAGE_SIZE, data_len);
if (copy_len > 0) {
@@ -466,7 +466,7 @@ static void load_fw_image_section(struct kbase_device *kbdev, const u8 *data,
kbase_sync_single_for_device(kbdev, kbase_dma_addr_from_tagged(phys[page_num]),
PAGE_SIZE, DMA_TO_DEVICE);
- kunmap_atomic(p);
+ kbase_kunmap_atomic(p);
}
}
@@ -533,6 +533,7 @@ out:
* within the 2MB pages aligned allocation.
* @is_small_page: This is an output flag used to select between the small and large page
* to be used for the FW entry allocation.
+ * @force_small_page: Use 4kB pages to allocate memory needed for FW loading
*
* Go through all the already initialized interfaces and find if a previously
* allocated large page can be used to store contents of new FW interface entry.
@@ -544,7 +545,7 @@ static inline bool entry_find_large_page_to_reuse(struct kbase_device *kbdev,
const u32 flags, struct tagged_addr **phys,
struct protected_memory_allocation ***pma,
u32 num_pages, u32 *num_pages_aligned,
- bool *is_small_page)
+ bool *is_small_page, bool force_small_page)
{
struct kbase_csf_firmware_interface *interface = NULL;
struct kbase_csf_firmware_interface *target_interface = NULL;
@@ -560,6 +561,8 @@ static inline bool entry_find_large_page_to_reuse(struct kbase_device *kbdev,
*phys = NULL;
*pma = NULL;
+ if (force_small_page)
+ goto out;
/* If the section starts at 2MB aligned boundary,
* then use 2MB page(s) for it.
@@ -653,7 +656,7 @@ static int parse_memory_setup_entry(struct kbase_device *kbdev,
struct protected_memory_allocation **pma = NULL;
bool reuse_pages = false;
bool is_small_page = true;
- bool ignore_page_migration = true;
+ bool force_small_page = false;
if (data_end < data_start) {
dev_err(kbdev->dev, "Firmware corrupt, data_end < data_start (0x%x<0x%x)\n",
@@ -696,16 +699,15 @@ static int parse_memory_setup_entry(struct kbase_device *kbdev,
num_pages = (virtual_end - virtual_start)
>> PAGE_SHIFT;
- if(!protected_mode) {
- reuse_pages = entry_find_large_page_to_reuse(
- kbdev, virtual_start, virtual_end, flags, &phys, &pma,
- num_pages, &num_pages_aligned, &is_small_page);
- }
- else {
- num_pages_aligned = num_pages;
+ if(protected_mode) {
+ force_small_page = true;
dev_warn(kbdev->dev, "Protected memory allocation requested for %u bytes (%u pages), serving with small pages and tight allocation.", (virtual_end - virtual_start), num_pages);
}
+retry_alloc:
+ reuse_pages = entry_find_large_page_to_reuse(kbdev, virtual_start, virtual_end, flags,
+ &phys, &pma, num_pages, &num_pages_aligned,
+ &is_small_page, force_small_page);
if (!reuse_pages)
phys = kmalloc_array(num_pages_aligned, sizeof(*phys), GFP_KERNEL);
@@ -716,16 +718,18 @@ static int parse_memory_setup_entry(struct kbase_device *kbdev,
if (!reuse_pages) {
pma = kbase_csf_protected_memory_alloc(
kbdev, phys, num_pages_aligned, is_small_page);
- }
-
- if (!pma) {
- /* If we can't allocate sufficient memory for FW - bail out and leave protected execution unsupported by termintating the allocator. */
- dev_warn(kbdev->dev,
- "Protected memory allocation failed during FW initialization - Firmware protected mode entry will not be supported");
- kbase_csf_protected_memory_term(kbdev);
- kbdev->csf.pma_dev = NULL;
- kfree(phys);
- return 0;
+ if (!pma) {
+			/* If we can't allocate sufficient memory for FW - bail out and leave protected execution unsupported by terminating the allocator. */
+ dev_warn(kbdev->dev,
+ "Protected memory allocation failed during FW initialization - Firmware protected mode entry will not be supported");
+ kbase_csf_protected_memory_term(kbdev);
+ kbdev->csf.pma_dev = NULL;
+ kfree(phys);
+ return 0;
+ }
+ } else if (WARN_ON(!pma)) {
+ ret = -EINVAL;
+ goto out;
}
} else {
if (!reuse_pages) {
@@ -733,14 +737,22 @@ static int parse_memory_setup_entry(struct kbase_device *kbdev,
kbase_mem_pool_group_select(kbdev, KBASE_MEM_GROUP_CSF_FW,
is_small_page),
num_pages_aligned, phys, false, NULL);
- ignore_page_migration = false;
}
}
if (ret < 0) {
- dev_err(kbdev->dev,
- "Failed to allocate %u physical pages for the firmware interface entry at VA 0x%x\n",
- num_pages_aligned, virtual_start);
+ dev_warn(
+ kbdev->dev,
+ "Failed to allocate %u physical pages for the firmware interface entry at VA 0x%x using %s ",
+ num_pages_aligned, virtual_start,
+ is_small_page ? "small pages" : "large page");
+ WARN_ON(reuse_pages);
+ if (!is_small_page) {
+ dev_warn(kbdev->dev, "Retrying by using small pages");
+ force_small_page = true;
+ kfree(phys);
+ goto retry_alloc;
+ }
goto out;
}
@@ -843,8 +855,7 @@ static int parse_memory_setup_entry(struct kbase_device *kbdev,
ret = kbase_mmu_insert_pages_no_flush(kbdev, &kbdev->csf.mcu_mmu,
virtual_start >> PAGE_SHIFT, phys,
num_pages_aligned, mem_flags,
- KBASE_MEM_GROUP_CSF_FW, NULL, NULL,
- ignore_page_migration);
+ KBASE_MEM_GROUP_CSF_FW, NULL, NULL);
if (ret != 0) {
dev_err(kbdev->dev, "Failed to insert firmware pages\n");
@@ -1316,7 +1327,7 @@ static inline void access_firmware_memory_common(struct kbase_device *kbdev,
u32 page_num = offset_bytes >> PAGE_SHIFT;
u32 offset_in_page = offset_bytes & ~PAGE_MASK;
struct page *target_page = as_page(interface->phys[page_num]);
- uintptr_t cpu_addr = (uintptr_t)kmap_atomic(target_page);
+ uintptr_t cpu_addr = (uintptr_t)kbase_kmap_atomic(target_page);
u32 *addr = (u32 *)(cpu_addr + offset_in_page);
if (read) {
@@ -1331,7 +1342,7 @@ static inline void access_firmware_memory_common(struct kbase_device *kbdev,
sizeof(u32), DMA_BIDIRECTIONAL);
}
- kunmap_atomic((u32 *)cpu_addr);
+ kbase_kunmap_atomic((u32 *)cpu_addr);
}
static inline void access_firmware_memory(struct kbase_device *kbdev,
@@ -1713,6 +1724,11 @@ static void enable_shader_poweroff_timer(struct kbase_device *const kbdev,
kbase_csf_firmware_global_input(global_iface, GLB_PWROFF_TIMER,
pwroff_reg);
+
+ kbase_csf_firmware_global_input_mask(global_iface, GLB_PWROFF_TIMER_CONFIG,
+ kbdev->csf.mcu_core_pwroff_dur_count_modifier,
+ GLB_PWROFF_TIMER_CONFIG_NO_MODIFIER_MASK);
+
set_global_request(global_iface, GLB_REQ_CFG_PWROFF_TIMER_MASK);
/* Save the programed reg value in its shadow field */
@@ -1739,6 +1755,11 @@ static void enable_gpu_idle_timer(struct kbase_device *const kbdev)
kbase_csf_firmware_global_input(global_iface, GLB_IDLE_TIMER,
kbdev->csf.gpu_idle_dur_count);
+
+ kbase_csf_firmware_global_input_mask(global_iface, GLB_IDLE_TIMER_CONFIG,
+ kbdev->csf.gpu_idle_dur_count_modifier,
+ GLB_IDLE_TIMER_CONFIG_NO_MODIFIER_MASK);
+
kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ, GLB_REQ_REQ_IDLE_ENABLE,
GLB_REQ_IDLE_ENABLE_MASK);
dev_dbg(kbdev->dev, "Enabling GPU idle timer with count-value: 0x%.8x",
@@ -2000,6 +2021,10 @@ static void kbase_csf_firmware_reload_worker(struct work_struct *work)
return;
#endif
+ err = kbase_csf_firmware_cfg_fw_wa_enable(kbdev);
+ if (WARN_ON(err))
+ return;
+
/* Reboot the firmware */
kbase_csf_firmware_enable_mcu(kbdev);
}
@@ -2042,13 +2067,13 @@ void kbase_csf_firmware_reload_completed(struct kbase_device *kbdev)
kbase_pm_update_state(kbdev);
}
-static u32 convert_dur_to_idle_count(struct kbase_device *kbdev, const u32 dur_us)
+static u32 convert_dur_to_idle_count(struct kbase_device *kbdev, const u32 dur_ns, u32 *modifier)
{
#define MICROSECONDS_PER_SECOND 1000000u
#define HYSTERESIS_VAL_UNIT_SHIFT (10)
/* Get the cntfreq_el0 value, which drives the SYSTEM_TIMESTAMP */
u64 freq = arch_timer_get_cntfrq();
- u64 dur_val = dur_us;
+ u64 dur_val = dur_ns;
u32 cnt_val_u32, reg_val_u32;
bool src_system_timestamp = freq > 0;
@@ -2066,21 +2091,24 @@ static u32 convert_dur_to_idle_count(struct kbase_device *kbdev, const u32 dur_u
"Can't get the timestamp frequency, use cycle counter format with firmware idle hysteresis!");
}
- /* Formula for dur_val = ((dur_us/1000000) * freq_HZ) >> 10) */
- dur_val = (dur_val * freq) >> HYSTERESIS_VAL_UNIT_SHIFT;
- dur_val = div_u64(dur_val, 1000000);
+	/* Formula for dur_val = (dur/1e9) * freq_HZ */
+ dur_val = dur_val * freq;
+ dur_val = div_u64(dur_val, NSEC_PER_SEC);
+ if (dur_val < S32_MAX) {
+ *modifier = 1;
+ } else {
+ dur_val = dur_val >> HYSTERESIS_VAL_UNIT_SHIFT;
+ *modifier = 0;
+ }
/* Interface limits the value field to S32_MAX */
cnt_val_u32 = (dur_val > S32_MAX) ? S32_MAX : (u32)dur_val;
reg_val_u32 = GLB_IDLE_TIMER_TIMEOUT_SET(0, cnt_val_u32);
/* add the source flag */
- if (src_system_timestamp)
- reg_val_u32 = GLB_IDLE_TIMER_TIMER_SOURCE_SET(reg_val_u32,
- GLB_IDLE_TIMER_TIMER_SOURCE_SYSTEM_TIMESTAMP);
- else
- reg_val_u32 = GLB_IDLE_TIMER_TIMER_SOURCE_SET(reg_val_u32,
- GLB_IDLE_TIMER_TIMER_SOURCE_GPU_COUNTER);
+ reg_val_u32 = GLB_IDLE_TIMER_TIMER_SOURCE_SET(
+ reg_val_u32, (src_system_timestamp ? GLB_IDLE_TIMER_TIMER_SOURCE_SYSTEM_TIMESTAMP :
+ GLB_IDLE_TIMER_TIMER_SOURCE_GPU_COUNTER));
return reg_val_u32;
}
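
The reworked conversion above now takes the duration in nanoseconds, applies the 2^10 pre-shift only when the product would exceed the S32 field, and reports that choice through *modifier (which feeds the GLB_IDLE_TIMER_CONFIG NO_MODIFIER flag). A standalone userspace sketch of just that arithmetic (not the driver function; the 52 MHz counter frequency is an assumed example):

#include <stdint.h>
#include <stdio.h>

#define NSEC_PER_SEC 1000000000ULL
#define HYSTERESIS_VAL_UNIT_SHIFT 10

static uint32_t demo_dur_ns_to_count(uint64_t dur_ns, uint64_t timer_freq_hz,
				     uint32_t *modifier)
{
	/* dur_val = (dur_ns / 1e9) * freq_Hz, kept in 64 bits to avoid overflow */
	uint64_t dur_val = dur_ns * timer_freq_hz / NSEC_PER_SEC;

	if (dur_val < INT32_MAX) {
		*modifier = 1;	/* value not pre-shifted; maps to the NO_MODIFIER flag */
	} else {
		dur_val >>= HYSTERESIS_VAL_UNIT_SHIFT;
		*modifier = 0;	/* value was pre-shifted by 2^10 */
	}
	/* The register field is still limited to S32_MAX */
	return dur_val > INT32_MAX ? INT32_MAX : (uint32_t)dur_val;
}

int main(void)
{
	uint32_t modifier;
	/* e.g. a 10 ms hysteresis window with a 52 MHz system counter */
	uint32_t count = demo_dur_ns_to_count(10 * 1000 * 1000ULL, 52000000ULL, &modifier);

	printf("count=%u modifier=%u\n", count, modifier); /* count=520000 modifier=1 */
	return 0;
}
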
@@ -2091,19 +2119,21 @@ u32 kbase_csf_firmware_get_gpu_idle_hysteresis_time(struct kbase_device *kbdev)
u32 dur;
kbase_csf_scheduler_spin_lock(kbdev, &flags);
- dur = kbdev->csf.gpu_idle_hysteresis_us;
+ dur = kbdev->csf.gpu_idle_hysteresis_ns;
kbase_csf_scheduler_spin_unlock(kbdev, flags);
return dur;
}
-u32 kbase_csf_firmware_set_gpu_idle_hysteresis_time(struct kbase_device *kbdev, u32 dur)
+u32 kbase_csf_firmware_set_gpu_idle_hysteresis_time(struct kbase_device *kbdev, u32 dur_ns)
{
unsigned long flags;
+ u32 modifier = 0;
+
#ifdef CONFIG_MALI_HOST_CONTROLS_SC_RAILS
- const u32 hysteresis_val = convert_dur_to_idle_count(kbdev, MALI_HOST_CONTROLS_SC_RAILS_IDLE_TIMER_US);
+ const u32 hysteresis_val = convert_dur_to_idle_count(kbdev, MALI_HOST_CONTROLS_SC_RAILS_IDLE_TIMER_NS, &modifier);
#else
- const u32 hysteresis_val = convert_dur_to_idle_count(kbdev, dur);
+ const u32 hysteresis_val = convert_dur_to_idle_count(kbdev, dur_ns, &modifier);
#endif
/* The 'fw_load_lock' is taken to synchronize against the deferred
@@ -2112,19 +2142,28 @@ u32 kbase_csf_firmware_set_gpu_idle_hysteresis_time(struct kbase_device *kbdev,
mutex_lock(&kbdev->fw_load_lock);
if (unlikely(!kbdev->csf.firmware_inited)) {
kbase_csf_scheduler_spin_lock(kbdev, &flags);
- kbdev->csf.gpu_idle_hysteresis_us = dur;
+ kbdev->csf.gpu_idle_hysteresis_ns = dur_ns;
kbdev->csf.gpu_idle_dur_count = hysteresis_val;
+ kbdev->csf.gpu_idle_dur_count_modifier = modifier;
kbase_csf_scheduler_spin_unlock(kbdev, flags);
mutex_unlock(&kbdev->fw_load_lock);
goto end;
}
mutex_unlock(&kbdev->fw_load_lock);
+ if (kbase_reset_gpu_prevent_and_wait(kbdev)) {
+ dev_warn(kbdev->dev,
+ "Failed to prevent GPU reset when updating idle_hysteresis_time");
+ return kbdev->csf.gpu_idle_dur_count;
+ }
+
kbase_csf_scheduler_pm_active(kbdev);
- if (kbase_csf_scheduler_wait_mcu_active(kbdev)) {
+ if (kbase_csf_scheduler_killable_wait_mcu_active(kbdev)) {
dev_err(kbdev->dev,
"Unable to activate the MCU, the idle hysteresis value shall remain unchanged");
kbase_csf_scheduler_pm_idle(kbdev);
+ kbase_reset_gpu_allow(kbdev);
+
return kbdev->csf.gpu_idle_dur_count;
}
@@ -2153,8 +2192,9 @@ u32 kbase_csf_firmware_set_gpu_idle_hysteresis_time(struct kbase_device *kbdev,
wait_for_global_request(kbdev, GLB_REQ_IDLE_DISABLE_MASK);
kbase_csf_scheduler_spin_lock(kbdev, &flags);
- kbdev->csf.gpu_idle_hysteresis_us = dur;
+ kbdev->csf.gpu_idle_hysteresis_ns = dur_ns;
kbdev->csf.gpu_idle_dur_count = hysteresis_val;
+ kbdev->csf.gpu_idle_dur_count_modifier = modifier;
kbase_csf_firmware_enable_gpu_idle_timer(kbdev);
kbase_csf_scheduler_spin_unlock(kbdev, flags);
wait_for_global_request(kbdev, GLB_REQ_IDLE_ENABLE_MASK);
@@ -2164,8 +2204,9 @@ u32 kbase_csf_firmware_set_gpu_idle_hysteresis_time(struct kbase_device *kbdev,
* enabled
*/
kbase_csf_scheduler_spin_lock(kbdev, &flags);
- kbdev->csf.gpu_idle_hysteresis_us = dur;
+ kbdev->csf.gpu_idle_hysteresis_ns = dur_ns;
kbdev->csf.gpu_idle_dur_count = hysteresis_val;
+ kbdev->csf.gpu_idle_dur_count_modifier = modifier;
kbase_csf_scheduler_spin_unlock(kbdev, flags);
}
kbase_csf_scheduler_unlock(kbdev);
@@ -2173,11 +2214,11 @@ u32 kbase_csf_firmware_set_gpu_idle_hysteresis_time(struct kbase_device *kbdev,
mutex_unlock(&kbdev->csf.reg_lock);
#endif
- dev_dbg(kbdev->dev, "GPU suspend timeout updated: %i us (0x%.8x)",
- kbdev->csf.gpu_idle_hysteresis_us,
+ dev_dbg(kbdev->dev, "GPU suspend timeout updated: %i ns (0x%.8x)",
+ kbdev->csf.gpu_idle_hysteresis_ns,
kbdev->csf.gpu_idle_dur_count);
kbase_csf_scheduler_pm_idle(kbdev);
-
+ kbase_reset_gpu_allow(kbdev);
end:
dev_dbg(kbdev->dev, "CSF set firmware idle hysteresis count-value: 0x%.8x",
hysteresis_val);
@@ -2185,14 +2226,18 @@ end:
return hysteresis_val;
}
-static u32 convert_dur_to_core_pwroff_count(struct kbase_device *kbdev, const u32 dur_us)
+static u32 convert_dur_to_core_pwroff_count(struct kbase_device *kbdev, const u32 dur_ns,
+ u32 *modifier)
{
/* Get the cntfreq_el0 value, which drives the SYSTEM_TIMESTAMP */
u64 freq = arch_timer_get_cntfrq();
- u64 dur_val = dur_us;
+ u64 dur_val = dur_ns;
u32 cnt_val_u32, reg_val_u32;
bool src_system_timestamp = freq > 0;
+ const struct kbase_pm_policy *current_policy = kbase_pm_get_policy(kbdev);
+ bool always_on = current_policy == &kbase_pm_always_on_policy_ops;
+
if (!src_system_timestamp) {
/* Get the cycle_counter source alternative */
spin_lock(&kbdev->pm.clk_rtm.lock);
@@ -2207,21 +2252,32 @@ static u32 convert_dur_to_core_pwroff_count(struct kbase_device *kbdev, const u3
"Can't get the timestamp frequency, use cycle counter with MCU shader Core Poweroff timer!");
}
- /* Formula for dur_val = ((dur_us/1e6) * freq_HZ) >> 10) */
- dur_val = (dur_val * freq) >> HYSTERESIS_VAL_UNIT_SHIFT;
- dur_val = div_u64(dur_val, 1000000);
+	/* Formula for dur_val = (dur/1e9) * freq_HZ */
+ dur_val = dur_val * freq;
+ dur_val = div_u64(dur_val, NSEC_PER_SEC);
+ if (dur_val < S32_MAX) {
+ *modifier = 1;
+ } else {
+ dur_val = dur_val >> HYSTERESIS_VAL_UNIT_SHIFT;
+ *modifier = 0;
+ }
- /* Interface limits the value field to S32_MAX */
- cnt_val_u32 = (dur_val > S32_MAX) ? S32_MAX : (u32)dur_val;
+ if (dur_val == 0 && !always_on) {
+ /* Lower Bound - as 0 disables timeout and host controls shader-core power management. */
+ cnt_val_u32 = 1;
+ } else if (dur_val > S32_MAX) {
+ /* Upper Bound - as interface limits the field to S32_MAX */
+ cnt_val_u32 = S32_MAX;
+ } else {
+ cnt_val_u32 = (u32)dur_val;
+ }
reg_val_u32 = GLB_PWROFF_TIMER_TIMEOUT_SET(0, cnt_val_u32);
/* add the source flag */
- if (src_system_timestamp)
- reg_val_u32 = GLB_PWROFF_TIMER_TIMER_SOURCE_SET(reg_val_u32,
- GLB_PWROFF_TIMER_TIMER_SOURCE_SYSTEM_TIMESTAMP);
- else
- reg_val_u32 = GLB_PWROFF_TIMER_TIMER_SOURCE_SET(reg_val_u32,
- GLB_PWROFF_TIMER_TIMER_SOURCE_GPU_COUNTER);
+ reg_val_u32 = GLB_PWROFF_TIMER_TIMER_SOURCE_SET(
+ reg_val_u32,
+ (src_system_timestamp ? GLB_PWROFF_TIMER_TIMER_SOURCE_SYSTEM_TIMESTAMP :
+ GLB_PWROFF_TIMER_TIMER_SOURCE_GPU_COUNTER));
return reg_val_u32;
}
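
convert_dur_to_core_pwroff_count() adds bounds on top of the same nanosecond arithmetic: a zero count would disable the firmware timeout and leave shader-core power control to the host, so it is clamped to 1 unless the always_on policy is selected, and the upper bound remains S32_MAX. A short sketch of only the clamping step (assumed names, dur_val computed as in the previous sketch):

#include <stdint.h>

static uint32_t demo_clamp_pwroff_count(uint64_t dur_val, int always_on)
{
	if (dur_val == 0 && !always_on)
		return 1;		/* 0 would disable the FW timeout entirely */
	if (dur_val > INT32_MAX)
		return INT32_MAX;	/* interface limits the field to S32_MAX */
	return (uint32_t)dur_val;
}
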
@@ -2232,20 +2288,23 @@ u32 kbase_csf_firmware_get_mcu_core_pwroff_time(struct kbase_device *kbdev)
unsigned long flags;
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
- pwroff = kbdev->csf.mcu_core_pwroff_dur_us;
+ pwroff = kbdev->csf.mcu_core_pwroff_dur_ns;
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
return pwroff;
}
-u32 kbase_csf_firmware_set_mcu_core_pwroff_time(struct kbase_device *kbdev, u32 dur)
+u32 kbase_csf_firmware_set_mcu_core_pwroff_time(struct kbase_device *kbdev, u32 dur_ns)
{
unsigned long flags;
- const u32 pwroff = convert_dur_to_core_pwroff_count(kbdev, dur);
+ u32 modifier = 0;
+
+ const u32 pwroff = convert_dur_to_core_pwroff_count(kbdev, dur_ns, &modifier);
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
- kbdev->csf.mcu_core_pwroff_dur_us = dur;
+ kbdev->csf.mcu_core_pwroff_dur_ns = dur_ns;
kbdev->csf.mcu_core_pwroff_dur_count = pwroff;
+ kbdev->csf.mcu_core_pwroff_dur_count_modifier = modifier;
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
dev_dbg(kbdev->dev, "MCU shader Core Poweroff input update: 0x%.8x", pwroff);
@@ -2253,6 +2312,11 @@ u32 kbase_csf_firmware_set_mcu_core_pwroff_time(struct kbase_device *kbdev, u32
return pwroff;
}
+u32 kbase_csf_firmware_reset_mcu_core_pwroff_time(struct kbase_device *kbdev)
+{
+ return kbase_csf_firmware_set_mcu_core_pwroff_time(kbdev, DEFAULT_GLB_PWROFF_TIMEOUT_NS);
+}
+
/**
* kbase_device_csf_iterator_trace_init - Send request to enable iterator
* trace port.
@@ -2264,19 +2328,25 @@ u32 kbase_csf_firmware_set_mcu_core_pwroff_time(struct kbase_device *kbdev, u32
static int kbase_device_csf_iterator_trace_init(struct kbase_device *kbdev)
{
/* Enable the iterator trace port if supported by the GPU.
- * It requires the GPU to have a nonzero "iter_trace_enable"
+ * It requires the GPU to have a nonzero "iter-trace-enable"
* property in the device tree, and the FW must advertise
* this feature in GLB_FEATURES.
*/
if (kbdev->pm.backend.gpu_powered) {
- /* check device tree for iterator trace enable property */
+ /* check device tree for iterator trace enable property
+ * and fallback to "iter_trace_enable" if it is not found
+ */
const void *iter_trace_param = of_get_property(
kbdev->dev->of_node,
- "iter_trace_enable", NULL);
+ "iter-trace-enable", NULL);
const struct kbase_csf_global_iface *iface =
&kbdev->csf.global_iface;
+ if (!iter_trace_param)
+ iter_trace_param =
+ of_get_property(kbdev->dev->of_node, "iter_trace_enable", NULL);
+
if (iter_trace_param) {
u32 iter_trace_value = be32_to_cpup(iter_trace_param);
@@ -2324,6 +2394,8 @@ static void coredump_worker(struct work_struct *data)
int kbase_csf_firmware_early_init(struct kbase_device *kbdev)
{
+ u32 modifier = 0;
+
init_waitqueue_head(&kbdev->csf.event_wait);
kbdev->csf.interrupt_received = false;
@@ -2336,11 +2408,13 @@ int kbase_csf_firmware_early_init(struct kbase_device *kbdev)
*/
kbdev->csf.mcu_core_pwroff_dur_count = 1;
#else
- kbdev->csf.mcu_core_pwroff_dur_us = DEFAULT_GLB_PWROFF_TIMEOUT_US;
+ kbdev->csf.mcu_core_pwroff_dur_ns = DEFAULT_GLB_PWROFF_TIMEOUT_NS;
kbdev->csf.mcu_core_pwroff_dur_count = convert_dur_to_core_pwroff_count(
- kbdev, DEFAULT_GLB_PWROFF_TIMEOUT_US);
+ kbdev, DEFAULT_GLB_PWROFF_TIMEOUT_NS, &modifier);
+ kbdev->csf.mcu_core_pwroff_dur_count_modifier = modifier;
#endif
+ kbase_csf_firmware_reset_mcu_core_pwroff_time(kbdev);
INIT_LIST_HEAD(&kbdev->csf.firmware_interfaces);
INIT_LIST_HEAD(&kbdev->csf.firmware_config);
INIT_LIST_HEAD(&kbdev->csf.firmware_timeline_metadata);
@@ -2352,6 +2426,7 @@ int kbase_csf_firmware_early_init(struct kbase_device *kbdev)
INIT_WORK(&kbdev->csf.coredump_work, coredump_worker);
mutex_init(&kbdev->csf.reg_lock);
+ kbase_csf_pending_gpuq_kicks_init(kbdev);
kbdev->csf.fw = (struct kbase_csf_mcu_fw){ .data = NULL };
@@ -2360,21 +2435,25 @@ int kbase_csf_firmware_early_init(struct kbase_device *kbdev)
void kbase_csf_firmware_early_term(struct kbase_device *kbdev)
{
+ kbase_csf_pending_gpuq_kicks_term(kbdev);
mutex_destroy(&kbdev->csf.reg_lock);
}
int kbase_csf_firmware_late_init(struct kbase_device *kbdev)
{
- kbdev->csf.gpu_idle_hysteresis_us = FIRMWARE_IDLE_HYSTERESIS_TIME_USEC;
+ u32 modifier = 0;
+
+ kbdev->csf.gpu_idle_hysteresis_ns = FIRMWARE_IDLE_HYSTERESIS_TIME_NS;
+
#ifdef KBASE_PM_RUNTIME
if (kbase_pm_gpu_sleep_allowed(kbdev))
- kbdev->csf.gpu_idle_hysteresis_us /= FIRMWARE_IDLE_HYSTERESIS_GPU_SLEEP_SCALER;
+ kbdev->csf.gpu_idle_hysteresis_ns /= FIRMWARE_IDLE_HYSTERESIS_GPU_SLEEP_SCALER;
#endif
- WARN_ON(!kbdev->csf.gpu_idle_hysteresis_us);
+ WARN_ON(!kbdev->csf.gpu_idle_hysteresis_ns);
#ifdef CONFIG_MALI_HOST_CONTROLS_SC_RAILS
kbdev->csf.gpu_idle_dur_count = convert_dur_to_idle_count(
- kbdev, MALI_HOST_CONTROLS_SC_RAILS_IDLE_TIMER_US);
+ kbdev, MALI_HOST_CONTROLS_SC_RAILS_IDLE_TIMER_NS, &modifier);
/* Set to the lowest possible value for FW to immediately write
* to the power off register to disable the cores.
@@ -2382,10 +2461,12 @@ int kbase_csf_firmware_late_init(struct kbase_device *kbdev)
kbdev->csf.mcu_core_pwroff_dur_count = 1;
#else
kbdev->csf.gpu_idle_dur_count = convert_dur_to_idle_count(
- kbdev, kbdev->csf.gpu_idle_hysteresis_us);
- kbdev->csf.mcu_core_pwroff_dur_us = DEFAULT_GLB_PWROFF_TIMEOUT_US;
+ kbdev, kbdev->csf.gpu_idle_hysteresis_ns, &modifier);
+ kbdev->csf.gpu_idle_dur_count_modifier = modifier;
+ kbdev->csf.mcu_core_pwroff_dur_ns = DEFAULT_GLB_PWROFF_TIMEOUT_NS;
kbdev->csf.mcu_core_pwroff_dur_count = convert_dur_to_core_pwroff_count(
- kbdev, DEFAULT_GLB_PWROFF_TIMEOUT_US);
+ kbdev, DEFAULT_GLB_PWROFF_TIMEOUT_NS, &modifier);
+ kbdev->csf.mcu_core_pwroff_dur_count_modifier = modifier;
#endif
return 0;
@@ -2401,6 +2482,7 @@ int kbase_csf_firmware_load_init(struct kbase_device *kbdev)
u32 entry_end_offset;
u32 entry_offset;
int ret;
+ const char *fw_name = default_fw_name;
lockdep_assert_held(&kbdev->fw_load_lock);
@@ -2424,6 +2506,33 @@ int kbase_csf_firmware_load_init(struct kbase_device *kbdev)
goto err_out;
}
+#if IS_ENABLED(CONFIG_OF)
+ /* If we can't read CSF firmware name from DTB,
+ * fw_name is not modified and remains the default.
+ */
+ ret = of_property_read_string(kbdev->dev->of_node, "firmware-name", &fw_name);
+ if (ret == -EINVAL) {
+ /* Property doesn't exist in DTB, and fw_name already points to default FW name
+ * so just reset return value and continue.
+ */
+ ret = 0;
+ } else if (ret == -ENODATA) {
+ dev_warn(kbdev->dev,
+ "\"firmware-name\" DTB property contains no data, using default FW name");
+ /* Reset return value so FW does not fail to load */
+ ret = 0;
+ } else if (ret == -EILSEQ) {
+ /* This is reached when the size of the fw_name buffer is too small for the string
+ * stored in the DTB and the null terminator.
+ */
+ dev_warn(kbdev->dev,
+ "\"firmware-name\" DTB property value too long, using default FW name.");
+ /* Reset return value so FW does not fail to load */
+ ret = 0;
+ }
+
+#endif /* IS_ENABLED(CONFIG_OF) */
+
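
The block above treats every failure mode of of_property_read_string() (-EINVAL absent, -ENODATA empty, -EILSEQ not NUL-terminated) as non-fatal and keeps the built-in default. A condensed sketch of that fallback pattern (assumed node pointer, warnings omitted):

#include <linux/of.h>

static const char *demo_pick_fw_name(struct device_node *np, const char *def_name)
{
	const char *name = def_name;

	/* On any error leave the default name in place rather than failing the load. */
	if (of_property_read_string(np, "firmware-name", &name))
		name = def_name;

	return name;
}
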
if (request_firmware(&firmware, fw_name, kbdev->dev) != 0) {
dev_err(kbdev->dev,
"Failed to load firmware image '%s'\n",
@@ -2534,6 +2643,12 @@ int kbase_csf_firmware_load_init(struct kbase_device *kbdev)
}
#endif
+ ret = kbase_csf_firmware_cfg_fw_wa_init(kbdev);
+ if (ret != 0) {
+ dev_err(kbdev->dev, "Failed to initialize firmware workarounds");
+ goto err_out;
+ }
+
/* Make sure L2 cache is powered up */
kbase_pm_wait_for_l2_powered(kbdev);
@@ -2568,6 +2683,12 @@ int kbase_csf_firmware_load_init(struct kbase_device *kbdev)
if (ret != 0)
goto err_out;
+ ret = kbase_csf_firmware_log_init(kbdev);
+ if (ret != 0) {
+ dev_err(kbdev->dev, "Failed to initialize FW trace (err %d)", ret);
+ goto err_out;
+ }
+
ret = kbase_csf_firmware_cfg_init(kbdev);
if (ret != 0)
goto err_out;
@@ -2576,12 +2697,6 @@ int kbase_csf_firmware_load_init(struct kbase_device *kbdev)
if (ret != 0)
goto err_out;
- ret = kbase_csf_firmware_log_init(kbdev);
- if (ret != 0) {
- dev_err(kbdev->dev, "Failed to initialize FW trace (err %d)", ret);
- goto err_out;
- }
-
if (kbdev->csf.fw_core_dump.available)
kbase_csf_firmware_core_dump_init(kbdev);
@@ -2607,10 +2722,10 @@ void kbase_csf_firmware_unload_term(struct kbase_device *kbdev)
WARN(ret, "failed to wait for GPU reset");
- kbase_csf_firmware_log_term(kbdev);
-
kbase_csf_firmware_cfg_term(kbdev);
+ kbase_csf_firmware_log_term(kbdev);
+
kbase_csf_timeout_term(kbdev);
kbase_csf_free_dummy_user_reg_page(kbdev);
@@ -2638,6 +2753,8 @@ void kbase_csf_firmware_unload_term(struct kbase_device *kbdev)
unload_mmu_tables(kbdev);
+ kbase_csf_firmware_cfg_fw_wa_term(kbdev);
+
kbase_csf_firmware_trace_buffers_term(kbdev);
while (!list_empty(&kbdev->csf.firmware_interfaces)) {
@@ -3014,7 +3131,9 @@ int kbase_csf_trigger_firmware_config_update(struct kbase_device *kbdev)
/* Ensure GPU is powered-up until we complete config update.*/
kbase_csf_scheduler_pm_active(kbdev);
- kbase_csf_scheduler_wait_mcu_active(kbdev);
+ err = kbase_csf_scheduler_killable_wait_mcu_active(kbdev);
+ if (err)
+ goto exit;
/* The 'reg_lock' is also taken and is held till the update is
* complete, to ensure the config update gets serialized.
@@ -3031,6 +3150,7 @@ int kbase_csf_trigger_firmware_config_update(struct kbase_device *kbdev)
GLB_REQ_FIRMWARE_CONFIG_UPDATE_MASK);
mutex_unlock(&kbdev->csf.reg_lock);
+exit:
kbase_csf_scheduler_pm_idle(kbdev);
return err;
}
@@ -3176,8 +3296,7 @@ int kbase_csf_firmware_mcu_shared_mapping_init(
if (!cpu_addr)
goto vmap_error;
- va_reg = kbase_alloc_free_region(kbdev, &kbdev->csf.shared_reg_rbtree, 0, num_pages,
- KBASE_REG_ZONE_MCU_SHARED);
+ va_reg = kbase_alloc_free_region(&kbdev->csf.mcu_shared_zone, 0, num_pages);
if (!va_reg)
goto va_region_alloc_error;
@@ -3193,7 +3312,7 @@ int kbase_csf_firmware_mcu_shared_mapping_init(
ret = kbase_mmu_insert_pages_no_flush(kbdev, &kbdev->csf.mcu_mmu, va_reg->start_pfn,
&phys[0], num_pages, gpu_map_properties,
- KBASE_MEM_GROUP_CSF_FW, NULL, NULL, false);
+ KBASE_MEM_GROUP_CSF_FW, NULL, NULL);
if (ret)
goto mmu_insert_pages_error;
diff --git a/mali_kbase/csf/mali_kbase_csf_firmware.h b/mali_kbase/csf/mali_kbase_csf_firmware.h
index 9e85c1d..d8ed8d6 100644
--- a/mali_kbase/csf/mali_kbase_csf_firmware.h
+++ b/mali_kbase/csf/mali_kbase_csf_firmware.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2018-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -56,7 +56,7 @@
#define CSF_NUM_DOORBELL ((u8)24)
/* Offset to the first HW doorbell page */
-#define CSF_HW_DOORBELL_PAGE_OFFSET ((u32)0x80000)
+#define CSF_HW_DOORBELL_PAGE_OFFSET ((u32)DOORBELLS_BASE)
/* Size of HW Doorbell page, used to calculate the offset to subsequent pages */
#define CSF_HW_DOORBELL_PAGE_SIZE ((u32)0x10000)
@@ -870,6 +870,22 @@ u32 kbase_csf_firmware_get_mcu_core_pwroff_time(struct kbase_device *kbdev);
u32 kbase_csf_firmware_set_mcu_core_pwroff_time(struct kbase_device *kbdev, u32 dur);
/**
+ * kbase_csf_firmware_reset_mcu_core_pwroff_time - Reset the MCU shader Core power-off
+ * time value
+ *
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
+ *
+ * Sets the MCU Shader Core power-off time value to the default.
+ *
+ * The configured MCU shader Core power-off timer will only have effect when the host
+ * driver has delegated the shader cores' power management to MCU.
+ *
+ * Return: the actual internal core power-off timer value in register defined
+ * format.
+ */
+u32 kbase_csf_firmware_reset_mcu_core_pwroff_time(struct kbase_device *kbdev);
+
+/**
* kbase_csf_interface_version - Helper function to build the full firmware
* interface version in a format compatible with
* GLB_VERSION register
diff --git a/mali_kbase/csf/mali_kbase_csf_firmware_cfg.c b/mali_kbase/csf/mali_kbase_csf_firmware_cfg.c
index 13a816b..48ddbb5 100644
--- a/mali_kbase/csf/mali_kbase_csf_firmware_cfg.c
+++ b/mali_kbase/csf/mali_kbase_csf_firmware_cfg.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2020-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -35,6 +35,7 @@
#define HOST_CONTROLS_SC_RAILS_CFG_ENTRY_NAME "Host controls SC rails"
#endif
+#define CSF_FIRMWARE_CFG_WA_CFG0_ENTRY_NAME "WA_CFG0"
/**
* struct firmware_config - Configuration item within the MCU firmware
@@ -117,7 +118,7 @@ static ssize_t show_fw_cfg(struct kobject *kobj,
return -EINVAL;
}
- return snprintf(buf, PAGE_SIZE, "%u\n", val);
+ return scnprintf(buf, PAGE_SIZE, "%u\n", val);
}
static ssize_t store_fw_cfg(struct kobject *kobj,
@@ -150,6 +151,9 @@ static ssize_t store_fw_cfg(struct kobject *kobj,
HOST_CONTROLS_SC_RAILS_CFG_ENTRY_NAME))
return -EPERM;
#endif
+ if (!strcmp(config->name,
+ CSF_FIRMWARE_CFG_WA_CFG0_ENTRY_NAME))
+ return -EPERM;
if ((val < config->min) || (val > config->max))
return -EINVAL;
@@ -275,6 +279,19 @@ int kbase_csf_firmware_cfg_init(struct kbase_device *kbdev)
kbase_csf_read_firmware_memory(kbdev, config->address,
&config->cur_val);
+ if (!strcmp(config->name, CSF_FIRMWARE_CFG_LOG_VERBOSITY_ENTRY_NAME) &&
+ (config->cur_val)) {
+ err = kbase_csf_firmware_log_toggle_logging_calls(config->kbdev,
+ config->cur_val);
+
+ if (err) {
+ kobject_put(&config->kobj);
+ dev_err(kbdev->dev, "Failed to enable logging (result: %d)", err);
+ return err;
+ }
+ }
+
err = kobject_init_and_add(&config->kobj, &fw_cfg_kobj_type,
kbdev->csf.fw_cfg_kobj, "%s", config->name);
if (err) {
@@ -361,6 +378,25 @@ int kbase_csf_firmware_cfg_find_config_address(struct kbase_device *kbdev, const
return -ENOENT;
}
+int kbase_csf_firmware_cfg_fw_wa_enable(struct kbase_device *kbdev)
+{
+ struct firmware_config *config;
+
+ /* "quirks_ext" property is optional */
+ if (!kbdev->csf.quirks_ext)
+ return 0;
+
+ list_for_each_entry(config, &kbdev->csf.firmware_config, node) {
+ if (strcmp(config->name, CSF_FIRMWARE_CFG_WA_CFG0_ENTRY_NAME))
+ continue;
+ dev_info(kbdev->dev, "External quirks 0: 0x%08x", kbdev->csf.quirks_ext[0]);
+ kbase_csf_update_firmware_memory(kbdev, config->address, kbdev->csf.quirks_ext[0]);
+ return 0;
+ }
+
+ return -ENOENT;
+}
+
#ifdef CONFIG_MALI_HOST_CONTROLS_SC_RAILS
int kbase_csf_firmware_cfg_enable_host_ctrl_sc_rails(struct kbase_device *kbdev)
{
@@ -379,6 +415,54 @@ int kbase_csf_firmware_cfg_enable_host_ctrl_sc_rails(struct kbase_device *kbdev)
}
#endif
+int kbase_csf_firmware_cfg_fw_wa_init(struct kbase_device *kbdev)
+{
+ int ret;
+ int entry_count;
+ size_t entry_bytes;
+
+ /* "quirks-ext" property is optional and may have no value.
+ * Also try fallback "quirks_ext" property if it doesn't exist.
+ */
+ entry_count = of_property_count_u32_elems(kbdev->dev->of_node, "quirks-ext");
+
+ if (entry_count == -EINVAL)
+ entry_count = of_property_count_u32_elems(kbdev->dev->of_node, "quirks_ext");
+
+ if (entry_count == -EINVAL || entry_count == -ENODATA)
+ return 0;
+
+ entry_bytes = entry_count * sizeof(u32);
+ kbdev->csf.quirks_ext = kzalloc(entry_bytes, GFP_KERNEL);
+ if (!kbdev->csf.quirks_ext)
+ return -ENOMEM;
+
+ ret = of_property_read_u32_array(kbdev->dev->of_node, "quirks-ext", kbdev->csf.quirks_ext,
+ entry_count);
+
+ if (ret == -EINVAL)
+ ret = of_property_read_u32_array(kbdev->dev->of_node, "quirks_ext",
+ kbdev->csf.quirks_ext, entry_count);
+
+ if (ret == -EINVAL || ret == -ENODATA) {
+ /* This is unexpected since the property is already accessed for counting the number
+ * of its elements.
+ */
+ dev_err(kbdev->dev, "\"quirks_ext\" DTB property data read failed");
+ return ret;
+ }
+ if (ret == -EOVERFLOW) {
+ dev_err(kbdev->dev, "\"quirks_ext\" DTB property data size exceeds 32 bits");
+ return ret;
+ }
+
+ return kbase_csf_firmware_cfg_fw_wa_enable(kbdev);
+}
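
kbase_csf_firmware_cfg_fw_wa_init() above probes both the dash and underscore spellings of the optional property before allocating and reading the u32 array. A reduced sketch of that probe-then-read pattern (demo_ names, simplified error handling):

#include <linux/of.h>
#include <linux/slab.h>

static u32 *demo_read_quirks(struct device_node *np, int *out_count)
{
	const char *prop = "quirks-ext";
	int count = of_property_count_u32_elems(np, prop);
	u32 *buf;

	if (count == -EINVAL) {			/* dash form absent: try the legacy name */
		prop = "quirks_ext";
		count = of_property_count_u32_elems(np, prop);
	}
	if (count <= 0)
		return NULL;			/* property is optional, so not an error */

	buf = kcalloc(count, sizeof(*buf), GFP_KERNEL);
	if (!buf)
		return NULL;

	if (of_property_read_u32_array(np, prop, buf, count)) {
		kfree(buf);
		return NULL;
	}

	*out_count = count;
	return buf;
}
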
+
+void kbase_csf_firmware_cfg_fw_wa_term(struct kbase_device *kbdev)
+{
+ kfree(kbdev->csf.quirks_ext);
+}
#else
int kbase_csf_firmware_cfg_init(struct kbase_device *kbdev)
@@ -404,4 +488,15 @@ int kbase_csf_firmware_cfg_enable_host_ctrl_sc_rails(struct kbase_device *kbdev)
return 0;
}
#endif
+
+int kbase_csf_firmware_cfg_fw_wa_enable(struct kbase_device *kbdev)
+{
+ return 0;
+}
+
+int kbase_csf_firmware_cfg_fw_wa_init(struct kbase_device *kbdev)
+{
+ return 0;
+}
+
#endif /* CONFIG_SYSFS */
diff --git a/mali_kbase/csf/mali_kbase_csf_firmware_cfg.h b/mali_kbase/csf/mali_kbase_csf_firmware_cfg.h
index bf99c46..f565290 100644
--- a/mali_kbase/csf/mali_kbase_csf_firmware_cfg.h
+++ b/mali_kbase/csf/mali_kbase_csf_firmware_cfg.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2020-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -97,5 +97,37 @@ int kbase_csf_firmware_cfg_enable_host_ctrl_sc_rails(struct kbase_device *kbdev)
*/
int kbase_csf_firmware_cfg_find_config_address(struct kbase_device *kbdev, const char *name,
u32 *addr);
+/**
+ * kbase_csf_firmware_cfg_fw_wa_enable() - Enable firmware workarounds configuration.
+ *
+ * @kbdev: Kbase device structure
+ *
+ * Look for the config entry that enables support in FW for workarounds and set it according to
+ * the firmware workaround configuration before the initial boot or reload of firmware.
+ *
+ * Return: 0 if successful, negative error code on failure
+ */
+int kbase_csf_firmware_cfg_fw_wa_enable(struct kbase_device *kbdev);
+
+/**
+ * kbase_csf_firmware_cfg_fw_wa_init() - Initialize firmware workarounds configuration.
+ *
+ * @kbdev: Kbase device structure
+ *
+ * Retrieve and save the firmware workarounds configuration from device-tree "quirks_ext" property.
+ * Then, look for the config entry that enables support in FW for workarounds and set it according
+ * to the configuration before the initial firmware boot.
+ *
+ * Return: 0 if successful, negative error code on failure
+ */
+int kbase_csf_firmware_cfg_fw_wa_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_csf_firmware_cfg_fw_wa_term - Delete local cache for firmware workarounds configuration.
+ *
+ * @kbdev: Pointer to the Kbase device
+ *
+ */
+void kbase_csf_firmware_cfg_fw_wa_term(struct kbase_device *kbdev);
#endif /* _KBASE_CSF_FIRMWARE_CFG_H_ */
diff --git a/mali_kbase/csf/mali_kbase_csf_firmware_core_dump.c b/mali_kbase/csf/mali_kbase_csf_firmware_core_dump.c
index ce8e4af..493e1c8 100644
--- a/mali_kbase/csf/mali_kbase_csf_firmware_core_dump.c
+++ b/mali_kbase/csf/mali_kbase_csf_firmware_core_dump.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2021-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -25,6 +25,7 @@
#include <linux/file.h>
#include <linux/elf.h>
#include <linux/elfcore.h>
+#include <linux/version_compat_defs.h>
#include "mali_kbase.h"
#include "mali_kbase_csf_firmware_core_dump.h"
@@ -507,7 +508,7 @@ static int fw_core_dump_create(struct kbase_device *kbdev)
/* Ensure MCU is active before requesting the core dump. */
kbase_csf_scheduler_pm_active(kbdev);
- err = kbase_csf_scheduler_wait_mcu_active(kbdev);
+ err = kbase_csf_scheduler_killable_wait_mcu_active(kbdev);
if (!err)
err = kbase_csf_firmware_req_core_dump(kbdev);
@@ -666,9 +667,9 @@ static int fw_core_dump_seq_show(struct seq_file *m, void *v)
/* Write the current page. */
page = as_page(data->interface->phys[data->page_num]);
- p = kmap_atomic(page);
+ p = kbase_kmap_atomic(page);
seq_write(m, p, FW_PAGE_SIZE);
- kunmap_atomic(p);
+ kbase_kunmap_atomic(p);
return 0;
}
diff --git a/mali_kbase/csf/mali_kbase_csf_firmware_log.c b/mali_kbase/csf/mali_kbase_csf_firmware_log.c
index 77d3b1e..89df839 100644
--- a/mali_kbase/csf/mali_kbase_csf_firmware_log.c
+++ b/mali_kbase/csf/mali_kbase_csf_firmware_log.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2021-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -55,7 +55,7 @@ static int kbase_csf_firmware_log_enable_mask_read(void *data, u64 *val)
{
struct kbase_device *kbdev = (struct kbase_device *)data;
struct firmware_trace_buffer *tb =
- kbase_csf_firmware_get_trace_buffer(kbdev, FIRMWARE_LOG_BUF_NAME);
+ kbase_csf_firmware_get_trace_buffer(kbdev, KBASE_CSFFW_LOG_BUF_NAME);
if (tb == NULL) {
dev_err(kbdev->dev, "Couldn't get the firmware trace buffer");
@@ -70,7 +70,7 @@ static int kbase_csf_firmware_log_enable_mask_write(void *data, u64 val)
{
struct kbase_device *kbdev = (struct kbase_device *)data;
struct firmware_trace_buffer *tb =
- kbase_csf_firmware_get_trace_buffer(kbdev, FIRMWARE_LOG_BUF_NAME);
+ kbase_csf_firmware_get_trace_buffer(kbdev, KBASE_CSFFW_LOG_BUF_NAME);
u64 new_mask;
unsigned int enable_bits_count;
@@ -115,7 +115,7 @@ static ssize_t kbasep_csf_firmware_log_debugfs_read(struct file *file, char __us
int ret;
struct firmware_trace_buffer *tb =
- kbase_csf_firmware_get_trace_buffer(kbdev, FIRMWARE_LOG_BUF_NAME);
+ kbase_csf_firmware_get_trace_buffer(kbdev, KBASE_CSFFW_LOG_BUF_NAME);
if (tb == NULL) {
dev_err(kbdev->dev, "Couldn't get the firmware trace buffer");
@@ -125,8 +125,9 @@ static ssize_t kbasep_csf_firmware_log_debugfs_read(struct file *file, char __us
if (atomic_cmpxchg(&fw_log->busy, 0, 1) != 0)
return -EBUSY;
- /* Reading from userspace is only allowed in manual mode */
- if (fw_log->mode != KBASE_CSF_FIRMWARE_LOG_MODE_MANUAL) {
+ /* Reading from userspace is only allowed in manual mode or auto-discard mode */
+ if (fw_log->mode != KBASE_CSF_FIRMWARE_LOG_MODE_MANUAL &&
+ fw_log->mode != KBASE_CSF_FIRMWARE_LOG_MODE_AUTO_DISCARD) {
ret = -EINVAL;
goto out;
}
@@ -176,8 +177,9 @@ static int kbase_csf_firmware_log_mode_write(void *data, u64 val)
cancel_delayed_work_sync(&fw_log->poll_work);
break;
case KBASE_CSF_FIRMWARE_LOG_MODE_AUTO_PRINT:
+ case KBASE_CSF_FIRMWARE_LOG_MODE_AUTO_DISCARD:
schedule_delayed_work(&fw_log->poll_work,
- msecs_to_jiffies(KBASE_CSF_FIRMWARE_LOG_POLL_PERIOD_MS));
+ msecs_to_jiffies(atomic_read(&fw_log->poll_period_ms)));
break;
default:
ret = -EINVAL;
@@ -191,6 +193,24 @@ out:
return ret;
}
+static int kbase_csf_firmware_log_poll_period_read(void *data, u64 *val)
+{
+ struct kbase_device *kbdev = (struct kbase_device *)data;
+ struct kbase_csf_firmware_log *fw_log = &kbdev->csf.fw_log;
+
+ *val = atomic_read(&fw_log->poll_period_ms);
+ return 0;
+}
+
+static int kbase_csf_firmware_log_poll_period_write(void *data, u64 val)
+{
+ struct kbase_device *kbdev = (struct kbase_device *)data;
+ struct kbase_csf_firmware_log *fw_log = &kbdev->csf.fw_log;
+
+ atomic_set(&fw_log->poll_period_ms, val);
+ return 0;
+}
+
DEFINE_DEBUGFS_ATTRIBUTE(kbase_csf_firmware_log_enable_mask_fops,
kbase_csf_firmware_log_enable_mask_read,
kbase_csf_firmware_log_enable_mask_write, "%llx\n");
@@ -204,56 +224,135 @@ static const struct file_operations kbasep_csf_firmware_log_debugfs_fops = {
DEFINE_DEBUGFS_ATTRIBUTE(kbase_csf_firmware_log_mode_fops, kbase_csf_firmware_log_mode_read,
kbase_csf_firmware_log_mode_write, "%llu\n");
+DEFINE_DEBUGFS_ATTRIBUTE(kbase_csf_firmware_log_poll_period_fops,
+ kbase_csf_firmware_log_poll_period_read,
+ kbase_csf_firmware_log_poll_period_write, "%llu\n");
#endif /* CONFIG_DEBUG_FS */
+static void kbase_csf_firmware_log_discard_buffer(struct kbase_device *kbdev)
+{
+ struct kbase_csf_firmware_log *fw_log = &kbdev->csf.fw_log;
+ struct firmware_trace_buffer *tb =
+ kbase_csf_firmware_get_trace_buffer(kbdev, KBASE_CSFFW_LOG_BUF_NAME);
+
+ if (tb == NULL) {
+ dev_dbg(kbdev->dev, "Can't get the trace buffer, firmware log discard skipped");
+ return;
+ }
+
+ if (atomic_cmpxchg(&fw_log->busy, 0, 1) != 0)
+ return;
+
+ kbase_csf_firmware_trace_buffer_discard(tb);
+
+ atomic_set(&fw_log->busy, 0);
+}
+
static void kbase_csf_firmware_log_poll(struct work_struct *work)
{
struct kbase_device *kbdev =
container_of(work, struct kbase_device, csf.fw_log.poll_work.work);
struct kbase_csf_firmware_log *fw_log = &kbdev->csf.fw_log;
- schedule_delayed_work(&fw_log->poll_work,
- msecs_to_jiffies(KBASE_CSF_FIRMWARE_LOG_POLL_PERIOD_MS));
+ if (fw_log->mode == KBASE_CSF_FIRMWARE_LOG_MODE_AUTO_PRINT)
+ kbase_csf_firmware_log_dump_buffer(kbdev);
+ else if (fw_log->mode == KBASE_CSF_FIRMWARE_LOG_MODE_AUTO_DISCARD)
+ kbase_csf_firmware_log_discard_buffer(kbdev);
+ else
+ return;
- kbase_csf_firmware_log_dump_buffer(kbdev);
+ schedule_delayed_work(&fw_log->poll_work,
+ msecs_to_jiffies(atomic_read(&fw_log->poll_period_ms)));
}
int kbase_csf_firmware_log_init(struct kbase_device *kbdev)
{
struct kbase_csf_firmware_log *fw_log = &kbdev->csf.fw_log;
+ int err = 0;
+#if defined(CONFIG_DEBUG_FS)
+ struct dentry *dentry;
+#endif /* CONFIG_DEBUG_FS */
/* Add one byte for null-termination */
fw_log->dump_buf = kmalloc(FIRMWARE_LOG_DUMP_BUF_SIZE + 1, GFP_KERNEL);
- if (fw_log->dump_buf == NULL)
- return -ENOMEM;
+ if (fw_log->dump_buf == NULL) {
+ err = -ENOMEM;
+ goto out;
+ }
/* Ensure null-termination for all strings */
fw_log->dump_buf[FIRMWARE_LOG_DUMP_BUF_SIZE] = 0;
+ /* Set default log polling period */
+ atomic_set(&fw_log->poll_period_ms, KBASE_CSF_FIRMWARE_LOG_POLL_PERIOD_MS_DEFAULT);
+
+ INIT_DEFERRABLE_WORK(&fw_log->poll_work, kbase_csf_firmware_log_poll);
+#ifdef CONFIG_MALI_FW_TRACE_MODE_AUTO_DISCARD
+ fw_log->mode = KBASE_CSF_FIRMWARE_LOG_MODE_AUTO_DISCARD;
+ schedule_delayed_work(&fw_log->poll_work,
+ msecs_to_jiffies(KBASE_CSF_FIRMWARE_LOG_POLL_PERIOD_MS_DEFAULT));
+#elif defined(CONFIG_MALI_FW_TRACE_MODE_AUTO_PRINT)
+ fw_log->mode = KBASE_CSF_FIRMWARE_LOG_MODE_AUTO_PRINT;
+ schedule_delayed_work(&fw_log->poll_work,
+ msecs_to_jiffies(KBASE_CSF_FIRMWARE_LOG_POLL_PERIOD_MS_DEFAULT));
+#else /* CONFIG_MALI_FW_TRACE_MODE_MANUAL */
fw_log->mode = KBASE_CSF_FIRMWARE_LOG_MODE_MANUAL;
+#endif
atomic_set(&fw_log->busy, 0);
- INIT_DEFERRABLE_WORK(&fw_log->poll_work, kbase_csf_firmware_log_poll);
-#if defined(CONFIG_DEBUG_FS)
- debugfs_create_file("fw_trace_enable_mask", 0644, kbdev->mali_debugfs_directory, kbdev,
- &kbase_csf_firmware_log_enable_mask_fops);
- debugfs_create_file("fw_traces", 0444, kbdev->mali_debugfs_directory, kbdev,
- &kbasep_csf_firmware_log_debugfs_fops);
- debugfs_create_file("fw_trace_mode", 0644, kbdev->mali_debugfs_directory, kbdev,
- &kbase_csf_firmware_log_mode_fops);
-#endif /* CONFIG_DEBUG_FS */
+#if !defined(CONFIG_DEBUG_FS)
+ return 0;
+#else /* !CONFIG_DEBUG_FS */
+ dentry = debugfs_create_file("fw_trace_enable_mask", 0644, kbdev->mali_debugfs_directory,
+ kbdev, &kbase_csf_firmware_log_enable_mask_fops);
+ if (IS_ERR_OR_NULL(dentry)) {
+ dev_err(kbdev->dev, "Unable to create fw_trace_enable_mask\n");
+ err = -ENOENT;
+ goto free_out;
+ }
+ dentry = debugfs_create_file("fw_traces", 0444, kbdev->mali_debugfs_directory, kbdev,
+ &kbasep_csf_firmware_log_debugfs_fops);
+ if (IS_ERR_OR_NULL(dentry)) {
+ dev_err(kbdev->dev, "Unable to create fw_traces\n");
+ err = -ENOENT;
+ goto free_out;
+ }
+ dentry = debugfs_create_file("fw_trace_mode", 0644, kbdev->mali_debugfs_directory, kbdev,
+ &kbase_csf_firmware_log_mode_fops);
+ if (IS_ERR_OR_NULL(dentry)) {
+ dev_err(kbdev->dev, "Unable to create fw_trace_mode\n");
+ err = -ENOENT;
+ goto free_out;
+ }
+ dentry = debugfs_create_file("fw_trace_poll_period_ms", 0644, kbdev->mali_debugfs_directory,
+ kbdev, &kbase_csf_firmware_log_poll_period_fops);
+ if (IS_ERR_OR_NULL(dentry)) {
+ dev_err(kbdev->dev, "Unable to create fw_trace_poll_period_ms");
+ err = -ENOENT;
+ goto free_out;
+ }
return 0;
+
+free_out:
+ kfree(fw_log->dump_buf);
+ fw_log->dump_buf = NULL;
+#endif /* CONFIG_DEBUG_FS */
+out:
+ return err;
}
void kbase_csf_firmware_log_term(struct kbase_device *kbdev)
{
struct kbase_csf_firmware_log *fw_log = &kbdev->csf.fw_log;
- cancel_delayed_work_sync(&fw_log->poll_work);
- kfree(fw_log->dump_buf);
+ if (fw_log->dump_buf) {
+ cancel_delayed_work_sync(&fw_log->poll_work);
+ kfree(fw_log->dump_buf);
+ fw_log->dump_buf = NULL;
+ }
}
void kbase_csf_firmware_log_dump_buffer(struct kbase_device *kbdev)
@@ -262,7 +361,7 @@ void kbase_csf_firmware_log_dump_buffer(struct kbase_device *kbdev)
u8 *buf = fw_log->dump_buf, *p, *pnewline, *pend, *pendbuf;
unsigned int read_size, remaining_size;
struct firmware_trace_buffer *tb =
- kbase_csf_firmware_get_trace_buffer(kbdev, FIRMWARE_LOG_BUF_NAME);
+ kbase_csf_firmware_get_trace_buffer(kbdev, KBASE_CSFFW_LOG_BUF_NAME);
if (tb == NULL) {
dev_dbg(kbdev->dev, "Can't get the trace buffer, firmware trace dump skipped");
@@ -415,7 +514,7 @@ int kbase_csf_firmware_log_toggle_logging_calls(struct kbase_device *kbdev, u32
/* Wait for the MCU to get disabled */
dev_info(kbdev->dev, "Wait for the MCU to get disabled");
- ret = kbase_pm_wait_for_desired_state(kbdev);
+ ret = kbase_pm_killable_wait_for_desired_state(kbdev);
if (ret) {
dev_err(kbdev->dev,
"wait for PM state failed when toggling FW logging calls");
diff --git a/mali_kbase/csf/mali_kbase_csf_firmware_no_mali.c b/mali_kbase/csf/mali_kbase_csf_firmware_no_mali.c
index 514492c..764c18d 100644
--- a/mali_kbase/csf/mali_kbase_csf_firmware_no_mali.c
+++ b/mali_kbase/csf/mali_kbase_csf_firmware_no_mali.c
@@ -936,7 +936,7 @@ void kbase_csf_firmware_reload_completed(struct kbase_device *kbdev)
kbase_pm_update_state(kbdev);
}
-static u32 convert_dur_to_idle_count(struct kbase_device *kbdev, const u32 dur_ms)
+static u32 convert_dur_to_idle_count(struct kbase_device *kbdev, const u32 dur_ms, u32 *modifier)
{
#define HYSTERESIS_VAL_UNIT_SHIFT (10)
/* Get the cntfreq_el0 value, which drives the SYSTEM_TIMESTAMP */
@@ -963,6 +963,8 @@ static u32 convert_dur_to_idle_count(struct kbase_device *kbdev, const u32 dur_m
dur_val = (dur_val * freq) >> HYSTERESIS_VAL_UNIT_SHIFT;
dur_val = div_u64(dur_val, 1000);
+ *modifier = 0;
+
/* Interface limits the value field to S32_MAX */
cnt_val_u32 = (dur_val > S32_MAX) ? S32_MAX : (u32)dur_val;
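
Ignoring the new modifier (forced to 0 in this path), the scaling above reduces to count = dur_ms * cntfreq / (1000 << 10), clamped to S32_MAX; the >>10 suggests the firmware timer counts in units of 1024 counter cycles. A standalone sketch of that arithmetic, with made-up numbers, for anyone checking the conversion:

    #include <stdint.h>
    #include <stdio.h>

    #define HYSTERESIS_VAL_UNIT_SHIFT 10

    /* Mirror of the two scaling steps above: multiply by the counter frequency,
     * drop HYSTERESIS_VAL_UNIT_SHIFT bits, then convert milliseconds to seconds.
     */
    static uint32_t dur_ms_to_idle_count(uint64_t dur_ms, uint64_t cntfreq_hz)
    {
        uint64_t dur_val = (dur_ms * cntfreq_hz) >> HYSTERESIS_VAL_UNIT_SHIFT;

        dur_val /= 1000;
        return dur_val > INT32_MAX ? INT32_MAX : (uint32_t)dur_val;
    }

    int main(void)
    {
        /* e.g. a 10 ms hysteresis with a 13 MHz system counter */
        printf("idle count = %u\n", dur_ms_to_idle_count(10, 13000000));
        return 0;
    }
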
@@ -984,7 +986,7 @@ u32 kbase_csf_firmware_get_gpu_idle_hysteresis_time(struct kbase_device *kbdev)
u32 dur;
kbase_csf_scheduler_spin_lock(kbdev, &flags);
- dur = kbdev->csf.gpu_idle_hysteresis_us;
+ dur = kbdev->csf.gpu_idle_hysteresis_ns;
kbase_csf_scheduler_spin_unlock(kbdev, flags);
return dur;
@@ -993,7 +995,9 @@ u32 kbase_csf_firmware_get_gpu_idle_hysteresis_time(struct kbase_device *kbdev)
u32 kbase_csf_firmware_set_gpu_idle_hysteresis_time(struct kbase_device *kbdev, u32 dur)
{
unsigned long flags;
- const u32 hysteresis_val = convert_dur_to_idle_count(kbdev, dur);
+ u32 modifier = 0;
+
+ const u32 hysteresis_val = convert_dur_to_idle_count(kbdev, dur, &modifier);
/* The 'fw_load_lock' is taken to synchronize against the deferred
* loading of FW, where the idle timer will be enabled.
@@ -1001,19 +1005,28 @@ u32 kbase_csf_firmware_set_gpu_idle_hysteresis_time(struct kbase_device *kbdev,
mutex_lock(&kbdev->fw_load_lock);
if (unlikely(!kbdev->csf.firmware_inited)) {
kbase_csf_scheduler_spin_lock(kbdev, &flags);
- kbdev->csf.gpu_idle_hysteresis_us = dur;
+ kbdev->csf.gpu_idle_hysteresis_ns = dur;
kbdev->csf.gpu_idle_dur_count = hysteresis_val;
+ kbdev->csf.gpu_idle_dur_count_modifier = modifier;
kbase_csf_scheduler_spin_unlock(kbdev, flags);
mutex_unlock(&kbdev->fw_load_lock);
goto end;
}
mutex_unlock(&kbdev->fw_load_lock);
+ if (kbase_reset_gpu_prevent_and_wait(kbdev)) {
+ dev_warn(kbdev->dev,
+ "Failed to prevent GPU reset when updating idle_hysteresis_time");
+ return kbdev->csf.gpu_idle_dur_count;
+ }
+
kbase_csf_scheduler_pm_active(kbdev);
- if (kbase_csf_scheduler_wait_mcu_active(kbdev)) {
+ if (kbase_csf_scheduler_killable_wait_mcu_active(kbdev)) {
dev_err(kbdev->dev,
"Unable to activate the MCU, the idle hysteresis value shall remain unchanged");
kbase_csf_scheduler_pm_idle(kbdev);
+ kbase_reset_gpu_allow(kbdev);
+
return kbdev->csf.gpu_idle_dur_count;
}
@@ -1041,6 +1054,7 @@ u32 kbase_csf_firmware_set_gpu_idle_hysteresis_time(struct kbase_device *kbdev,
kbase_csf_scheduler_spin_lock(kbdev, &flags);
kbdev->csf.gpu_idle_hysteresis_us = dur;
kbdev->csf.gpu_idle_dur_count = hysteresis_val;
+ kbdev->csf.gpu_idle_dur_count_modifier = modifier;
kbase_csf_firmware_enable_gpu_idle_timer(kbdev);
kbase_csf_scheduler_spin_unlock(kbdev, flags);
wait_for_global_request(kbdev, GLB_REQ_IDLE_ENABLE_MASK);
@@ -1052,6 +1066,7 @@ u32 kbase_csf_firmware_set_gpu_idle_hysteresis_time(struct kbase_device *kbdev,
kbase_csf_scheduler_spin_lock(kbdev, &flags);
kbdev->csf.gpu_idle_hysteresis_us = dur;
kbdev->csf.gpu_idle_dur_count = hysteresis_val;
+ kbdev->csf.gpu_idle_dur_count_modifier = modifier;
kbase_csf_scheduler_spin_unlock(kbdev, flags);
}
kbase_csf_scheduler_unlock(kbdev);
@@ -1060,7 +1075,7 @@ u32 kbase_csf_firmware_set_gpu_idle_hysteresis_time(struct kbase_device *kbdev,
#endif
kbase_csf_scheduler_pm_idle(kbdev);
-
+ kbase_reset_gpu_allow(kbdev);
end:
dev_dbg(kbdev->dev, "CSF set firmware idle hysteresis count-value: 0x%.8x",
hysteresis_val);
@@ -1068,7 +1083,8 @@ end:
return hysteresis_val;
}
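
For readers following the new error handling: the update path now brackets the MCU interaction between GPU-reset prevention and scheduler PM activation, unwinding both on every exit. A condensed sketch of that pattern, using only the calls visible in this hunk (includes and the 0-on-success return convention are assumptions, and error handling is simplified):

    #include <mali_kbase.h>
    #include <mali_kbase_reset_gpu.h>
    #include "mali_kbase_csf_scheduler.h"

    static int example_update_with_mcu_active(struct kbase_device *kbdev)
    {
        int err;

        /* Block GPU reset for the duration of the update */
        if (kbase_reset_gpu_prevent_and_wait(kbdev))
            return -EBUSY;

        /* Keep the MCU powered while the new value is programmed */
        kbase_csf_scheduler_pm_active(kbdev);
        err = kbase_csf_scheduler_killable_wait_mcu_active(kbdev);
        if (!err) {
            /* ... program the firmware with the new count here ... */
        }

        /* Always unwind in reverse order */
        kbase_csf_scheduler_pm_idle(kbdev);
        kbase_reset_gpu_allow(kbdev);
        return err;
    }
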
-static u32 convert_dur_to_core_pwroff_count(struct kbase_device *kbdev, const u32 dur_us)
+static u32 convert_dur_to_core_pwroff_count(struct kbase_device *kbdev, const u32 dur_us,
+ u32 *modifier)
{
/* Get the cntfreq_el0 value, which drives the SYSTEM_TIMESTAMP */
u64 freq = arch_timer_get_cntfrq();
@@ -1094,6 +1110,8 @@ static u32 convert_dur_to_core_pwroff_count(struct kbase_device *kbdev, const u3
dur_val = (dur_val * freq) >> HYSTERESIS_VAL_UNIT_SHIFT;
dur_val = div_u64(dur_val, 1000000);
+ *modifier = 0;
+
/* Interface limits the value field to S32_MAX */
cnt_val_u32 = (dur_val > S32_MAX) ? S32_MAX : (u32)dur_val;
@@ -1115,7 +1133,7 @@ u32 kbase_csf_firmware_get_mcu_core_pwroff_time(struct kbase_device *kbdev)
unsigned long flags;
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
- pwroff = kbdev->csf.mcu_core_pwroff_dur_us;
+ pwroff = kbdev->csf.mcu_core_pwroff_dur_ns;
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
return pwroff;
@@ -1124,11 +1142,14 @@ u32 kbase_csf_firmware_get_mcu_core_pwroff_time(struct kbase_device *kbdev)
u32 kbase_csf_firmware_set_mcu_core_pwroff_time(struct kbase_device *kbdev, u32 dur)
{
unsigned long flags;
- const u32 pwroff = convert_dur_to_core_pwroff_count(kbdev, dur);
+ u32 modifier = 0;
+
+ const u32 pwroff = convert_dur_to_core_pwroff_count(kbdev, dur, &modifier);
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
- kbdev->csf.mcu_core_pwroff_dur_us = dur;
+ kbdev->csf.mcu_core_pwroff_dur_ns = dur;
kbdev->csf.mcu_core_pwroff_dur_count = pwroff;
+ kbdev->csf.mcu_core_pwroff_dur_count_modifier = modifier;
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
dev_dbg(kbdev->dev, "MCU shader Core Poweroff input update: 0x%.8x", pwroff);
@@ -1136,6 +1157,11 @@ u32 kbase_csf_firmware_set_mcu_core_pwroff_time(struct kbase_device *kbdev, u32
return pwroff;
}
+u32 kbase_csf_firmware_reset_mcu_core_pwroff_time(struct kbase_device *kbdev)
+{
+ return kbase_csf_firmware_set_mcu_core_pwroff_time(kbdev, DEFAULT_GLB_PWROFF_TIMEOUT_NS);
+}
+
int kbase_csf_firmware_early_init(struct kbase_device *kbdev)
{
init_waitqueue_head(&kbdev->csf.event_wait);
@@ -1144,6 +1170,7 @@ int kbase_csf_firmware_early_init(struct kbase_device *kbdev)
kbdev->csf.fw_timeout_ms =
kbase_get_timeout_ms(kbdev, CSF_FIRMWARE_TIMEOUT);
+ kbase_csf_firmware_reset_mcu_core_pwroff_time(kbdev);
INIT_LIST_HEAD(&kbdev->csf.firmware_interfaces);
INIT_LIST_HEAD(&kbdev->csf.firmware_config);
INIT_LIST_HEAD(&kbdev->csf.firmware_trace_buffers.list);
@@ -1153,25 +1180,30 @@ int kbase_csf_firmware_early_init(struct kbase_device *kbdev)
INIT_WORK(&kbdev->csf.fw_error_work, firmware_error_worker);
mutex_init(&kbdev->csf.reg_lock);
+ kbase_csf_pending_gpuq_kicks_init(kbdev);
return 0;
}
void kbase_csf_firmware_early_term(struct kbase_device *kbdev)
{
+ kbase_csf_pending_gpuq_kicks_term(kbdev);
mutex_destroy(&kbdev->csf.reg_lock);
}
int kbase_csf_firmware_late_init(struct kbase_device *kbdev)
{
- kbdev->csf.gpu_idle_hysteresis_us = FIRMWARE_IDLE_HYSTERESIS_TIME_USEC;
+ u32 modifier = 0;
+
+ kbdev->csf.gpu_idle_hysteresis_ns = FIRMWARE_IDLE_HYSTERESIS_TIME_NS;
#ifdef KBASE_PM_RUNTIME
if (kbase_pm_gpu_sleep_allowed(kbdev))
- kbdev->csf.gpu_idle_hysteresis_us /= FIRMWARE_IDLE_HYSTERESIS_GPU_SLEEP_SCALER;
+ kbdev->csf.gpu_idle_hysteresis_ns /= FIRMWARE_IDLE_HYSTERESIS_GPU_SLEEP_SCALER;
#endif
- WARN_ON(!kbdev->csf.gpu_idle_hysteresis_us);
+ WARN_ON(!kbdev->csf.gpu_idle_hysteresis_ns);
kbdev->csf.gpu_idle_dur_count =
- convert_dur_to_idle_count(kbdev, kbdev->csf.gpu_idle_hysteresis_us);
+ convert_dur_to_idle_count(kbdev, kbdev->csf.gpu_idle_hysteresis_ns, &modifier);
+ kbdev->csf.gpu_idle_dur_count_modifier = modifier;
return 0;
}
@@ -1254,10 +1286,10 @@ void kbase_csf_firmware_unload_term(struct kbase_device *kbdev)
/* NO_MALI: Don't stop firmware or unload MMU tables */
- kbase_csf_scheduler_term(kbdev);
-
kbase_csf_free_dummy_user_reg_page(kbdev);
+ kbase_csf_scheduler_term(kbdev);
+
kbase_csf_doorbell_mapping_term(kbdev);
free_global_iface(kbdev);
@@ -1604,8 +1636,7 @@ int kbase_csf_firmware_mcu_shared_mapping_init(
if (!cpu_addr)
goto vmap_error;
- va_reg = kbase_alloc_free_region(kbdev, &kbdev->csf.shared_reg_rbtree, 0, num_pages,
- KBASE_REG_ZONE_MCU_SHARED);
+ va_reg = kbase_alloc_free_region(&kbdev->csf.mcu_shared_zone, 0, num_pages);
if (!va_reg)
goto va_region_alloc_error;
@@ -1621,7 +1652,7 @@ int kbase_csf_firmware_mcu_shared_mapping_init(
ret = kbase_mmu_insert_pages_no_flush(kbdev, &kbdev->csf.mcu_mmu, va_reg->start_pfn,
&phys[0], num_pages, gpu_map_properties,
- KBASE_MEM_GROUP_CSF_FW, NULL, NULL, false);
+ KBASE_MEM_GROUP_CSF_FW, NULL, NULL);
if (ret)
goto mmu_insert_pages_error;
diff --git a/mali_kbase/csf/mali_kbase_csf_kcpu.c b/mali_kbase/csf/mali_kbase_csf_kcpu.c
index 6cb6733..08d82d2 100644
--- a/mali_kbase/csf/mali_kbase_csf_kcpu.c
+++ b/mali_kbase/csf/mali_kbase_csf_kcpu.c
@@ -24,7 +24,9 @@
#include <mali_kbase_ctx_sched.h>
#include "device/mali_kbase_device.h"
#include "mali_kbase_csf.h"
+#include "mali_kbase_csf_sync_debugfs.h"
#include <linux/export.h>
+#include <linux/version_compat_defs.h>
#if IS_ENABLED(CONFIG_SYNC_FILE)
#include "mali_kbase_fence.h"
@@ -679,7 +681,7 @@ static int kbase_csf_queue_group_suspend_prepare(
struct tagged_addr *page_array;
u64 start, end, i;
- if (((reg->flags & KBASE_REG_ZONE_MASK) != KBASE_REG_ZONE_SAME_VA) ||
+ if ((kbase_bits_to_zone(reg->flags) != SAME_VA_ZONE) ||
(kbase_reg_current_backed_size(reg) < nr_pages) ||
!(reg->flags & KBASE_REG_CPU_WR) ||
(reg->gpu_alloc->type != KBASE_MEM_TYPE_NATIVE) ||
@@ -1343,6 +1345,7 @@ static void kbase_csf_fence_wait_callback(struct dma_fence *fence,
/* Fence gets signaled. Deactivate the timer for fence-wait timeout */
del_timer(&kcpu_queue->fence_timeout);
#endif
+
KBASE_KTRACE_ADD_CSF_KCPU(kctx->kbdev, KCPU_FENCE_WAIT_END, kcpu_queue,
fence->context, fence->seqno);
@@ -1445,14 +1448,14 @@ static void fence_timeout_callback(struct timer_list *timer)
}
/**
- * fence_timeout_start() - Start a timer to check fence-wait timeout
+ * fence_wait_timeout_start() - Start a timer to check fence-wait timeout
*
* @cmd: KCPU command queue
*
* Activate a timer to check whether a fence-wait command in the queue
* gets completed within FENCE_WAIT_TIMEOUT_MS
*/
-static void fence_timeout_start(struct kbase_kcpu_command_queue *cmd)
+static void fence_wait_timeout_start(struct kbase_kcpu_command_queue *cmd)
{
mod_timer(&cmd->fence_timeout, jiffies + msecs_to_jiffies(FENCE_WAIT_TIMEOUT_MS));
}
@@ -1489,18 +1492,20 @@ static int kbase_kcpu_fence_wait_process(
if (kcpu_queue->fence_wait_processed) {
fence_status = dma_fence_get_status(fence);
} else {
- int cb_err = dma_fence_add_callback(fence,
+ int cb_err;
+
+ KBASE_KTRACE_ADD_CSF_KCPU(kctx->kbdev, KCPU_FENCE_WAIT_START, kcpu_queue,
+ fence->context, fence->seqno);
+
+ cb_err = dma_fence_add_callback(fence,
&fence_info->fence_cb,
kbase_csf_fence_wait_callback);
- KBASE_KTRACE_ADD_CSF_KCPU(kctx->kbdev,
- KCPU_FENCE_WAIT_START, kcpu_queue,
- fence->context, fence->seqno);
fence_status = cb_err;
if (cb_err == 0) {
kcpu_queue->fence_wait_processed = true;
#ifdef CONFIG_MALI_FENCE_DEBUG
- fence_timeout_start(kcpu_queue);
+ fence_wait_timeout_start(kcpu_queue);
#endif
} else if (cb_err == -ENOENT) {
fence_status = dma_fence_get_status(fence);
@@ -1512,14 +1517,12 @@ static int kbase_kcpu_fence_wait_process(
"Unexpected status for fence %s of ctx:%d_%d kcpu queue:%u",
info.name, kctx->tgid, kctx->id, kcpu_queue->id);
}
- /*
- * At this point the fence in question is already signalled without
- * any error. Its useful to print a FENCE_WAIT_END trace here to
- * indicate completion.
- */
- KBASE_KTRACE_ADD_CSF_KCPU(kctx->kbdev,
- KCPU_FENCE_WAIT_END, kcpu_queue,
- fence->context, fence->seqno);
+
+ KBASE_KTRACE_ADD_CSF_KCPU(kctx->kbdev, KCPU_FENCE_WAIT_END, kcpu_queue,
+ fence->context, fence->seqno);
+ } else {
+ KBASE_KTRACE_ADD_CSF_KCPU(kctx->kbdev, KCPU_FENCE_WAIT_END, kcpu_queue,
+ fence->context, fence->seqno);
}
}
@@ -1565,12 +1568,193 @@ static int kbase_kcpu_fence_wait_prepare(struct kbase_kcpu_command_queue *kcpu_q
return 0;
}
+/**
+ * fence_signal_timeout_start() - Start a timer to check whether an enqueued
+ *                                fence-signal command stays blocked for too long
+ *
+ * @kcpu_queue: KCPU command queue
+ *
+ * Activate the queue's fence_signal_timeout timer to check whether an enqueued
+ * fence-signal command has been blocked for longer than the configured wait duration.
+ */
+static void fence_signal_timeout_start(struct kbase_kcpu_command_queue *kcpu_queue)
+{
+ struct kbase_device *kbdev = kcpu_queue->kctx->kbdev;
+ unsigned int wait_ms = kbase_get_timeout_ms(kbdev, KCPU_FENCE_SIGNAL_TIMEOUT);
+
+ if (atomic_read(&kbdev->fence_signal_timeout_enabled))
+ mod_timer(&kcpu_queue->fence_signal_timeout, jiffies + msecs_to_jiffies(wait_ms));
+}
+
+static void kbase_kcpu_command_fence_force_signaled_set(
+ struct kbase_kcpu_command_fence_info *fence_info,
+ bool has_force_signaled)
+{
+ fence_info->fence_has_force_signaled = has_force_signaled;
+}
+
+bool kbase_kcpu_command_fence_has_force_signaled(struct kbase_kcpu_command_fence_info *fence_info)
+{
+ return fence_info->fence_has_force_signaled;
+}
+
+static int kbase_kcpu_fence_force_signal_process(
+ struct kbase_kcpu_command_queue *kcpu_queue,
+ struct kbase_kcpu_command_fence_info *fence_info)
+{
+ struct kbase_context *const kctx = kcpu_queue->kctx;
+ int ret;
+
+ /* Already force-signalled, just return */
+ if (kbase_kcpu_command_fence_has_force_signaled(fence_info))
+ return 0;
+
+ if (WARN_ON(!fence_info->fence))
+ return -EINVAL;
+
+ ret = dma_fence_signal(fence_info->fence);
+ if (unlikely(ret < 0)) {
+ dev_warn(kctx->kbdev->dev, "dma_fence(%d) has been signalled already\n", ret);
+ /* Treated as a success */
+ ret = 0;
+ }
+
+ KBASE_KTRACE_ADD_CSF_KCPU(kctx->kbdev, KCPU_FENCE_SIGNAL, kcpu_queue,
+ fence_info->fence->context,
+ fence_info->fence->seqno);
+
+#if (KERNEL_VERSION(5, 1, 0) > LINUX_VERSION_CODE)
+ dev_info(kctx->kbdev->dev,
+ "ctx:%d_%d kcpu queue[%pK]:%u signal fence[%pK] context#seqno:%llu#%u\n",
+ kctx->tgid, kctx->id, kcpu_queue, kcpu_queue->id, fence_info->fence,
+ fence_info->fence->context, fence_info->fence->seqno);
+#else
+ dev_info(kctx->kbdev->dev,
+ "ctx:%d_%d kcpu queue[%pK]:%u signal fence[%pK] context#seqno:%llu#%llu\n",
+ kctx->tgid, kctx->id, kcpu_queue, kcpu_queue->id, fence_info->fence,
+ fence_info->fence->context, fence_info->fence->seqno);
+#endif
+
+ /* dma_fence refcount needs to be decreased to release it. */
+ dma_fence_put(fence_info->fence);
+ fence_info->fence = NULL;
+
+ return ret;
+}
+
+static void kcpu_force_signal_fence(struct kbase_kcpu_command_queue *kcpu_queue)
+{
+ int status;
+ int i;
+#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
+ struct fence *fence;
+#else
+ struct dma_fence *fence;
+#endif
+ struct kbase_context *const kctx = kcpu_queue->kctx;
+#ifdef CONFIG_MALI_FENCE_DEBUG
+ int del;
+#endif
+
+ /* Force trigger all pending fence-signal commands */
+ for (i = 0; i != kcpu_queue->num_pending_cmds; ++i) {
+ struct kbase_kcpu_command *cmd =
+ &kcpu_queue->commands[(u8)(kcpu_queue->start_offset + i)];
+
+ if (cmd->type == BASE_KCPU_COMMAND_TYPE_FENCE_SIGNAL) {
+ /* If the fence has already been force-signalled, skip it in
+ * this round of force signalling.
+ */
+ if (kbase_kcpu_command_fence_has_force_signaled(&cmd->info.fence))
+ continue;
+
+ fence = kbase_fence_get(&cmd->info.fence);
+
+ dev_info(kctx->kbdev->dev, "kbase KCPU[%pK] cmd%d fence[%pK] force signaled\n",
+ kcpu_queue, i+1, fence);
+
+ /* Set the ETIMEDOUT error flag before signalling the fence */
+ dma_fence_set_error_helper(fence, -ETIMEDOUT);
+
+ /* force signal fence */
+ status = kbase_kcpu_fence_force_signal_process(
+ kcpu_queue, &cmd->info.fence);
+ if (status < 0)
+ dev_err(kctx->kbdev->dev, "kbase signal failed\n");
+ else
+ kbase_kcpu_command_fence_force_signaled_set(&cmd->info.fence, true);
+
+ kcpu_queue->has_error = true;
+ }
+ }
+
+ /* Reset fence_signal_pending_cnt to 0 and delete the queue's timer,
+ * since all pending fences in the queue have now been signalled.
+ */
+ atomic_set(&kcpu_queue->fence_signal_pending_cnt, 0);
+#ifdef CONFIG_MALI_FENCE_DEBUG
+ del = del_timer_sync(&kcpu_queue->fence_signal_timeout);
+ dev_info(kctx->kbdev->dev, "kbase KCPU [%pK] delete fence signal timeout timer ret: %d",
+ kcpu_queue, del);
+#else
+ del_timer_sync(&kcpu_queue->fence_signal_timeout);
+#endif
+}
+
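
The per-fence step above follows the usual dma-fence pattern for reporting a timeout: record an error status first so waiters observe a failure, then signal. A minimal sketch using the mainline API (dma_fence_set_error() is what the compat helper used above is expected to map to on recent kernels):

    #include <linux/dma-fence.h>
    #include <linux/errno.h>
    #include <linux/printk.h>

    /* Mark a stuck fence with -ETIMEDOUT before signalling it */
    static void example_force_signal(struct dma_fence *fence)
    {
        dma_fence_set_error(fence, -ETIMEDOUT); /* must be done before signalling */
        if (dma_fence_signal(fence) < 0)
            pr_debug("fence was already signalled\n");
    }
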
+static void kcpu_queue_force_fence_signal(struct kbase_kcpu_command_queue *kcpu_queue)
+{
+ struct kbase_context *const kctx = kcpu_queue->kctx;
+ char buff[] = "surfaceflinger";
+
+ /* Force-signal unsignalled fences, except for surfaceflinger */
+ if (memcmp(kctx->comm, buff, sizeof(buff))) {
+ mutex_lock(&kcpu_queue->lock);
+ kcpu_force_signal_fence(kcpu_queue);
+ mutex_unlock(&kcpu_queue->lock);
+ }
+}
+
+/**
+ * fence_signal_timeout_cb() - Timeout callback for fence-signal commands
+ *
+ * @timer: Timer struct
+ *
+ * Callback invoked when an enqueued fence-signal command has not completed within its
+ * configured wait duration. The actual sync-state dump and any follow-up handling are
+ * deferred to a bottom-half work item.
+ */
+static void fence_signal_timeout_cb(struct timer_list *timer)
+{
+ struct kbase_kcpu_command_queue *kcpu_queue =
+ container_of(timer, struct kbase_kcpu_command_queue, fence_signal_timeout);
+ struct kbase_context *const kctx = kcpu_queue->kctx;
+#ifdef CONFIG_MALI_FENCE_DEBUG
+ dev_warn(kctx->kbdev->dev, "kbase KCPU fence signal timeout callback triggered");
+#endif
+
+ /* Only if the global configuration option is set: re-arm the timer for any
+ * remaining fence-signal commands and queue the bottom-half work that dumps
+ * the state to dmesg.
+ */
+ if (atomic_read(&kctx->kbdev->fence_signal_timeout_enabled)) {
+ if (atomic_read(&kcpu_queue->fence_signal_pending_cnt) > 1)
+ fence_signal_timeout_start(kcpu_queue);
+
+ kthread_queue_work(&kcpu_queue->csf_kcpu_worker, &kcpu_queue->timeout_work);
+ }
+}
+
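
The timer callback deliberately does almost nothing in atomic context and pushes the expensive dump onto a worker. A self-contained sketch of the same re-arm-and-defer pattern, using a plain system-workqueue item in place of the driver's dedicated kthread worker (names and the 100 ms period are illustrative):

    #include <linux/module.h>
    #include <linux/timer.h>
    #include <linux/workqueue.h>
    #include <linux/jiffies.h>
    #include <linux/atomic.h>

    struct demo_ctx {
        struct timer_list timeout;
        struct work_struct bottom_half;
        atomic_t pending;
    };

    static struct demo_ctx demo;

    static void demo_bottom_half(struct work_struct *work)
    {
        struct demo_ctx *ctx = container_of(work, struct demo_ctx, bottom_half);

        pr_info("demo: dumping state, %d item(s) still pending\n",
                atomic_read(&ctx->pending));
    }

    static void demo_timeout_cb(struct timer_list *t)
    {
        struct demo_ctx *ctx = from_timer(ctx, t, timeout);

        /* Re-arm while more work is outstanding, defer the heavy lifting */
        if (atomic_read(&ctx->pending) > 1)
            mod_timer(&ctx->timeout, jiffies + msecs_to_jiffies(100));
        schedule_work(&ctx->bottom_half);
    }

    static int __init demo_init(void)
    {
        atomic_set(&demo.pending, 2);
        INIT_WORK(&demo.bottom_half, demo_bottom_half);
        timer_setup(&demo.timeout, demo_timeout_cb, 0);
        mod_timer(&demo.timeout, jiffies + msecs_to_jiffies(100));
        return 0;
    }

    static void __exit demo_exit(void)
    {
        del_timer_sync(&demo.timeout);
        flush_work(&demo.bottom_half);
    }

    module_init(demo_init);
    module_exit(demo_exit);
    MODULE_LICENSE("GPL");
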
static int kbasep_kcpu_fence_signal_process(struct kbase_kcpu_command_queue *kcpu_queue,
struct kbase_kcpu_command_fence_info *fence_info)
{
struct kbase_context *const kctx = kcpu_queue->kctx;
int ret;
+ /* Already force-signalled, nothing more to do */
+ if (kbase_kcpu_command_fence_has_force_signaled(fence_info))
+ return 0;
+
if (WARN_ON(!fence_info->fence))
return -EINVAL;
@@ -1586,6 +1770,25 @@ static int kbasep_kcpu_fence_signal_process(struct kbase_kcpu_command_queue *kcp
fence_info->fence->context,
fence_info->fence->seqno);
+ /* If more fence-signal commands are still enqueued, re-arm the timer */
+ if (atomic_dec_return(&kcpu_queue->fence_signal_pending_cnt) > 0) {
+ fence_signal_timeout_start(kcpu_queue);
+#ifdef CONFIG_MALI_FENCE_DEBUG
+ dev_dbg(kctx->kbdev->dev,
+ "kbase re-arm KCPU fence signal timeout timer for next signal command");
+#endif
+ } else {
+#ifdef CONFIG_MALI_FENCE_DEBUG
+ int del = del_timer_sync(&kcpu_queue->fence_signal_timeout);
+
+ dev_dbg(kctx->kbdev->dev, "kbase KCPU delete fence signal timeout timer ret: %d",
+ del);
+ CSTD_UNUSED(del);
+#else
+ del_timer_sync(&kcpu_queue->fence_signal_timeout);
+#endif
+ }
+
/* dma_fence refcount needs to be decreased to release it. */
kbase_fence_put(fence_info->fence);
fence_info->fence = NULL;
@@ -1614,6 +1817,10 @@ static int kbasep_kcpu_fence_signal_init(struct kbase_kcpu_command_queue *kcpu_q
/* Set reference to KCPU metadata */
kcpu_fence->metadata = kcpu_queue->metadata;
+ /* Set reference to KCPU metadata and increment refcount */
+ kcpu_fence->metadata = kcpu_queue->metadata;
+ WARN_ON(!kbase_refcount_inc_not_zero(&kcpu_fence->metadata->refcount));
+
#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
fence_out = (struct fence *)kcpu_fence;
#else
@@ -1635,8 +1842,6 @@ static int kbasep_kcpu_fence_signal_init(struct kbase_kcpu_command_queue *kcpu_q
dma_fence_get(fence_out);
#endif
- WARN_ON(!kbase_refcount_inc_not_zero(&kcpu_fence->metadata->refcount));
-
/* create a sync_file fd representing the fence */
*sync_file = sync_file_create(fence_out);
if (!(*sync_file)) {
@@ -1654,6 +1859,7 @@ static int kbasep_kcpu_fence_signal_init(struct kbase_kcpu_command_queue *kcpu_q
current_command->type = BASE_KCPU_COMMAND_TYPE_FENCE_SIGNAL;
current_command->info.fence.fence = fence_out;
+ kbase_kcpu_command_fence_force_signaled_set(&current_command->info.fence, false);
return 0;
@@ -1700,6 +1906,10 @@ static int kbase_kcpu_fence_signal_prepare(struct kbase_kcpu_command_queue *kcpu
* before returning success.
*/
fd_install(fd, sync_file->file);
+
+ if (atomic_inc_return(&kcpu_queue->fence_signal_pending_cnt) == 1)
+ fence_signal_timeout_start(kcpu_queue);
+
return 0;
fail:
@@ -1732,6 +1942,90 @@ int kbase_kcpu_fence_signal_init(struct kbase_kcpu_command_queue *kcpu_queue,
KBASE_EXPORT_TEST_API(kbase_kcpu_fence_signal_init);
#endif /* CONFIG_SYNC_FILE */
+static void kcpu_queue_dump(struct kbase_kcpu_command_queue *queue)
+{
+ struct kbase_context *kctx = queue->kctx;
+ struct kbase_kcpu_command *cmd;
+ struct kbase_kcpu_command_fence_info *fence_info;
+ struct kbase_kcpu_dma_fence *kcpu_fence;
+#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
+ struct fence *fence;
+#else
+ struct dma_fence *fence;
+#endif
+ struct kbase_sync_fence_info info;
+ size_t i;
+
+ mutex_lock(&queue->lock);
+
+ /* Find the next fence signal command in the queue */
+ for (i = 0; i != queue->num_pending_cmds; ++i) {
+ cmd = &queue->commands[(u8)(queue->start_offset + i)];
+ if (cmd->type == BASE_KCPU_COMMAND_TYPE_FENCE_SIGNAL) {
+ fence_info = &cmd->info.fence;
+ /* Find the first fence that has not been force-signalled */
+ if (!kbase_kcpu_command_fence_has_force_signaled(fence_info))
+ break;
+ }
+ }
+
+ if (i == queue->num_pending_cmds) {
+ dev_err(kctx->kbdev->dev,
+ "%s: No fence signal command found in ctx:%d_%d kcpu queue:%u", __func__,
+ kctx->tgid, kctx->id, queue->id);
+ mutex_unlock(&queue->lock);
+ return;
+ }
+
+ fence = kbase_fence_get(fence_info);
+ if (!fence) {
+ dev_err(kctx->kbdev->dev, "no fence found in ctx:%d_%d kcpu queue:%u", kctx->tgid,
+ kctx->id, queue->id);
+ mutex_unlock(&queue->lock);
+ return;
+ }
+
+ kcpu_fence = kbase_kcpu_dma_fence_get(fence);
+ if (!kcpu_fence) {
+ dev_err(kctx->kbdev->dev, "no fence metadata found in ctx:%d_%d kcpu queue:%u",
+ kctx->tgid, kctx->id, queue->id);
+ kbase_fence_put(fence);
+ mutex_unlock(&queue->lock);
+ return;
+ }
+
+ kbase_sync_fence_info_get(fence, &info);
+
+ dev_warn(kctx->kbdev->dev, "------------------------------------------------\n");
+ dev_warn(kctx->kbdev->dev, "KCPU Fence signal timeout detected for ctx:%d_%d\n", kctx->tgid,
+ kctx->id);
+ dev_warn(kctx->kbdev->dev, "------------------------------------------------\n");
+ dev_warn(kctx->kbdev->dev, "Kcpu queue:%u still waiting for fence[%pK] context#seqno:%s\n",
+ queue->id, fence, info.name);
+ dev_warn(kctx->kbdev->dev, "Fence metadata timeline name: %s\n",
+ kcpu_fence->metadata->timeline_name);
+
+ kbase_fence_put(fence);
+ mutex_unlock(&queue->lock);
+
+ mutex_lock(&kctx->csf.kcpu_queues.lock);
+ kbasep_csf_sync_kcpu_dump_locked(kctx, NULL);
+ mutex_unlock(&kctx->csf.kcpu_queues.lock);
+
+ dev_warn(kctx->kbdev->dev, "-----------------------------------------------\n");
+}
+
+static void kcpu_queue_timeout_worker(struct kthread_work *data)
+{
+ struct kbase_kcpu_command_queue *queue =
+ container_of(data, struct kbase_kcpu_command_queue, timeout_work);
+
+ kcpu_queue_dump(queue);
+
+ kcpu_queue_force_fence_signal(queue);
+}
+
static void kcpu_queue_process_worker(struct kthread_work *data)
{
struct kbase_kcpu_command_queue *queue = container_of(data,
@@ -2087,6 +2381,7 @@ static void kcpu_queue_process(struct kbase_kcpu_command_queue *queue,
status = kbase_csf_queue_group_suspend_process(
queue->kctx, sus_buf,
cmd->info.suspend_buf_copy.group_handle);
+
if (status)
queue->has_error = true;
@@ -2579,6 +2874,7 @@ int kbase_csf_kcpu_queue_new(struct kbase_context *kctx,
INIT_LIST_HEAD(&queue->jit_blocked);
queue->has_error = false;
kthread_init_work(&queue->work, kcpu_queue_process_worker);
+ kthread_init_work(&queue->timeout_work, kcpu_queue_timeout_worker);
queue->id = idx;
newq->id = idx;
@@ -2594,9 +2890,96 @@ int kbase_csf_kcpu_queue_new(struct kbase_context *kctx,
#ifdef CONFIG_MALI_FENCE_DEBUG
kbase_timer_setup(&queue->fence_timeout, fence_timeout_callback);
#endif
+
+#if IS_ENABLED(CONFIG_SYNC_FILE)
+ atomic_set(&queue->fence_signal_pending_cnt, 0);
+ kbase_timer_setup(&queue->fence_signal_timeout, fence_signal_timeout_cb);
+#endif
out:
mutex_unlock(&kctx->csf.kcpu_queues.lock);
return ret;
}
KBASE_EXPORT_TEST_API(kbase_csf_kcpu_queue_new);
+
+int kbase_csf_kcpu_queue_halt_timers(struct kbase_device *kbdev)
+{
+ struct kbase_context *kctx;
+
+ list_for_each_entry(kctx, &kbdev->kctx_list, kctx_list_link) {
+ unsigned long queue_idx;
+ struct kbase_csf_kcpu_queue_context *kcpu_ctx = &kctx->csf.kcpu_queues;
+
+ mutex_lock(&kcpu_ctx->lock);
+
+ for_each_set_bit(queue_idx, kcpu_ctx->in_use, KBASEP_MAX_KCPU_QUEUES) {
+ struct kbase_kcpu_command_queue *kcpu_queue = kcpu_ctx->array[queue_idx];
+
+ if (unlikely(!kcpu_queue))
+ continue;
+
+ mutex_lock(&kcpu_queue->lock);
+
+ if (atomic_read(&kcpu_queue->fence_signal_pending_cnt)) {
+ int ret = del_timer_sync(&kcpu_queue->fence_signal_timeout);
+
+ dev_dbg(kbdev->dev,
+ "Fence signal timeout on KCPU queue(%lu), kctx (%d_%d) was %s on suspend",
+ queue_idx, kctx->tgid, kctx->id,
+ ret ? "pending" : "not pending");
+ }
+
+#ifdef CONFIG_MALI_FENCE_DEBUG
+ if (kcpu_queue->fence_wait_processed) {
+ int ret = del_timer_sync(&kcpu_queue->fence_timeout);
+
+ dev_dbg(kbdev->dev,
+ "Fence wait timeout on KCPU queue(%lu), kctx (%d_%d) was %s on suspend",
+ queue_idx, kctx->tgid, kctx->id,
+ ret ? "pending" : "not pending");
+ }
+#endif
+ mutex_unlock(&kcpu_queue->lock);
+ }
+ mutex_unlock(&kcpu_ctx->lock);
+ }
+ return 0;
+}
+
+void kbase_csf_kcpu_queue_resume_timers(struct kbase_device *kbdev)
+{
+ struct kbase_context *kctx;
+
+ list_for_each_entry(kctx, &kbdev->kctx_list, kctx_list_link) {
+ unsigned long queue_idx;
+ struct kbase_csf_kcpu_queue_context *kcpu_ctx = &kctx->csf.kcpu_queues;
+
+ mutex_lock(&kcpu_ctx->lock);
+
+ for_each_set_bit(queue_idx, kcpu_ctx->in_use, KBASEP_MAX_KCPU_QUEUES) {
+ struct kbase_kcpu_command_queue *kcpu_queue = kcpu_ctx->array[queue_idx];
+
+ if (unlikely(!kcpu_queue))
+ continue;
+
+ mutex_lock(&kcpu_queue->lock);
+#ifdef CONFIG_MALI_FENCE_DEBUG
+ if (kcpu_queue->fence_wait_processed) {
+ fence_wait_timeout_start(kcpu_queue);
+ dev_dbg(kbdev->dev,
+ "Fence wait timeout on KCPU queue(%lu), kctx (%d_%d) has been resumed on system resume",
+ queue_idx, kctx->tgid, kctx->id);
+ }
+#endif
+ if (atomic_read(&kbdev->fence_signal_timeout_enabled) &&
+ atomic_read(&kcpu_queue->fence_signal_pending_cnt)) {
+ fence_signal_timeout_start(kcpu_queue);
+ dev_dbg(kbdev->dev,
+ "Fence signal timeout on KCPU queue(%lu), kctx (%d_%d) has been resumed on system resume",
+ queue_idx, kctx->tgid, kctx->id);
+ }
+ mutex_unlock(&kcpu_queue->lock);
+ }
+ mutex_unlock(&kcpu_ctx->lock);
+ }
+}
diff --git a/mali_kbase/csf/mali_kbase_csf_kcpu.h b/mali_kbase/csf/mali_kbase_csf_kcpu.h
index 41c6e07..4a8d937 100644
--- a/mali_kbase/csf/mali_kbase_csf_kcpu.h
+++ b/mali_kbase/csf/mali_kbase_csf_kcpu.h
@@ -53,6 +53,7 @@ struct kbase_kcpu_command_import_info {
* @fence_cb: Fence callback
* @fence: Fence
* @kcpu_queue: kcpu command queue
+ * @fence_has_force_signaled: Flag indicating that the fence was force-signalled after a signal timeout
*/
struct kbase_kcpu_command_fence_info {
#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
@@ -63,6 +64,7 @@ struct kbase_kcpu_command_fence_info {
struct dma_fence *fence;
#endif /* LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0) */
struct kbase_kcpu_command_queue *kcpu_queue;
+ bool fence_has_force_signaled;
};
/**
@@ -249,10 +251,13 @@ struct kbase_kcpu_command {
* enqueued to this command queue.
* @csf_kcpu_worker: Dedicated worker for processing kernel CPU command
* queues.
- * @work: struct work_struct which contains a pointer to
+ * @work: struct kthread_work which contains a pointer to
* the function which handles processing of kcpu
* commands enqueued into a kcpu command queue;
* part of kernel API for processing workqueues
+ * @timeout_work:   struct kthread_work which contains a pointer to the
+ *                  function which handles the post-timeout actions for the
+ *                  queue when a fence-signal timeout occurs.
* @start_offset: Index of the command to be executed next
* @id: KCPU command queue ID.
* @num_pending_cmds: The number of commands enqueued but not yet
@@ -284,6 +289,9 @@ struct kbase_kcpu_command {
* @fence_timeout: Timer used to detect the fence wait timeout.
* @metadata: Metadata structure containing basic information about
* this queue for any fence objects associated with this queue.
+ * @fence_signal_timeout: Timer used to detect that a fence-signal command has
+ *                        been blocked for too long.
+ * @fence_signal_pending_cnt: Number of fence-signal commands enqueued in the queue.
*/
struct kbase_kcpu_command_queue {
struct mutex lock;
@@ -291,6 +299,7 @@ struct kbase_kcpu_command_queue {
struct kbase_kcpu_command commands[KBASEP_KCPU_QUEUE_SIZE];
struct kthread_worker csf_kcpu_worker;
struct kthread_work work;
+ struct kthread_work timeout_work;
u8 start_offset;
u8 id;
u16 num_pending_cmds;
@@ -308,6 +317,8 @@ struct kbase_kcpu_command_queue {
#if IS_ENABLED(CONFIG_SYNC_FILE)
struct kbase_kcpu_dma_fence_meta *metadata;
#endif /* CONFIG_SYNC_FILE */
+ struct timer_list fence_signal_timeout;
+ atomic_t fence_signal_pending_cnt;
};
/**
@@ -382,4 +393,32 @@ int kbase_kcpu_fence_signal_init(struct kbase_kcpu_command_queue *kcpu_queue,
struct base_fence *fence, struct sync_file **sync_file, int *fd);
#endif /* CONFIG_SYNC_FILE */
+/*
+ * kbase_csf_kcpu_queue_halt_timers - Halt the KCPU fence timers associated with
+ * the kbase device.
+ *
+ * @kbdev: Kbase device
+ *
+ * Note that this function assumes that the caller has ensured that the
+ * kbase_device::kctx_list does not get updated during this function's runtime.
+ * At the moment, the function is only safe to call during system suspend, when
+ * the device PM active count has reached zero.
+ *
+ * Return: 0 on success, negative value otherwise.
+ */
+int kbase_csf_kcpu_queue_halt_timers(struct kbase_device *kbdev);
+
+/*
+ * kbase_csf_kcpu_queue_resume_timers - Resume the KCPU fence timers associated
+ * with the kbase device.
+ *
+ * @kbdev: Kbase device
+ *
+ * Note that this function assumes that the caller has ensured that the
+ * kbase_device::kctx_list does not get updated during this function's runtime.
+ * At the moment, the function is only safe to call during system resume.
+ */
+void kbase_csf_kcpu_queue_resume_timers(struct kbase_device *kbdev);
+
+bool kbase_kcpu_command_fence_has_force_signaled(struct kbase_kcpu_command_fence_info *fence_info);
#endif /* _KBASE_CSF_KCPU_H_ */
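
A hypothetical caller sketch for the two declarations above; the hook names are illustrative, and the real call sites are presumably in the driver's system suspend/resume path once the PM active count has reached zero:

    #include <mali_kbase.h>
    #include <csf/mali_kbase_csf_kcpu.h>

    /* Illustrative suspend/resume hooks, not the driver's actual ones */
    static int example_system_suspend(struct kbase_device *kbdev)
    {
        /* Safe only once no context can be submitting new KCPU work */
        return kbase_csf_kcpu_queue_halt_timers(kbdev);
    }

    static void example_system_resume(struct kbase_device *kbdev)
    {
        kbase_csf_kcpu_queue_resume_timers(kbdev);
    }
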
diff --git a/mali_kbase/csf/mali_kbase_csf_kcpu_fence_debugfs.c b/mali_kbase/csf/mali_kbase_csf_kcpu_fence_debugfs.c
new file mode 100644
index 0000000..cd55f62
--- /dev/null
+++ b/mali_kbase/csf/mali_kbase_csf_kcpu_fence_debugfs.c
@@ -0,0 +1,151 @@
+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
+/*
+ *
+ * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+#include <linux/fs.h>
+#include <linux/version.h>
+#include <linux/module.h>
+#if IS_ENABLED(CONFIG_DEBUG_FS)
+#include <linux/debugfs.h>
+#endif
+
+#include <mali_kbase.h>
+#include <csf/mali_kbase_csf_kcpu_fence_debugfs.h>
+#include <mali_kbase_hwaccess_time.h>
+
+#define BUF_SIZE 10
+
+#if IS_ENABLED(CONFIG_DEBUG_FS)
+static ssize_t kbase_csf_kcpu_queue_fence_signal_enabled_get(struct file *file, char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ int ret;
+ struct kbase_device *kbdev = file->private_data;
+
+ if (atomic_read(&kbdev->fence_signal_timeout_enabled))
+ ret = simple_read_from_buffer(buf, count, ppos, "1\n", 2);
+ else
+ ret = simple_read_from_buffer(buf, count, ppos, "0\n", 2);
+
+ return ret;
+};
+
+static ssize_t kbase_csf_kcpu_queue_fence_signal_enabled_set(struct file *file,
+ const char __user *buf, size_t count,
+ loff_t *ppos)
+{
+ int ret;
+ unsigned int enabled;
+ struct kbase_device *kbdev = file->private_data;
+
+ ret = kstrtouint_from_user(buf, count, 10, &enabled);
+ if (ret < 0)
+ return ret;
+
+ atomic_set(&kbdev->fence_signal_timeout_enabled, enabled);
+
+ return count;
+}
+
+static const struct file_operations kbase_csf_kcpu_queue_fence_signal_fops = {
+ .owner = THIS_MODULE,
+ .read = kbase_csf_kcpu_queue_fence_signal_enabled_get,
+ .write = kbase_csf_kcpu_queue_fence_signal_enabled_set,
+ .open = simple_open,
+ .llseek = default_llseek,
+};
+
+static ssize_t kbase_csf_kcpu_queue_fence_signal_timeout_get(struct file *file, char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ int size;
+ char buffer[BUF_SIZE];
+ struct kbase_device *kbdev = file->private_data;
+ unsigned int timeout_ms = kbase_get_timeout_ms(kbdev, KCPU_FENCE_SIGNAL_TIMEOUT);
+
+ size = scnprintf(buffer, sizeof(buffer), "%u\n", timeout_ms);
+ return simple_read_from_buffer(buf, count, ppos, buffer, size);
+}
+
+static ssize_t kbase_csf_kcpu_queue_fence_signal_timeout_set(struct file *file,
+ const char __user *buf, size_t count,
+ loff_t *ppos)
+{
+ int ret;
+ unsigned int timeout_ms;
+ struct kbase_device *kbdev = file->private_data;
+
+ ret = kstrtouint_from_user(buf, count, 10, &timeout_ms);
+ if (ret < 0)
+ return ret;
+
+ /* The timeout passed by the user is bounded when it is inserted into
+ * the precomputed timeout table, so no further validation is needed
+ * beforehand.
+ */
+ kbase_device_set_timeout_ms(kbdev, KCPU_FENCE_SIGNAL_TIMEOUT, timeout_ms);
+
+ return count;
+}
+
+static const struct file_operations kbase_csf_kcpu_queue_fence_signal_timeout_fops = {
+ .owner = THIS_MODULE,
+ .read = kbase_csf_kcpu_queue_fence_signal_timeout_get,
+ .write = kbase_csf_kcpu_queue_fence_signal_timeout_set,
+ .open = simple_open,
+ .llseek = default_llseek,
+};
+
+int kbase_csf_fence_timer_debugfs_init(struct kbase_device *kbdev)
+{
+ struct dentry *file;
+ const mode_t mode = 0644;
+
+ if (WARN_ON(IS_ERR_OR_NULL(kbdev->mali_debugfs_directory)))
+ return -1;
+
+ file = debugfs_create_file("fence_signal_timeout_enable", mode,
+ kbdev->mali_debugfs_directory, kbdev,
+ &kbase_csf_kcpu_queue_fence_signal_fops);
+
+ if (IS_ERR_OR_NULL(file)) {
+ dev_warn(kbdev->dev, "Unable to create fence signal timer toggle entry");
+ return -1;
+ }
+
+ file = debugfs_create_file("fence_signal_timeout_ms", mode, kbdev->mali_debugfs_directory,
+ kbdev, &kbase_csf_kcpu_queue_fence_signal_timeout_fops);
+
+ if (IS_ERR_OR_NULL(file)) {
+ dev_warn(kbdev->dev, "Unable to create fence signal timeout entry");
+ return -1;
+ }
+ return 0;
+}
+
+#else
+int kbase_csf_fence_timer_debugfs_init(struct kbase_device *kbdev)
+{
+ return 0;
+}
+
+#endif
+void kbase_csf_fence_timer_debugfs_term(struct kbase_device *kbdev)
+{
+}
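
For completeness, the two entries created above could be driven from userspace roughly as follows; the debugfs directory name (mali0) is an assumption and the 2000 ms budget is just an example value.

    #include <fcntl.h>
    #include <stdio.h>
    #include <string.h>
    #include <unistd.h>

    /* Hypothetical debugfs root; the actual directory name depends on the device */
    #define MALI_DEBUGFS "/sys/kernel/debug/mali0"

    static int write_str(const char *path, const char *val)
    {
        int fd = open(path, O_WRONLY);

        if (fd < 0)
            return -1;
        if (write(fd, val, strlen(val)) < 0) {
            close(fd);
            return -1;
        }
        return close(fd);
    }

    int main(void)
    {
        /* Enable the fence-signal watchdog and give it a 2 s budget */
        if (write_str(MALI_DEBUGFS "/fence_signal_timeout_enable", "1\n") ||
            write_str(MALI_DEBUGFS "/fence_signal_timeout_ms", "2000\n")) {
            perror("debugfs write");
            return 1;
        }
        return 0;
    }
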
diff --git a/mali_kbase/mali_kbase_bits.h b/mali_kbase/csf/mali_kbase_csf_kcpu_fence_debugfs.h
index a085fd8..e3799fb 100644
--- a/mali_kbase/mali_kbase_bits.h
+++ b/mali_kbase/csf/mali_kbase_csf_kcpu_fence_debugfs.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -18,14 +18,25 @@
* http://www.gnu.org/licenses/gpl-2.0.html.
*
*/
+#ifndef _KBASE_CSF_KCPU_FENCE_SIGNAL_DEBUGFS_H_
+#define _KBASE_CSF_KCPU_FENCE_SIGNAL_DEBUGFS_H_
-#ifndef _KBASE_BITS_H_
-#define _KBASE_BITS_H_
+struct kbase_device;
-#if (KERNEL_VERSION(4, 19, 0) <= LINUX_VERSION_CODE)
-#include <linux/bits.h>
-#else
-#include <linux/bitops.h>
-#endif
+/*
+ * kbase_csf_fence_timer_debugfs_init - Initialize fence signal timeout debugfs
+ * entries.
+ * @kbdev: Kbase device.
+ *
+ * Return: 0 on success, -1 on failure.
+ */
+int kbase_csf_fence_timer_debugfs_init(struct kbase_device *kbdev);
+
+/*
+ * kbase_csf_fence_timer_debugfs_term - Terminate fence signal timeout debugfs
+ * entries.
+ * @kbdev: Kbase device.
+ */
+void kbase_csf_fence_timer_debugfs_term(struct kbase_device *kbdev);
-#endif /* _KBASE_BITS_H_ */
+#endif /* _KBASE_CSF_KCPU_FENCE_SIGNAL_DEBUGFS_H_ */
diff --git a/mali_kbase/csf/mali_kbase_csf_mcu_shared_reg.c b/mali_kbase/csf/mali_kbase_csf_mcu_shared_reg.c
index bb5a092..863cf10 100644
--- a/mali_kbase/csf/mali_kbase_csf_mcu_shared_reg.c
+++ b/mali_kbase/csf/mali_kbase_csf_mcu_shared_reg.c
@@ -83,7 +83,7 @@ static unsigned long get_userio_mmu_flags(struct kbase_device *kbdev)
static void set_page_meta_status_not_movable(struct tagged_addr phy)
{
- if (kbase_page_migration_enabled) {
+ if (kbase_is_page_migration_enabled()) {
struct kbase_page_metadata *page_md = kbase_page_private(as_page(phy));
if (page_md) {
@@ -117,7 +117,7 @@ static inline int insert_dummy_pages(struct kbase_device *kbdev, u64 vpfn, u32 n
return kbase_mmu_insert_pages(kbdev, &kbdev->csf.mcu_mmu, vpfn, shared_regs->dummy_phys,
nr_pages, mem_flags, MCU_AS_NR, KBASE_MEM_GROUP_CSF_FW,
- mmu_sync_info, NULL, false);
+ mmu_sync_info, NULL);
}
/* Reset consecutive retry count to zero */
@@ -613,8 +613,7 @@ static int shared_mcu_csg_reg_init(struct kbase_device *kbdev,
int err, i;
INIT_LIST_HEAD(&csg_reg->link);
- reg = kbase_alloc_free_region(kbdev, &kbdev->csf.shared_reg_rbtree, 0, nr_csg_reg_pages,
- KBASE_REG_ZONE_MCU_SHARED);
+ reg = kbase_alloc_free_region(&kbdev->csf.mcu_shared_zone, 0, nr_csg_reg_pages);
if (!reg) {
dev_err(kbdev->dev, "%s: Failed to allocate a MCU shared region for %zu pages\n",
@@ -667,18 +666,19 @@ static int shared_mcu_csg_reg_init(struct kbase_device *kbdev,
fail_userio_pages_map_fail:
while (i-- > 0) {
vpfn = CSG_REG_USERIO_VPFN(reg, i, nr_susp_pages);
- kbase_mmu_teardown_pages(kbdev, &kbdev->csf.mcu_mmu, vpfn, shared_regs->dummy_phys,
- KBASEP_NUM_CS_USER_IO_PAGES, KBASEP_NUM_CS_USER_IO_PAGES,
- MCU_AS_NR, true);
+ kbase_mmu_teardown_firmware_pages(kbdev, &kbdev->csf.mcu_mmu, vpfn,
+ shared_regs->dummy_phys,
+ KBASEP_NUM_CS_USER_IO_PAGES,
+ KBASEP_NUM_CS_USER_IO_PAGES, MCU_AS_NR);
}
vpfn = CSG_REG_PMOD_BUF_VPFN(reg, nr_susp_pages);
- kbase_mmu_teardown_pages(kbdev, &kbdev->csf.mcu_mmu, vpfn, shared_regs->dummy_phys,
- nr_susp_pages, nr_susp_pages, MCU_AS_NR, true);
+ kbase_mmu_teardown_firmware_pages(kbdev, &kbdev->csf.mcu_mmu, vpfn, shared_regs->dummy_phys,
+ nr_susp_pages, nr_susp_pages, MCU_AS_NR);
fail_pmod_map_fail:
vpfn = CSG_REG_SUSP_BUF_VPFN(reg, nr_susp_pages);
- kbase_mmu_teardown_pages(kbdev, &kbdev->csf.mcu_mmu, vpfn, shared_regs->dummy_phys,
- nr_susp_pages, nr_susp_pages, MCU_AS_NR, true);
+ kbase_mmu_teardown_firmware_pages(kbdev, &kbdev->csf.mcu_mmu, vpfn, shared_regs->dummy_phys,
+ nr_susp_pages, nr_susp_pages, MCU_AS_NR);
fail_susp_map_fail:
mutex_lock(&kbdev->csf.reg_lock);
kbase_remove_va_region(kbdev, reg);
@@ -701,17 +701,18 @@ static void shared_mcu_csg_reg_term(struct kbase_device *kbdev,
for (i = 0; i < nr_csis; i++) {
vpfn = CSG_REG_USERIO_VPFN(reg, i, nr_susp_pages);
- kbase_mmu_teardown_pages(kbdev, &kbdev->csf.mcu_mmu, vpfn, shared_regs->dummy_phys,
- KBASEP_NUM_CS_USER_IO_PAGES, KBASEP_NUM_CS_USER_IO_PAGES,
- MCU_AS_NR, true);
+ kbase_mmu_teardown_firmware_pages(kbdev, &kbdev->csf.mcu_mmu, vpfn,
+ shared_regs->dummy_phys,
+ KBASEP_NUM_CS_USER_IO_PAGES,
+ KBASEP_NUM_CS_USER_IO_PAGES, MCU_AS_NR);
}
vpfn = CSG_REG_PMOD_BUF_VPFN(reg, nr_susp_pages);
- kbase_mmu_teardown_pages(kbdev, &kbdev->csf.mcu_mmu, vpfn, shared_regs->dummy_phys,
- nr_susp_pages, nr_susp_pages, MCU_AS_NR, true);
+ kbase_mmu_teardown_firmware_pages(kbdev, &kbdev->csf.mcu_mmu, vpfn, shared_regs->dummy_phys,
+ nr_susp_pages, nr_susp_pages, MCU_AS_NR);
vpfn = CSG_REG_SUSP_BUF_VPFN(reg, nr_susp_pages);
- kbase_mmu_teardown_pages(kbdev, &kbdev->csf.mcu_mmu, vpfn, shared_regs->dummy_phys,
- nr_susp_pages, nr_susp_pages, MCU_AS_NR, true);
+ kbase_mmu_teardown_firmware_pages(kbdev, &kbdev->csf.mcu_mmu, vpfn, shared_regs->dummy_phys,
+ nr_susp_pages, nr_susp_pages, MCU_AS_NR);
mutex_lock(&kbdev->csf.reg_lock);
kbase_remove_va_region(kbdev, reg);
diff --git a/mali_kbase/csf/mali_kbase_csf_registers.h b/mali_kbase/csf/mali_kbase_csf_registers.h
index b5bf7bb..b5ca885 100644
--- a/mali_kbase/csf/mali_kbase_csf_registers.h
+++ b/mali_kbase/csf/mali_kbase_csf_registers.h
@@ -143,12 +143,15 @@
#define CSG_ACK_IRQ_MASK 0x0004 /* () Global acknowledge interrupt mask */
#define CSG_DB_REQ 0x0008 /* () Global doorbell request */
#define CSG_IRQ_ACK 0x000C /* () CS IRQ acknowledge */
+
+
#define CSG_ALLOW_COMPUTE_LO 0x0020 /* () Allowed compute endpoints, low word */
#define CSG_ALLOW_COMPUTE_HI 0x0024 /* () Allowed compute endpoints, high word */
#define CSG_ALLOW_FRAGMENT_LO 0x0028 /* () Allowed fragment endpoints, low word */
#define CSG_ALLOW_FRAGMENT_HI 0x002C /* () Allowed fragment endpoints, high word */
#define CSG_ALLOW_OTHER 0x0030 /* () Allowed other endpoints */
-#define CSG_EP_REQ 0x0034 /* () Maximum number of endpoints allowed */
+#define CSG_EP_REQ_LO 0x0034 /* () Maximum number of endpoints allowed, low word */
+#define CSG_EP_REQ_HI 0x0038 /* () Maximum number of endpoints allowed, high word */
#define CSG_SUSPEND_BUF_LO 0x0040 /* () Normal mode suspend buffer, low word */
#define CSG_SUSPEND_BUF_HI 0x0044 /* () Normal mode suspend buffer, high word */
#define CSG_PROTM_SUSPEND_BUF_LO 0x0048 /* () Protected mode suspend buffer, low word */
@@ -645,6 +648,7 @@
(((reg_val) & ~CS_STATUS_REQ_RESOURCE_IDVS_RESOURCES_MASK) | \
(((value) << CS_STATUS_REQ_RESOURCE_IDVS_RESOURCES_SHIFT) & CS_STATUS_REQ_RESOURCE_IDVS_RESOURCES_MASK))
+
/* CS_STATUS_WAIT_SYNC_POINTER register */
#define CS_STATUS_WAIT_SYNC_POINTER_POINTER_SHIFT 0
#define CS_STATUS_WAIT_SYNC_POINTER_POINTER_MASK \
@@ -953,41 +957,46 @@
/* CSG_EP_REQ register */
#define CSG_EP_REQ_COMPUTE_EP_SHIFT 0
-#define CSG_EP_REQ_COMPUTE_EP_MASK (0xFF << CSG_EP_REQ_COMPUTE_EP_SHIFT)
+#define CSG_EP_REQ_COMPUTE_EP_MASK ((u64)0xFF << CSG_EP_REQ_COMPUTE_EP_SHIFT)
#define CSG_EP_REQ_COMPUTE_EP_GET(reg_val) (((reg_val)&CSG_EP_REQ_COMPUTE_EP_MASK) >> CSG_EP_REQ_COMPUTE_EP_SHIFT)
-#define CSG_EP_REQ_COMPUTE_EP_SET(reg_val, value) \
- (((reg_val) & ~CSG_EP_REQ_COMPUTE_EP_MASK) | \
- (((value) << CSG_EP_REQ_COMPUTE_EP_SHIFT) & CSG_EP_REQ_COMPUTE_EP_MASK))
+#define CSG_EP_REQ_COMPUTE_EP_SET(reg_val, value) \
+ (((reg_val) & ~CSG_EP_REQ_COMPUTE_EP_MASK) | \
+ ((((u64)value) << CSG_EP_REQ_COMPUTE_EP_SHIFT) & CSG_EP_REQ_COMPUTE_EP_MASK))
#define CSG_EP_REQ_FRAGMENT_EP_SHIFT 8
-#define CSG_EP_REQ_FRAGMENT_EP_MASK (0xFF << CSG_EP_REQ_FRAGMENT_EP_SHIFT)
+#define CSG_EP_REQ_FRAGMENT_EP_MASK ((u64)0xFF << CSG_EP_REQ_FRAGMENT_EP_SHIFT)
#define CSG_EP_REQ_FRAGMENT_EP_GET(reg_val) (((reg_val)&CSG_EP_REQ_FRAGMENT_EP_MASK) >> CSG_EP_REQ_FRAGMENT_EP_SHIFT)
-#define CSG_EP_REQ_FRAGMENT_EP_SET(reg_val, value) \
- (((reg_val) & ~CSG_EP_REQ_FRAGMENT_EP_MASK) | \
- (((value) << CSG_EP_REQ_FRAGMENT_EP_SHIFT) & CSG_EP_REQ_FRAGMENT_EP_MASK))
+#define CSG_EP_REQ_FRAGMENT_EP_SET(reg_val, value) \
+ (((reg_val) & ~CSG_EP_REQ_FRAGMENT_EP_MASK) | \
+ ((((u64)value) << CSG_EP_REQ_FRAGMENT_EP_SHIFT) & CSG_EP_REQ_FRAGMENT_EP_MASK))
#define CSG_EP_REQ_TILER_EP_SHIFT 16
-#define CSG_EP_REQ_TILER_EP_MASK (0xF << CSG_EP_REQ_TILER_EP_SHIFT)
+#define CSG_EP_REQ_TILER_EP_MASK ((u64)0xF << CSG_EP_REQ_TILER_EP_SHIFT)
#define CSG_EP_REQ_TILER_EP_GET(reg_val) (((reg_val)&CSG_EP_REQ_TILER_EP_MASK) >> CSG_EP_REQ_TILER_EP_SHIFT)
-#define CSG_EP_REQ_TILER_EP_SET(reg_val, value) \
- (((reg_val) & ~CSG_EP_REQ_TILER_EP_MASK) | (((value) << CSG_EP_REQ_TILER_EP_SHIFT) & CSG_EP_REQ_TILER_EP_MASK))
+#define CSG_EP_REQ_TILER_EP_SET(reg_val, value) \
+ (((reg_val) & ~CSG_EP_REQ_TILER_EP_MASK) | \
+ ((((u64)value) << CSG_EP_REQ_TILER_EP_SHIFT) & CSG_EP_REQ_TILER_EP_MASK))
#define CSG_EP_REQ_EXCLUSIVE_COMPUTE_SHIFT 20
-#define CSG_EP_REQ_EXCLUSIVE_COMPUTE_MASK (0x1 << CSG_EP_REQ_EXCLUSIVE_COMPUTE_SHIFT)
+#define CSG_EP_REQ_EXCLUSIVE_COMPUTE_MASK ((u64)0x1 << CSG_EP_REQ_EXCLUSIVE_COMPUTE_SHIFT)
#define CSG_EP_REQ_EXCLUSIVE_COMPUTE_GET(reg_val) \
(((reg_val)&CSG_EP_REQ_EXCLUSIVE_COMPUTE_MASK) >> CSG_EP_REQ_EXCLUSIVE_COMPUTE_SHIFT)
-#define CSG_EP_REQ_EXCLUSIVE_COMPUTE_SET(reg_val, value) \
- (((reg_val) & ~CSG_EP_REQ_EXCLUSIVE_COMPUTE_MASK) | \
- (((value) << CSG_EP_REQ_EXCLUSIVE_COMPUTE_SHIFT) & CSG_EP_REQ_EXCLUSIVE_COMPUTE_MASK))
+#define CSG_EP_REQ_EXCLUSIVE_COMPUTE_SET(reg_val, value) \
+ (((reg_val) & ~CSG_EP_REQ_EXCLUSIVE_COMPUTE_MASK) | \
+ ((((u64)value) << CSG_EP_REQ_EXCLUSIVE_COMPUTE_SHIFT) & \
+ CSG_EP_REQ_EXCLUSIVE_COMPUTE_MASK))
#define CSG_EP_REQ_EXCLUSIVE_FRAGMENT_SHIFT 21
-#define CSG_EP_REQ_EXCLUSIVE_FRAGMENT_MASK (0x1 << CSG_EP_REQ_EXCLUSIVE_FRAGMENT_SHIFT)
+#define CSG_EP_REQ_EXCLUSIVE_FRAGMENT_MASK ((u64)0x1 << CSG_EP_REQ_EXCLUSIVE_FRAGMENT_SHIFT)
#define CSG_EP_REQ_EXCLUSIVE_FRAGMENT_GET(reg_val) \
(((reg_val)&CSG_EP_REQ_EXCLUSIVE_FRAGMENT_MASK) >> CSG_EP_REQ_EXCLUSIVE_FRAGMENT_SHIFT)
-#define CSG_EP_REQ_EXCLUSIVE_FRAGMENT_SET(reg_val, value) \
- (((reg_val) & ~CSG_EP_REQ_EXCLUSIVE_FRAGMENT_MASK) | \
- (((value) << CSG_EP_REQ_EXCLUSIVE_FRAGMENT_SHIFT) & CSG_EP_REQ_EXCLUSIVE_FRAGMENT_MASK))
+#define CSG_EP_REQ_EXCLUSIVE_FRAGMENT_SET(reg_val, value) \
+ (((reg_val) & ~CSG_EP_REQ_EXCLUSIVE_FRAGMENT_MASK) | \
+ ((((u64)value) << CSG_EP_REQ_EXCLUSIVE_FRAGMENT_SHIFT) & \
+ CSG_EP_REQ_EXCLUSIVE_FRAGMENT_MASK))
#define CSG_EP_REQ_PRIORITY_SHIFT 28
-#define CSG_EP_REQ_PRIORITY_MASK (0xF << CSG_EP_REQ_PRIORITY_SHIFT)
+#define CSG_EP_REQ_PRIORITY_MASK ((u64)0xF << CSG_EP_REQ_PRIORITY_SHIFT)
#define CSG_EP_REQ_PRIORITY_GET(reg_val) (((reg_val)&CSG_EP_REQ_PRIORITY_MASK) >> CSG_EP_REQ_PRIORITY_SHIFT)
-#define CSG_EP_REQ_PRIORITY_SET(reg_val, value) \
- (((reg_val) & ~CSG_EP_REQ_PRIORITY_MASK) | (((value) << CSG_EP_REQ_PRIORITY_SHIFT) & CSG_EP_REQ_PRIORITY_MASK))
+#define CSG_EP_REQ_PRIORITY_SET(reg_val, value) \
+ (((reg_val) & ~CSG_EP_REQ_PRIORITY_MASK) | \
+ ((((u64)value) << CSG_EP_REQ_PRIORITY_SHIFT) & CSG_EP_REQ_PRIORITY_MASK))
+
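
Since the field macros above now operate on a 64-bit value (to match the widened CSG_EP_REQ_LO/HI pair), a caller composes the register image roughly like this; the endpoint counts are illustrative and the helper name is made up:

    #include <linux/types.h>
    #include <linux/bug.h>

    /* Assumes the CSG_EP_REQ_* helpers above are in scope */
    static u64 example_build_ep_req(u8 compute, u8 fragment, u8 tiler, u8 prio)
    {
        u64 ep_req = 0;

        ep_req = CSG_EP_REQ_COMPUTE_EP_SET(ep_req, compute);
        ep_req = CSG_EP_REQ_FRAGMENT_EP_SET(ep_req, fragment);
        ep_req = CSG_EP_REQ_TILER_EP_SET(ep_req, tiler);
        ep_req = CSG_EP_REQ_PRIORITY_SET(ep_req, prio);

        WARN_ON(CSG_EP_REQ_COMPUTE_EP_GET(ep_req) != compute);
        return ep_req;
    }
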
/* CSG_SUSPEND_BUF register */
#define CSG_SUSPEND_BUF_POINTER_SHIFT 0
@@ -1096,6 +1105,7 @@
(((reg_val) & ~CSG_STATUS_EP_CURRENT_TILER_EP_MASK) | \
(((value) << CSG_STATUS_EP_CURRENT_TILER_EP_SHIFT) & CSG_STATUS_EP_CURRENT_TILER_EP_MASK))
+
/* CSG_STATUS_EP_REQ register */
#define CSG_STATUS_EP_REQ_COMPUTE_EP_SHIFT 0
#define CSG_STATUS_EP_REQ_COMPUTE_EP_MASK (0xFF << CSG_STATUS_EP_REQ_COMPUTE_EP_SHIFT)
@@ -1133,6 +1143,7 @@
(((reg_val) & ~CSG_STATUS_EP_REQ_EXCLUSIVE_FRAGMENT_MASK) | \
(((value) << CSG_STATUS_EP_REQ_EXCLUSIVE_FRAGMENT_SHIFT) & CSG_STATUS_EP_REQ_EXCLUSIVE_FRAGMENT_MASK))
+
/* End of CSG_OUTPUT_BLOCK register set definitions */
/* STREAM_CONTROL_BLOCK register set definitions */
@@ -1481,6 +1492,20 @@
#define GLB_PWROFF_TIMER_TIMER_SOURCE_GPU_COUNTER 0x1
/* End of GLB_PWROFF_TIMER_TIMER_SOURCE values */
+/* GLB_PWROFF_TIMER_CONFIG register */
+#ifndef GLB_PWROFF_TIMER_CONFIG
+#define GLB_PWROFF_TIMER_CONFIG 0x0088 /* () Configuration fields for GLB_PWROFF_TIMER */
+#define GLB_PWROFF_TIMER_CONFIG_NO_MODIFIER_SHIFT 0
+#define GLB_PWROFF_TIMER_CONFIG_NO_MODIFIER_MASK (0x1 << GLB_PWROFF_TIMER_CONFIG_NO_MODIFIER_SHIFT)
+#define GLB_PWROFF_TIMER_CONFIG_NO_MODIFIER_GET(reg_val) \
+ (((reg_val)&GLB_PWROFF_TIMER_CONFIG_NO_MODIFIER_MASK) >> \
+ GLB_PWROFF_TIMER_CONFIG_NO_MODIFIER_SHIFT)
+#define GLB_PWROFF_TIMER_CONFIG_NO_MODIFIER_SET(reg_val, value) \
+ (((reg_val) & ~GLB_PWROFF_TIMER_CONFIG_NO_MODIFIER_MASK) | \
+ (((value) << GLB_PWROFF_TIMER_CONFIG_NO_MODIFIER_SHIFT) & \
+ GLB_PWROFF_TIMER_CONFIG_NO_MODIFIER_MASK))
+#endif /* End of GLB_PWROFF_TIMER_CONFIG values */
+
/* GLB_ALLOC_EN register */
#define GLB_ALLOC_EN_MASK_SHIFT 0
#define GLB_ALLOC_EN_MASK_MASK (GPU_ULL(0xFFFFFFFFFFFFFFFF) << GLB_ALLOC_EN_MASK_SHIFT)
@@ -1546,6 +1571,20 @@
#define GLB_IDLE_TIMER_TIMER_SOURCE_GPU_COUNTER 0x1
/* End of GLB_IDLE_TIMER_TIMER_SOURCE values */
+/* GLB_IDLE_TIMER_CONFIG values */
+#ifndef GLB_IDLE_TIMER_CONFIG
+#define GLB_IDLE_TIMER_CONFIG 0x0084 /* () Configuration fields for GLB_IDLE_TIMER */
+#define GLB_IDLE_TIMER_CONFIG_NO_MODIFIER_SHIFT 0
+#define GLB_IDLE_TIMER_CONFIG_NO_MODIFIER_MASK (0x1 << GLB_IDLE_TIMER_CONFIG_NO_MODIFIER_SHIFT)
+#define GLB_IDLE_TIMER_CONFIG_NO_MODIFIER_GET(reg_val) \
+ (((reg_val)&GLB_IDLE_TIMER_CONFIG_NO_MODIFIER_MASK) >> \
+ GLB_IDLE_TIMER_CONFIG_NO_MODIFIER_SHIFT)
+#define GLB_IDLE_TIMER_CONFIG_NO_MODIFIER_SET(reg_val, value) \
+ (((reg_val) & ~GLB_IDLE_TIMER_CONFIG_NO_MODIFIER_MASK) | \
+ (((value) << GLB_IDLE_TIMER_CONFIG_NO_MODIFIER_SHIFT) & \
+ GLB_IDLE_TIMER_CONFIG_NO_MODIFIER_MASK))
+#endif /* End of GLB_IDLE_TIMER_CONFIG values */
+
/* GLB_INSTR_FEATURES register */
#define GLB_INSTR_FEATURES_OFFSET_UPDATE_RATE_SHIFT (0)
#define GLB_INSTR_FEATURES_OFFSET_UPDATE_RATE_MASK ((u32)0xF << GLB_INSTR_FEATURES_OFFSET_UPDATE_RATE_SHIFT)
@@ -1670,6 +1709,7 @@
(((reg_val) & ~GLB_DEBUG_ACK_RUN_MODE_MASK) | \
(((value) << GLB_DEBUG_ACK_RUN_MODE_SHIFT) & GLB_DEBUG_ACK_RUN_MODE_MASK))
+
/* RUN_MODE values */
#define GLB_DEBUG_RUN_MODE_TYPE_NOP 0x0
#define GLB_DEBUG_RUN_MODE_TYPE_CORE_DUMP 0x1
diff --git a/mali_kbase/csf/mali_kbase_csf_reset_gpu.c b/mali_kbase/csf/mali_kbase_csf_reset_gpu.c
index d076f3d..b8ad3a4 100644
--- a/mali_kbase/csf/mali_kbase_csf_reset_gpu.c
+++ b/mali_kbase/csf/mali_kbase_csf_reset_gpu.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -196,7 +196,7 @@ static void kbase_csf_reset_begin_hw_access_sync(
*/
spin_lock_irqsave(&kbdev->hwaccess_lock, hwaccess_lock_flags);
kbase_csf_scheduler_spin_lock(kbdev, &scheduler_spin_lock_flags);
- atomic_set(&kbdev->csf.reset.state, KBASE_RESET_GPU_HAPPENING);
+ atomic_set(&kbdev->csf.reset.state, KBASE_CSF_RESET_GPU_HAPPENING);
kbase_csf_scheduler_spin_unlock(kbdev, scheduler_spin_lock_flags);
spin_unlock_irqrestore(&kbdev->hwaccess_lock, hwaccess_lock_flags);
}
@@ -257,14 +257,15 @@ void kbase_csf_debug_dump_registers(struct kbase_device *kbdev)
kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT)),
kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_STATUS)),
kbase_reg_read(kbdev, GPU_CONTROL_REG(MCU_STATUS)));
- dev_err(kbdev->dev, " JOB_IRQ_RAWSTAT=0x%08x MMU_IRQ_RAWSTAT=0x%08x GPU_FAULTSTATUS=0x%08x",
+ dev_err(kbdev->dev,
+ " JOB_IRQ_RAWSTAT=0x%08x MMU_IRQ_RAWSTAT=0x%08x GPU_FAULTSTATUS=0x%08x",
kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_RAWSTAT)),
- kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_RAWSTAT)),
+ kbase_reg_read(kbdev, MMU_CONTROL_REG(MMU_IRQ_RAWSTAT)),
kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_FAULTSTATUS)));
dev_err(kbdev->dev, " GPU_IRQ_MASK=0x%08x JOB_IRQ_MASK=0x%08x MMU_IRQ_MASK=0x%08x",
kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK)),
kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK)),
- kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK)));
+ kbase_reg_read(kbdev, MMU_CONTROL_REG(MMU_IRQ_MASK)));
dev_err(kbdev->dev, " PWR_OVERRIDE0=0x%08x PWR_OVERRIDE1=0x%08x",
kbase_reg_read(kbdev, GPU_CONTROL_REG(PWR_OVERRIDE0)),
kbase_reg_read(kbdev, GPU_CONTROL_REG(PWR_OVERRIDE1)));
@@ -388,10 +389,12 @@ static enum kbasep_soft_reset_status kbase_csf_reset_gpu_once(struct kbase_devic
rt_mutex_unlock(&kbdev->pm.lock);
if (err) {
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
if (!kbase_pm_l2_is_in_desired_state(kbdev))
ret = L2_ON_FAILED;
else if (!kbase_pm_mcu_is_in_desired_state(kbdev))
ret = MCU_REINIT_FAILED;
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
}
return ret;
diff --git a/mali_kbase/csf/mali_kbase_csf_scheduler.c b/mali_kbase/csf/mali_kbase_csf_scheduler.c
index f21067f..2573e3f 100644
--- a/mali_kbase/csf/mali_kbase_csf_scheduler.c
+++ b/mali_kbase/csf/mali_kbase_csf_scheduler.c
@@ -19,6 +19,8 @@
*
*/
+#include <linux/kthread.h>
+
#include <mali_kbase.h>
#include "mali_kbase_config_defaults.h"
#include <mali_kbase_ctx_sched.h>
@@ -36,6 +38,11 @@
#include "mali_kbase_csf_tiler_heap.h"
#include "mali_kbase_csf_tiler_heap_reclaim.h"
#include "mali_kbase_csf_mcu_shared_reg.h"
+#include <linux/version_compat_defs.h>
+#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD)
+#include <mali_kbase_gpu_metrics.h>
+#include <csf/mali_kbase_csf_trace_buffer.h>
+#endif /* CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD */
/* Value to indicate that a queue group is not on the groups_to_schedule list */
#define KBASEP_GROUP_PREPARED_SEQ_NUM_INVALID (U32_MAX)
@@ -202,6 +209,222 @@ static bool queue_empty_or_blocked(struct kbase_queue *queue)
}
#endif
+#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD)
+/**
+ * gpu_metrics_ctx_init() - Take a reference on GPU metrics context if it exists,
+ * otherwise allocate and initialise one.
+ *
+ * @kctx: Pointer to the Kbase context.
+ *
+ * The GPU metrics context represents an "Application" for the purposes of GPU metrics
+ * reporting. There may be multiple kbase_contexts contributing data to a single GPU
+ * metrics context.
+ * This function takes a reference on the GPU metrics context corresponding to
+ * the Application that is creating the Kbase context if one already exists;
+ * otherwise memory is allocated for it and it is initialised.
+ *
+ * Return: 0 on success, or negative on failure.
+ */
+static inline int gpu_metrics_ctx_init(struct kbase_context *kctx)
+{
+ struct kbase_gpu_metrics_ctx *gpu_metrics_ctx;
+ struct kbase_device *kbdev = kctx->kbdev;
+ int ret = 0;
+
+ const struct cred *cred = get_current_cred();
+ const unsigned int aid = cred->euid.val;
+
+ put_cred(cred);
+
+ /* Return early if this is not a Userspace created context */
+ if (unlikely(!kctx->kfile))
+ return 0;
+
+ /* Serialize against the other threads trying to create/destroy Kbase contexts. */
+ mutex_lock(&kbdev->kctx_list_lock);
+ rt_mutex_lock(&kbdev->csf.scheduler.lock);
+ gpu_metrics_ctx = kbase_gpu_metrics_ctx_get(kbdev, aid);
+ rt_mutex_unlock(&kbdev->csf.scheduler.lock);
+
+ if (!gpu_metrics_ctx) {
+ gpu_metrics_ctx = kmalloc(sizeof(*gpu_metrics_ctx), GFP_KERNEL);
+
+ if (gpu_metrics_ctx) {
+ rt_mutex_lock(&kbdev->csf.scheduler.lock);
+ kbase_gpu_metrics_ctx_init(kbdev, gpu_metrics_ctx, aid);
+ rt_mutex_unlock(&kbdev->csf.scheduler.lock);
+ } else {
+ dev_err(kbdev->dev, "Allocation for gpu_metrics_ctx failed");
+ ret = -ENOMEM;
+ }
+ }
+
+ kctx->gpu_metrics_ctx = gpu_metrics_ctx;
+ mutex_unlock(&kbdev->kctx_list_lock);
+
+ return ret;
+}
+
+/**
+ * gpu_metrics_ctx_term() - Drop a reference on a GPU metrics context and free it
+ * if the refcount becomes 0.
+ *
+ * @kctx: Pointer to the Kbase context.
+ */
+static inline void gpu_metrics_ctx_term(struct kbase_context *kctx)
+{
+ /* Return early if this is not a Userspace created context */
+ if (unlikely(!kctx->kfile))
+ return;
+
+ /* Serialize against the other threads trying to create/destroy Kbase contexts. */
+ mutex_lock(&kctx->kbdev->kctx_list_lock);
+ rt_mutex_lock(&kctx->kbdev->csf.scheduler.lock);
+ kbase_gpu_metrics_ctx_put(kctx->kbdev, kctx->gpu_metrics_ctx);
+ rt_mutex_unlock(&kctx->kbdev->csf.scheduler.lock);
+ mutex_unlock(&kctx->kbdev->kctx_list_lock);
+}
+
+/**
+ * struct gpu_metrics_event - A GPU metrics event recorded in trace buffer.
+ *
+ * @csg_slot_act: The 32-bit data word of a GPU metrics event.
+ * Bits [4:0] represent the CSG slot number.
+ * Bit [5] represents the transition of the CSG group on the slot:
+ * '1' means idle->active whilst '0' means active->idle.
+ * @timestamp: 64-bit timestamp of a GPU metrics event.
+ *
+ * Note: It's packed and word-aligned, matching the layout agreed with the firmware.
+ */
+struct gpu_metrics_event {
+ u32 csg_slot_act;
+ u64 timestamp;
+} __packed __aligned(4);
+#define GPU_METRICS_EVENT_SIZE sizeof(struct gpu_metrics_event)
+
+#define GPU_METRICS_ACT_SHIFT 5
+#define GPU_METRICS_ACT_MASK (0x1 << GPU_METRICS_ACT_SHIFT)
+#define GPU_METRICS_ACT_GET(val) (((val)&GPU_METRICS_ACT_MASK) >> GPU_METRICS_ACT_SHIFT)
+
+#define GPU_METRICS_CSG_MASK 0x1f
+#define GPU_METRICS_CSG_GET(val) ((val)&GPU_METRICS_CSG_MASK)
+
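For illustration only (not part of the patch), a minimal sketch of how a csg_slot_act word decodes with the helpers above; the value 0x25 is made up for the example:

u32 csg_slot_act = 0x25;                          /* bits[4:0] = 5, bit[5] = 1 */
u8 slot = GPU_METRICS_CSG_GET(csg_slot_act);      /* CSG slot 5 */
bool active = GPU_METRICS_ACT_GET(csg_slot_act);  /* true, i.e. an idle->active transition */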
+/**
+ * gpu_metrics_read_event() - Read a GPU metrics trace from trace buffer
+ *
+ * @kbdev: Pointer to the device
+ * @kctx: Kcontext that is derived from CSG slot field of a GPU metrics.
+ * @prev_act: Previous CSG activity transition in a GPU metrics.
+ * @cur_act: Current CSG activity transition in a GPU metrics.
+ * @ts: CSG activity transition timestamp in a GPU metrics.
+ *
+ * This function reads the firmware trace buffer named 'gpu_metrics' and
+ * parses one 12-byte data packet into the following information.
+ * - The CSG slot number on which the CSG transitioned to active or idle.
+ * - Activity transition (1: idle->active, 0: active->idle).
+ * - Timestamp in nanoseconds when the transition occurred.
+ *
+ * Return: true on success.
+ */
+static bool gpu_metrics_read_event(struct kbase_device *kbdev, struct kbase_context **kctx,
+ bool *prev_act, bool *cur_act, uint64_t *ts)
+{
+ struct firmware_trace_buffer *tb = kbdev->csf.scheduler.gpu_metrics_tb;
+ struct gpu_metrics_event e;
+
+ if (kbase_csf_firmware_trace_buffer_read_data(tb, (u8 *)&e, GPU_METRICS_EVENT_SIZE) ==
+ GPU_METRICS_EVENT_SIZE) {
+ const u8 slot = GPU_METRICS_CSG_GET(e.csg_slot_act);
+ struct kbase_queue_group *group =
+ kbdev->csf.scheduler.csg_slots[slot].resident_group;
+
+ if (unlikely(!group)) {
+ dev_err(kbdev->dev, "failed to find CSG group from CSG slot(%u)", slot);
+ return false;
+ }
+
+ *cur_act = GPU_METRICS_ACT_GET(e.csg_slot_act);
+ *ts = kbase_backend_time_convert_gpu_to_cpu(kbdev, e.timestamp);
+ *kctx = group->kctx;
+
+ *prev_act = group->prev_act;
+ group->prev_act = *cur_act;
+
+ return true;
+ }
+
+ dev_err(kbdev->dev, "failed to read a GPU metrics from trace buffer");
+
+ return false;
+}
+
+/**
+ * emit_gpu_metrics_to_frontend() - Emit GPU metrics events to the frontend.
+ *
+ * @kbdev: Pointer to the device
+ *
+ * This function must be called to emit GPU metrics data to the
+ * frontend whenever needed.
+ * Calls to this function will be serialized by the scheduler lock.
+ *
+ * Kbase reports invalid activity transitions when they are detected.
+ */
+static void emit_gpu_metrics_to_frontend(struct kbase_device *kbdev)
+{
+ u64 system_time = 0;
+ u64 ts_before_drain;
+ u64 ts = 0;
+
+ lockdep_assert_held(&kbdev->csf.scheduler.lock);
+
+#if IS_ENABLED(CONFIG_MALI_NO_MALI)
+ return;
+#endif
+
+ if (WARN_ON_ONCE(kbdev->csf.scheduler.state == SCHED_SUSPENDED))
+ return;
+
+ kbase_backend_get_gpu_time_norequest(kbdev, NULL, &system_time, NULL);
+ ts_before_drain = kbase_backend_time_convert_gpu_to_cpu(kbdev, system_time);
+
+ while (!kbase_csf_firmware_trace_buffer_is_empty(kbdev->csf.scheduler.gpu_metrics_tb)) {
+ struct kbase_context *kctx;
+ bool prev_act;
+ bool cur_act;
+
+ if (gpu_metrics_read_event(kbdev, &kctx, &prev_act, &cur_act, &ts)) {
+ if (prev_act == cur_act) {
+ /* Error handling
+ *
+ * In case of an active CSG, Kbase will try to recover the
+ * lost event by ending the previously active event and
+ * starting a new one.
+ *
+ * In case of an inactive CSG, the event is dropped as Kbase
+ * cannot recover.
+ */
+ dev_err(kbdev->dev,
+ "Invalid activity state transition. (prev_act = %u, cur_act = %u)",
+ prev_act, cur_act);
+ if (cur_act) {
+ kbase_gpu_metrics_ctx_end_activity(kctx, ts);
+ kbase_gpu_metrics_ctx_start_activity(kctx, ts);
+ }
+ } else {
+ /* Normal handling */
+ if (cur_act)
+ kbase_gpu_metrics_ctx_start_activity(kctx, ts);
+ else
+ kbase_gpu_metrics_ctx_end_activity(kctx, ts);
+ }
+ } else
+ break;
+ }
+
+ kbase_gpu_metrics_emit_tracepoint(kbdev, ts >= ts_before_drain ? ts + 1 : ts_before_drain);
+}
+#endif /* CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD */
+
/**
* wait_for_dump_complete_on_group_deschedule() - Wait for dump on fault and
* scheduling tick/tock to complete before the group deschedule.
@@ -424,79 +647,20 @@ out:
*
* @timer: Pointer to the scheduling tick hrtimer
*
- * This function will enqueue the scheduling tick work item for immediate
- * execution, if it has not been queued already.
+ * This function will wake up kbase_csf_scheduler_kthread() to process a
+ * pending scheduling tick. The timer will be restarted manually once a tick
+ * has been processed, if appropriate.
*
* Return: enum value to indicate that timer should not be restarted.
*/
static enum hrtimer_restart tick_timer_callback(struct hrtimer *timer)
{
- struct kbase_device *kbdev = container_of(timer, struct kbase_device,
- csf.scheduler.tick_timer);
-
- kbase_csf_scheduler_tick_advance(kbdev);
- return HRTIMER_NORESTART;
-}
-
-/**
- * start_tick_timer() - Start the scheduling tick hrtimer.
- *
- * @kbdev: Pointer to the device
- *
- * This function will start the scheduling tick hrtimer and is supposed to
- * be called only from the tick work item function. The tick hrtimer should
- * not be active already.
- */
-static void start_tick_timer(struct kbase_device *kbdev)
-{
- struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
- unsigned long flags;
-
- lockdep_assert_held(&scheduler->lock);
-
- spin_lock_irqsave(&scheduler->interrupt_lock, flags);
- if (likely(!scheduler->tick_timer_active)) {
- scheduler->tick_timer_active = true;
-
- hrtimer_start(&scheduler->tick_timer,
- HR_TIMER_DELAY_MSEC(scheduler->csg_scheduling_period_ms),
- HRTIMER_MODE_REL);
- }
- spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
-}
-
-/**
- * cancel_tick_timer() - Cancel the scheduling tick hrtimer
- *
- * @kbdev: Pointer to the device
- */
-static void cancel_tick_timer(struct kbase_device *kbdev)
-{
- struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
- unsigned long flags;
-
- spin_lock_irqsave(&scheduler->interrupt_lock, flags);
- scheduler->tick_timer_active = false;
- spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
- hrtimer_cancel(&scheduler->tick_timer);
-}
-
-/**
- * enqueue_tick_work() - Enqueue the scheduling tick work item
- *
- * @kbdev: Pointer to the device
- *
- * This function will queue the scheduling tick work item for immediate
- * execution. This shall only be called when both the tick hrtimer and tick
- * work item are not active/pending.
- */
-static void enqueue_tick_work(struct kbase_device *kbdev)
-{
- struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
-
- lockdep_assert_held(&scheduler->lock);
+ struct kbase_device *kbdev =
+ container_of(timer, struct kbase_device, csf.scheduler.tick_timer);
kbase_csf_scheduler_invoke_tick(kbdev);
+
+ return HRTIMER_NORESTART;
}
static void release_doorbell(struct kbase_device *kbdev, int doorbell_nr)
@@ -642,8 +806,14 @@ static void update_on_slot_queues_offsets(struct kbase_device *kbdev)
if (queue && queue->user_io_addr) {
u64 const *const output_addr =
- (u64 const *)(queue->user_io_addr + PAGE_SIZE);
+ (u64 const *)(queue->user_io_addr +
+ PAGE_SIZE / sizeof(u64));
+ /*
+ * This 64-bit read will be atomic on a 64-bit kernel but may not
+ * be atomic on 32-bit kernels. Support for 32-bit kernels is
+ * limited to build-only.
+ */
queue->extract_ofs = output_addr[CS_EXTRACT_LO / sizeof(u64)];
}
}
@@ -698,7 +868,7 @@ bool kbase_csf_scheduler_process_gpu_idle_event(struct kbase_device *kbdev)
* updated whilst gpu_idle_worker() is executing.
*/
scheduler->fast_gpu_idle_handling =
- (kbdev->csf.gpu_idle_hysteresis_us == 0) ||
+ (kbdev->csf.gpu_idle_hysteresis_ns == 0) ||
!kbase_csf_scheduler_all_csgs_idle(kbdev);
/* The GPU idle worker relies on update_on_slot_queues_offsets() to have
@@ -713,8 +883,8 @@ bool kbase_csf_scheduler_process_gpu_idle_event(struct kbase_device *kbdev)
}
#endif
} else {
- /* Advance the scheduling tick to get the non-idle suspended groups loaded soon */
- kbase_csf_scheduler_tick_advance_nolock(kbdev);
+ /* Invoke the scheduling tick to get the non-idle suspended groups loaded soon */
+ kbase_csf_scheduler_invoke_tick(kbdev);
}
return ack_gpu_idle_event;
@@ -806,6 +976,14 @@ static bool queue_group_scheduled_locked(struct kbase_queue_group *group)
return queue_group_scheduled(group);
}
+static void update_idle_protm_group_state_to_runnable(struct kbase_queue_group *group)
+{
+ lockdep_assert_held(&group->kctx->kbdev->csf.scheduler.lock);
+
+ group->run_state = KBASE_CSF_GROUP_RUNNABLE;
+ KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, CSF_GROUP_RUNNABLE, group, group->run_state);
+}
+
/**
* scheduler_protm_wait_quit() - Wait for GPU to exit protected mode.
*
@@ -889,24 +1067,6 @@ static void scheduler_force_protm_exit(struct kbase_device *kbdev)
}
/**
- * scheduler_timer_is_enabled_nolock() - Check if the scheduler wakes up
- * automatically for periodic tasks.
- *
- * @kbdev: Pointer to the device
- *
- * This is a variant of kbase_csf_scheduler_timer_is_enabled() that assumes the
- * CSF scheduler lock to already have been held.
- *
- * Return: true if the scheduler is configured to wake up periodically
- */
-static bool scheduler_timer_is_enabled_nolock(struct kbase_device *kbdev)
-{
- lockdep_assert_held(&kbdev->csf.scheduler.lock);
-
- return kbdev->csf.scheduler.timer_enabled;
-}
-
-/**
* scheduler_pm_active_handle_suspend() - Acquire the PM reference count for
* Scheduler
*
@@ -1694,9 +1854,9 @@ static void update_hw_active(struct kbase_queue *queue, bool active)
{
#if IS_ENABLED(CONFIG_MALI_NO_MALI)
if (queue && queue->enabled) {
- u32 *output_addr = (u32 *)(queue->user_io_addr + PAGE_SIZE);
+ u64 *output_addr = queue->user_io_addr + PAGE_SIZE / sizeof(u64);
- output_addr[CS_ACTIVE / sizeof(u32)] = active;
+ output_addr[CS_ACTIVE / sizeof(*output_addr)] = active;
}
#else
CSTD_UNUSED(queue);
@@ -1706,11 +1866,16 @@ static void update_hw_active(struct kbase_queue *queue, bool active)
static void program_cs_extract_init(struct kbase_queue *queue)
{
- u64 *input_addr = (u64 *)queue->user_io_addr;
- u64 *output_addr = (u64 *)(queue->user_io_addr + PAGE_SIZE);
+ u64 *input_addr = queue->user_io_addr;
+ u64 *output_addr = queue->user_io_addr + PAGE_SIZE / sizeof(u64);
- input_addr[CS_EXTRACT_INIT_LO / sizeof(u64)] =
- output_addr[CS_EXTRACT_LO / sizeof(u64)];
+ /*
+ * These 64-bit reads and writes will be atomic on a 64-bit kernel but may
+ * not be atomic on 32-bit kernels. Support for 32-bit kernels is limited to
+ * build-only.
+ */
+ input_addr[CS_EXTRACT_INIT_LO / sizeof(*input_addr)] =
+ output_addr[CS_EXTRACT_LO / sizeof(*output_addr)];
}
static void program_cs_trace_cfg(struct kbase_csf_cmd_stream_info *stream,
@@ -1930,7 +2095,7 @@ int kbase_csf_scheduler_queue_start(struct kbase_queue *queue)
kbase_reset_gpu_assert_prevented(kbdev);
lockdep_assert_held(&queue->kctx->csf.lock);
- if (WARN_ON(!group || queue->bind_state != KBASE_CSF_QUEUE_BOUND))
+ if (WARN_ON_ONCE(!group || queue->bind_state != KBASE_CSF_QUEUE_BOUND))
return -EINVAL;
rt_mutex_lock(&kbdev->csf.scheduler.lock);
@@ -2402,7 +2567,7 @@ static void schedule_in_cycle(struct kbase_queue_group *group, bool force)
* of work needs to be enforced in situation such as entering into
* protected mode).
*/
- if (likely(scheduler_timer_is_enabled_nolock(kbdev)) || force) {
+ if (likely(kbase_csf_scheduler_timer_is_enabled(kbdev)) || force) {
dev_dbg(kbdev->dev, "Kicking async for group %d\n",
group->handle);
kbase_csf_scheduler_invoke_tock(kbdev);
@@ -2485,13 +2650,12 @@ void insert_group_to_runnable(struct kbase_csf_scheduler *const scheduler,
scheduler->total_runnable_grps++;
- if (likely(scheduler_timer_is_enabled_nolock(kbdev)) &&
- (scheduler->total_runnable_grps == 1 ||
- scheduler->state == SCHED_SUSPENDED ||
+ if (likely(kbase_csf_scheduler_timer_is_enabled(kbdev)) &&
+ (scheduler->total_runnable_grps == 1 || scheduler->state == SCHED_SUSPENDED ||
scheduler->state == SCHED_SLEEPING)) {
dev_dbg(kbdev->dev, "Kicking scheduler on first runnable group\n");
/* Fire a scheduling to start the time-slice */
- enqueue_tick_work(kbdev);
+ kbase_csf_scheduler_invoke_tick(kbdev);
} else
schedule_in_cycle(group, false);
@@ -2501,6 +2665,17 @@ void insert_group_to_runnable(struct kbase_csf_scheduler *const scheduler,
scheduler_wakeup(kbdev, false);
}
+static void cancel_tick_work(struct kbase_csf_scheduler *const scheduler)
+{
+ hrtimer_cancel(&scheduler->tick_timer);
+ atomic_set(&scheduler->pending_tick_work, false);
+}
+
+static void cancel_tock_work(struct kbase_csf_scheduler *const scheduler)
+{
+ atomic_set(&scheduler->pending_tock_work, false);
+}
+
static
void remove_group_from_runnable(struct kbase_csf_scheduler *const scheduler,
struct kbase_queue_group *group,
@@ -2595,7 +2770,7 @@ void remove_group_from_runnable(struct kbase_csf_scheduler *const scheduler,
scheduler->total_runnable_grps--;
if (!scheduler->total_runnable_grps) {
dev_dbg(kctx->kbdev->dev, "Scheduler idle has no runnable groups");
- cancel_tick_timer(kctx->kbdev);
+ cancel_tick_work(scheduler);
WARN_ON(atomic_read(&scheduler->non_idle_offslot_grps));
if (scheduler->state != SCHED_SUSPENDED)
enqueue_gpu_idle_work(scheduler, 0);
@@ -2741,7 +2916,7 @@ static bool confirm_cmd_buf_empty(struct kbase_queue const *queue)
u32 glb_version = iface->version;
u64 const *input_addr = (u64 const *)queue->user_io_addr;
- u64 const *output_addr = (u64 const *)(queue->user_io_addr + PAGE_SIZE);
+ u64 const *output_addr = (u64 const *)(queue->user_io_addr + PAGE_SIZE / sizeof(u64));
if (glb_version >= kbase_csf_interface_version(1, 0, 0)) {
/* CS_STATUS_SCOREBOARD supported from CSF 1.0 */
@@ -2755,6 +2930,11 @@ static bool confirm_cmd_buf_empty(struct kbase_queue const *queue)
CS_STATUS_SCOREBOARDS));
}
+ /*
+ * These 64-bit reads and writes will be atomic on a 64-bit kernel but may
+ * not be atomic on 32-bit kernels. Support for 32-bit kernels is limited to
+ * build-only.
+ */
cs_empty = (input_addr[CS_INSERT_LO / sizeof(u64)] ==
output_addr[CS_EXTRACT_LO / sizeof(u64)]);
cs_idle = cs_empty && (!sb_status);
@@ -2858,7 +3038,7 @@ static bool cleanup_csg_slot(struct kbase_queue_group *group)
s8 slot;
struct kbase_csf_csg_slot *csg_slot;
unsigned long flags;
- u32 i;
+ u32 csg_req, csg_ack, i;
bool as_fault = false;
lockdep_assert_held(&kbdev->csf.scheduler.lock);
@@ -2898,8 +3078,16 @@ static bool cleanup_csg_slot(struct kbase_queue_group *group)
as_fault = true;
spin_unlock_irqrestore(&kctx->kbdev->hwaccess_lock, flags);
+#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD)
+ emit_gpu_metrics_to_frontend(kbdev);
+#endif /* CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD */
+
/* now marking the slot is vacant */
spin_lock_irqsave(&kbdev->csf.scheduler.interrupt_lock, flags);
+ /* Process pending SYNC_UPDATE, if any */
+ csg_req = kbase_csf_firmware_csg_input_read(ginfo, CSG_REQ);
+ csg_ack = kbase_csf_firmware_csg_output(ginfo, CSG_ACK);
+ kbase_csf_handle_csg_sync_update(kbdev, ginfo, group, csg_req, csg_ack);
kbdev->csf.scheduler.csg_slots[slot].resident_group = NULL;
clear_bit(slot, kbdev->csf.scheduler.csg_slots_idle_mask);
@@ -2962,10 +3150,10 @@ static void update_csg_slot_priority(struct kbase_queue_group *group, u8 prio)
return;
/* Read the csg_ep_cfg back for updating the priority field */
- ep_cfg = kbase_csf_firmware_csg_input_read(ginfo, CSG_EP_REQ);
+ ep_cfg = kbase_csf_firmware_csg_input_read(ginfo, CSG_EP_REQ_LO);
prev_prio = CSG_EP_REQ_PRIORITY_GET(ep_cfg);
ep_cfg = CSG_EP_REQ_PRIORITY_SET(ep_cfg, prio);
- kbase_csf_firmware_csg_input(ginfo, CSG_EP_REQ, ep_cfg);
+ kbase_csf_firmware_csg_input(ginfo, CSG_EP_REQ_LO, ep_cfg);
spin_lock_irqsave(&kbdev->csf.scheduler.interrupt_lock, flags);
csg_req = kbase_csf_firmware_csg_output(ginfo, CSG_ACK);
@@ -2999,12 +3187,11 @@ static void program_csg_slot(struct kbase_queue_group *group, s8 slot,
const u64 compute_mask = shader_core_mask & group->compute_mask;
const u64 fragment_mask = shader_core_mask & group->fragment_mask;
const u64 tiler_mask = tiler_core_mask & group->tiler_mask;
- const u8 num_cores = kbdev->gpu_props.num_cores;
- const u8 compute_max = min(num_cores, group->compute_max);
- const u8 fragment_max = min(num_cores, group->fragment_max);
+ const u8 compute_max = min(kbdev->gpu_props.num_cores, group->compute_max);
+ const u8 fragment_max = min(kbdev->gpu_props.num_cores, group->fragment_max);
const u8 tiler_max = min(CSG_TILER_MAX, group->tiler_max);
struct kbase_csf_cmd_stream_group_info *ginfo;
- u32 ep_cfg = 0;
+ u64 ep_cfg = 0;
u32 csg_req;
u32 state;
int i;
@@ -3078,6 +3265,7 @@ static void program_csg_slot(struct kbase_queue_group *group, s8 slot,
fragment_mask & U32_MAX);
kbase_csf_firmware_csg_input(ginfo, CSG_ALLOW_FRAGMENT_HI,
fragment_mask >> 32);
+
kbase_csf_firmware_csg_input(ginfo, CSG_ALLOW_OTHER,
tiler_mask & U32_MAX);
@@ -3089,7 +3277,7 @@ static void program_csg_slot(struct kbase_queue_group *group, s8 slot,
ep_cfg = CSG_EP_REQ_FRAGMENT_EP_SET(ep_cfg, fragment_max);
ep_cfg = CSG_EP_REQ_TILER_EP_SET(ep_cfg, tiler_max);
ep_cfg = CSG_EP_REQ_PRIORITY_SET(ep_cfg, prio);
- kbase_csf_firmware_csg_input(ginfo, CSG_EP_REQ, ep_cfg);
+ kbase_csf_firmware_csg_input(ginfo, CSG_EP_REQ_LO, ep_cfg & U32_MAX);
/* Program the address space number assigned to the context */
kbase_csf_firmware_csg_input(ginfo, CSG_CONFIG, kctx->as_nr);
@@ -3719,7 +3907,6 @@ static void program_suspending_csg_slots(struct kbase_device *kbdev)
DECLARE_BITMAP(slot_mask, MAX_SUPPORTED_CSGS);
DECLARE_BITMAP(evicted_mask, MAX_SUPPORTED_CSGS) = {0};
bool suspend_wait_failed = false;
- long remaining = kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms);
lockdep_assert_held(&kbdev->csf.scheduler.lock);
@@ -3731,6 +3918,7 @@ static void program_suspending_csg_slots(struct kbase_device *kbdev)
while (!bitmap_empty(slot_mask, MAX_SUPPORTED_CSGS)) {
DECLARE_BITMAP(changed, MAX_SUPPORTED_CSGS);
+ long remaining = kbase_csf_timeout_in_jiffies(kbase_get_timeout_ms(kbdev, CSF_CSG_SUSPEND_TIMEOUT));
bitmap_copy(changed, slot_mask, MAX_SUPPORTED_CSGS);
@@ -3752,15 +3940,18 @@ static void program_suspending_csg_slots(struct kbase_device *kbdev)
/* The on slot csg is now stopped */
clear_bit(i, slot_mask);
- KBASE_TLSTREAM_TL_KBASE_DEVICE_SUSPEND_CSG(
- kbdev, kbdev->gpu_props.props.raw_props.gpu_id, i);
-
if (likely(group)) {
bool as_fault;
/* Only do save/cleanup if the
* group is not terminated during
* the sleep.
*/
+
+ /* Only emit suspend if there was no AS fault */
+ if (kctx_as_enabled(group->kctx) && !group->faulted)
+ KBASE_TLSTREAM_TL_KBASE_DEVICE_SUSPEND_CSG(
+ kbdev,
+ kbdev->gpu_props.props.raw_props.gpu_id, i);
save_csg_slot(group);
as_fault = cleanup_csg_slot(group);
/* If AS fault detected, evict it */
@@ -4258,16 +4449,13 @@ static void protm_enter_set_next_pending_seq(struct kbase_device *const kbdev)
struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
u32 num_groups = kbdev->csf.global_iface.group_num;
u32 num_csis = kbdev->csf.global_iface.groups[0].stream_num;
- DECLARE_BITMAP(active_csgs, MAX_SUPPORTED_CSGS) = { 0 };
u32 i;
kbase_csf_scheduler_spin_lock_assert_held(kbdev);
- bitmap_xor(active_csgs, scheduler->csg_slots_idle_mask, scheduler->csg_inuse_bitmap,
- num_groups);
/* Reset the tick's pending protm seq number to invalid initially */
scheduler->tick_protm_pending_seq = KBASEP_TICK_PROTM_PEND_SCAN_SEQ_NR_INVALID;
- for_each_set_bit(i, active_csgs, num_groups) {
+ for_each_set_bit(i, scheduler->csg_inuse_bitmap, num_groups) {
struct kbase_queue_group *group = scheduler->csg_slots[i].resident_group;
/* Set to the next pending protm group's scan_seq_number */
@@ -4508,8 +4696,9 @@ static void scheduler_apply(struct kbase_device *kbdev)
program_suspending_csg_slots(kbdev);
}
-static void scheduler_ctx_scan_groups(struct kbase_device *kbdev,
- struct kbase_context *kctx, int priority)
+static void scheduler_ctx_scan_groups(struct kbase_device *kbdev, struct kbase_context *kctx,
+ int priority, struct list_head *privileged_groups,
+ struct list_head *active_groups)
{
struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
struct kbase_queue_group *group;
@@ -4523,8 +4712,9 @@ static void scheduler_ctx_scan_groups(struct kbase_device *kbdev,
if (!kctx_as_enabled(kctx))
return;
- list_for_each_entry(group, &kctx->csf.sched.runnable_groups[priority],
- link) {
+ list_for_each_entry(group, &kctx->csf.sched.runnable_groups[priority], link) {
+ bool protm_req;
+
if (WARN_ON(!list_empty(&group->link_to_schedule)))
/* This would be a bug */
list_del_init(&group->link_to_schedule);
@@ -4535,33 +4725,30 @@ static void scheduler_ctx_scan_groups(struct kbase_device *kbdev,
/* Set the scanout sequence number, starting from 0 */
group->scan_seq_num = scheduler->csg_scan_count_for_tick++;
+ protm_req = !bitmap_empty(group->protm_pending_bitmap,
+ kbdev->csf.global_iface.groups[0].stream_num);
+
if (scheduler->tick_protm_pending_seq ==
- KBASEP_TICK_PROTM_PEND_SCAN_SEQ_NR_INVALID) {
- if (!bitmap_empty(group->protm_pending_bitmap,
- kbdev->csf.global_iface.groups[0].stream_num))
- scheduler->tick_protm_pending_seq =
- group->scan_seq_num;
+ KBASEP_TICK_PROTM_PEND_SCAN_SEQ_NR_INVALID) {
+ if (protm_req)
+ scheduler->tick_protm_pending_seq = group->scan_seq_num;
}
- if (queue_group_idle_locked(group)) {
+ if (protm_req && on_slot_group_idle_locked(group))
+ update_idle_protm_group_state_to_runnable(group);
+ else if (queue_group_idle_locked(group)) {
if (can_schedule_idle_group(group))
list_add_tail(&group->link_to_schedule,
&scheduler->idle_groups_to_schedule);
continue;
}
- if (!scheduler->ngrp_to_schedule) {
- /* keep the top csg's origin */
- scheduler->top_ctx = kctx;
- scheduler->top_grp = group;
+ if (protm_req && (group->priority == KBASE_QUEUE_GROUP_PRIORITY_REALTIME)) {
+ list_add_tail(&group->link_to_schedule, privileged_groups);
+ continue;
}
- list_add_tail(&group->link_to_schedule,
- &scheduler->groups_to_schedule);
- group->prepared_seq_num = scheduler->ngrp_to_schedule++;
-
- kctx->csf.sched.ngrp_to_schedule++;
- count_active_address_space(kbdev, kctx);
+ list_add_tail(&group->link_to_schedule, active_groups);
}
}
@@ -4891,18 +5078,16 @@ static void scheduler_handle_idle_slots(struct kbase_device *kbdev)
spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
}
-static void scheduler_scan_idle_groups(struct kbase_device *kbdev)
+static void scheduler_scan_group_list(struct kbase_device *kbdev, struct list_head *groups)
{
struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
struct kbase_queue_group *group, *n;
- list_for_each_entry_safe(group, n, &scheduler->idle_groups_to_schedule,
- link_to_schedule) {
- WARN_ON(!can_schedule_idle_group(group));
-
+ list_for_each_entry_safe(group, n, groups, link_to_schedule) {
if (!scheduler->ngrp_to_schedule) {
/* keep the top csg's origin */
scheduler->top_ctx = group->kctx;
+ /* keep the top csg's origin */
scheduler->top_grp = group;
}
@@ -5049,7 +5234,12 @@ static bool all_on_slot_groups_remained_idle(struct kbase_device *kbdev)
if (!queue || !queue->user_io_addr)
continue;
- output_addr = (u64 const *)(queue->user_io_addr + PAGE_SIZE);
+ output_addr = (u64 const *)(queue->user_io_addr + PAGE_SIZE / sizeof(u64));
+ /*
+ * These 64-bit reads and writes will be atomic on a 64-bit kernel
+ * but may not be atomic on 32-bit kernels. Support for 32-bit
+ * kernels is limited to build-only.
+ */
cur_extract_ofs = output_addr[CS_EXTRACT_LO / sizeof(u64)];
if (cur_extract_ofs != queue->extract_ofs) {
/* More work has been executed since the idle
@@ -5141,10 +5331,13 @@ static void scheduler_sleep_on_idle(struct kbase_device *kbdev)
dev_dbg(kbdev->dev,
"Scheduler to be put to sleep on GPU becoming idle");
- cancel_tick_timer(kbdev);
+ cancel_tick_work(scheduler);
scheduler_pm_idle_before_sleep(kbdev);
scheduler->state = SCHED_SLEEPING;
KBASE_KTRACE_ADD(kbdev, SCHED_SLEEPING, NULL, scheduler->state);
+#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD)
+ emit_gpu_metrics_to_frontend(kbdev);
+#endif /* CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD */
}
#endif
@@ -5162,6 +5355,7 @@ static void scheduler_sleep_on_idle(struct kbase_device *kbdev)
*/
static bool scheduler_suspend_on_idle(struct kbase_device *kbdev)
{
+ struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
int ret = suspend_active_groups_on_powerdown(kbdev, false);
if (ret) {
@@ -5169,7 +5363,7 @@ static bool scheduler_suspend_on_idle(struct kbase_device *kbdev)
atomic_read(
&kbdev->csf.scheduler.non_idle_offslot_grps));
/* Bring forward the next tick */
- kbase_csf_scheduler_tick_advance(kbdev);
+ kbase_csf_scheduler_invoke_tick(kbdev);
return false;
}
@@ -5180,7 +5374,7 @@ static bool scheduler_suspend_on_idle(struct kbase_device *kbdev)
dev_dbg(kbdev->dev, "Scheduler to be suspended on GPU becoming idle");
scheduler_suspend(kbdev);
- cancel_tick_timer(kbdev);
+ cancel_tick_work(scheduler);
return true;
}
@@ -5514,6 +5708,7 @@ static void sc_rails_off_worker(struct work_struct *work)
static int scheduler_prepare(struct kbase_device *kbdev)
{
struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
+ struct list_head privileged_groups, active_groups;
unsigned long flags;
int i;
@@ -5539,6 +5734,8 @@ static int scheduler_prepare(struct kbase_device *kbdev)
scheduler->num_active_address_spaces = 0;
scheduler->num_csg_slots_for_tick = 0;
bitmap_zero(scheduler->csg_slots_prio_update, MAX_SUPPORTED_CSGS);
+ INIT_LIST_HEAD(&privileged_groups);
+ INIT_LIST_HEAD(&active_groups);
spin_lock_irqsave(&scheduler->interrupt_lock, flags);
scheduler->tick_protm_pending_seq =
@@ -5548,10 +5745,17 @@ static int scheduler_prepare(struct kbase_device *kbdev)
struct kbase_context *kctx;
list_for_each_entry(kctx, &scheduler->runnable_kctxs, csf.link)
- scheduler_ctx_scan_groups(kbdev, kctx, i);
+ scheduler_ctx_scan_groups(kbdev, kctx, i, &privileged_groups,
+ &active_groups);
}
spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
+ /* Adds privileged (RT + p.mode) groups to the scanout list */
+ scheduler_scan_group_list(kbdev, &privileged_groups);
+
+ /* Adds remainder of active groups to the scanout list */
+ scheduler_scan_group_list(kbdev, &active_groups);
+
/* Update this tick's non-idle groups */
scheduler->non_idle_scanout_grps = scheduler->ngrp_to_schedule;
@@ -5566,7 +5770,7 @@ static int scheduler_prepare(struct kbase_device *kbdev)
scheduler->non_idle_scanout_grps);
/* Adds those idle but runnable groups to the scanout list */
- scheduler_scan_idle_groups(kbdev);
+ scheduler_scan_group_list(kbdev, &scheduler->idle_groups_to_schedule);
WARN_ON(scheduler->csg_scan_count_for_tick < scheduler->ngrp_to_schedule);
@@ -5668,11 +5872,9 @@ static int prepare_fast_local_tock(struct kbase_device *kbdev)
return bitmap_weight(csg_bitmap, num_groups);
}
-static int wait_csg_slots_suspend(struct kbase_device *kbdev, unsigned long *slot_mask,
- unsigned int timeout_ms)
+static int wait_csg_slots_suspend(struct kbase_device *kbdev, unsigned long *slot_mask)
{
struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
- long remaining = kbase_csf_timeout_in_jiffies(timeout_ms);
u32 num_groups = kbdev->csf.global_iface.group_num;
int err = 0;
DECLARE_BITMAP(slot_mask_local, MAX_SUPPORTED_CSGS);
@@ -5681,11 +5883,11 @@ static int wait_csg_slots_suspend(struct kbase_device *kbdev, unsigned long *slo
bitmap_copy(slot_mask_local, slot_mask, MAX_SUPPORTED_CSGS);
- while (!bitmap_empty(slot_mask_local, MAX_SUPPORTED_CSGS) && remaining) {
+ while (!bitmap_empty(slot_mask_local, MAX_SUPPORTED_CSGS)) {
+ long remaining = kbase_csf_timeout_in_jiffies(kbase_get_timeout_ms(kbdev, CSF_CSG_SUSPEND_TIMEOUT));
DECLARE_BITMAP(changed, MAX_SUPPORTED_CSGS);
bitmap_copy(changed, slot_mask_local, MAX_SUPPORTED_CSGS);
-
remaining = wait_event_timeout(
kbdev->csf.event_wait,
slots_state_changed(kbdev, changed, csg_slot_stopped_locked), remaining);
@@ -5702,18 +5904,23 @@ static int wait_csg_slots_suspend(struct kbase_device *kbdev, unsigned long *slo
/* The on slot csg is now stopped */
clear_bit(i, slot_mask_local);
- KBASE_TLSTREAM_TL_KBASE_DEVICE_SUSPEND_CSG(
- kbdev, kbdev->gpu_props.props.raw_props.gpu_id, i);
-
group = scheduler->csg_slots[i].resident_group;
if (likely(group)) {
/* Only do save/cleanup if the
* group is not terminated during
* the sleep.
*/
+
+ /* Only emit suspend if there was no AS fault */
+ if (kctx_as_enabled(group->kctx) && !group->faulted)
+ KBASE_TLSTREAM_TL_KBASE_DEVICE_SUSPEND_CSG(
+ kbdev,
+ kbdev->gpu_props.props.raw_props.gpu_id, i);
+
save_csg_slot(group);
- if (cleanup_csg_slot(group))
+ if (cleanup_csg_slot(group)) {
sched_evict_group(group, true, true);
+ }
}
}
} else {
@@ -5724,8 +5931,8 @@ static int wait_csg_slots_suspend(struct kbase_device *kbdev, unsigned long *slo
slot_mask_local[0]);
/* Return the bitmask of the timed out slots to the caller */
bitmap_copy(slot_mask, slot_mask_local, MAX_SUPPORTED_CSGS);
-
err = -ETIMEDOUT;
+ break;
}
}
@@ -5787,7 +5994,7 @@ static void evict_lru_or_blocked_csg(struct kbase_device *kbdev)
* idle.
*/
if ((group->run_state == KBASE_CSF_GROUP_IDLE) &&
- (group->priority != BASE_QUEUE_GROUP_PRIORITY_REALTIME) &&
+ (group->priority != KBASE_QUEUE_GROUP_PRIORITY_REALTIME) &&
((lru_idle_group == NULL) ||
(lru_idle_group->prepared_seq_num < group->prepared_seq_num))) {
if (WARN_ON(group->kctx->as_nr < 0))
@@ -5809,7 +6016,7 @@ static void evict_lru_or_blocked_csg(struct kbase_device *kbdev)
lru_idle_group->handle, lru_idle_group->kctx->tgid,
lru_idle_group->kctx->id, lru_idle_group->csg_nr);
suspend_queue_group(lru_idle_group);
- if (wait_csg_slots_suspend(kbdev, &slot_mask, kbdev->csf.fw_timeout_ms)) {
+ if (wait_csg_slots_suspend(kbdev, &slot_mask)) {
enum dumpfault_error_type error_type = DF_CSG_SUSPEND_TIMEOUT;
dev_warn(
@@ -6033,10 +6240,8 @@ static bool can_skip_scheduling(struct kbase_device *kbdev)
return false;
}
-static void schedule_on_tock(struct kthread_work *work)
+static void schedule_on_tock(struct kbase_device *kbdev)
{
- struct kbase_device *kbdev =
- container_of(work, struct kbase_device, csf.scheduler.tock_work.work);
struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
int err;
@@ -6071,12 +6276,12 @@ static void schedule_on_tock(struct kthread_work *work)
KBASE_KTRACE_ADD(kbdev, SCHED_INACTIVE, NULL, scheduler->state);
if (!scheduler->total_runnable_grps)
enqueue_gpu_idle_work(scheduler, 0);
+#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD)
+ emit_gpu_metrics_to_frontend(kbdev);
+#endif /* CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD */
rt_mutex_unlock(&scheduler->lock);
kbase_reset_gpu_allow(kbdev);
- dev_dbg(kbdev->dev,
- "Waking up for event after schedule-on-tock completes.");
- wake_up_all(&kbdev->csf.event_wait);
KBASE_KTRACE_ADD(kbdev, SCHEDULER_TOCK_END, NULL, 0u);
return;
@@ -6085,10 +6290,8 @@ exit_no_schedule_unlock:
kbase_reset_gpu_allow(kbdev);
}
-static void schedule_on_tick(struct kthread_work *work)
+static void schedule_on_tick(struct kbase_device *kbdev)
{
- struct kbase_device *kbdev =
- container_of(work, struct kbase_device, csf.scheduler.tick_work);
struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
int err = kbase_reset_gpu_try_prevent(kbdev);
@@ -6115,23 +6318,25 @@ static void schedule_on_tick(struct kthread_work *work)
scheduler->last_schedule = jiffies;
/* Kicking next scheduling if needed */
- if (likely(scheduler_timer_is_enabled_nolock(kbdev)) &&
- (scheduler->total_runnable_grps > 0)) {
- start_tick_timer(kbdev);
- dev_dbg(kbdev->dev,
- "scheduling for next tick, num_runnable_groups:%u\n",
+ if (likely(kbase_csf_scheduler_timer_is_enabled(kbdev)) &&
+ (scheduler->total_runnable_grps > 0)) {
+ hrtimer_start(&scheduler->tick_timer,
+ HR_TIMER_DELAY_MSEC(scheduler->csg_scheduling_period_ms),
+ HRTIMER_MODE_REL);
+ dev_dbg(kbdev->dev, "scheduling for next tick, num_runnable_groups:%u\n",
scheduler->total_runnable_grps);
} else if (!scheduler->total_runnable_grps) {
enqueue_gpu_idle_work(scheduler, 0);
}
scheduler->state = SCHED_INACTIVE;
+#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD)
+ emit_gpu_metrics_to_frontend(kbdev);
+#endif /* CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD */
rt_mutex_unlock(&scheduler->lock);
KBASE_KTRACE_ADD(kbdev, SCHED_INACTIVE, NULL, scheduler->state);
kbase_reset_gpu_allow(kbdev);
- dev_dbg(kbdev->dev, "Waking up for event after schedule-on-tick completes.");
- wake_up_all(&kbdev->csf.event_wait);
KBASE_KTRACE_ADD(kbdev, SCHEDULER_TICK_END, NULL,
scheduler->total_runnable_grps);
return;
@@ -6161,7 +6366,7 @@ static int suspend_active_queue_groups(struct kbase_device *kbdev,
}
}
- ret = wait_csg_slots_suspend(kbdev, slot_mask, kbdev->reset_timeout_ms);
+ ret = wait_csg_slots_suspend(kbdev, slot_mask);
return ret;
}
@@ -6180,7 +6385,7 @@ static int suspend_active_queue_groups_on_reset(struct kbase_device *kbdev)
dev_warn(kbdev->dev, "Timeout waiting for CSG slots to suspend before reset, slot_mask: 0x%*pb\n",
kbdev->csf.global_iface.group_num, slot_mask);
//TODO: should introduce SSCD report if this happens.
- kbase_gpu_timeout_debug_message(kbdev);
+ kbase_gpu_timeout_debug_message(kbdev, "");
dev_warn(kbdev->dev, "[%llu] Firmware ping %d",
kbase_backend_get_cycle_cnt(kbdev),
kbase_csf_firmware_ping_wait(kbdev, 0));
@@ -6201,11 +6406,10 @@ static int suspend_active_queue_groups_on_reset(struct kbase_device *kbdev)
* overflow.
*/
kbase_gpu_start_cache_clean(kbdev, GPU_COMMAND_CACHE_CLN_INV_L2_LSC);
- ret2 = kbase_gpu_wait_cache_clean_timeout(kbdev,
- kbdev->reset_timeout_ms);
+ ret2 = kbase_gpu_wait_cache_clean_timeout(kbdev, kbdev->mmu_or_gpu_cache_op_wait_time_ms);
if (ret2) {
- dev_warn(kbdev->dev, "[%llu] Timeout waiting for cache clean to complete before reset",
- kbase_backend_get_cycle_cnt(kbdev));
+ dev_err(kbdev->dev, "[%llu] Timeout waiting for CACHE_CLN_INV_L2_LSC",
+ kbase_backend_get_cycle_cnt(kbdev));
if (!ret)
ret = ret2;
}
@@ -6323,17 +6527,6 @@ unlock:
return suspend_on_slot_groups;
}
-static void cancel_tick_work(struct kbase_csf_scheduler *const scheduler)
-{
- kthread_cancel_work_sync(&scheduler->tick_work);
-}
-
-static void cancel_tock_work(struct kbase_csf_scheduler *const scheduler)
-{
- atomic_set(&scheduler->pending_tock_work, false);
- kthread_cancel_delayed_work_sync(&scheduler->tock_work);
-}
-
static void scheduler_inner_reset(struct kbase_device *kbdev)
{
u32 const num_groups = kbdev->csf.global_iface.group_num;
@@ -6348,7 +6541,6 @@ static void scheduler_inner_reset(struct kbase_device *kbdev)
#else
cancel_work_sync(&kbdev->csf.scheduler.gpu_idle_work);
#endif
- cancel_tick_timer(kbdev);
cancel_tick_work(scheduler);
cancel_tock_work(scheduler);
cancel_delayed_work_sync(&scheduler->ping_work);
@@ -6547,8 +6739,7 @@ int kbase_csf_scheduler_group_copy_suspend_buf(struct kbase_queue_group *group,
if (!WARN_ON(scheduler->state == SCHED_SUSPENDED))
suspend_queue_group(group);
- err = wait_csg_slots_suspend(kbdev, slot_mask,
- kbdev->csf.fw_timeout_ms);
+ err = wait_csg_slots_suspend(kbdev, slot_mask);
if (err) {
const struct gpu_uevent evt = {
.type = GPU_UEVENT_TYPE_KMD_ERROR,
@@ -6593,7 +6784,7 @@ int kbase_csf_scheduler_group_copy_suspend_buf(struct kbase_queue_group *group,
target_page_nr < sus_buf->nr_pages; i++) {
struct page *pg =
as_page(group->normal_suspend_buf.phy[i]);
- void *sus_page = kmap(pg);
+ void *sus_page = kbase_kmap(pg);
if (sus_page) {
kbase_sync_single_for_cpu(kbdev,
@@ -6604,7 +6795,7 @@ int kbase_csf_scheduler_group_copy_suspend_buf(struct kbase_queue_group *group,
sus_buf->pages, sus_page,
&to_copy, sus_buf->nr_pages,
&target_page_nr, offset);
- kunmap(pg);
+ kbase_kunmap(pg, sus_page);
if (err)
break;
} else {
@@ -6720,12 +6911,21 @@ static struct kbase_queue_group *scheduler_get_protm_enter_async_group(
spin_lock_irqsave(&scheduler->interrupt_lock, flags);
- if (kbase_csf_scheduler_protected_mode_in_use(kbdev) ||
- bitmap_empty(pending, ginfo->stream_num))
+ if (bitmap_empty(pending, ginfo->stream_num)) {
+ dev_dbg(kbdev->dev,
+ "Pmode requested for group %d of ctx %d_%d with no pending queues",
+ input_grp->handle, input_grp->kctx->tgid, input_grp->kctx->id);
+ input_grp = NULL;
+ } else if (kbase_csf_scheduler_protected_mode_in_use(kbdev)) {
+ kbase_csf_scheduler_invoke_tock(kbdev);
input_grp = NULL;
+ }
spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
} else {
+ if (group && (group->priority == KBASE_QUEUE_GROUP_PRIORITY_REALTIME))
+ kbase_csf_scheduler_invoke_tock(kbdev);
+
input_grp = NULL;
}
@@ -6753,11 +6953,8 @@ void kbase_csf_scheduler_group_protm_enter(struct kbase_queue_group *group)
rt_mutex_lock(&scheduler->lock);
- if (group->run_state == KBASE_CSF_GROUP_IDLE) {
- group->run_state = KBASE_CSF_GROUP_RUNNABLE;
- KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSF_GROUP_RUNNABLE, group,
- group->run_state);
- }
+ if (on_slot_group_idle_locked(group))
+ update_idle_protm_group_state_to_runnable(group);
/* Check if the group is now eligible for execution in protected mode. */
if (scheduler_get_protm_enter_async_group(kbdev, group))
scheduler_group_check_protm_enter(kbdev, group);
@@ -7084,6 +7281,13 @@ int kbase_csf_scheduler_context_init(struct kbase_context *kctx)
{
int priority;
int err;
+ struct kbase_device *kbdev = kctx->kbdev;
+
+#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD)
+ err = gpu_metrics_ctx_init(kctx);
+ if (err)
+ return err;
+#endif /* CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD */
kbase_ctx_sched_init_ctx(kctx);
@@ -7115,8 +7319,7 @@ int kbase_csf_scheduler_context_init(struct kbase_context *kctx)
err = kbase_csf_event_wait_add(kctx, check_group_sync_update_cb, kctx);
if (err) {
- dev_err(kctx->kbdev->dev,
- "Failed to register a sync update callback");
+ dev_err(kbdev->dev, "Failed to register a sync update callback");
goto event_wait_add_failed;
}
@@ -7126,6 +7329,9 @@ event_wait_add_failed:
kbase_destroy_kworker_stack(&kctx->csf.sched.sync_update_worker);
alloc_wq_failed:
kbase_ctx_sched_remove_ctx(kctx);
+#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD)
+ gpu_metrics_ctx_term(kctx);
+#endif /* CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD */
return err;
}
@@ -7136,6 +7342,74 @@ void kbase_csf_scheduler_context_term(struct kbase_context *kctx)
kbase_destroy_kworker_stack(&kctx->csf.sched.sync_update_worker);
kbase_ctx_sched_remove_ctx(kctx);
+#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD)
+ gpu_metrics_ctx_term(kctx);
+#endif /* CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD */
+}
+
+static int kbase_csf_scheduler_kthread(void *data)
+{
+ struct kbase_device *const kbdev = data;
+ struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
+
+ while (scheduler->kthread_running) {
+ struct kbase_queue *queue;
+
+ if (wait_for_completion_interruptible(&scheduler->kthread_signal) != 0)
+ continue;
+ reinit_completion(&scheduler->kthread_signal);
+
+ /* Iterate through queues with pending kicks */
+ do {
+ u8 prio;
+
+ spin_lock(&kbdev->csf.pending_gpuq_kicks_lock);
+ queue = NULL;
+ for (prio = 0; prio != KBASE_QUEUE_GROUP_PRIORITY_COUNT; ++prio) {
+ if (!list_empty(&kbdev->csf.pending_gpuq_kicks[prio])) {
+ queue = list_first_entry(
+ &kbdev->csf.pending_gpuq_kicks[prio],
+ struct kbase_queue, pending_kick_link);
+ list_del_init(&queue->pending_kick_link);
+ break;
+ }
+ }
+ spin_unlock(&kbdev->csf.pending_gpuq_kicks_lock);
+
+ if (queue != NULL) {
+ WARN_ONCE(
+ prio != queue->group_priority,
+ "Queue %pK has priority %hhu but instead its kick was handled at priority %hhu",
+ (void *)queue, queue->group_priority, prio);
+
+ kbase_csf_process_queue_kick(queue);
+
+ /* Perform a scheduling tock for high-priority queue groups if
+ * required.
+ */
+ BUILD_BUG_ON(KBASE_QUEUE_GROUP_PRIORITY_REALTIME != 0);
+ BUILD_BUG_ON(KBASE_QUEUE_GROUP_PRIORITY_HIGH != 1);
+ if ((prio <= KBASE_QUEUE_GROUP_PRIORITY_HIGH) &&
+ atomic_read(&scheduler->pending_tock_work))
+ schedule_on_tock(kbdev);
+ }
+ } while (queue != NULL);
+
+ /* Check if we need to perform a scheduling tick/tock. A tick
+ * event shall override a tock event but not vice-versa.
+ */
+ if (atomic_cmpxchg(&scheduler->pending_tick_work, true, false) == true) {
+ atomic_set(&scheduler->pending_tock_work, false);
+ schedule_on_tick(kbdev);
+ } else if (atomic_read(&scheduler->pending_tock_work)) {
+ schedule_on_tock(kbdev);
+ }
+
+ dev_dbg(kbdev->dev, "Waking up for event after a scheduling iteration.");
+ wake_up_all(&kbdev->csf.event_wait);
+ }
+
+ return 0;
}
int kbase_csf_scheduler_init(struct kbase_device *kbdev)
@@ -7154,33 +7428,51 @@ int kbase_csf_scheduler_init(struct kbase_device *kbdev)
return -ENOMEM;
}
+ init_completion(&scheduler->kthread_signal);
+ scheduler->kthread_running = true;
+ scheduler->gpuq_kthread =
+ kthread_run(&kbase_csf_scheduler_kthread, kbdev, "mali-gpuq-kthread");
+ if (IS_ERR_OR_NULL(scheduler->gpuq_kthread)) {
+ scheduler->gpuq_kthread = NULL;
+ kfree(scheduler->csg_slots);
+ scheduler->csg_slots = NULL;
+
+ dev_err(kbdev->dev, "Failed to spawn the GPU queue submission worker thread");
+ return -ENOMEM;
+ }
+#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) && !IS_ENABLED(CONFIG_MALI_NO_MALI)
+ scheduler->gpu_metrics_tb =
+ kbase_csf_firmware_get_trace_buffer(kbdev, KBASE_CSFFW_GPU_METRICS_BUF_NAME);
+ if (!scheduler->gpu_metrics_tb) {
+ scheduler->kthread_running = false;
+ complete(&scheduler->kthread_signal);
+ kthread_stop(scheduler->gpuq_kthread);
+ scheduler->gpuq_kthread = NULL;
+
+ kfree(scheduler->csg_slots);
+ scheduler->csg_slots = NULL;
+
+ dev_err(kbdev->dev, "Failed to get the handler of gpu_metrics from trace buffer");
+ return -ENOENT;
+ }
+#endif /* CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD */
+
return kbase_csf_mcu_shared_regs_data_init(kbdev);
}
int kbase_csf_scheduler_early_init(struct kbase_device *kbdev)
{
- int err;
struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
- scheduler->timer_enabled = true;
+ atomic_set(&scheduler->timer_enabled, true);
- err = kbase_create_realtime_thread(kbdev, kthread_worker_fn, &scheduler->csf_worker,
- "csf_scheduler");
- if (err) {
- dev_err(kbdev->dev, "Failed to allocate scheduler kworker\n");
- return -ENOMEM;
- }
scheduler->idle_wq = alloc_ordered_workqueue(
"csf_scheduler_gpu_idle_wq", WQ_HIGHPRI);
if (!scheduler->idle_wq) {
- dev_err(kbdev->dev,
- "Failed to allocate GPU idle scheduler workqueue\n");
- kbase_destroy_kworker_stack(&kbdev->csf.scheduler.csf_worker);
+ dev_err(kbdev->dev, "Failed to allocate GPU idle scheduler workqueue\n");
return -ENOMEM;
}
- kthread_init_work(&scheduler->tick_work, schedule_on_tick);
- kthread_init_delayed_work(&scheduler->tock_work, schedule_on_tock);
+ atomic_set(&scheduler->pending_tick_work, false);
atomic_set(&scheduler->pending_tock_work, false);
INIT_DEFERRABLE_WORK(&scheduler->ping_work, firmware_aliveness_monitor);
@@ -7223,7 +7515,6 @@ int kbase_csf_scheduler_early_init(struct kbase_device *kbdev)
hrtimer_init(&scheduler->tick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
scheduler->tick_timer.function = tick_timer_callback;
- scheduler->tick_timer_active = false;
kbase_csf_tiler_heap_reclaim_mgr_init(kbdev);
@@ -7232,6 +7523,14 @@ int kbase_csf_scheduler_early_init(struct kbase_device *kbdev)
void kbase_csf_scheduler_term(struct kbase_device *kbdev)
{
+ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
+
+ if (scheduler->gpuq_kthread) {
+ scheduler->kthread_running = false;
+ complete(&scheduler->kthread_signal);
+ kthread_stop(scheduler->gpuq_kthread);
+ }
+
if (kbdev->csf.scheduler.csg_slots) {
WARN_ON(atomic_read(&kbdev->csf.scheduler.non_idle_offslot_grps));
/* The unload of Driver can take place only when all contexts have
@@ -7261,9 +7560,6 @@ void kbase_csf_scheduler_term(struct kbase_device *kbdev)
rt_mutex_unlock(&kbdev->csf.scheduler.lock);
cancel_delayed_work_sync(&kbdev->csf.scheduler.ping_work);
- cancel_tick_timer(kbdev);
- cancel_tick_work(&kbdev->csf.scheduler);
- cancel_tock_work(&kbdev->csf.scheduler);
kfree(kbdev->csf.scheduler.csg_slots);
kbdev->csf.scheduler.csg_slots = NULL;
}
@@ -7277,8 +7573,6 @@ void kbase_csf_scheduler_early_term(struct kbase_device *kbdev)
{
if (kbdev->csf.scheduler.idle_wq)
destroy_workqueue(kbdev->csf.scheduler.idle_wq);
- if (kbdev->csf.scheduler.csf_worker.task)
- kbase_destroy_kworker_stack(&kbdev->csf.scheduler.csf_worker);
kbase_csf_tiler_heap_reclaim_mgr_term(kbdev);
}
@@ -7299,7 +7593,7 @@ static void scheduler_enable_tick_timer_nolock(struct kbase_device *kbdev)
lockdep_assert_held(&kbdev->csf.scheduler.lock);
- if (unlikely(!scheduler_timer_is_enabled_nolock(kbdev)))
+ if (unlikely(!kbase_csf_scheduler_timer_is_enabled(kbdev)))
return;
WARN_ON((scheduler->state != SCHED_INACTIVE) &&
@@ -7307,7 +7601,7 @@ static void scheduler_enable_tick_timer_nolock(struct kbase_device *kbdev)
(scheduler->state != SCHED_SLEEPING));
if (scheduler->total_runnable_grps > 0) {
- enqueue_tick_work(kbdev);
+ kbase_csf_scheduler_invoke_tick(kbdev);
dev_dbg(kbdev->dev, "Re-enabling the scheduler timer\n");
} else if (scheduler->state != SCHED_SUSPENDED) {
enqueue_gpu_idle_work(scheduler, 0);
@@ -7321,43 +7615,24 @@ void kbase_csf_scheduler_enable_tick_timer(struct kbase_device *kbdev)
rt_mutex_unlock(&kbdev->csf.scheduler.lock);
}
-bool kbase_csf_scheduler_timer_is_enabled(struct kbase_device *kbdev)
-{
- struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
- bool enabled;
-
- rt_mutex_lock(&scheduler->lock);
- enabled = scheduler_timer_is_enabled_nolock(kbdev);
- rt_mutex_unlock(&scheduler->lock);
-
- return enabled;
-}
-
void kbase_csf_scheduler_timer_set_enabled(struct kbase_device *kbdev,
bool enable)
{
struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
bool currently_enabled;
+ /* This lock is taken to prevent this code being executed concurrently
+ * by userspace.
+ */
rt_mutex_lock(&scheduler->lock);
- currently_enabled = scheduler_timer_is_enabled_nolock(kbdev);
+ currently_enabled = kbase_csf_scheduler_timer_is_enabled(kbdev);
if (currently_enabled && !enable) {
- scheduler->timer_enabled = false;
- cancel_tick_timer(kbdev);
- rt_mutex_unlock(&scheduler->lock);
- /* The non-sync version to cancel the normal work item is not
- * available, so need to drop the lock before cancellation.
- */
+ atomic_set(&scheduler->timer_enabled, false);
cancel_tick_work(scheduler);
- cancel_tock_work(scheduler);
- return;
- }
-
- if (!currently_enabled && enable) {
- scheduler->timer_enabled = true;
-
- scheduler_enable_tick_timer_nolock(kbdev);
+ } else if (!currently_enabled && enable) {
+ atomic_set(&scheduler->timer_enabled, true);
+ kbase_csf_scheduler_invoke_tick(kbdev);
}
rt_mutex_unlock(&scheduler->lock);
@@ -7367,17 +7642,17 @@ void kbase_csf_scheduler_kick(struct kbase_device *kbdev)
{
struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
- rt_mutex_lock(&scheduler->lock);
+ if (unlikely(kbase_csf_scheduler_timer_is_enabled(kbdev)))
+ return;
- if (unlikely(scheduler_timer_is_enabled_nolock(kbdev)))
- goto out;
+ /* This lock is taken to prevent this code being executed concurrently
+ * by userspace.
+ */
+ rt_mutex_lock(&scheduler->lock);
- if (scheduler->total_runnable_grps > 0) {
- enqueue_tick_work(kbdev);
- dev_dbg(kbdev->dev, "Kicking the scheduler manually\n");
- }
+ kbase_csf_scheduler_invoke_tick(kbdev);
+ dev_dbg(kbdev->dev, "Kicking the scheduler manually\n");
-out:
rt_mutex_unlock(&scheduler->lock);
}
@@ -7414,7 +7689,7 @@ int kbase_csf_scheduler_pm_suspend_no_lock(struct kbase_device *kbdev)
} else {
dev_dbg(kbdev->dev, "Scheduler PM suspend");
scheduler_suspend(kbdev);
- cancel_tick_timer(kbdev);
+ cancel_tick_work(scheduler);
}
}
@@ -7492,7 +7767,7 @@ void kbase_csf_scheduler_pm_idle(struct kbase_device *kbdev)
}
KBASE_EXPORT_TEST_API(kbase_csf_scheduler_pm_idle);
-int kbase_csf_scheduler_wait_mcu_active(struct kbase_device *kbdev)
+static int scheduler_wait_mcu_active(struct kbase_device *kbdev, bool killable_wait)
{
struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
unsigned long flags;
@@ -7505,9 +7780,17 @@ int kbase_csf_scheduler_wait_mcu_active(struct kbase_device *kbdev)
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
kbase_pm_unlock(kbdev);
- kbase_pm_wait_for_poweroff_work_complete(kbdev);
+ if (killable_wait)
+ err = kbase_pm_killable_wait_for_poweroff_work_complete(kbdev);
+ else
+ err = kbase_pm_wait_for_poweroff_work_complete(kbdev);
+ if (err)
+ return err;
- err = kbase_pm_wait_for_desired_state(kbdev);
+ if (killable_wait)
+ err = kbase_pm_killable_wait_for_desired_state(kbdev);
+ else
+ err = kbase_pm_wait_for_desired_state(kbdev);
if (!err) {
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
WARN_ON(kbdev->pm.backend.mcu_state != KBASE_MCU_ON);
@@ -7516,6 +7799,17 @@ int kbase_csf_scheduler_wait_mcu_active(struct kbase_device *kbdev)
return err;
}
+
+int kbase_csf_scheduler_killable_wait_mcu_active(struct kbase_device *kbdev)
+{
+ return scheduler_wait_mcu_active(kbdev, true);
+}
+
+int kbase_csf_scheduler_wait_mcu_active(struct kbase_device *kbdev)
+{
+ return scheduler_wait_mcu_active(kbdev, false);
+}
+
KBASE_EXPORT_TEST_API(kbase_csf_scheduler_wait_mcu_active);
#ifdef KBASE_PM_RUNTIME
@@ -7594,8 +7888,7 @@ void kbase_csf_scheduler_force_sleep(struct kbase_device *kbdev)
struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
rt_mutex_lock(&scheduler->lock);
- if (kbase_pm_gpu_sleep_allowed(kbdev) &&
- (scheduler->state == SCHED_INACTIVE))
+ if (kbase_pm_gpu_sleep_allowed(kbdev) && (scheduler->state == SCHED_INACTIVE))
scheduler_sleep_on_idle(kbdev);
rt_mutex_unlock(&scheduler->lock);
}
diff --git a/mali_kbase/csf/mali_kbase_csf_scheduler.h b/mali_kbase/csf/mali_kbase_csf_scheduler.h
index 4062d78..88521f0 100644
--- a/mali_kbase/csf/mali_kbase_csf_scheduler.h
+++ b/mali_kbase/csf/mali_kbase_csf_scheduler.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -338,7 +338,10 @@ kbase_csf_scheduler_spin_lock_assert_held(struct kbase_device *kbdev)
*
* Return: true if the scheduler is configured to wake up periodically
*/
-bool kbase_csf_scheduler_timer_is_enabled(struct kbase_device *kbdev);
+static inline bool kbase_csf_scheduler_timer_is_enabled(struct kbase_device *kbdev)
+{
+ return atomic_read(&kbdev->csf.scheduler.timer_enabled);
+}
/**
* kbase_csf_scheduler_timer_set_enabled() - Enable/disable periodic
@@ -412,6 +415,22 @@ void kbase_csf_scheduler_pm_idle(struct kbase_device *kbdev);
int kbase_csf_scheduler_wait_mcu_active(struct kbase_device *kbdev);
/**
+ * kbase_csf_scheduler_killable_wait_mcu_active - Wait for the MCU to actually become
+ * active in killable state.
+ *
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
+ *
+ * This function is the same as kbase_csf_scheduler_wait_mcu_active(), except that
+ * it allows the SIGKILL signal to interrupt the wait.
+ * This function is supposed to be called from the code that is executed in ioctl or
+ * Userspace context, wherever it is safe to do so.
+ *
+ * Return: 0 if the MCU was successfully activated, -ETIMEDOUT on timeout, or
+ * -ERESTARTSYS if the wait was interrupted.
+ */
+int kbase_csf_scheduler_killable_wait_mcu_active(struct kbase_device *kbdev);
+
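For illustration only (not part of the patch), a minimal sketch of a caller in ioctl context, where the killable variant is preferred so that SIGKILL can interrupt the wait; kbdev is assumed to be the usual struct kbase_device pointer and the surrounding handler is hypothetical:

int err = kbase_csf_scheduler_killable_wait_mcu_active(kbdev);

if (err == -ERESTARTSYS)
	return err; /* wait interrupted by SIGKILL, propagate to the caller */
if (err)
	return err; /* e.g. -ETIMEDOUT */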
+/**
* kbase_csf_scheduler_pm_resume_no_lock - Reactivate the scheduler on system resume
*
* @kbdev: Instance of a GPU platform device that implements a CSF interface.
@@ -474,69 +493,24 @@ static inline bool kbase_csf_scheduler_all_csgs_idle(struct kbase_device *kbdev)
}
/**
- * kbase_csf_scheduler_tick_advance_nolock() - Advance the scheduling tick
- *
- * @kbdev: Pointer to the device
- *
- * This function advances the scheduling tick by enqueing the tick work item for
- * immediate execution, but only if the tick hrtimer is active. If the timer
- * is inactive then the tick work item is already in flight.
- * The caller must hold the interrupt lock.
- */
-static inline void
-kbase_csf_scheduler_tick_advance_nolock(struct kbase_device *kbdev)
-{
- struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
-
- lockdep_assert_held(&scheduler->interrupt_lock);
-
- if (scheduler->tick_timer_active) {
- KBASE_KTRACE_ADD(kbdev, SCHEDULER_TICK_ADVANCE, NULL, 0u);
- scheduler->tick_timer_active = false;
- kthread_queue_work(&scheduler->csf_worker, &scheduler->tick_work);
- } else {
- KBASE_KTRACE_ADD(kbdev, SCHEDULER_TICK_NOADVANCE, NULL, 0u);
- }
-}
-
-/**
- * kbase_csf_scheduler_tick_advance() - Advance the scheduling tick
- *
- * @kbdev: Pointer to the device
- *
- * This function advances the scheduling tick by enqueing the tick work item for
- * immediate execution, but only if the tick hrtimer is active. If the timer
- * is inactive then the tick work item is already in flight.
- */
-static inline void kbase_csf_scheduler_tick_advance(struct kbase_device *kbdev)
-{
- struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
- unsigned long flags;
-
- spin_lock_irqsave(&scheduler->interrupt_lock, flags);
- kbase_csf_scheduler_tick_advance_nolock(kbdev);
- spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
-}
-
-/**
* kbase_csf_scheduler_invoke_tick() - Invoke the scheduling tick
*
* @kbdev: Pointer to the device
*
- * This function will queue the scheduling tick work item for immediate
- * execution if tick timer is not active. This can be called from interrupt
- * context to resume the scheduling after GPU was put to sleep.
+ * This function wakes up kbase_csf_scheduler_kthread() to perform a scheduling
+ * tick regardless of whether the tick timer is enabled. This can be called
+ * from interrupt context to resume the scheduling after GPU was put to sleep.
+ *
+ * Caller is expected to check kbase_csf_scheduler.timer_enabled as required
+ * to see whether it is appropriate before calling this function.
*/
static inline void kbase_csf_scheduler_invoke_tick(struct kbase_device *kbdev)
{
struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
- unsigned long flags;
KBASE_KTRACE_ADD(kbdev, SCHEDULER_TICK_INVOKE, NULL, 0u);
- spin_lock_irqsave(&scheduler->interrupt_lock, flags);
- if (!scheduler->tick_timer_active)
- kthread_queue_work(&scheduler->csf_worker, &scheduler->tick_work);
- spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
+ if (atomic_cmpxchg(&scheduler->pending_tick_work, false, true) == false)
+ complete(&scheduler->kthread_signal);
}
/**
@@ -544,8 +518,11 @@ static inline void kbase_csf_scheduler_invoke_tick(struct kbase_device *kbdev)
*
* @kbdev: Pointer to the device
*
- * This function will queue the scheduling tock work item for immediate
- * execution.
+ * This function wakes up kbase_csf_scheduler_kthread() to perform a scheduling
+ * tock.
+ *
+ * Caller is expected to check kbase_csf_scheduler.timer_enabled as required
+ * to see whether it is appropriate before calling this function.
*/
static inline void kbase_csf_scheduler_invoke_tock(struct kbase_device *kbdev)
{
@@ -553,7 +530,7 @@ static inline void kbase_csf_scheduler_invoke_tock(struct kbase_device *kbdev)
KBASE_KTRACE_ADD(kbdev, SCHEDULER_TOCK_INVOKE, NULL, 0u);
if (atomic_cmpxchg(&scheduler->pending_tock_work, false, true) == false)
- kthread_mod_delayed_work(&scheduler->csf_worker, &scheduler->tock_work, 0);
+ complete(&scheduler->kthread_signal);
}
/**
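
The reworked invoke_tick()/invoke_tock() helpers above coalesce wake-ups: only the caller that flips the pending flag from false to true signals the scheduler kthread, so bursts of requests collapse into a single pass. Below is a standalone C11/pthreads model of that pattern; the names are illustrative, not kbase symbols.

#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_bool pending_tick;
static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t signal_cv = PTHREAD_COND_INITIALIZER;
static bool signalled;
static bool stop;

static void invoke_tick(void)
{
	bool expected = false;

	/* Only the caller that transitions the flag wakes the worker. */
	if (atomic_compare_exchange_strong(&pending_tick, &expected, true)) {
		pthread_mutex_lock(&lock);
		signalled = true;
		pthread_cond_signal(&signal_cv);
		pthread_mutex_unlock(&lock);
	}
}

static void *worker(void *arg)
{
	(void)arg;
	for (;;) {
		pthread_mutex_lock(&lock);
		while (!signalled && !stop)
			pthread_cond_wait(&signal_cv, &lock);
		signalled = false;
		if (stop) {
			pthread_mutex_unlock(&lock);
			return NULL;
		}
		pthread_mutex_unlock(&lock);

		/* Clear the flag before working so requests that arrive
		 * while we run schedule another pass.
		 */
		if (atomic_exchange(&pending_tick, false))
			puts("worker: performing one scheduling tick");
	}
}

int main(void)
{
	pthread_t t;
	int i;

	pthread_create(&t, NULL, worker, NULL);
	for (i = 0; i < 100; i++)
		invoke_tick();	/* bursts collapse into few wake-ups */

	pthread_mutex_lock(&lock);
	stop = true;
	pthread_cond_signal(&signal_cv);
	pthread_mutex_unlock(&lock);
	pthread_join(t, NULL);
	return 0;
}
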
diff --git a/mali_kbase/csf/mali_kbase_csf_sync_debugfs.c b/mali_kbase/csf/mali_kbase_csf_sync_debugfs.c
index a5e0ab5..72c0b6f 100644
--- a/mali_kbase/csf/mali_kbase_csf_sync_debugfs.c
+++ b/mali_kbase/csf/mali_kbase_csf_sync_debugfs.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2022-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -23,49 +23,46 @@
#include "mali_kbase_csf_csg_debugfs.h"
#include <mali_kbase.h>
#include <linux/seq_file.h>
+#include <linux/version_compat_defs.h>
#if IS_ENABLED(CONFIG_SYNC_FILE)
#include "mali_kbase_sync.h"
#endif
-#if IS_ENABLED(CONFIG_DEBUG_FS)
-
#define CQS_UNREADABLE_LIVE_VALUE "(unavailable)"
-/* GPU queue related values */
-#define GPU_CSF_MOVE_OPCODE ((u64)0x1)
-#define GPU_CSF_MOVE32_OPCODE ((u64)0x2)
-#define GPU_CSF_SYNC_ADD_OPCODE ((u64)0x25)
-#define GPU_CSF_SYNC_SET_OPCODE ((u64)0x26)
-#define GPU_CSF_SYNC_WAIT_OPCODE ((u64)0x27)
-#define GPU_CSF_SYNC_ADD64_OPCODE ((u64)0x33)
-#define GPU_CSF_SYNC_SET64_OPCODE ((u64)0x34)
-#define GPU_CSF_SYNC_WAIT64_OPCODE ((u64)0x35)
-#define GPU_CSF_CALL_OPCODE ((u64)0x20)
+#define CSF_SYNC_DUMP_SIZE 256
-#define MAX_NR_GPU_CALLS (5)
-#define INSTR_OPCODE_MASK ((u64)0xFF << 56)
-#define INSTR_OPCODE_GET(value) ((value & INSTR_OPCODE_MASK) >> 56)
-#define MOVE32_IMM_MASK ((u64)0xFFFFFFFFFUL)
-#define MOVE_DEST_MASK ((u64)0xFF << 48)
-#define MOVE_DEST_GET(value) ((value & MOVE_DEST_MASK) >> 48)
-#define MOVE_IMM_MASK ((u64)0xFFFFFFFFFFFFUL)
-#define SYNC_SRC0_MASK ((u64)0xFF << 40)
-#define SYNC_SRC1_MASK ((u64)0xFF << 32)
-#define SYNC_SRC0_GET(value) (u8)((value & SYNC_SRC0_MASK) >> 40)
-#define SYNC_SRC1_GET(value) (u8)((value & SYNC_SRC1_MASK) >> 32)
-#define SYNC_WAIT_CONDITION_MASK ((u64)0xF << 28)
-#define SYNC_WAIT_CONDITION_GET(value) (u8)((value & SYNC_WAIT_CONDITION_MASK) >> 28)
-
-/* Enumeration for types of GPU queue sync events for
- * the purpose of dumping them through debugfs.
+/**
+ * kbasep_print() - Helper function to print to either debugfs file or dmesg.
+ *
+ * @kctx: The kbase context
+ * @file: The seq_file for printing to. This is NULL if printing to dmesg.
+ * @fmt: The format string for the message.
+ * @...: Arguments to format the message.
*/
-enum debugfs_gpu_sync_type {
- DEBUGFS_GPU_SYNC_WAIT,
- DEBUGFS_GPU_SYNC_SET,
- DEBUGFS_GPU_SYNC_ADD,
- NUM_DEBUGFS_GPU_SYNC_TYPES
-};
+__attribute__((format(__printf__, 3, 4))) static void
+kbasep_print(struct kbase_context *kctx, struct seq_file *file, const char *fmt, ...)
+{
+ int len = 0;
+ char buffer[CSF_SYNC_DUMP_SIZE];
+ va_list arglist;
+
+ va_start(arglist, fmt);
+ len = vsnprintf(buffer, CSF_SYNC_DUMP_SIZE, fmt, arglist);
+ if (len <= 0) {
+ pr_err("message write to the buffer failed");
+ goto exit;
+ }
+
+ if (file)
+ seq_printf(file, buffer);
+ else
+ dev_warn(kctx->kbdev->dev, buffer);
+
+exit:
+ va_end(arglist);
+}
/**
* kbasep_csf_debugfs_get_cqs_live_u32() - Obtain live (u32) value for a CQS object.
@@ -120,11 +117,12 @@ static int kbasep_csf_debugfs_get_cqs_live_u64(struct kbase_context *kctx, u64 o
* or Fence Signal command, contained in a
* KCPU queue.
*
- * @file: The seq_file for printing to.
+ * @buffer: The buffer to write to.
+ * @length: The length of text in the buffer.
* @cmd: The KCPU Command to be printed.
* @cmd_name: The name of the command: indicates either a fence SIGNAL or WAIT.
*/
-static void kbasep_csf_sync_print_kcpu_fence_wait_or_signal(struct seq_file *file,
+static void kbasep_csf_sync_print_kcpu_fence_wait_or_signal(char *buffer, int *length,
struct kbase_kcpu_command *cmd,
const char *cmd_name)
{
@@ -133,38 +131,46 @@ static void kbasep_csf_sync_print_kcpu_fence_wait_or_signal(struct seq_file *fil
#else
struct dma_fence *fence = NULL;
#endif /* LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0) */
-
+ struct kbase_kcpu_command_fence_info *fence_info;
struct kbase_sync_fence_info info;
const char *timeline_name = NULL;
bool is_signaled = false;
- fence = cmd->info.fence.fence;
+ fence_info = &cmd->info.fence;
+ if (kbase_kcpu_command_fence_has_force_signaled(fence_info))
+ return;
+
+ fence = kbase_fence_get(fence_info);
if (WARN_ON(!fence))
return;
- kbase_sync_fence_info_get(cmd->info.fence.fence, &info);
+ kbase_sync_fence_info_get(fence, &info);
timeline_name = fence->ops->get_timeline_name(fence);
is_signaled = info.status > 0;
- seq_printf(file, "cmd:%s obj:0x%pK live_value:0x%.8x | ", cmd_name, cmd->info.fence.fence,
- is_signaled);
+ *length += snprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length,
+ "cmd:%s obj:0x%pK live_value:0x%.8x | ", cmd_name, fence, is_signaled);
/* Note: fence->seqno was u32 until 5.1 kernel, then u64 */
- seq_printf(file, "timeline_name:%s timeline_context:0x%.16llx fence_seqno:0x%.16llx",
- timeline_name, fence->context, (u64)fence->seqno);
+ *length += snprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length,
+ "timeline_name:%s timeline_context:0x%.16llx fence_seqno:0x%.16llx",
+ timeline_name, fence->context, (u64)fence->seqno);
+
+ kbase_fence_put(fence);
}
/**
* kbasep_csf_sync_print_kcpu_cqs_wait() - Print details of a CSF SYNC CQS Wait command,
* contained in a KCPU queue.
*
- * @file: The seq_file for printing to.
- * @cmd: The KCPU Command to be printed.
+ * @kctx: The kbase context.
+ * @buffer: The buffer to write to.
+ * @length: The length of text in the buffer.
+ * @cmd: The KCPU Command to be printed.
*/
-static void kbasep_csf_sync_print_kcpu_cqs_wait(struct seq_file *file,
- struct kbase_kcpu_command *cmd)
+static void kbasep_csf_sync_print_kcpu_cqs_wait(struct kbase_context *kctx, char *buffer,
+ int *length, struct kbase_kcpu_command *cmd)
{
- struct kbase_context *kctx = file->private;
size_t i;
for (i = 0; i < cmd->info.cqs_wait.nr_objs; i++) {
@@ -174,14 +180,19 @@ static void kbasep_csf_sync_print_kcpu_cqs_wait(struct seq_file *file,
int ret = kbasep_csf_debugfs_get_cqs_live_u32(kctx, cqs_obj->addr, &live_val);
bool live_val_valid = (ret >= 0);
- seq_printf(file, "cmd:CQS_WAIT_OPERATION obj:0x%.16llx live_value:", cqs_obj->addr);
+ *length +=
+ snprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length,
+ "cmd:CQS_WAIT_OPERATION obj:0x%.16llx live_value:", cqs_obj->addr);
if (live_val_valid)
- seq_printf(file, "0x%.16llx", (u64)live_val);
+ *length += snprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length,
+ "0x%.16llx", (u64)live_val);
else
- seq_puts(file, CQS_UNREADABLE_LIVE_VALUE);
+ *length += snprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length,
+ CQS_UNREADABLE_LIVE_VALUE);
- seq_printf(file, " | op:gt arg_value:0x%.8x", cqs_obj->val);
+ *length += snprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length,
+ " | op:gt arg_value:0x%.8x", cqs_obj->val);
}
}
@@ -189,13 +200,14 @@ static void kbasep_csf_sync_print_kcpu_cqs_wait(struct seq_file *file,
* kbasep_csf_sync_print_kcpu_cqs_set() - Print details of a CSF SYNC CQS
* Set command, contained in a KCPU queue.
*
- * @file: The seq_file for printing to.
- * @cmd: The KCPU Command to be printed.
+ * @kctx: The kbase context.
+ * @buffer: The buffer to write to.
+ * @length: The length of text in the buffer.
+ * @cmd: The KCPU Command to be printed.
*/
-static void kbasep_csf_sync_print_kcpu_cqs_set(struct seq_file *file,
- struct kbase_kcpu_command *cmd)
+static void kbasep_csf_sync_print_kcpu_cqs_set(struct kbase_context *kctx, char *buffer,
+ int *length, struct kbase_kcpu_command *cmd)
{
- struct kbase_context *kctx = file->private;
size_t i;
for (i = 0; i < cmd->info.cqs_set.nr_objs; i++) {
@@ -205,14 +217,19 @@ static void kbasep_csf_sync_print_kcpu_cqs_set(struct seq_file *file,
int ret = kbasep_csf_debugfs_get_cqs_live_u32(kctx, cqs_obj->addr, &live_val);
bool live_val_valid = (ret >= 0);
- seq_printf(file, "cmd:CQS_SET_OPERATION obj:0x%.16llx live_value:", cqs_obj->addr);
+ *length +=
+ snprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length,
+ "cmd:CQS_SET_OPERATION obj:0x%.16llx live_value:", cqs_obj->addr);
if (live_val_valid)
- seq_printf(file, "0x%.16llx", (u64)live_val);
+ *length += snprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length,
+ "0x%.16llx", (u64)live_val);
else
- seq_puts(file, CQS_UNREADABLE_LIVE_VALUE);
+ *length += snprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length,
+ CQS_UNREADABLE_LIVE_VALUE);
- seq_printf(file, " | op:add arg_value:0x%.8x", 1);
+ *length += snprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length,
+ " | op:add arg_value:0x%.8x", 1);
}
}
@@ -271,14 +288,15 @@ static const char *kbasep_csf_sync_get_set_op_name(basep_cqs_set_operation_op op
* Wait Operation command, contained
* in a KCPU queue.
*
- * @file: The seq_file for printing to.
- * @cmd: The KCPU Command to be printed.
+ * @kctx: The kbase context.
+ * @buffer: The buffer to write to.
+ * @length: The length of text in the buffer.
+ * @cmd: The KCPU Command to be printed.
*/
-static void kbasep_csf_sync_print_kcpu_cqs_wait_op(struct seq_file *file,
- struct kbase_kcpu_command *cmd)
+static void kbasep_csf_sync_print_kcpu_cqs_wait_op(struct kbase_context *kctx, char *buffer,
+ int *length, struct kbase_kcpu_command *cmd)
{
size_t i;
- struct kbase_context *kctx = file->private;
for (i = 0; i < cmd->info.cqs_wait.nr_objs; i++) {
struct base_cqs_wait_operation_info *wait_op =
@@ -290,14 +308,19 @@ static void kbasep_csf_sync_print_kcpu_cqs_wait_op(struct seq_file *file,
bool live_val_valid = (ret >= 0);
- seq_printf(file, "cmd:CQS_WAIT_OPERATION obj:0x%.16llx live_value:", wait_op->addr);
+ *length +=
+ snprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length,
+ "cmd:CQS_WAIT_OPERATION obj:0x%.16llx live_value:", wait_op->addr);
if (live_val_valid)
- seq_printf(file, "0x%.16llx", live_val);
+ *length += snprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length,
+ "0x%.16llx", live_val);
else
- seq_puts(file, CQS_UNREADABLE_LIVE_VALUE);
+ *length += snprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length,
+ CQS_UNREADABLE_LIVE_VALUE);
- seq_printf(file, " | op:%s arg_value:0x%.16llx", op_name, wait_op->val);
+ *length += snprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length,
+ " | op:%s arg_value:0x%.16llx", op_name, wait_op->val);
}
}
@@ -306,14 +329,15 @@ static void kbasep_csf_sync_print_kcpu_cqs_wait_op(struct seq_file *file,
* Set Operation command, contained
* in a KCPU queue.
*
- * @file: The seq_file for printing to.
- * @cmd: The KCPU Command to be printed.
+ * @kctx: The kbase context.
+ * @buffer: The buffer to write to.
+ * @length: The length of text in the buffer.
+ * @cmd: The KCPU Command to be printed.
*/
-static void kbasep_csf_sync_print_kcpu_cqs_set_op(struct seq_file *file,
- struct kbase_kcpu_command *cmd)
+static void kbasep_csf_sync_print_kcpu_cqs_set_op(struct kbase_context *kctx, char *buffer,
+ int *length, struct kbase_kcpu_command *cmd)
{
size_t i;
- struct kbase_context *kctx = file->private;
for (i = 0; i < cmd->info.cqs_set_operation.nr_objs; i++) {
struct base_cqs_set_operation_info *set_op = &cmd->info.cqs_set_operation.objs[i];
@@ -325,29 +349,35 @@ static void kbasep_csf_sync_print_kcpu_cqs_set_op(struct seq_file *file,
bool live_val_valid = (ret >= 0);
- seq_printf(file, "cmd:CQS_SET_OPERATION obj:0x%.16llx live_value:", set_op->addr);
+ *length +=
+ snprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length,
+ "cmd:CQS_SET_OPERATION obj:0x%.16llx live_value:", set_op->addr);
if (live_val_valid)
- seq_printf(file, "0x%.16llx", live_val);
+ *length += snprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length,
+ "0x%.16llx", live_val);
else
- seq_puts(file, CQS_UNREADABLE_LIVE_VALUE);
+ *length += snprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length,
+ CQS_UNREADABLE_LIVE_VALUE);
- seq_printf(file, " | op:%s arg_value:0x%.16llx", op_name, set_op->val);
+ *length += snprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length,
+ " | op:%s arg_value:0x%.16llx", op_name, set_op->val);
}
}
/**
* kbasep_csf_kcpu_debugfs_print_queue() - Print debug data for a KCPU queue
*
+ * @kctx: The kbase context.
* @file: The seq_file to print to.
* @queue: Pointer to the KCPU queue.
*/
-static void kbasep_csf_sync_kcpu_debugfs_print_queue(struct seq_file *file,
+static void kbasep_csf_sync_kcpu_debugfs_print_queue(struct kbase_context *kctx,
+ struct seq_file *file,
struct kbase_kcpu_command_queue *queue)
{
char started_or_pending;
struct kbase_kcpu_command *cmd;
- struct kbase_context *kctx = file->private;
size_t i;
if (WARN_ON(!queue))
@@ -357,72 +387,115 @@ static void kbasep_csf_sync_kcpu_debugfs_print_queue(struct seq_file *file,
mutex_lock(&queue->lock);
for (i = 0; i != queue->num_pending_cmds; ++i) {
+ char buffer[CSF_SYNC_DUMP_SIZE];
+ int length = 0;
started_or_pending = ((i == 0) && queue->command_started) ? 'S' : 'P';
- seq_printf(file, "queue:KCPU-%u-%u exec:%c ", kctx->id, queue->id,
- started_or_pending);
+ length += snprintf(buffer, CSF_SYNC_DUMP_SIZE, "queue:KCPU-%d-%d exec:%c ",
+ kctx->id, queue->id, started_or_pending);
- cmd = &queue->commands[queue->start_offset + i];
+ cmd = &queue->commands[(u8)(queue->start_offset + i)];
switch (cmd->type) {
#if IS_ENABLED(CONFIG_SYNC_FILE)
case BASE_KCPU_COMMAND_TYPE_FENCE_SIGNAL:
- kbasep_csf_sync_print_kcpu_fence_wait_or_signal(file, cmd, "FENCE_SIGNAL");
+ kbasep_csf_sync_print_kcpu_fence_wait_or_signal(buffer, &length, cmd,
+ "FENCE_SIGNAL");
break;
case BASE_KCPU_COMMAND_TYPE_FENCE_WAIT:
- kbasep_csf_sync_print_kcpu_fence_wait_or_signal(file, cmd, "FENCE_WAIT");
+ kbasep_csf_sync_print_kcpu_fence_wait_or_signal(buffer, &length, cmd,
+ "FENCE_WAIT");
break;
#endif
case BASE_KCPU_COMMAND_TYPE_CQS_WAIT:
- kbasep_csf_sync_print_kcpu_cqs_wait(file, cmd);
+ kbasep_csf_sync_print_kcpu_cqs_wait(kctx, buffer, &length, cmd);
break;
case BASE_KCPU_COMMAND_TYPE_CQS_SET:
- kbasep_csf_sync_print_kcpu_cqs_set(file, cmd);
+ kbasep_csf_sync_print_kcpu_cqs_set(kctx, buffer, &length, cmd);
break;
case BASE_KCPU_COMMAND_TYPE_CQS_WAIT_OPERATION:
- kbasep_csf_sync_print_kcpu_cqs_wait_op(file, cmd);
+ kbasep_csf_sync_print_kcpu_cqs_wait_op(kctx, buffer, &length, cmd);
break;
case BASE_KCPU_COMMAND_TYPE_CQS_SET_OPERATION:
- kbasep_csf_sync_print_kcpu_cqs_set_op(file, cmd);
+ kbasep_csf_sync_print_kcpu_cqs_set_op(kctx, buffer, &length, cmd);
break;
default:
- seq_puts(file, ", U, Unknown blocking command");
+ length += snprintf(buffer + length, CSF_SYNC_DUMP_SIZE - length,
+ ", U, Unknown blocking command");
break;
}
- seq_puts(file, "\n");
+ length += snprintf(buffer + length, CSF_SYNC_DUMP_SIZE - length, "\n");
+ kbasep_print(kctx, file, buffer);
}
mutex_unlock(&queue->lock);
}
-/**
- * kbasep_csf_sync_kcpu_debugfs_show() - Print CSF KCPU queue sync info
- *
- * @file: The seq_file for printing to.
- *
- * Return: Negative error code or 0 on success.
- */
-static int kbasep_csf_sync_kcpu_debugfs_show(struct seq_file *file)
+int kbasep_csf_sync_kcpu_dump_locked(struct kbase_context *kctx, struct seq_file *file)
{
- struct kbase_context *kctx = file->private;
unsigned long queue_idx;
- mutex_lock(&kctx->csf.kcpu_queues.lock);
- seq_printf(file, "KCPU queues for ctx %u:\n", kctx->id);
+ lockdep_assert_held(&kctx->csf.kcpu_queues.lock);
+
+ kbasep_print(kctx, file, "KCPU queues for ctx %d:\n", kctx->id);
queue_idx = find_first_bit(kctx->csf.kcpu_queues.in_use, KBASEP_MAX_KCPU_QUEUES);
while (queue_idx < KBASEP_MAX_KCPU_QUEUES) {
- kbasep_csf_sync_kcpu_debugfs_print_queue(file,
+ kbasep_csf_sync_kcpu_debugfs_print_queue(kctx, file,
kctx->csf.kcpu_queues.array[queue_idx]);
queue_idx = find_next_bit(kctx->csf.kcpu_queues.in_use, KBASEP_MAX_KCPU_QUEUES,
queue_idx + 1);
}
+ return 0;
+}
+
+int kbasep_csf_sync_kcpu_dump(struct kbase_context *kctx, struct seq_file *file)
+{
+ mutex_lock(&kctx->csf.kcpu_queues.lock);
+ kbasep_csf_sync_kcpu_dump_locked(kctx, file);
mutex_unlock(&kctx->csf.kcpu_queues.lock);
return 0;
}
+#if IS_ENABLED(CONFIG_DEBUG_FS)
+
+/* GPU queue related values */
+#define GPU_CSF_MOVE_OPCODE ((u64)0x1)
+#define GPU_CSF_MOVE32_OPCODE ((u64)0x2)
+#define GPU_CSF_SYNC_ADD_OPCODE ((u64)0x25)
+#define GPU_CSF_SYNC_SET_OPCODE ((u64)0x26)
+#define GPU_CSF_SYNC_WAIT_OPCODE ((u64)0x27)
+#define GPU_CSF_SYNC_ADD64_OPCODE ((u64)0x33)
+#define GPU_CSF_SYNC_SET64_OPCODE ((u64)0x34)
+#define GPU_CSF_SYNC_WAIT64_OPCODE ((u64)0x35)
+#define GPU_CSF_CALL_OPCODE ((u64)0x20)
+
+#define MAX_NR_GPU_CALLS (5)
+#define INSTR_OPCODE_MASK ((u64)0xFF << 56)
+#define INSTR_OPCODE_GET(value) ((value & INSTR_OPCODE_MASK) >> 56)
+#define MOVE32_IMM_MASK ((u64)0xFFFFFFFFFUL)
+#define MOVE_DEST_MASK ((u64)0xFF << 48)
+#define MOVE_DEST_GET(value) ((value & MOVE_DEST_MASK) >> 48)
+#define MOVE_IMM_MASK ((u64)0xFFFFFFFFFFFFUL)
+#define SYNC_SRC0_MASK ((u64)0xFF << 40)
+#define SYNC_SRC1_MASK ((u64)0xFF << 32)
+#define SYNC_SRC0_GET(value) (u8)((value & SYNC_SRC0_MASK) >> 40)
+#define SYNC_SRC1_GET(value) (u8)((value & SYNC_SRC1_MASK) >> 32)
+#define SYNC_WAIT_CONDITION_MASK ((u64)0xF << 28)
+#define SYNC_WAIT_CONDITION_GET(value) (u8)((value & SYNC_WAIT_CONDITION_MASK) >> 28)
+
+/* Enumeration for types of GPU queue sync events for
+ * the purpose of dumping them through debugfs.
+ */
+enum debugfs_gpu_sync_type {
+ DEBUGFS_GPU_SYNC_WAIT,
+ DEBUGFS_GPU_SYNC_SET,
+ DEBUGFS_GPU_SYNC_ADD,
+ NUM_DEBUGFS_GPU_SYNC_TYPES
+};
+
/**
* kbasep_csf_get_move_immediate_value() - Get the immediate values for sync operations
* from a MOVE instruction.
@@ -476,10 +549,21 @@ static u64 kbasep_csf_read_ringbuffer_value(struct kbase_queue *queue, u32 ringb
u64 page_off = ringbuff_offset >> PAGE_SHIFT;
u64 offset_within_page = ringbuff_offset & ~PAGE_MASK;
struct page *page = as_page(queue->queue_reg->gpu_alloc->pages[page_off]);
- u64 *ringbuffer = kmap_atomic(page);
- u64 value = ringbuffer[offset_within_page / sizeof(u64)];
+ u64 *ringbuffer = vmap(&page, 1, VM_MAP, pgprot_noncached(PAGE_KERNEL));
+ u64 value;
+
+ if (!ringbuffer) {
+ struct kbase_context *kctx = queue->kctx;
+
+		dev_err(kctx->kbdev->dev, "%s failed to map the buffer page to read a command!",
+			__func__);
+		/* Return an alternative value of 0 for the dumping operation */
+ value = 0;
+ } else {
+ value = ringbuffer[offset_within_page / sizeof(u64)];
+ vunmap(ringbuffer);
+ }
- kunmap_atomic(ringbuffer);
return value;
}
@@ -559,24 +643,25 @@ static void kbasep_csf_print_gpu_sync_op(struct seq_file *file, struct kbase_con
return;
/* 5. Print info */
- seq_printf(file, "queue:GPU-%u-%u-%u exec:%c cmd:%s ", kctx->id, queue->group->handle,
- queue->csi_index, queue->enabled && !follows_wait ? 'S' : 'P',
- gpu_sync_type_name[type]);
+ kbasep_print(kctx, file, "queue:GPU-%u-%u-%u exec:%c cmd:%s ", kctx->id,
+ queue->group->handle, queue->csi_index,
+ queue->enabled && !follows_wait ? 'S' : 'P', gpu_sync_type_name[type]);
if (queue->group->csg_nr == KBASEP_CSG_NR_INVALID)
- seq_puts(file, "slot:-");
+ kbasep_print(kctx, file, "slot:-");
else
- seq_printf(file, "slot:%d", (int)queue->group->csg_nr);
+ kbasep_print(kctx, file, "slot:%d", (int)queue->group->csg_nr);
- seq_printf(file, " obj:0x%.16llx live_value:0x%.16llx | ", sync_addr, live_val);
+ kbasep_print(kctx, file, " obj:0x%.16llx live_value:0x%.16llx | ", sync_addr, live_val);
if (type == DEBUGFS_GPU_SYNC_WAIT) {
wait_condition = SYNC_WAIT_CONDITION_GET(sync_cmd);
- seq_printf(file, "op:%s ", kbasep_csf_sync_get_wait_op_name(wait_condition));
+ kbasep_print(kctx, file, "op:%s ",
+ kbasep_csf_sync_get_wait_op_name(wait_condition));
} else
- seq_printf(file, "op:%s ", gpu_sync_type_op[type]);
+ kbasep_print(kctx, file, "op:%s ", gpu_sync_type_op[type]);
- seq_printf(file, "arg_value:0x%.16llx\n", compare_val);
+ kbasep_print(kctx, file, "arg_value:0x%.16llx\n", compare_val);
}
/**
@@ -595,7 +680,7 @@ static void kbasep_csf_print_gpu_sync_op(struct seq_file *file, struct kbase_con
static void kbasep_csf_dump_active_queue_sync_info(struct seq_file *file, struct kbase_queue *queue)
{
struct kbase_context *kctx;
- u32 *addr;
+ u64 *addr;
u64 cs_extract, cs_insert, instr, cursor;
bool follows_wait = false;
int nr_calls = 0;
@@ -605,11 +690,11 @@ static void kbasep_csf_dump_active_queue_sync_info(struct seq_file *file, struct
kctx = queue->kctx;
- addr = (u32 *)queue->user_io_addr;
- cs_insert = addr[CS_INSERT_LO / 4] | ((u64)addr[CS_INSERT_HI / 4] << 32);
+ addr = queue->user_io_addr;
+ cs_insert = addr[CS_INSERT_LO / sizeof(*addr)];
- addr = (u32 *)(queue->user_io_addr + PAGE_SIZE);
- cs_extract = addr[CS_EXTRACT_LO / 4] | ((u64)addr[CS_EXTRACT_HI / 4] << 32);
+ addr = queue->user_io_addr + PAGE_SIZE / sizeof(*addr);
+ cs_extract = addr[CS_EXTRACT_LO / sizeof(*addr)];
cursor = cs_extract;
@@ -637,6 +722,7 @@ static void kbasep_csf_dump_active_queue_sync_info(struct seq_file *file, struct
case GPU_CSF_SYNC_SET64_OPCODE:
case GPU_CSF_SYNC_WAIT64_OPCODE:
instr_is_64_bit = true;
+ break;
default:
break;
}
@@ -663,7 +749,7 @@ static void kbasep_csf_dump_active_queue_sync_info(struct seq_file *file, struct
break;
case GPU_CSF_CALL_OPCODE:
nr_calls++;
- /* Fallthrough */
+ break;
default:
/* Unrecognized command, skip past it */
break;
@@ -677,36 +763,37 @@ static void kbasep_csf_dump_active_queue_sync_info(struct seq_file *file, struct
* kbasep_csf_dump_active_group_sync_state() - Prints SYNC commands in all GPU queues of
* the provided queue group.
*
+ * @kctx: The kbase context
* @file: seq_file for printing to.
* @group: Address of a GPU command group to iterate through.
*
* This function will iterate through each queue in the provided GPU queue group and
* print its SYNC related commands.
*/
-static void kbasep_csf_dump_active_group_sync_state(struct seq_file *file,
+static void kbasep_csf_dump_active_group_sync_state(struct kbase_context *kctx,
+ struct seq_file *file,
struct kbase_queue_group *const group)
{
- struct kbase_context *kctx = file->private;
unsigned int i;
- seq_printf(file, "GPU queues for group %u (slot %d) of ctx %d_%d\n", group->handle,
- group->csg_nr, kctx->tgid, kctx->id);
+ kbasep_print(kctx, file, "GPU queues for group %u (slot %d) of ctx %d_%d\n", group->handle,
+ group->csg_nr, kctx->tgid, kctx->id);
for (i = 0; i < MAX_SUPPORTED_STREAMS_PER_GROUP; i++)
kbasep_csf_dump_active_queue_sync_info(file, group->bound_queues[i]);
}
/**
- * kbasep_csf_sync_gpu_debugfs_show() - Print CSF GPU queue sync info
+ * kbasep_csf_sync_gpu_dump() - Print CSF GPU queue sync info
*
+ * @kctx: The kbase context
* @file: The seq_file for printing to.
*
* Return: Negative error code or 0 on success.
*/
-static int kbasep_csf_sync_gpu_debugfs_show(struct seq_file *file)
+static int kbasep_csf_sync_gpu_dump(struct kbase_context *kctx, struct seq_file *file)
{
u32 gr;
- struct kbase_context *kctx = file->private;
struct kbase_device *kbdev;
if (WARN_ON(!kctx))
@@ -721,7 +808,7 @@ static int kbasep_csf_sync_gpu_debugfs_show(struct seq_file *file)
kbdev->csf.scheduler.csg_slots[gr].resident_group;
if (!group || group->kctx != kctx)
continue;
- kbasep_csf_dump_active_group_sync_state(file, group);
+ kbasep_csf_dump_active_group_sync_state(kctx, file, group);
}
kbase_csf_scheduler_unlock(kbdev);
@@ -738,10 +825,13 @@ static int kbasep_csf_sync_gpu_debugfs_show(struct seq_file *file)
*/
static int kbasep_csf_sync_debugfs_show(struct seq_file *file, void *data)
{
- seq_printf(file, "MALI_CSF_SYNC_DEBUGFS_VERSION: v%u\n", MALI_CSF_SYNC_DEBUGFS_VERSION);
+ struct kbase_context *kctx = file->private;
+
+ kbasep_print(kctx, file, "MALI_CSF_SYNC_DEBUGFS_VERSION: v%u\n",
+ MALI_CSF_SYNC_DEBUGFS_VERSION);
- kbasep_csf_sync_kcpu_debugfs_show(file);
- kbasep_csf_sync_gpu_debugfs_show(file);
+ kbasep_csf_sync_kcpu_dump(kctx, file);
+ kbasep_csf_sync_gpu_dump(kctx, file);
return 0;
}
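
The dump path above assembles each output line in a fixed 256-byte buffer (CSF_SYNC_DUMP_SIZE), advancing a length cursor with every snprintf() fragment so that the same text can be sent either to a seq_file or to dmesg. Below is a standalone sketch of that accumulation pattern; the clamp on the cursor is a defensive illustration and not kbase code.

#include <stdarg.h>
#include <stdio.h>

#define DUMP_SIZE 256

static void buf_appendf(char *buf, int *len, const char *fmt, ...)
{
	va_list args;
	int ret;

	if (*len >= DUMP_SIZE - 1)
		return;		/* buffer already full */

	va_start(args, fmt);
	ret = vsnprintf(buf + *len, (size_t)(DUMP_SIZE - *len), fmt, args);
	va_end(args);

	if (ret > 0)
		*len += ret;
	if (*len > DUMP_SIZE - 1)
		*len = DUMP_SIZE - 1;	/* clamp after truncation */
}

int main(void)
{
	char line[DUMP_SIZE];
	int len = 0;

	buf_appendf(line, &len, "queue:KCPU-%d-%d exec:%c ", 3, 0, 'P');
	buf_appendf(line, &len, "cmd:%s obj:0x%.16llx live_value:",
		    "CQS_WAIT_OPERATION", 0x1234abcdULL);
	buf_appendf(line, &len, "0x%.16llx", 7ULL);
	buf_appendf(line, &len, " | op:gt arg_value:0x%.8x\n", 1u);

	fputs(line, stdout);	/* one fully assembled dump line */
	return 0;
}
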
diff --git a/mali_kbase/csf/mali_kbase_csf_sync_debugfs.h b/mali_kbase/csf/mali_kbase_csf_sync_debugfs.h
index 177e15d..2fe5060 100644
--- a/mali_kbase/csf/mali_kbase_csf_sync_debugfs.h
+++ b/mali_kbase/csf/mali_kbase_csf_sync_debugfs.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2022-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -22,6 +22,8 @@
#ifndef _KBASE_CSF_SYNC_DEBUGFS_H_
#define _KBASE_CSF_SYNC_DEBUGFS_H_
+#include <linux/seq_file.h>
+
/* Forward declaration */
struct kbase_context;
@@ -34,4 +36,27 @@ struct kbase_context;
*/
void kbase_csf_sync_debugfs_init(struct kbase_context *kctx);
+/**
+ * kbasep_csf_sync_kcpu_dump() - Print CSF KCPU queue sync info
+ *
+ * @kctx: The kbase context.
+ * @file: The seq_file for printing to.
+ *
+ * Return: Negative error code or 0 on success.
+ *
+ * Note: This function should not be used if kcpu_queues.lock is held. Use
+ * kbasep_csf_sync_kcpu_dump_locked() instead.
+ */
+int kbasep_csf_sync_kcpu_dump(struct kbase_context *kctx, struct seq_file *file);
+
+/**
+ * kbasep_csf_sync_kcpu_dump_locked() - Print CSF KCPU queue sync info
+ *
+ * @kctx: The kbase context.
+ * @file: The seq_file for printing to.
+ *
+ * Note: The caller must hold kcpu_queues.lock.
+ *
+ * Return: Negative error code or 0 on success.
+ */
+int kbasep_csf_sync_kcpu_dump_locked(struct kbase_context *kctx, struct seq_file *file);
+
#endif /* _KBASE_CSF_SYNC_DEBUGFS_H_ */
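
The pair of declarations above follows the common foo()/foo_locked() convention: the _locked variant assumes the caller already holds kcpu_queues.lock, while the plain variant takes the lock itself and delegates. A tiny standalone illustration, with a pthread mutex standing in for the kbase lock and the functions reduced to stand-ins:

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t kcpu_queues_lock = PTHREAD_MUTEX_INITIALIZER;

/* Caller must hold kcpu_queues_lock. */
static int sync_kcpu_dump_locked(int ctx_id)
{
	printf("KCPU queues for ctx %d:\n", ctx_id);
	return 0;
}

static int sync_kcpu_dump(int ctx_id)
{
	int ret;

	pthread_mutex_lock(&kcpu_queues_lock);
	ret = sync_kcpu_dump_locked(ctx_id);
	pthread_mutex_unlock(&kcpu_queues_lock);
	return ret;
}

int main(void)
{
	return sync_kcpu_dump(3);
}
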
diff --git a/mali_kbase/csf/mali_kbase_csf_tiler_heap.c b/mali_kbase/csf/mali_kbase_csf_tiler_heap.c
index 8072a8b..85d8018 100644
--- a/mali_kbase/csf/mali_kbase_csf_tiler_heap.c
+++ b/mali_kbase/csf/mali_kbase_csf_tiler_heap.c
@@ -362,7 +362,7 @@ static struct kbase_csf_tiler_heap_chunk *alloc_new_chunk(struct kbase_context *
/* If page migration is enabled, we don't want to migrate tiler heap pages.
* This does not change if the constituent pages are already marked as isolated.
*/
- if (kbase_page_migration_enabled)
+ if (kbase_is_page_migration_enabled())
kbase_set_phy_alloc_page_status(chunk->region->gpu_alloc, NOT_MOVABLE);
return chunk;
@@ -748,7 +748,7 @@ int kbase_csf_tiler_heap_init(struct kbase_context *const kctx, u32 const chunk_
KBASE_REG_CPU_RD, &heap->buf_desc_map,
KBASE_VMAP_FLAG_PERMANENT_MAP_ACCOUNTING);
- if (kbase_page_migration_enabled)
+ if (kbase_is_page_migration_enabled())
kbase_set_phy_alloc_page_status(buf_desc_reg->gpu_alloc, NOT_MOVABLE);
kbase_gpu_vm_unlock(kctx);
diff --git a/mali_kbase/csf/mali_kbase_csf_timeout.c b/mali_kbase/csf/mali_kbase_csf_timeout.c
index ea6c116..f7fcbb1 100644
--- a/mali_kbase/csf/mali_kbase_csf_timeout.c
+++ b/mali_kbase/csf/mali_kbase_csf_timeout.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -52,6 +52,7 @@ static int set_timeout(struct kbase_device *const kbdev, u64 const timeout)
dev_dbg(kbdev->dev, "New progress timeout: %llu cycles\n", timeout);
atomic64_set(&kbdev->csf.progress_timeout, timeout);
+ kbase_device_set_timeout(kbdev, CSF_SCHED_PROTM_PROGRESS_TIMEOUT, timeout, 1);
return 0;
}
@@ -100,7 +101,7 @@ static ssize_t progress_timeout_store(struct device * const dev,
if (!err) {
kbase_csf_scheduler_pm_active(kbdev);
- err = kbase_csf_scheduler_wait_mcu_active(kbdev);
+ err = kbase_csf_scheduler_killable_wait_mcu_active(kbdev);
if (!err)
err = kbase_csf_firmware_set_timeout(kbdev, timeout);
@@ -147,8 +148,14 @@ int kbase_csf_timeout_init(struct kbase_device *const kbdev)
int err;
#if IS_ENABLED(CONFIG_OF)
- err = of_property_read_u64(kbdev->dev->of_node,
- "progress_timeout", &timeout);
+	/* Read the "progress-timeout" property and fall back to "progress_timeout"
+	 * if it is not found.
+	 */
+ err = of_property_read_u64(kbdev->dev->of_node, "progress-timeout", &timeout);
+
+ if (err == -EINVAL)
+ err = of_property_read_u64(kbdev->dev->of_node, "progress_timeout", &timeout);
+
if (!err)
dev_info(kbdev->dev, "Found progress_timeout = %llu in Devicetree\n",
timeout);
diff --git a/mali_kbase/csf/mali_kbase_csf_tl_reader.c b/mali_kbase/csf/mali_kbase_csf_tl_reader.c
index 910ba22..ce50683 100644
--- a/mali_kbase/csf/mali_kbase_csf_tl_reader.c
+++ b/mali_kbase/csf/mali_kbase_csf_tl_reader.c
@@ -39,8 +39,6 @@
#include <linux/version_compat_defs.h>
#endif
-/* Name of the CSFFW timeline tracebuffer. */
-#define KBASE_CSFFW_TRACEBUFFER_NAME "timeline"
/* Name of the timeline header metatadata */
#define KBASE_CSFFW_TIMELINE_HEADER_NAME "timeline_header"
@@ -299,16 +297,13 @@ static int tl_reader_init_late(
if (self->kbdev)
return 0;
- tb = kbase_csf_firmware_get_trace_buffer(
- kbdev, KBASE_CSFFW_TRACEBUFFER_NAME);
+ tb = kbase_csf_firmware_get_trace_buffer(kbdev, KBASE_CSFFW_TIMELINE_BUF_NAME);
hdr = kbase_csf_firmware_get_timeline_metadata(
kbdev, KBASE_CSFFW_TIMELINE_HEADER_NAME, &hdr_size);
if (!tb) {
- dev_warn(
- kbdev->dev,
- "'%s' tracebuffer is not present in the firmware image.",
- KBASE_CSFFW_TRACEBUFFER_NAME);
+ dev_warn(kbdev->dev, "'%s' tracebuffer is not present in the firmware image.",
+ KBASE_CSFFW_TIMELINE_BUF_NAME);
return -1;
}
diff --git a/mali_kbase/csf/mali_kbase_csf_trace_buffer.c b/mali_kbase/csf/mali_kbase_csf_trace_buffer.c
index 9ce6776..2b63f19 100644
--- a/mali_kbase/csf/mali_kbase_csf_trace_buffer.c
+++ b/mali_kbase/csf/mali_kbase_csf_trace_buffer.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2018-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -89,7 +89,7 @@ struct firmware_trace_buffer {
} cpu_va;
u32 num_pages;
u32 trace_enable_init_mask[CSF_FIRMWARE_TRACE_ENABLE_INIT_MASK_MAX];
- char name[1]; /* this field must be last */
+ char name[]; /* this field must be last */
};
/**
@@ -118,16 +118,19 @@ struct firmware_trace_buffer_data {
*/
static const struct firmware_trace_buffer_data trace_buffer_data[] = {
#if MALI_UNIT_TEST
- { "fwutf", { 0 }, 1 },
+ { KBASE_CSFFW_UTF_BUF_NAME, { 0 }, 1 },
#endif
#ifdef CONFIG_MALI_PIXEL_GPU_SSCD
/* Enable all the logs */
- { FIRMWARE_LOG_BUF_NAME, { 0xFFFFFFFF }, FW_TRACE_BUF_NR_PAGES },
+ { KBASE_CSFFW_LOG_BUF_NAME, { 0xFFFFFFFF }, FW_TRACE_BUF_NR_PAGES },
#else
- { FIRMWARE_LOG_BUF_NAME, { 0 }, FW_TRACE_BUF_NR_PAGES },
+ { KBASE_CSFFW_LOG_BUF_NAME, { 0 }, FW_TRACE_BUF_NR_PAGES },
#endif /* CONFIG_MALI_PIXEL_GPU_SSCD */
- { "benchmark", { 0 }, 2 },
- { "timeline", { 0 }, KBASE_CSF_TL_BUFFER_NR_PAGES },
+ { KBASE_CSFFW_BENCHMARK_BUF_NAME, { 0 }, 2 },
+ { KBASE_CSFFW_TIMELINE_BUF_NAME, { 0 }, KBASE_CSF_TL_BUFFER_NR_PAGES },
+#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD)
+ { KBASE_CSFFW_GPU_METRICS_BUF_NAME, { 0 }, 8 },
+#endif /* CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD */
};
int kbase_csf_firmware_trace_buffers_init(struct kbase_device *kbdev)
@@ -265,7 +268,7 @@ int kbase_csf_firmware_parse_trace_buffer_entry(struct kbase_device *kbdev,
* trace buffer name (with NULL termination).
*/
trace_buffer =
- kmalloc(sizeof(*trace_buffer) + name_len + 1, GFP_KERNEL);
+ kmalloc(struct_size(trace_buffer, name, name_len + 1), GFP_KERNEL);
if (!trace_buffer)
return -ENOMEM;
@@ -512,6 +515,37 @@ unsigned int kbase_csf_firmware_trace_buffer_read_data(
}
EXPORT_SYMBOL(kbase_csf_firmware_trace_buffer_read_data);
+void kbase_csf_firmware_trace_buffer_discard(struct firmware_trace_buffer *trace_buffer)
+{
+ unsigned int bytes_discarded;
+ u32 buffer_size = trace_buffer->num_pages << PAGE_SHIFT;
+ u32 extract_offset = *(trace_buffer->cpu_va.extract_cpu_va);
+ u32 insert_offset = *(trace_buffer->cpu_va.insert_cpu_va);
+ unsigned int trace_size;
+
+ if (insert_offset >= extract_offset) {
+ trace_size = insert_offset - extract_offset;
+ if (trace_size > buffer_size / 2) {
+ bytes_discarded = trace_size - buffer_size / 2;
+ extract_offset += bytes_discarded;
+ *(trace_buffer->cpu_va.extract_cpu_va) = extract_offset;
+ }
+ } else {
+ unsigned int bytes_tail;
+
+ bytes_tail = buffer_size - extract_offset;
+ trace_size = bytes_tail + insert_offset;
+ if (trace_size > buffer_size / 2) {
+ bytes_discarded = trace_size - buffer_size / 2;
+ extract_offset += bytes_discarded;
+ if (extract_offset >= buffer_size)
+ extract_offset = extract_offset - buffer_size;
+ *(trace_buffer->cpu_va.extract_cpu_va) = extract_offset;
+ }
+ }
+}
+EXPORT_SYMBOL(kbase_csf_firmware_trace_buffer_discard);
+
static void update_trace_buffer_active_mask64(struct firmware_trace_buffer *tb, u64 mask)
{
unsigned int i;
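
The struct firmware_trace_buffer change above replaces the "char name[1]" idiom with a true flexible array member, and the allocation is now sized with struct_size(), which also guards against arithmetic overflow. Below is a standalone sketch of the same layout and sizing, using offsetof() as the plain C equivalent; the struct and helper are illustrative, not the kbase definitions.

#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct trace_buffer {
	unsigned int num_pages;
	char name[];		/* must be the last member */
};

static struct trace_buffer *trace_buffer_alloc(const char *name, unsigned int num_pages)
{
	size_t name_len = strlen(name);
	struct trace_buffer *tb =
		malloc(offsetof(struct trace_buffer, name) + name_len + 1);

	if (!tb)
		return NULL;
	tb->num_pages = num_pages;
	memcpy(tb->name, name, name_len + 1);	/* include the NUL terminator */
	return tb;
}

int main(void)
{
	struct trace_buffer *tb = trace_buffer_alloc("timeline", 4);

	if (!tb)
		return 1;
	printf("trace buffer '%s': %u pages\n", tb->name, tb->num_pages);
	free(tb);
	return 0;
}
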
diff --git a/mali_kbase/csf/mali_kbase_csf_trace_buffer.h b/mali_kbase/csf/mali_kbase_csf_trace_buffer.h
index 037dc22..c0a42ca 100644
--- a/mali_kbase/csf/mali_kbase_csf_trace_buffer.h
+++ b/mali_kbase/csf/mali_kbase_csf_trace_buffer.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2018-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -25,8 +25,16 @@
#include <linux/types.h>
#define CSF_FIRMWARE_TRACE_ENABLE_INIT_MASK_MAX (4)
-#define FIRMWARE_LOG_BUF_NAME "fwlog"
#define FW_TRACE_BUF_NR_PAGES 4
+#if MALI_UNIT_TEST
+#define KBASE_CSFFW_UTF_BUF_NAME "fwutf"
+#endif
+#define KBASE_CSFFW_LOG_BUF_NAME "fwlog"
+#define KBASE_CSFFW_BENCHMARK_BUF_NAME "benchmark"
+#define KBASE_CSFFW_TIMELINE_BUF_NAME "timeline"
+#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD)
+#define KBASE_CSFFW_GPU_METRICS_BUF_NAME "gpu_metrics"
+#endif /* CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD */
/* Forward declarations */
struct firmware_trace_buffer;
@@ -117,7 +125,8 @@ struct firmware_trace_buffer *kbase_csf_firmware_get_trace_buffer(
struct kbase_device *kbdev, const char *name);
/**
- * kbase_csf_firmware_trace_buffer_get_trace_enable_bits_count - Get number of trace enable bits for a trace buffer
+ * kbase_csf_firmware_trace_buffer_get_trace_enable_bits_count - Get number of trace enable bits
+ * for a trace buffer
*
* @trace_buffer: Trace buffer handle
*
@@ -167,6 +176,15 @@ unsigned int kbase_csf_firmware_trace_buffer_read_data(
struct firmware_trace_buffer *trace_buffer, u8 *data, unsigned int num_bytes);
/**
+ * kbase_csf_firmware_trace_buffer_discard - Discard data from a trace buffer
+ *
+ * @trace_buffer: Trace buffer handle
+ *
+ * Discard part of the data in the trace buffer to reduce its utilization to half of its size.
+ */
+void kbase_csf_firmware_trace_buffer_discard(struct firmware_trace_buffer *trace_buffer);
+
+/**
* kbase_csf_firmware_trace_buffer_get_active_mask64 - Get trace buffer active mask
*
* @tb: Trace buffer handle
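
kbase_csf_firmware_trace_buffer_discard(), documented above, trims a circular trace buffer down to at most half its size by advancing the extract offset, handling the case where the data wraps around the end of the buffer. Below is a standalone model of that arithmetic, with plain integers standing in for the firmware-shared insert/extract pointers.

#include <assert.h>
#include <stdio.h>

static unsigned int discard_to_half(unsigned int buffer_size, unsigned int insert_offset,
				    unsigned int extract_offset)
{
	unsigned int trace_size;

	if (insert_offset >= extract_offset)
		trace_size = insert_offset - extract_offset;
	else
		trace_size = (buffer_size - extract_offset) + insert_offset;

	if (trace_size > buffer_size / 2) {
		unsigned int bytes_discarded = trace_size - buffer_size / 2;

		extract_offset += bytes_discarded;
		if (extract_offset >= buffer_size)
			extract_offset -= buffer_size;
	}

	return extract_offset;
}

int main(void)
{
	/* Non-wrapped: 3000 of 4096 bytes used, keep only the newest 2048. */
	unsigned int e1 = discard_to_half(4096, 3500, 500);
	/* Wrapped: extract near the end of the buffer, insert in the middle. */
	unsigned int e2 = discard_to_half(4096, 3000, 3800);

	assert(e1 == 1452);
	assert(e2 == 952);
	printf("non-wrapped: new extract offset = %u\n", e1);
	printf("wrapped:     new extract offset = %u\n", e2);
	return 0;
}
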
diff --git a/mali_kbase/device/backend/mali_kbase_device_csf.c b/mali_kbase/device/backend/mali_kbase_device_csf.c
index 492684f..571761f 100644
--- a/mali_kbase/device/backend/mali_kbase_device_csf.c
+++ b/mali_kbase/device/backend/mali_kbase_device_csf.c
@@ -34,13 +34,16 @@
#include <mali_kbase.h>
#include <backend/gpu/mali_kbase_irq_internal.h>
#include <backend/gpu/mali_kbase_pm_internal.h>
-#include <backend/gpu/mali_kbase_js_internal.h>
#include <backend/gpu/mali_kbase_clk_rate_trace_mgr.h>
#include <csf/mali_kbase_csf_csg_debugfs.h>
+#include <csf/mali_kbase_csf_kcpu_fence_debugfs.h>
#include <hwcnt/mali_kbase_hwcnt_virtualizer.h>
#include <mali_kbase_kinstr_prfcnt.h>
#include <mali_kbase_vinstr.h>
#include <tl/mali_kbase_timeline.h>
+#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD)
+#include <mali_kbase_gpu_metrics.h>
+#endif
/**
* kbase_device_firmware_hwcnt_term - Terminate CSF firmware and HWC
@@ -84,10 +87,6 @@ static int kbase_backend_late_init(struct kbase_device *kbdev)
if (err)
goto fail_pm_powerup;
- err = kbase_backend_timer_init(kbdev);
- if (err)
- goto fail_timer;
-
#ifdef CONFIG_MALI_DEBUG
#if IS_ENABLED(CONFIG_MALI_REAL_HW)
if (kbasep_common_test_interrupt_handlers(kbdev) != 0) {
@@ -123,10 +122,6 @@ static int kbase_backend_late_init(struct kbase_device *kbdev)
if (err)
goto fail_update_l2_features;
- err = kbase_backend_time_init(kbdev);
- if (err)
- goto fail_update_l2_features;
-
init_waitqueue_head(&kbdev->hwaccess.backend.reset_wait);
kbase_pm_context_idle(kbdev);
@@ -148,8 +143,6 @@ fail_interrupt_test:
#endif /* IS_ENABLED(CONFIG_MALI_REAL_HW) */
#endif /* CONFIG_MALI_DEBUG */
- kbase_backend_timer_term(kbdev);
-fail_timer:
kbase_pm_context_idle(kbdev);
kbase_hwaccess_pm_halt(kbdev);
fail_pm_powerup:
@@ -285,20 +278,21 @@ static void kbase_device_hwcnt_backend_csf_term(struct kbase_device *kbdev)
static const struct kbase_device_init dev_init[] = {
#if !IS_ENABLED(CONFIG_MALI_REAL_HW)
- { kbase_gpu_device_create, kbase_gpu_device_destroy,
- "Dummy model initialization failed" },
+ { kbase_gpu_device_create, kbase_gpu_device_destroy, "Dummy model initialization failed" },
#else /* !IS_ENABLED(CONFIG_MALI_REAL_HW) */
{ assign_irqs, NULL, "IRQ search failed" },
#endif /* !IS_ENABLED(CONFIG_MALI_REAL_HW) */
#if !IS_ENABLED(CONFIG_MALI_NO_MALI)
{ registers_map, registers_unmap, "Register map failed" },
#endif /* !IS_ENABLED(CONFIG_MALI_NO_MALI) */
+#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD)
+ { kbase_gpu_metrics_init, kbase_gpu_metrics_term, "GPU metrics initialization failed" },
+#endif /* IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) */
{ power_control_init, power_control_term, "Power control initialization failed" },
{ kbase_device_io_history_init, kbase_device_io_history_term,
"Register access history initialization failed" },
{ kbase_device_early_init, kbase_device_early_term, "Early device initialization failed" },
- { kbase_device_populate_max_freq, NULL, "Populating max frequency failed" },
- { kbase_pm_lowest_gpu_freq_init, NULL, "Lowest freq initialization failed" },
+ { kbase_backend_time_init, NULL, "Time backend initialization failed" },
{ kbase_device_misc_init, kbase_device_misc_term,
"Miscellaneous device initialization failed" },
{ kbase_device_pcm_dev_init, kbase_device_pcm_dev_term,
@@ -330,6 +324,8 @@ static const struct kbase_device_init dev_init[] = {
{ kbase_debug_csf_fault_init, kbase_debug_csf_fault_term,
"CSF fault debug initialization failed" },
{ kbase_device_debugfs_init, kbase_device_debugfs_term, "DebugFS initialization failed" },
+ { kbase_csf_fence_timer_debugfs_init, kbase_csf_fence_timer_debugfs_term,
+ "Fence timeout DebugFS initialization failed" },
/* Sysfs init needs to happen before registering the device with
* misc_register(), otherwise it causes a race condition between
* registering the device and a uevent event being generated for
@@ -522,4 +518,4 @@ out:
return ret;
}
-KBASE_EXPORT_TEST_API(kbase_device_firmware_init_once);
+KBASE_EXPORT_TEST_API(kbase_device_firmware_init_once);
\ No newline at end of file
diff --git a/mali_kbase/device/backend/mali_kbase_device_hw_csf.c b/mali_kbase/device/backend/mali_kbase_device_hw_csf.c
index 5e27094..c837f5a 100644
--- a/mali_kbase/device/backend/mali_kbase_device_hw_csf.c
+++ b/mali_kbase/device/backend/mali_kbase_device_hw_csf.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2020-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -58,7 +58,7 @@ static void kbase_gpu_fault_interrupt(struct kbase_device *kbdev)
{
const u32 status = kbase_reg_read(kbdev,
GPU_CONTROL_REG(GPU_FAULTSTATUS));
- const bool as_valid = status & GPU_FAULTSTATUS_JASID_VALID_FLAG;
+ const bool as_valid = status & GPU_FAULTSTATUS_JASID_VALID_MASK;
const u32 as_nr = (status & GPU_FAULTSTATUS_JASID_MASK) >>
GPU_FAULTSTATUS_JASID_SHIFT;
bool bus_fault = (status & GPU_FAULTSTATUS_EXCEPTION_TYPE_MASK) ==
diff --git a/mali_kbase/device/backend/mali_kbase_device_hw_jm.c b/mali_kbase/device/backend/mali_kbase_device_hw_jm.c
index 38223af..8f7b39b 100644
--- a/mali_kbase/device/backend/mali_kbase_device_hw_jm.c
+++ b/mali_kbase/device/backend/mali_kbase_device_hw_jm.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2020-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -124,9 +124,10 @@ KBASE_EXPORT_TEST_API(kbase_reg_write);
u32 kbase_reg_read(struct kbase_device *kbdev, u32 offset)
{
- u32 val;
+ u32 val = 0;
- WARN_ON(!kbdev->pm.backend.gpu_powered);
+ if (WARN_ON(!kbdev->pm.backend.gpu_powered))
+ return val;
val = readl(kbdev->reg + offset);
diff --git a/mali_kbase/device/backend/mali_kbase_device_jm.c b/mali_kbase/device/backend/mali_kbase_device_jm.c
index 14b5602..89635b5 100644
--- a/mali_kbase/device/backend/mali_kbase_device_jm.c
+++ b/mali_kbase/device/backend/mali_kbase_device_jm.c
@@ -45,6 +45,9 @@
#include <backend/gpu/mali_kbase_pm_internal.h>
#include <mali_kbase_dummy_job_wa.h>
#include <backend/gpu/mali_kbase_clk_rate_trace_mgr.h>
+#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD)
+#include <mali_kbase_gpu_metrics.h>
+#endif
/**
* kbase_backend_late_init - Perform any backend-specific initialization.
@@ -102,10 +105,6 @@ static int kbase_backend_late_init(struct kbase_device *kbdev)
if (err)
goto fail_update_l2_features;
- err = kbase_backend_time_init(kbdev);
- if (err)
- goto fail_update_l2_features;
-
init_waitqueue_head(&kbdev->hwaccess.backend.reset_wait);
/* Idle the GPU and/or cores, if the policy wants it to */
@@ -224,12 +223,14 @@ static const struct kbase_device_init dev_init[] = {
#if !IS_ENABLED(CONFIG_MALI_NO_MALI)
{ registers_map, registers_unmap, "Register map failed" },
#endif /* !IS_ENABLED(CONFIG_MALI_NO_MALI) */
+#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD)
+ { kbase_gpu_metrics_init, kbase_gpu_metrics_term, "GPU metrics initialization failed" },
+#endif /* IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) */
{ kbase_device_io_history_init, kbase_device_io_history_term,
"Register access history initialization failed" },
{ kbase_device_pm_init, kbase_device_pm_term, "Power management initialization failed" },
{ kbase_device_early_init, kbase_device_early_term, "Early device initialization failed" },
- { kbase_device_populate_max_freq, NULL, "Populating max frequency failed" },
- { kbase_pm_lowest_gpu_freq_init, NULL, "Lowest freq initialization failed" },
+ { kbase_backend_time_init, NULL, "Time backend initialization failed" },
{ kbase_device_misc_init, kbase_device_misc_term,
"Miscellaneous device initialization failed" },
{ kbase_device_pcm_dev_init, kbase_device_pcm_dev_term,
@@ -363,4 +364,4 @@ int kbase_device_firmware_init_once(struct kbase_device *kbdev)
mutex_unlock(&kbdev->fw_load_lock);
return ret;
-}
+}
\ No newline at end of file
diff --git a/mali_kbase/device/mali_kbase_device.c b/mali_kbase/device/mali_kbase_device.c
index e90e791..e5b3e2b 100644
--- a/mali_kbase/device/mali_kbase_device.c
+++ b/mali_kbase/device/mali_kbase_device.c
@@ -230,11 +230,14 @@ int kbase_device_misc_init(struct kbase_device * const kbdev)
kbdev->cci_snoop_enabled = false;
np = kbdev->dev->of_node;
if (np != NULL) {
- if (of_property_read_u32(np, "snoop_enable_smc",
- &kbdev->snoop_enable_smc))
+		/* Read the "-" versions of the properties and fall back to the
+		 * "_" versions if they are not found.
+		 */
+ if (of_property_read_u32(np, "snoop-enable-smc", &kbdev->snoop_enable_smc) &&
+ of_property_read_u32(np, "snoop_enable_smc", &kbdev->snoop_enable_smc))
kbdev->snoop_enable_smc = 0;
- if (of_property_read_u32(np, "snoop_disable_smc",
- &kbdev->snoop_disable_smc))
+ if (of_property_read_u32(np, "snoop-disable-smc", &kbdev->snoop_disable_smc) &&
+ of_property_read_u32(np, "snoop_disable_smc", &kbdev->snoop_disable_smc))
kbdev->snoop_disable_smc = 0;
/* Either both or none of the calls should be provided. */
if (!((kbdev->snoop_disable_smc == 0
@@ -306,13 +309,13 @@ int kbase_device_misc_init(struct kbase_device * const kbdev)
kbdev->pm.dvfs_period = DEFAULT_PM_DVFS_PERIOD;
#if MALI_USE_CSF
- kbdev->reset_timeout_ms = kbase_get_timeout_ms(kbdev, CSF_CSG_SUSPEND_TIMEOUT);
-#else
+ kbdev->reset_timeout_ms = kbase_get_timeout_ms(kbdev, CSF_GPU_RESET_TIMEOUT);
+#else /* MALI_USE_CSF */
kbdev->reset_timeout_ms = JM_DEFAULT_RESET_TIMEOUT_MS;
-#endif /* MALI_USE_CSF */
+#endif /* !MALI_USE_CSF */
kbdev->mmu_mode = kbase_mmu_mode_get_aarch64();
- kbdev->mmu_as_inactive_wait_time_ms =
+ kbdev->mmu_or_gpu_cache_op_wait_time_ms =
kbase_get_timeout_ms(kbdev, MMU_AS_INACTIVE_WAIT_TIMEOUT);
mutex_init(&kbdev->kctx_list_lock);
INIT_LIST_HEAD(&kbdev->kctx_list);
@@ -327,9 +330,13 @@ int kbase_device_misc_init(struct kbase_device * const kbdev)
kbdev->oom_notifier_block.notifier_call = NULL;
}
-#if MALI_USE_CSF && IS_ENABLED(CONFIG_SYNC_FILE)
+#if MALI_USE_CSF
+#if IS_ENABLED(CONFIG_SYNC_FILE)
atomic_set(&kbdev->live_fence_metadata, 0);
+#endif /* IS_ENABLED(CONFIG_SYNC_FILE) */
+ atomic_set(&kbdev->fence_signal_timeout_enabled, 1);
#endif
+
return 0;
term_as:
@@ -367,8 +374,7 @@ void kbase_device_free(struct kbase_device *kbdev)
void kbase_device_id_init(struct kbase_device *kbdev)
{
- scnprintf(kbdev->devname, DEVNAME_SIZE, "%s%d", kbase_drv_name,
- kbase_dev_nr);
+ scnprintf(kbdev->devname, DEVNAME_SIZE, "%s%d", KBASE_DRV_NAME, kbase_dev_nr);
kbdev->id = kbase_dev_nr;
}
diff --git a/mali_kbase/device/mali_kbase_device.h b/mali_kbase/device/mali_kbase_device.h
index f025011..e9cb5c2 100644
--- a/mali_kbase/device/mali_kbase_device.h
+++ b/mali_kbase/device/mali_kbase_device.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -191,6 +191,7 @@ void kbase_gpu_wait_cache_clean(struct kbase_device *kbdev);
* called from paths (like GPU reset) where an indefinite wait for the
* completion of cache clean operation can cause deadlock, as the operation may
* never complete.
+ * If the cache clean times out, the GPU is reset to recover.
*
* Return: 0 if successful or a negative error code on failure.
*/
diff --git a/mali_kbase/device/mali_kbase_device_hw.c b/mali_kbase/device/mali_kbase_device_hw.c
index 8b4588e..8126b9b 100644
--- a/mali_kbase/device/mali_kbase_device_hw.c
+++ b/mali_kbase/device/mali_kbase_device_hw.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2014-2016, 2018-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -27,34 +27,47 @@
#include <mali_kbase_reset_gpu.h>
#include <mmu/mali_kbase_mmu.h>
-#if !IS_ENABLED(CONFIG_MALI_NO_MALI)
bool kbase_is_gpu_removed(struct kbase_device *kbdev)
{
- u32 val;
+ if (!IS_ENABLED(CONFIG_MALI_ARBITER_SUPPORT))
+ return false;
- val = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_ID));
-
- return val == 0;
+ return (kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_ID)) == 0);
}
-#endif /* !IS_ENABLED(CONFIG_MALI_NO_MALI) */
-static int busy_wait_on_irq(struct kbase_device *kbdev, u32 irq_bit)
+/**
+ * busy_wait_cache_operation - Wait for a pending cache flush to complete
+ *
+ * @kbdev: Pointer to the kbase device.
+ * @irq_bit: IRQ bit of the cache flush operation to wait on.
+ *
+ * It will reset the GPU if the wait times out.
+ *
+ * Return: 0 on success, error code otherwise.
+ */
+static int busy_wait_cache_operation(struct kbase_device *kbdev, u32 irq_bit)
{
- char *irq_flag_name;
- /* Previously MMU-AS command was used for L2 cache flush on page-table update.
- * And we're using the same max-loops count for GPU command, because amount of
- * L2 cache flush overhead are same between them.
- */
- unsigned int max_loops = KBASE_AS_INACTIVE_MAX_LOOPS;
+ const ktime_t wait_loop_start = ktime_get_raw();
+ const u32 wait_time_ms = kbdev->mmu_or_gpu_cache_op_wait_time_ms;
+ bool completed = false;
+ s64 diff;
+
+ do {
+ unsigned int i;
+
+ for (i = 0; i < 1000; i++) {
+ if (kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT)) & irq_bit) {
+ completed = true;
+ break;
+ }
+ }
- /* Wait for the GPU cache clean operation to complete */
- while (--max_loops &&
- !(kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT)) & irq_bit)) {
- ;
- }
+ diff = ktime_to_ms(ktime_sub(ktime_get_raw(), wait_loop_start));
+ } while ((diff < wait_time_ms) && !completed);
+
+ if (!completed) {
+ char *irq_flag_name;
- /* reset gpu if time-out occurred */
- if (max_loops == 0) {
switch (irq_bit) {
case CLEAN_CACHES_COMPLETED:
irq_flag_name = "CLEAN_CACHES_COMPLETED";
@@ -68,15 +81,15 @@ static int busy_wait_on_irq(struct kbase_device *kbdev, u32 irq_bit)
}
dev_err(kbdev->dev,
- "Stuck waiting on %s bit, might be caused by slow/unstable GPU clock or possible faulty FPGA connector\n",
+ "Stuck waiting on %s bit, might be due to unstable GPU clk/pwr or possible faulty FPGA connector\n",
irq_flag_name);
if (kbase_prepare_to_reset_gpu_locked(kbdev, RESET_FLAGS_NONE))
kbase_reset_gpu_locked(kbdev);
+
return -EBUSY;
}
- /* Clear the interrupt bit. */
KBASE_KTRACE_ADD(kbdev, CORE_GPU_IRQ_CLEAR, NULL, irq_bit);
kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), irq_bit);
@@ -110,7 +123,7 @@ int kbase_gpu_cache_flush_pa_range_and_busy_wait(struct kbase_device *kbdev, phy
kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), flush_op);
/* 3. Busy-wait irq status to be enabled. */
- ret = busy_wait_on_irq(kbdev, (u32)FLUSH_PA_RANGE_COMPLETED);
+ ret = busy_wait_cache_operation(kbdev, (u32)FLUSH_PA_RANGE_COMPLETED);
return ret;
}
@@ -143,7 +156,7 @@ int kbase_gpu_cache_flush_and_busy_wait(struct kbase_device *kbdev,
irq_mask & ~CLEAN_CACHES_COMPLETED);
/* busy wait irq status to be enabled */
- ret = busy_wait_on_irq(kbdev, (u32)CLEAN_CACHES_COMPLETED);
+ ret = busy_wait_cache_operation(kbdev, (u32)CLEAN_CACHES_COMPLETED);
if (ret)
return ret;
@@ -164,7 +177,7 @@ int kbase_gpu_cache_flush_and_busy_wait(struct kbase_device *kbdev,
kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), flush_op);
/* 3. Busy-wait irq status to be enabled. */
- ret = busy_wait_on_irq(kbdev, (u32)CLEAN_CACHES_COMPLETED);
+ ret = busy_wait_cache_operation(kbdev, (u32)CLEAN_CACHES_COMPLETED);
if (ret)
return ret;
@@ -271,8 +284,9 @@ static inline bool get_cache_clean_flag(struct kbase_device *kbdev)
void kbase_gpu_wait_cache_clean(struct kbase_device *kbdev)
{
while (get_cache_clean_flag(kbdev)) {
- wait_event_interruptible(kbdev->cache_clean_wait,
- !kbdev->cache_clean_in_progress);
+ if (wait_event_interruptible(kbdev->cache_clean_wait,
+ !kbdev->cache_clean_in_progress))
+ dev_warn(kbdev->dev, "Wait for cache clean is interrupted");
}
}
@@ -280,6 +294,7 @@ int kbase_gpu_wait_cache_clean_timeout(struct kbase_device *kbdev,
unsigned int wait_timeout_ms)
{
long remaining = msecs_to_jiffies(wait_timeout_ms);
+ int result = 0;
while (remaining && get_cache_clean_flag(kbdev)) {
remaining = wait_event_timeout(kbdev->cache_clean_wait,
@@ -287,5 +302,15 @@ int kbase_gpu_wait_cache_clean_timeout(struct kbase_device *kbdev,
remaining);
}
- return (remaining ? 0 : -ETIMEDOUT);
+ if (!remaining) {
+ dev_err(kbdev->dev,
+ "Cache clean timed out. Might be caused by unstable GPU clk/pwr or faulty system");
+
+ if (kbase_prepare_to_reset_gpu_locked(kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR))
+ kbase_reset_gpu_locked(kbdev);
+
+ result = -ETIMEDOUT;
+ }
+
+ return result;
}
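
busy_wait_cache_operation() above replaces the fixed iteration budget with a wall-clock budget taken from mmu_or_gpu_cache_op_wait_time_ms: the IRQ_RAWSTAT register is polled in batches of 1000 reads and the loop gives up once the elapsed time exceeds the limit. Below is a standalone model of that time-bounded polling; the register read is simulated and the names are illustrative.

#include <stdbool.h>
#include <stdio.h>
#include <time.h>

static int fake_irq_rawstat_reads;

static bool irq_bit_raised(void)
{
	/* Pretend the IRQ bit appears after a few thousand polls. */
	return ++fake_irq_rawstat_reads > 5000;
}

static long long elapsed_ms(const struct timespec *start)
{
	struct timespec now;

	clock_gettime(CLOCK_MONOTONIC, &now);
	return (now.tv_sec - start->tv_sec) * 1000LL +
	       (now.tv_nsec - start->tv_nsec) / 1000000LL;
}

static int busy_wait_for_irq(unsigned int wait_time_ms)
{
	struct timespec start;
	bool completed = false;

	clock_gettime(CLOCK_MONOTONIC, &start);
	do {
		unsigned int i;

		for (i = 0; i < 1000; i++) {
			if (irq_bit_raised()) {
				completed = true;
				break;
			}
		}
	} while (!completed && elapsed_ms(&start) < wait_time_ms);

	return completed ? 0 : -1;	/* the driver would reset the GPU here */
}

int main(void)
{
	printf("busy wait result: %d\n", busy_wait_for_irq(100));
	return 0;
}
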
diff --git a/mali_kbase/gpu/backend/mali_kbase_gpu_regmap_csf.h b/mali_kbase/gpu/backend/mali_kbase_gpu_regmap_csf.h
index e7457dd..ab989e0 100644
--- a/mali_kbase/gpu/backend/mali_kbase_gpu_regmap_csf.h
+++ b/mali_kbase/gpu/backend/mali_kbase_gpu_regmap_csf.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -28,6 +28,17 @@
#error "Cannot be compiled with JM"
#endif
+/* GPU control registers */
+#define MCU_CONTROL 0x700
+
+#define L2_CONFIG_PBHA_HWU_SHIFT GPU_U(12)
+#define L2_CONFIG_PBHA_HWU_MASK (GPU_U(0xF) << L2_CONFIG_PBHA_HWU_SHIFT)
+#define L2_CONFIG_PBHA_HWU_GET(reg_val) \
+ (((reg_val)&L2_CONFIG_PBHA_HWU_MASK) >> L2_CONFIG_PBHA_HWU_SHIFT)
+#define L2_CONFIG_PBHA_HWU_SET(reg_val, value) \
+ (((reg_val) & ~L2_CONFIG_PBHA_HWU_MASK) | \
+ (((value) << L2_CONFIG_PBHA_HWU_SHIFT) & L2_CONFIG_PBHA_HWU_MASK))
+
/* GPU_CONTROL_MCU base address */
#define GPU_CONTROL_MCU_BASE 0x3000
@@ -35,35 +46,39 @@
#define MCU_SUBSYSTEM_BASE 0x20000
/* IPA control registers */
-#define COMMAND 0x000 /* (WO) Command register */
-#define TIMER 0x008 /* (RW) Timer control register */
-
-#define SELECT_CSHW_LO 0x010 /* (RW) Counter select for CS hardware, low word */
-#define SELECT_CSHW_HI 0x014 /* (RW) Counter select for CS hardware, high word */
-#define SELECT_MEMSYS_LO 0x018 /* (RW) Counter select for Memory system, low word */
-#define SELECT_MEMSYS_HI 0x01C /* (RW) Counter select for Memory system, high word */
-#define SELECT_TILER_LO 0x020 /* (RW) Counter select for Tiler cores, low word */
-#define SELECT_TILER_HI 0x024 /* (RW) Counter select for Tiler cores, high word */
-#define SELECT_SHADER_LO 0x028 /* (RW) Counter select for Shader cores, low word */
-#define SELECT_SHADER_HI 0x02C /* (RW) Counter select for Shader cores, high word */
+#define IPA_CONTROL_BASE 0x40000
+#define IPA_CONTROL_REG(r) (IPA_CONTROL_BASE + (r))
+
+#define COMMAND 0x000 /* (WO) Command register */
+#define STATUS 0x004 /* (RO) Status register */
+#define TIMER 0x008 /* (RW) Timer control register */
+
+#define SELECT_CSHW_LO 0x010 /* (RW) Counter select for CS hardware, low word */
+#define SELECT_CSHW_HI 0x014 /* (RW) Counter select for CS hardware, high word */
+#define SELECT_MEMSYS_LO 0x018 /* (RW) Counter select for Memory system, low word */
+#define SELECT_MEMSYS_HI 0x01C /* (RW) Counter select for Memory system, high word */
+#define SELECT_TILER_LO 0x020 /* (RW) Counter select for Tiler cores, low word */
+#define SELECT_TILER_HI 0x024 /* (RW) Counter select for Tiler cores, high word */
+#define SELECT_SHADER_LO 0x028 /* (RW) Counter select for Shader cores, low word */
+#define SELECT_SHADER_HI 0x02C /* (RW) Counter select for Shader cores, high word */
/* Accumulated counter values for CS hardware */
-#define VALUE_CSHW_BASE 0x100
-#define VALUE_CSHW_REG_LO(n) (VALUE_CSHW_BASE + ((n) << 3)) /* (RO) Counter value #n, low word */
-#define VALUE_CSHW_REG_HI(n) (VALUE_CSHW_BASE + ((n) << 3) + 4) /* (RO) Counter value #n, high word */
+#define VALUE_CSHW_BASE 0x100
+#define VALUE_CSHW_REG_LO(n) (VALUE_CSHW_BASE + ((n) << 3)) /* (RO) Counter value #n, low word */
+#define VALUE_CSHW_REG_HI(n) (VALUE_CSHW_BASE + ((n) << 3) + 4) /* (RO) Counter value #n, high word */
/* Accumulated counter values for memory system */
-#define VALUE_MEMSYS_BASE 0x140
-#define VALUE_MEMSYS_REG_LO(n) (VALUE_MEMSYS_BASE + ((n) << 3)) /* (RO) Counter value #n, low word */
-#define VALUE_MEMSYS_REG_HI(n) (VALUE_MEMSYS_BASE + ((n) << 3) + 4) /* (RO) Counter value #n, high word */
+#define VALUE_MEMSYS_BASE 0x140
+#define VALUE_MEMSYS_REG_LO(n) (VALUE_MEMSYS_BASE + ((n) << 3)) /* (RO) Counter value #n, low word */
+#define VALUE_MEMSYS_REG_HI(n) (VALUE_MEMSYS_BASE + ((n) << 3) + 4) /* (RO) Counter value #n, high word */
-#define VALUE_TILER_BASE 0x180
-#define VALUE_TILER_REG_LO(n) (VALUE_TILER_BASE + ((n) << 3)) /* (RO) Counter value #n, low word */
-#define VALUE_TILER_REG_HI(n) (VALUE_TILER_BASE + ((n) << 3) + 4) /* (RO) Counter value #n, high word */
+#define VALUE_TILER_BASE 0x180
+#define VALUE_TILER_REG_LO(n) (VALUE_TILER_BASE + ((n) << 3)) /* (RO) Counter value #n, low word */
+#define VALUE_TILER_REG_HI(n) (VALUE_TILER_BASE + ((n) << 3) + 4) /* (RO) Counter value #n, high word */
-#define VALUE_SHADER_BASE 0x1C0
-#define VALUE_SHADER_REG_LO(n) (VALUE_SHADER_BASE + ((n) << 3)) /* (RO) Counter value #n, low word */
-#define VALUE_SHADER_REG_HI(n) (VALUE_SHADER_BASE + ((n) << 3) + 4) /* (RO) Counter value #n, high word */
+#define VALUE_SHADER_BASE 0x1C0
+#define VALUE_SHADER_REG_LO(n) (VALUE_SHADER_BASE + ((n) << 3)) /* (RO) Counter value #n, low word */
+#define VALUE_SHADER_REG_HI(n) (VALUE_SHADER_BASE + ((n) << 3) + 4) /* (RO) Counter value #n, high word */
#define AS_STATUS_AS_ACTIVE_INT 0x2
@@ -112,7 +127,6 @@
/* GPU control registers */
#define CORE_FEATURES 0x008 /* () Shader Core Features */
-#define MCU_CONTROL 0x700
#define MCU_STATUS 0x704
#define MCU_CNTRL_ENABLE (1 << 0)
@@ -122,15 +136,7 @@
#define MCU_CNTRL_DOORBELL_DISABLE_SHIFT (31)
#define MCU_CNTRL_DOORBELL_DISABLE_MASK (1 << MCU_CNTRL_DOORBELL_DISABLE_SHIFT)
-#define MCU_STATUS_HALTED (1 << 1)
-
-#define L2_CONFIG_PBHA_HWU_SHIFT GPU_U(12)
-#define L2_CONFIG_PBHA_HWU_MASK (GPU_U(0xF) << L2_CONFIG_PBHA_HWU_SHIFT)
-#define L2_CONFIG_PBHA_HWU_GET(reg_val) \
- (((reg_val)&L2_CONFIG_PBHA_HWU_MASK) >> L2_CONFIG_PBHA_HWU_SHIFT)
-#define L2_CONFIG_PBHA_HWU_SET(reg_val, value) \
- (((reg_val) & ~L2_CONFIG_PBHA_HWU_MASK) | \
- (((value) << L2_CONFIG_PBHA_HWU_SHIFT) & L2_CONFIG_PBHA_HWU_MASK))
+#define MCU_STATUS_HALTED (1 << 1)
/* JOB IRQ flags */
#define JOB_IRQ_GLOBAL_IF (1u << 31) /* Global interface interrupt received */
@@ -292,13 +298,13 @@
#define GPU_FAULTSTATUS_ACCESS_TYPE_MASK \
(0x3ul << GPU_FAULTSTATUS_ACCESS_TYPE_SHIFT)
-#define GPU_FAULTSTATUS_ADDR_VALID_SHIFT 10
-#define GPU_FAULTSTATUS_ADDR_VALID_FLAG \
- (1ul << GPU_FAULTSTATUS_ADDR_VALID_SHIFT)
+#define GPU_FAULTSTATUS_ADDRESS_VALID_SHIFT GPU_U(10)
+#define GPU_FAULTSTATUS_ADDRESS_VALID_MASK \
+ (GPU_U(0x1) << GPU_FAULTSTATUS_ADDRESS_VALID_SHIFT)
-#define GPU_FAULTSTATUS_JASID_VALID_SHIFT 11
-#define GPU_FAULTSTATUS_JASID_VALID_FLAG \
- (1ul << GPU_FAULTSTATUS_JASID_VALID_SHIFT)
+#define GPU_FAULTSTATUS_JASID_VALID_SHIFT GPU_U(11)
+#define GPU_FAULTSTATUS_JASID_VALID_MASK \
+ (GPU_U(0x1) << GPU_FAULTSTATUS_JASID_VALID_SHIFT)
#define GPU_FAULTSTATUS_JASID_SHIFT 12
#define GPU_FAULTSTATUS_JASID_MASK (0xF << GPU_FAULTSTATUS_JASID_SHIFT)
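
Illustrative sketch (not part of the applied patch): the relocated L2_CONFIG_PBHA_HWU_* accessors follow the driver's usual SHIFT/MASK/GET/SET pattern for register fields. A minimal read-modify-write sketch, assuming the existing kbase_reg_read()/kbase_reg_write() helpers and the L2_CONFIG offset; set_l2_pbha_hwu() is a hypothetical name:

static void set_l2_pbha_hwu(struct kbase_device *kbdev, u32 hwu)
{
        /* Read the current L2 configuration register. */
        u32 val = kbase_reg_read(kbdev, GPU_CONTROL_REG(L2_CONFIG));

        /* Clear the 4-bit PBHA hardware-update field at bit 12 and insert
         * the new value; all other L2_CONFIG bits are preserved.
         */
        val = L2_CONFIG_PBHA_HWU_SET(val, hwu);
        kbase_reg_write(kbdev, GPU_CONTROL_REG(L2_CONFIG), val);
}
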
diff --git a/mali_kbase/gpu/backend/mali_kbase_gpu_regmap_jm.h b/mali_kbase/gpu/backend/mali_kbase_gpu_regmap_jm.h
index f86f493..387cd50 100644
--- a/mali_kbase/gpu/backend/mali_kbase_gpu_regmap_jm.h
+++ b/mali_kbase/gpu/backend/mali_kbase_gpu_regmap_jm.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -59,28 +59,27 @@
#define CORE_FEATURES 0x008 /* (RO) Shader Core Features */
#define JS_PRESENT 0x01C /* (RO) Job slots present */
-
-#define PRFCNT_BASE_LO 0x060 /* (RW) Performance counter memory
- * region base address, low word
- */
-#define PRFCNT_BASE_HI 0x064 /* (RW) Performance counter memory
- * region base address, high word
- */
-#define PRFCNT_CONFIG 0x068 /* (RW) Performance counter
- * configuration
- */
-#define PRFCNT_JM_EN 0x06C /* (RW) Performance counter enable
- * flags for Job Manager
- */
-#define PRFCNT_SHADER_EN 0x070 /* (RW) Performance counter enable
- * flags for shader cores
- */
-#define PRFCNT_TILER_EN 0x074 /* (RW) Performance counter enable
- * flags for tiler
- */
-#define PRFCNT_MMU_L2_EN 0x07C /* (RW) Performance counter enable
- * flags for MMU/L2 cache
- */
+#define LATEST_FLUSH 0x038 /* (RO) Flush ID of latest
+ * clean-and-invalidate operation
+ */
+#define PRFCNT_BASE_LO 0x060 /* (RW) Performance counter memory
+ * region base address, low word
+ */
+#define PRFCNT_BASE_HI 0x064 /* (RW) Performance counter memory
+ * region base address, high word
+ */
+#define PRFCNT_CONFIG 0x068 /* (RW) Performance counter configuration */
+#define PRFCNT_JM_EN 0x06C /* (RW) Performance counter enable
+ * flags for Job Manager
+ */
+#define PRFCNT_SHADER_EN 0x070 /* (RW) Performance counter enable
+ * flags for shader cores
+ */
+#define PRFCNT_TILER_EN 0x074 /* (RW) Performance counter enable
+ * flags for tiler
+ */
+#define PRFCNT_MMU_L2_EN 0x07C /* (RW) Performance counter enable
+ * flags for MMU/L2 cache
+ */
#define JS0_FEATURES 0x0C0 /* (RO) Features of job slot 0 */
#define JS1_FEATURES 0x0C4 /* (RO) Features of job slot 1 */
@@ -108,6 +107,8 @@
#define JOB_IRQ_JS_STATE 0x010 /* status==active and _next == busy snapshot from last JOB_IRQ_CLEAR */
#define JOB_IRQ_THROTTLE 0x014 /* cycles to delay delivering an interrupt externally. The JOB_IRQ_STATUS is NOT affected by this, just the delivery of the interrupt. */
+#define JOB_SLOT0 0x800 /* Configuration registers for job slot 0 */
+#define JOB_SLOT_REG(n, r) (JOB_CONTROL_REG(JOB_SLOT0 + ((n) << 7)) + (r))
#define JOB_SLOT1 0x880 /* Configuration registers for job slot 1 */
#define JOB_SLOT2 0x900 /* Configuration registers for job slot 2 */
#define JOB_SLOT3 0x980 /* Configuration registers for job slot 3 */
@@ -124,29 +125,41 @@
#define JOB_SLOT14 0xF00 /* Configuration registers for job slot 14 */
#define JOB_SLOT15 0xF80 /* Configuration registers for job slot 15 */
-#define JS_XAFFINITY 0x1C /* (RO) Extended affinity mask for job slot n*/
-
-#define JS_COMMAND 0x20 /* (WO) Command register for job slot n */
-#define JS_STATUS 0x24 /* (RO) Status register for job slot n */
-
-#define JS_XAFFINITY_NEXT 0x5C /* (RW) Next extended affinity mask for job slot n */
-
-#define JS_FLUSH_ID_NEXT 0x70 /* (RW) Next job slot n cache flush ID */
+/* JM Job control register definitions for mali_kbase_debug_job_fault */
+#define JS_HEAD_LO 0x00 /* (RO) Job queue head pointer for job slot n, low word */
+#define JS_HEAD_HI 0x04 /* (RO) Job queue head pointer for job slot n, high word */
+#define JS_TAIL_LO 0x08 /* (RO) Job queue tail pointer for job slot n, low word */
+#define JS_TAIL_HI 0x0C /* (RO) Job queue tail pointer for job slot n, high word */
+#define JS_AFFINITY_LO 0x10 /* (RO) Core affinity mask for job slot n, low word */
+#define JS_AFFINITY_HI 0x14 /* (RO) Core affinity mask for job slot n, high word */
+#define JS_CONFIG 0x18 /* (RO) Configuration settings for job slot n */
+#define JS_XAFFINITY 0x1C /* (RO) Extended affinity mask for job slot n*/
+#define JS_COMMAND 0x20 /* (WO) Command register for job slot n */
+#define JS_STATUS 0x24 /* (RO) Status register for job slot n */
+#define JS_HEAD_NEXT_LO 0x40 /* (RW) Next job queue head pointer for job slot n, low word */
+#define JS_HEAD_NEXT_HI 0x44 /* (RW) Next job queue head pointer for job slot n, high word */
+#define JS_AFFINITY_NEXT_LO 0x50 /* (RW) Next core affinity mask for job slot n, low word */
+#define JS_AFFINITY_NEXT_HI 0x54 /* (RW) Next core affinity mask for job slot n, high word */
+#define JS_CONFIG_NEXT 0x58 /* (RW) Next configuration settings for job slot n */
+#define JS_XAFFINITY_NEXT 0x5C /* (RW) Next extended affinity mask for job slot n */
+#define JS_COMMAND_NEXT 0x60 /* (RW) Next command register for job slot n */
+
+#define JS_FLUSH_ID_NEXT 0x70 /* (RW) Next job slot n cache flush ID */
/* No JM-specific MMU control registers */
/* No JM-specific MMU address space control registers */
/* JS_COMMAND register commands */
-#define JS_COMMAND_NOP 0x00 /* NOP Operation. Writing this value is ignored */
-#define JS_COMMAND_START 0x01 /* Start processing a job chain. Writing this value is ignored */
-#define JS_COMMAND_SOFT_STOP 0x02 /* Gently stop processing a job chain */
-#define JS_COMMAND_HARD_STOP 0x03 /* Rudely stop processing a job chain */
-#define JS_COMMAND_SOFT_STOP_0 0x04 /* Execute SOFT_STOP if JOB_CHAIN_FLAG is 0 */
-#define JS_COMMAND_HARD_STOP_0 0x05 /* Execute HARD_STOP if JOB_CHAIN_FLAG is 0 */
-#define JS_COMMAND_SOFT_STOP_1 0x06 /* Execute SOFT_STOP if JOB_CHAIN_FLAG is 1 */
-#define JS_COMMAND_HARD_STOP_1 0x07 /* Execute HARD_STOP if JOB_CHAIN_FLAG is 1 */
-
-#define JS_COMMAND_MASK 0x07 /* Mask of bits currently in use by the HW */
+#define JS_COMMAND_NOP 0x00 /* NOP Operation. Writing this value is ignored */
+#define JS_COMMAND_START 0x01 /* Start processing a job chain. Writing this value is ignored */
+#define JS_COMMAND_SOFT_STOP 0x02 /* Gently stop processing a job chain */
+#define JS_COMMAND_HARD_STOP 0x03 /* Rudely stop processing a job chain */
+#define JS_COMMAND_SOFT_STOP_0 0x04 /* Execute SOFT_STOP if JOB_CHAIN_FLAG is 0 */
+#define JS_COMMAND_HARD_STOP_0 0x05 /* Execute HARD_STOP if JOB_CHAIN_FLAG is 0 */
+#define JS_COMMAND_SOFT_STOP_1 0x06 /* Execute SOFT_STOP if JOB_CHAIN_FLAG is 1 */
+#define JS_COMMAND_HARD_STOP_1 0x07 /* Execute HARD_STOP if JOB_CHAIN_FLAG is 1 */
+
+#define JS_COMMAND_MASK 0x07 /* Mask of bits currently in use by the HW */
/* Possible values of JS_CONFIG and JS_CONFIG_NEXT registers */
#define JS_CONFIG_START_FLUSH_NO_ACTION (0u << 0)
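
Illustrative sketch (not part of the applied patch): with JOB_SLOT0 and JOB_SLOT_REG() restored above, a per-slot register is addressed as JOB_CONTROL_BASE + 0x800 + (slot << 7) + offset. A minimal sketch assuming the existing kbase_reg_write() helper; kick_next_job() is a hypothetical name and the real submission path in the JM backend does more bookkeeping:

static void kick_next_job(struct kbase_device *kbdev, unsigned int js)
{
        /* Start the job chain loaded into the NEXT registers of slot 'js'. */
        kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_COMMAND_NEXT), JS_COMMAND_START);
}
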
diff --git a/mali_kbase/gpu/mali_kbase_gpu.c b/mali_kbase/gpu/mali_kbase_gpu.c
index 8a84ef5..eee670f 100644
--- a/mali_kbase/gpu/mali_kbase_gpu.c
+++ b/mali_kbase/gpu/mali_kbase_gpu.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -32,7 +32,7 @@ const char *kbase_gpu_access_type_name(u32 fault_status)
return "READ";
case AS_FAULTSTATUS_ACCESS_TYPE_WRITE:
return "WRITE";
- case AS_FAULTSTATUS_ACCESS_TYPE_EX:
+ case AS_FAULTSTATUS_ACCESS_TYPE_EXECUTE:
return "EXECUTE";
default:
WARN_ON(1);
diff --git a/mali_kbase/gpu/mali_kbase_gpu_regmap.h b/mali_kbase/gpu/mali_kbase_gpu_regmap.h
index e51791f..a92b498 100644
--- a/mali_kbase/gpu/mali_kbase_gpu_regmap.h
+++ b/mali_kbase/gpu/mali_kbase_gpu_regmap.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -25,6 +25,7 @@
#include <uapi/gpu/arm/midgard/gpu/mali_kbase_gpu_regmap.h>
#include <uapi/gpu/arm/midgard/gpu/mali_kbase_gpu_coherency.h>
#include <uapi/gpu/arm/midgard/gpu/mali_kbase_gpu_id.h>
+
#if MALI_USE_CSF
#include "backend/mali_kbase_gpu_regmap_csf.h"
#else
@@ -42,19 +43,29 @@
#define GPU_ULL(x) x##ull
#endif /* __ASSEMBLER__ */
+
/* Begin Register Offsets */
/* GPU control registers */
+#define GPU_CONTROL_BASE 0x0000
+#define GPU_CONTROL_REG(r) (GPU_CONTROL_BASE + (r))
+
+#define GPU_ID 0x000 /* (RO) GPU and revision identifier */
#define L2_FEATURES 0x004 /* (RO) Level 2 cache features */
#define TILER_FEATURES 0x00C /* (RO) Tiler Features */
#define MEM_FEATURES 0x010 /* (RO) Memory system features */
#define MMU_FEATURES 0x014 /* (RO) MMU features */
#define AS_PRESENT 0x018 /* (RO) Address space slots present */
#define GPU_IRQ_RAWSTAT 0x020 /* (RW) */
+#define GPU_IRQ_CLEAR 0x024 /* (WO) */
#define GPU_IRQ_MASK 0x028 /* (RW) */
-
+#define GPU_IRQ_STATUS 0x02C /* (RO) */
#define GPU_COMMAND 0x030 /* (WO) */
+
#define GPU_STATUS 0x034 /* (RO) */
+#define GPU_STATUS_PRFCNT_ACTIVE (1 << 2) /* Set if the performance counters are active. */
+#define GPU_STATUS_CYCLE_COUNT_ACTIVE (1 << 6) /* Set if the cycle counter is active. */
+#define GPU_STATUS_PROTECTED_MODE_ACTIVE (1 << 7) /* Set if protected mode is active */
#define GPU_DBGEN (1 << 8) /* DBGEN wire status */
@@ -64,10 +75,9 @@
#define L2_CONFIG 0x048 /* (RW) Level 2 cache configuration */
-#define GROUPS_L2_COHERENT (1 << 0) /* Cores groups are l2 coherent */
-#define SUPER_L2_COHERENT (1 << 1) /* Shader cores within a core
- * supergroup are l2 coherent
- */
+/* Cores groups are l2 coherent */
+#define MEM_FEATURES_COHERENT_CORE_GROUP_SHIFT GPU_U(0)
+#define MEM_FEATURES_COHERENT_CORE_GROUP_MASK (GPU_U(0x1) << MEM_FEATURES_COHERENT_CORE_GROUP_SHIFT)
#define PWR_KEY 0x050 /* (WO) Power manager key register */
#define PWR_OVERRIDE0 0x054 /* (RW) Power manager override settings */
@@ -95,10 +105,10 @@
#define TEXTURE_FEATURES_REG(n) GPU_CONTROL_REG(TEXTURE_FEATURES_0 + ((n) << 2))
-#define GPU_COMMAND_ARG0_LO 0x0D0 /* (RW) Additional parameter 0 for GPU commands, low word */
-#define GPU_COMMAND_ARG0_HI 0x0D4 /* (RW) Additional parameter 0 for GPU commands, high word */
-#define GPU_COMMAND_ARG1_LO 0x0D8 /* (RW) Additional parameter 1 for GPU commands, low word */
-#define GPU_COMMAND_ARG1_HI 0x0DC /* (RW) Additional parameter 1 for GPU commands, high word */
+#define GPU_COMMAND_ARG0_LO 0x0D0 /* (RW) Additional parameter 0 for GPU commands, low word */
+#define GPU_COMMAND_ARG0_HI 0x0D4 /* (RW) Additional parameter 0 for GPU commands, high word */
+#define GPU_COMMAND_ARG1_LO 0x0D8 /* (RW) Additional parameter 1 for GPU commands, low word */
+#define GPU_COMMAND_ARG1_HI 0x0DC /* (RW) Additional parameter 1 for GPU commands, high word */
#define SHADER_PRESENT_LO 0x100 /* (RO) Shader core present bitmap, low word */
#define SHADER_PRESENT_HI 0x104 /* (RO) Shader core present bitmap, high word */
@@ -109,14 +119,32 @@
#define L2_PRESENT_LO 0x120 /* (RO) Level 2 cache present bitmap, low word */
#define L2_PRESENT_HI 0x124 /* (RO) Level 2 cache present bitmap, high word */
+#define SHADER_READY_LO 0x140 /* (RO) Shader core ready bitmap, low word */
+#define SHADER_READY_HI 0x144 /* (RO) Shader core ready bitmap, high word */
+
+#define TILER_READY_LO 0x150 /* (RO) Tiler core ready bitmap, low word */
+#define TILER_READY_HI 0x154 /* (RO) Tiler core ready bitmap, high word */
+
+#define L2_READY_LO 0x160 /* (RO) Level 2 cache ready bitmap, low word */
+#define L2_READY_HI 0x164 /* (RO) Level 2 cache ready bitmap, high word */
+
+#define SHADER_PWRON_LO 0x180 /* (WO) Shader core power on bitmap, low word */
+#define SHADER_PWRON_HI 0x184 /* (WO) Shader core power on bitmap, high word */
+
+#define SHADER_PWRFEATURES 0x188 /* (RW) Shader core power features */
+
+#define TILER_PWRON_LO 0x190 /* (WO) Tiler core power on bitmap, low word */
+#define TILER_PWRON_HI 0x194 /* (WO) Tiler core power on bitmap, high word */
+
+#define L2_PWRON_LO 0x1A0 /* (WO) Level 2 cache power on bitmap, low word */
+#define L2_PWRON_HI 0x1A4 /* (WO) Level 2 cache power on bitmap, high word */
+
#define STACK_PRESENT_LO 0xE00 /* (RO) Core stack present bitmap, low word */
#define STACK_PRESENT_HI 0xE04 /* (RO) Core stack present bitmap, high word */
#define STACK_READY_LO 0xE10 /* (RO) Core stack ready bitmap, low word */
#define STACK_READY_HI 0xE14 /* (RO) Core stack ready bitmap, high word */
-#define SHADER_PWRFEATURES 0x188 /* (RW) Shader core power features */
-
#define STACK_PWRON_LO 0xE20 /* (RO) Core stack power on bitmap, low word */
#define STACK_PWRON_HI 0xE24 /* (RO) Core stack power on bitmap, high word */
@@ -173,10 +201,25 @@
/* Job control registers */
+#define JOB_CONTROL_BASE 0x1000
+#define JOB_CONTROL_REG(r) (JOB_CONTROL_BASE + (r))
+
#define JOB_IRQ_RAWSTAT 0x000 /* Raw interrupt status register */
+#define JOB_IRQ_CLEAR 0x004 /* Interrupt clear register */
+#define JOB_IRQ_MASK 0x008 /* Interrupt mask register */
+#define JOB_IRQ_STATUS 0x00C /* Interrupt status register */
/* MMU control registers */
+#define MMU_CONTROL_BASE 0x2000
+#define MMU_CONTROL_REG(r) (MMU_CONTROL_BASE + (r))
+
+#define MMU_IRQ_RAWSTAT 0x000 /* (RW) Raw interrupt status register */
+#define MMU_IRQ_CLEAR 0x004 /* (WO) Interrupt clear register */
+#define MMU_IRQ_MASK 0x008 /* (RW) Interrupt mask register */
+#define MMU_IRQ_STATUS 0x00C /* (RO) Interrupt status register */
+
+#define MMU_AS0 0x400 /* Configuration registers for address space 0 */
#define MMU_AS1 0x440 /* Configuration registers for address space 1 */
#define MMU_AS2 0x480 /* Configuration registers for address space 2 */
#define MMU_AS3 0x4C0 /* Configuration registers for address space 3 */
@@ -194,17 +237,27 @@
#define MMU_AS15 0x7C0 /* Configuration registers for address space 15 */
/* MMU address space control registers */
-#define AS_LOCKADDR_LO 0x10 /* (RW) Lock region address for address space n, low word */
-#define AS_LOCKADDR_HI 0x14 /* (RW) Lock region address for address space n, high word */
-#define AS_FAULTSTATUS 0x1C /* (RO) MMU fault status register for address space n */
-#define AS_FAULTADDRESS_LO 0x20 /* (RO) Fault Address for address space n, low word */
-#define AS_FAULTADDRESS_HI 0x24 /* (RO) Fault Address for address space n, high word */
-#define AS_STATUS 0x28 /* (RO) Status flags for address space n */
-
-/* (RO) Secondary fault address for address space n, low word */
-#define AS_FAULTEXTRA_LO 0x38
-/* (RO) Secondary fault address for address space n, high word */
-#define AS_FAULTEXTRA_HI 0x3C
+
+#define MMU_STAGE1 0x2000 /* () MMU control registers */
+#define MMU_STAGE1_REG(r) (MMU_STAGE1 + (r))
+
+#define MMU_AS_REG(n, r) (MMU_AS0 + ((n) << 6) + (r))
+
+#define AS_TRANSTAB_LO 0x00 /* (RW) Translation Table Base Address for address space n, low word */
+#define AS_TRANSTAB_HI 0x04 /* (RW) Translation Table Base Address for address space n, high word */
+#define AS_MEMATTR_LO 0x08 /* (RW) Memory attributes for address space n, low word. */
+#define AS_MEMATTR_HI 0x0C /* (RW) Memory attributes for address space n, high word. */
+#define AS_LOCKADDR_LO 0x10 /* (RW) Lock region address for address space n, low word */
+#define AS_LOCKADDR_HI 0x14 /* (RW) Lock region address for address space n, high word */
+#define AS_COMMAND 0x18 /* (WO) MMU command register for address space n */
+#define AS_FAULTSTATUS 0x1C /* (RO) MMU fault status register for address space n */
+#define AS_FAULTADDRESS_LO 0x20 /* (RO) Fault Address for address space n, low word */
+#define AS_FAULTADDRESS_HI 0x24 /* (RO) Fault Address for address space n, high word */
+#define AS_STATUS 0x28 /* (RO) Status flags for address space n */
+#define AS_TRANSCFG_LO 0x30 /* (RW) Translation table configuration for address space n, low word */
+#define AS_TRANSCFG_HI 0x34 /* (RW) Translation table configuration for address space n, high word */
+#define AS_FAULTEXTRA_LO 0x38 /* (RO) Secondary fault address for address space n, low word */
+#define AS_FAULTEXTRA_HI 0x3C /* (RO) Secondary fault address for address space n, high word */
/* End Register Offsets */
@@ -254,7 +307,7 @@
(((reg_val)&AS_FAULTSTATUS_ACCESS_TYPE_MASK) >> AS_FAULTSTATUS_ACCESS_TYPE_SHIFT)
#define AS_FAULTSTATUS_ACCESS_TYPE_ATOMIC (0x0)
-#define AS_FAULTSTATUS_ACCESS_TYPE_EX (0x1)
+#define AS_FAULTSTATUS_ACCESS_TYPE_EXECUTE (0x1)
#define AS_FAULTSTATUS_ACCESS_TYPE_READ (0x2)
#define AS_FAULTSTATUS_ACCESS_TYPE_WRITE (0x3)
@@ -336,11 +389,6 @@
(((reg_val) & ~AS_LOCKADDR_FLUSH_SKIP_LEVELS_MASK) | \
((value << AS_LOCKADDR_FLUSH_SKIP_LEVELS_SHIFT) & AS_LOCKADDR_FLUSH_SKIP_LEVELS_MASK))
-/* GPU_STATUS values */
-#define GPU_STATUS_PRFCNT_ACTIVE (1 << 2) /* Set if the performance counters are active. */
-#define GPU_STATUS_CYCLE_COUNT_ACTIVE (1 << 6) /* Set if the cycle counter is active. */
-#define GPU_STATUS_PROTECTED_MODE_ACTIVE (1 << 7) /* Set if protected mode is active */
-
/* PRFCNT_CONFIG register values */
#define PRFCNT_CONFIG_MODE_SHIFT 0 /* Counter mode position. */
#define PRFCNT_CONFIG_AS_SHIFT 4 /* Address space bitmap position. */
@@ -452,16 +500,6 @@
(((reg_val) & ~AMBA_FEATURES_MEMORY_CACHE_SUPPORT_MASK) | \
(((value) << AMBA_FEATURES_MEMORY_CACHE_SUPPORT_SHIFT) & \
AMBA_FEATURES_MEMORY_CACHE_SUPPORT_MASK))
-#define AMBA_FEATURES_INVALIDATE_HINT_SHIFT GPU_U(6)
-#define AMBA_FEATURES_INVALIDATE_HINT_MASK \
- (GPU_U(0x1) << AMBA_FEATURES_INVALIDATE_HINT_SHIFT)
-#define AMBA_FEATURES_INVALIDATE_HINT_GET(reg_val) \
- (((reg_val)&AMBA_FEATURES_INVALIDATE_HINT_MASK) >> \
- AMBA_FEATURES_INVALIDATE_HINT_SHIFT)
-#define AMBA_FEATURES_INVALIDATE_HINT_SET(reg_val, value) \
- (((reg_val) & ~AMBA_FEATURES_INVALIDATE_HINT_MASK) | \
- (((value) << AMBA_FEATURES_INVALIDATE_HINT_SHIFT) & \
- AMBA_FEATURES_INVALIDATE_HINT_MASK))
/* AMBA_ENABLE register */
#define AMBA_ENABLE_COHERENCY_PROTOCOL_SHIFT GPU_U(0)
@@ -489,16 +527,6 @@
(((reg_val) & ~AMBA_ENABLE_MEMORY_CACHE_SUPPORT_MASK) | \
(((value) << AMBA_ENABLE_MEMORY_CACHE_SUPPORT_SHIFT) & \
AMBA_ENABLE_MEMORY_CACHE_SUPPORT_MASK))
-#define AMBA_ENABLE_INVALIDATE_HINT_SHIFT GPU_U(6)
-#define AMBA_ENABLE_INVALIDATE_HINT_MASK \
- (GPU_U(0x1) << AMBA_ENABLE_INVALIDATE_HINT_SHIFT)
-#define AMBA_ENABLE_INVALIDATE_HINT_GET(reg_val) \
- (((reg_val)&AMBA_ENABLE_INVALIDATE_HINT_MASK) >> \
- AMBA_ENABLE_INVALIDATE_HINT_SHIFT)
-#define AMBA_ENABLE_INVALIDATE_HINT_SET(reg_val, value) \
- (((reg_val) & ~AMBA_ENABLE_INVALIDATE_HINT_MASK) | \
- (((value) << AMBA_ENABLE_INVALIDATE_HINT_SHIFT) & \
- AMBA_ENABLE_INVALIDATE_HINT_MASK))
/* IDVS_GROUP register */
#define IDVS_GROUP_SIZE_SHIFT (16)
diff --git a/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.c b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.c
index 9a409f6..c8cf934 100644
--- a/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.c
+++ b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.c
@@ -347,7 +347,7 @@ static int kbasep_hwcnt_backend_csf_if_fw_ring_buf_alloc(
/* Update MMU table */
ret = kbase_mmu_insert_pages(kbdev, &kbdev->csf.mcu_mmu, gpu_va_base >> PAGE_SHIFT, phys,
num_pages, flags, MCU_AS_NR, KBASE_MEM_GROUP_CSF_FW,
- mmu_sync_info, NULL, false);
+ mmu_sync_info, NULL);
if (ret)
goto mmu_insert_failed;
@@ -480,10 +480,10 @@ kbasep_hwcnt_backend_csf_if_fw_ring_buf_free(struct kbase_hwcnt_backend_csf_if_c
if (fw_ring_buf->phys) {
u64 gpu_va_base = KBASE_HWC_CSF_RING_BUFFER_VA_START;
- WARN_ON(kbase_mmu_teardown_pages(fw_ctx->kbdev, &fw_ctx->kbdev->csf.mcu_mmu,
- gpu_va_base >> PAGE_SHIFT, fw_ring_buf->phys,
- fw_ring_buf->num_pages, fw_ring_buf->num_pages,
- MCU_AS_NR, true));
+ WARN_ON(kbase_mmu_teardown_firmware_pages(
+ fw_ctx->kbdev, &fw_ctx->kbdev->csf.mcu_mmu, gpu_va_base >> PAGE_SHIFT,
+ fw_ring_buf->phys, fw_ring_buf->num_pages, fw_ring_buf->num_pages,
+ MCU_AS_NR));
vunmap(fw_ring_buf->cpu_dump_base);
diff --git a/mali_kbase/jm/mali_kbase_jm_defs.h b/mali_kbase/jm/mali_kbase_jm_defs.h
index 639b35f..e694f9f 100644
--- a/mali_kbase/jm/mali_kbase_jm_defs.h
+++ b/mali_kbase/jm/mali_kbase_jm_defs.h
@@ -140,15 +140,17 @@
* @JM_DEFAULT_JS_FREE_TIMEOUT: Maximum timeout to wait for JS_COMMAND_NEXT
* to be updated on HW side so a Job Slot is
* considered free.
- * @KBASE_TIMEOUT_SELECTOR_COUNT: Number of timeout selectors. Must be last in
- * the enum.
+ * @KBASE_TIMEOUT_SELECTOR_COUNT: Number of timeout selectors.
+ * @KBASE_DEFAULT_TIMEOUT: Fallback timeout used when an invalid timeout
+ * selector is passed.
*/
enum kbase_timeout_selector {
MMU_AS_INACTIVE_WAIT_TIMEOUT,
JM_DEFAULT_JS_FREE_TIMEOUT,
/* Must be the last in the enum */
- KBASE_TIMEOUT_SELECTOR_COUNT
+ KBASE_TIMEOUT_SELECTOR_COUNT,
+ KBASE_DEFAULT_TIMEOUT = JM_DEFAULT_JS_FREE_TIMEOUT
};
#if IS_ENABLED(CONFIG_DEBUG_FS)
@@ -862,10 +864,6 @@ struct jsctx_queue {
* @pf_data: Data relating to Page fault.
* @bf_data: Data relating to Bus fault.
* @current_setup: Stores the MMU configuration for this address space.
- * @is_unresponsive: Flag to indicate MMU is not responding.
- * Set if a MMU command isn't completed within
- * &kbase_device:mmu_as_inactive_wait_time_ms.
- * Clear by kbase_ctx_sched_restore_all_as() after GPU reset completes.
*/
struct kbase_as {
int number;
@@ -875,7 +873,6 @@ struct kbase_as {
struct kbase_fault pf_data;
struct kbase_fault bf_data;
struct kbase_mmu_setup current_setup;
- bool is_unresponsive;
};
#endif /* _KBASE_JM_DEFS_H_ */
diff --git a/mali_kbase/jm/mali_kbase_js_defs.h b/mali_kbase/jm/mali_kbase_js_defs.h
index 5023eaa..009ff02 100644
--- a/mali_kbase/jm/mali_kbase_js_defs.h
+++ b/mali_kbase/jm/mali_kbase_js_defs.h
@@ -342,6 +342,30 @@ struct kbasep_js_device_data {
* * the kbasep_js_kctx_info::runpool substructure
*/
struct mutex runpool_mutex;
+
+#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD)
+ /**
+ * @gpu_metrics_timer: High-resolution timer used to periodically emit the GPU metrics
+ * tracepoints for applications that are using the GPU. The timer is
+ * needed to handle long-running work so that the length of a work
+ * period stays within the allowed limit.
+ */
+ struct hrtimer gpu_metrics_timer;
+
+ /**
+ * @gpu_metrics_timer_needed: Flag to indicate if the @gpu_metrics_timer is needed.
+ * The timer won't be started after the expiry if the flag
+ * isn't set.
+ */
+ bool gpu_metrics_timer_needed;
+
+ /**
+ * @gpu_metrics_timer_running: Flag to indicate if the @gpu_metrics_timer is running.
+ * The flag is set to false when the timer is cancelled or
+ * is not restarted after the expiry.
+ */
+ bool gpu_metrics_timer_running;
+#endif
};
/**
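
Illustrative sketch (not part of the applied patch): the gpu_metrics_timer added above is a plain hrtimer. A minimal sketch of the expiry/re-arm pattern the new fields imply, assuming DEFAULT_GPU_METRICS_TP_EMIT_INTERVAL_NS from the config defaults; emit_gpu_metrics_tracepoints() is a placeholder for the real emitter:

static enum hrtimer_restart gpu_metrics_timer_cb(struct hrtimer *timer)
{
        struct kbasep_js_device_data *jsdd =
                container_of(timer, struct kbasep_js_device_data, gpu_metrics_timer);

        emit_gpu_metrics_tracepoints(jsdd); /* placeholder for the real emitter */

        /* Stop if the timer is no longer needed; otherwise re-arm for the
         * next emission interval.
         */
        if (!READ_ONCE(jsdd->gpu_metrics_timer_needed)) {
                jsdd->gpu_metrics_timer_running = false;
                return HRTIMER_NORESTART;
        }

        hrtimer_forward_now(timer, ns_to_ktime(DEFAULT_GPU_METRICS_TP_EMIT_INTERVAL_NS));
        return HRTIMER_RESTART;
}
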
diff --git a/mali_kbase/mali_base_hwconfig_features.h b/mali_kbase/mali_base_hwconfig_features.h
index 11aedef..724145f 100644
--- a/mali_kbase/mali_base_hwconfig_features.h
+++ b/mali_kbase/mali_base_hwconfig_features.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2014-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -21,7 +21,7 @@
/* AUTOMATICALLY GENERATED FILE. If you want to amend the issues/features,
* please update base/tools/hwconfig_generator/hwc_{issues,features}.py
- * For more information see base/tools/hwconfig_generator/README
+ * For more information see base/tools/docs/hwconfig_generator.md
*/
#ifndef _BASE_HWCONFIG_FEATURES_H_
diff --git a/mali_kbase/mali_base_hwconfig_issues.h b/mali_kbase/mali_base_hwconfig_issues.h
index 0fbdec0..91b9b83 100644
--- a/mali_kbase/mali_base_hwconfig_issues.h
+++ b/mali_kbase/mali_base_hwconfig_issues.h
@@ -21,7 +21,7 @@
/* AUTOMATICALLY GENERATED FILE. If you want to amend the issues/features,
* please update base/tools/hwconfig_generator/hwc_{issues,features}.py
- * For more information see base/tools/hwconfig_generator/README
+ * For more information see base/tools/docs/hwconfig_generator.md
*/
#ifndef _BASE_HWCONFIG_ISSUES_H_
diff --git a/mali_kbase/mali_kbase.h b/mali_kbase/mali_kbase.h
index 7de793c..c39ba99 100644
--- a/mali_kbase/mali_kbase.h
+++ b/mali_kbase/mali_kbase.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -52,6 +52,7 @@
#include <uapi/gpu/arm/midgard/mali_base_kernel.h>
#include <mali_kbase_linux.h>
+#include <linux/version_compat_defs.h>
/*
* Include mali_kbase_defs.h first as this provides types needed by other local
@@ -61,9 +62,7 @@
#include "debug/mali_kbase_debug_ktrace.h"
#include "context/mali_kbase_context.h"
-#include "mali_kbase_strings.h"
#include "mali_kbase_mem_lowlevel.h"
-#include "mali_kbase_utility.h"
#include "mali_kbase_mem.h"
#include "mmu/mali_kbase_mmu.h"
#include "mali_kbase_gpu_memory_debugfs.h"
@@ -87,6 +86,9 @@
#include "mali_linux_trace.h"
+#define KBASE_DRV_NAME "mali"
+#define KBASE_TIMELINE_NAME KBASE_DRV_NAME ".timeline"
+
#if MALI_USE_CSF
#include "csf/mali_kbase_csf.h"
@@ -462,9 +464,9 @@ void kbasep_as_do_poke(struct work_struct *work);
*
* @kbdev: The kbase device structure for the device
*
- * The caller should ensure that either kbdev->pm.active_count_lock is held, or
- * a dmb was executed recently (to ensure the value is most
- * up-to-date). However, without a lock the value could change afterwards.
+ * The caller should ensure that either kbase_device::kbase_pm_device_data::lock is held,
+ * or a dmb was executed recently (to ensure the value is most up-to-date).
+ * However, without a lock the value could change afterwards.
*
* Return:
* * false if a suspend is not in progress
@@ -475,6 +477,22 @@ static inline bool kbase_pm_is_suspending(struct kbase_device *kbdev)
return kbdev->pm.suspending;
}
+/**
+ * kbase_pm_is_resuming - Check whether System resume of GPU device is in progress.
+ *
+ * @kbdev: The kbase device structure for the device
+ *
+ * The caller should ensure that either kbase_device::kbase_pm_device_data::lock is held,
+ * or a dmb was executed recently (to ensure the value is most up-to-date).
+ * However, without a lock the value could change afterwards.
+ *
+ * Return: true if System resume is in progress, otherwise false.
+ */
+static inline bool kbase_pm_is_resuming(struct kbase_device *kbdev)
+{
+ return kbdev->pm.resuming;
+}
+
#ifdef CONFIG_MALI_ARBITER_SUPPORT
/*
* Check whether a gpu lost is in progress
@@ -528,9 +546,11 @@ static inline bool kbase_pm_is_active(struct kbase_device *kbdev)
}
/**
- * kbase_pm_lowest_gpu_freq_init() - Find the lowest frequency that the GPU can
- * run as using the device tree, and save this
- * within kbdev.
+ * kbase_pm_gpu_freq_init() - Find the lowest frequency that the GPU can
+ * run at, using the device tree, then query the
+ * GPU properties to find out the highest GPU
+ * frequency and store both of them within the
+ * @kbase_device.
* @kbdev: Pointer to kbase device.
*
* This function could be called from kbase_clk_rate_trace_manager_init,
@@ -538,9 +558,9 @@ static inline bool kbase_pm_is_active(struct kbase_device *kbdev)
* dev_pm_opp_of_add_table() has been called to initialize the OPP table,
* which occurs in power_control_init().
*
- * Return: 0 in any case.
+ * Return: 0 on success, negative error code on failure.
*/
-int kbase_pm_lowest_gpu_freq_init(struct kbase_device *kbdev);
+int kbase_pm_gpu_freq_init(struct kbase_device *kbdev);
/**
* kbase_pm_metrics_start - Start the utilization metrics timer
@@ -808,4 +828,108 @@ void kbase_destroy_kworker_stack(struct kthread_worker *worker);
#define UINT64_MAX ((uint64_t)0xFFFFFFFFFFFFFFFFULL)
#endif
+/**
+ * kbase_file_fops_count() - Get the kfile::fops_count value
+ *
+ * @kfile: Pointer to the object representing the mali device file.
+ *
+ * The value is read with kfile::lock held.
+ *
+ * Return: sampled value of kfile::fops_count.
+ */
+static inline u32 kbase_file_fops_count(struct kbase_file *kfile)
+{
+ u32 fops_count;
+
+ spin_lock(&kfile->lock);
+ fops_count = kfile->fops_count;
+ spin_unlock(&kfile->lock);
+
+ return fops_count;
+}
+
+/**
+ * kbase_file_inc_fops_count_unless_closed() - Increment the kfile::fops_count value if the
+ * kfile::owner is still set.
+ *
+ * @kfile: Pointer to the object representing the /dev/malixx device file instance.
+ *
+ * Return: true if the increment was done otherwise false.
+ */
+static inline bool kbase_file_inc_fops_count_unless_closed(struct kbase_file *kfile)
+{
+ bool count_incremented = false;
+
+ spin_lock(&kfile->lock);
+ if (kfile->owner) {
+ kfile->fops_count++;
+ count_incremented = true;
+ }
+ spin_unlock(&kfile->lock);
+
+ return count_incremented;
+}
+
+/**
+ * kbase_file_dec_fops_count() - Decrement the kfile::fops_count value
+ *
+ * @kfile: Pointer to the object representing the /dev/malixx device file instance.
+ *
+ * This function shall only be called to decrement kfile::fops_count if a successful call
+ * to kbase_file_inc_fops_count_unless_closed() was made previously by the current thread.
+ *
+ * The function would enqueue the kfile::destroy_kctx_work if the process that originally
+ * created the file instance has closed its copy and no Kbase handled file operations are
+ * in progress and no memory mappings are present for the file instance.
+ */
+static inline void kbase_file_dec_fops_count(struct kbase_file *kfile)
+{
+ spin_lock(&kfile->lock);
+ WARN_ON_ONCE(kfile->fops_count <= 0);
+ kfile->fops_count--;
+ if (unlikely(!kfile->fops_count && !kfile->owner && !kfile->map_count)) {
+ queue_work(system_wq, &kfile->destroy_kctx_work);
+#if IS_ENABLED(CONFIG_DEBUG_FS)
+ wake_up(&kfile->zero_fops_count_wait);
+#endif
+ }
+ spin_unlock(&kfile->lock);
+}
+
+/**
+ * kbase_file_inc_cpu_mapping_count() - Increment the kfile::map_count value.
+ *
+ * @kfile: Pointer to the object representing the /dev/malixx device file instance.
+ *
+ * This function shall be called when the memory mapping on /dev/malixx device file
+ * instance is created. The kbase_file::setup_state shall be KBASE_FILE_COMPLETE.
+ */
+static inline void kbase_file_inc_cpu_mapping_count(struct kbase_file *kfile)
+{
+ spin_lock(&kfile->lock);
+ kfile->map_count++;
+ spin_unlock(&kfile->lock);
+}
+
+/**
+ * kbase_file_dec_cpu_mapping_count() - Decrement the kfile::map_count value
+ *
+ * @kfile: Pointer to the object representing the /dev/malixx device file instance.
+ *
+ * This function is called to decrement kfile::map_count value when the memory mapping
+ * on /dev/malixx device file is closed.
+ * The function would enqueue the kfile::destroy_kctx_work if the process that originally
+ * created the file instance has closed its copy and there are no mappings present and no
+ * Kbase handled file operations are in progress for the file instance.
+ */
+static inline void kbase_file_dec_cpu_mapping_count(struct kbase_file *kfile)
+{
+ spin_lock(&kfile->lock);
+ WARN_ON_ONCE(kfile->map_count <= 0);
+ kfile->map_count--;
+ if (unlikely(!kfile->map_count && !kfile->owner && !kfile->fops_count))
+ queue_work(system_wq, &kfile->destroy_kctx_work);
+ spin_unlock(&kfile->lock);
+}
+
#endif
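
Illustrative sketch (not part of the applied patch): the fops_count helpers above are meant to bracket every file operation, exactly as the reworked ioctl/read/poll/mmap paths later in this patch do. A stand-alone sketch of the pattern; do_op() is a hypothetical handler:

static long guarded_fop(struct kbase_file *kfile, unsigned long arg)
{
        long ret;

        /* Fails if the owning process has already closed its file instance. */
        if (!kbase_file_inc_fops_count_unless_closed(kfile))
                return -EPERM;

        ret = do_op(kfile, arg); /* hypothetical handler */

        /* May queue kfile->destroy_kctx_work if this was the last reference. */
        kbase_file_dec_fops_count(kfile);
        return ret;
}
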
diff --git a/mali_kbase/mali_kbase_config_defaults.h b/mali_kbase/mali_kbase_config_defaults.h
index c99ad52..fa73612 100644
--- a/mali_kbase/mali_kbase_config_defaults.h
+++ b/mali_kbase/mali_kbase_config_defaults.h
@@ -183,6 +183,7 @@ enum {
*
* This is also the default timeout to be used when an invalid timeout
* selector is used to retrieve the timeout on CSF GPUs.
+ * This shouldn't be used as a timeout for the CSG suspend request.
*
* Based on 75000ms timeout at nominal 100MHz, as is required for Android - based
* on scaling from a 50MHz GPU system.
@@ -196,17 +197,16 @@ enum {
*/
#define CSF_PM_TIMEOUT_CYCLES (250000000)
-/* Waiting timeout in clock cycles for GPU reset to complete.
+/* Waiting timeout in clock cycles for a CSG to be suspended.
*
- * Based on 2500ms timeout at 100MHz, scaled from a 50MHz GPU system
+ * Based on a 30s timeout at 100MHz, scaled from 5s at a 600MHz GPU frequency.
+ * More cycles (1s @ 100MHz = 100000000) are added to ensure that the
+ * host timeout is always greater than the FW timeout.
*/
-#define CSF_GPU_RESET_TIMEOUT_CYCLES (250000000)
+#define CSF_CSG_SUSPEND_TIMEOUT_CYCLES (3100000000ull)
-/* Waiting timeout in clock cycles for all active CSGs to be suspended.
- *
- * Based on 1500ms timeout at 100MHz, scaled from a 50MHz GPU system.
- */
-#define CSF_CSG_SUSPEND_TIMEOUT_CYCLES (150000000)
+/* Waiting timeout in clock cycles for GPU reset to complete. */
+#define CSF_GPU_RESET_TIMEOUT_CYCLES (CSF_CSG_SUSPEND_TIMEOUT_CYCLES * 2)
/* Waiting timeout in clock cycles for GPU firmware to boot.
*
@@ -220,6 +220,19 @@ enum {
*/
#define CSF_FIRMWARE_PING_TIMEOUT_CYCLES (600000000ull)
+/* Waiting timeout for a KCPU queue's fence signal blocked too long, in clock cycles.
+ *
+ * Based on 10s timeout at 100MHz, scaled from a 50MHz GPU system.
+ */
+#define KCPU_FENCE_SIGNAL_TIMEOUT_CYCLES (1000000000ull)
+
+/* Waiting timeout for task execution on an endpoint. Based on the
+ * DEFAULT_PROGRESS_TIMEOUT.
+ *
+ * Based on a 25s timeout at 100MHz, scaled from a 500MHz GPU system.
+ */
+#define DEFAULT_PROGRESS_TIMEOUT_CYCLES (2500000000ull)
+
#else /* MALI_USE_CSF */
/* A default timeout in clock cycles to be used when an invalid timeout
@@ -242,7 +255,7 @@ enum {
*/
#define JM_DEFAULT_JS_FREE_TIMEOUT_CYCLES (100000)
-#endif /* MALI_USE_CSF */
+#endif /* !MALI_USE_CSF */
/* Default timeslice that a context is scheduled in for, in nanoseconds.
*
@@ -286,4 +299,10 @@ enum {
* It corresponds to 0.5s in GPU @ 100MHz.
*/
#define MMU_AS_INACTIVE_WAIT_TIMEOUT_CYCLES ((u64)50 * 1024 * 1024)
+
+#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD)
+/* Default value of the time interval at which GPU metrics tracepoints are emitted. */
+#define DEFAULT_GPU_METRICS_TP_EMIT_INTERVAL_NS (500000000u) /* 500 ms */
+#endif
+
#endif /* _KBASE_CONFIG_DEFAULTS_H_ */
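
Illustrative sketch (not part of the applied patch): the *_TIMEOUT_CYCLES values above are clock-cycle budgets that the driver scales by the lowest expected GPU frequency to obtain a wall-clock timeout (see kbase_get_timeout_ms()). A rough sketch of that conversion; timeout_cycles_to_ms() is a hypothetical helper:

#include <linux/math64.h> /* div64_u64() */

static u64 timeout_cycles_to_ms(u64 cycles, u64 freq_khz)
{
        /* cycles / kHz = ms; round up so small budgets never become 0.
         * Example: 3100000000 cycles at 100000 kHz (100 MHz) -> 31000 ms.
         */
        return div64_u64(cycles + freq_khz - 1, freq_khz);
}
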
diff --git a/mali_kbase/mali_kbase_core_linux.c b/mali_kbase/mali_kbase_core_linux.c
index c31994c..28cbcdb 100644
--- a/mali_kbase/mali_kbase_core_linux.c
+++ b/mali_kbase/mali_kbase_core_linux.c
@@ -170,6 +170,8 @@ static const struct mali_kbase_capability_def kbase_caps_table[MALI_KBASE_NUM_CA
static struct mutex kbase_probe_mutex;
#endif
+static void kbase_file_destroy_kctx_worker(struct work_struct *work);
+
/**
* mali_kbase_supports_cap - Query whether a kbase capability is supported
*
@@ -274,6 +276,8 @@ void kbase_destroy_kworker_stack(struct kthread_worker *worker)
*
* Return: Address of an object representing a simulated device file, or NULL
* on failure.
+ *
+ * Note: This function always gets called in Userspace context.
*/
static struct kbase_file *kbase_file_new(struct kbase_device *const kbdev,
struct file *const filp)
@@ -286,6 +290,16 @@ static struct kbase_file *kbase_file_new(struct kbase_device *const kbdev,
kfile->kctx = NULL;
kfile->api_version = 0;
atomic_set(&kfile->setup_state, KBASE_FILE_NEED_VSN);
+ /* Store the pointer to the file table structure of the current process. */
+ kfile->owner = current->files;
+ INIT_WORK(&kfile->destroy_kctx_work, kbase_file_destroy_kctx_worker);
+ spin_lock_init(&kfile->lock);
+ kfile->fops_count = 0;
+ kfile->map_count = 0;
+ typecheck(typeof(kfile->map_count), typeof(current->mm->map_count));
+#if IS_ENABLED(CONFIG_DEBUG_FS)
+ init_waitqueue_head(&kfile->zero_fops_count_wait);
+#endif
}
return kfile;
}
@@ -366,6 +380,33 @@ static int kbase_file_create_kctx(struct kbase_file *kfile,
base_context_create_flags flags);
/**
+ * kbase_file_inc_fops_count_if_allowed - Increment the kfile::fops_count value if the file
+ * operation is allowed for the current process.
+ *
+ * @kfile: Pointer to the object representing the /dev/malixx device file instance.
+ *
+ * The function shall be called at the beginning of certain file operation methods
+ * implemented for @kbase_fops, like ioctl, poll, read and mmap.
+ *
+ * kbase_file_dec_fops_count() shall be called if the increment was done.
+ *
+ * Return: true if the increment was done otherwise false.
+ *
+ * Note: This function shall always be called in Userspace context.
+ */
+static bool kbase_file_inc_fops_count_if_allowed(struct kbase_file *const kfile)
+{
+ /* Disallow file operations from another process that shares the instance
+ * of the /dev/malixx file, i.e. 'kfile', and also disallow file operations
+ * if the parent process has closed the file instance.
+ */
+ if (unlikely(kfile->owner != current->files))
+ return false;
+
+ return kbase_file_inc_fops_count_unless_closed(kfile);
+}
+
+/**
* kbase_file_get_kctx_if_setup_complete - Get a kernel base context
* pointer from a device file
*
@@ -377,6 +418,8 @@ static int kbase_file_create_kctx(struct kbase_file *kfile,
*
* Return: Address of the kernel base context associated with the @kfile, or
* NULL if no context exists.
+ *
+ * Note: This function shall always be called in Userspace context.
*/
static struct kbase_context *kbase_file_get_kctx_if_setup_complete(
struct kbase_file *const kfile)
@@ -390,37 +433,103 @@ static struct kbase_context *kbase_file_get_kctx_if_setup_complete(
}
/**
- * kbase_file_delete - Destroy an object representing a device file
+ * kbase_file_destroy_kctx - Destroy the Kbase context created for @kfile.
*
* @kfile: A device file created by kbase_file_new()
- *
- * If any context was created for the @kfile then it is destroyed.
*/
-static void kbase_file_delete(struct kbase_file *const kfile)
+static void kbase_file_destroy_kctx(struct kbase_file *const kfile)
{
- struct kbase_device *kbdev = NULL;
-
- if (WARN_ON(!kfile))
+ if (atomic_cmpxchg(&kfile->setup_state, KBASE_FILE_COMPLETE,
+ KBASE_FILE_DESTROY_CTX) != KBASE_FILE_COMPLETE)
return;
- kfile->filp->private_data = NULL;
- kbdev = kfile->kbdev;
-
- if (atomic_read(&kfile->setup_state) == KBASE_FILE_COMPLETE) {
- struct kbase_context *kctx = kfile->kctx;
-
#if IS_ENABLED(CONFIG_DEBUG_FS)
- kbasep_mem_profile_debugfs_remove(kctx);
+ kbasep_mem_profile_debugfs_remove(kfile->kctx);
+ kbase_context_debugfs_term(kfile->kctx);
#endif
- kbase_context_debugfs_term(kctx);
- kbase_destroy_context(kctx);
+ kbase_destroy_context(kfile->kctx);
+ dev_dbg(kfile->kbdev->dev, "Deleted kbase context");
+}
+
+/**
+ * kbase_file_destroy_kctx_worker - Work item to destroy the Kbase context.
+ *
+ * @work: Pointer to the kfile::destroy_kctx_work.
+ *
+ * The work item shall only be enqueued if the context termination could not
+ * be done from @kbase_flush().
+ */
+static void kbase_file_destroy_kctx_worker(struct work_struct *work)
+{
+ struct kbase_file *kfile =
+ container_of(work, struct kbase_file, destroy_kctx_work);
+
+ WARN_ON_ONCE(kfile->owner);
+ WARN_ON_ONCE(kfile->map_count);
+ WARN_ON_ONCE(kfile->fops_count);
+
+ kbase_file_destroy_kctx(kfile);
+}
+
+/**
+ * kbase_file_destroy_kctx_on_flush - Try destroy the Kbase context from the flush()
+ * method of @kbase_fops.
+ *
+ * @kfile: A device file created by kbase_file_new()
+ */
+static void kbase_file_destroy_kctx_on_flush(struct kbase_file *const kfile)
+{
+ bool can_destroy_context = false;
+
+ spin_lock(&kfile->lock);
+ kfile->owner = NULL;
+ /* To destroy the context from the flush() method, unlike the release()
+ * method, it is necessary to synchronize manually against the other threads
+ * in the current process that could be operating on the /dev/malixx file.
+ *
+ * Only destroy the context if all the memory mappings on the
+ * /dev/malixx file instance have been closed. If there are mappings
+ * present then the context would be destroyed later when the last
+ * mapping is closed.
+ * Also, only destroy the context if no file operations are in progress.
+ */
+ can_destroy_context = !kfile->map_count && !kfile->fops_count;
+ spin_unlock(&kfile->lock);
- dev_dbg(kbdev->dev, "deleted base context\n");
+ if (likely(can_destroy_context)) {
+ WARN_ON_ONCE(work_pending(&kfile->destroy_kctx_work));
+ kbase_file_destroy_kctx(kfile);
}
+}
- kbase_release_device(kbdev);
+/**
+ * kbase_file_delete - Destroy an object representing a device file
+ *
+ * @kfile: A device file created by kbase_file_new()
+ *
+ * If any context was created for the @kfile and is still alive, then it is destroyed.
+ */
+static void kbase_file_delete(struct kbase_file *const kfile)
+{
+ if (WARN_ON(!kfile))
+ return;
+
+ /* All the CPU mappings on the device file should have been closed */
+ WARN_ON_ONCE(kfile->map_count);
+#if IS_ENABLED(CONFIG_DEBUG_FS)
+ /* There could still be file operations due to the debugfs file (mem_view) */
+ wait_event(kfile->zero_fops_count_wait, !kbase_file_fops_count(kfile));
+#else
+ /* There shall not be any file operations in progress on the device file */
+ WARN_ON_ONCE(kfile->fops_count);
+#endif
+ kfile->filp->private_data = NULL;
+ cancel_work_sync(&kfile->destroy_kctx_work);
+ /* Destroy the context if it wasn't done earlier from the flush() method. */
+ kbase_file_destroy_kctx(kfile);
+ kbase_release_device(kfile->kbdev);
kfree(kfile);
}
@@ -676,7 +785,7 @@ static int kbase_file_create_kctx(struct kbase_file *const kfile,
kbdev = kfile->kbdev;
kctx = kbase_create_context(kbdev, in_compat_syscall(),
- flags, kfile->api_version, kfile->filp);
+ flags, kfile->api_version, kfile);
/* if bad flags, will stay stuck in setup mode */
if (!kctx)
@@ -762,6 +871,36 @@ static int kbase_release(struct inode *inode, struct file *filp)
return 0;
}
+/**
+ * kbase_flush - Function implementing the flush() method of @kbase_fops.
+ *
+ * @filp: Pointer to the /dev/malixx device file instance.
+ * @id: Pointer to the file table structure of the current process.
+ * If @filp is being shared by multiple processes then @id can differ
+ * from kfile::owner.
+ *
+ * This function is called every time a copy of @filp is closed. So if 3 processes
+ * are sharing @filp then this function would be called 3 times and only after
+ * that would kbase_release() be called.
+ *
+ * Return: 0 if successful, otherwise a negative error code.
+ *
+ * Note: This function always gets called in Userspace context when the
+ * file is closed.
+ */
+static int kbase_flush(struct file *filp, fl_owner_t id)
+{
+ struct kbase_file *const kfile = filp->private_data;
+
+ /* Try to destroy the context if the flush() method has been called for the
+ * process that created the instance of /dev/malixx file i.e. 'kfile'.
+ */
+ if (kfile->owner == id)
+ kbase_file_destroy_kctx_on_flush(kfile);
+
+ return 0;
+}
+
static int kbase_api_set_flags(struct kbase_file *kfile,
struct kbase_ioctl_set_flags *flags)
{
@@ -1485,6 +1624,7 @@ static int kbasep_cs_queue_group_create_1_6(
struct kbase_context *kctx,
union kbase_ioctl_cs_queue_group_create_1_6 *create)
{
+ int ret, i;
union kbase_ioctl_cs_queue_group_create
new_create = { .in = {
.tiler_mask = create->in.tiler_mask,
@@ -1498,16 +1638,61 @@ static int kbasep_cs_queue_group_create_1_6(
.compute_max = create->in.compute_max,
} };
- int ret = kbase_csf_queue_group_create(kctx, &new_create);
+ for (i = 0; i < ARRAY_SIZE(create->in.padding); i++) {
+ if (create->in.padding[i] != 0) {
+ dev_warn(kctx->kbdev->dev, "Invalid padding not 0 in queue group create\n");
+ return -EINVAL;
+ }
+ }
+
+ ret = kbase_csf_queue_group_create(kctx, &new_create);
+
+ create->out.group_handle = new_create.out.group_handle;
+ create->out.group_uid = new_create.out.group_uid;
+
+ return ret;
+}
+
+static int kbasep_cs_queue_group_create_1_18(struct kbase_context *kctx,
+ union kbase_ioctl_cs_queue_group_create_1_18 *create)
+{
+ int ret, i;
+ union kbase_ioctl_cs_queue_group_create
+ new_create = { .in = {
+ .tiler_mask = create->in.tiler_mask,
+ .fragment_mask = create->in.fragment_mask,
+ .compute_mask = create->in.compute_mask,
+ .cs_min = create->in.cs_min,
+ .priority = create->in.priority,
+ .tiler_max = create->in.tiler_max,
+ .fragment_max = create->in.fragment_max,
+ .compute_max = create->in.compute_max,
+ .csi_handlers = create->in.csi_handlers,
+ .dvs_buf = create->in.dvs_buf,
+ } };
+
+ for (i = 0; i < ARRAY_SIZE(create->in.padding); i++) {
+ if (create->in.padding[i] != 0) {
+ dev_warn(kctx->kbdev->dev, "Invalid padding not 0 in queue group create\n");
+ return -EINVAL;
+ }
+ }
+
+ ret = kbase_csf_queue_group_create(kctx, &new_create);
create->out.group_handle = new_create.out.group_handle;
create->out.group_uid = new_create.out.group_uid;
return ret;
}
+
static int kbasep_cs_queue_group_create(struct kbase_context *kctx,
union kbase_ioctl_cs_queue_group_create *create)
{
+ if (create->in.reserved != 0) {
+ dev_warn(kctx->kbdev->dev, "Invalid reserved field not 0 in queue group create\n");
+ return -EINVAL;
+ }
return kbase_csf_queue_group_create(kctx, create);
}
@@ -1765,9 +1950,8 @@ static int kbasep_ioctl_set_limited_core_count(struct kbase_context *kctx,
return 0;
}
-static long kbase_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
+static long kbase_kfile_ioctl(struct kbase_file *kfile, unsigned int cmd, unsigned long arg)
{
- struct kbase_file *const kfile = filp->private_data;
struct kbase_context *kctx = NULL;
struct kbase_device *kbdev = kfile->kbdev;
void __user *uarg = (void __user *)arg;
@@ -2081,6 +2265,11 @@ static long kbase_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
kbasep_cs_queue_group_create_1_6,
union kbase_ioctl_cs_queue_group_create_1_6, kctx);
break;
+ case KBASE_IOCTL_CS_QUEUE_GROUP_CREATE_1_18:
+ KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_CS_QUEUE_GROUP_CREATE_1_18,
+ kbasep_cs_queue_group_create_1_18,
+ union kbase_ioctl_cs_queue_group_create_1_18, kctx);
+ break;
case KBASE_IOCTL_CS_QUEUE_GROUP_CREATE:
KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_CS_QUEUE_GROUP_CREATE,
kbasep_cs_queue_group_create,
@@ -2179,22 +2368,44 @@ static long kbase_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
return -ENOIOCTLCMD;
}
+static long kbase_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
+{
+ struct kbase_file *const kfile = filp->private_data;
+ long ioctl_ret;
+
+ if (unlikely(!kbase_file_inc_fops_count_if_allowed(kfile)))
+ return -EPERM;
+
+ ioctl_ret = kbase_kfile_ioctl(kfile, cmd, arg);
+ kbase_file_dec_fops_count(kfile);
+
+ return ioctl_ret;
+}
+
#if MALI_USE_CSF
static ssize_t kbase_read(struct file *filp, char __user *buf, size_t count, loff_t *f_pos)
{
struct kbase_file *const kfile = filp->private_data;
- struct kbase_context *const kctx =
- kbase_file_get_kctx_if_setup_complete(kfile);
+ struct kbase_context *kctx;
struct base_csf_notification event_data = {
.type = BASE_CSF_NOTIFICATION_EVENT };
const size_t data_size = sizeof(event_data);
bool read_event = false, read_error = false;
+ ssize_t err = 0;
- if (unlikely(!kctx))
+ if (unlikely(!kbase_file_inc_fops_count_if_allowed(kfile)))
return -EPERM;
- if (count < data_size)
- return -ENOBUFS;
+ kctx = kbase_file_get_kctx_if_setup_complete(kfile);
+ if (unlikely(!kctx)) {
+ err = -EPERM;
+ goto out;
+ }
+
+ if (count < data_size) {
+ err = -ENOBUFS;
+ goto out;
+ }
if (atomic_read(&kctx->event_count))
read_event = true;
@@ -2218,28 +2429,39 @@ static ssize_t kbase_read(struct file *filp, char __user *buf, size_t count, lof
if (copy_to_user(buf, &event_data, data_size) != 0) {
dev_warn(kctx->kbdev->dev,
"Failed to copy data\n");
- return -EFAULT;
+ err = -EFAULT;
+ goto out;
}
if (read_event)
atomic_set(&kctx->event_count, 0);
- return data_size;
+out:
+ kbase_file_dec_fops_count(kfile);
+ return err ? err : data_size;
}
#else /* MALI_USE_CSF */
static ssize_t kbase_read(struct file *filp, char __user *buf, size_t count, loff_t *f_pos)
{
struct kbase_file *const kfile = filp->private_data;
- struct kbase_context *const kctx =
- kbase_file_get_kctx_if_setup_complete(kfile);
+ struct kbase_context *kctx;
struct base_jd_event_v2 uevent;
int out_count = 0;
+ ssize_t err = 0;
- if (unlikely(!kctx))
+ if (unlikely(!kbase_file_inc_fops_count_if_allowed(kfile)))
return -EPERM;
- if (count < sizeof(uevent))
- return -ENOBUFS;
+ kctx = kbase_file_get_kctx_if_setup_complete(kfile);
+ if (unlikely(!kctx)) {
+ err = -EPERM;
+ goto out;
+ }
+
+ if (count < sizeof(uevent)) {
+ err = -ENOBUFS;
+ goto out;
+ }
memset(&uevent, 0, sizeof(uevent));
@@ -2248,56 +2470,78 @@ static ssize_t kbase_read(struct file *filp, char __user *buf, size_t count, lof
if (out_count > 0)
goto out;
- if (filp->f_flags & O_NONBLOCK)
- return -EAGAIN;
+ if (filp->f_flags & O_NONBLOCK) {
+ err = -EAGAIN;
+ goto out;
+ }
if (wait_event_interruptible(kctx->event_queue,
- kbase_event_pending(kctx)) != 0)
- return -ERESTARTSYS;
+ kbase_event_pending(kctx)) != 0) {
+ err = -ERESTARTSYS;
+ goto out;
+ }
}
if (uevent.event_code == BASE_JD_EVENT_DRV_TERMINATED) {
- if (out_count == 0)
- return -EPIPE;
+ if (out_count == 0) {
+ err = -EPIPE;
+ goto out;
+ }
goto out;
}
- if (copy_to_user(buf, &uevent, sizeof(uevent)) != 0)
- return -EFAULT;
+ if (copy_to_user(buf, &uevent, sizeof(uevent)) != 0) {
+ err = -EFAULT;
+ goto out;
+ }
buf += sizeof(uevent);
out_count++;
count -= sizeof(uevent);
} while (count >= sizeof(uevent));
- out:
- return out_count * sizeof(uevent);
+out:
+ kbase_file_dec_fops_count(kfile);
+ return err ? err : (out_count * sizeof(uevent));
}
#endif /* MALI_USE_CSF */
static __poll_t kbase_poll(struct file *filp, poll_table *wait)
{
struct kbase_file *const kfile = filp->private_data;
- struct kbase_context *const kctx =
- kbase_file_get_kctx_if_setup_complete(kfile);
+ struct kbase_context *kctx;
+ __poll_t ret = 0;
+
+ if (unlikely(!kbase_file_inc_fops_count_if_allowed(kfile))) {
+#if (KERNEL_VERSION(4, 19, 0) > LINUX_VERSION_CODE)
+ ret = POLLNVAL;
+#else
+ ret = EPOLLNVAL;
+#endif
+ return ret;
+ }
+ kctx = kbase_file_get_kctx_if_setup_complete(kfile);
if (unlikely(!kctx)) {
#if (KERNEL_VERSION(4, 19, 0) > LINUX_VERSION_CODE)
- return POLLERR;
+ ret = POLLERR;
#else
- return EPOLLERR;
+ ret = EPOLLERR;
#endif
+ goto out;
}
poll_wait(filp, &kctx->event_queue, wait);
if (kbase_event_pending(kctx)) {
#if (KERNEL_VERSION(4, 19, 0) > LINUX_VERSION_CODE)
- return POLLIN | POLLRDNORM;
+ ret = POLLIN | POLLRDNORM;
#else
- return EPOLLIN | EPOLLRDNORM;
+ ret = EPOLLIN | EPOLLRDNORM;
#endif
}
- return 0;
+out:
+ kbase_file_dec_fops_count(kfile);
+ return ret;
}
void _kbase_event_wakeup(struct kbase_context *kctx, bool sync)
@@ -2347,13 +2591,20 @@ KBASE_EXPORT_TEST_API(kbase_event_pending);
static int kbase_mmap(struct file *const filp, struct vm_area_struct *const vma)
{
struct kbase_file *const kfile = filp->private_data;
- struct kbase_context *const kctx =
- kbase_file_get_kctx_if_setup_complete(kfile);
+ struct kbase_context *kctx;
+ int ret;
- if (unlikely(!kctx))
+ if (unlikely(!kbase_file_inc_fops_count_if_allowed(kfile)))
return -EPERM;
- return kbase_context_mmap(kctx, vma);
+ kctx = kbase_file_get_kctx_if_setup_complete(kfile);
+ if (likely(kctx))
+ ret = kbase_context_mmap(kctx, vma);
+ else
+ ret = -EPERM;
+
+ kbase_file_dec_fops_count(kfile);
+ return ret;
}
static int kbase_check_flags(int flags)
@@ -2372,18 +2623,26 @@ static unsigned long kbase_get_unmapped_area(struct file *const filp,
const unsigned long pgoff, const unsigned long flags)
{
struct kbase_file *const kfile = filp->private_data;
- struct kbase_context *const kctx =
- kbase_file_get_kctx_if_setup_complete(kfile);
+ struct kbase_context *kctx;
+ unsigned long address;
- if (unlikely(!kctx))
+ if (unlikely(!kbase_file_inc_fops_count_if_allowed(kfile)))
return -EPERM;
- return kbase_context_get_unmapped_area(kctx, addr, len, pgoff, flags);
+ kctx = kbase_file_get_kctx_if_setup_complete(kfile);
+ if (likely(kctx))
+ address = kbase_context_get_unmapped_area(kctx, addr, len, pgoff, flags);
+ else
+ address = -EPERM;
+
+ kbase_file_dec_fops_count(kfile);
+ return address;
}
static const struct file_operations kbase_fops = {
.owner = THIS_MODULE,
.open = kbase_open,
+ .flush = kbase_flush,
.release = kbase_release,
.read = kbase_read,
.poll = kbase_poll,
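
The new .flush handler and the fops-count helpers give every file operation above the same shape: take a reference on the struct kbase_file before touching the context, do the work, then drop the reference so a deferred context teardown can proceed. A minimal sketch of that pattern, assuming only the helpers introduced by this change (do_operation() is a hypothetical stand-in for the per-operation body):

static long kbase_fop_template(struct file *filp)
{
	struct kbase_file *const kfile = filp->private_data;
	struct kbase_context *kctx;
	long ret = -EPERM;

	/* Refuse the operation once the owner has closed the file instance. */
	if (unlikely(!kbase_file_inc_fops_count_if_allowed(kfile)))
		return -EPERM;

	kctx = kbase_file_get_kctx_if_setup_complete(kfile);
	if (likely(kctx))
		ret = do_operation(kctx); /* hypothetical per-operation body */

	/* Always drop the count so a deferred kctx teardown can be scheduled. */
	kbase_file_dec_fops_count(kfile);
	return ret;
}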
@@ -3306,10 +3565,8 @@ static ssize_t gpuinfo_show(struct device *dev,
.name = "Mali-G510" },
{ .id = GPU_ID2_PRODUCT_TVAX >> KBASE_GPU_ID_VERSION_PRODUCT_ID_SHIFT,
.name = "Mali-G310" },
- { .id = GPU_ID2_PRODUCT_TTIX >> KBASE_GPU_ID_VERSION_PRODUCT_ID_SHIFT,
- .name = "Mali-TTIX" },
{ .id = GPU_ID2_PRODUCT_LTIX >> KBASE_GPU_ID_VERSION_PRODUCT_ID_SHIFT,
- .name = "Mali-LTIX" },
+ .name = "Mali-G620" },
};
const char *product_name = "(Unknown Mali GPU)";
struct kbase_device *kbdev;
@@ -3361,6 +3618,21 @@ static ssize_t gpuinfo_show(struct device *dev,
dev_dbg(kbdev->dev, "GPU ID_Name: %s, nr_cores(%u)\n", product_name,
nr_cores);
}
+
+ if ((product_id & product_id_mask) ==
+ ((GPU_ID2_PRODUCT_TTIX >> KBASE_GPU_ID_VERSION_PRODUCT_ID_SHIFT) & product_id_mask)) {
+ const bool rt_supported =
+ GPU_FEATURES_RAY_TRACING_GET(gpu_props->props.raw_props.gpu_features);
+ const u8 nr_cores = gpu_props->num_cores;
+
+ if ((nr_cores >= 10) && rt_supported)
+ product_name = "Mali-G720-Immortalis";
+ else
+ product_name = (nr_cores >= 6) ? "Mali-G720" : "Mali-G620";
+
+ dev_dbg(kbdev->dev, "GPU ID_Name: %s (ID: 0x%x), nr_cores(%u)\n", product_name,
+ product_id & product_id_mask, nr_cores);
+ }
#endif /* MALI_USE_CSF */
return scnprintf(buf, PAGE_SIZE, "%s %d cores r%dp%d 0x%04X\n", product_name,
@@ -3435,8 +3707,9 @@ static ssize_t dvfs_period_show(struct device *dev,
static DEVICE_ATTR_RW(dvfs_period);
-int kbase_pm_lowest_gpu_freq_init(struct kbase_device *kbdev)
+int kbase_pm_gpu_freq_init(struct kbase_device *kbdev)
{
+ int err;
/* Uses default reference frequency defined in below macro */
u64 lowest_freq_khz = DEFAULT_REF_TIMEOUT_FREQ_KHZ;
@@ -3471,7 +3744,16 @@ int kbase_pm_lowest_gpu_freq_init(struct kbase_device *kbdev)
#endif
kbdev->lowest_gpu_freq_khz = lowest_freq_khz;
+
+ err = kbase_device_populate_max_freq(kbdev);
+ if (unlikely(err < 0))
+ return -1;
+
dev_dbg(kbdev->dev, "Lowest frequency identified is %llu kHz", kbdev->lowest_gpu_freq_khz);
+ dev_dbg(kbdev->dev,
+ "Setting default highest frequency to %u kHz (pending devfreq initialization",
+ kbdev->gpu_props.props.core_props.gpu_freq_khz_max);
+
return 0;
}
@@ -3584,21 +3866,32 @@ static ssize_t reset_timeout_store(struct device *dev,
{
struct kbase_device *kbdev;
int ret;
- int reset_timeout;
+ u32 reset_timeout;
+ u32 default_reset_timeout;
kbdev = to_kbase_device(dev);
if (!kbdev)
return -ENODEV;
- ret = kstrtoint(buf, 0, &reset_timeout);
- if (ret || reset_timeout <= 0) {
+ ret = kstrtou32(buf, 0, &reset_timeout);
+ if (ret || reset_timeout == 0) {
dev_err(kbdev->dev, "Couldn't process reset_timeout write operation.\n"
"Use format <reset_timeout_ms>\n");
return -EINVAL;
}
+#if MALI_USE_CSF
+ default_reset_timeout = kbase_get_timeout_ms(kbdev, CSF_GPU_RESET_TIMEOUT);
+#else /* MALI_USE_CSF */
+ default_reset_timeout = JM_DEFAULT_RESET_TIMEOUT_MS;
+#endif /* !MALI_USE_CSF */
+
+ if (reset_timeout < default_reset_timeout)
+ dev_warn(kbdev->dev, "requested reset_timeout(%u) is smaller than default(%u)",
+ reset_timeout, default_reset_timeout);
+
kbdev->reset_timeout_ms = reset_timeout;
- dev_dbg(kbdev->dev, "Reset timeout: %dms\n", reset_timeout);
+ dev_dbg(kbdev->dev, "Reset timeout: %ums\n", reset_timeout);
return count;
}
@@ -4482,8 +4775,10 @@ static bool kbase_is_pm_enabled(const struct device_node *gpu_node)
const void *operating_point_node;
bool is_pm_enable = false;
- power_model_node = of_get_child_by_name(gpu_node,
- "power_model");
+ power_model_node = of_get_child_by_name(gpu_node, "power-model");
+ if (!power_model_node)
+ power_model_node = of_get_child_by_name(gpu_node, "power_model");
+
if (power_model_node)
is_pm_enable = true;
@@ -4504,8 +4799,9 @@ static bool kbase_is_pv_enabled(const struct device_node *gpu_node)
{
const void *arbiter_if_node;
- arbiter_if_node = of_get_property(gpu_node,
- "arbiter_if", NULL);
+ arbiter_if_node = of_get_property(gpu_node, "arbiter-if", NULL);
+ if (!arbiter_if_node)
+ arbiter_if_node = of_get_property(gpu_node, "arbiter_if", NULL);
return arbiter_if_node ? true : false;
}
@@ -5409,7 +5705,10 @@ static ssize_t idle_hysteresis_time_store(struct device *dev,
return -EINVAL;
}
- kbase_csf_firmware_set_gpu_idle_hysteresis_time(kbdev, dur);
+ /* In sysfs, the unit of the value written to idle_hysteresis_time is us,
+ * but kbase_csf_firmware_set_gpu_idle_hysteresis_time() takes ns, so multiply by 1000.
+ */
+ kbase_csf_firmware_set_gpu_idle_hysteresis_time(kbdev, dur * NSEC_PER_USEC);
return count;
}
@@ -5436,7 +5735,8 @@ static ssize_t idle_hysteresis_time_show(struct device *dev,
if (!kbdev)
return -ENODEV;
- dur = kbase_csf_firmware_get_gpu_idle_hysteresis_time(kbdev);
+ /* The value reported by idle_hysteresis_time_show is in us, so divide by 1000. */
+ dur = kbase_csf_firmware_get_gpu_idle_hysteresis_time(kbdev) / NSEC_PER_USEC;
ret = scnprintf(buf, PAGE_SIZE, "%u\n", dur);
return ret;
@@ -5445,6 +5745,74 @@ static ssize_t idle_hysteresis_time_show(struct device *dev,
static DEVICE_ATTR_RW(idle_hysteresis_time);
/**
+ * idle_hysteresis_time_ns_store - Store callback for CSF
+ * idle_hysteresis_time_ns sysfs file.
+ *
+ * @dev: The device this sysfs file is for
+ * @attr: The attributes of the sysfs file
+ * @buf: The value written to the sysfs file
+ * @count: The number of bytes written to the sysfs file
+ *
+ * This function is called when the idle_hysteresis_time_ns sysfs
+ * file is written to.
+ *
+ * This file contains values of the idle hysteresis duration in ns.
+ *
+ * Return: @count if the function succeeded. An error code on failure.
+ */
+static ssize_t idle_hysteresis_time_ns_store(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct kbase_device *kbdev;
+ u32 dur = 0;
+
+ kbdev = to_kbase_device(dev);
+ if (!kbdev)
+ return -ENODEV;
+
+ if (kstrtou32(buf, 0, &dur)) {
+ dev_err(kbdev->dev, "Couldn't process idle_hysteresis_time_ns write operation.\n"
+ "Use format <idle_hysteresis_time_ns>\n");
+ return -EINVAL;
+ }
+
+ kbase_csf_firmware_set_gpu_idle_hysteresis_time(kbdev, dur);
+
+ return count;
+}
+
+/**
+ * idle_hysteresis_time_ns_show - Show callback for CSF
+ * idle_hysteresis_time_ns sysfs entry.
+ *
+ * @dev: The device this sysfs file is for.
+ * @attr: The attributes of the sysfs file.
+ * @buf: The output buffer to receive the GPU information.
+ *
+ * This function is called to get the current idle hysteresis duration in ns.
+ *
+ * Return: The number of bytes output to @buf.
+ */
+static ssize_t idle_hysteresis_time_ns_show(struct device *dev, struct device_attribute *attr,
+ char *const buf)
+{
+ struct kbase_device *kbdev;
+ ssize_t ret;
+ u32 dur;
+
+ kbdev = to_kbase_device(dev);
+ if (!kbdev)
+ return -ENODEV;
+
+ dur = kbase_csf_firmware_get_gpu_idle_hysteresis_time(kbdev);
+ ret = scnprintf(buf, PAGE_SIZE, "%u\n", dur);
+
+ return ret;
+}
+
+static DEVICE_ATTR_RW(idle_hysteresis_time_ns);
+
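
Both idle_hysteresis_time (legacy, microseconds) and the new idle_hysteresis_time_ns (nanoseconds) attributes feed kbase_csf_firmware_set_gpu_idle_hysteresis_time() with a nanosecond value; only the unit accepted at the sysfs boundary differs. A small userspace sketch of equivalent writes — the sysfs directory is a placeholder, the real path depends on the platform's device name:

#include <stdio.h>

/* Hypothetical sysfs location for the Mali device attributes. */
#define MALI_SYSFS_DIR "/sys/devices/platform/mali/"

static int write_attr(const char *name, unsigned int val)
{
	char path[256];
	FILE *f;

	snprintf(path, sizeof(path), MALI_SYSFS_DIR "%s", name);
	f = fopen(path, "w");
	if (!f)
		return -1;
	fprintf(f, "%u\n", val);
	return fclose(f);
}

int main(void)
{
	/* 10000 us via the legacy file requests the same duration as 10000000 ns. */
	write_attr("idle_hysteresis_time", 10000);
	write_attr("idle_hysteresis_time_ns", 10000 * 1000);
	return 0;
}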
+/**
* mcu_shader_pwroff_timeout_show - Get the MCU shader Core power-off time value.
*
* @dev: The device this sysfs file is for.
@@ -5466,7 +5834,8 @@ static ssize_t mcu_shader_pwroff_timeout_show(struct device *dev, struct device_
if (!kbdev)
return -ENODEV;
- pwroff = kbase_csf_firmware_get_mcu_core_pwroff_time(kbdev);
+ /* The value reported by this function is in us, so divide by 1000. */
+ pwroff = kbase_csf_firmware_get_mcu_core_pwroff_time(kbdev) / NSEC_PER_USEC;
return scnprintf(buf, PAGE_SIZE, "%u\n", pwroff);
}
@@ -5490,19 +5859,97 @@ static ssize_t mcu_shader_pwroff_timeout_store(struct device *dev, struct device
struct kbase_device *kbdev = dev_get_drvdata(dev);
u32 dur;
+ const struct kbase_pm_policy *current_policy;
+ bool always_on;
+
if (!kbdev)
return -ENODEV;
if (kstrtouint(buf, 0, &dur))
return -EINVAL;
- kbase_csf_firmware_set_mcu_core_pwroff_time(kbdev, dur);
+ current_policy = kbase_pm_get_policy(kbdev);
+ always_on = current_policy == &kbase_pm_always_on_policy_ops;
+ if (dur == 0 && !always_on)
+ return -EINVAL;
+
+ /* In sysfs, the unit of the value written to mcu_shader_pwroff_timeout is us,
+ * but kbase_csf_firmware_set_mcu_core_pwroff_time() takes ns, so multiply by 1000.
+ */
+ kbase_csf_firmware_set_mcu_core_pwroff_time(kbdev, dur * NSEC_PER_USEC);
return count;
}
static DEVICE_ATTR_RW(mcu_shader_pwroff_timeout);
+/**
+ * mcu_shader_pwroff_timeout_ns_show - Get the MCU shader Core power-off time value.
+ *
+ * @dev: The device this sysfs file is for.
+ * @attr: The attributes of the sysfs file.
+ * @buf: The output buffer for the sysfs file contents
+ *
+ * Get the internally recorded MCU shader Core power-off (nominal) timeout value.
+ * The value is in nanoseconds.
+ *
+ * Return: The number of bytes output to @buf if the
+ * function succeeded. A negative value on failure.
+ */
+static ssize_t mcu_shader_pwroff_timeout_ns_show(struct device *dev, struct device_attribute *attr,
+ char *const buf)
+{
+ struct kbase_device *kbdev = dev_get_drvdata(dev);
+ u32 pwroff;
+
+ if (!kbdev)
+ return -ENODEV;
+
+ pwroff = kbase_csf_firmware_get_mcu_core_pwroff_time(kbdev);
+ return scnprintf(buf, PAGE_SIZE, "%u\n", pwroff);
+}
+
+/**
+ * mcu_shader_pwroff_timeout_ns_store - Set the MCU shader core power-off time value.
+ *
+ * @dev: The device this sysfs file is for
+ * @attr: The attributes of the sysfs file
+ * @buf: The value written to the sysfs file
+ * @count: The number of bytes to write to the sysfs file
+ *
+ * Set the duration value (unit: nanoseconds) for the MCU shader Core power-off
+ * timer, used when the shader cores' power transitions are delegated to the
+ * MCU (normal operational mode).
+ *
+ * Return: @count if the function succeeded. An error code on failure.
+ */
+static ssize_t mcu_shader_pwroff_timeout_ns_store(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct kbase_device *kbdev = dev_get_drvdata(dev);
+ u32 dur;
+
+ const struct kbase_pm_policy *current_policy;
+ bool always_on;
+
+ if (!kbdev)
+ return -ENODEV;
+
+ if (kstrtouint(buf, 0, &dur))
+ return -EINVAL;
+
+ current_policy = kbase_pm_get_policy(kbdev);
+ always_on = current_policy == &kbase_pm_always_on_policy_ops;
+ if (dur == 0 && !always_on)
+ return -EINVAL;
+
+ kbase_csf_firmware_set_mcu_core_pwroff_time(kbdev, dur);
+
+ return count;
+}
+
+static DEVICE_ATTR_RW(mcu_shader_pwroff_timeout_ns);
+
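
Both mcu_shader_pwroff_timeout store handlers reject a zero timeout unless the always-on power policy is currently selected. A purely illustrative refactor of that shared check (the driver keeps it inline in each handler):

static bool mcu_pwroff_timeout_valid(struct kbase_device *kbdev, u32 dur)
{
	const struct kbase_pm_policy *current_policy = kbase_pm_get_policy(kbdev);
	const bool always_on = current_policy == &kbase_pm_always_on_policy_ops;

	/* A zero power-off timeout is only accepted with the always_on policy. */
	return dur != 0 || always_on;
}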
#endif /* MALI_USE_CSF */
static struct attribute *kbase_scheduling_attrs[] = {
@@ -5563,7 +6010,9 @@ static struct attribute *kbase_attrs[] = {
&dev_attr_csg_scheduling_period.attr,
&dev_attr_fw_timeout.attr,
&dev_attr_idle_hysteresis_time.attr,
+ &dev_attr_idle_hysteresis_time_ns.attr,
&dev_attr_mcu_shader_pwroff_timeout.attr,
+ &dev_attr_mcu_shader_pwroff_timeout_ns.attr,
#endif /* !MALI_USE_CSF */
&dev_attr_power_policy.attr,
&dev_attr_core_mask.attr,
@@ -5725,6 +6174,14 @@ static int kbase_platform_device_probe(struct platform_device *pdev)
mutex_unlock(&kbase_probe_mutex);
#endif
} else {
+#if (KERNEL_VERSION(6, 1, 0) <= LINUX_VERSION_CODE)
+ /* Since upstream is not exporting mmap_min_addr, kbase at the
+ * moment is unable to track possible kernel changes via sysfs.
+ * Flag this out in a device info message.
+ */
+ dev_info(kbdev->dev, KBASE_COMPILED_MMAP_MIN_ADDR_MSG);
+#endif
+
dev_info(kbdev->dev,
"Probed as %s\n", dev_name(kbdev->mdev.this_device));
kbase_increment_device_id();
@@ -5950,7 +6407,7 @@ static struct platform_driver kbase_platform_driver = {
.probe = kbase_platform_device_probe,
.remove = kbase_platform_device_remove,
.driver = {
- .name = kbase_drv_name,
+ .name = KBASE_DRV_NAME,
.pm = &kbase_pm_ops,
.of_match_table = of_match_ptr(kbase_dt_ids),
.probe_type = PROBE_PREFER_ASYNCHRONOUS,
diff --git a/mali_kbase/mali_kbase_ctx_sched.c b/mali_kbase/mali_kbase_ctx_sched.c
index dc6feb9..ea4f300 100644
--- a/mali_kbase/mali_kbase_ctx_sched.c
+++ b/mali_kbase/mali_kbase_ctx_sched.c
@@ -239,10 +239,11 @@ void kbase_ctx_sched_restore_all_as(struct kbase_device *kbdev)
WARN_ON(!kbdev->pm.backend.gpu_powered);
+ kbdev->mmu_unresponsive = false;
+
for (i = 0; i != kbdev->nr_hw_address_spaces; ++i) {
struct kbase_context *kctx;
- kbdev->as[i].is_unresponsive = false;
#if MALI_USE_CSF
if ((i == MCU_AS_NR) && kbdev->csf.firmware_inited) {
kbase_mmu_update(kbdev, &kbdev->csf.mcu_mmu,
@@ -292,7 +293,7 @@ struct kbase_context *kbase_ctx_sched_as_to_ctx_refcount(
found_kctx = kbdev->as_to_kctx[as_nr];
- if (!WARN_ON(found_kctx == NULL))
+ if (found_kctx)
kbase_ctx_sched_retain_ctx_refcount(found_kctx);
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
diff --git a/mali_kbase/mali_kbase_debug_mem_allocs.c b/mali_kbase/mali_kbase_debug_mem_allocs.c
index 418bb19..0592187 100644
--- a/mali_kbase/mali_kbase_debug_mem_allocs.c
+++ b/mali_kbase/mali_kbase_debug_mem_allocs.c
@@ -34,8 +34,7 @@
/**
* debug_zone_mem_allocs_show - Show information from specific rbtree
- * @zone: Name of GPU virtual memory zone
- * @rbtree: Pointer to the root of the rbtree associated with @zone
+ * @zone: The memory zone to be displayed
* @sfile: The debugfs entry
*
* This function is called to show information about all the GPU allocations of a
@@ -43,9 +42,10 @@
* The information like the start virtual address and size (in bytes) is shown for
* every GPU allocation mapped in the zone.
*/
-static void debug_zone_mem_allocs_show(char *zone, struct rb_root *rbtree, struct seq_file *sfile)
+static void debug_zone_mem_allocs_show(struct kbase_reg_zone *zone, struct seq_file *sfile)
{
struct rb_node *p;
+ struct rb_root *rbtree = &zone->reg_rbtree;
struct kbase_va_region *reg;
const char *type_names[5] = {
"Native",
@@ -57,7 +57,7 @@ static void debug_zone_mem_allocs_show(char *zone, struct rb_root *rbtree, struc
#define MEM_ALLOCS_HEADER \
" VA, VA size, Commit size, Flags, Mem type\n"
- seq_printf(sfile, "Zone name: %s\n:", zone);
+ seq_printf(sfile, "Zone name: %s\n:", kbase_reg_zone_get_name(zone->id));
seq_printf(sfile, MEM_ALLOCS_HEADER);
for (p = rb_first(rbtree); p; p = rb_next(p)) {
reg = rb_entry(p, struct kbase_va_region, rblink);
@@ -82,18 +82,15 @@ static void debug_zone_mem_allocs_show(char *zone, struct rb_root *rbtree, struc
static int debug_ctx_mem_allocs_show(struct seq_file *sfile, void *data)
{
struct kbase_context *const kctx = sfile->private;
+ enum kbase_memory_zone zone_idx;
kbase_gpu_vm_lock(kctx);
+ for (zone_idx = 0; zone_idx < CONTEXT_ZONE_MAX; zone_idx++) {
+ struct kbase_reg_zone *zone;
- debug_zone_mem_allocs_show("SAME_VA:", &kctx->reg_rbtree_same, sfile);
- debug_zone_mem_allocs_show("CUSTOM_VA:", &kctx->reg_rbtree_custom, sfile);
- debug_zone_mem_allocs_show("EXEC_VA:", &kctx->reg_rbtree_exec, sfile);
-
-#if MALI_USE_CSF
- debug_zone_mem_allocs_show("EXEC_VA_FIXED:", &kctx->reg_rbtree_exec_fixed, sfile);
- debug_zone_mem_allocs_show("FIXED_VA:", &kctx->reg_rbtree_fixed, sfile);
-#endif /* MALI_USE_CSF */
-
+ zone = &kctx->reg_zone[zone_idx];
+ debug_zone_mem_allocs_show(zone, sfile);
+ }
kbase_gpu_vm_unlock(kctx);
return 0;
}
diff --git a/mali_kbase/mali_kbase_debug_mem_view.c b/mali_kbase/mali_kbase_debug_mem_view.c
index ce87a00..7086c6b 100644
--- a/mali_kbase/mali_kbase_debug_mem_view.c
+++ b/mali_kbase/mali_kbase_debug_mem_view.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2013-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2013-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -189,13 +189,13 @@ static const struct seq_operations ops = {
.show = debug_mem_show,
};
-static int debug_mem_zone_open(struct rb_root *rbtree,
- struct debug_mem_data *mem_data)
+static int debug_mem_zone_open(struct kbase_reg_zone *zone, struct debug_mem_data *mem_data)
{
int ret = 0;
struct rb_node *p;
struct kbase_va_region *reg;
struct debug_mem_mapping *mapping;
+ struct rb_root *rbtree = &zone->reg_rbtree;
for (p = rb_first(rbtree); p; p = rb_next(p)) {
reg = rb_entry(p, struct kbase_va_region, rblink);
@@ -233,8 +233,9 @@ static int debug_mem_open(struct inode *i, struct file *file)
struct kbase_context *const kctx = i->i_private;
struct debug_mem_data *mem_data;
int ret;
+ enum kbase_memory_zone idx;
- if (get_file_rcu(kctx->filp) == 0)
+ if (!kbase_file_inc_fops_count_unless_closed(kctx->kfile))
return -ENOENT;
/* Check if file was opened in write mode. GPU memory contents
@@ -263,37 +264,15 @@ static int debug_mem_open(struct inode *i, struct file *file)
mem_data->column_width = kctx->mem_view_column_width;
- ret = debug_mem_zone_open(&kctx->reg_rbtree_same, mem_data);
- if (ret != 0) {
- kbase_gpu_vm_unlock(kctx);
- goto out;
- }
-
- ret = debug_mem_zone_open(&kctx->reg_rbtree_custom, mem_data);
- if (ret != 0) {
- kbase_gpu_vm_unlock(kctx);
- goto out;
- }
-
- ret = debug_mem_zone_open(&kctx->reg_rbtree_exec, mem_data);
- if (ret != 0) {
- kbase_gpu_vm_unlock(kctx);
- goto out;
- }
+ for (idx = 0; idx < CONTEXT_ZONE_MAX; idx++) {
+ struct kbase_reg_zone *zone = &kctx->reg_zone[idx];
-#if MALI_USE_CSF
- ret = debug_mem_zone_open(&kctx->reg_rbtree_exec_fixed, mem_data);
- if (ret != 0) {
- kbase_gpu_vm_unlock(kctx);
- goto out;
- }
-
- ret = debug_mem_zone_open(&kctx->reg_rbtree_fixed, mem_data);
- if (ret != 0) {
- kbase_gpu_vm_unlock(kctx);
- goto out;
+ ret = debug_mem_zone_open(zone, mem_data);
+ if (ret != 0) {
+ kbase_gpu_vm_unlock(kctx);
+ goto out;
+ }
}
-#endif
kbase_gpu_vm_unlock(kctx);
@@ -316,7 +295,7 @@ out:
}
seq_release(i, file);
open_fail:
- fput(kctx->filp);
+ kbase_file_dec_fops_count(kctx->kfile);
return ret;
}
@@ -346,7 +325,7 @@ static int debug_mem_release(struct inode *inode, struct file *file)
kfree(mem_data);
}
- fput(kctx->filp);
+ kbase_file_dec_fops_count(kctx->kfile);
return 0;
}
diff --git a/mali_kbase/mali_kbase_debug_mem_zones.c b/mali_kbase/mali_kbase_debug_mem_zones.c
index 1f8db32..115c9c3 100644
--- a/mali_kbase/mali_kbase_debug_mem_zones.c
+++ b/mali_kbase/mali_kbase_debug_mem_zones.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2022-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -47,30 +47,29 @@
static int debug_mem_zones_show(struct seq_file *sfile, void *data)
{
struct kbase_context *const kctx = sfile->private;
- size_t i;
-
- const char *zone_names[KBASE_REG_ZONE_MAX] = {
- "SAME_VA",
- "CUSTOM_VA",
- "EXEC_VA"
-#if MALI_USE_CSF
- ,
- "MCU_SHARED_VA",
- "EXEC_FIXED_VA",
- "FIXED_VA"
-#endif
- };
+ struct kbase_reg_zone *reg_zone;
+ enum kbase_memory_zone zone_idx;
kbase_gpu_vm_lock(kctx);
- for (i = 0; i < KBASE_REG_ZONE_MAX; i++) {
- struct kbase_reg_zone *reg_zone = &kctx->reg_zone[i];
+ for (zone_idx = 0; zone_idx < CONTEXT_ZONE_MAX; zone_idx++) {
+ reg_zone = &kctx->reg_zone[zone_idx];
if (reg_zone->base_pfn) {
- seq_printf(sfile, "%15s %zu 0x%.16llx 0x%.16llx\n", zone_names[i], i,
- reg_zone->base_pfn, reg_zone->va_size_pages);
+ seq_printf(sfile, "%15s %u 0x%.16llx 0x%.16llx\n",
+ kbase_reg_zone_get_name(zone_idx), zone_idx, reg_zone->base_pfn,
+ reg_zone->va_size_pages);
}
}
+#if MALI_USE_CSF
+ reg_zone = &kctx->kbdev->csf.mcu_shared_zone;
+
+ if (reg_zone && reg_zone->base_pfn) {
+ seq_printf(sfile, "%15s %u 0x%.16llx 0x%.16llx\n",
+ kbase_reg_zone_get_name(MCU_SHARED_ZONE), MCU_SHARED_ZONE,
+ reg_zone->base_pfn, reg_zone->va_size_pages);
+ }
+#endif
kbase_gpu_vm_unlock(kctx);
return 0;
diff --git a/mali_kbase/mali_kbase_defs.h b/mali_kbase/mali_kbase_defs.h
index 12e90ac..efe690d 100644
--- a/mali_kbase/mali_kbase_defs.h
+++ b/mali_kbase/mali_kbase_defs.h
@@ -183,6 +183,60 @@ struct kbase_as;
struct kbase_mmu_setup;
struct kbase_kinstr_jm;
+#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD)
+/**
+ * struct kbase_gpu_metrics - Object containing members that are used to emit
+ * GPU metrics tracepoints for all applications that
+ * created Kbase context(s) for a GPU.
+ *
+ * @active_list: List of applications that did some GPU activity in the recent work period.
+ * @inactive_list: List of applications that didn't do any GPU activity in the recent work period.
+ */
+struct kbase_gpu_metrics {
+ struct list_head active_list;
+ struct list_head inactive_list;
+};
+
+/**
+ * struct kbase_gpu_metrics_ctx - Object created for every application that created
+ * Kbase context(s), containing members that are used
+ * to emit GPU metrics tracepoints for the application.
+ *
+ * @link: Links the object in kbase_device::gpu_metrics::active_list
+ * or kbase_device::gpu_metrics::inactive_list.
+ * @first_active_start_time: Records the time at which the application first became
+ * active in the current work period.
+ * @last_active_start_time: Records the time at which the application last became
+ * active in the current work period.
+ * @last_active_end_time: Records the time at which the application last became
+ * inactive in the current work period.
+ * @total_active: Tracks the time for which application has been active
+ * in the current work period.
+ * @prev_wp_active_end_time: Records the time at which the application last became
+ * inactive in the previous work period.
+ * @aid: Unique identifier for an application.
+ * @kctx_count: Counter to keep track of the number of Kbase contexts
+ * created for an application. There may be multiple Kbase
+ * contexts contributing GPU activity data to a single GPU
+ * metrics context.
+ * @active_cnt: Counter that is updated every time the GPU activity starts
+ * and ends in the current work period for an application.
+ * @flags: Flags to track the state of GPU metrics context.
+ */
+struct kbase_gpu_metrics_ctx {
+ struct list_head link;
+ u64 first_active_start_time;
+ u64 last_active_start_time;
+ u64 last_active_end_time;
+ u64 total_active;
+ u64 prev_wp_active_end_time;
+ unsigned int aid;
+ unsigned int kctx_count;
+ u8 active_cnt;
+ u8 flags;
+};
+#endif
+
/**
* struct kbase_io_access - holds information about 1 register access
*
@@ -317,7 +371,7 @@ struct kbase_mmu_table {
u64 levels[MIDGARD_MMU_BOTTOMLEVEL][PAGE_SIZE / sizeof(u64)];
} teardown_pages;
/**
- * @free_pgds: Scratch memory user for insertion, update and teardown
+ * @free_pgds: Scratch memory used for insertion, update and teardown
* operations to store a temporary list of PGDs to be freed
* at the end of the operation.
*/
@@ -331,18 +385,69 @@ struct kbase_mmu_table {
};
/**
- * struct kbase_reg_zone - Information about GPU memory region zones
+ * enum kbase_memory_zone - Kbase memory zone identifier
+ * @SAME_VA_ZONE: Memory zone for allocations where the GPU and CPU VA coincide.
+ * @CUSTOM_VA_ZONE: When operating in compatibility mode, this zone is used to
+ * allow 32-bit userspace (either on a 32-bit device or a
+ * 32-bit application on a 64-bit device) to address the entirety
+ * of the GPU address space. The @CUSTOM_VA_ZONE is also used
+ * for JIT allocations: on 64-bit systems, the zone is created
+ * by reducing the size of the SAME_VA zone by a user-controlled
+ * amount, whereas on 32-bit systems, it is created as part of
+ * the existing CUSTOM_VA_ZONE
+ * @EXEC_VA_ZONE: Memory zone used to track GPU-executable memory. The start
+ * and end of this zone depend on the individual platform,
+ * and it is initialized upon user process request.
+ * @EXEC_FIXED_VA_ZONE: Memory zone used to contain GPU-executable memory
+ * that also permits FIXED/FIXABLE allocations.
+ * @FIXED_VA_ZONE: Memory zone used to allocate memory at userspace-supplied
+ * addresses.
+ * @MCU_SHARED_ZONE: Memory zone created for mappings shared between the MCU
+ * and Kbase. Currently this is the only zone type that is
+ * created on a per-device, rather than a per-context
+ * basis.
+ * @MEMORY_ZONE_MAX: Sentinel value used for iterating over all the memory zone
+ * identifiers.
+ * @CONTEXT_ZONE_MAX: Sentinel value used to keep track of the last per-context
+ * zone for iteration.
+ */
+enum kbase_memory_zone {
+ SAME_VA_ZONE,
+ CUSTOM_VA_ZONE,
+ EXEC_VA_ZONE,
+#if IS_ENABLED(MALI_USE_CSF)
+ EXEC_FIXED_VA_ZONE,
+ FIXED_VA_ZONE,
+ MCU_SHARED_ZONE,
+#endif
+ MEMORY_ZONE_MAX,
+#if IS_ENABLED(MALI_USE_CSF)
+ CONTEXT_ZONE_MAX = FIXED_VA_ZONE + 1
+#else
+ CONTEXT_ZONE_MAX = EXEC_VA_ZONE + 1
+#endif
+};
+
+/**
+ * struct kbase_reg_zone - GPU memory zone information and region tracking
+ * @reg_rbtree: RB tree used to track kbase memory regions.
* @base_pfn: Page Frame Number in GPU virtual address space for the start of
* the Zone
* @va_size_pages: Size of the Zone in pages
+ * @id: Memory zone identifier
+ * @cache: Pointer to a per-device slab allocator to allow for quickly allocating
+ * new regions
*
* Track information about a zone KBASE_REG_ZONE() and related macros.
* In future, this could also store the &rb_root that are currently in
* &kbase_context and &kbase_csf_device.
*/
struct kbase_reg_zone {
+ struct rb_root reg_rbtree;
u64 base_pfn;
u64 va_size_pages;
+ enum kbase_memory_zone id;
+ struct kmem_cache *cache;
};
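
With the rbtree, id and slab cache folded into struct kbase_reg_zone, a per-context zone walk becomes a single indexed loop over reg_zone[]; the debugfs changes later in this patch follow exactly this shape. A minimal sketch, assuming the kbase_reg_zone_get_name() helper used elsewhere in the patch:

static void print_context_zones(struct kbase_context *kctx)
{
	enum kbase_memory_zone zone_idx;

	/* CONTEXT_ZONE_MAX stops at the last per-context zone;
	 * MCU_SHARED_ZONE is per-device and handled separately.
	 */
	for (zone_idx = 0; zone_idx < CONTEXT_ZONE_MAX; zone_idx++) {
		struct kbase_reg_zone *zone = &kctx->reg_zone[zone_idx];

		pr_debug("%s: base_pfn=0x%llx size=%llu pages\n",
			 kbase_reg_zone_get_name(zone->id), zone->base_pfn,
			 zone->va_size_pages);
	}
}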
#if MALI_USE_CSF
@@ -439,7 +544,15 @@ struct kbase_clk_rate_trace_manager {
* Note that some code paths keep shaders/the tiler
* powered whilst this is 0.
* Use kbase_pm_is_active() instead to check for such cases.
- * @suspending: Flag indicating suspending/suspended
+ * @suspending: Flag set to true when System suspend of GPU device begins and
+ * set to false only when System resume of GPU device starts.
+ * So the GPU device could be in a suspended state while the flag is set.
+ * The flag is updated with @lock held.
+ * @resuming: Flag set to true when System resume of GPU device starts and is set
+ * to false when resume ends. The flag is set to true at the same time
+ * when @suspending is set to false with @lock held.
+ * The flag is currently used only to prevent Kbase context termination
+ * during System resume of GPU device.
* @runtime_active: Flag to track if the GPU is in runtime suspended or active
* state. This ensures that runtime_put and runtime_get
* functions are called in pairs. For example if runtime_get
@@ -450,7 +563,7 @@ struct kbase_clk_rate_trace_manager {
* This structure contains data for the power management framework.
* There is one instance of this structure per device in the system.
* @zero_active_count_wait: Wait queue set when active_count == 0
- * @resume_wait: system resume of GPU device.
+ * @resume_wait: Wait queue to wait for the System suspend/resume of GPU device.
* @debug_core_mask: Bit masks identifying the available shader cores that are
* specified via sysfs. One mask per job slot.
* @debug_core_mask_all: Bit masks identifying the available shader cores that
@@ -471,6 +584,7 @@ struct kbase_pm_device_data {
struct rt_mutex lock;
int active_count;
bool suspending;
+ bool resuming;
#if MALI_USE_CSF
bool runtime_active;
#endif
@@ -823,10 +937,14 @@ struct kbase_mem_migrate {
* to the GPU device. This points to an internal memory
* group manager if no platform-specific memory group
* manager was retrieved through device tree.
+ * @mmu_unresponsive: Flag to indicate that the MMU is not responding.
+ * Set if an MMU command isn't completed within
+ * &kbase_device.mmu_or_gpu_cache_op_wait_time_ms.
+ * Cleared by kbase_ctx_sched_restore_all_as() after the GPU reset completes.
* @as: Array of objects representing address spaces of GPU.
- * @as_free: Bitpattern of free/available GPU address spaces.
* @as_to_kctx: Array of pointers to struct kbase_context, having
* GPU address spaces assigned to them.
+ * @as_free: Bitpattern of free/available GPU address spaces.
* @mmu_mask_change: Lock to serialize the access to MMU interrupt mask
* register used in the handling of Bus & Page faults.
* @pagesize_2mb: Boolean to determine whether 2MiB page sizes are
@@ -1082,9 +1200,11 @@ struct kbase_mem_migrate {
* KCPU queue. These structures may outlive kbase module
* itself. Therefore, in such a case, a warning should be
* be produced.
- * @mmu_as_inactive_wait_time_ms: Maximum waiting time in ms for the completion of
- * a MMU operation
+ * @mmu_or_gpu_cache_op_wait_time_ms: Maximum waiting time in ms for the completion of
+ * a cache operation via MMU_AS_CONTROL or GPU_CONTROL.
* @va_region_slab: kmem_cache (slab) for allocated kbase_va_region structures.
+ * @fence_signal_timeout_enabled: Global flag for whether fence signal timeout tracking
+ * is enabled.
*/
struct kbase_device {
u32 hw_quirks_sc;
@@ -1135,9 +1255,10 @@ struct kbase_device {
struct memory_group_manager_device *mgm_dev;
+ bool mmu_unresponsive;
struct kbase_as as[BASE_MAX_NR_AS];
- u16 as_free;
struct kbase_context *as_to_kctx[BASE_MAX_NR_AS];
+ u16 as_free;
spinlock_t mmu_mask_change;
@@ -1196,9 +1317,7 @@ struct kbase_device {
u64 lowest_gpu_freq_khz;
-#if MALI_USE_CSF
struct kbase_backend_time backend_time;
-#endif
bool cache_clean_in_progress;
u32 cache_clean_queued;
@@ -1396,8 +1515,18 @@ struct kbase_device {
#if MALI_USE_CSF && IS_ENABLED(CONFIG_SYNC_FILE)
atomic_t live_fence_metadata;
#endif
- u32 mmu_as_inactive_wait_time_ms;
+ u32 mmu_or_gpu_cache_op_wait_time_ms;
struct kmem_cache *va_region_slab;
+
+#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD)
+ /**
+ * @gpu_metrics: GPU device wide structure used for emitting GPU metrics tracepoints.
+ */
+ struct kbase_gpu_metrics gpu_metrics;
+#endif
+#if MALI_USE_CSF
+ atomic_t fence_signal_timeout_enabled;
+#endif
};
/**
@@ -1414,6 +1543,9 @@ struct kbase_device {
* @KBASE_FILE_COMPLETE: Indicates if the setup for context has
* completed, i.e. flags have been set for the
* context.
+ * @KBASE_FILE_DESTROY_CTX: Indicates that destruction of the context has begun or
+ * is complete. This state can only be reached after
+ * @KBASE_FILE_COMPLETE.
*
* The driver allows only limited interaction with user-space until setup
* is complete.
@@ -1423,7 +1555,8 @@ enum kbase_file_state {
KBASE_FILE_VSN_IN_PROGRESS,
KBASE_FILE_NEED_CTX,
KBASE_FILE_CTX_IN_PROGRESS,
- KBASE_FILE_COMPLETE
+ KBASE_FILE_COMPLETE,
+ KBASE_FILE_DESTROY_CTX
};
/**
@@ -1433,6 +1566,12 @@ enum kbase_file_state {
* allocated from the probe method of the Mali driver.
* @filp: Pointer to the struct file corresponding to device file
* /dev/malixx instance, passed to the file's open method.
+ * @owner: Pointer to the file table structure of a process that
+ * created the instance of /dev/malixx device file. Set to
+ * NULL when that process closes the file instance. No more
+ * file operations would be allowed once set to NULL.
+ * It would be updated only in the Userspace context, i.e.
+ * when @kbase_open or @kbase_flush is called.
* @kctx: Object representing an entity, among which GPU is
* scheduled and which gets its own GPU address space.
* Invalid until @setup_state is KBASE_FILE_COMPLETE.
@@ -1441,13 +1580,40 @@ enum kbase_file_state {
* @setup_state is KBASE_FILE_NEED_CTX.
* @setup_state: Initialization state of the file. Values come from
* the kbase_file_state enumeration.
+ * @destroy_kctx_work: Work item for destroying the @kctx, enqueued only when
+ * @fops_count and @map_count become zero after the /dev/malixx
+ * file has been closed by the @owner.
+ * @lock: Lock to serialize the access to members like @owner, @fops_count,
+ * @map_count.
+ * @fops_count: Counter that is incremented at the beginning of a method
+ * defined for @kbase_fops and is decremented at the end.
+ * So the counter keeps track of the file operations in progress
+ * for the /dev/malixx file that are being handled by Kbase.
+ * The counter is needed to defer the context termination as
+ * Userspace can close the /dev/malixx file and flush() method
+ * can get called when some other file operation is in progress.
+ * @map_count: Counter to keep track of the memory mappings present on
+ * the /dev/malixx file instance. The counter is needed to defer the
+ * context termination as Userspace can close the /dev/malixx
+ * file and flush() method can get called when mappings are still
+ * present.
+ * @zero_fops_count_wait: Waitqueue used to wait for the @fops_count to become 0.
+ * Currently needed only for the "mem_view" debugfs file.
*/
struct kbase_file {
struct kbase_device *kbdev;
struct file *filp;
+ fl_owner_t owner;
struct kbase_context *kctx;
unsigned long api_version;
atomic_t setup_state;
+ struct work_struct destroy_kctx_work;
+ spinlock_t lock;
+ int fops_count;
+ int map_count;
+#if IS_ENABLED(CONFIG_DEBUG_FS)
+ wait_queue_head_t zero_fops_count_wait;
+#endif
};
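
A sketch of how @owner, @fops_count and @map_count are meant to interact according to the member descriptions above: context destruction is deferred until the owner has closed the file and no file operation or memory mapping still uses it. This is an illustration only; the driver's actual helper may be named and structured differently:

static void kbase_file_maybe_queue_destroy(struct kbase_file *kfile)
{
	unsigned long flags;
	bool destroy;

	spin_lock_irqsave(&kfile->lock, flags);
	destroy = !kfile->owner && !kfile->fops_count && !kfile->map_count;
	spin_unlock_irqrestore(&kfile->lock, flags);

	/* Enqueue the deferred kctx teardown once nothing references the file. */
	if (destroy)
		queue_work(system_wq, &kfile->destroy_kctx_work);
}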
#if MALI_JIT_PRESSURE_LIMIT_BASE
/**
@@ -1617,8 +1783,8 @@ struct kbase_sub_alloc {
/**
* struct kbase_context - Kernel base context
*
- * @filp: Pointer to the struct file corresponding to device file
- * /dev/malixx instance, passed to the file's open method.
+ * @kfile: Pointer to the object representing the /dev/malixx device
+ * file instance.
* @kbdev: Pointer to the Kbase device for which the context is created.
* @kctx_list_link: Node into Kbase device list of contexts.
* @mmu: Structure holding details of the MMU tables for this
@@ -1653,22 +1819,6 @@ struct kbase_sub_alloc {
* for the allocations >= 2 MB in size.
* @reg_lock: Lock used for GPU virtual address space management operations,
* like adding/freeing a memory region in the address space.
- * Can be converted to a rwlock ?.
- * @reg_rbtree_same: RB tree of the memory regions allocated from the SAME_VA
- * zone of the GPU virtual address space. Used for allocations
- * having the same value for GPU & CPU virtual address.
- * @reg_rbtree_custom: RB tree of the memory regions allocated from the CUSTOM_VA
- * zone of the GPU virtual address space.
- * @reg_rbtree_exec: RB tree of the memory regions allocated from the EXEC_VA
- * zone of the GPU virtual address space. Used for GPU-executable
- * allocations which don't need the SAME_VA property.
- * @reg_rbtree_exec_fixed: RB tree of the memory regions allocated from the
- * EXEC_FIXED_VA zone of the GPU virtual address space. Used for
- * GPU-executable allocations with FIXED/FIXABLE GPU virtual
- * addresses.
- * @reg_rbtree_fixed: RB tree of the memory regions allocated from the FIXED_VA zone
- * of the GPU virtual address space. Used for allocations with
- * FIXED/FIXABLE GPU virtual addresses.
* @num_fixable_allocs: A count for the number of memory allocations with the
* BASE_MEM_FIXABLE property.
* @num_fixed_allocs: A count for the number of memory allocations with the
@@ -1881,6 +2031,7 @@ struct kbase_sub_alloc {
* that created the Kbase context. It would be set only for the
* contexts created by the Userspace and not for the contexts
* created internally by the Kbase.
+ * @comm: Record of the process name.
*
* A kernel base context is an entity among which the GPU is scheduled.
* Each context has its own GPU address space.
@@ -1889,7 +2040,7 @@ struct kbase_sub_alloc {
* is made on the device file.
*/
struct kbase_context {
- struct file *filp;
+ struct kbase_file *kfile;
struct kbase_device *kbdev;
struct list_head kctx_list_link;
struct kbase_mmu_table mmu;
@@ -1914,17 +2065,11 @@ struct kbase_context {
struct list_head mem_partials;
struct mutex reg_lock;
-
- struct rb_root reg_rbtree_same;
- struct rb_root reg_rbtree_custom;
- struct rb_root reg_rbtree_exec;
#if MALI_USE_CSF
- struct rb_root reg_rbtree_exec_fixed;
- struct rb_root reg_rbtree_fixed;
atomic64_t num_fixable_allocs;
atomic64_t num_fixed_allocs;
#endif
- struct kbase_reg_zone reg_zone[KBASE_REG_ZONE_MAX];
+ struct kbase_reg_zone reg_zone[CONTEXT_ZONE_MAX];
#if MALI_USE_CSF
struct kbase_csf_context csf;
@@ -2031,6 +2176,16 @@ struct kbase_context {
void *platform_data;
struct task_struct *task;
+
+#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD)
+ /**
+ * @gpu_metrics_ctx: Pointer to the GPU metrics context corresponding to the
+ * application that created the Kbase context.
+ */
+ struct kbase_gpu_metrics_ctx *gpu_metrics_ctx;
+#endif
+
+ char comm[TASK_COMM_LEN];
};
#ifdef CONFIG_MALI_CINSTR_GWT
diff --git a/mali_kbase/mali_kbase_dummy_job_wa.c b/mali_kbase/mali_kbase_dummy_job_wa.c
index 35934b9..c3c6046 100644
--- a/mali_kbase/mali_kbase_dummy_job_wa.c
+++ b/mali_kbase/mali_kbase_dummy_job_wa.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -183,9 +183,9 @@ int kbase_dummy_job_wa_execute(struct kbase_device *kbdev, u64 cores)
if (kbdev->dummy_job_wa.flags & KBASE_DUMMY_JOB_WA_FLAG_WAIT_POWERUP) {
/* wait for power-ups */
- wait(kbdev, SHADER_READY_LO, (cores & U32_MAX), true);
+ wait(kbdev, GPU_CONTROL_REG(SHADER_READY_LO), (cores & U32_MAX), true);
if (cores >> 32)
- wait(kbdev, SHADER_READY_HI, (cores >> 32), true);
+ wait(kbdev, GPU_CONTROL_REG(SHADER_READY_HI), (cores >> 32), true);
}
if (kbdev->dummy_job_wa.flags & KBASE_DUMMY_JOB_WA_FLAG_SERIALIZE) {
@@ -218,11 +218,11 @@ int kbase_dummy_job_wa_execute(struct kbase_device *kbdev, u64 cores)
kbase_reg_write(kbdev, SHADER_PWROFF_HI, (cores >> 32));
/* wait for power off complete */
- wait(kbdev, SHADER_READY_LO, (cores & U32_MAX), false);
- wait(kbdev, SHADER_PWRTRANS_LO, (cores & U32_MAX), false);
+ wait(kbdev, GPU_CONTROL_REG(SHADER_READY_LO), (cores & U32_MAX), false);
+ wait(kbdev, GPU_CONTROL_REG(SHADER_PWRTRANS_LO), (cores & U32_MAX), false);
if (cores >> 32) {
- wait(kbdev, SHADER_READY_HI, (cores >> 32), false);
- wait(kbdev, SHADER_PWRTRANS_HI, (cores >> 32), false);
+ wait(kbdev, GPU_CONTROL_REG(SHADER_READY_HI), (cores >> 32), false);
+ wait(kbdev, GPU_CONTROL_REG(SHADER_PWRTRANS_HI), (cores >> 32), false);
}
kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), U32_MAX);
}
diff --git a/mali_kbase/mali_kbase_fence.h b/mali_kbase/mali_kbase_fence.h
index f4507ac..ea2ac34 100644
--- a/mali_kbase/mali_kbase_fence.h
+++ b/mali_kbase/mali_kbase_fence.h
@@ -33,6 +33,7 @@
#include "mali_kbase_fence_defs.h"
#include "mali_kbase.h"
#include "mali_kbase_refcount_defs.h"
+#include <linux/version_compat_defs.h>
#if MALI_USE_CSF
/* Maximum number of characters in DMA fence timeline name. */
@@ -160,16 +161,8 @@ static inline bool kbase_fence_out_is_ours(struct kbase_jd_atom *katom)
static inline int kbase_fence_out_signal(struct kbase_jd_atom *katom,
int status)
{
- if (status) {
-#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE && \
- KERNEL_VERSION(4, 9, 68) <= LINUX_VERSION_CODE)
- fence_set_error(katom->dma_fence.fence, status);
-#elif (KERNEL_VERSION(4, 11, 0) <= LINUX_VERSION_CODE)
- dma_fence_set_error(katom->dma_fence.fence, status);
-#else
- katom->dma_fence.fence->status = status;
-#endif
- }
+ if (status)
+ dma_fence_set_error_helper(katom->dma_fence.fence, status);
return dma_fence_signal(katom->dma_fence.fence);
}
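
dma_fence_set_error_helper() comes from the version_compat_defs.h header included above; its definition is not shown in this hunk. A plausible shape, mirroring the version ladder it replaces (older kernels that still use struct fence are glossed over here):

#include <linux/version.h>

static inline void dma_fence_set_error_helper(struct dma_fence *fence, int error)
{
#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE && \
     KERNEL_VERSION(4, 9, 68) <= LINUX_VERSION_CODE)
	fence_set_error(fence, error);
#elif (KERNEL_VERSION(4, 11, 0) <= LINUX_VERSION_CODE)
	dma_fence_set_error(fence, error);
#else
	fence->status = error;
#endif
}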
diff --git a/mali_kbase/mali_kbase_fence_ops.c b/mali_kbase/mali_kbase_fence_ops.c
index 25b4c9c..f14a55e 100644
--- a/mali_kbase/mali_kbase_fence_ops.c
+++ b/mali_kbase/mali_kbase_fence_ops.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2020-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -31,7 +31,7 @@ kbase_fence_get_driver_name(struct fence *fence)
kbase_fence_get_driver_name(struct dma_fence *fence)
#endif
{
- return kbase_drv_name;
+ return KBASE_DRV_NAME;
}
static const char *
@@ -46,7 +46,7 @@ kbase_fence_get_timeline_name(struct dma_fence *fence)
return kcpu_fence->metadata->timeline_name;
#else
- return kbase_timeline_name;
+ return KBASE_TIMELINE_NAME;
#endif /* MALI_USE_CSF */
}
diff --git a/mali_kbase/mali_kbase_gpu_metrics.c b/mali_kbase/mali_kbase_gpu_metrics.c
new file mode 100644
index 0000000..af3a08d
--- /dev/null
+++ b/mali_kbase/mali_kbase_gpu_metrics.c
@@ -0,0 +1,260 @@
+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
+/*
+ *
+ * (C) COPYRIGHT 2023 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD)
+#include "mali_power_gpu_work_period_trace.h"
+#include <mali_kbase_gpu_metrics.h>
+
+/**
+ * enum gpu_metrics_ctx_flags - Flags for the GPU metrics context
+ *
+ * @ACTIVE_INTERVAL_IN_WP: Flag set when the application first becomes active in
+ * the current work period.
+ *
+ * @INSIDE_ACTIVE_LIST: Flag to track if object is in kbase_device::gpu_metrics::active_list
+ *
+ * All members need to be separate bits. This enum is intended for use in a
+ * bitmask where multiple values get OR-ed together.
+ */
+enum gpu_metrics_ctx_flags {
+ ACTIVE_INTERVAL_IN_WP = 1 << 0,
+ INSIDE_ACTIVE_LIST = 1 << 1,
+};
+
+static inline bool gpu_metrics_ctx_flag(struct kbase_gpu_metrics_ctx *gpu_metrics_ctx,
+ enum gpu_metrics_ctx_flags flag)
+{
+ return (gpu_metrics_ctx->flags & flag);
+}
+
+static inline void gpu_metrics_ctx_flag_set(struct kbase_gpu_metrics_ctx *gpu_metrics_ctx,
+ enum gpu_metrics_ctx_flags flag)
+{
+ gpu_metrics_ctx->flags |= flag;
+}
+
+static inline void gpu_metrics_ctx_flag_clear(struct kbase_gpu_metrics_ctx *gpu_metrics_ctx,
+ enum gpu_metrics_ctx_flags flag)
+{
+ gpu_metrics_ctx->flags &= ~flag;
+}
+
+static inline void validate_tracepoint_data(struct kbase_gpu_metrics_ctx *gpu_metrics_ctx,
+ u64 start_time, u64 end_time, u64 total_active)
+{
+#ifdef CONFIG_MALI_DEBUG
+ WARN(total_active > NSEC_PER_SEC,
+ "total_active %llu > 1 second for aid %u active_cnt %u",
+ total_active, gpu_metrics_ctx->aid, gpu_metrics_ctx->active_cnt);
+
+ WARN(start_time >= end_time,
+ "start_time %llu >= end_time %llu for aid %u active_cnt %u",
+ start_time, end_time, gpu_metrics_ctx->aid, gpu_metrics_ctx->active_cnt);
+
+ WARN(total_active > (end_time - start_time),
+ "total_active %llu > end_time %llu - start_time %llu for aid %u active_cnt %u",
+ total_active, end_time, start_time,
+ gpu_metrics_ctx->aid, gpu_metrics_ctx->active_cnt);
+
+ WARN(gpu_metrics_ctx->prev_wp_active_end_time > start_time,
+ "prev_wp_active_end_time %llu > start_time %llu for aid %u active_cnt %u",
+ gpu_metrics_ctx->prev_wp_active_end_time, start_time,
+ gpu_metrics_ctx->aid, gpu_metrics_ctx->active_cnt);
+#endif
+}
+
+static void emit_tracepoint_for_active_gpu_metrics_ctx(struct kbase_device *kbdev,
+ struct kbase_gpu_metrics_ctx *gpu_metrics_ctx, u64 current_time)
+{
+ const u64 start_time = gpu_metrics_ctx->first_active_start_time;
+ u64 total_active = gpu_metrics_ctx->total_active;
+ u64 end_time;
+
+ /* Check if the GPU activity is currently ongoing */
+ if (gpu_metrics_ctx->active_cnt) {
+ end_time = current_time;
+ total_active +=
+ end_time - gpu_metrics_ctx->last_active_start_time;
+
+ gpu_metrics_ctx->first_active_start_time = current_time;
+ gpu_metrics_ctx->last_active_start_time = current_time;
+ } else {
+ end_time = gpu_metrics_ctx->last_active_end_time;
+ gpu_metrics_ctx_flag_clear(gpu_metrics_ctx, ACTIVE_INTERVAL_IN_WP);
+ }
+
+ trace_gpu_work_period(kbdev->id, gpu_metrics_ctx->aid,
+ start_time, end_time, total_active);
+
+ validate_tracepoint_data(gpu_metrics_ctx, start_time, end_time, total_active);
+ gpu_metrics_ctx->prev_wp_active_end_time = end_time;
+ gpu_metrics_ctx->total_active = 0;
+}
+
+void kbase_gpu_metrics_ctx_put(struct kbase_device *kbdev,
+ struct kbase_gpu_metrics_ctx *gpu_metrics_ctx)
+{
+ WARN_ON(list_empty(&gpu_metrics_ctx->link));
+ WARN_ON(!gpu_metrics_ctx->kctx_count);
+
+ gpu_metrics_ctx->kctx_count--;
+ if (gpu_metrics_ctx->kctx_count)
+ return;
+
+ if (gpu_metrics_ctx_flag(gpu_metrics_ctx, ACTIVE_INTERVAL_IN_WP))
+ emit_tracepoint_for_active_gpu_metrics_ctx(kbdev,
+ gpu_metrics_ctx, ktime_get_raw_ns());
+
+ list_del_init(&gpu_metrics_ctx->link);
+ kfree(gpu_metrics_ctx);
+}
+
+struct kbase_gpu_metrics_ctx *kbase_gpu_metrics_ctx_get(struct kbase_device *kbdev, u32 aid)
+{
+ struct kbase_gpu_metrics *gpu_metrics = &kbdev->gpu_metrics;
+ struct kbase_gpu_metrics_ctx *gpu_metrics_ctx;
+
+ list_for_each_entry(gpu_metrics_ctx, &gpu_metrics->active_list, link) {
+ if (gpu_metrics_ctx->aid == aid) {
+ WARN_ON(!gpu_metrics_ctx->kctx_count);
+ gpu_metrics_ctx->kctx_count++;
+ return gpu_metrics_ctx;
+ }
+ }
+
+ list_for_each_entry(gpu_metrics_ctx, &gpu_metrics->inactive_list, link) {
+ if (gpu_metrics_ctx->aid == aid) {
+ WARN_ON(!gpu_metrics_ctx->kctx_count);
+ gpu_metrics_ctx->kctx_count++;
+ return gpu_metrics_ctx;
+ }
+ }
+
+ return NULL;
+}
+
+void kbase_gpu_metrics_ctx_init(struct kbase_device *kbdev,
+ struct kbase_gpu_metrics_ctx *gpu_metrics_ctx, unsigned int aid)
+{
+ gpu_metrics_ctx->aid = aid;
+ gpu_metrics_ctx->total_active = 0;
+ gpu_metrics_ctx->kctx_count = 1;
+ gpu_metrics_ctx->active_cnt = 0;
+ gpu_metrics_ctx->prev_wp_active_end_time = 0;
+ gpu_metrics_ctx->flags = 0;
+ list_add_tail(&gpu_metrics_ctx->link, &kbdev->gpu_metrics.inactive_list);
+}
+
+void kbase_gpu_metrics_ctx_start_activity(struct kbase_context *kctx, u64 timestamp_ns)
+{
+ struct kbase_gpu_metrics_ctx *gpu_metrics_ctx = kctx->gpu_metrics_ctx;
+
+ gpu_metrics_ctx->active_cnt++;
+ if (gpu_metrics_ctx->active_cnt == 1)
+ gpu_metrics_ctx->last_active_start_time = timestamp_ns;
+
+ if (!gpu_metrics_ctx_flag(gpu_metrics_ctx, ACTIVE_INTERVAL_IN_WP)) {
+ gpu_metrics_ctx->first_active_start_time = timestamp_ns;
+ gpu_metrics_ctx_flag_set(gpu_metrics_ctx, ACTIVE_INTERVAL_IN_WP);
+ }
+
+ if (!gpu_metrics_ctx_flag(gpu_metrics_ctx, INSIDE_ACTIVE_LIST)) {
+ list_move_tail(&gpu_metrics_ctx->link, &kctx->kbdev->gpu_metrics.active_list);
+ gpu_metrics_ctx_flag_set(gpu_metrics_ctx, INSIDE_ACTIVE_LIST);
+ }
+}
+
+void kbase_gpu_metrics_ctx_end_activity(struct kbase_context *kctx, u64 timestamp_ns)
+{
+ struct kbase_gpu_metrics_ctx *gpu_metrics_ctx = kctx->gpu_metrics_ctx;
+
+ if (WARN_ON_ONCE(!gpu_metrics_ctx->active_cnt))
+ return;
+
+ if (--gpu_metrics_ctx->active_cnt)
+ return;
+
+ if (likely(timestamp_ns > gpu_metrics_ctx->last_active_start_time)) {
+ gpu_metrics_ctx->last_active_end_time = timestamp_ns;
+ gpu_metrics_ctx->total_active +=
+ timestamp_ns - gpu_metrics_ctx->last_active_start_time;
+ return;
+ }
+
+ /* Due to the conversion from system timestamp to CPU timestamp (which involves
+ * rounding), the start and end timestamps could end up being the same.
+ */
+ if (timestamp_ns == gpu_metrics_ctx->last_active_start_time) {
+ gpu_metrics_ctx->last_active_end_time = timestamp_ns + 1;
+ gpu_metrics_ctx->total_active += 1;
+ return;
+ }
+
+ /* The following check is to detect the situation where 'ACT=0' event was not visible to
+ * the Kbase even though the system timestamp value sampled by FW was less than the system
+ * timestamp value sampled by Kbase just before the draining of trace buffer.
+ */
+ if (gpu_metrics_ctx->last_active_start_time == gpu_metrics_ctx->first_active_start_time &&
+ gpu_metrics_ctx->prev_wp_active_end_time == gpu_metrics_ctx->first_active_start_time) {
+ WARN_ON_ONCE(gpu_metrics_ctx->total_active);
+ gpu_metrics_ctx->last_active_end_time =
+ gpu_metrics_ctx->prev_wp_active_end_time + 1;
+ gpu_metrics_ctx->total_active = 1;
+ return;
+ }
+
+ WARN_ON_ONCE(1);
+}
+
+void kbase_gpu_metrics_emit_tracepoint(struct kbase_device *kbdev, u64 ts)
+{
+ struct kbase_gpu_metrics *gpu_metrics = &kbdev->gpu_metrics;
+ struct kbase_gpu_metrics_ctx *gpu_metrics_ctx, *tmp;
+
+ list_for_each_entry_safe(gpu_metrics_ctx, tmp, &gpu_metrics->active_list, link) {
+ if (!gpu_metrics_ctx_flag(gpu_metrics_ctx, ACTIVE_INTERVAL_IN_WP)) {
+ WARN_ON(!gpu_metrics_ctx_flag(gpu_metrics_ctx, INSIDE_ACTIVE_LIST));
+ WARN_ON(gpu_metrics_ctx->active_cnt);
+ list_move_tail(&gpu_metrics_ctx->link, &gpu_metrics->inactive_list);
+ gpu_metrics_ctx_flag_clear(gpu_metrics_ctx, INSIDE_ACTIVE_LIST);
+ continue;
+ }
+
+ emit_tracepoint_for_active_gpu_metrics_ctx(kbdev, gpu_metrics_ctx, ts);
+ }
+}
+
+int kbase_gpu_metrics_init(struct kbase_device *kbdev)
+{
+ INIT_LIST_HEAD(&kbdev->gpu_metrics.active_list);
+ INIT_LIST_HEAD(&kbdev->gpu_metrics.inactive_list);
+
+ dev_info(kbdev->dev, "GPU metrics tracepoint support enabled");
+ return 0;
+}
+
+void kbase_gpu_metrics_term(struct kbase_device *kbdev)
+{
+ WARN_ON_ONCE(!list_empty(&kbdev->gpu_metrics.active_list));
+ WARN_ON_ONCE(!list_empty(&kbdev->gpu_metrics.inactive_list));
+}
+
+#endif
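
An illustrative caller-side sequence showing how the API implemented above is intended to be used, following the kerneldoc in mali_kbase_gpu_metrics.h below; error handling and the required serialization between the calls are omitted, and the function name is hypothetical:

static int example_bind_kctx_to_metrics(struct kbase_context *kctx, u32 aid)
{
	struct kbase_device *kbdev = kctx->kbdev;
	struct kbase_gpu_metrics_ctx *metrics_ctx;

	/* Reuse the per-application metrics context if it already exists... */
	metrics_ctx = kbase_gpu_metrics_ctx_get(kbdev, aid);
	if (!metrics_ctx) {
		/* ...otherwise the caller allocates and initialises a new one. */
		metrics_ctx = kzalloc(sizeof(*metrics_ctx), GFP_KERNEL);
		if (!metrics_ctx)
			return -ENOMEM;
		kbase_gpu_metrics_ctx_init(kbdev, metrics_ctx, aid);
	}
	kctx->gpu_metrics_ctx = metrics_ctx;

	/* Later, GPU activity is bracketed and periodically flushed... */
	kbase_gpu_metrics_ctx_start_activity(kctx, ktime_get_raw_ns());
	kbase_gpu_metrics_ctx_end_activity(kctx, ktime_get_raw_ns());
	kbase_gpu_metrics_emit_tracepoint(kbdev, ktime_get_raw_ns());

	/* ...and the reference is dropped when the Kbase context is destroyed. */
	kbase_gpu_metrics_ctx_put(kbdev, kctx->gpu_metrics_ctx);

	return 0;
}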
diff --git a/mali_kbase/mali_kbase_gpu_metrics.h b/mali_kbase/mali_kbase_gpu_metrics.h
new file mode 100644
index 0000000..adc8816
--- /dev/null
+++ b/mali_kbase/mali_kbase_gpu_metrics.h
@@ -0,0 +1,167 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ *
+ * (C) COPYRIGHT 2023 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+/**
+ * DOC: GPU metrics frontend APIs
+ */
+
+#ifndef _KBASE_GPU_METRICS_H_
+#define _KBASE_GPU_METRICS_H_
+
+#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD)
+#include <mali_kbase.h>
+
+/**
+ * kbase_gpu_metrics_get_emit_interval() - Return the tracepoint emission interval.
+ *
+ * Return: The time interval in nanoseconds for GPU metrics tracepoint emission.
+ */
+unsigned long kbase_gpu_metrics_get_emit_interval(void);
+
+/**
+ * kbase_gpu_metrics_ctx_put() - Decrement the Kbase context count for the GPU metrics
+ * context and free it if the count becomes 0.
+ *
+ * @kbdev: Pointer to the GPU device.
+ * @gpu_metrics_ctx: Pointer to the GPU metrics context.
+ *
+ * This function must be called when a Kbase context is destroyed.
+ * The function would decrement the Kbase context count for the GPU metrics context and
+ * free the memory if the count becomes 0.
+ * The function would emit a power/gpu_work_period tracepoint for the GPU metrics context
+ * if there was some GPU activity done for it since the last tracepoint was emitted.
+ *
+ * Note: The caller must appropriately serialize the call to this function with the
+ * call to other GPU metrics functions declared in this file.
+ */
+void kbase_gpu_metrics_ctx_put(struct kbase_device *kbdev,
+ struct kbase_gpu_metrics_ctx *gpu_metrics_ctx);
+
+/**
+ * kbase_gpu_metrics_ctx_get() - Increment the Kbase context count for the GPU metrics
+ * context if it exists.
+ *
+ * @kbdev: Pointer to the GPU device.
+ * @aid: Unique identifier of the Application that is creating the Kbase context.
+ *
+ * This function must be called when a Kbase context is created.
+ * The function would increment the Kbase context count for the GPU metrics context,
+ * corresponding to the @aid, if it exists.
+ *
+ * Return: Pointer to the GPU metrics context corresponding to the @aid if it already
+ * exists otherwise NULL.
+ *
+ * Note: The caller must appropriately serialize the call to this function with the
+ * call to other GPU metrics functions declared in this file.
+ * The caller shall allocate memory for GPU metrics context structure if the
+ * function returns NULL.
+ */
+struct kbase_gpu_metrics_ctx *kbase_gpu_metrics_ctx_get(struct kbase_device *kbdev, u32 aid);
+
+/**
+ * kbase_gpu_metrics_ctx_init() - Initialise the GPU metrics context
+ *
+ * @kbdev: Pointer to the GPU device.
+ * @gpu_metrics_ctx: Pointer to the GPU metrics context.
+ * @aid: Unique identifier of the Application for which GPU metrics
+ * context needs to be initialized.
+ *
+ * This function must be called when a Kbase context is created, after the call to
+ * kbase_gpu_metrics_ctx_get() returned NULL and memory for the GPU metrics context
+ * structure was allocated.
+ *
+ * Note: The caller must appropriately serialize the call to this function with the
+ * call to other GPU metrics functions declared in this file.
+ */
+void kbase_gpu_metrics_ctx_init(struct kbase_device *kbdev,
+ struct kbase_gpu_metrics_ctx *gpu_metrics_ctx, u32 aid);
+
+/**
+ * kbase_gpu_metrics_ctx_start_activity() - Report the start of some GPU activity
+ * for GPU metrics context.
+ *
+ * @kctx: Pointer to the Kbase context contributing data to the GPU metrics context.
+ * @timestamp_ns: CPU timestamp at which the GPU activity started.
+ *
+ * The provided timestamp would be later used as the "start_time_ns" for the
+ * power/gpu_work_period tracepoint if this is the first GPU activity for the GPU
+ * metrics context in the current work period.
+ *
+ * Note: The caller must appropriately serialize the call to this function with the
+ * call to other GPU metrics functions declared in this file.
+ */
+void kbase_gpu_metrics_ctx_start_activity(struct kbase_context *kctx, u64 timestamp_ns);
+
+/**
+ * kbase_gpu_metrics_ctx_end_activity() - Report the end of some GPU activity
+ * for GPU metrics context.
+ *
+ * @kctx: Pointer to the Kbase context contributing data to the GPU metrics context.
+ * @timestamp_ns: CPU timestamp at which the GPU activity ended.
+ *
+ * The provided timestamp would be later used as the "end_time_ns" for the
+ * power/gpu_work_period tracepoint if this is the last GPU activity for the GPU
+ * metrics context in the current work period.
+ *
+ * Note: The caller must appropriately serialize the call to this function with the
+ * call to other GPU metrics functions declared in this file.
+ */
+void kbase_gpu_metrics_ctx_end_activity(struct kbase_context *kctx, u64 timestamp_ns);
+
+/**
+ * kbase_gpu_metrics_emit_tracepoint() - Emit power/gpu_work_period tracepoint
+ * for active GPU metrics contexts.
+ *
+ * @kbdev: Pointer to the GPU device.
+ * @ts: Timestamp at which the tracepoint is being emitted.
+ *
+ * This function would loop through all the active GPU metrics contexts and emit a
+ * power/gpu_work_period tracepoint for them.
+ * The GPU metrics context that is found to be inactive since the last tracepoint
+ * was emitted would be moved to the inactive list.
+ * The current work period would be considered as over and a new work period would
+ * begin whenever any application does the GPU activity.
+ *
+ * Note: The caller must appropriately serialize the call to this function with the
+ * call to other GPU metrics functions declared in this file.
+ */
+void kbase_gpu_metrics_emit_tracepoint(struct kbase_device *kbdev, u64 ts);
+
+/**
+ * kbase_gpu_metrics_init() - Initialise a gpu_metrics instance for a GPU
+ *
+ * @kbdev: Pointer to the GPU device.
+ *
+ * This function is called once for each @kbdev.
+ *
+ * Return: 0 on success, or negative on failure.
+ */
+int kbase_gpu_metrics_init(struct kbase_device *kbdev);
+
+/**
+ * kbase_gpu_metrics_term() - Terminate a gpu_metrics instance
+ *
+ * @kbdev: Pointer to the GPU device.
+ */
+void kbase_gpu_metrics_term(struct kbase_device *kbdev);
+
+#endif
+#endif /* _KBASE_GPU_METRICS_H_ */
diff --git a/mali_kbase/mali_kbase_gpuprops.c b/mali_kbase/mali_kbase_gpuprops.c
index afbba3d..02d6bb2 100644
--- a/mali_kbase/mali_kbase_gpuprops.c
+++ b/mali_kbase/mali_kbase_gpuprops.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2011-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2011-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -49,7 +49,7 @@ static void kbase_gpuprops_construct_coherent_groups(
props->coherency_info.coherency = props->raw_props.mem_features;
props->coherency_info.num_core_groups = hweight64(props->raw_props.l2_present);
- if (props->coherency_info.coherency & GROUPS_L2_COHERENT) {
+ if (props->coherency_info.coherency & MEM_FEATURES_COHERENT_CORE_GROUP_MASK) {
/* Group is l2 coherent */
group_present = props->raw_props.l2_present;
} else {
diff --git a/mali_kbase/mali_kbase_gwt.c b/mali_kbase/mali_kbase_gwt.c
index 0eba889..32c9241 100644
--- a/mali_kbase/mali_kbase_gwt.c
+++ b/mali_kbase/mali_kbase_gwt.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -53,9 +53,9 @@ static void kbase_gpu_gwt_setup_pages(struct kbase_context *kctx,
unsigned long flag)
{
kbase_gpu_gwt_setup_page_permission(kctx, flag,
- rb_first(&(kctx->reg_rbtree_same)));
+ rb_first(&kctx->reg_zone[SAME_VA_ZONE].reg_rbtree));
kbase_gpu_gwt_setup_page_permission(kctx, flag,
- rb_first(&(kctx->reg_rbtree_custom)));
+ rb_first(&kctx->reg_zone[CUSTOM_VA_ZONE].reg_rbtree));
}
diff --git a/mali_kbase/mali_kbase_hwaccess_time.h b/mali_kbase/mali_kbase_hwaccess_time.h
index ac2a26d..f16348f 100644
--- a/mali_kbase/mali_kbase_hwaccess_time.h
+++ b/mali_kbase/mali_kbase_hwaccess_time.h
@@ -22,13 +22,16 @@
#ifndef _KBASE_BACKEND_TIME_H_
#define _KBASE_BACKEND_TIME_H_
-#if MALI_USE_CSF
/**
* struct kbase_backend_time - System timestamp attributes.
*
* @multiplier: Numerator of the converter's fraction.
* @divisor: Denominator of the converter's fraction.
* @offset: Converter's offset term.
+ * @device_scaled_timeouts: Timeouts in milliseconds that were scaled to be
+ * consistent with the minimum MCU frequency. This
+ * array caches the results of all of the conversions
+ * for ease of use later on.
*
* According to Generic timer spec, system timer:
* - Increments at a fixed frequency
@@ -49,11 +52,15 @@
*
*/
struct kbase_backend_time {
+#if MALI_USE_CSF
u64 multiplier;
u64 divisor;
s64 offset;
+#endif
+ unsigned int device_scaled_timeouts[KBASE_TIMEOUT_SELECTOR_COUNT];
};
+#if MALI_USE_CSF
/**
* kbase_backend_time_convert_gpu_to_cpu() - Convert GPU timestamp to CPU timestamp.
*
@@ -89,6 +96,40 @@ void kbase_backend_get_gpu_time_norequest(struct kbase_device *kbdev,
u64 *cycle_counter,
u64 *system_time,
struct timespec64 *ts);
+
+/**
+ * kbase_device_set_timeout_ms - Set an unscaled device timeout in milliseconds,
+ * subject to the maximum timeout constraint.
+ *
+ * @kbdev: KBase device pointer.
+ * @selector: The specific timeout that should be set.
+ * @timeout_ms: The timeout in milliseconds which should be set.
+ *
+ * This function writes the absolute timeout in milliseconds to the table of
+ * precomputed device timeouts, while establishing an upper bound of UINT_MAX
+ * milliseconds on the individual timeout.
+ */
+void kbase_device_set_timeout_ms(struct kbase_device *kbdev, enum kbase_timeout_selector selector,
+ unsigned int timeout_ms);
+
+/**
+ * kbase_device_set_timeout - Calculate the given timeout using the provided
+ * timeout cycles and multiplier.
+ *
+ * @kbdev: KBase device pointer.
+ * @selector: The specific timeout that should be scaled.
+ * @timeout_cycles: The timeout in cycles which should be scaled.
+ * @cycle_multiplier: A multiplier applied to the number of cycles, allowing
+ * the callsite to scale the minimum timeout based on the
+ * host device.
+ *
+ * This function writes the scaled timeout to the per-device table to avoid
+ * having to recompute the timeouts every single time that the related methods
+ * are called.
+ */
+void kbase_device_set_timeout(struct kbase_device *kbdev, enum kbase_timeout_selector selector,
+ u64 timeout_cycles, u32 cycle_multiplier);
+
/**
* kbase_get_timeout_ms - Choose a timeout value to get a timeout scaled
* GPU frequency, using a choice from
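Not part of the patch: a hedged sketch of the cycles-to-milliseconds conversion that kbase_device_set_timeout() is documented to cache. The actual arithmetic lives in the backend time code (not shown here); the freq_khz parameter, the round-up behaviour and the example_scale_timeout() name are assumptions.

/* Sketch only, not the driver implementation. Assumes the lowest expected
 * GPU/MCU frequency is supplied in kHz; kbase_device_set_timeout_ms() then
 * caps the cached value at UINT_MAX milliseconds, as documented above.
 */
static void example_scale_timeout(struct kbase_device *kbdev,
				  enum kbase_timeout_selector selector,
				  u64 timeout_cycles, u32 cycle_multiplier,
				  u64 freq_khz)
{
	/* freq_khz kHz == freq_khz cycles per millisecond; round up so that
	 * short timeouts never collapse to zero.
	 */
	u64 timeout_ms = div64_u64(timeout_cycles * cycle_multiplier + freq_khz - 1,
				   freq_khz);

	kbase_device_set_timeout_ms(kbdev, selector,
				    (unsigned int)min_t(u64, timeout_ms, UINT_MAX));
}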
diff --git a/mali_kbase/mali_kbase_js.c b/mali_kbase/mali_kbase_js.c
index 5dd7813..d7facb9 100644
--- a/mali_kbase/mali_kbase_js.c
+++ b/mali_kbase/mali_kbase_js.c
@@ -36,6 +36,20 @@
#include "mali_kbase_hwaccess_jm.h"
#include <mali_kbase_hwaccess_time.h>
#include <linux/priority_control_manager.h>
+#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD)
+#include <mali_kbase_gpu_metrics.h>
+
+static unsigned long gpu_metrics_tp_emit_interval_ns = DEFAULT_GPU_METRICS_TP_EMIT_INTERVAL_NS;
+
+module_param(gpu_metrics_tp_emit_interval_ns, ulong, 0444);
+MODULE_PARM_DESC(gpu_metrics_tp_emit_interval_ns,
+ "Time interval in nano seconds at which GPU metrics tracepoints are emitted");
+
+unsigned long kbase_gpu_metrics_get_emit_interval(void)
+{
+ return gpu_metrics_tp_emit_interval_ns;
+}
+#endif
/*
* Private types
@@ -101,6 +115,118 @@ static int kbase_ktrace_get_ctx_refcnt(struct kbase_context *kctx)
* Private functions
*/
+#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD)
+/**
+ * gpu_metrics_timer_callback() - Callback function for the GPU metrics hrtimer
+ *
+ * @timer: Pointer to the GPU metrics hrtimer
+ *
+ * This function will emit power/gpu_work_period tracepoint for all the active
+ * GPU metrics contexts. The timer will be restarted if needed.
+ *
+ * Return: enum value to indicate that the timer should not be restarted automatically.
+ */
+static enum hrtimer_restart gpu_metrics_timer_callback(struct hrtimer *timer)
+{
+ struct kbasep_js_device_data *js_devdata =
+ container_of(timer, struct kbasep_js_device_data, gpu_metrics_timer);
+ struct kbase_device *kbdev =
+ container_of(js_devdata, struct kbase_device, js_data);
+ unsigned long flags;
+
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+ kbase_gpu_metrics_emit_tracepoint(kbdev, ktime_get_raw_ns());
+ WARN_ON_ONCE(!js_devdata->gpu_metrics_timer_running);
+ if (js_devdata->gpu_metrics_timer_needed) {
+ hrtimer_start(&js_devdata->gpu_metrics_timer,
+ HR_TIMER_DELAY_NSEC(gpu_metrics_tp_emit_interval_ns),
+ HRTIMER_MODE_REL);
+ } else
+ js_devdata->gpu_metrics_timer_running = false;
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+ return HRTIMER_NORESTART;
+}
+
+/**
+ * gpu_metrics_ctx_init() - Take a reference on GPU metrics context if it exists,
+ * otherwise allocate and initialise one.
+ *
+ * @kctx: Pointer to the Kbase context.
+ *
+ * The GPU metrics context represents an "Application" for the purposes of GPU metrics
+ * reporting. There may be multiple kbase_contexts contributing data to a single GPU
+ * metrics context.
+ * This function takes a reference on GPU metrics context if it already exists
+ * corresponding to the Application that is creating the Kbase context, otherwise
+ * memory is allocated for it and initialised.
+ *
+ * Return: 0 on success, or negative on failure.
+ */
+static inline int gpu_metrics_ctx_init(struct kbase_context *kctx)
+{
+ struct kbase_gpu_metrics_ctx *gpu_metrics_ctx;
+ struct kbase_device *kbdev = kctx->kbdev;
+ unsigned long flags;
+ int ret = 0;
+
+ const struct cred *cred = get_current_cred();
+ const unsigned int aid = cred->euid.val;
+
+ put_cred(cred);
+
+ /* Return early if this is not a Userspace created context */
+ if (unlikely(!kctx->kfile))
+ return 0;
+
+ /* Serialize against the other threads trying to create/destroy Kbase contexts. */
+ mutex_lock(&kbdev->kctx_list_lock);
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+ gpu_metrics_ctx = kbase_gpu_metrics_ctx_get(kbdev, aid);
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+ if (!gpu_metrics_ctx) {
+ gpu_metrics_ctx = kmalloc(sizeof(*gpu_metrics_ctx), GFP_KERNEL);
+
+ if (gpu_metrics_ctx) {
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+ kbase_gpu_metrics_ctx_init(kbdev, gpu_metrics_ctx, aid);
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+ } else {
+ dev_err(kbdev->dev, "Allocation for gpu_metrics_ctx failed");
+ ret = -ENOMEM;
+ }
+ }
+
+ kctx->gpu_metrics_ctx = gpu_metrics_ctx;
+ mutex_unlock(&kbdev->kctx_list_lock);
+
+ return ret;
+}
+
+/**
+ * gpu_metrics_ctx_term() - Drop a reference on a GPU metrics context and free it
+ * if the refcount becomes 0.
+ *
+ * @kctx: Pointer to the Kbase context.
+ */
+static inline void gpu_metrics_ctx_term(struct kbase_context *kctx)
+{
+ unsigned long flags;
+
+ /* Return early if this is not a Userspace created context */
+ if (unlikely(!kctx->kfile))
+ return;
+
+ /* Serialize against the other threads trying to create/destroy Kbase contexts. */
+ mutex_lock(&kctx->kbdev->kctx_list_lock);
+ spin_lock_irqsave(&kctx->kbdev->hwaccess_lock, flags);
+ kbase_gpu_metrics_ctx_put(kctx->kbdev, kctx->gpu_metrics_ctx);
+ spin_unlock_irqrestore(&kctx->kbdev->hwaccess_lock, flags);
+ mutex_unlock(&kctx->kbdev->kctx_list_lock);
+}
+#endif
+
/**
* core_reqs_from_jsn_features - Convert JSn_FEATURES to core requirements
* @features: JSn_FEATURE register value
@@ -602,6 +728,21 @@ int kbasep_js_devdata_init(struct kbase_device * const kbdev)
}
}
+#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD)
+ if (!gpu_metrics_tp_emit_interval_ns || (gpu_metrics_tp_emit_interval_ns > NSEC_PER_SEC)) {
+ dev_warn(
+ kbdev->dev,
+ "Invalid value (%lu ns) for module param gpu_metrics_tp_emit_interval_ns. Using default value: %u ns",
+ gpu_metrics_tp_emit_interval_ns, DEFAULT_GPU_METRICS_TP_EMIT_INTERVAL_NS);
+ gpu_metrics_tp_emit_interval_ns = DEFAULT_GPU_METRICS_TP_EMIT_INTERVAL_NS;
+ }
+
+ hrtimer_init(&jsdd->gpu_metrics_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+ jsdd->gpu_metrics_timer.function = gpu_metrics_timer_callback;
+ jsdd->gpu_metrics_timer_needed = false;
+ jsdd->gpu_metrics_timer_running = false;
+#endif
+
return 0;
}
@@ -626,16 +767,29 @@ void kbasep_js_devdata_term(struct kbase_device *kbdev)
zero_ctx_attr_ref_count,
sizeof(zero_ctx_attr_ref_count)) == 0);
CSTD_UNUSED(zero_ctx_attr_ref_count);
+
+#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD)
+ js_devdata->gpu_metrics_timer_needed = false;
+ hrtimer_cancel(&js_devdata->gpu_metrics_timer);
+#endif
}
int kbasep_js_kctx_init(struct kbase_context *const kctx)
{
struct kbasep_js_kctx_info *js_kctx_info;
int i, j;
+ int ret;
CSTD_UNUSED(js_kctx_info);
KBASE_DEBUG_ASSERT(kctx != NULL);
+ CSTD_UNUSED(ret);
+#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD)
+ ret = gpu_metrics_ctx_init(kctx);
+ if (ret)
+ return ret;
+#endif
+
kbase_ctx_sched_init_ctx(kctx);
for (i = 0; i < BASE_JM_MAX_NR_SLOTS; ++i)
@@ -715,6 +869,9 @@ void kbasep_js_kctx_term(struct kbase_context *kctx)
}
kbase_ctx_sched_remove_ctx(kctx);
+#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD)
+ gpu_metrics_ctx_term(kctx);
+#endif
}
/*
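Not part of the patch: kbase_gpu_metrics_get_emit_interval() added above is a plain accessor for the module parameter, so another compilation unit (the CSF scheduler is assumed to be one such consumer) can derive its own emission deadline from it, along the lines of:

/* Sketch only: compute the next tracepoint emission deadline from the
 * module-parameter-controlled interval exported above.
 */
static u64 example_next_emit_deadline(void)
{
	return ktime_get_raw_ns() + kbase_gpu_metrics_get_emit_interval();
}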
diff --git a/mali_kbase/mali_kbase_kinstr_prfcnt.c b/mali_kbase/mali_kbase_kinstr_prfcnt.c
index cfafd11..f0c4da7 100644
--- a/mali_kbase/mali_kbase_kinstr_prfcnt.c
+++ b/mali_kbase/mali_kbase_kinstr_prfcnt.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2021-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -36,7 +36,6 @@
#include <linux/mutex.h>
#include <linux/poll.h>
#include <linux/slab.h>
-#include <linux/overflow.h>
#include <linux/version_compat_defs.h>
#include <linux/workqueue.h>
@@ -1267,8 +1266,10 @@ void kbase_kinstr_prfcnt_term(struct kbase_kinstr_prfcnt_context *kinstr_ctx)
void kbase_kinstr_prfcnt_suspend(struct kbase_kinstr_prfcnt_context *kinstr_ctx)
{
- if (WARN_ON(!kinstr_ctx))
+ if (!kinstr_ctx) {
+ pr_warn("%s: kinstr_ctx is NULL\n", __func__);
return;
+ }
mutex_lock(&kinstr_ctx->lock);
@@ -1297,8 +1298,10 @@ void kbase_kinstr_prfcnt_suspend(struct kbase_kinstr_prfcnt_context *kinstr_ctx)
void kbase_kinstr_prfcnt_resume(struct kbase_kinstr_prfcnt_context *kinstr_ctx)
{
- if (WARN_ON(!kinstr_ctx))
+ if (!kinstr_ctx) {
+ pr_warn("%s: kinstr_ctx is NULL\n", __func__);
return;
+ }
mutex_lock(&kinstr_ctx->lock);
diff --git a/mali_kbase/mali_kbase_mem.c b/mali_kbase/mali_kbase_mem.c
index 8912783..c07d520 100644
--- a/mali_kbase/mali_kbase_mem.c
+++ b/mali_kbase/mali_kbase_mem.c
@@ -43,7 +43,7 @@
#include <mmu/mali_kbase_mmu.h>
#include <mali_kbase_config_defaults.h>
#include <mali_kbase_trace_gpu_mem.h>
-
+#include <linux/version_compat_defs.h>
#define VA_REGION_SLAB_NAME_PREFIX "va-region-slab-"
#define VA_REGION_SLAB_NAME_SIZE (DEVNAME_SIZE + sizeof(VA_REGION_SLAB_NAME_PREFIX) + 1)
@@ -101,56 +101,66 @@ static size_t kbase_get_num_cpu_va_bits(struct kbase_context *kctx)
return cpu_va_bits;
}
-/* This function finds out which RB tree the given pfn from the GPU VA belongs
- * to based on the memory zone the pfn refers to
- */
-static struct rb_root *kbase_gpu_va_to_rbtree(struct kbase_context *kctx,
- u64 gpu_pfn)
+unsigned long kbase_zone_to_bits(enum kbase_memory_zone zone)
{
- struct rb_root *rbtree = NULL;
+ return ((((unsigned long)zone) & ((1 << KBASE_REG_ZONE_BITS) - 1ul))
+ << KBASE_REG_ZONE_SHIFT);
+}
- struct kbase_reg_zone *exec_va_zone = kbase_ctx_reg_zone_get(kctx, KBASE_REG_ZONE_EXEC_VA);
+enum kbase_memory_zone kbase_bits_to_zone(unsigned long zone_bits)
+{
+ return (enum kbase_memory_zone)(((zone_bits) & KBASE_REG_ZONE_MASK)
+ >> KBASE_REG_ZONE_SHIFT);
+}
+char *kbase_reg_zone_get_name(enum kbase_memory_zone zone)
+{
+ switch (zone) {
+ case SAME_VA_ZONE:
+ return "SAME_VA";
+ case CUSTOM_VA_ZONE:
+ return "CUSTOM_VA";
+ case EXEC_VA_ZONE:
+ return "EXEC_VA";
#if MALI_USE_CSF
- struct kbase_reg_zone *fixed_va_zone =
- kbase_ctx_reg_zone_get(kctx, KBASE_REG_ZONE_FIXED_VA);
-
- struct kbase_reg_zone *exec_fixed_va_zone =
- kbase_ctx_reg_zone_get(kctx, KBASE_REG_ZONE_EXEC_FIXED_VA);
-
- if (gpu_pfn >= fixed_va_zone->base_pfn) {
- rbtree = &kctx->reg_rbtree_fixed;
- return rbtree;
- } else if (gpu_pfn >= exec_fixed_va_zone->base_pfn) {
- rbtree = &kctx->reg_rbtree_exec_fixed;
- return rbtree;
- }
+ case MCU_SHARED_ZONE:
+ return "MCU_SHARED";
+ case EXEC_FIXED_VA_ZONE:
+ return "EXEC_FIXED_VA";
+ case FIXED_VA_ZONE:
+ return "FIXED_VA";
#endif
- if (gpu_pfn >= exec_va_zone->base_pfn)
- rbtree = &kctx->reg_rbtree_exec;
- else {
- u64 same_va_end;
+ default:
+ return NULL;
+ }
+}
- if (kbase_ctx_compat_mode(kctx)) {
- same_va_end = KBASE_REG_ZONE_CUSTOM_VA_BASE;
- } else {
- struct kbase_reg_zone *same_va_zone =
- kbase_ctx_reg_zone_get(kctx,
- KBASE_REG_ZONE_SAME_VA);
- same_va_end = kbase_reg_zone_end_pfn(same_va_zone);
- }
+/**
+ * kbase_gpu_pfn_to_rbtree - find the rb-tree tracking the region with the indicated GPU
+ * page frame number
+ * @kctx: kbase context
+ * @gpu_pfn: GPU PFN address
+ *
+ * Context: any context.
+ *
+ * Return: reference to the rb-tree root, NULL if not found
+ */
+static struct rb_root *kbase_gpu_pfn_to_rbtree(struct kbase_context *kctx, u64 gpu_pfn)
+{
+ enum kbase_memory_zone zone_idx;
+ struct kbase_reg_zone *zone;
- if (gpu_pfn >= same_va_end)
- rbtree = &kctx->reg_rbtree_custom;
- else
- rbtree = &kctx->reg_rbtree_same;
+ for (zone_idx = 0; zone_idx < CONTEXT_ZONE_MAX; zone_idx++) {
+ zone = &kctx->reg_zone[zone_idx];
+ if ((gpu_pfn >= zone->base_pfn) && (gpu_pfn < kbase_reg_zone_end_pfn(zone)))
+ return &zone->reg_rbtree;
}
- return rbtree;
+ return NULL;
}
/* This function inserts a region into the tree. */
-static void kbase_region_tracker_insert(struct kbase_va_region *new_reg)
+void kbase_region_tracker_insert(struct kbase_va_region *new_reg)
{
u64 start_pfn = new_reg->start_pfn;
struct rb_node **link = NULL;
@@ -251,7 +261,9 @@ struct kbase_va_region *kbase_region_tracker_find_region_enclosing_address(
lockdep_assert_held(&kctx->reg_lock);
- rbtree = kbase_gpu_va_to_rbtree(kctx, gpu_pfn);
+ rbtree = kbase_gpu_pfn_to_rbtree(kctx, gpu_pfn);
+ if (unlikely(!rbtree))
+ return NULL;
return kbase_find_region_enclosing_address(rbtree, gpu_addr);
}
@@ -289,7 +301,9 @@ struct kbase_va_region *kbase_region_tracker_find_region_base_address(
lockdep_assert_held(&kctx->reg_lock);
- rbtree = kbase_gpu_va_to_rbtree(kctx, gpu_pfn);
+ rbtree = kbase_gpu_pfn_to_rbtree(kctx, gpu_pfn);
+ if (unlikely(!rbtree))
+ return NULL;
return kbase_find_region_base_address(rbtree, gpu_addr);
}
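Not part of the patch: a quick illustration of the invariant the new lookup relies on, namely that the zone id encoded into reg->flags by kbase_zone_to_bits() decodes back via kbase_bits_to_zone(). The example_check_zone_roundtrip() helper is illustrative only.

/* Illustrative only: the zone encoded into reg->flags must survive the
 * encode/decode round trip used by the region lookups above.
 */
static void example_check_zone_roundtrip(struct kbase_va_region *reg)
{
	enum kbase_memory_zone zone = kbase_bits_to_zone(reg->flags);

	WARN_ON(kbase_zone_to_bits(zone) != (reg->flags & KBASE_REG_ZONE_MASK));
}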
@@ -376,6 +390,7 @@ void kbase_remove_va_region(struct kbase_device *kbdev,
struct kbase_va_region *reg)
{
struct rb_node *rbprev;
+ struct kbase_reg_zone *zone = container_of(reg->rbtree, struct kbase_reg_zone, reg_rbtree);
struct kbase_va_region *prev = NULL;
struct rb_node *rbnext;
struct kbase_va_region *next = NULL;
@@ -400,8 +415,8 @@ void kbase_remove_va_region(struct kbase_device *kbdev,
*/
u64 prev_end_pfn = prev->start_pfn + prev->nr_pages;
- WARN_ON((prev->flags & KBASE_REG_ZONE_MASK) !=
- (reg->flags & KBASE_REG_ZONE_MASK));
+ WARN_ON((kbase_bits_to_zone(prev->flags)) !=
+ (kbase_bits_to_zone(reg->flags)));
if (!WARN_ON(reg->start_pfn < prev_end_pfn))
prev->nr_pages += reg->start_pfn - prev_end_pfn;
prev->nr_pages += reg->nr_pages;
@@ -422,8 +437,8 @@ void kbase_remove_va_region(struct kbase_device *kbdev,
*/
u64 reg_end_pfn = reg->start_pfn + reg->nr_pages;
- WARN_ON((next->flags & KBASE_REG_ZONE_MASK) !=
- (reg->flags & KBASE_REG_ZONE_MASK));
+ WARN_ON((kbase_bits_to_zone(next->flags)) !=
+ (kbase_bits_to_zone(reg->flags)));
if (!WARN_ON(next->start_pfn < reg_end_pfn))
next->nr_pages += next->start_pfn - reg_end_pfn;
next->start_pfn = reg->start_pfn;
@@ -445,8 +460,7 @@ void kbase_remove_va_region(struct kbase_device *kbdev,
*/
struct kbase_va_region *free_reg;
- free_reg = kbase_alloc_free_region(kbdev, reg_rbtree, reg->start_pfn, reg->nr_pages,
- reg->flags & KBASE_REG_ZONE_MASK);
+ free_reg = kbase_alloc_free_region(zone, reg->start_pfn, reg->nr_pages);
if (!free_reg) {
/* In case of failure, we cannot allocate a replacement
* free region, so we will be left with a 'gap' in the
@@ -507,6 +521,8 @@ static int kbase_insert_va_region_nolock(struct kbase_device *kbdev,
size_t nr_pages)
{
struct rb_root *reg_rbtree = NULL;
+ struct kbase_reg_zone *zone =
+ container_of(at_reg->rbtree, struct kbase_reg_zone, reg_rbtree);
int err = 0;
reg_rbtree = at_reg->rbtree;
@@ -548,9 +564,8 @@ static int kbase_insert_va_region_nolock(struct kbase_device *kbdev,
else {
struct kbase_va_region *new_front_reg;
- new_front_reg = kbase_alloc_free_region(kbdev, reg_rbtree, at_reg->start_pfn,
- start_pfn - at_reg->start_pfn,
- at_reg->flags & KBASE_REG_ZONE_MASK);
+ new_front_reg = kbase_alloc_free_region(zone, at_reg->start_pfn,
+ start_pfn - at_reg->start_pfn);
if (new_front_reg) {
at_reg->nr_pages -= nr_pages + new_front_reg->nr_pages;
@@ -603,9 +618,9 @@ int kbase_add_va_region(struct kbase_context *kctx,
#endif
if (!(reg->flags & KBASE_REG_GPU_NX) && !addr &&
#if MALI_USE_CSF
- ((reg->flags & KBASE_REG_ZONE_MASK) != KBASE_REG_ZONE_EXEC_FIXED_VA) &&
+ ((kbase_bits_to_zone(reg->flags)) != EXEC_FIXED_VA_ZONE) &&
#endif
- ((reg->flags & KBASE_REG_ZONE_MASK) != KBASE_REG_ZONE_EXEC_VA)) {
+ ((kbase_bits_to_zone(reg->flags)) != EXEC_VA_ZONE)) {
if (cpu_va_bits > gpu_pc_bits) {
align = max(align, (size_t)((1ULL << gpu_pc_bits)
>> PAGE_SHIFT));
@@ -623,8 +638,7 @@ int kbase_add_va_region(struct kbase_context *kctx,
* then don't retry, we're out of VA and there is
* nothing which can be done about it.
*/
- if ((reg->flags & KBASE_REG_ZONE_MASK) !=
- KBASE_REG_ZONE_CUSTOM_VA)
+ if ((kbase_bits_to_zone(reg->flags)) != CUSTOM_VA_ZONE)
break;
} while (kbase_jit_evict(kctx));
@@ -728,119 +742,27 @@ exit:
return err;
}
-/*
- * @brief Initialize the internal region tracker data structure.
+/**
+ * kbase_reg_to_kctx - Obtain the kbase context tracking a VA region.
+ * @reg: VA region
+ *
+ * Return:
+ * * pointer to kbase context of the memory allocation
+ * * NULL if the region does not belong to a kbase context (for instance,
+ * if the allocation corresponds to a shared MCU region on CSF).
*/
-#if MALI_USE_CSF
-static void kbase_region_tracker_ds_init(struct kbase_context *kctx,
- struct kbase_va_region *same_va_reg,
- struct kbase_va_region *custom_va_reg,
- struct kbase_va_region *exec_va_reg,
- struct kbase_va_region *exec_fixed_va_reg,
- struct kbase_va_region *fixed_va_reg)
-{
- u64 last_zone_end_pfn;
-
- kctx->reg_rbtree_same = RB_ROOT;
- kbase_region_tracker_insert(same_va_reg);
-
- last_zone_end_pfn = same_va_reg->start_pfn + same_va_reg->nr_pages;
-
- /* Although custom_va_reg doesn't always exist, initialize
- * unconditionally because of the mem_view debugfs
- * implementation which relies on it being empty.
- */
- kctx->reg_rbtree_custom = RB_ROOT;
- kctx->reg_rbtree_exec = RB_ROOT;
-
- if (custom_va_reg) {
- WARN_ON(custom_va_reg->start_pfn < last_zone_end_pfn);
- kbase_region_tracker_insert(custom_va_reg);
- last_zone_end_pfn = custom_va_reg->start_pfn + custom_va_reg->nr_pages;
- }
-
- /* Initialize exec, fixed and exec_fixed. These are always
- * initialized at this stage, if they will exist at all.
- */
- kctx->reg_rbtree_fixed = RB_ROOT;
- kctx->reg_rbtree_exec_fixed = RB_ROOT;
-
- if (exec_va_reg) {
- WARN_ON(exec_va_reg->start_pfn < last_zone_end_pfn);
- kbase_region_tracker_insert(exec_va_reg);
- last_zone_end_pfn = exec_va_reg->start_pfn + exec_va_reg->nr_pages;
- }
-
- if (exec_fixed_va_reg) {
- WARN_ON(exec_fixed_va_reg->start_pfn < last_zone_end_pfn);
- kbase_region_tracker_insert(exec_fixed_va_reg);
- last_zone_end_pfn = exec_fixed_va_reg->start_pfn + exec_fixed_va_reg->nr_pages;
- }
-
- if (fixed_va_reg) {
- WARN_ON(fixed_va_reg->start_pfn < last_zone_end_pfn);
- kbase_region_tracker_insert(fixed_va_reg);
- last_zone_end_pfn = fixed_va_reg->start_pfn + fixed_va_reg->nr_pages;
- }
-}
-#else
-static void kbase_region_tracker_ds_init(struct kbase_context *kctx,
- struct kbase_va_region *same_va_reg,
- struct kbase_va_region *custom_va_reg)
-{
- kctx->reg_rbtree_same = RB_ROOT;
- kbase_region_tracker_insert(same_va_reg);
-
- /* Although custom_va_reg and exec_va_reg don't always exist,
- * initialize unconditionally because of the mem_view debugfs
- * implementation which relies on them being empty.
- *
- * The difference between the two is that the EXEC_VA region
- * is never initialized at this stage.
- */
- kctx->reg_rbtree_custom = RB_ROOT;
- kctx->reg_rbtree_exec = RB_ROOT;
-
- if (custom_va_reg)
- kbase_region_tracker_insert(custom_va_reg);
-}
-#endif /* MALI_USE_CSF */
-
-static struct kbase_context *kbase_reg_flags_to_kctx(struct kbase_va_region *reg)
+static struct kbase_context *kbase_reg_to_kctx(struct kbase_va_region *reg)
{
- struct kbase_context *kctx = NULL;
struct rb_root *rbtree = reg->rbtree;
+ struct kbase_reg_zone *zone = container_of(rbtree, struct kbase_reg_zone, reg_rbtree);
- switch (reg->flags & KBASE_REG_ZONE_MASK) {
- case KBASE_REG_ZONE_CUSTOM_VA:
- kctx = container_of(rbtree, struct kbase_context, reg_rbtree_custom);
- break;
- case KBASE_REG_ZONE_SAME_VA:
- kctx = container_of(rbtree, struct kbase_context, reg_rbtree_same);
- break;
- case KBASE_REG_ZONE_EXEC_VA:
- kctx = container_of(rbtree, struct kbase_context, reg_rbtree_exec);
- break;
-#if MALI_USE_CSF
- case KBASE_REG_ZONE_EXEC_FIXED_VA:
- kctx = container_of(rbtree, struct kbase_context, reg_rbtree_exec_fixed);
- break;
- case KBASE_REG_ZONE_FIXED_VA:
- kctx = container_of(rbtree, struct kbase_context, reg_rbtree_fixed);
- break;
- case KBASE_REG_ZONE_MCU_SHARED:
- /* This is only expected to be called on driver unload. */
- break;
-#endif
- default:
- WARN(1, "Unknown zone in region: flags=0x%lx\n", reg->flags);
- break;
- }
+ if (!kbase_is_ctx_reg_zone(zone->id))
+ return NULL;
- return kctx;
+ return container_of(zone - zone->id, struct kbase_context, reg_zone[0]);
}
-static void kbase_region_tracker_erase_rbtree(struct rb_root *rbtree)
+void kbase_region_tracker_erase_rbtree(struct rb_root *rbtree)
{
struct rb_node *rbnode;
struct kbase_va_region *reg;
@@ -851,8 +773,12 @@ static void kbase_region_tracker_erase_rbtree(struct rb_root *rbtree)
rb_erase(rbnode, rbtree);
reg = rb_entry(rbnode, struct kbase_va_region, rblink);
WARN_ON(kbase_refcount_read(&reg->va_refcnt) != 1);
- if (kbase_page_migration_enabled)
- kbase_gpu_munmap(kbase_reg_flags_to_kctx(reg), reg);
+ if (kbase_is_page_migration_enabled()) {
+ struct kbase_context *kctx = kbase_reg_to_kctx(reg);
+
+ if (kctx)
+ kbase_gpu_munmap(kctx, reg);
+ }
/* Reset the start_pfn - as the rbtree is being
* destroyed and we've already erased this region, there
* is no further need to attempt to remove it.
@@ -867,209 +793,261 @@ static void kbase_region_tracker_erase_rbtree(struct rb_root *rbtree)
} while (rbnode);
}
-void kbase_region_tracker_term(struct kbase_context *kctx)
-{
- WARN(kctx->as_nr != KBASEP_AS_NR_INVALID,
- "kctx-%d_%d must first be scheduled out to flush GPU caches+tlbs before erasing remaining regions",
- kctx->tgid, kctx->id);
-
- kbase_gpu_vm_lock(kctx);
- kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_same);
- kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_custom);
- kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_exec);
-#if MALI_USE_CSF
- WARN_ON(!list_empty(&kctx->csf.event_pages_head));
- kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_exec_fixed);
- kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_fixed);
-
-#endif
- kbase_gpu_vm_unlock(kctx);
-}
-
-void kbase_region_tracker_term_rbtree(struct rb_root *rbtree)
-{
- kbase_region_tracker_erase_rbtree(rbtree);
-}
-
static size_t kbase_get_same_va_bits(struct kbase_context *kctx)
{
return min_t(size_t, kbase_get_num_cpu_va_bits(kctx),
kctx->kbdev->gpu_props.mmu.va_bits);
}
-int kbase_region_tracker_init(struct kbase_context *kctx)
+static int kbase_reg_zone_same_va_init(struct kbase_context *kctx, u64 gpu_va_limit)
{
- struct kbase_va_region *same_va_reg;
- struct kbase_va_region *custom_va_reg = NULL;
- size_t same_va_bits = kbase_get_same_va_bits(kctx);
- u64 custom_va_size = KBASE_REG_ZONE_CUSTOM_VA_SIZE;
- u64 gpu_va_bits = kctx->kbdev->gpu_props.mmu.va_bits;
- u64 gpu_va_limit = (1ULL << gpu_va_bits) >> PAGE_SHIFT;
- u64 same_va_pages;
- u64 same_va_base = 1u;
int err;
-#if MALI_USE_CSF
- struct kbase_va_region *exec_va_reg;
- struct kbase_va_region *exec_fixed_va_reg;
- struct kbase_va_region *fixed_va_reg;
-
- u64 exec_va_base;
- u64 fixed_va_end;
- u64 exec_fixed_va_base;
- u64 fixed_va_base;
- u64 fixed_va_pages;
-#endif
-
- /* Take the lock as kbase_free_alloced_region requires it */
- kbase_gpu_vm_lock(kctx);
+ struct kbase_reg_zone *zone = kbase_ctx_reg_zone_get(kctx, SAME_VA_ZONE);
+ const size_t same_va_bits = kbase_get_same_va_bits(kctx);
+ const u64 base_pfn = 1u;
+ u64 nr_pages = (1ULL << (same_va_bits - PAGE_SHIFT)) - base_pfn;
- same_va_pages = (1ULL << (same_va_bits - PAGE_SHIFT)) - same_va_base;
+ lockdep_assert_held(&kctx->reg_lock);
#if MALI_USE_CSF
- if ((same_va_base + same_va_pages) > KBASE_REG_ZONE_EXEC_VA_BASE_64) {
+ if ((base_pfn + nr_pages) > KBASE_REG_ZONE_EXEC_VA_BASE_64) {
/* Depending on how the kernel is configured, it's possible (eg on aarch64) for
* same_va_bits to reach 48 bits. Cap same_va_pages so that the same_va zone
* doesn't cross into the exec_va zone.
*/
- same_va_pages = KBASE_REG_ZONE_EXEC_VA_BASE_64 - same_va_base;
+ nr_pages = KBASE_REG_ZONE_EXEC_VA_BASE_64 - base_pfn;
}
#endif
+ err = kbase_reg_zone_init(kctx->kbdev, zone, SAME_VA_ZONE, base_pfn, nr_pages);
+ if (err)
+ return -ENOMEM;
- /* all have SAME_VA */
- same_va_reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_same, same_va_base,
- same_va_pages, KBASE_REG_ZONE_SAME_VA);
+ kctx->gpu_va_end = base_pfn + nr_pages;
- if (!same_va_reg) {
- err = -ENOMEM;
- goto fail_unlock;
- }
- kbase_ctx_reg_zone_init(kctx, KBASE_REG_ZONE_SAME_VA, same_va_base,
- same_va_pages);
+ return 0;
+}
- if (kbase_ctx_compat_mode(kctx)) {
- if (gpu_va_limit <= KBASE_REG_ZONE_CUSTOM_VA_BASE) {
- err = -EINVAL;
- goto fail_free_same_va;
- }
- /* If the current size of TMEM is out of range of the
- * virtual address space addressable by the MMU then
- * we should shrink it to fit
- */
- if ((KBASE_REG_ZONE_CUSTOM_VA_BASE + KBASE_REG_ZONE_CUSTOM_VA_SIZE) >= gpu_va_limit)
- custom_va_size = gpu_va_limit - KBASE_REG_ZONE_CUSTOM_VA_BASE;
+static void kbase_reg_zone_same_va_term(struct kbase_context *kctx)
+{
+ struct kbase_reg_zone *zone = kbase_ctx_reg_zone_get(kctx, SAME_VA_ZONE);
- custom_va_reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_custom,
- KBASE_REG_ZONE_CUSTOM_VA_BASE,
- custom_va_size, KBASE_REG_ZONE_CUSTOM_VA);
+ kbase_reg_zone_term(zone);
+}
- if (!custom_va_reg) {
- err = -ENOMEM;
- goto fail_free_same_va;
- }
- kbase_ctx_reg_zone_init(kctx, KBASE_REG_ZONE_CUSTOM_VA,
- KBASE_REG_ZONE_CUSTOM_VA_BASE,
- custom_va_size);
- } else {
- custom_va_size = 0;
- }
+static int kbase_reg_zone_custom_va_init(struct kbase_context *kctx, u64 gpu_va_limit)
+{
+ struct kbase_reg_zone *zone = kbase_ctx_reg_zone_get(kctx, CUSTOM_VA_ZONE);
+ u64 nr_pages = KBASE_REG_ZONE_CUSTOM_VA_SIZE;
-#if MALI_USE_CSF
- /* The position of EXEC_VA depends on whether the client is 32-bit or 64-bit. */
- exec_va_base = KBASE_REG_ZONE_EXEC_VA_BASE_64;
+ /* If the context does not support CUSTOM_VA zones, then we don't need to
+ * proceed past this point, and can pretend that it was initialized properly.
+ * In practice, this will mean that the zone metadata structure will be zero
+ * initialized and not contain a valid zone ID.
+ */
+ if (!kbase_ctx_compat_mode(kctx))
+ return 0;
+
+ if (gpu_va_limit <= KBASE_REG_ZONE_CUSTOM_VA_BASE)
+ return -EINVAL;
- /* Similarly the end of the FIXED_VA zone also depends on whether the client
- * is 32 or 64-bits.
+ /* If the current size of TMEM is out of range of the
+ * virtual address space addressable by the MMU then
+ * we should shrink it to fit
*/
- fixed_va_end = KBASE_REG_ZONE_FIXED_VA_END_64;
+ if ((KBASE_REG_ZONE_CUSTOM_VA_BASE + KBASE_REG_ZONE_CUSTOM_VA_SIZE) >= gpu_va_limit)
+ nr_pages = gpu_va_limit - KBASE_REG_ZONE_CUSTOM_VA_BASE;
- if (kbase_ctx_compat_mode(kctx)) {
- exec_va_base = KBASE_REG_ZONE_EXEC_VA_BASE_32;
- fixed_va_end = KBASE_REG_ZONE_FIXED_VA_END_32;
- }
+ if (kbase_reg_zone_init(kctx->kbdev, zone, CUSTOM_VA_ZONE, KBASE_REG_ZONE_CUSTOM_VA_BASE,
+ nr_pages))
+ return -ENOMEM;
- kbase_ctx_reg_zone_init(kctx, KBASE_REG_ZONE_EXEC_VA, exec_va_base,
- KBASE_REG_ZONE_EXEC_VA_SIZE);
+ /* On JM systems, this is the last memory zone that gets initialized,
+ * so the GPU VA ends right after the end of the CUSTOM_VA zone. On CSF,
+ * setting it here is harmless, as the FIXED_VA initializer will overwrite
+ * it.
+ */
+ kctx->gpu_va_end += nr_pages;
- exec_va_reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_exec, exec_va_base,
- KBASE_REG_ZONE_EXEC_VA_SIZE, KBASE_REG_ZONE_EXEC_VA);
+ return 0;
+}
- if (!exec_va_reg) {
- err = -ENOMEM;
- goto fail_free_custom_va;
- }
+static void kbase_reg_zone_custom_va_term(struct kbase_context *kctx)
+{
+ struct kbase_reg_zone *zone = kbase_ctx_reg_zone_get(kctx, CUSTOM_VA_ZONE);
- exec_fixed_va_base = exec_va_base + KBASE_REG_ZONE_EXEC_VA_SIZE;
+ kbase_reg_zone_term(zone);
+}
- kbase_ctx_reg_zone_init(kctx, KBASE_REG_ZONE_EXEC_FIXED_VA, exec_fixed_va_base,
- KBASE_REG_ZONE_EXEC_FIXED_VA_SIZE);
+static inline u64 kbase_get_exec_va_zone_base(struct kbase_context *kctx)
+{
+ u64 base_pfn;
- exec_fixed_va_reg =
- kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_exec_fixed,
- exec_fixed_va_base, KBASE_REG_ZONE_EXEC_FIXED_VA_SIZE,
- KBASE_REG_ZONE_EXEC_FIXED_VA);
+#if MALI_USE_CSF
+ base_pfn = KBASE_REG_ZONE_EXEC_VA_BASE_64;
+ if (kbase_ctx_compat_mode(kctx))
+ base_pfn = KBASE_REG_ZONE_EXEC_VA_BASE_32;
+#else
+ /* EXEC_VA zone's codepaths are slightly easier when its base_pfn is
+ * initially U64_MAX
+ */
+ base_pfn = U64_MAX;
+#endif
- if (!exec_fixed_va_reg) {
- err = -ENOMEM;
- goto fail_free_exec_va;
- }
+ return base_pfn;
+}
+
+static inline int kbase_reg_zone_exec_va_init(struct kbase_context *kctx, u64 gpu_va_limit)
+{
+ struct kbase_reg_zone *zone = kbase_ctx_reg_zone_get(kctx, EXEC_VA_ZONE);
+ const u64 base_pfn = kbase_get_exec_va_zone_base(kctx);
+ u64 nr_pages = KBASE_REG_ZONE_EXEC_VA_SIZE;
+
+#if !MALI_USE_CSF
+ nr_pages = 0;
+#endif
- fixed_va_base = exec_fixed_va_base + KBASE_REG_ZONE_EXEC_FIXED_VA_SIZE;
- fixed_va_pages = fixed_va_end - fixed_va_base;
+ return kbase_reg_zone_init(kctx->kbdev, zone, EXEC_VA_ZONE, base_pfn, nr_pages);
+}
- kbase_ctx_reg_zone_init(kctx, KBASE_REG_ZONE_FIXED_VA, fixed_va_base, fixed_va_pages);
+static void kbase_reg_zone_exec_va_term(struct kbase_context *kctx)
+{
+ struct kbase_reg_zone *zone = kbase_ctx_reg_zone_get(kctx, EXEC_VA_ZONE);
- fixed_va_reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_fixed, fixed_va_base,
- fixed_va_pages, KBASE_REG_ZONE_FIXED_VA);
+ kbase_reg_zone_term(zone);
+}
+
+#if MALI_USE_CSF
+static inline u64 kbase_get_exec_fixed_va_zone_base(struct kbase_context *kctx)
+{
+ return kbase_get_exec_va_zone_base(kctx) + KBASE_REG_ZONE_EXEC_VA_SIZE;
+}
+
+static int kbase_reg_zone_exec_fixed_va_init(struct kbase_context *kctx, u64 gpu_va_limit)
+{
+ struct kbase_reg_zone *zone = kbase_ctx_reg_zone_get(kctx, EXEC_FIXED_VA_ZONE);
+ const u64 base_pfn = kbase_get_exec_fixed_va_zone_base(kctx);
+
+ return kbase_reg_zone_init(kctx->kbdev, zone, EXEC_FIXED_VA_ZONE, base_pfn,
+ KBASE_REG_ZONE_EXEC_FIXED_VA_SIZE);
+}
+
+static void kbase_reg_zone_exec_fixed_va_term(struct kbase_context *kctx)
+{
+ struct kbase_reg_zone *zone = kbase_ctx_reg_zone_get(kctx, EXEC_FIXED_VA_ZONE);
+
+ WARN_ON(!list_empty(&kctx->csf.event_pages_head));
+ kbase_reg_zone_term(zone);
+}
+
+static int kbase_reg_zone_fixed_va_init(struct kbase_context *kctx, u64 gpu_va_limit)
+{
+ struct kbase_reg_zone *zone = kbase_ctx_reg_zone_get(kctx, FIXED_VA_ZONE);
+ const u64 base_pfn =
+ kbase_get_exec_fixed_va_zone_base(kctx) + KBASE_REG_ZONE_EXEC_FIXED_VA_SIZE;
+ u64 fixed_va_end = KBASE_REG_ZONE_FIXED_VA_END_64;
+ u64 nr_pages;
+
+ if (kbase_ctx_compat_mode(kctx))
+ fixed_va_end = KBASE_REG_ZONE_FIXED_VA_END_32;
+
+ nr_pages = fixed_va_end - base_pfn;
+
+ if (kbase_reg_zone_init(kctx->kbdev, zone, FIXED_VA_ZONE, base_pfn, nr_pages))
+ return -ENOMEM;
kctx->gpu_va_end = fixed_va_end;
- if (!fixed_va_reg) {
- err = -ENOMEM;
- goto fail_free_exec_fixed_va;
- }
+ return 0;
+}
- kbase_region_tracker_ds_init(kctx, same_va_reg, custom_va_reg, exec_va_reg,
- exec_fixed_va_reg, fixed_va_reg);
+static void kbase_reg_zone_fixed_va_term(struct kbase_context *kctx)
+{
+ struct kbase_reg_zone *zone = kbase_ctx_reg_zone_get(kctx, FIXED_VA_ZONE);
- INIT_LIST_HEAD(&kctx->csf.event_pages_head);
-#else
- /* EXEC_VA zone's codepaths are slightly easier when its base_pfn is
- * initially U64_MAX
- */
- kbase_ctx_reg_zone_init(kctx, KBASE_REG_ZONE_EXEC_VA, U64_MAX, 0u);
- /* Other zones are 0: kbase_create_context() uses vzalloc */
+ kbase_reg_zone_term(zone);
+}
+#endif
+
+typedef int kbase_memory_zone_init(struct kbase_context *kctx, u64 gpu_va_limit);
+typedef void kbase_memory_zone_term(struct kbase_context *kctx);
- kbase_region_tracker_ds_init(kctx, same_va_reg, custom_va_reg);
- kctx->gpu_va_end = same_va_base + same_va_pages + custom_va_size;
+struct kbase_memory_zone_init_meta {
+ kbase_memory_zone_init *init;
+ kbase_memory_zone_term *term;
+ char *error_msg;
+};
+
+static const struct kbase_memory_zone_init_meta zones_init[] = {
+ [SAME_VA_ZONE] = { kbase_reg_zone_same_va_init, kbase_reg_zone_same_va_term,
+ "Could not initialize SAME_VA zone" },
+ [CUSTOM_VA_ZONE] = { kbase_reg_zone_custom_va_init, kbase_reg_zone_custom_va_term,
+ "Could not initialize CUSTOM_VA zone" },
+ [EXEC_VA_ZONE] = { kbase_reg_zone_exec_va_init, kbase_reg_zone_exec_va_term,
+ "Could not initialize EXEC_VA zone" },
+#if MALI_USE_CSF
+ [EXEC_FIXED_VA_ZONE] = { kbase_reg_zone_exec_fixed_va_init,
+ kbase_reg_zone_exec_fixed_va_term,
+ "Could not initialize EXEC_FIXED_VA zone" },
+ [FIXED_VA_ZONE] = { kbase_reg_zone_fixed_va_init, kbase_reg_zone_fixed_va_term,
+ "Could not initialize FIXED_VA zone" },
#endif
- kctx->jit_va = false;
+};
- kbase_gpu_vm_unlock(kctx);
- return 0;
+int kbase_region_tracker_init(struct kbase_context *kctx)
+{
+ const u64 gpu_va_bits = kctx->kbdev->gpu_props.mmu.va_bits;
+ const u64 gpu_va_limit = (1ULL << gpu_va_bits) >> PAGE_SHIFT;
+ int err;
+ unsigned int i;
+ /* Take the lock as kbase_free_alloced_region requires it */
+ kbase_gpu_vm_lock(kctx);
+
+ for (i = 0; i < ARRAY_SIZE(zones_init); i++) {
+ err = zones_init[i].init(kctx, gpu_va_limit);
+ if (unlikely(err)) {
+ dev_err(kctx->kbdev->dev, "%s, err = %d\n", zones_init[i].error_msg, err);
+ goto term;
+ }
+ }
#if MALI_USE_CSF
-fail_free_exec_fixed_va:
- kbase_free_alloced_region(exec_fixed_va_reg);
-fail_free_exec_va:
- kbase_free_alloced_region(exec_va_reg);
-fail_free_custom_va:
- if (custom_va_reg)
- kbase_free_alloced_region(custom_va_reg);
+ INIT_LIST_HEAD(&kctx->csf.event_pages_head);
#endif
+ kctx->jit_va = false;
+
+ kbase_gpu_vm_unlock(kctx);
+
+ return 0;
+term:
+ while (i-- > 0)
+ zones_init[i].term(kctx);
-fail_free_same_va:
- kbase_free_alloced_region(same_va_reg);
-fail_unlock:
kbase_gpu_vm_unlock(kctx);
return err;
}
+void kbase_region_tracker_term(struct kbase_context *kctx)
+{
+ unsigned int i;
+
+ WARN(kctx->as_nr != KBASEP_AS_NR_INVALID,
+ "kctx-%d_%d must first be scheduled out to flush GPU caches+tlbs before erasing remaining regions",
+ kctx->tgid, kctx->id);
+
+ kbase_gpu_vm_lock(kctx);
+
+ for (i = 0; i < ARRAY_SIZE(zones_init); i++)
+ zones_init[i].term(kctx);
+
+ kbase_gpu_vm_unlock(kctx);
+}
+
static bool kbase_has_exec_va_zone_locked(struct kbase_context *kctx)
{
struct kbase_reg_zone *exec_va_zone;
lockdep_assert_held(&kctx->reg_lock);
- exec_va_zone = kbase_ctx_reg_zone_get(kctx, KBASE_REG_ZONE_EXEC_VA);
+ exec_va_zone = kbase_ctx_reg_zone_get(kctx, EXEC_VA_ZONE);
return (exec_va_zone->base_pfn != U64_MAX);
}
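Not part of the patch: the zones_init[] table above makes adding a further context zone a matter of supplying an init/term pair and one table entry; kbase_region_tracker_init() then picks it up, including the reverse-order rollback on failure. A purely hypothetical sketch (NEW_VA_ZONE and both helpers below do not exist in the driver):

/* Hypothetical illustration only: NEW_VA_ZONE and the helpers are made up. */
static int kbase_reg_zone_new_va_init(struct kbase_context *kctx, u64 gpu_va_limit)
{
	/* Placeholder base/size values, purely for illustration. */
	const u64 base_pfn = 0x100000;
	const u64 nr_pages = 0x1000;
	struct kbase_reg_zone *zone = kbase_ctx_reg_zone_get(kctx, NEW_VA_ZONE);

	return kbase_reg_zone_init(kctx->kbdev, zone, NEW_VA_ZONE, base_pfn, nr_pages);
}

static void kbase_reg_zone_new_va_term(struct kbase_context *kctx)
{
	kbase_reg_zone_term(kbase_ctx_reg_zone_get(kctx, NEW_VA_ZONE));
}

/* ...and the corresponding table entry:
 *	[NEW_VA_ZONE] = { kbase_reg_zone_new_va_init, kbase_reg_zone_new_va_term,
 *			  "Could not initialize NEW_VA zone" },
 */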
@@ -1109,16 +1087,16 @@ static bool kbase_region_tracker_has_allocs(struct kbase_context *kctx)
lockdep_assert_held(&kctx->reg_lock);
- for (zone_idx = 0; zone_idx < KBASE_REG_ZONE_MAX; ++zone_idx) {
+ for (zone_idx = 0; zone_idx < MEMORY_ZONE_MAX; zone_idx++) {
struct kbase_reg_zone *zone;
struct kbase_va_region *reg;
u64 zone_base_addr;
- unsigned long zone_bits = KBASE_REG_ZONE(zone_idx);
- unsigned long reg_zone;
+ enum kbase_memory_zone reg_zone;
- if (!kbase_is_ctx_reg_zone(zone_bits))
+ if (!kbase_is_ctx_reg_zone(zone_idx))
continue;
- zone = kbase_ctx_reg_zone_get(kctx, zone_bits);
+
+ zone = kbase_ctx_reg_zone_get(kctx, zone_idx);
zone_base_addr = zone->base_pfn << PAGE_SHIFT;
reg = kbase_region_tracker_find_region_base_address(
@@ -1126,21 +1104,21 @@ static bool kbase_region_tracker_has_allocs(struct kbase_context *kctx)
if (!zone->va_size_pages) {
WARN(reg,
- "Should not have found a region that starts at 0x%.16llx for zone 0x%lx",
- (unsigned long long)zone_base_addr, zone_bits);
+ "Should not have found a region that starts at 0x%.16llx for zone %s",
+ (unsigned long long)zone_base_addr, kbase_reg_zone_get_name(zone_idx));
continue;
}
if (WARN(!reg,
- "There should always be a region that starts at 0x%.16llx for zone 0x%lx, couldn't find it",
- (unsigned long long)zone_base_addr, zone_bits))
+ "There should always be a region that starts at 0x%.16llx for zone %s, couldn't find it",
+ (unsigned long long)zone_base_addr, kbase_reg_zone_get_name(zone_idx)))
return true; /* Safest return value */
- reg_zone = reg->flags & KBASE_REG_ZONE_MASK;
- if (WARN(reg_zone != zone_bits,
- "The region that starts at 0x%.16llx should be in zone 0x%lx but was found in the wrong zone 0x%lx",
- (unsigned long long)zone_base_addr, zone_bits,
- reg_zone))
+ reg_zone = kbase_bits_to_zone(reg->flags);
+ if (WARN(reg_zone != zone_idx,
+ "The region that starts at 0x%.16llx should be in zone %s but was found in the wrong zone %s",
+ (unsigned long long)zone_base_addr, kbase_reg_zone_get_name(zone_idx),
+ kbase_reg_zone_get_name(reg_zone)))
return true; /* Safest return value */
/* Unless the region is completely free, of the same size as
@@ -1161,10 +1139,8 @@ static int kbase_region_tracker_init_jit_64(struct kbase_context *kctx,
u64 jit_va_pages)
{
struct kbase_va_region *same_va_reg;
- struct kbase_reg_zone *same_va_zone;
+ struct kbase_reg_zone *same_va_zone, *custom_va_zone;
u64 same_va_zone_base_addr;
- const unsigned long same_va_zone_bits = KBASE_REG_ZONE_SAME_VA;
- struct kbase_va_region *custom_va_reg;
u64 jit_va_start;
lockdep_assert_held(&kctx->reg_lock);
@@ -1175,14 +1151,14 @@ static int kbase_region_tracker_init_jit_64(struct kbase_context *kctx,
* cause an overlap to happen with existing same VA allocations and the
* custom VA zone.
*/
- same_va_zone = kbase_ctx_reg_zone_get(kctx, same_va_zone_bits);
+ same_va_zone = kbase_ctx_reg_zone_get(kctx, SAME_VA_ZONE);
same_va_zone_base_addr = same_va_zone->base_pfn << PAGE_SHIFT;
same_va_reg = kbase_region_tracker_find_region_base_address(
kctx, same_va_zone_base_addr);
if (WARN(!same_va_reg,
- "Already found a free region at the start of every zone, but now cannot find any region for zone base 0x%.16llx zone 0x%lx",
- (unsigned long long)same_va_zone_base_addr, same_va_zone_bits))
+ "Already found a free region at the start of every zone, but now cannot find any region for zone SAME_VA base 0x%.16llx",
+ (unsigned long long)same_va_zone_base_addr))
return -ENOMEM;
/* kbase_region_tracker_has_allocs() in the caller has already ensured
@@ -1203,24 +1179,15 @@ static int kbase_region_tracker_init_jit_64(struct kbase_context *kctx,
/*
* Create a custom VA zone at the end of the VA for allocations which
- * JIT can use so it doesn't have to allocate VA from the kernel.
+ * JIT can use so it doesn't have to allocate VA from the kernel. Note
+ * that, since the zone has already been zero-initialized during the
+ * region tracker initialization, we can simply overwrite it.
*/
- custom_va_reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_custom, jit_va_start,
- jit_va_pages, KBASE_REG_ZONE_CUSTOM_VA);
-
- /*
- * The context will be destroyed if we fail here so no point
- * reverting the change we made to same_va.
- */
- if (!custom_va_reg)
+ custom_va_zone = kbase_ctx_reg_zone_get(kctx, CUSTOM_VA_ZONE);
+ if (kbase_reg_zone_init(kctx->kbdev, custom_va_zone, CUSTOM_VA_ZONE, jit_va_start,
+ jit_va_pages))
return -ENOMEM;
- /* Since this is 64-bit, the custom zone will not have been
- * initialized, so initialize it now
- */
- kbase_ctx_reg_zone_init(kctx, KBASE_REG_ZONE_CUSTOM_VA, jit_va_start,
- jit_va_pages);
- kbase_region_tracker_insert(custom_va_reg);
return 0;
}
@@ -1291,12 +1258,11 @@ exit_unlock:
int kbase_region_tracker_init_exec(struct kbase_context *kctx, u64 exec_va_pages)
{
#if !MALI_USE_CSF
- struct kbase_va_region *exec_va_reg;
struct kbase_reg_zone *exec_va_zone;
struct kbase_reg_zone *target_zone;
struct kbase_va_region *target_reg;
u64 target_zone_base_addr;
- unsigned long target_zone_bits;
+ enum kbase_memory_zone target_zone_id;
u64 exec_va_start;
int err;
#endif
@@ -1342,20 +1308,21 @@ int kbase_region_tracker_init_exec(struct kbase_context *kctx, u64 exec_va_pages
if (kbase_ctx_compat_mode(kctx)) {
/* 32-bit client: take from CUSTOM_VA zone */
- target_zone_bits = KBASE_REG_ZONE_CUSTOM_VA;
+ target_zone_id = CUSTOM_VA_ZONE;
} else {
/* 64-bit client: take from SAME_VA zone */
- target_zone_bits = KBASE_REG_ZONE_SAME_VA;
+ target_zone_id = SAME_VA_ZONE;
}
- target_zone = kbase_ctx_reg_zone_get(kctx, target_zone_bits);
+ target_zone = kbase_ctx_reg_zone_get(kctx, target_zone_id);
target_zone_base_addr = target_zone->base_pfn << PAGE_SHIFT;
target_reg = kbase_region_tracker_find_region_base_address(
kctx, target_zone_base_addr);
if (WARN(!target_reg,
- "Already found a free region at the start of every zone, but now cannot find any region for zone base 0x%.16llx zone 0x%lx",
- (unsigned long long)target_zone_base_addr, target_zone_bits)) {
+ "Already found a free region at the start of every zone, but now cannot find any region for zone base 0x%.16llx zone %s",
+ (unsigned long long)target_zone_base_addr,
+ kbase_reg_zone_get_name(target_zone_id))) {
err = -ENOMEM;
goto exit_unlock;
}
@@ -1374,26 +1341,14 @@ int kbase_region_tracker_init_exec(struct kbase_context *kctx, u64 exec_va_pages
/* Taken from the end of the target zone */
exec_va_start = kbase_reg_zone_end_pfn(target_zone) - exec_va_pages;
-
- exec_va_reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_exec, exec_va_start,
- exec_va_pages, KBASE_REG_ZONE_EXEC_VA);
- if (!exec_va_reg) {
- err = -ENOMEM;
- goto exit_unlock;
- }
- /* Update EXEC_VA zone
- *
- * not using kbase_ctx_reg_zone_init() - it was already initialized
- */
- exec_va_zone = kbase_ctx_reg_zone_get(kctx, KBASE_REG_ZONE_EXEC_VA);
- exec_va_zone->base_pfn = exec_va_start;
- exec_va_zone->va_size_pages = exec_va_pages;
+ exec_va_zone = kbase_ctx_reg_zone_get(kctx, EXEC_VA_ZONE);
+ if (kbase_reg_zone_init(kctx->kbdev, exec_va_zone, EXEC_VA_ZONE, exec_va_start,
+ exec_va_pages))
+ return -ENOMEM;
/* Update target zone and corresponding region */
target_reg->nr_pages -= exec_va_pages;
target_zone->va_size_pages -= exec_va_pages;
-
- kbase_region_tracker_insert(exec_va_reg);
err = 0;
exit_unlock:
@@ -1405,28 +1360,13 @@ exit_unlock:
#if MALI_USE_CSF
void kbase_mcu_shared_interface_region_tracker_term(struct kbase_device *kbdev)
{
- kbase_region_tracker_term_rbtree(&kbdev->csf.shared_reg_rbtree);
+ kbase_reg_zone_term(&kbdev->csf.mcu_shared_zone);
}
int kbase_mcu_shared_interface_region_tracker_init(struct kbase_device *kbdev)
{
- struct kbase_va_region *shared_reg;
- u64 shared_reg_start_pfn;
- u64 shared_reg_size;
-
- shared_reg_start_pfn = KBASE_REG_ZONE_MCU_SHARED_BASE;
- shared_reg_size = KBASE_REG_ZONE_MCU_SHARED_SIZE;
-
- kbdev->csf.shared_reg_rbtree = RB_ROOT;
-
- shared_reg =
- kbase_alloc_free_region(kbdev, &kbdev->csf.shared_reg_rbtree, shared_reg_start_pfn,
- shared_reg_size, KBASE_REG_ZONE_MCU_SHARED);
- if (!shared_reg)
- return -ENOMEM;
-
- kbase_region_tracker_insert(shared_reg);
- return 0;
+ return kbase_reg_zone_init(kbdev, &kbdev->csf.mcu_shared_zone, MCU_SHARED_ZONE,
+ KBASE_REG_ZONE_MCU_SHARED_BASE, MCU_SHARED_ZONE_SIZE);
}
#endif
@@ -1583,33 +1523,31 @@ KBASE_EXPORT_TEST_API(kbase_mem_term);
/**
* kbase_alloc_free_region - Allocate a free region object.
*
- * @kbdev: kbase device
- * @rbtree: Backlink to the red-black tree of memory regions.
+ * @zone: CUSTOM_VA_ZONE or SAME_VA_ZONE
* @start_pfn: The Page Frame Number in GPU virtual address space.
* @nr_pages: The size of the region in pages.
- * @zone: KBASE_REG_ZONE_CUSTOM_VA or KBASE_REG_ZONE_SAME_VA
*
* The allocated object is not part of any list yet, and is flagged as
* KBASE_REG_FREE. No mapping is allocated yet.
*
- * zone is KBASE_REG_ZONE_CUSTOM_VA or KBASE_REG_ZONE_SAME_VA.
- *
* Return: pointer to the allocated region object on success, NULL otherwise.
*/
-struct kbase_va_region *kbase_alloc_free_region(struct kbase_device *kbdev, struct rb_root *rbtree,
- u64 start_pfn, size_t nr_pages, int zone)
+struct kbase_va_region *kbase_alloc_free_region(struct kbase_reg_zone *zone, u64 start_pfn,
+ size_t nr_pages)
{
struct kbase_va_region *new_reg;
- KBASE_DEBUG_ASSERT(rbtree != NULL);
-
- /* zone argument should only contain zone related region flags */
- KBASE_DEBUG_ASSERT((zone & ~KBASE_REG_ZONE_MASK) == 0);
KBASE_DEBUG_ASSERT(nr_pages > 0);
/* 64-bit address range is the max */
KBASE_DEBUG_ASSERT(start_pfn + nr_pages <= (U64_MAX / PAGE_SIZE));
- new_reg = kmem_cache_zalloc(kbdev->va_region_slab, GFP_KERNEL);
+ if (WARN_ON(!zone))
+ return NULL;
+
+ if (unlikely(!zone->base_pfn || !zone->va_size_pages))
+ return NULL;
+
+ new_reg = kmem_cache_zalloc(zone->cache, GFP_KERNEL);
if (!new_reg)
return NULL;
@@ -1618,8 +1556,8 @@ struct kbase_va_region *kbase_alloc_free_region(struct kbase_device *kbdev, stru
atomic_set(&new_reg->no_user_free_count, 0);
new_reg->cpu_alloc = NULL; /* no alloc bound yet */
new_reg->gpu_alloc = NULL; /* no alloc bound yet */
- new_reg->rbtree = rbtree;
- new_reg->flags = zone | KBASE_REG_FREE;
+ new_reg->rbtree = &zone->reg_rbtree;
+ new_reg->flags = kbase_zone_to_bits(zone->id) | KBASE_REG_FREE;
new_reg->flags |= KBASE_REG_GROWABLE;
@@ -1631,9 +1569,17 @@ struct kbase_va_region *kbase_alloc_free_region(struct kbase_device *kbdev, stru
return new_reg;
}
-
KBASE_EXPORT_TEST_API(kbase_alloc_free_region);
+struct kbase_va_region *kbase_ctx_alloc_free_region(struct kbase_context *kctx,
+ enum kbase_memory_zone id, u64 start_pfn,
+ size_t nr_pages)
+{
+ struct kbase_reg_zone *zone = kbase_ctx_reg_zone_get_nolock(kctx, id);
+
+ return kbase_alloc_free_region(zone, start_pfn, nr_pages);
+}
+
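Not part of the patch: a minimal usage sketch of the reworked allocation API, where callers pass a zone (or a zone id via the context wrapper above) instead of an rb-tree root plus zone flags. The example_alloc_region() helper and its start_pfn/nr_pages values are placeholders; the caller is assumed to hold kctx->reg_lock.

/* Sketch only: allocate and release a free CUSTOM_VA region object through
 * the new wrappers. Caller is assumed to hold kctx->reg_lock.
 */
static int example_alloc_region(struct kbase_context *kctx, u64 start_pfn,
				size_t nr_pages)
{
	struct kbase_va_region *reg;

	reg = kbase_ctx_alloc_free_region(kctx, CUSTOM_VA_ZONE, start_pfn, nr_pages);
	if (!reg)
		return -ENOMEM;

	/* ... insert into the tracker / map it, as the callers above do ... */

	kbase_free_alloced_region(reg);
	return 0;
}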
/**
* kbase_free_alloced_region - Free a region object.
*
@@ -1645,19 +1591,18 @@ KBASE_EXPORT_TEST_API(kbase_alloc_free_region);
* alloc object will be released.
* It is a bug if no alloc object exists for non-free regions.
*
- * If region is KBASE_REG_ZONE_MCU_SHARED it is freed
+ * If region is MCU_SHARED_ZONE it is freed
*/
void kbase_free_alloced_region(struct kbase_va_region *reg)
{
#if MALI_USE_CSF
- if ((reg->flags & KBASE_REG_ZONE_MASK) ==
- KBASE_REG_ZONE_MCU_SHARED) {
+ if (kbase_bits_to_zone(reg->flags) == MCU_SHARED_ZONE) {
kfree(reg);
return;
}
#endif
if (!(reg->flags & KBASE_REG_FREE)) {
- struct kbase_context *kctx = kbase_reg_flags_to_kctx(reg);
+ struct kbase_context *kctx = kbase_reg_to_kctx(reg);
if (WARN_ON(!kctx))
return;
@@ -1665,8 +1610,8 @@ void kbase_free_alloced_region(struct kbase_va_region *reg)
if (WARN_ON(kbase_is_region_invalid(reg)))
return;
- dev_dbg(kctx->kbdev->dev, "Freeing memory region %pK\n",
- (void *)reg);
+ dev_dbg(kctx->kbdev->dev, "Freeing memory region %pK of zone %s\n", (void *)reg,
+ kbase_reg_zone_get_name(kbase_bits_to_zone(reg->flags)));
#if MALI_USE_CSF
if (reg->flags & KBASE_REG_CSF_EVENT)
/*
@@ -1802,8 +1747,7 @@ int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg,
} else {
if (reg->gpu_alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM ||
reg->gpu_alloc->type == KBASE_MEM_TYPE_IMPORTED_USER_BUF) {
-
- err = kbase_mmu_insert_imported_pages(
+ err = kbase_mmu_insert_pages_skip_status_update(
kctx->kbdev, &kctx->mmu, reg->start_pfn,
kbase_get_gpu_phy_pages(reg), kbase_reg_current_backed_size(reg),
reg->flags & gwt_mask, kctx->as_nr, group_id, mmu_sync_info, reg);
@@ -1812,7 +1756,7 @@ int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg,
kbase_get_gpu_phy_pages(reg),
kbase_reg_current_backed_size(reg),
reg->flags & gwt_mask, kctx->as_nr, group_id,
- mmu_sync_info, reg, true);
+ mmu_sync_info, reg);
}
if (err)
@@ -1856,8 +1800,7 @@ bad_aliased_insert:
kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn + (i * stride),
phys_alloc, alloc->imported.alias.aliased[i].length,
- alloc->imported.alias.aliased[i].length, kctx->as_nr,
- false);
+ alloc->imported.alias.aliased[i].length, kctx->as_nr);
}
bad_insert:
kbase_remove_va_region(kctx->kbdev, reg);
@@ -1868,7 +1811,7 @@ bad_insert:
KBASE_EXPORT_TEST_API(kbase_gpu_mmap);
static void kbase_jd_user_buf_unmap(struct kbase_context *kctx, struct kbase_mem_phy_alloc *alloc,
- struct kbase_va_region *reg, bool writeable);
+ struct kbase_va_region *reg);
int kbase_gpu_munmap(struct kbase_context *kctx, struct kbase_va_region *reg)
{
@@ -1889,9 +1832,8 @@ int kbase_gpu_munmap(struct kbase_context *kctx, struct kbase_va_region *reg)
size_t i = 0;
/* Due to the way the number of valid PTEs and ATEs are tracked
* currently, only the GPU virtual range that is backed & mapped
- * should be passed to the kbase_mmu_teardown_pages() function,
- * hence individual aliased regions needs to be unmapped
- * separately.
+ * should be passed to the page teardown function, hence individual
+ * aliased regions need to be unmapped separately.
*/
for (i = 0; i < alloc->imported.alias.nents; i++) {
struct tagged_addr *phys_alloc = NULL;
@@ -1905,8 +1847,7 @@ int kbase_gpu_munmap(struct kbase_context *kctx, struct kbase_va_region *reg)
kctx->kbdev, &kctx->mmu,
reg->start_pfn + (i * alloc->imported.alias.stride),
phys_alloc, alloc->imported.alias.aliased[i].length,
- alloc->imported.alias.aliased[i].length, kctx->as_nr,
- false);
+ alloc->imported.alias.aliased[i].length, kctx->as_nr);
if (WARN_ON_ONCE(err_loop))
err = err_loop;
@@ -1928,17 +1869,19 @@ int kbase_gpu_munmap(struct kbase_context *kctx, struct kbase_va_region *reg)
if (reg->flags & KBASE_REG_IMPORT_PAD)
nr_phys_pages = alloc->nents + 1;
- err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn,
- alloc->pages, nr_phys_pages, nr_virt_pages,
- kctx->as_nr, true);
+ err = kbase_mmu_teardown_imported_pages(kctx->kbdev, &kctx->mmu,
+ reg->start_pfn, alloc->pages,
+ nr_phys_pages, nr_virt_pages,
+ kctx->as_nr);
}
break;
case KBASE_MEM_TYPE_IMPORTED_USER_BUF: {
size_t nr_reg_pages = kbase_reg_current_backed_size(reg);
- err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn,
- alloc->pages, nr_reg_pages, nr_reg_pages,
- kctx->as_nr, true);
+ err = kbase_mmu_teardown_imported_pages(kctx->kbdev, &kctx->mmu,
+ reg->start_pfn, alloc->pages,
+ nr_reg_pages, nr_reg_pages,
+ kctx->as_nr);
}
break;
default: {
@@ -1946,7 +1889,7 @@ int kbase_gpu_munmap(struct kbase_context *kctx, struct kbase_va_region *reg)
err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn,
alloc->pages, nr_reg_pages, nr_reg_pages,
- kctx->as_nr, false);
+ kctx->as_nr);
}
break;
}
@@ -1966,9 +1909,7 @@ int kbase_gpu_munmap(struct kbase_context *kctx, struct kbase_va_region *reg)
/* The allocation could still have active mappings. */
if (user_buf->current_mapping_usage_count == 0) {
- kbase_jd_user_buf_unmap(kctx, alloc, reg,
- (reg->flags &
- (KBASE_REG_CPU_WR | KBASE_REG_GPU_WR)));
+ kbase_jd_user_buf_unmap(kctx, alloc, reg);
}
}
}
@@ -2112,18 +2053,18 @@ void kbase_sync_single(struct kbase_context *kctx,
dma_addr = kbase_dma_addr_from_tagged(t_gpu_pa) + offset;
if (sync_fn == KBASE_SYNC_TO_DEVICE) {
- src = ((unsigned char *)kmap(cpu_page)) + offset;
- dst = ((unsigned char *)kmap(gpu_page)) + offset;
+ src = ((unsigned char *)kbase_kmap(cpu_page)) + offset;
+ dst = ((unsigned char *)kbase_kmap(gpu_page)) + offset;
} else if (sync_fn == KBASE_SYNC_TO_CPU) {
dma_sync_single_for_cpu(kctx->kbdev->dev, dma_addr, size,
DMA_BIDIRECTIONAL);
- src = ((unsigned char *)kmap(gpu_page)) + offset;
- dst = ((unsigned char *)kmap(cpu_page)) + offset;
+ src = ((unsigned char *)kbase_kmap(gpu_page)) + offset;
+ dst = ((unsigned char *)kbase_kmap(cpu_page)) + offset;
}
memcpy(dst, src, size);
- kunmap(gpu_page);
- kunmap(cpu_page);
+ kbase_kunmap(gpu_page, src);
+ kbase_kunmap(cpu_page, dst);
if (sync_fn == KBASE_SYNC_TO_DEVICE)
dma_sync_single_for_device(kctx->kbdev->dev, dma_addr, size,
DMA_BIDIRECTIONAL);
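Not part of the patch: the kbase_kmap()/kbase_kunmap() compat wrappers are assumed, based on the usage visible above, to return the mapped kernel VA and to take that VA back on unmap (matching kmap_local_page()-style APIs on newer kernels), so a typical pairing looks like:

/* Sketch only, based on the usage pattern above: map, copy, unmap. */
static void example_copy_from_page(struct page *pg, void *dst, size_t len)
{
	void *src = kbase_kmap(pg);

	memcpy(dst, src, len);
	kbase_kunmap(pg, src);
}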
@@ -2303,8 +2244,8 @@ int kbase_mem_free_region(struct kbase_context *kctx, struct kbase_va_region *re
}
#if MALI_USE_CSF
- if (((reg->flags & KBASE_REG_ZONE_MASK) == KBASE_REG_ZONE_FIXED_VA) ||
- ((reg->flags & KBASE_REG_ZONE_MASK) == KBASE_REG_ZONE_EXEC_FIXED_VA)) {
+ if (((kbase_bits_to_zone(reg->flags)) == FIXED_VA_ZONE) ||
+ ((kbase_bits_to_zone(reg->flags)) == EXEC_FIXED_VA_ZONE)) {
if (reg->flags & KBASE_REG_FIXED_ADDRESS)
atomic64_dec(&kctx->num_fixed_allocs);
else
@@ -2381,7 +2322,7 @@ int kbase_mem_free(struct kbase_context *kctx, u64 gpu_addr)
goto out_unlock;
}
- if ((reg->flags & KBASE_REG_ZONE_MASK) == KBASE_REG_ZONE_SAME_VA) {
+ if ((kbase_bits_to_zone(reg->flags)) == SAME_VA_ZONE) {
/* SAME_VA must be freed through munmap */
dev_warn(kctx->kbdev->dev, "%s called on SAME_VA memory 0x%llX", __func__,
gpu_addr);
@@ -2544,6 +2485,7 @@ int kbase_alloc_phy_pages_helper(struct kbase_mem_phy_alloc *alloc,
* allocation is visible to the OOM killer
*/
kbase_process_page_usage_inc(kctx, nr_pages_requested);
+ kbase_trace_gpu_mem_usage_inc(kctx->kbdev, kctx, nr_pages_requested);
tp = alloc->pages + alloc->nents;
@@ -2665,8 +2607,6 @@ no_new_partial:
alloc->nents += nr_pages_requested;
- kbase_trace_gpu_mem_usage_inc(kctx->kbdev, kctx, nr_pages_requested);
-
done:
return 0;
@@ -2676,19 +2616,13 @@ alloc_failed:
size_t nr_pages_to_free = nr_pages_requested - nr_left;
alloc->nents += nr_pages_to_free;
-
- kbase_process_page_usage_inc(kctx, nr_pages_to_free);
- atomic_add(nr_pages_to_free, &kctx->used_pages);
- atomic_add(nr_pages_to_free,
- &kctx->kbdev->memdev.used_pages);
-
kbase_free_phy_pages_helper(alloc, nr_pages_to_free);
}
- kbase_process_page_usage_dec(kctx, nr_pages_requested);
- atomic_sub(nr_pages_requested, &kctx->used_pages);
- atomic_sub(nr_pages_requested,
- &kctx->kbdev->memdev.used_pages);
+ kbase_trace_gpu_mem_usage_dec(kctx->kbdev, kctx, nr_left);
+ kbase_process_page_usage_dec(kctx, nr_left);
+ atomic_sub(nr_left, &kctx->used_pages);
+ atomic_sub(nr_left, &kctx->kbdev->memdev.used_pages);
invalid_request:
return -ENOMEM;
@@ -2737,6 +2671,7 @@ struct tagged_addr *kbase_alloc_phy_pages_helper_locked(
* allocation is visible to the OOM killer
*/
kbase_process_page_usage_inc(kctx, nr_pages_requested);
+ kbase_trace_gpu_mem_usage_inc(kctx->kbdev, kctx, nr_pages_requested);
tp = alloc->pages + alloc->nents;
new_pages = tp;
@@ -2839,8 +2774,6 @@ struct tagged_addr *kbase_alloc_phy_pages_helper_locked(
alloc->nents += nr_pages_requested;
- kbase_trace_gpu_mem_usage_inc(kctx->kbdev, kctx, nr_pages_requested);
-
done:
return new_pages;
@@ -2877,6 +2810,7 @@ alloc_failed:
}
}
+ kbase_trace_gpu_mem_usage_dec(kctx->kbdev, kctx, nr_pages_requested);
kbase_process_page_usage_dec(kctx, nr_pages_requested);
atomic_sub(nr_pages_requested, &kctx->used_pages);
atomic_sub(nr_pages_requested, &kctx->kbdev->memdev.used_pages);
@@ -4539,7 +4473,7 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx,
/* A suitable JIT allocation existed on the evict list, so we need
* to make sure that the NOT_MOVABLE property is cleared.
*/
- if (kbase_page_migration_enabled) {
+ if (kbase_is_page_migration_enabled()) {
kbase_gpu_vm_lock(kctx);
mutex_lock(&kctx->jit_evict_lock);
kbase_set_phy_alloc_page_status(reg->gpu_alloc, ALLOCATED_MAPPED);
@@ -4717,14 +4651,14 @@ void kbase_jit_free(struct kbase_context *kctx, struct kbase_va_region *reg)
* by page migration. Once freed, they will enter into the page migration
* state machine via the mempools.
*/
- if (kbase_page_migration_enabled)
+ if (kbase_is_page_migration_enabled())
kbase_set_phy_alloc_page_status(reg->gpu_alloc, NOT_MOVABLE);
mutex_unlock(&kctx->jit_evict_lock);
}
void kbase_jit_backing_lost(struct kbase_va_region *reg)
{
- struct kbase_context *kctx = kbase_reg_flags_to_kctx(reg);
+ struct kbase_context *kctx = kbase_reg_to_kctx(reg);
if (WARN_ON(!kctx))
return;
@@ -5035,6 +4969,15 @@ static int kbase_jd_user_buf_map(struct kbase_context *kctx,
* MMU operations.
*/
const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC;
+ bool write;
+ enum dma_data_direction dma_dir;
+
+ /* If neither the CPU nor the GPU needs write access, use DMA_TO_DEVICE
+ * to avoid potentially-destructive CPU cache invalidates that could
+ * cause corruption of user data.
+ */
+ write = reg->flags & (KBASE_REG_CPU_WR | KBASE_REG_GPU_WR);
+ dma_dir = write ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE;
lockdep_assert_held(&kctx->reg_lock);
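
A minimal sketch of the invariant the dma_dir selection relies on: whichever direction is chosen at map time must also be used for the matching sync and unmap calls on the same page (illustrative only, names and flow simplified from the patch):

    enum dma_data_direction dir = write ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE;
    dma_addr_t da = dma_map_page_attrs(dev, page, 0, PAGE_SIZE, dir,
                                       DMA_ATTR_SKIP_CPU_SYNC);
    if (dma_mapping_error(dev, da))
            return -ENOMEM;
    dma_sync_single_for_device(dev, da, PAGE_SIZE, dir);
    /* ... GPU uses the buffer ... */
    dma_unmap_page_attrs(dev, da, PAGE_SIZE, dir, DMA_ATTR_SKIP_CPU_SYNC);
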
@@ -5068,9 +5011,9 @@ static int kbase_jd_user_buf_map(struct kbase_context *kctx,
for (i = 0; i < pinned_pages; i++) {
dma_addr_t dma_addr;
#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
- dma_addr = dma_map_page(dev, pages[i], 0, PAGE_SIZE, DMA_BIDIRECTIONAL);
+ dma_addr = dma_map_page(dev, pages[i], 0, PAGE_SIZE, dma_dir);
#else
- dma_addr = dma_map_page_attrs(dev, pages[i], 0, PAGE_SIZE, DMA_BIDIRECTIONAL,
+ dma_addr = dma_map_page_attrs(dev, pages[i], 0, PAGE_SIZE, dma_dir,
DMA_ATTR_SKIP_CPU_SYNC);
#endif
err = dma_mapping_error(dev, dma_addr);
@@ -5080,7 +5023,7 @@ static int kbase_jd_user_buf_map(struct kbase_context *kctx,
alloc->imported.user_buf.dma_addrs[i] = dma_addr;
pa[i] = as_tagged(page_to_phys(pages[i]));
- dma_sync_single_for_device(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
+ dma_sync_single_for_device(dev, dma_addr, PAGE_SIZE, dma_dir);
}
#ifdef CONFIG_MALI_CINSTR_GWT
@@ -5088,10 +5031,10 @@ static int kbase_jd_user_buf_map(struct kbase_context *kctx,
gwt_mask = ~KBASE_REG_GPU_WR;
#endif
- err = kbase_mmu_insert_imported_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, pa,
- kbase_reg_current_backed_size(reg),
- reg->flags & gwt_mask, kctx->as_nr, alloc->group_id,
- mmu_sync_info, NULL);
+ err = kbase_mmu_insert_pages_skip_status_update(kctx->kbdev, &kctx->mmu, reg->start_pfn, pa,
+ kbase_reg_current_backed_size(reg),
+ reg->flags & gwt_mask, kctx->as_nr,
+ alloc->group_id, mmu_sync_info, NULL);
if (err == 0)
return 0;
@@ -5111,12 +5054,11 @@ unwind:
for (i = 0; i < dma_mapped_pages; i++) {
dma_addr_t dma_addr = alloc->imported.user_buf.dma_addrs[i];
- dma_sync_single_for_device(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
+ dma_sync_single_for_device(dev, dma_addr, PAGE_SIZE, dma_dir);
#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
- dma_unmap_page(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
+ dma_unmap_page(dev, dma_addr, PAGE_SIZE, dma_dir);
#else
- dma_unmap_page_attrs(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL,
- DMA_ATTR_SKIP_CPU_SYNC);
+ dma_unmap_page_attrs(dev, dma_addr, PAGE_SIZE, dma_dir, DMA_ATTR_SKIP_CPU_SYNC);
#endif
}
@@ -5134,17 +5076,113 @@ unwind:
return err;
}
+/* user_buf_sync_read_only_page - This function handles syncing a single page that has read-only
+ * access on both the CPU and GPU, so it is ready to be unmapped.
+ * @kctx: kbase context
+ * @imported_size: the number of bytes to sync
+ * @dma_addr: DMA address of the bytes to be sync'd
+ * @offset_within_page: (unused) offset of the bytes within the page. Passed so that the calling
+ * signature is identical to user_buf_sync_writable_page().
+ */
+static void user_buf_sync_read_only_page(struct kbase_context *kctx, unsigned long imported_size,
+ dma_addr_t dma_addr, unsigned long offset_within_page)
+{
+ /* Manual cache synchronization.
+ *
+ * Writes from neither the CPU nor GPU are possible via this mapping,
+ * so we just sync the entire page to the device.
+ */
+ dma_sync_single_for_device(kctx->kbdev->dev, dma_addr, imported_size, DMA_TO_DEVICE);
+}
+
+/* user_buf_sync_writable_page - This function handles syncing a single page that has read
+ * and write access, from either (or both of) the CPU and GPU,
+ * so it is ready to be unmapped.
+ * @kctx: kbase context
+ * @imported_size: the number of bytes to unmap
+ * @dma_addr: DMA address of the bytes to be unmapped
+ * @offset_within_page: offset of the bytes within the page. This is the offset to the subrange of
+ * the memory that is "imported" and so is intended for GPU access. Areas of
+ * the page outside of this - whilst still GPU accessible - are not intended
+ * for use by GPU work, and should also not be modified as the userspace CPU
+ * threads may be modifying them.
+ */
+static void user_buf_sync_writable_page(struct kbase_context *kctx, unsigned long imported_size,
+ dma_addr_t dma_addr, unsigned long offset_within_page)
+{
+ /* Manual CPU cache synchronization.
+ *
+ * When the GPU returns ownership of the buffer to the CPU, the driver
+ * needs to treat imported and non-imported memory differently.
+ *
+ * The first case to consider is non-imported sub-regions at the
+ * beginning of the first page and at the end of last page. For these
+ * sub-regions: CPU cache shall be committed with a clean+invalidate,
+ * in order to keep the last CPU write.
+ *
+ * Imported region prefers the opposite treatment: this memory has been
+ * legitimately mapped and used by the GPU, hence GPU writes shall be
+ * committed to memory, while CPU cache shall be invalidated to make
+ * sure that CPU reads the correct memory content.
+ *
+ * The following diagram shows the expected value of the variables
+ * used by this function in the corner case of an imported region enclosed
+ * by a single memory page:
+ *
+ * page boundary ->|---------- | <- dma_addr (initial value)
+ * | |
+ * | - - - - - | <- offset_within_page
+ * |XXXXXXXXXXX|\
+ * |XXXXXXXXXXX| \
+ * |XXXXXXXXXXX| }- imported_size
+ * |XXXXXXXXXXX| /
+ * |XXXXXXXXXXX|/
+ * | - - - - - | <- offset_within_page + imported_size
+ * | |\
+ * | | }- PAGE_SIZE - imported_size -
+ * | |/ offset_within_page
+ * | |
+ * page boundary ->|-----------|
+ *
+ * If the imported region is enclosed by more than one page, then
+ * offset_within_page = 0 for any page after the first.
+ */
+
+ /* Only for first page: handle non-imported range at the beginning. */
+ if (offset_within_page > 0) {
+ dma_sync_single_for_device(kctx->kbdev->dev, dma_addr, offset_within_page,
+ DMA_BIDIRECTIONAL);
+ dma_addr += offset_within_page;
+ }
+
+ /* For every page: handle imported range. */
+ if (imported_size > 0)
+ dma_sync_single_for_cpu(kctx->kbdev->dev, dma_addr, imported_size,
+ DMA_BIDIRECTIONAL);
+
+ /* Only for last page (that may coincide with first page):
+ * handle non-imported range at the end.
+ */
+ if ((imported_size + offset_within_page) < PAGE_SIZE) {
+ dma_addr += imported_size;
+ dma_sync_single_for_device(kctx->kbdev->dev, dma_addr,
+ PAGE_SIZE - imported_size - offset_within_page,
+ DMA_BIDIRECTIONAL);
+ }
+}
+
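
As a concrete illustration of the ranges synced by user_buf_sync_writable_page() (hypothetical numbers, not taken from the patch; dev stands for kctx->kbdev->dev): for a 4 KiB page where the imported sub-range starts 0x100 bytes into the page and is 0xE00 bytes long, the three calls above reduce to:

    /* head: keep the last CPU writes of the non-imported prefix */
    dma_sync_single_for_device(dev, dma_addr, 0x100, DMA_BIDIRECTIONAL);
    /* imported range: make GPU writes visible to the CPU */
    dma_sync_single_for_cpu(dev, dma_addr + 0x100, 0xE00, DMA_BIDIRECTIONAL);
    /* tail: keep the last CPU writes of the non-imported suffix */
    dma_sync_single_for_device(dev, dma_addr + 0xF00, 0x1000 - 0xE00 - 0x100,
                               DMA_BIDIRECTIONAL);
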
/* This function would also perform the work of unpinning pages on Job Manager
* GPUs, which implies that a call to kbase_jd_user_buf_pin_pages() will NOT
* have a corresponding call to kbase_jd_user_buf_unpin_pages().
*/
static void kbase_jd_user_buf_unmap(struct kbase_context *kctx, struct kbase_mem_phy_alloc *alloc,
- struct kbase_va_region *reg, bool writeable)
+ struct kbase_va_region *reg)
{
long i;
struct page **pages;
unsigned long offset_within_page = alloc->imported.user_buf.address & ~PAGE_MASK;
unsigned long remaining_size = alloc->imported.user_buf.size;
+ bool writable = (reg->flags & (KBASE_REG_CPU_WR | KBASE_REG_GPU_WR));
lockdep_assert_held(&kctx->reg_lock);
@@ -5153,8 +5191,6 @@ static void kbase_jd_user_buf_unmap(struct kbase_context *kctx, struct kbase_mem
#if !MALI_USE_CSF
kbase_mem_shrink_cpu_mapping(kctx, reg, 0, alloc->nents);
-#else
- CSTD_UNUSED(reg);
#endif
for (i = 0; i < alloc->imported.user_buf.nr_pages; i++) {
@@ -5173,75 +5209,24 @@ static void kbase_jd_user_buf_unmap(struct kbase_context *kctx, struct kbase_mem
* whole memory page.
*/
dma_addr_t dma_addr = alloc->imported.user_buf.dma_addrs[i];
+ enum dma_data_direction dma_dir = writable ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE;
- /* Manual CPU cache synchronization.
- *
- * When the GPU returns ownership of the buffer to the CPU, the driver
- * needs to treat imported and non-imported memory differently.
- *
- * The first case to consider is non-imported sub-regions at the
- * beginning of the first page and at the end of last page. For these
- * sub-regions: CPU cache shall be committed with a clean+invalidate,
- * in order to keep the last CPU write.
- *
- * Imported region prefers the opposite treatment: this memory has been
- * legitimately mapped and used by the GPU, hence GPU writes shall be
- * committed to memory, while CPU cache shall be invalidated to make
- * sure that CPU reads the correct memory content.
- *
- * The following diagram shows the expect value of the variables
- * used in this loop in the corner case of an imported region encloed
- * by a single memory page:
- *
- * page boundary ->|---------- | <- dma_addr (initial value)
- * | |
- * | - - - - - | <- offset_within_page
- * |XXXXXXXXXXX|\
- * |XXXXXXXXXXX| \
- * |XXXXXXXXXXX| }- imported_size
- * |XXXXXXXXXXX| /
- * |XXXXXXXXXXX|/
- * | - - - - - | <- offset_within_page + imported_size
- * | |\
- * | | }- PAGE_SIZE - imported_size - offset_within_page
- * | |/
- * page boundary ->|-----------|
- *
- * If the imported region is enclosed by more than one page, then
- * offset_within_page = 0 for any page after the first.
- */
-
- /* Only for first page: handle non-imported range at the beginning. */
- if (offset_within_page > 0) {
- dma_sync_single_for_device(kctx->kbdev->dev, dma_addr, offset_within_page,
- DMA_BIDIRECTIONAL);
- dma_addr += offset_within_page;
- }
-
- /* For every page: handle imported range. */
- if (imported_size > 0)
- dma_sync_single_for_cpu(kctx->kbdev->dev, dma_addr, imported_size,
- DMA_BIDIRECTIONAL);
-
- /* Only for last page (that may coincide with first page):
- * handle non-imported range at the end.
- */
- if ((imported_size + offset_within_page) < PAGE_SIZE) {
- dma_addr += imported_size;
- dma_sync_single_for_device(kctx->kbdev->dev, dma_addr,
- PAGE_SIZE - imported_size - offset_within_page,
- DMA_BIDIRECTIONAL);
- }
+ if (writable)
+ user_buf_sync_writable_page(kctx, imported_size, dma_addr,
+ offset_within_page);
+ else
+ user_buf_sync_read_only_page(kctx, imported_size, dma_addr,
+ offset_within_page);
- /* Notice: use the original DMA address to unmap the whole memory page. */
+ /* Notice: use the original DMA address to unmap the whole memory page. */
#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
dma_unmap_page(kctx->kbdev->dev, alloc->imported.user_buf.dma_addrs[i], PAGE_SIZE,
- DMA_BIDIRECTIONAL);
+ dma_dir);
#else
dma_unmap_page_attrs(kctx->kbdev->dev, alloc->imported.user_buf.dma_addrs[i],
- PAGE_SIZE, DMA_BIDIRECTIONAL, DMA_ATTR_SKIP_CPU_SYNC);
+ PAGE_SIZE, dma_dir, DMA_ATTR_SKIP_CPU_SYNC);
#endif
- if (writeable)
+ if (writable)
set_page_dirty_lock(pages[i]);
#if !MALI_USE_CSF
kbase_unpin_user_buf_page(pages[i]);
@@ -5260,7 +5245,8 @@ int kbase_mem_copy_to_pinned_user_pages(struct page **dest_pages,
void *src_page, size_t *to_copy, unsigned int nr_pages,
unsigned int *target_page_nr, size_t offset)
{
- void *target_page = kmap(dest_pages[*target_page_nr]);
+ void *target_page = kbase_kmap(dest_pages[*target_page_nr]);
+
size_t chunk = PAGE_SIZE-offset;
if (!target_page) {
@@ -5273,13 +5259,13 @@ int kbase_mem_copy_to_pinned_user_pages(struct page **dest_pages,
memcpy(target_page + offset, src_page, chunk);
*to_copy -= chunk;
- kunmap(dest_pages[*target_page_nr]);
+ kbase_kunmap(dest_pages[*target_page_nr], target_page);
*target_page_nr += 1;
if (*target_page_nr >= nr_pages || *to_copy == 0)
return 0;
- target_page = kmap(dest_pages[*target_page_nr]);
+ target_page = kbase_kmap(dest_pages[*target_page_nr]);
if (!target_page) {
pr_err("%s: kmap failure", __func__);
return -ENOMEM;
@@ -5291,7 +5277,7 @@ int kbase_mem_copy_to_pinned_user_pages(struct page **dest_pages,
memcpy(target_page, src_page + PAGE_SIZE-offset, chunk);
*to_copy -= chunk;
- kunmap(dest_pages[*target_page_nr]);
+ kbase_kunmap(dest_pages[*target_page_nr], target_page);
return 0;
}
@@ -5358,20 +5344,14 @@ void kbase_unmap_external_resource(struct kbase_context *kctx, struct kbase_va_r
alloc->imported.user_buf.current_mapping_usage_count--;
if (alloc->imported.user_buf.current_mapping_usage_count == 0) {
- bool writeable = true;
-
if (!kbase_is_region_invalid_or_free(reg)) {
- kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn,
- alloc->pages,
- kbase_reg_current_backed_size(reg),
- kbase_reg_current_backed_size(reg),
- kctx->as_nr, true);
+ kbase_mmu_teardown_imported_pages(
+ kctx->kbdev, &kctx->mmu, reg->start_pfn, alloc->pages,
+ kbase_reg_current_backed_size(reg),
+ kbase_reg_current_backed_size(reg), kctx->as_nr);
}
- if ((reg->flags & (KBASE_REG_CPU_WR | KBASE_REG_GPU_WR)) == 0)
- writeable = false;
-
- kbase_jd_user_buf_unmap(kctx, alloc, reg, writeable);
+ kbase_jd_user_buf_unmap(kctx, alloc, reg);
}
}
break;
diff --git a/mali_kbase/mali_kbase_mem.h b/mali_kbase/mali_kbase_mem.h
index 02e5509..aa67717 100644
--- a/mali_kbase/mali_kbase_mem.h
+++ b/mali_kbase/mali_kbase_mem.h
@@ -62,6 +62,186 @@ static inline void kbase_process_page_usage_inc(struct kbase_context *kctx,
#define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_HW_ISSUE_8316 (1u << KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2_HW_ISSUE_8316)
#define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_HW_ISSUE_9630 (1u << KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2_HW_ISSUE_9630)
+/* Free region */
+#define KBASE_REG_FREE (1ul << 0)
+/* CPU write access */
+#define KBASE_REG_CPU_WR (1ul << 1)
+/* GPU write access */
+#define KBASE_REG_GPU_WR (1ul << 2)
+/* No eXecute flag */
+#define KBASE_REG_GPU_NX (1ul << 3)
+/* Is CPU cached? */
+#define KBASE_REG_CPU_CACHED (1ul << 4)
+/* Is GPU cached?
+ * Some components within the GPU might only be able to access memory that is
+ * GPU cacheable. Refer to the specific GPU implementation for more details.
+ */
+#define KBASE_REG_GPU_CACHED (1ul << 5)
+
+#define KBASE_REG_GROWABLE (1ul << 6)
+/* Can grow on pf? */
+#define KBASE_REG_PF_GROW (1ul << 7)
+
+/* Allocation doesn't straddle the 4GB boundary in GPU virtual space */
+#define KBASE_REG_GPU_VA_SAME_4GB_PAGE (1ul << 8)
+
+/* inner shareable coherency */
+#define KBASE_REG_SHARE_IN (1ul << 9)
+/* inner & outer shareable coherency */
+#define KBASE_REG_SHARE_BOTH (1ul << 10)
+
+#if MALI_USE_CSF
+/* Space for 8 different zones */
+#define KBASE_REG_ZONE_BITS 3
+#else
+/* Space for 4 different zones */
+#define KBASE_REG_ZONE_BITS 2
+#endif
+
+/* The bits 11-13 (inclusive) of the kbase_va_region flag are reserved
+ * for information about the zone in which it was allocated.
+ */
+#define KBASE_REG_ZONE_SHIFT (11ul)
+#define KBASE_REG_ZONE_MASK (((1 << KBASE_REG_ZONE_BITS) - 1ul) << KBASE_REG_ZONE_SHIFT)
+
+#if KBASE_REG_ZONE_MAX > (1 << KBASE_REG_ZONE_BITS)
+#error "Too many zones for the number of zone bits defined"
+#endif
+
+/* GPU read access */
+#define KBASE_REG_GPU_RD (1ul << 14)
+/* CPU read access */
+#define KBASE_REG_CPU_RD (1ul << 15)
+
+/* Index of chosen MEMATTR for this region (0..7) */
+#define KBASE_REG_MEMATTR_MASK (7ul << 16)
+#define KBASE_REG_MEMATTR_INDEX(x) (((x)&7) << 16)
+#define KBASE_REG_MEMATTR_VALUE(x) (((x)&KBASE_REG_MEMATTR_MASK) >> 16)
+
+#define KBASE_REG_PROTECTED (1ul << 19)
+
+/* Region belongs to a shrinker.
+ *
+ * This can either mean that it is part of the JIT/Ephemeral or tiler heap
+ * shrinker paths. Should be removed only after making sure that there are
+ * no references remaining to it in these paths, as it may cause the physical
+ * backing of the region to disappear during use.
+ */
+#define KBASE_REG_DONT_NEED (1ul << 20)
+
+/* Imported buffer is padded? */
+#define KBASE_REG_IMPORT_PAD (1ul << 21)
+
+#if MALI_USE_CSF
+/* CSF event memory */
+#define KBASE_REG_CSF_EVENT (1ul << 22)
+/* Bit 23 is reserved.
+ *
+ * Do not remove, use the next unreserved bit for new flags
+ */
+#define KBASE_REG_RESERVED_BIT_23 (1ul << 23)
+#else
+/* Bit 22 is reserved.
+ *
+ * Do not remove, use the next unreserved bit for new flags
+ */
+#define KBASE_REG_RESERVED_BIT_22 (1ul << 22)
+/* The top of the initial commit is aligned to extension pages.
+ * Extent must be a power of 2
+ */
+#define KBASE_REG_TILER_ALIGN_TOP (1ul << 23)
+#endif /* MALI_USE_CSF */
+
+/* Bit 24 is currently unused and is available for use for a new flag */
+
+/* Memory has permanent kernel side mapping */
+#define KBASE_REG_PERMANENT_KERNEL_MAPPING (1ul << 25)
+
+/* GPU VA region has been freed by the userspace, but still remains allocated
+ * due to the reference held by CPU mappings created on the GPU VA region.
+ *
+ * A region with this flag set has had kbase_gpu_munmap() called on it, but can
+ * still be looked-up in the region tracker as a non-free region. Hence must
+ * not create or update any more GPU mappings on such regions because they will
+ * not be unmapped when the region is finally destroyed.
+ *
+ * Since such regions are still present in the region tracker, new allocations
+ * attempted with BASE_MEM_SAME_VA might fail if their address intersects with
+ * a region with this flag set.
+ *
+ * In addition, this flag indicates the gpu_alloc member might no longer be valid,
+ * e.g. in infinite cache simulation.
+ */
+#define KBASE_REG_VA_FREED (1ul << 26)
+
+/* If set, the heap info address points to a u32 holding the used size in bytes;
+ * otherwise it points to a u64 holding the lowest address of unused memory.
+ */
+#define KBASE_REG_HEAP_INFO_IS_SIZE (1ul << 27)
+
+/* Allocation is actively used for JIT memory */
+#define KBASE_REG_ACTIVE_JIT_ALLOC (1ul << 28)
+
+#if MALI_USE_CSF
+/* This flag only applies to allocations in the EXEC_FIXED_VA and FIXED_VA
+ * memory zones, and it determines whether they were created with a fixed
+ * GPU VA address requested by the user.
+ */
+#define KBASE_REG_FIXED_ADDRESS (1ul << 29)
+#else
+#define KBASE_REG_RESERVED_BIT_29 (1ul << 29)
+#endif
+
+#define KBASE_REG_ZONE_CUSTOM_VA_BASE (0x100000000ULL >> PAGE_SHIFT)
+
+#if MALI_USE_CSF
+/* only used with 32-bit clients */
+/* On a 32bit platform, custom VA should be wired from 4GB to 2^(43).
+ */
+#define KBASE_REG_ZONE_CUSTOM_VA_SIZE (((1ULL << 43) >> PAGE_SHIFT) - KBASE_REG_ZONE_CUSTOM_VA_BASE)
+#else
+/* only used with 32-bit clients */
+/* On a 32bit platform, custom VA should be wired from 4GB to the VA limit of the
+ * GPU. Unfortunately, the Linux mmap() interface limits us to 2^32 pages (2^44
+ * bytes, see mmap64 man page for reference). So we put the default limit to the
+ * maximum possible on Linux and shrink it down, if required by the GPU, during
+ * initialization.
+ */
+#define KBASE_REG_ZONE_CUSTOM_VA_SIZE (((1ULL << 44) >> PAGE_SHIFT) - KBASE_REG_ZONE_CUSTOM_VA_BASE)
+/* end 32-bit clients only */
+#endif
+
+/* The starting address and size of the GPU-executable zone are dynamic
+ * and depend on the platform and the number of pages requested by the
+ * user process, with an upper limit of 4 GB.
+ */
+#define KBASE_REG_ZONE_EXEC_VA_MAX_PAGES ((1ULL << 32) >> PAGE_SHIFT) /* 4 GB */
+#define KBASE_REG_ZONE_EXEC_VA_SIZE KBASE_REG_ZONE_EXEC_VA_MAX_PAGES
+
+#if MALI_USE_CSF
+#define KBASE_REG_ZONE_MCU_SHARED_BASE (0x04000000ULL >> PAGE_SHIFT)
+#define MCU_SHARED_ZONE_SIZE (((0x08000000ULL) >> PAGE_SHIFT) - KBASE_REG_ZONE_MCU_SHARED_BASE)
+
+/* For CSF GPUs, the EXEC_VA zone is always 4GB in size, and starts at 2^47 for 64-bit
+ * clients, and 2^43 for 32-bit clients.
+ */
+#define KBASE_REG_ZONE_EXEC_VA_BASE_64 ((1ULL << 47) >> PAGE_SHIFT)
+#define KBASE_REG_ZONE_EXEC_VA_BASE_32 ((1ULL << 43) >> PAGE_SHIFT)
+/* Executable zone supporting FIXED/FIXABLE allocations.
+ * It is always 4GB in size.
+ */
+#define KBASE_REG_ZONE_EXEC_FIXED_VA_SIZE KBASE_REG_ZONE_EXEC_VA_MAX_PAGES
+
+/* Non-executable zone supporting FIXED/FIXABLE allocations.
+ * It extends from (2^47) up to (2^48)-1 for 64-bit userspace clients, and from
+ * (2^43) up to (2^44)-1 for 32-bit userspace clients: 32-bit userspace cannot
+ * map addresses beyond 2^44, whereas 64-bit userspace can.
+ */
+#define KBASE_REG_ZONE_FIXED_VA_END_64 ((1ULL << 48) >> PAGE_SHIFT)
+#define KBASE_REG_ZONE_FIXED_VA_END_32 ((1ULL << 44) >> PAGE_SHIFT)
+
+#endif
+
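
Given KBASE_REG_ZONE_SHIFT and KBASE_REG_ZONE_MASK above, the zone/bits helpers declared further down amount to plain bit packing. A minimal sketch (illustrative only; the real definitions live in the zone-tracking code and may differ):

    static inline unsigned long example_zone_to_bits(enum kbase_memory_zone zone)
    {
            return ((unsigned long)zone << KBASE_REG_ZONE_SHIFT) & KBASE_REG_ZONE_MASK;
    }

    static inline enum kbase_memory_zone example_bits_to_zone(unsigned long flags)
    {
            return (enum kbase_memory_zone)((flags & KBASE_REG_ZONE_MASK) >>
                                            KBASE_REG_ZONE_SHIFT);
    }
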
/*
* A CPU mapping
*/
@@ -307,6 +487,32 @@ enum kbase_jit_report_flags {
};
/**
+ * kbase_zone_to_bits - Convert a memory zone @zone to the corresponding
+ * bitpattern, for ORing together with other flags.
+ * @zone: Memory zone
+ *
+ * Return: Bitpattern with the appropriate bits set.
+ */
+unsigned long kbase_zone_to_bits(enum kbase_memory_zone zone);
+
+/**
+ * kbase_bits_to_zone - Convert the bitpattern @zone_bits to the corresponding
+ * zone identifier
+ * @zone_bits: Memory allocation flag containing a zone pattern
+ *
+ * Return: Zone identifier for valid zone bitpatterns.
+ */
+enum kbase_memory_zone kbase_bits_to_zone(unsigned long zone_bits);
+
+/**
+ * kbase_reg_zone_get_name - Get the string name for a given memory zone
+ * @zone: Memory zone identifier
+ *
+ * Return: string for valid memory zone, NULL otherwise
+ */
+char *kbase_reg_zone_get_name(enum kbase_memory_zone zone);
+
+/**
* kbase_set_phy_alloc_page_status - Set the page migration status of the underlying
* physical allocation.
* @alloc: the physical allocation containing the pages whose metadata is going
@@ -449,204 +655,6 @@ struct kbase_va_region {
size_t nr_pages;
size_t initial_commit;
size_t threshold_pages;
-
-/* Free region */
-#define KBASE_REG_FREE (1ul << 0)
-/* CPU write access */
-#define KBASE_REG_CPU_WR (1ul << 1)
-/* GPU write access */
-#define KBASE_REG_GPU_WR (1ul << 2)
-/* No eXecute flag */
-#define KBASE_REG_GPU_NX (1ul << 3)
-/* Is CPU cached? */
-#define KBASE_REG_CPU_CACHED (1ul << 4)
-/* Is GPU cached?
- * Some components within the GPU might only be able to access memory that is
- * GPU cacheable. Refer to the specific GPU implementation for more details.
- */
-#define KBASE_REG_GPU_CACHED (1ul << 5)
-
-#define KBASE_REG_GROWABLE (1ul << 6)
-/* Can grow on pf? */
-#define KBASE_REG_PF_GROW (1ul << 7)
-
-/* Allocation doesn't straddle the 4GB boundary in GPU virtual space */
-#define KBASE_REG_GPU_VA_SAME_4GB_PAGE (1ul << 8)
-
-/* inner shareable coherency */
-#define KBASE_REG_SHARE_IN (1ul << 9)
-/* inner & outer shareable coherency */
-#define KBASE_REG_SHARE_BOTH (1ul << 10)
-
-#if MALI_USE_CSF
-/* Space for 8 different zones */
-#define KBASE_REG_ZONE_BITS 3
-#else
-/* Space for 4 different zones */
-#define KBASE_REG_ZONE_BITS 2
-#endif
-
-#define KBASE_REG_ZONE_MASK (((1 << KBASE_REG_ZONE_BITS) - 1ul) << 11)
-#define KBASE_REG_ZONE(x) (((x) & ((1 << KBASE_REG_ZONE_BITS) - 1ul)) << 11)
-#define KBASE_REG_ZONE_IDX(x) (((x) & KBASE_REG_ZONE_MASK) >> 11)
-
-#if KBASE_REG_ZONE_MAX > (1 << KBASE_REG_ZONE_BITS)
-#error "Too many zones for the number of zone bits defined"
-#endif
-
-/* GPU read access */
-#define KBASE_REG_GPU_RD (1ul << 14)
-/* CPU read access */
-#define KBASE_REG_CPU_RD (1ul << 15)
-
-/* Index of chosen MEMATTR for this region (0..7) */
-#define KBASE_REG_MEMATTR_MASK (7ul << 16)
-#define KBASE_REG_MEMATTR_INDEX(x) (((x) & 7) << 16)
-#define KBASE_REG_MEMATTR_VALUE(x) (((x) & KBASE_REG_MEMATTR_MASK) >> 16)
-
-#define KBASE_REG_PROTECTED (1ul << 19)
-
-/* Region belongs to a shrinker.
- *
- * This can either mean that it is part of the JIT/Ephemeral or tiler heap
- * shrinker paths. Should be removed only after making sure that there are
- * no references remaining to it in these paths, as it may cause the physical
- * backing of the region to disappear during use.
- */
-#define KBASE_REG_DONT_NEED (1ul << 20)
-
-/* Imported buffer is padded? */
-#define KBASE_REG_IMPORT_PAD (1ul << 21)
-
-#if MALI_USE_CSF
-/* CSF event memory */
-#define KBASE_REG_CSF_EVENT (1ul << 22)
-#else
-/* Bit 22 is reserved.
- *
- * Do not remove, use the next unreserved bit for new flags
- */
-#define KBASE_REG_RESERVED_BIT_22 (1ul << 22)
-#endif
-
-#if !MALI_USE_CSF
-/* The top of the initial commit is aligned to extension pages.
- * Extent must be a power of 2
- */
-#define KBASE_REG_TILER_ALIGN_TOP (1ul << 23)
-#else
-/* Bit 23 is reserved.
- *
- * Do not remove, use the next unreserved bit for new flags
- */
-#define KBASE_REG_RESERVED_BIT_23 (1ul << 23)
-#endif /* !MALI_USE_CSF */
-
-/* Bit 24 is currently unused and is available for use for a new flag */
-
-/* Memory has permanent kernel side mapping */
-#define KBASE_REG_PERMANENT_KERNEL_MAPPING (1ul << 25)
-
-/* GPU VA region has been freed by the userspace, but still remains allocated
- * due to the reference held by CPU mappings created on the GPU VA region.
- *
- * A region with this flag set has had kbase_gpu_munmap() called on it, but can
- * still be looked-up in the region tracker as a non-free region. Hence must
- * not create or update any more GPU mappings on such regions because they will
- * not be unmapped when the region is finally destroyed.
- *
- * Since such regions are still present in the region tracker, new allocations
- * attempted with BASE_MEM_SAME_VA might fail if their address intersects with
- * a region with this flag set.
- *
- * In addition, this flag indicates the gpu_alloc member might no longer valid
- * e.g. in infinite cache simulation.
- */
-#define KBASE_REG_VA_FREED (1ul << 26)
-
-/* If set, the heap info address points to a u32 holding the used size in bytes;
- * otherwise it points to a u64 holding the lowest address of unused memory.
- */
-#define KBASE_REG_HEAP_INFO_IS_SIZE (1ul << 27)
-
-/* Allocation is actively used for JIT memory */
-#define KBASE_REG_ACTIVE_JIT_ALLOC (1ul << 28)
-
-#if MALI_USE_CSF
-/* This flag only applies to allocations in the EXEC_FIXED_VA and FIXED_VA
- * memory zones, and it determines whether they were created with a fixed
- * GPU VA address requested by the user.
- */
-#define KBASE_REG_FIXED_ADDRESS (1ul << 29)
-#else
-#define KBASE_REG_RESERVED_BIT_29 (1ul << 29)
-#endif
-
-#define KBASE_REG_ZONE_SAME_VA KBASE_REG_ZONE(0)
-
-#define KBASE_REG_ZONE_CUSTOM_VA KBASE_REG_ZONE(1)
-#define KBASE_REG_ZONE_CUSTOM_VA_BASE (0x100000000ULL >> PAGE_SHIFT)
-
-#if MALI_USE_CSF
-/* only used with 32-bit clients */
-/* On a 32bit platform, custom VA should be wired from 4GB to 2^(43).
- */
-#define KBASE_REG_ZONE_CUSTOM_VA_SIZE \
- (((1ULL << 43) >> PAGE_SHIFT) - KBASE_REG_ZONE_CUSTOM_VA_BASE)
-#else
-/* only used with 32-bit clients */
-/* On a 32bit platform, custom VA should be wired from 4GB to the VA limit of the
- * GPU. Unfortunately, the Linux mmap() interface limits us to 2^32 pages (2^44
- * bytes, see mmap64 man page for reference). So we put the default limit to the
- * maximum possible on Linux and shrink it down, if required by the GPU, during
- * initialization.
- */
-#define KBASE_REG_ZONE_CUSTOM_VA_SIZE \
- (((1ULL << 44) >> PAGE_SHIFT) - KBASE_REG_ZONE_CUSTOM_VA_BASE)
-/* end 32-bit clients only */
-#endif
-
-/* The starting address and size of the GPU-executable zone are dynamic
- * and depend on the platform and the number of pages requested by the
- * user process, with an upper limit of 4 GB.
- */
-#define KBASE_REG_ZONE_EXEC_VA KBASE_REG_ZONE(2)
-#define KBASE_REG_ZONE_EXEC_VA_MAX_PAGES ((1ULL << 32) >> PAGE_SHIFT) /* 4 GB */
-
-#if MALI_USE_CSF
-#define KBASE_REG_ZONE_MCU_SHARED KBASE_REG_ZONE(3)
-#define KBASE_REG_ZONE_MCU_SHARED_BASE (0x04000000ULL >> PAGE_SHIFT)
-#define KBASE_REG_ZONE_MCU_SHARED_SIZE (((0x08000000ULL) >> PAGE_SHIFT) - \
- KBASE_REG_ZONE_MCU_SHARED_BASE)
-
-/* For CSF GPUs, the EXEC_VA zone is always 4GB in size, and starts at 2^47 for 64-bit
- * clients, and 2^43 for 32-bit clients.
- */
-#define KBASE_REG_ZONE_EXEC_VA_BASE_64 ((1ULL << 47) >> PAGE_SHIFT)
-#define KBASE_REG_ZONE_EXEC_VA_BASE_32 ((1ULL << 43) >> PAGE_SHIFT)
-#define KBASE_REG_ZONE_EXEC_VA_SIZE KBASE_REG_ZONE_EXEC_VA_MAX_PAGES
-
-/* Executable zone supporting FIXED/FIXABLE allocations.
- * It is always 4GB in size.
- */
-
-#define KBASE_REG_ZONE_EXEC_FIXED_VA KBASE_REG_ZONE(4)
-#define KBASE_REG_ZONE_EXEC_FIXED_VA_SIZE KBASE_REG_ZONE_EXEC_VA_MAX_PAGES
-
-/* Non-executable zone supporting FIXED/FIXABLE allocations.
- * It extends from (2^47) up to (2^48)-1, for 64-bit userspace clients, and from
- * (2^43) up to (2^44)-1 for 32-bit userspace clients.
- */
-#define KBASE_REG_ZONE_FIXED_VA KBASE_REG_ZONE(5)
-
-/* Again - 32-bit userspace cannot map addresses beyond 2^44, but 64-bit can - and so
- * the end of the FIXED_VA zone for 64-bit clients is (2^48)-1.
- */
-#define KBASE_REG_ZONE_FIXED_VA_END_64 ((1ULL << 48) >> PAGE_SHIFT)
-#define KBASE_REG_ZONE_FIXED_VA_END_32 ((1ULL << 44) >> PAGE_SHIFT)
-
-#endif
-
unsigned long flags;
size_t extension;
struct kbase_mem_phy_alloc *cpu_alloc;
@@ -687,20 +695,19 @@ struct kbase_va_region {
};
/**
- * kbase_is_ctx_reg_zone - determine whether a KBASE_REG_ZONE_<...> is for a
- * context or for a device
- * @zone_bits: A KBASE_REG_ZONE_<...> to query
+ * kbase_is_ctx_reg_zone - Determine whether a zone is associated with a
+ * context or with the device
+ * @zone: Zone identifier
*
- * Return: True if the zone for @zone_bits is a context zone, False otherwise
+ * Return: True if @zone is a context zone, False otherwise
*/
-static inline bool kbase_is_ctx_reg_zone(unsigned long zone_bits)
+static inline bool kbase_is_ctx_reg_zone(enum kbase_memory_zone zone)
{
- WARN_ON((zone_bits & KBASE_REG_ZONE_MASK) != zone_bits);
- return (zone_bits == KBASE_REG_ZONE_SAME_VA ||
#if MALI_USE_CSF
- zone_bits == KBASE_REG_ZONE_EXEC_FIXED_VA || zone_bits == KBASE_REG_ZONE_FIXED_VA ||
+ return !(zone == MCU_SHARED_ZONE);
+#else
+ return true;
#endif
- zone_bits == KBASE_REG_ZONE_CUSTOM_VA || zone_bits == KBASE_REG_ZONE_EXEC_VA);
}
/* Special marker for failed JIT allocations that still must be marked as
@@ -1359,18 +1366,19 @@ int kbase_region_tracker_init_exec(struct kbase_context *kctx, u64 exec_va_pages
void kbase_region_tracker_term(struct kbase_context *kctx);
/**
- * kbase_region_tracker_term_rbtree - Free memory for a region tracker
+ * kbase_region_tracker_erase_rbtree - Free memory for a region tracker
*
* @rbtree: Region tracker tree root
*
* This will free all the regions within the region tracker
*/
-void kbase_region_tracker_term_rbtree(struct rb_root *rbtree);
+void kbase_region_tracker_erase_rbtree(struct rb_root *rbtree);
struct kbase_va_region *kbase_region_tracker_find_region_enclosing_address(
struct kbase_context *kctx, u64 gpu_addr);
struct kbase_va_region *kbase_find_region_enclosing_address(
struct rb_root *rbtree, u64 gpu_addr);
+void kbase_region_tracker_insert(struct kbase_va_region *new_reg);
/**
* kbase_region_tracker_find_region_base_address - Check that a pointer is
@@ -1387,8 +1395,11 @@ struct kbase_va_region *kbase_region_tracker_find_region_base_address(
struct kbase_va_region *kbase_find_region_base_address(struct rb_root *rbtree,
u64 gpu_addr);
-struct kbase_va_region *kbase_alloc_free_region(struct kbase_device *kbdev, struct rb_root *rbtree,
- u64 start_pfn, size_t nr_pages, int zone);
+struct kbase_va_region *kbase_alloc_free_region(struct kbase_reg_zone *zone, u64 start_pfn,
+ size_t nr_pages);
+struct kbase_va_region *kbase_ctx_alloc_free_region(struct kbase_context *kctx,
+ enum kbase_memory_zone id, u64 start_pfn,
+ size_t nr_pages);
void kbase_free_alloced_region(struct kbase_va_region *reg);
int kbase_add_va_region(struct kbase_context *kctx, struct kbase_va_region *reg,
u64 addr, size_t nr_pages, size_t align);
@@ -1866,7 +1877,7 @@ static inline struct kbase_page_metadata *kbase_page_private(struct page *p)
static inline dma_addr_t kbase_dma_addr(struct page *p)
{
- if (kbase_page_migration_enabled)
+ if (kbase_is_page_migration_enabled())
return kbase_page_private(p)->dma_addr;
return kbase_dma_addr_as_priv(p);
@@ -1876,8 +1887,9 @@ static inline dma_addr_t kbase_dma_addr_from_tagged(struct tagged_addr tagged_pa
{
phys_addr_t pa = as_phys_addr_t(tagged_pa);
struct page *page = pfn_to_page(PFN_DOWN(pa));
- dma_addr_t dma_addr =
- is_huge(tagged_pa) ? kbase_dma_addr_as_priv(page) : kbase_dma_addr(page);
+ dma_addr_t dma_addr = (is_huge(tagged_pa) || is_partial(tagged_pa)) ?
+ kbase_dma_addr_as_priv(page) :
+ kbase_dma_addr(page);
return dma_addr;
}
@@ -2434,75 +2446,95 @@ int kbase_mem_copy_to_pinned_user_pages(struct page **dest_pages,
unsigned int *target_page_nr, size_t offset);
/**
- * kbase_reg_zone_end_pfn - return the end Page Frame Number of @zone
- * @zone: zone to query
+ * kbase_ctx_reg_zone_get_nolock - Get a zone from @kctx where the caller does
+ * not have @kctx 's region lock
+ * @kctx: Pointer to kbase context
+ * @zone: Zone identifier
*
- * Return: The end of the zone corresponding to @zone
+ * This should only be used in performance-critical paths where the code is
+ * resilient to a race with the zone changing, and only when the zone is tracked
+ * by the @kctx.
+ *
+ * Return: The zone corresponding to @zone
*/
-static inline u64 kbase_reg_zone_end_pfn(struct kbase_reg_zone *zone)
+static inline struct kbase_reg_zone *kbase_ctx_reg_zone_get_nolock(struct kbase_context *kctx,
+ enum kbase_memory_zone zone)
{
- return zone->base_pfn + zone->va_size_pages;
+ WARN_ON(!kbase_is_ctx_reg_zone(zone));
+ return &kctx->reg_zone[zone];
}
/**
- * kbase_ctx_reg_zone_init - initialize a zone in @kctx
+ * kbase_ctx_reg_zone_get - Get a memory zone from @kctx
* @kctx: Pointer to kbase context
- * @zone_bits: A KBASE_REG_ZONE_<...> to initialize
+ * @zone: Zone identifier
+ *
+ * Note that the zone is not refcounted, so there is no corresponding operation to
+ * put the zone back.
+ *
+ * Return: The zone corresponding to @zone
+ */
+static inline struct kbase_reg_zone *kbase_ctx_reg_zone_get(struct kbase_context *kctx,
+ enum kbase_memory_zone zone)
+{
+ lockdep_assert_held(&kctx->reg_lock);
+ return kbase_ctx_reg_zone_get_nolock(kctx, zone);
+}
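
Hypothetical usage, assuming the caller already holds kctx->reg_lock (names as declared in this header):

    struct kbase_reg_zone *same_va = kbase_ctx_reg_zone_get(kctx, SAME_VA_ZONE);
    u64 zone_end_pfn = kbase_reg_zone_end_pfn(same_va);
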
+
+/**
+ * kbase_reg_zone_init - Initialize a zone in @kctx
+ * @kbdev: Pointer to kbase device in order to initialize the VA region cache
+ * @zone: Memory zone
+ * @id: Memory zone identifier to facilitate lookups
* @base_pfn: Page Frame Number in GPU virtual address space for the start of
* the Zone
* @va_size_pages: Size of the Zone in pages
+ *
+ * Return:
+ * * 0 on success
+ * * -ENOMEM on error
*/
-static inline void kbase_ctx_reg_zone_init(struct kbase_context *kctx,
- unsigned long zone_bits,
- u64 base_pfn, u64 va_size_pages)
+static inline int kbase_reg_zone_init(struct kbase_device *kbdev, struct kbase_reg_zone *zone,
+ enum kbase_memory_zone id, u64 base_pfn, u64 va_size_pages)
{
- struct kbase_reg_zone *zone;
+ struct kbase_va_region *reg;
- lockdep_assert_held(&kctx->reg_lock);
- WARN_ON(!kbase_is_ctx_reg_zone(zone_bits));
+ *zone = (struct kbase_reg_zone){ .reg_rbtree = RB_ROOT,
+ .base_pfn = base_pfn,
+ .va_size_pages = va_size_pages,
+ .id = id,
+ .cache = kbdev->va_region_slab };
+
+ if (unlikely(!va_size_pages))
+ return 0;
+
+ reg = kbase_alloc_free_region(zone, base_pfn, va_size_pages);
+ if (unlikely(!reg))
+ return -ENOMEM;
+
+ kbase_region_tracker_insert(reg);
- zone = &kctx->reg_zone[KBASE_REG_ZONE_IDX(zone_bits)];
- *zone = (struct kbase_reg_zone){
- .base_pfn = base_pfn, .va_size_pages = va_size_pages,
- };
+ return 0;
}
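
A sketch of how a zone might be brought up and torn down with these helpers (illustrative only; actual call sites differ):

    struct kbase_reg_zone *zone = &kctx->reg_zone[SAME_VA_ZONE];

    if (kbase_reg_zone_init(kctx->kbdev, zone, SAME_VA_ZONE, base_pfn, va_size_pages))
            return -ENOMEM;
    /* ... zone in use ... */
    kbase_reg_zone_term(zone);
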
/**
- * kbase_ctx_reg_zone_get_nolock - get a zone from @kctx where the caller does
- * not have @kctx 's region lock
- * @kctx: Pointer to kbase context
- * @zone_bits: A KBASE_REG_ZONE_<...> to retrieve
- *
- * This should only be used in performance-critical paths where the code is
- * resilient to a race with the zone changing.
+ * kbase_reg_zone_end_pfn - return the end Page Frame Number of @zone
+ * @zone: zone to query
*
- * Return: The zone corresponding to @zone_bits
+ * Return: The end of the zone corresponding to @zone
*/
-static inline struct kbase_reg_zone *
-kbase_ctx_reg_zone_get_nolock(struct kbase_context *kctx,
- unsigned long zone_bits)
+static inline u64 kbase_reg_zone_end_pfn(struct kbase_reg_zone *zone)
{
- WARN_ON(!kbase_is_ctx_reg_zone(zone_bits));
-
- return &kctx->reg_zone[KBASE_REG_ZONE_IDX(zone_bits)];
+ return zone->base_pfn + zone->va_size_pages;
}
/**
- * kbase_ctx_reg_zone_get - get a zone from @kctx
- * @kctx: Pointer to kbase context
- * @zone_bits: A KBASE_REG_ZONE_<...> to retrieve
- *
- * The get is not refcounted - there is no corresponding 'put' operation
- *
- * Return: The zone corresponding to @zone_bits
+ * kbase_reg_zone_term - Terminate the memory zone tracker
+ * @zone: Memory zone
*/
-static inline struct kbase_reg_zone *
-kbase_ctx_reg_zone_get(struct kbase_context *kctx, unsigned long zone_bits)
+static inline void kbase_reg_zone_term(struct kbase_reg_zone *zone)
{
- lockdep_assert_held(&kctx->reg_lock);
- WARN_ON(!kbase_is_ctx_reg_zone(zone_bits));
-
- return &kctx->reg_zone[KBASE_REG_ZONE_IDX(zone_bits)];
+ kbase_region_tracker_erase_rbtree(&zone->reg_rbtree);
}
/**
diff --git a/mali_kbase/mali_kbase_mem_linux.c b/mali_kbase/mali_kbase_mem_linux.c
index e8df130..1af833d 100644
--- a/mali_kbase/mali_kbase_mem_linux.c
+++ b/mali_kbase/mali_kbase_mem_linux.c
@@ -287,9 +287,8 @@ struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, u64 va_pages
u64 extension, u64 *flags, u64 *gpu_va,
enum kbase_caller_mmu_sync_info mmu_sync_info)
{
- int zone;
struct kbase_va_region *reg;
- struct rb_root *rbtree;
+ enum kbase_memory_zone zone;
struct device *dev;
KBASE_DEBUG_ASSERT(kctx);
@@ -359,31 +358,25 @@ struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, u64 va_pages
#endif
/* find out which VA zone to use */
- if (*flags & BASE_MEM_SAME_VA) {
- rbtree = &kctx->reg_rbtree_same;
- zone = KBASE_REG_ZONE_SAME_VA;
- }
+ if (*flags & BASE_MEM_SAME_VA)
+ zone = SAME_VA_ZONE;
#if MALI_USE_CSF
/* fixed va_zone always exists */
else if (*flags & (BASE_MEM_FIXED | BASE_MEM_FIXABLE)) {
if (*flags & BASE_MEM_PROT_GPU_EX) {
- rbtree = &kctx->reg_rbtree_exec_fixed;
- zone = KBASE_REG_ZONE_EXEC_FIXED_VA;
+ zone = EXEC_FIXED_VA_ZONE;
} else {
- rbtree = &kctx->reg_rbtree_fixed;
- zone = KBASE_REG_ZONE_FIXED_VA;
+ zone = FIXED_VA_ZONE;
}
}
#endif
else if ((*flags & BASE_MEM_PROT_GPU_EX) && kbase_has_exec_va_zone(kctx)) {
- rbtree = &kctx->reg_rbtree_exec;
- zone = KBASE_REG_ZONE_EXEC_VA;
+ zone = EXEC_VA_ZONE;
} else {
- rbtree = &kctx->reg_rbtree_custom;
- zone = KBASE_REG_ZONE_CUSTOM_VA;
+ zone = CUSTOM_VA_ZONE;
}
- reg = kbase_alloc_free_region(kctx->kbdev, rbtree, PFN_DOWN(*gpu_va), va_pages, zone);
+ reg = kbase_ctx_alloc_free_region(kctx, zone, PFN_DOWN(*gpu_va), va_pages);
if (!reg) {
dev_err(dev, "Failed to allocate free region");
@@ -634,8 +627,8 @@ int kbase_mem_query(struct kbase_context *kctx,
#if MALI_USE_CSF
if (KBASE_REG_CSF_EVENT & reg->flags)
*out |= BASE_MEM_CSF_EVENT;
- if (((KBASE_REG_ZONE_MASK & reg->flags) == KBASE_REG_ZONE_FIXED_VA) ||
- ((KBASE_REG_ZONE_MASK & reg->flags) == KBASE_REG_ZONE_EXEC_FIXED_VA)) {
+ if ((kbase_bits_to_zone(reg->flags) == FIXED_VA_ZONE) ||
+ (kbase_bits_to_zone(reg->flags) == EXEC_FIXED_VA_ZONE)) {
if (KBASE_REG_FIXED_ADDRESS & reg->flags)
*out |= BASE_MEM_FIXED;
else
@@ -680,9 +673,6 @@ unsigned long kbase_mem_evictable_reclaim_count_objects(struct shrinker *s,
int evict_nents = atomic_read(&kctx->evict_nents);
unsigned long nr_freeable_items;
- WARN((sc->gfp_mask & __GFP_ATOMIC),
- "Shrinkers cannot be called for GFP_ATOMIC allocations. Check kernel mm for problems. gfp_mask==%x\n",
- sc->gfp_mask);
WARN(in_atomic(),
"Shrinker called in atomic context. The caller must use GFP_ATOMIC or similar, then Shrinkers must not be called. gfp_mask==%x\n",
sc->gfp_mask);
@@ -875,7 +865,7 @@ int kbase_mem_evictable_make(struct kbase_mem_phy_alloc *gpu_alloc)
/* Indicate to page migration that the memory can be reclaimed by the shrinker.
*/
- if (kbase_page_migration_enabled)
+ if (kbase_is_page_migration_enabled())
kbase_set_phy_alloc_page_status(gpu_alloc, NOT_MOVABLE);
mutex_unlock(&kctx->jit_evict_lock);
@@ -936,7 +926,7 @@ bool kbase_mem_evictable_unmake(struct kbase_mem_phy_alloc *gpu_alloc)
* in which a physical allocation could transition to NOT_MOVABLE
* from.
*/
- if (kbase_page_migration_enabled)
+ if (kbase_is_page_migration_enabled())
kbase_set_phy_alloc_page_status(gpu_alloc, ALLOCATED_MAPPED);
}
}
@@ -1316,11 +1306,11 @@ int kbase_mem_umm_map(struct kbase_context *kctx,
gwt_mask = ~KBASE_REG_GPU_WR;
#endif
- err = kbase_mmu_insert_imported_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn,
- kbase_get_gpu_phy_pages(reg),
- kbase_reg_current_backed_size(reg),
- reg->flags & gwt_mask, kctx->as_nr, alloc->group_id,
- mmu_sync_info, NULL);
+ err = kbase_mmu_insert_pages_skip_status_update(kctx->kbdev, &kctx->mmu, reg->start_pfn,
+ kbase_get_gpu_phy_pages(reg),
+ kbase_reg_current_backed_size(reg),
+ reg->flags & gwt_mask, kctx->as_nr,
+ alloc->group_id, mmu_sync_info, NULL);
if (err)
goto bad_insert;
@@ -1345,8 +1335,8 @@ int kbase_mem_umm_map(struct kbase_context *kctx,
return 0;
bad_pad_insert:
- kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, alloc->pages,
- alloc->nents, alloc->nents, kctx->as_nr, true);
+ kbase_mmu_teardown_imported_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, alloc->pages,
+ alloc->nents, alloc->nents, kctx->as_nr);
bad_insert:
kbase_mem_umm_unmap_attachment(kctx, alloc);
bad_map_attachment:
@@ -1374,9 +1364,9 @@ void kbase_mem_umm_unmap(struct kbase_context *kctx,
if (!kbase_is_region_invalid_or_free(reg) && reg->gpu_alloc == alloc) {
int err;
- err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn,
- alloc->pages, reg->nr_pages, reg->nr_pages,
- kctx->as_nr, true);
+ err = kbase_mmu_teardown_imported_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn,
+ alloc->pages, reg->nr_pages, reg->nr_pages,
+ kctx->as_nr);
WARN_ON(err);
}
@@ -1423,6 +1413,7 @@ static struct kbase_va_region *kbase_mem_from_umm(struct kbase_context *kctx,
struct kbase_va_region *reg;
struct dma_buf *dma_buf;
struct dma_buf_attachment *dma_attachment;
+ enum kbase_memory_zone zone;
bool shared_zone = false;
bool need_sync = false;
int group_id;
@@ -1481,12 +1472,11 @@ static struct kbase_va_region *kbase_mem_from_umm(struct kbase_context *kctx,
if (shared_zone) {
*flags |= BASE_MEM_NEED_MMAP;
- reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_same, 0, *va_pages,
- KBASE_REG_ZONE_SAME_VA);
- } else {
- reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_custom, 0, *va_pages,
- KBASE_REG_ZONE_CUSTOM_VA);
- }
+ zone = SAME_VA_ZONE;
+ } else
+ zone = CUSTOM_VA_ZONE;
+
+ reg = kbase_ctx_alloc_free_region(kctx, zone, 0, *va_pages);
if (!reg) {
dma_buf_detach(dma_buf, dma_attachment);
@@ -1572,9 +1562,8 @@ static struct kbase_va_region *kbase_mem_from_user_buffer(
{
long i, dma_mapped_pages;
struct kbase_va_region *reg;
- struct rb_root *rbtree;
long faulted_pages;
- int zone = KBASE_REG_ZONE_CUSTOM_VA;
+ enum kbase_memory_zone zone = CUSTOM_VA_ZONE;
bool shared_zone = false;
u32 cache_line_alignment = kbase_get_cache_line_alignment(kctx->kbdev);
struct kbase_alloc_import_user_buf *user_buf;
@@ -1582,6 +1571,7 @@ static struct kbase_va_region *kbase_mem_from_user_buffer(
struct tagged_addr *pa;
struct device *dev;
int write;
+ enum dma_data_direction dma_dir;
/* Flag supported only for dma-buf imported memory */
if (*flags & BASE_MEM_IMPORT_SYNC_ON_MAP_UNMAP)
@@ -1637,13 +1627,10 @@ static struct kbase_va_region *kbase_mem_from_user_buffer(
if (shared_zone) {
*flags |= BASE_MEM_NEED_MMAP;
- zone = KBASE_REG_ZONE_SAME_VA;
- rbtree = &kctx->reg_rbtree_same;
- } else
- rbtree = &kctx->reg_rbtree_custom;
-
- reg = kbase_alloc_free_region(kctx->kbdev, rbtree, 0, *va_pages, zone);
+ zone = SAME_VA_ZONE;
+ }
+ reg = kbase_ctx_alloc_free_region(kctx, zone, 0, *va_pages);
if (!reg)
goto no_region;
@@ -1693,6 +1680,7 @@ static struct kbase_va_region *kbase_mem_from_user_buffer(
down_read(kbase_mem_get_process_mmap_lock());
write = reg->flags & (KBASE_REG_CPU_WR | KBASE_REG_GPU_WR);
+ dma_dir = write ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE;
#if KERNEL_VERSION(5, 9, 0) > LINUX_VERSION_CODE
faulted_pages = get_user_pages(address, *va_pages,
@@ -1751,10 +1739,10 @@ static struct kbase_va_region *kbase_mem_from_user_buffer(
for (i = 0; i < faulted_pages; i++) {
dma_addr_t dma_addr;
#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
- dma_addr = dma_map_page(dev, pages[i], 0, PAGE_SIZE, DMA_BIDIRECTIONAL);
+ dma_addr = dma_map_page(dev, pages[i], 0, PAGE_SIZE, dma_dir);
#else
- dma_addr = dma_map_page_attrs(dev, pages[i], 0, PAGE_SIZE,
- DMA_BIDIRECTIONAL, DMA_ATTR_SKIP_CPU_SYNC);
+ dma_addr = dma_map_page_attrs(dev, pages[i], 0, PAGE_SIZE, dma_dir,
+ DMA_ATTR_SKIP_CPU_SYNC);
#endif
if (dma_mapping_error(dev, dma_addr))
goto unwind_dma_map;
@@ -1762,7 +1750,7 @@ static struct kbase_va_region *kbase_mem_from_user_buffer(
user_buf->dma_addrs[i] = dma_addr;
pa[i] = as_tagged(page_to_phys(pages[i]));
- dma_sync_single_for_device(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
+ dma_sync_single_for_device(dev, dma_addr, PAGE_SIZE, dma_dir);
}
reg->gpu_alloc->nents = faulted_pages;
@@ -1781,12 +1769,11 @@ unwind_dma_map:
for (i = 0; i < dma_mapped_pages; i++) {
dma_addr_t dma_addr = user_buf->dma_addrs[i];
- dma_sync_single_for_device(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
+ dma_sync_single_for_device(dev, dma_addr, PAGE_SIZE, dma_dir);
#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
- dma_unmap_page(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
+ dma_unmap_page(dev, dma_addr, PAGE_SIZE, dma_dir);
#else
- dma_unmap_page_attrs(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL,
- DMA_ATTR_SKIP_CPU_SYNC);
+ dma_unmap_page_attrs(dev, dma_addr, PAGE_SIZE, dma_dir, DMA_ATTR_SKIP_CPU_SYNC);
#endif
}
fault_mismatch:
@@ -1819,6 +1806,7 @@ u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride,
size_t i;
bool coherent;
uint64_t max_stride;
+ enum kbase_memory_zone zone;
/* Calls to this function are inherently asynchronous, with respect to
* MMU operations.
@@ -1870,13 +1858,12 @@ u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride,
/* 64-bit tasks must MMAP anyway, but not expose this address to
* clients
*/
+ zone = SAME_VA_ZONE;
*flags |= BASE_MEM_NEED_MMAP;
- reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_same, 0, *num_pages,
- KBASE_REG_ZONE_SAME_VA);
- } else {
- reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_custom, 0, *num_pages,
- KBASE_REG_ZONE_CUSTOM_VA);
- }
+ } else
+ zone = CUSTOM_VA_ZONE;
+
+ reg = kbase_ctx_alloc_free_region(kctx, zone, 0, *num_pages);
if (!reg)
goto no_reg;
@@ -2200,7 +2187,7 @@ int kbase_mem_grow_gpu_mapping(struct kbase_context *kctx,
phy_pages = kbase_get_gpu_phy_pages(reg);
ret = kbase_mmu_insert_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn + old_pages,
phy_pages + old_pages, delta, reg->flags, kctx->as_nr,
- reg->gpu_alloc->group_id, mmu_sync_info, reg, false);
+ reg->gpu_alloc->group_id, mmu_sync_info, reg);
return ret;
}
@@ -2215,7 +2202,7 @@ void kbase_mem_shrink_cpu_mapping(struct kbase_context *kctx,
/* Nothing to do */
return;
- unmap_mapping_range(kctx->filp->f_inode->i_mapping,
+ unmap_mapping_range(kctx->kfile->filp->f_inode->i_mapping,
(gpu_va_start + new_pages)<<PAGE_SHIFT,
(old_pages - new_pages)<<PAGE_SHIFT, 1);
}
@@ -2229,7 +2216,7 @@ int kbase_mem_shrink_gpu_mapping(struct kbase_context *const kctx,
int ret = 0;
ret = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn + new_pages,
- alloc->pages + new_pages, delta, delta, kctx->as_nr, false);
+ alloc->pages + new_pages, delta, delta, kctx->as_nr);
return ret;
}
@@ -2388,6 +2375,21 @@ int kbase_mem_shrink(struct kbase_context *const kctx,
return -EINVAL;
delta = old_pages - new_pages;
+ if (kctx->kbdev->pagesize_2mb) {
+ struct tagged_addr *start_free = reg->gpu_alloc->pages + new_pages;
+
+ /* Move the end of new committed range to a valid location.
+ * This mirrors the adjustment done inside kbase_free_phy_pages_helper().
+ */
+ while (delta && is_huge(*start_free) && !is_huge_head(*start_free)) {
+ start_free++;
+ new_pages++;
+ delta--;
+ }
+
+ if (!delta)
+ return 0;
+ }
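
Worked example of the 2 MiB adjustment above (hypothetical numbers): if pages 512..1023 of the allocation are backed by one 2 MiB large page and the shrink requests new_pages = 1000, the loop advances new_pages to 1024 (consuming 24 pages of delta), so the large page is either kept whole or freed whole rather than being split; if that consumes all of delta, the shrink becomes a no-op and the function returns 0.
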
/* Update the GPU mapping */
err = kbase_mem_shrink_gpu_mapping(kctx, reg,
@@ -2400,19 +2402,6 @@ int kbase_mem_shrink(struct kbase_context *const kctx,
kbase_free_phy_pages_helper(reg->cpu_alloc, delta);
if (reg->cpu_alloc != reg->gpu_alloc)
kbase_free_phy_pages_helper(reg->gpu_alloc, delta);
-
- if (kctx->kbdev->pagesize_2mb) {
- if (kbase_reg_current_backed_size(reg) > new_pages) {
- old_pages = new_pages;
- new_pages = kbase_reg_current_backed_size(reg);
-
- /* Update GPU mapping. */
- err = kbase_mem_grow_gpu_mapping(kctx, reg, new_pages, old_pages,
- CALLER_MMU_ASYNC);
- }
- } else {
- WARN_ON(kbase_reg_current_backed_size(reg) != new_pages);
- }
}
return err;
@@ -2446,8 +2435,7 @@ static void kbase_cpu_vm_close(struct vm_area_struct *vma)
kbase_gpu_vm_lock(map->kctx);
if (map->free_on_close) {
- KBASE_DEBUG_ASSERT((map->region->flags & KBASE_REG_ZONE_MASK) ==
- KBASE_REG_ZONE_SAME_VA);
+ KBASE_DEBUG_ASSERT(kbase_bits_to_zone(map->region->flags) == SAME_VA_ZONE);
/* Avoid freeing memory on the process death which results in
* GPU Page Fault. Memory will be freed in kbase_destroy_context
*/
@@ -2461,6 +2449,7 @@ static void kbase_cpu_vm_close(struct vm_area_struct *vma)
kbase_gpu_vm_unlock(map->kctx);
kbase_mem_phy_alloc_put(map->alloc);
+ kbase_file_dec_cpu_mapping_count(map->kctx->kfile);
kfree(map);
}
@@ -2519,9 +2508,17 @@ static vm_fault_t kbase_cpu_vm_fault(struct vm_fault *vmf)
KBASE_DEBUG_ASSERT(map->kctx);
KBASE_DEBUG_ASSERT(map->alloc);
+ kbase_gpu_vm_lock(map->kctx);
+
+ /* Reject faults for SAME_VA mapping of UMM allocations */
+ if ((map->alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM) && map->free_on_close) {
+ dev_warn(map->kctx->kbdev->dev, "Invalid CPU access to UMM memory for ctx %d_%d",
+ map->kctx->tgid, map->kctx->id);
+ goto exit;
+ }
+
map_start_pgoff = vma->vm_pgoff - map->region->start_pfn;
- kbase_gpu_vm_lock(map->kctx);
if (unlikely(map->region->cpu_alloc->type == KBASE_MEM_TYPE_ALIAS)) {
struct kbase_aliased *aliased =
get_aliased_alloc(vma, map->region, &map_start_pgoff, 1);
@@ -2608,7 +2605,7 @@ static int kbase_cpu_mmap(struct kbase_context *kctx,
* See MIDBASE-1057
*/
- vma->vm_flags |= VM_DONTCOPY | VM_DONTDUMP | VM_DONTEXPAND | VM_IO;
+ vm_flags_set(vma, VM_DONTCOPY | VM_DONTDUMP | VM_DONTEXPAND | VM_IO);
vma->vm_ops = &kbase_vm_ops;
vma->vm_private_data = map;
@@ -2636,12 +2633,12 @@ static int kbase_cpu_mmap(struct kbase_context *kctx,
vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
}
- if (!kaddr) {
- vma->vm_flags |= VM_PFNMAP;
- } else {
+ if (!kaddr)
+ vm_flags_set(vma, VM_PFNMAP);
+ else {
WARN_ON(aligned_offset);
/* MIXEDMAP so we can vfree the kaddr early and not track it after map time */
- vma->vm_flags |= VM_MIXEDMAP;
+ vm_flags_set(vma, VM_MIXEDMAP);
/* vmalloc remaping is easy... */
err = remap_vmalloc_range(vma, kaddr, 0);
WARN_ON(err);
@@ -2662,6 +2659,7 @@ static int kbase_cpu_mmap(struct kbase_context *kctx,
map->alloc->properties |= KBASE_MEM_PHY_ALLOC_ACCESSED_CACHED;
list_add(&map->mappings_list, &map->alloc->mappings);
+ kbase_file_inc_cpu_mapping_count(kctx->kfile);
out:
return err;
@@ -2710,8 +2708,7 @@ static int kbase_mmu_dump_mmap(struct kbase_context *kctx,
goto out;
}
- new_reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_same, 0, nr_pages,
- KBASE_REG_ZONE_SAME_VA);
+ new_reg = kbase_ctx_alloc_free_region(kctx, SAME_VA_ZONE, 0, nr_pages);
if (!new_reg) {
err = -ENOMEM;
WARN_ON(1);
@@ -2855,9 +2852,9 @@ int kbase_context_mmap(struct kbase_context *const kctx,
dev_dbg(dev, "kbase_mmap\n");
if (!(vma->vm_flags & VM_READ))
- vma->vm_flags &= ~VM_MAYREAD;
+ vm_flags_clear(vma, VM_MAYREAD);
if (!(vma->vm_flags & VM_WRITE))
- vma->vm_flags &= ~VM_MAYWRITE;
+ vm_flags_clear(vma, VM_MAYWRITE);
if (nr_pages == 0) {
err = -EINVAL;
@@ -3070,6 +3067,9 @@ static void kbase_vmap_phy_pages_migrate_count_increment(struct tagged_addr *pag
{
size_t i;
+ if (!IS_ENABLED(CONFIG_PAGE_MIGRATION_SUPPORT))
+ return;
+
for (i = 0; i < page_count; i++) {
struct page *p = as_page(pages[i]);
struct kbase_page_metadata *page_md = kbase_page_private(p);
@@ -3119,6 +3119,9 @@ static void kbase_vunmap_phy_pages_migrate_count_decrement(struct tagged_addr *p
{
size_t i;
+ if (!IS_ENABLED(CONFIG_PAGE_MIGRATION_SUPPORT))
+ return;
+
for (i = 0; i < page_count; i++) {
struct page *p = as_page(pages[i]);
struct kbase_page_metadata *page_md = kbase_page_private(p);
@@ -3219,7 +3222,7 @@ static int kbase_vmap_phy_pages(struct kbase_context *kctx, struct kbase_va_regi
* of all physical pages. In case of errors, e.g. too many mappings,
* make the page not movable to prevent trouble.
*/
- if (kbase_page_migration_enabled && !kbase_mem_is_imported(reg->gpu_alloc->type))
+ if (kbase_is_page_migration_enabled() && !kbase_mem_is_imported(reg->gpu_alloc->type))
kbase_vmap_phy_pages_migrate_count_increment(page_array, page_count, reg->flags);
kfree(pages);
@@ -3331,7 +3334,7 @@ static void kbase_vunmap_phy_pages(struct kbase_context *kctx,
* for all physical pages. Now is a good time to do it because references
* haven't been released yet.
*/
- if (kbase_page_migration_enabled && !kbase_mem_is_imported(map->gpu_alloc->type)) {
+ if (kbase_is_page_migration_enabled() && !kbase_mem_is_imported(map->gpu_alloc->type)) {
const size_t page_count = PFN_UP(map->offset_in_page + map->size);
struct tagged_addr *pages_array = map->cpu_pages;
@@ -3367,11 +3370,14 @@ KBASE_EXPORT_TEST_API(kbase_vunmap);
static void kbasep_add_mm_counter(struct mm_struct *mm, int member, long value)
{
-#if (KERNEL_VERSION(4, 19, 0) <= LINUX_VERSION_CODE)
- /* To avoid the build breakage due to an unexported kernel symbol
- * 'mm_trace_rss_stat' from later kernels, i.e. from V4.19.0 onwards,
- * we inline here the equivalent of 'add_mm_counter()' from linux
- * kernel V5.4.0~8.
+#if (KERNEL_VERSION(6, 2, 0) <= LINUX_VERSION_CODE)
+ /* To avoid the build breakage due to the type change in rss_stat,
+ * we inline here the equivalent of 'add_mm_counter()' from linux kernel V6.2.
+ */
+ percpu_counter_add(&mm->rss_stat[member], value);
+#elif (KERNEL_VERSION(5, 5, 0) <= LINUX_VERSION_CODE)
+ /* To avoid the build breakage due to an unexported kernel symbol 'mm_trace_rss_stat',
+ * we inline here the equivalent of 'add_mm_counter()' from linux kernel V5.5.
*/
atomic_long_add(value, &mm->rss_stat.count[member]);
#else
@@ -3396,15 +3402,37 @@ void kbasep_os_process_page_usage_update(struct kbase_context *kctx, int pages)
#endif
}
+static void kbase_special_vm_open(struct vm_area_struct *vma)
+{
+ struct kbase_context *kctx = vma->vm_private_data;
+
+ kbase_file_inc_cpu_mapping_count(kctx->kfile);
+}
+
+static void kbase_special_vm_close(struct vm_area_struct *vma)
+{
+ struct kbase_context *kctx = vma->vm_private_data;
+
+ kbase_file_dec_cpu_mapping_count(kctx->kfile);
+}
+
+static const struct vm_operations_struct kbase_vm_special_ops = {
+ .open = kbase_special_vm_open,
+ .close = kbase_special_vm_close,
+};
+
static int kbase_tracking_page_setup(struct kbase_context *kctx, struct vm_area_struct *vma)
{
if (vma_pages(vma) != 1)
return -EINVAL;
/* no real access */
- vma->vm_flags &= ~(VM_READ | VM_MAYREAD | VM_WRITE | VM_MAYWRITE | VM_EXEC | VM_MAYEXEC);
- vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_DONTDUMP | VM_IO;
+ vm_flags_clear(vma, VM_READ | VM_MAYREAD | VM_WRITE | VM_MAYWRITE | VM_EXEC | VM_MAYEXEC);
+ vm_flags_set(vma, VM_DONTCOPY | VM_DONTEXPAND | VM_DONTDUMP | VM_IO);
+ vma->vm_ops = &kbase_vm_special_ops;
+ vma->vm_private_data = kctx;
+ kbase_file_inc_cpu_mapping_count(kctx->kfile);
return 0;
}
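
The open/close pair added above keeps the per-file CPU mapping count balanced even if the special VMA is later split or duplicated, because vm_ops->open() runs for VMA copies but not for the initial mmap(); that is why kbase_tracking_page_setup() takes the first reference itself. A generic sketch of the pattern, with hypothetical names and a plain atomic counter standing in for the driver's kbase_file accounting:

#include <linux/atomic.h>
#include <linux/fs.h>
#include <linux/mm.h>

static void example_vm_open(struct vm_area_struct *vma)
{
	/* Called when an existing VMA is duplicated or split, not on mmap() */
	atomic_inc((atomic_t *)vma->vm_private_data);
}

static void example_vm_close(struct vm_area_struct *vma)
{
	atomic_dec((atomic_t *)vma->vm_private_data);
}

static const struct vm_operations_struct example_vm_ops = {
	.open = example_vm_open,
	.close = example_vm_close,
};

static int example_mmap(struct file *filp, struct vm_area_struct *vma)
{
	vma->vm_ops = &example_vm_ops;
	vma->vm_private_data = filp->private_data; /* assumed: an atomic_t counter */
	atomic_inc((atomic_t *)vma->vm_private_data); /* initial mapping reference */
	return 0;
}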
@@ -3459,6 +3487,7 @@ static void kbase_csf_user_io_pages_vm_close(struct vm_area_struct *vma)
struct kbase_device *kbdev;
int err;
bool reset_prevented = false;
+ struct kbase_file *kfile;
if (!queue) {
pr_debug("Close method called for the new User IO pages mapping vma\n");
@@ -3467,6 +3496,7 @@ static void kbase_csf_user_io_pages_vm_close(struct vm_area_struct *vma)
kctx = queue->kctx;
kbdev = kctx->kbdev;
+ kfile = kctx->kfile;
err = kbase_reset_gpu_prevent_and_wait(kbdev);
if (err)
@@ -3484,8 +3514,9 @@ static void kbase_csf_user_io_pages_vm_close(struct vm_area_struct *vma)
if (reset_prevented)
kbase_reset_gpu_allow(kbdev);
+ kbase_file_dec_cpu_mapping_count(kfile);
/* Now as the vma is closed, drop the reference on mali device file */
- fput(kctx->filp);
+ fput(kfile->filp);
}
#if (KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE)
@@ -3618,13 +3649,13 @@ static int kbase_csf_cpu_mmap_user_io_pages(struct kbase_context *kctx,
if (err)
goto map_failed;
- vma->vm_flags |= VM_DONTCOPY | VM_DONTDUMP | VM_DONTEXPAND | VM_IO;
+ vm_flags_set(vma, VM_DONTCOPY | VM_DONTDUMP | VM_DONTEXPAND | VM_IO);
/* TODO use VM_MIXEDMAP, since it is more appropriate as both types of
* memory with and without "struct page" backing are being inserted here.
* Hw Doorbell pages comes from the device register area so kernel does
* not use "struct page" for them.
*/
- vma->vm_flags |= VM_PFNMAP;
+ vm_flags_set(vma, VM_PFNMAP);
vma->vm_ops = &kbase_csf_user_io_pages_vm_ops;
vma->vm_private_data = queue;
@@ -3638,6 +3669,7 @@ static int kbase_csf_cpu_mmap_user_io_pages(struct kbase_context *kctx,
/* Also adjust the vm_pgoff */
vma->vm_pgoff = queue->db_file_offset;
+ kbase_file_inc_cpu_mapping_count(kctx->kfile);
return 0;
map_failed:
@@ -3677,6 +3709,7 @@ static void kbase_csf_user_reg_vm_close(struct vm_area_struct *vma)
{
struct kbase_context *kctx = vma->vm_private_data;
struct kbase_device *kbdev;
+ struct kbase_file *kfile;
if (unlikely(!kctx)) {
pr_debug("Close function called for the unexpected mapping");
@@ -3684,6 +3717,7 @@ static void kbase_csf_user_reg_vm_close(struct vm_area_struct *vma)
}
kbdev = kctx->kbdev;
+ kfile = kctx->kfile;
if (unlikely(!kctx->csf.user_reg.vma))
dev_warn(kbdev->dev, "user_reg VMA pointer unexpectedly NULL for ctx %d_%d",
@@ -3695,8 +3729,9 @@ static void kbase_csf_user_reg_vm_close(struct vm_area_struct *vma)
kctx->csf.user_reg.vma = NULL;
+ kbase_file_dec_cpu_mapping_count(kfile);
/* Now as the VMA is closed, drop the reference on mali device file */
- fput(kctx->filp);
+ fput(kfile->filp);
}
/**
@@ -3801,12 +3836,12 @@ static int kbase_csf_cpu_mmap_user_reg_page(struct kbase_context *kctx,
/* Map uncached */
vma->vm_page_prot = pgprot_device(vma->vm_page_prot);
- vma->vm_flags |= VM_DONTCOPY | VM_DONTDUMP | VM_DONTEXPAND | VM_IO;
+ vm_flags_set(vma, VM_DONTCOPY | VM_DONTDUMP | VM_DONTEXPAND | VM_IO);
/* User register page comes from the device register area so
* "struct page" isn't available for it.
*/
- vma->vm_flags |= VM_PFNMAP;
+ vm_flags_set(vma, VM_PFNMAP);
kctx->csf.user_reg.vma = vma;
@@ -3826,6 +3861,7 @@ static int kbase_csf_cpu_mmap_user_reg_page(struct kbase_context *kctx,
vma->vm_ops = &kbase_csf_user_reg_vm_ops;
vma->vm_private_data = kctx;
+ kbase_file_inc_cpu_mapping_count(kctx->kfile);
return 0;
}
diff --git a/mali_kbase/mali_kbase_mem_migrate.c b/mali_kbase/mali_kbase_mem_migrate.c
index 1dc76d0..f2014f6 100644
--- a/mali_kbase/mali_kbase_mem_migrate.c
+++ b/mali_kbase/mali_kbase_mem_migrate.c
@@ -32,10 +32,33 @@
* provided and if page migration feature is enabled.
* Feature is disabled on all platforms by default.
*/
-int kbase_page_migration_enabled;
+#if !IS_ENABLED(CONFIG_PAGE_MIGRATION_SUPPORT)
+/* If page migration support is explicitly compiled out, there should be no way to change
+ * this int. Its value is automatically 0 as a global.
+ */
+const int kbase_page_migration_enabled;
+/* module_param is not called so this value cannot be changed at insmod when compiled
+ * without support for page migration.
+ */
+#else
+/* -1 as default, 0 when manually set as off and 1 when manually set as on */
+int kbase_page_migration_enabled = -1;
module_param(kbase_page_migration_enabled, int, 0444);
+MODULE_PARM_DESC(kbase_page_migration_enabled,
+ "Explicitly enable or disable page migration with 1 or 0 respectively.");
+#endif /* !IS_ENABLED(CONFIG_PAGE_MIGRATION_SUPPORT) */
+
KBASE_EXPORT_TEST_API(kbase_page_migration_enabled);
+bool kbase_is_page_migration_enabled(void)
+{
+ /* Handle uninitialised int case */
+ if (kbase_page_migration_enabled < 0)
+ return false;
+ return IS_ENABLED(CONFIG_PAGE_MIGRATION_SUPPORT) && kbase_page_migration_enabled;
+}
+KBASE_EXPORT_SYMBOL(kbase_is_page_migration_enabled);
+
#if (KERNEL_VERSION(6, 0, 0) <= LINUX_VERSION_CODE)
static const struct movable_operations movable_ops;
#endif
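
The query helper above treats the module parameter as tri-state: -1 means "not chosen yet", 0 is explicitly off, 1 is explicitly on, and everything collapses to "off" when CONFIG_PAGE_MIGRATION_SUPPORT is compiled out. A standalone restatement of that logic, for illustration only:

#include <stdbool.h>

/* Mirrors kbase_is_page_migration_enabled(): false until the parameter has
 * been resolved to a non-negative value, and always false without
 * compile-time support.
 */
static bool example_migration_enabled(int param, bool compiled_in)
{
	if (!compiled_in)
		return false;
	if (param < 0)          /* still at its -1 "unset" default */
		return false;
	return param != 0;      /* explicit 0/1 from insmod */
}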
@@ -43,9 +66,15 @@ static const struct movable_operations movable_ops;
bool kbase_alloc_page_metadata(struct kbase_device *kbdev, struct page *p, dma_addr_t dma_addr,
u8 group_id)
{
- struct kbase_page_metadata *page_md =
- kzalloc(sizeof(struct kbase_page_metadata), GFP_KERNEL);
+ struct kbase_page_metadata *page_md;
+
+ /* A check for kbase_page_migration_enabled would help here too but it's already being
+ * checked in the only caller of this function.
+ */
+ if (!IS_ENABLED(CONFIG_PAGE_MIGRATION_SUPPORT))
+ return false;
+ page_md = kzalloc(sizeof(struct kbase_page_metadata), GFP_KERNEL);
if (!page_md)
return false;
@@ -95,6 +124,8 @@ static void kbase_free_page_metadata(struct kbase_device *kbdev, struct page *p,
struct kbase_page_metadata *page_md;
dma_addr_t dma_addr;
+ if (!IS_ENABLED(CONFIG_PAGE_MIGRATION_SUPPORT))
+ return;
page_md = kbase_page_private(p);
if (!page_md)
return;
@@ -109,6 +140,10 @@ static void kbase_free_page_metadata(struct kbase_device *kbdev, struct page *p,
ClearPagePrivate(p);
}
+#if IS_ENABLED(CONFIG_PAGE_MIGRATION_SUPPORT)
+/* This function is only called when page migration
+ * support is not explicitly compiled out.
+ */
static void kbase_free_pages_worker(struct work_struct *work)
{
struct kbase_mem_migrate *mem_migrate =
@@ -121,14 +156,13 @@ static void kbase_free_pages_worker(struct work_struct *work)
spin_lock(&mem_migrate->free_pages_lock);
list_splice_init(&mem_migrate->free_pages_list, &free_list);
spin_unlock(&mem_migrate->free_pages_lock);
-
list_for_each_entry_safe(p, tmp, &free_list, lru) {
u8 group_id = 0;
list_del_init(&p->lru);
lock_page(p);
page_md = kbase_page_private(p);
- if (IS_PAGE_MOVABLE(page_md->status)) {
+ if (page_md && IS_PAGE_MOVABLE(page_md->status)) {
__ClearPageMovable(p);
page_md->status = PAGE_MOVABLE_CLEAR(page_md->status);
}
@@ -138,11 +172,14 @@ static void kbase_free_pages_worker(struct work_struct *work)
kbdev->mgm_dev->ops.mgm_free_page(kbdev->mgm_dev, group_id, p, 0);
}
}
+#endif
void kbase_free_page_later(struct kbase_device *kbdev, struct page *p)
{
struct kbase_mem_migrate *mem_migrate = &kbdev->mem_migrate;
+ if (!IS_ENABLED(CONFIG_PAGE_MIGRATION_SUPPORT))
+ return;
spin_lock(&mem_migrate->free_pages_lock);
list_add(&p->lru, &mem_migrate->free_pages_list);
spin_unlock(&mem_migrate->free_pages_lock);
@@ -161,6 +198,9 @@ void kbase_free_page_later(struct kbase_device *kbdev, struct page *p)
* the movable property. The meta data attached to the PGD page is transferred to the
* new (replacement) page.
*
+ * This function returns early with an error if called when not compiled with
+ * CONFIG_PAGE_MIGRATION_SUPPORT.
+ *
* Return: 0 on migration success, or -EAGAIN for a later retry. Otherwise it's a failure
* and the migration is aborted.
*/
@@ -173,6 +213,9 @@ static int kbasep_migrate_page_pt_mapped(struct page *old_page, struct page *new
dma_addr_t new_dma_addr;
int ret;
+ if (!IS_ENABLED(CONFIG_PAGE_MIGRATION_SUPPORT))
+ return -EINVAL;
+
/* Create a new dma map for the new page */
new_dma_addr = dma_map_page(kbdev->dev, new_page, 0, PAGE_SIZE, DMA_BIDIRECTIONAL);
if (dma_mapping_error(kbdev->dev, new_dma_addr))
@@ -227,6 +270,9 @@ static int kbasep_migrate_page_pt_mapped(struct page *old_page, struct page *new
* allocation, which is used to create CPU mappings. Before returning, the new
* page shall be set as movable and not isolated, while the old page shall lose
* the movable property.
+ *
+ * This function returns early with an error if called when not compiled with
+ * CONFIG_PAGE_MIGRATION_SUPPORT.
*/
static int kbasep_migrate_page_allocated_mapped(struct page *old_page, struct page *new_page)
{
@@ -235,6 +281,8 @@ static int kbasep_migrate_page_allocated_mapped(struct page *old_page, struct pa
dma_addr_t old_dma_addr, new_dma_addr;
int ret;
+ if (!IS_ENABLED(CONFIG_PAGE_MIGRATION_SUPPORT))
+ return -EINVAL;
old_dma_addr = page_md->dma_addr;
new_dma_addr = dma_map_page(kctx->kbdev->dev, new_page, 0, PAGE_SIZE, DMA_BIDIRECTIONAL);
if (dma_mapping_error(kctx->kbdev->dev, new_dma_addr))
@@ -246,7 +294,8 @@ static int kbasep_migrate_page_allocated_mapped(struct page *old_page, struct pa
kbase_gpu_vm_lock(kctx);
/* Unmap the old physical range. */
- unmap_mapping_range(kctx->filp->f_inode->i_mapping, page_md->data.mapped.vpfn << PAGE_SHIFT,
+ unmap_mapping_range(kctx->kfile->filp->f_inode->i_mapping,
+ page_md->data.mapped.vpfn << PAGE_SHIFT,
PAGE_SIZE, 1);
ret = kbase_mmu_migrate_page(as_tagged(page_to_phys(old_page)),
@@ -290,6 +339,7 @@ static int kbasep_migrate_page_allocated_mapped(struct page *old_page, struct pa
* @mode: LRU Isolation modes.
*
* Callback function for Linux to isolate a page and prepare it for migration.
+ * This callback is not registered if compiled without CONFIG_PAGE_MIGRATION_SUPPORT.
*
* Return: true on success, false otherwise.
*/
@@ -299,6 +349,8 @@ static bool kbase_page_isolate(struct page *p, isolate_mode_t mode)
struct kbase_mem_pool *mem_pool = NULL;
struct kbase_page_metadata *page_md = kbase_page_private(p);
+ if (!IS_ENABLED(CONFIG_PAGE_MIGRATION_SUPPORT))
+ return false;
CSTD_UNUSED(mode);
if (!page_md || !IS_PAGE_MOVABLE(page_md->status))
@@ -390,6 +442,7 @@ static bool kbase_page_isolate(struct page *p, isolate_mode_t mode)
*
* Callback function for Linux to migrate the content of the old page to the
* new page provided.
+ * This callback is not registered if compiled without CONFIG_PAGE_MIGRATION_SUPPORT.
*
* Return: 0 on success, error code otherwise.
*/
@@ -415,7 +468,7 @@ static int kbase_page_migrate(struct page *new_page, struct page *old_page, enum
#endif
CSTD_UNUSED(mode);
- if (!page_md || !IS_PAGE_MOVABLE(page_md->status))
+ if (!kbase_is_page_migration_enabled() || !page_md || !IS_PAGE_MOVABLE(page_md->status))
return -EINVAL;
if (!spin_trylock(&page_md->migrate_lock))
@@ -500,6 +553,7 @@ static int kbase_page_migrate(struct page *new_page, struct page *old_page, enum
* will only be called for a page that has been isolated but failed to
* migrate. This function will put back the given page to the state it was
* in before it was isolated.
+ * This callback is not registered if compiled without CONFIG_PAGE_MIGRATION_SUPPORT.
*/
static void kbase_page_putback(struct page *p)
{
@@ -509,6 +563,8 @@ static void kbase_page_putback(struct page *p)
struct kbase_page_metadata *page_md = kbase_page_private(p);
struct kbase_device *kbdev = NULL;
+ if (!IS_ENABLED(CONFIG_PAGE_MIGRATION_SUPPORT))
+ return;
/* If we don't have page metadata, the page may not belong to the
* driver or may already have been freed, and there's nothing we can do
*/
@@ -585,6 +641,9 @@ static const struct address_space_operations kbase_address_space_ops = {
#if (KERNEL_VERSION(6, 0, 0) > LINUX_VERSION_CODE)
void kbase_mem_migrate_set_address_space_ops(struct kbase_device *kbdev, struct file *const filp)
{
+ if (!kbase_is_page_migration_enabled())
+ return;
+
mutex_lock(&kbdev->fw_load_lock);
if (filp) {
@@ -607,10 +666,23 @@ void kbase_mem_migrate_set_address_space_ops(struct kbase_device *kbdev, struct
void kbase_mem_migrate_init(struct kbase_device *kbdev)
{
+#if !IS_ENABLED(CONFIG_PAGE_MIGRATION_SUPPORT)
+ /* Page migration explicitly disabled at compile time - do nothing */
+ return;
+#else
struct kbase_mem_migrate *mem_migrate = &kbdev->mem_migrate;
+	/* Page migration support is compiled in, either explicitly or by
+	 * default. If no explicit choice was made at insmod, follow the
+	 * large-page (2MB) configuration. A negative value means the insmod
+	 * parameter was never supplied, because any value passed at insmod
+	 * overrides the -1 default.
+	 */
if (kbase_page_migration_enabled < 0)
- kbase_page_migration_enabled = 0;
+ kbase_page_migration_enabled = kbdev->pagesize_2mb ? 1 : 0;
+ else
+ dev_info(kbdev->dev, "Page migration support explicitly %s at insmod.",
+ kbase_page_migration_enabled ? "enabled" : "disabled");
spin_lock_init(&mem_migrate->free_pages_lock);
INIT_LIST_HEAD(&mem_migrate->free_pages_list);
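
When the parameter is still at its -1 default by the time kbase_mem_migrate_init() runs, the driver now follows the large-page configuration instead of unconditionally disabling migration. A compact restatement of that decision, illustrative only:

#include <stdbool.h>

static int example_resolve_migration_default(int insmod_param, bool pagesize_2mb)
{
	if (insmod_param < 0)                /* nothing passed at insmod */
		return pagesize_2mb ? 1 : 0; /* follow the 2MB page choice */
	return insmod_param;                 /* explicit insmod choice is kept */
}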
@@ -621,12 +693,17 @@ void kbase_mem_migrate_init(struct kbase_device *kbdev)
mem_migrate->free_pages_workq =
alloc_workqueue("free_pages_workq", WQ_UNBOUND | WQ_MEM_RECLAIM, 1);
INIT_WORK(&mem_migrate->free_pages_work, kbase_free_pages_worker);
+#endif
}
void kbase_mem_migrate_term(struct kbase_device *kbdev)
{
struct kbase_mem_migrate *mem_migrate = &kbdev->mem_migrate;
+#if !IS_ENABLED(CONFIG_PAGE_MIGRATION_SUPPORT)
+ /* Page migration explicitly disabled at compile time - do nothing */
+ return;
+#endif
if (mem_migrate->free_pages_workq)
destroy_workqueue(mem_migrate->free_pages_workq);
#if (KERNEL_VERSION(6, 0, 0) > LINUX_VERSION_CODE)
diff --git a/mali_kbase/mali_kbase_mem_migrate.h b/mali_kbase/mali_kbase_mem_migrate.h
index 76bbc99..e9f3fc4 100644
--- a/mali_kbase/mali_kbase_mem_migrate.h
+++ b/mali_kbase/mali_kbase_mem_migrate.h
@@ -18,6 +18,8 @@
* http://www.gnu.org/licenses/gpl-2.0.html.
*
*/
+#ifndef _KBASE_MEM_MIGRATE_H
+#define _KBASE_MEM_MIGRATE_H
/**
* DOC: Base kernel page migration implementation.
@@ -43,7 +45,11 @@
/* Global integer used to determine if module parameter value has been
* provided and if page migration feature is enabled.
*/
+#if !IS_ENABLED(CONFIG_PAGE_MIGRATION_SUPPORT)
+extern const int kbase_page_migration_enabled;
+#else
extern int kbase_page_migration_enabled;
+#endif
/**
* kbase_alloc_page_metadata - Allocate and initialize page metadata
@@ -63,6 +69,8 @@ extern int kbase_page_migration_enabled;
bool kbase_alloc_page_metadata(struct kbase_device *kbdev, struct page *p, dma_addr_t dma_addr,
u8 group_id);
+bool kbase_is_page_migration_enabled(void);
+
/**
* kbase_free_page_later - Defer freeing of given page.
* @kbdev: Pointer to kbase device
@@ -106,3 +114,5 @@ void kbase_mem_migrate_init(struct kbase_device *kbdev);
* and destroy workqueue associated.
*/
void kbase_mem_migrate_term(struct kbase_device *kbdev);
+
+#endif /* _KBASE_MEM_MIGRATE_H */
diff --git a/mali_kbase/mali_kbase_mem_pool.c b/mali_kbase/mali_kbase_mem_pool.c
index 58716be..d942ff5 100644
--- a/mali_kbase/mali_kbase_mem_pool.c
+++ b/mali_kbase/mali_kbase_mem_pool.c
@@ -141,17 +141,21 @@ static bool set_pool_new_page_metadata(struct kbase_mem_pool *pool, struct page
* Only update page status and add the page to the memory pool if
* it is not isolated.
*/
- spin_lock(&page_md->migrate_lock);
- if (PAGE_STATUS_GET(page_md->status) == (u8)NOT_MOVABLE) {
+ if (!IS_ENABLED(CONFIG_PAGE_MIGRATION_SUPPORT))
not_movable = true;
- } else if (!WARN_ON_ONCE(IS_PAGE_ISOLATED(page_md->status))) {
- page_md->status = PAGE_STATUS_SET(page_md->status, (u8)MEM_POOL);
- page_md->data.mem_pool.pool = pool;
- page_md->data.mem_pool.kbdev = pool->kbdev;
- list_add(&p->lru, page_list);
- (*list_size)++;
+ else {
+ spin_lock(&page_md->migrate_lock);
+ if (PAGE_STATUS_GET(page_md->status) == (u8)NOT_MOVABLE) {
+ not_movable = true;
+ } else if (!WARN_ON_ONCE(IS_PAGE_ISOLATED(page_md->status))) {
+ page_md->status = PAGE_STATUS_SET(page_md->status, (u8)MEM_POOL);
+ page_md->data.mem_pool.pool = pool;
+ page_md->data.mem_pool.kbdev = pool->kbdev;
+ list_add(&p->lru, page_list);
+ (*list_size)++;
+ }
+ spin_unlock(&page_md->migrate_lock);
}
- spin_unlock(&page_md->migrate_lock);
if (not_movable) {
kbase_free_page_later(pool->kbdev, p);
@@ -173,7 +177,7 @@ static void kbase_mem_pool_add_locked(struct kbase_mem_pool *pool,
lockdep_assert_held(&pool->pool_lock);
- if (!pool->order && kbase_page_migration_enabled) {
+ if (!pool->order && kbase_is_page_migration_enabled()) {
if (set_pool_new_page_metadata(pool, p, &pool->page_list, &pool->cur_size))
queue_work_to_free = true;
} else {
@@ -204,7 +208,7 @@ static void kbase_mem_pool_add_list_locked(struct kbase_mem_pool *pool,
lockdep_assert_held(&pool->pool_lock);
- if (!pool->order && kbase_page_migration_enabled) {
+ if (!pool->order && kbase_is_page_migration_enabled()) {
struct page *p, *tmp;
list_for_each_entry_safe(p, tmp, page_list, lru) {
@@ -246,7 +250,7 @@ static struct page *kbase_mem_pool_remove_locked(struct kbase_mem_pool *pool,
p = list_first_entry(&pool->page_list, struct page, lru);
- if (!pool->order && kbase_page_migration_enabled) {
+ if (!pool->order && kbase_is_page_migration_enabled()) {
struct kbase_page_metadata *page_md = kbase_page_private(p);
spin_lock(&page_md->migrate_lock);
@@ -322,7 +326,7 @@ struct page *kbase_mem_alloc_page(struct kbase_mem_pool *pool)
if (pool->order)
gfp |= GFP_HIGHUSER | __GFP_NOWARN;
else
- gfp |= kbase_page_migration_enabled ? GFP_HIGHUSER_MOVABLE : GFP_HIGHUSER;
+ gfp |= kbase_is_page_migration_enabled() ? GFP_HIGHUSER_MOVABLE : GFP_HIGHUSER;
p = kbdev->mgm_dev->ops.mgm_alloc_page(kbdev->mgm_dev,
pool->group_id, gfp, pool->order);
@@ -339,7 +343,7 @@ struct page *kbase_mem_alloc_page(struct kbase_mem_pool *pool)
}
/* Setup page metadata for 4KB pages when page migration is enabled */
- if (!pool->order && kbase_page_migration_enabled) {
+ if (!pool->order && kbase_is_page_migration_enabled()) {
INIT_LIST_HEAD(&p->lru);
if (!kbase_alloc_page_metadata(kbdev, p, dma_addr, pool->group_id)) {
dma_unmap_page(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
@@ -360,7 +364,7 @@ static void enqueue_free_pool_pages_work(struct kbase_mem_pool *pool)
{
struct kbase_mem_migrate *mem_migrate = &pool->kbdev->mem_migrate;
- if (!pool->order && kbase_page_migration_enabled)
+ if (!pool->order && kbase_is_page_migration_enabled())
queue_work(mem_migrate->free_pages_workq, &mem_migrate->free_pages_work);
}
@@ -375,7 +379,7 @@ void kbase_mem_pool_free_page(struct kbase_mem_pool *pool, struct page *p)
kbdev = pool->kbdev;
- if (!pool->order && kbase_page_migration_enabled) {
+ if (!pool->order && kbase_is_page_migration_enabled()) {
kbase_free_page_later(kbdev, p);
pool_dbg(pool, "page to be freed to kernel later\n");
} else {
@@ -677,9 +681,10 @@ void kbase_mem_pool_term(struct kbase_mem_pool *pool)
/* Before returning wait to make sure there are no pages undergoing page isolation
* which will require reference to this pool.
*/
- while (atomic_read(&pool->isolation_in_progress_cnt))
- cpu_relax();
-
+ if (kbase_is_page_migration_enabled()) {
+ while (atomic_read(&pool->isolation_in_progress_cnt))
+ cpu_relax();
+ }
pool_dbg(pool, "terminated\n");
}
KBASE_EXPORT_TEST_API(kbase_mem_pool_term);
diff --git a/mali_kbase/mali_kbase_pbha.c b/mali_kbase/mali_kbase_pbha.c
index b65f9e7..b446bd5 100644
--- a/mali_kbase/mali_kbase_pbha.c
+++ b/mali_kbase/mali_kbase_pbha.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2021-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -23,7 +23,10 @@
#include <device/mali_kbase_device.h>
#include <mali_kbase.h>
+
+#if MALI_USE_CSF
#define DTB_SET_SIZE 2
+#endif
static bool read_setting_valid(unsigned int id, unsigned int read_setting)
{
@@ -209,6 +212,7 @@ void kbase_pbha_write_settings(struct kbase_device *kbdev)
}
}
+#if MALI_USE_CSF
static int kbase_pbha_read_int_id_override_property(struct kbase_device *kbdev,
const struct device_node *pbha_node)
{
@@ -216,17 +220,28 @@ static int kbase_pbha_read_int_id_override_property(struct kbase_device *kbdev,
int sz, i;
bool valid = true;
- sz = of_property_count_elems_of_size(pbha_node, "int_id_override",
- sizeof(u32));
+ sz = of_property_count_elems_of_size(pbha_node, "int-id-override", sizeof(u32));
+
+ if (sz == -EINVAL) {
+		/* There is no int-id-override field. Fall back to int_id_override instead */
+ sz = of_property_count_elems_of_size(pbha_node, "int_id_override", sizeof(u32));
+ }
+ if (sz == -EINVAL) {
+ /* There is no int_id_override field. This is valid - but there's nothing further
+ * to do here.
+ */
+ return 0;
+ }
if (sz <= 0 || (sz % DTB_SET_SIZE != 0)) {
dev_err(kbdev->dev, "Bad DTB format: pbha.int_id_override\n");
return -EINVAL;
}
- if (of_property_read_u32_array(pbha_node, "int_id_override", dtb_data,
- sz) != 0) {
- dev_err(kbdev->dev,
- "Failed to read DTB pbha.int_id_override\n");
- return -EINVAL;
+ if (of_property_read_u32_array(pbha_node, "int-id-override", dtb_data, sz) != 0) {
+		/* There may be no int-id-override field. Fall back to int_id_override instead */
+ if (of_property_read_u32_array(pbha_node, "int_id_override", dtb_data, sz) != 0) {
+ dev_err(kbdev->dev, "Failed to read DTB pbha.int_id_override\n");
+ return -EINVAL;
+ }
}
for (i = 0; valid && i < sz; i = i + DTB_SET_SIZE) {
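
The lookup above prefers the dt-schema style "int-id-override" property name and falls back to the legacy "int_id_override" spelling. The same fallback, condensed into a small helper for illustration (the property names are the driver's; the helper itself is hypothetical):

#include <linux/of.h>
#include <linux/types.h>

static int example_count_override_elems(const struct device_node *np)
{
	int sz = of_property_count_elems_of_size(np, "int-id-override", sizeof(u32));

	if (sz == -EINVAL) /* property absent under the new name: try the legacy one */
		sz = of_property_count_elems_of_size(np, "int_id_override", sizeof(u32));

	return sz; /* still -EINVAL if neither spelling is present */
}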
@@ -250,17 +265,20 @@ static int kbase_pbha_read_int_id_override_property(struct kbase_device *kbdev,
return 0;
}
-#if MALI_USE_CSF
static int kbase_pbha_read_propagate_bits_property(struct kbase_device *kbdev,
const struct device_node *pbha_node)
{
- u32 bits;
+ u32 bits = 0;
int err;
if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PBHA_HWU))
return 0;
- err = of_property_read_u32(pbha_node, "propagate_bits", &bits);
+ err = of_property_read_u32(pbha_node, "propagate-bits", &bits);
+
+ if (err == -EINVAL) {
+ err = of_property_read_u32(pbha_node, "propagate_bits", &bits);
+ }
if (err < 0) {
if (err != -EINVAL) {
@@ -268,6 +286,10 @@ static int kbase_pbha_read_propagate_bits_property(struct kbase_device *kbdev,
"DTB value for propagate_bits is improperly formed (err=%d)\n",
err);
return err;
+ } else {
+ /* Property does not exist */
+ kbdev->pbha_propagate_bits = 0;
+ return 0;
}
}
@@ -279,10 +301,11 @@ static int kbase_pbha_read_propagate_bits_property(struct kbase_device *kbdev,
kbdev->pbha_propagate_bits = bits;
return 0;
}
-#endif
+#endif /* MALI_USE_CSF */
int kbase_pbha_read_dtb(struct kbase_device *kbdev)
{
+#if MALI_USE_CSF
const struct device_node *pbha_node;
int err;
@@ -295,12 +318,12 @@ int kbase_pbha_read_dtb(struct kbase_device *kbdev)
err = kbase_pbha_read_int_id_override_property(kbdev, pbha_node);
-#if MALI_USE_CSF
if (err < 0)
return err;
err = kbase_pbha_read_propagate_bits_property(kbdev, pbha_node);
-#endif
-
return err;
+#else
+ return 0;
+#endif
}
diff --git a/mali_kbase/mali_kbase_pm.c b/mali_kbase/mali_kbase_pm.c
index bfd5b7e..40278a8 100644
--- a/mali_kbase/mali_kbase_pm.c
+++ b/mali_kbase/mali_kbase_pm.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -211,10 +211,28 @@ int kbase_pm_driver_suspend(struct kbase_device *kbdev)
kbdev->pm.active_count == 0);
dev_dbg(kbdev->dev, ">wait_event - waiting done\n");
+#if MALI_USE_CSF
+ /* At this point, any kbase context termination should either have run to
+ * completion and any further context termination can only begin after
+ * the system resumes. Therefore, it is now safe to skip taking the context
+ * list lock when traversing the context list.
+ */
+ if (kbase_csf_kcpu_queue_halt_timers(kbdev)) {
+ rt_mutex_lock(&kbdev->pm.lock);
+ kbdev->pm.suspending = false;
+ rt_mutex_unlock(&kbdev->pm.lock);
+ return -1;
+ }
+#endif
+
/* NOTE: We synchronize with anything that was just finishing a
* kbase_pm_context_idle() call by locking the pm.lock below
*/
if (kbase_hwaccess_pm_suspend(kbdev)) {
+#if MALI_USE_CSF
+ /* Resume the timers in case of suspend failure. */
+ kbase_csf_kcpu_queue_resume_timers(kbdev);
+#endif
rt_mutex_lock(&kbdev->pm.lock);
kbdev->pm.suspending = false;
rt_mutex_unlock(&kbdev->pm.lock);
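
The suspend path now quiesces the CSF KCPU queue timers before handing over to kbase_hwaccess_pm_suspend(), and undoes that halt if the power-management suspend fails. A simplified sketch of the error-unwind shape; the real code is CSF-only and takes kbdev->pm.lock via rt_mutex around clearing the suspending flag:

static int example_driver_suspend(struct kbase_device *kbdev)
{
	/* Quiesce KCPU queue timers first; a failure aborts the suspend */
	if (kbase_csf_kcpu_queue_halt_timers(kbdev))
		goto fail;

	if (kbase_hwaccess_pm_suspend(kbdev)) {
		/* PM suspend failed: restart the timers that were halted */
		kbase_csf_kcpu_queue_resume_timers(kbdev);
		goto fail;
	}
	return 0;

fail:
	kbdev->pm.suspending = false; /* real code holds kbdev->pm.lock here */
	return -1;
}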
@@ -262,6 +280,8 @@ void kbase_pm_driver_resume(struct kbase_device *kbdev, bool arb_gpu_start)
kbasep_js_resume(kbdev);
#else
kbase_csf_scheduler_pm_resume(kbdev);
+
+ kbase_csf_kcpu_queue_resume_timers(kbdev);
#endif
/* Matching idle call, to power off the GPU/cores if we didn't actually
@@ -283,6 +303,10 @@ void kbase_pm_driver_resume(struct kbase_device *kbdev, bool arb_gpu_start)
/* Resume HW counters intermediaries. */
kbase_vinstr_resume(kbdev->vinstr_ctx);
kbase_kinstr_prfcnt_resume(kbdev->kinstr_prfcnt_ctx);
+ /* System resume callback is complete */
+ kbdev->pm.resuming = false;
+ /* Unblock the threads waiting for the completion of System suspend/resume */
+ wake_up_all(&kbdev->pm.resume_wait);
}
int kbase_pm_suspend(struct kbase_device *kbdev)
diff --git a/mali_kbase/mali_kbase_pm.h b/mali_kbase/mali_kbase_pm.h
index 0639762..4ff3699 100644
--- a/mali_kbase/mali_kbase_pm.h
+++ b/mali_kbase/mali_kbase_pm.h
@@ -292,13 +292,14 @@ void kbase_pm_apc_term(struct kbase_device *kbdev);
*/
void kbase_pm_apc_request(struct kbase_device *kbdev, u32 dur_usec);
-/*
- * Print debug message indicating power state of GPU.
+/**
+ * kbase_gpu_timeout_debug_message() - Print a debug message indicating the power state of the GPU
* @kbdev: The kbase device structure for the device (must be a valid pointer)
+ * @timeout_msg: A message to print.
*
* Prerequisite: GPU is powered.
- * Takes and releases kbdev->hwaccess_lock
+ * Takes and releases kbdev->hwaccess_lock on CSF GPUs.
*/
-void kbase_gpu_timeout_debug_message(struct kbase_device *kbdev);
+void kbase_gpu_timeout_debug_message(struct kbase_device *kbdev, const char *timeout_msg);
#endif /* _KBASE_PM_H_ */
diff --git a/mali_kbase/mali_kbase_softjobs.c b/mali_kbase/mali_kbase_softjobs.c
index d65ff2d..0ad2bf8 100644
--- a/mali_kbase/mali_kbase_softjobs.c
+++ b/mali_kbase/mali_kbase_softjobs.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2011-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2011-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -41,6 +41,7 @@
#include <linux/kernel.h>
#include <linux/cache.h>
#include <linux/file.h>
+#include <linux/version_compat_defs.h>
#if !MALI_USE_CSF
/**
@@ -751,7 +752,7 @@ static void *dma_buf_kmap_page(struct kbase_mem_phy_alloc *gpu_alloc,
if (page_index == page_num) {
*page = sg_page_iter_page(&sg_iter);
- return kmap(*page);
+ return kbase_kmap(*page);
}
page_index++;
}
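
The kmap()/kunmap() calls are replaced with kbase_kmap()/kbase_kunmap() wrappers from version_compat_defs.h. Their exact definition is not shown in this diff; a plausible sketch, assuming they select kmap_local_page() on kernels that provide it and fall back to kmap() otherwise:

#include <linux/highmem.h>
#include <linux/version.h>

static inline void *example_kmap(struct page *p)
{
#if KERNEL_VERSION(5, 11, 0) <= LINUX_VERSION_CODE
	return kmap_local_page(p);
#else
	return kmap(p);
#endif
}

static inline void example_kunmap(struct page *p, void *addr)
{
#if KERNEL_VERSION(5, 11, 0) <= LINUX_VERSION_CODE
	kunmap_local(addr);
#else
	kunmap(p);
#endif
}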
@@ -797,14 +798,13 @@ static int kbase_mem_copy_from_extres(struct kbase_context *kctx,
for (i = 0; i < buf_data->nr_extres_pages &&
target_page_nr < buf_data->nr_pages; i++) {
struct page *pg = buf_data->extres_pages[i];
- void *extres_page = kmap(pg);
-
+ void *extres_page = kbase_kmap(pg);
if (extres_page) {
ret = kbase_mem_copy_to_pinned_user_pages(
pages, extres_page, &to_copy,
buf_data->nr_pages,
&target_page_nr, offset);
- kunmap(pg);
+ kbase_kunmap(pg, extres_page);
if (ret)
goto out_unlock;
}
@@ -839,7 +839,7 @@ static int kbase_mem_copy_from_extres(struct kbase_context *kctx,
&target_page_nr, offset);
#if KERNEL_VERSION(5, 6, 0) <= LINUX_VERSION_CODE
- kunmap(pg);
+ kbase_kunmap(pg, extres_page);
#else
dma_buf_kunmap(dma_buf, i, extres_page);
#endif
diff --git a/mali_kbase/mali_kbase_strings.h b/mali_kbase/mali_kbase_strings.h
deleted file mode 100644
index c3f94f9..0000000
--- a/mali_kbase/mali_kbase_strings.h
+++ /dev/null
@@ -1,23 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-/*
- *
- * (C) COPYRIGHT 2010-2016, 2020-2021 ARM Limited. All rights reserved.
- *
- * This program is free software and is provided to you under the terms of the
- * GNU General Public License version 2 as published by the Free Software
- * Foundation, and any use by you of this program is subject to the terms
- * of such GNU license.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, you can access it online at
- * http://www.gnu.org/licenses/gpl-2.0.html.
- *
- */
-
-extern const char kbase_drv_name[];
-extern const char kbase_timeline_name[];
diff --git a/mali_kbase/mali_kbase_utility.h b/mali_kbase/mali_kbase_utility.h
deleted file mode 100644
index 2dad49b..0000000
--- a/mali_kbase/mali_kbase_utility.h
+++ /dev/null
@@ -1,52 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-/*
- *
- * (C) COPYRIGHT 2012-2013, 2015, 2018, 2020-2021 ARM Limited. All rights reserved.
- *
- * This program is free software and is provided to you under the terms of the
- * GNU General Public License version 2 as published by the Free Software
- * Foundation, and any use by you of this program is subject to the terms
- * of such GNU license.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, you can access it online at
- * http://www.gnu.org/licenses/gpl-2.0.html.
- *
- */
-
-#ifndef _KBASE_UTILITY_H
-#define _KBASE_UTILITY_H
-
-#ifndef _KBASE_H_
-#error "Don't include this file directly, use mali_kbase.h instead"
-#endif
-
-static inline void kbase_timer_setup(struct timer_list *timer,
- void (*callback)(struct timer_list *timer))
-{
-#if KERNEL_VERSION(4, 14, 0) > LINUX_VERSION_CODE
- setup_timer(timer, (void (*)(unsigned long)) callback,
- (unsigned long) timer);
-#else
- timer_setup(timer, callback, 0);
-#endif
-}
-
-#ifndef WRITE_ONCE
- #ifdef ASSIGN_ONCE
- #define WRITE_ONCE(x, val) ASSIGN_ONCE(val, x)
- #else
- #define WRITE_ONCE(x, val) (ACCESS_ONCE(x) = (val))
- #endif
-#endif
-
-#ifndef READ_ONCE
- #define READ_ONCE(x) ACCESS_ONCE(x)
-#endif
-
-#endif /* _KBASE_UTILITY_H */
diff --git a/mali_kbase/mali_kbase_vinstr.c b/mali_kbase/mali_kbase_vinstr.c
index 5f3dabd..3fce09c 100644
--- a/mali_kbase/mali_kbase_vinstr.c
+++ b/mali_kbase/mali_kbase_vinstr.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2011-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2011-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -541,8 +541,10 @@ void kbase_vinstr_term(struct kbase_vinstr_context *vctx)
void kbase_vinstr_suspend(struct kbase_vinstr_context *vctx)
{
- if (WARN_ON(!vctx))
+ if (!vctx) {
+ pr_warn("%s: vctx is NULL\n", __func__);
return;
+ }
mutex_lock(&vctx->lock);
@@ -571,8 +573,10 @@ void kbase_vinstr_suspend(struct kbase_vinstr_context *vctx)
void kbase_vinstr_resume(struct kbase_vinstr_context *vctx)
{
- if (WARN_ON(!vctx))
+ if (!vctx) {
+		pr_warn("%s: vctx is NULL\n", __func__);
return;
+ }
mutex_lock(&vctx->lock);
diff --git a/mali_kbase/mali_linux_trace.h b/mali_kbase/mali_linux_trace.h
index 49058d3..1293a0b 100644
--- a/mali_kbase/mali_linux_trace.h
+++ b/mali_kbase/mali_linux_trace.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2011-2016, 2018-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2011-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -173,7 +173,7 @@ TRACE_EVENT(mali_total_alloc_pages_change,
((status) & AS_FAULTSTATUS_ACCESS_TYPE_MASK)
#define KBASE_MMU_FAULT_ACCESS_SYMBOLIC_STRINGS _ENSURE_PARENTHESIS(\
{AS_FAULTSTATUS_ACCESS_TYPE_ATOMIC, "ATOMIC" }, \
- {AS_FAULTSTATUS_ACCESS_TYPE_EX, "EXECUTE"}, \
+ {AS_FAULTSTATUS_ACCESS_TYPE_EXECUTE, "EXECUTE"}, \
{AS_FAULTSTATUS_ACCESS_TYPE_READ, "READ" }, \
{AS_FAULTSTATUS_ACCESS_TYPE_WRITE, "WRITE" })
#define KBASE_MMU_FAULT_STATUS_ACCESS_PRINT(status) \
diff --git a/mali_kbase/mali_kbase_strings.c b/mali_kbase/mali_power_gpu_work_period_trace.c
index 84784be..8e7bf6f 100644
--- a/mali_kbase/mali_kbase_strings.c
+++ b/mali_kbase/mali_power_gpu_work_period_trace.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2010-2016, 2020-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -19,10 +19,10 @@
*
*/
-#include "mali_kbase_strings.h"
-
-#define KBASE_DRV_NAME "mali"
-#define KBASE_TIMELINE_NAME KBASE_DRV_NAME ".timeline"
-
-const char kbase_drv_name[] = KBASE_DRV_NAME;
-const char kbase_timeline_name[] = KBASE_TIMELINE_NAME;
+/* Create the trace point if not configured in kernel */
+#ifndef CONFIG_TRACE_POWER_GPU_WORK_PERIOD
+#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD)
+#define CREATE_TRACE_POINTS
+#include "mali_power_gpu_work_period_trace.h"
+#endif /* CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD */
+#endif
diff --git a/mali_kbase/mali_power_gpu_work_period_trace.h b/mali_kbase/mali_power_gpu_work_period_trace.h
new file mode 100644
index 0000000..46e86ad
--- /dev/null
+++ b/mali_kbase/mali_power_gpu_work_period_trace.h
@@ -0,0 +1,88 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ *
+ * (C) COPYRIGHT 2023 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+#ifndef _TRACE_POWER_GPU_WORK_PERIOD_MALI
+#define _TRACE_POWER_GPU_WORK_PERIOD_MALI
+#endif
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM power
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE mali_power_gpu_work_period_trace
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+
+#if !defined(_TRACE_POWER_GPU_WORK_PERIOD_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_POWER_GPU_WORK_PERIOD_H
+
+#include <linux/tracepoint.h>
+
+/**
+ * gpu_work_period - Reports GPU work period metrics
+ *
+ * @gpu_id: Unique GPU Identifier
+ * @uid: UID of an application
+ * @start_time_ns: Start time of a GPU work period in nanoseconds
+ * @end_time_ns: End time of a GPU work period in nanoseconds
+ * @total_active_duration_ns: Total amount of time the GPU was running GPU work for the given
+ * UID during the GPU work period, in nanoseconds. This duration does
+ * not double-account parallel GPU work for the same UID.
+ */
+TRACE_EVENT(gpu_work_period,
+
+ TP_PROTO(
+ u32 gpu_id,
+ u32 uid,
+ u64 start_time_ns,
+ u64 end_time_ns,
+ u64 total_active_duration_ns
+ ),
+
+ TP_ARGS(gpu_id, uid, start_time_ns, end_time_ns, total_active_duration_ns),
+
+ TP_STRUCT__entry(
+ __field(u32, gpu_id)
+ __field(u32, uid)
+ __field(u64, start_time_ns)
+ __field(u64, end_time_ns)
+ __field(u64, total_active_duration_ns)
+ ),
+
+ TP_fast_assign(
+ __entry->gpu_id = gpu_id;
+ __entry->uid = uid;
+ __entry->start_time_ns = start_time_ns;
+ __entry->end_time_ns = end_time_ns;
+ __entry->total_active_duration_ns = total_active_duration_ns;
+ ),
+
+ TP_printk("gpu_id=%u uid=%u start_time_ns=%llu end_time_ns=%llu total_active_duration_ns=%llu",
+ __entry->gpu_id,
+ __entry->uid,
+ __entry->start_time_ns,
+ __entry->end_time_ns,
+ __entry->total_active_duration_ns)
+);
+
+#endif /* _TRACE_POWER_GPU_WORK_PERIOD_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
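
A usage sketch for the new tracepoint: including the header without CREATE_TRACE_POINTS (which only the .c file above defines) makes trace_gpu_work_period() available to callers. The call site and values here are illustrative; only the signature follows from the TRACE_EVENT() definition.

#include <linux/types.h>
#include "mali_power_gpu_work_period_trace.h"

static void example_report_work_period(u32 gpu_id, u32 uid, u64 start_ns,
				       u64 end_ns, u64 active_ns)
{
	/* Emits the power:gpu_work_period event defined by TRACE_EVENT() above */
	trace_gpu_work_period(gpu_id, uid, start_ns, end_ns, active_ns);
}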
diff --git a/mali_kbase/mmu/backend/mali_kbase_mmu_csf.c b/mali_kbase/mmu/backend/mali_kbase_mmu_csf.c
index 4cac787..a057d3c 100644
--- a/mali_kbase/mmu/backend/mali_kbase_mmu_csf.c
+++ b/mali_kbase/mmu/backend/mali_kbase_mmu_csf.c
@@ -146,8 +146,7 @@ void kbase_gpu_report_bus_fault_and_kill(struct kbase_context *kctx,
GPU_FAULTSTATUS_ACCESS_TYPE_SHIFT;
int source_id = (status & GPU_FAULTSTATUS_SOURCE_ID_MASK) >>
GPU_FAULTSTATUS_SOURCE_ID_SHIFT;
- const char *addr_valid = (status & GPU_FAULTSTATUS_ADDR_VALID_FLAG) ?
- "true" : "false";
+ const char *addr_valid = (status & GPU_FAULTSTATUS_ADDRESS_VALID_MASK) ? "true" : "false";
int as_no = as->number;
unsigned long flags;
const uintptr_t fault_addr = fault->addr;
@@ -247,12 +246,13 @@ void kbase_mmu_report_fault_and_kill(struct kbase_context *kctx,
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
kbase_mmu_disable(kctx);
kbase_ctx_flag_set(kctx, KCTX_AS_DISABLED_ON_FAULT);
+ kbase_debug_csf_fault_notify(kbdev, kctx, DF_GPU_PAGE_FAULT);
+ kbase_csf_ctx_report_page_fault_for_active_groups(kctx, fault);
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
mutex_unlock(&kbdev->mmu_hw_mutex);
/* AS transaction end */
- kbase_debug_csf_fault_notify(kbdev, kctx, DF_GPU_PAGE_FAULT);
/* Switching to UNMAPPED mode above would have enabled the firmware to
* recover from the fault (if the memory access was made by firmware)
* and it can then respond to CSG termination requests to be sent now.
@@ -368,9 +368,9 @@ void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat)
/* remember current mask */
spin_lock_irqsave(&kbdev->mmu_mask_change, flags);
- new_mask = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK));
+ new_mask = kbase_reg_read(kbdev, MMU_CONTROL_REG(MMU_IRQ_MASK));
/* mask interrupts for now */
- kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), 0);
+ kbase_reg_write(kbdev, MMU_CONTROL_REG(MMU_IRQ_MASK), 0);
spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags);
while (pf_bits) {
@@ -380,11 +380,11 @@ void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat)
struct kbase_fault *fault = &as->pf_data;
/* find faulting address */
- fault->addr = kbase_reg_read(kbdev, MMU_AS_REG(as_no,
- AS_FAULTADDRESS_HI));
+ fault->addr = kbase_reg_read(kbdev,
+ MMU_STAGE1_REG(MMU_AS_REG(as_no, AS_FAULTADDRESS_HI)));
fault->addr <<= 32;
- fault->addr |= kbase_reg_read(kbdev, MMU_AS_REG(as_no,
- AS_FAULTADDRESS_LO));
+ fault->addr |= kbase_reg_read(
+ kbdev, MMU_STAGE1_REG(MMU_AS_REG(as_no, AS_FAULTADDRESS_LO)));
/* Mark the fault protected or not */
fault->protected_mode = false;
@@ -393,14 +393,14 @@ void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat)
kbase_as_fault_debugfs_new(kbdev, as_no);
/* record the fault status */
- fault->status = kbase_reg_read(kbdev, MMU_AS_REG(as_no,
- AS_FAULTSTATUS));
+ fault->status =
+ kbase_reg_read(kbdev, MMU_STAGE1_REG(MMU_AS_REG(as_no, AS_FAULTSTATUS)));
- fault->extra_addr = kbase_reg_read(kbdev,
- MMU_AS_REG(as_no, AS_FAULTEXTRA_HI));
+ fault->extra_addr =
+ kbase_reg_read(kbdev, MMU_STAGE1_REG(MMU_AS_REG(as_no, AS_FAULTEXTRA_HI)));
fault->extra_addr <<= 32;
- fault->extra_addr |= kbase_reg_read(kbdev,
- MMU_AS_REG(as_no, AS_FAULTEXTRA_LO));
+ fault->extra_addr |=
+ kbase_reg_read(kbdev, MMU_STAGE1_REG(MMU_AS_REG(as_no, AS_FAULTEXTRA_LO)));
/* Mark page fault as handled */
pf_bits &= ~(1UL << as_no);
@@ -432,9 +432,9 @@ void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat)
/* reenable interrupts */
spin_lock_irqsave(&kbdev->mmu_mask_change, flags);
- tmp = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK));
+ tmp = kbase_reg_read(kbdev, MMU_CONTROL_REG(MMU_IRQ_MASK));
new_mask |= tmp;
- kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), new_mask);
+ kbase_reg_write(kbdev, MMU_CONTROL_REG(MMU_IRQ_MASK), new_mask);
spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags);
}
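
The register reads above assemble a 64-bit fault address from the HI/LO pair behind the new MMU_STAGE1_REG()/MMU_CONTROL_REG() macros. The recurring pattern, pulled out as an illustrative helper that the driver does not actually define:

static u64 example_read_fault_addr(struct kbase_device *kbdev, int as_no)
{
	u64 addr = kbase_reg_read(kbdev,
				  MMU_STAGE1_REG(MMU_AS_REG(as_no, AS_FAULTADDRESS_HI)));

	addr <<= 32;
	addr |= kbase_reg_read(kbdev,
			       MMU_STAGE1_REG(MMU_AS_REG(as_no, AS_FAULTADDRESS_LO)));
	return addr;
}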
@@ -470,19 +470,16 @@ static void kbase_mmu_gpu_fault_worker(struct work_struct *data)
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
fault = &faulting_as->gf_data;
status = fault->status;
- as_valid = status & GPU_FAULTSTATUS_JASID_VALID_FLAG;
+ as_valid = status & GPU_FAULTSTATUS_JASID_VALID_MASK;
address = fault->addr;
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
dev_warn(kbdev->dev,
"GPU Fault 0x%08x (%s) in AS%u at 0x%016llx\n"
"ASID_VALID: %s, ADDRESS_VALID: %s\n",
- status,
- kbase_gpu_exception_name(
- GPU_FAULTSTATUS_EXCEPTION_TYPE_GET(status)),
- as_nr, address,
- as_valid ? "true" : "false",
- status & GPU_FAULTSTATUS_ADDR_VALID_FLAG ? "true" : "false");
+ status, kbase_gpu_exception_name(GPU_FAULTSTATUS_EXCEPTION_TYPE_GET(status)),
+ as_nr, address, as_valid ? "true" : "false",
+ status & GPU_FAULTSTATUS_ADDRESS_VALID_MASK ? "true" : "false");
kctx = kbase_ctx_sched_as_to_ctx(kbdev, as_nr);
kbase_csf_ctx_handle_fault(kctx, fault);
@@ -558,9 +555,8 @@ int kbase_mmu_as_init(struct kbase_device *kbdev, unsigned int i)
kbdev->as[i].bf_data.addr = 0ULL;
kbdev->as[i].pf_data.addr = 0ULL;
kbdev->as[i].gf_data.addr = 0ULL;
- kbdev->as[i].is_unresponsive = false;
- kbdev->as[i].pf_wq = alloc_workqueue("mali_mmu%d", WQ_UNBOUND, 1, i);
+ kbdev->as[i].pf_wq = alloc_workqueue("mali_mmu%d", WQ_UNBOUND, 0, i);
if (!kbdev->as[i].pf_wq)
return -ENOMEM;
diff --git a/mali_kbase/mmu/backend/mali_kbase_mmu_jm.c b/mali_kbase/mmu/backend/mali_kbase_mmu_jm.c
index d716ce0..5c774c2 100644
--- a/mali_kbase/mmu/backend/mali_kbase_mmu_jm.c
+++ b/mali_kbase/mmu/backend/mali_kbase_mmu_jm.c
@@ -322,9 +322,9 @@ void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat)
/* remember current mask */
spin_lock_irqsave(&kbdev->mmu_mask_change, flags);
- new_mask = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK));
+ new_mask = kbase_reg_read(kbdev, MMU_CONTROL_REG(MMU_IRQ_MASK));
/* mask interrupts for now */
- kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), 0);
+ kbase_reg_write(kbdev, MMU_CONTROL_REG(MMU_IRQ_MASK), 0);
spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags);
while (bf_bits | pf_bits) {
@@ -355,11 +355,11 @@ void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat)
kctx = kbase_ctx_sched_as_to_ctx_refcount(kbdev, as_no);
/* find faulting address */
- fault->addr = kbase_reg_read(kbdev, MMU_AS_REG(as_no,
- AS_FAULTADDRESS_HI));
+ fault->addr = kbase_reg_read(kbdev,
+ MMU_STAGE1_REG(MMU_AS_REG(as_no, AS_FAULTADDRESS_HI)));
fault->addr <<= 32;
- fault->addr |= kbase_reg_read(kbdev, MMU_AS_REG(as_no,
- AS_FAULTADDRESS_LO));
+ fault->addr |= kbase_reg_read(
+ kbdev, MMU_STAGE1_REG(MMU_AS_REG(as_no, AS_FAULTADDRESS_LO)));
/* Mark the fault protected or not */
fault->protected_mode = kbdev->protected_mode;
@@ -372,13 +372,13 @@ void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat)
kbase_as_fault_debugfs_new(kbdev, as_no);
/* record the fault status */
- fault->status = kbase_reg_read(kbdev, MMU_AS_REG(as_no,
- AS_FAULTSTATUS));
- fault->extra_addr = kbase_reg_read(kbdev,
- MMU_AS_REG(as_no, AS_FAULTEXTRA_HI));
+ fault->status =
+ kbase_reg_read(kbdev, MMU_STAGE1_REG(MMU_AS_REG(as_no, AS_FAULTSTATUS)));
+ fault->extra_addr =
+ kbase_reg_read(kbdev, MMU_STAGE1_REG(MMU_AS_REG(as_no, AS_FAULTEXTRA_HI)));
fault->extra_addr <<= 32;
- fault->extra_addr |= kbase_reg_read(kbdev,
- MMU_AS_REG(as_no, AS_FAULTEXTRA_LO));
+ fault->extra_addr |=
+ kbase_reg_read(kbdev, MMU_STAGE1_REG(MMU_AS_REG(as_no, AS_FAULTEXTRA_LO)));
if (kbase_as_has_bus_fault(as, fault)) {
/* Mark bus fault as handled.
@@ -406,9 +406,9 @@ void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat)
/* reenable interrupts */
spin_lock_irqsave(&kbdev->mmu_mask_change, flags);
- tmp = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK));
+ tmp = kbase_reg_read(kbdev, MMU_CONTROL_REG(MMU_IRQ_MASK));
new_mask |= tmp;
- kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), new_mask);
+ kbase_reg_write(kbdev, MMU_CONTROL_REG(MMU_IRQ_MASK), new_mask);
spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags);
dev_dbg(kbdev->dev, "Leaving %s irq_stat %u\n",
@@ -429,9 +429,8 @@ int kbase_mmu_as_init(struct kbase_device *kbdev, unsigned int i)
kbdev->as[i].number = i;
kbdev->as[i].bf_data.addr = 0ULL;
kbdev->as[i].pf_data.addr = 0ULL;
- kbdev->as[i].is_unresponsive = false;
- kbdev->as[i].pf_wq = alloc_workqueue("mali_mmu%u", 0, 1, i);
+ kbdev->as[i].pf_wq = alloc_workqueue("mali_mmu%u", 0, 0, i);
if (!kbdev->as[i].pf_wq)
return -ENOMEM;
diff --git a/mali_kbase/mmu/mali_kbase_mmu.c b/mali_kbase/mmu/mali_kbase_mmu.c
index ccbd9c3..f8641a6 100644
--- a/mali_kbase/mmu/mali_kbase_mmu.c
+++ b/mali_kbase/mmu/mali_kbase_mmu.c
@@ -46,6 +46,7 @@
#if !MALI_USE_CSF
#include <mali_kbase_hwaccess_jm.h>
#endif
+#include <linux/version_compat_defs.h>
#include <mali_kbase_trace_gpu_mem.h>
#include <backend/gpu/mali_kbase_pm_internal.h>
@@ -57,6 +58,11 @@
/* Macro to convert updated PDGs to flags indicating levels skip in flush */
#define pgd_level_to_skip_flush(dirty_pgds) (~(dirty_pgds) & 0xF)
+static int mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu_table *mmut,
+ const u64 start_vpfn, struct tagged_addr *phys, size_t nr,
+ unsigned long flags, int const group_id, u64 *dirty_pgds,
+ struct kbase_va_region *reg, bool ignore_page_migration);
+
/* Small wrapper function to factor out GPU-dependent context releasing */
static void release_ctx(struct kbase_device *kbdev,
struct kbase_context *kctx)
@@ -201,7 +207,7 @@ static void mmu_flush_invalidate_as(struct kbase_device *kbdev, struct kbase_as
mutex_lock(&kbdev->mmu_hw_mutex);
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
- if (kbdev->pm.backend.gpu_powered && (kbase_mmu_hw_do_flush_locked(kbdev, as, op_param)))
+ if (kbdev->pm.backend.gpu_ready && (kbase_mmu_hw_do_flush_locked(kbdev, as, op_param)))
dev_err(kbdev->dev, "Flush for GPU page table update did not complete");
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
@@ -389,7 +395,7 @@ static bool kbase_mmu_handle_isolated_pgd_page(struct kbase_device *kbdev,
lockdep_assert_held(&mmut->mmu_lock);
- if (!kbase_page_migration_enabled)
+ if (!kbase_is_page_migration_enabled())
return false;
spin_lock(&page_md->migrate_lock);
@@ -404,8 +410,10 @@ static bool kbase_mmu_handle_isolated_pgd_page(struct kbase_device *kbdev,
page_md->status =
PAGE_STATUS_SET(page_md->status, FREE_IN_PROGRESS);
}
+ } else if ((PAGE_STATUS_GET(page_md->status) == FREE_IN_PROGRESS) ||
+ (PAGE_STATUS_GET(page_md->status) == ALLOCATE_IN_PROGRESS)) {
+ /* Nothing to do - fall through */
} else {
- WARN_ON_ONCE(mmut->kctx);
WARN_ON_ONCE(PAGE_STATUS_GET(page_md->status) != NOT_MOVABLE);
}
spin_unlock(&page_md->migrate_lock);
@@ -431,7 +439,7 @@ static bool kbase_mmu_handle_isolated_pgd_page(struct kbase_device *kbdev,
* @pgd: Physical address of page directory to be freed.
*
* This function is supposed to be called with mmu_lock held and after
- * ensuring that GPU won't be able to access the page.
+ * ensuring that the GPU won't be able to access the page.
*/
static void kbase_mmu_free_pgd(struct kbase_device *kbdev, struct kbase_mmu_table *mmut,
phys_addr_t pgd)
@@ -727,7 +735,7 @@ static void kbase_gpu_mmu_handle_permission_fault(struct kbase_context *kctx,
case AS_FAULTSTATUS_ACCESS_TYPE_WRITE:
kbase_gpu_mmu_handle_write_fault(kctx, faulting_as);
break;
- case AS_FAULTSTATUS_ACCESS_TYPE_EX:
+ case AS_FAULTSTATUS_ACCESS_TYPE_EXECUTE:
kbase_mmu_report_fault_and_kill(kctx, faulting_as,
"Execute Permission fault", fault);
break;
@@ -1293,10 +1301,11 @@ page_fault_retry:
* so the no_flush version of insert_pages is used which allows
* us to unlock the MMU as we see fit.
*/
- err = kbase_mmu_insert_pages_no_flush(
- kbdev, &kctx->mmu, region->start_pfn + pfn_offset,
- &kbase_get_gpu_phy_pages(region)[pfn_offset], new_pages, region->flags,
- region->gpu_alloc->group_id, &dirty_pgds, region, false);
+ err = mmu_insert_pages_no_flush(kbdev, &kctx->mmu, region->start_pfn + pfn_offset,
+ &kbase_get_gpu_phy_pages(region)[pfn_offset],
+ new_pages, region->flags,
+ region->gpu_alloc->group_id, &dirty_pgds, region,
+ false);
if (err) {
kbase_free_phy_pages_helper(region->gpu_alloc,
new_pages);
@@ -1480,7 +1489,8 @@ static phys_addr_t kbase_mmu_alloc_pgd(struct kbase_device *kbdev,
if (!p)
return KBASE_MMU_INVALID_PGD_ADDRESS;
- page = kmap(p);
+ page = kbase_kmap(p);
+
if (page == NULL)
goto alloc_free;
@@ -1513,7 +1523,7 @@ static phys_addr_t kbase_mmu_alloc_pgd(struct kbase_device *kbdev,
*/
kbase_mmu_sync_pgd_cpu(kbdev, kbase_dma_addr(p), PAGE_SIZE);
- kunmap(p);
+ kbase_kunmap(p, page);
return pgd;
alloc_free:
@@ -1553,7 +1563,7 @@ static int mmu_get_next_pgd(struct kbase_device *kbdev, struct kbase_mmu_table *
vpfn &= 0x1FF;
p = pfn_to_page(PFN_DOWN(*pgd));
- page = kmap(p);
+ page = kbase_kmap(p);
if (page == NULL) {
dev_err(kbdev->dev, "%s: kmap failure", __func__);
return -EINVAL;
@@ -1562,7 +1572,7 @@ static int mmu_get_next_pgd(struct kbase_device *kbdev, struct kbase_mmu_table *
if (!kbdev->mmu_mode->pte_is_valid(page[vpfn], level)) {
dev_dbg(kbdev->dev, "%s: invalid PTE at level %d vpfn 0x%llx", __func__, level,
vpfn);
- kunmap(p);
+ kbase_kunmap(p, page);
return -EFAULT;
} else {
target_pgd = kbdev->mmu_mode->pte_to_phy_addr(
@@ -1570,7 +1580,7 @@ static int mmu_get_next_pgd(struct kbase_device *kbdev, struct kbase_mmu_table *
kbdev->mgm_dev, MGM_DEFAULT_PTE_GROUP, level, page[vpfn]));
}
- kunmap(p);
+ kbase_kunmap(p, page);
*pgd = target_pgd;
return 0;
@@ -1700,10 +1710,10 @@ static void mmu_insert_pages_failure_recovery(struct kbase_device *kbdev,
level <= MIDGARD_MMU_BOTTOMLEVEL; level++) {
idx = (vpfn >> ((3 - level) * 9)) & 0x1FF;
pgds[level] = pgd;
- page = kmap(p);
+ page = kbase_kmap(p);
if (mmu_mode->ate_is_valid(page[idx], level))
break; /* keep the mapping */
- kunmap(p);
+ kbase_kunmap(p, page);
pgd = mmu_mode->pte_to_phy_addr(kbdev->mgm_dev->ops.mgm_pte_to_original_pte(
kbdev->mgm_dev, MGM_DEFAULT_PTE_GROUP, level, page[idx]));
p = phys_to_page(pgd);
@@ -1736,7 +1746,7 @@ static void mmu_insert_pages_failure_recovery(struct kbase_device *kbdev,
mmu_mode->entries_invalidate(&page[idx], pcount);
if (!num_of_valid_entries) {
- kunmap(p);
+ kbase_kunmap(p, page);
kbase_mmu_add_to_free_pgds_list(mmut, p);
@@ -1754,7 +1764,7 @@ static void mmu_insert_pages_failure_recovery(struct kbase_device *kbdev,
kbase_mmu_sync_pgd(kbdev, mmut->kctx, pgd + (idx * sizeof(u64)),
kbase_dma_addr(p) + sizeof(u64) * idx, sizeof(u64) * pcount,
KBASE_MMU_OP_NONE);
- kunmap(p);
+ kbase_kunmap(p, page);
next:
vpfn += count;
}
@@ -1764,7 +1774,7 @@ next:
* going to happen to these pages at this stage. They might return
* movable once they are returned to a memory pool.
*/
- if (kbase_page_migration_enabled && !ignore_page_migration && phys) {
+ if (kbase_is_page_migration_enabled() && !ignore_page_migration && phys) {
const u64 num_pages = to_vpfn - from_vpfn + 1;
u64 i;
@@ -1831,7 +1841,6 @@ static void mmu_flush_invalidate_insert_pages(struct kbase_device *kbdev,
* The bottom PGD level.
* @insert_level: The level of MMU page table where the chain of newly allocated
* PGDs needs to be linked-in/inserted.
- * The top-most PDG level to be updated.
* @insert_vpfn: The virtual page frame number for the ATE.
* @pgds_to_insert: Ptr to an array (size MIDGARD_MMU_BOTTOMLEVEL+1) that contains
* the physical addresses of newly allocated PGDs from index
@@ -1839,7 +1848,7 @@ static void mmu_flush_invalidate_insert_pages(struct kbase_device *kbdev,
* insert_level.
*
* The newly allocated PGDs are linked from the bottom level up and inserted into the PGD
- * at insert_level which already exists in the MMU Page Tables.Migration status is also
+ * at insert_level which already exists in the MMU Page Tables. Migration status is also
* updated for all the newly allocated PGD pages.
*
* Return:
@@ -1873,7 +1882,8 @@ static int update_parent_pgds(struct kbase_device *kbdev, struct kbase_mmu_table
goto failure_recovery;
}
- parent_page_va = kmap(parent_page);
+ parent_page_va = kbase_kmap(parent_page);
+
if (unlikely(parent_page_va == NULL)) {
dev_err(kbdev->dev, "%s: kmap failure", __func__);
err = -EINVAL;
@@ -1886,7 +1896,7 @@ static int update_parent_pgds(struct kbase_device *kbdev, struct kbase_mmu_table
parent_page_va[parent_vpfn] = kbdev->mgm_dev->ops.mgm_update_gpu_pte(
kbdev->mgm_dev, MGM_DEFAULT_PTE_GROUP, parent_index, pte);
kbdev->mmu_mode->set_num_valid_entries(parent_page_va, current_valid_entries + 1);
- kunmap(parent_page);
+ kbase_kunmap(parent_page, parent_page_va);
if (parent_index != insert_level) {
/* Newly allocated PGDs */
@@ -1905,7 +1915,7 @@ static int update_parent_pgds(struct kbase_device *kbdev, struct kbase_mmu_table
}
/* Update the new target_pgd page to its stable state */
- if (kbase_page_migration_enabled) {
+ if (kbase_is_page_migration_enabled()) {
struct kbase_page_metadata *page_md =
kbase_page_private(phys_to_page(target_pgd));
@@ -1934,11 +1944,11 @@ failure_recovery:
for (; pgd_index < cur_level; pgd_index++) {
phys_addr_t pgd = pgds_to_insert[pgd_index];
struct page *pgd_page = pfn_to_page(PFN_DOWN(pgd));
- u64 *pgd_page_va = kmap(pgd_page);
+ u64 *pgd_page_va = kbase_kmap(pgd_page);
u64 vpfn = (insert_vpfn >> ((3 - pgd_index) * 9)) & 0x1FF;
kbdev->mmu_mode->entries_invalidate(&pgd_page_va[vpfn], 1);
- kunmap(pgd_page);
+ kbase_kunmap(pgd_page, pgd_page_va);
}
return err;
@@ -2001,10 +2011,11 @@ static int mmu_insert_alloc_pgds(struct kbase_device *kbdev, struct kbase_mmu_ta
return 0;
}
-int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 start_vpfn,
- struct tagged_addr phys, size_t nr, unsigned long flags,
- int const group_id, enum kbase_caller_mmu_sync_info mmu_sync_info,
- bool ignore_page_migration)
+static int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 start_vpfn,
+ struct tagged_addr phys, size_t nr, unsigned long flags,
+ int const group_id,
+ enum kbase_caller_mmu_sync_info mmu_sync_info,
+ bool ignore_page_migration)
{
phys_addr_t pgd;
u64 *pgd_page;
@@ -2034,7 +2045,7 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 start_vpfn,
/* If page migration is enabled, pages involved in multiple GPU mappings
* are always treated as not movable.
*/
- if (kbase_page_migration_enabled && !ignore_page_migration) {
+ if (kbase_is_page_migration_enabled() && !ignore_page_migration) {
struct page *phys_page = as_page(phys);
struct kbase_page_metadata *page_md = kbase_page_private(phys_page);
@@ -2099,7 +2110,8 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 start_vpfn,
}
p = pfn_to_page(PFN_DOWN(pgd));
- pgd_page = kmap(p);
+
+ pgd_page = kbase_kmap(p);
if (!pgd_page) {
dev_err(kbdev->dev, "%s: kmap failure", __func__);
err = -ENOMEM;
@@ -2147,14 +2159,14 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 start_vpfn,
kbdev->mmu_mode->entries_invalidate(&pgd_page[vindex], count);
- kunmap(p);
+ kbase_kunmap(p, pgd_page);
goto fail_unlock_free_pgds;
}
}
insert_vpfn += count;
remain -= count;
- kunmap(p);
+ kbase_kunmap(p, pgd_page);
}
rt_mutex_unlock(&mmut->mmu_lock);
@@ -2211,6 +2223,9 @@ static void kbase_mmu_progress_migration_on_insert(struct tagged_addr phys,
struct page *phys_page = as_page(phys);
struct kbase_page_metadata *page_md = kbase_page_private(phys_page);
+ if (!IS_ENABLED(CONFIG_PAGE_MIGRATION_SUPPORT))
+ return;
+
spin_lock(&page_md->migrate_lock);
/* If no GPU va region is given: the metadata provided are
@@ -2245,6 +2260,9 @@ static void kbase_mmu_progress_migration_on_teardown(struct kbase_device *kbdev,
{
size_t i;
+ if (!IS_ENABLED(CONFIG_PAGE_MIGRATION_SUPPORT))
+ return;
+
for (i = 0; i < requested_nr; i++) {
struct page *phys_page = as_page(phys[i]);
struct kbase_page_metadata *page_md = kbase_page_private(phys_page);
@@ -2294,10 +2312,10 @@ u64 kbase_mmu_create_ate(struct kbase_device *const kbdev,
group_id, level, entry);
}
-int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu_table *mmut,
- const u64 start_vpfn, struct tagged_addr *phys, size_t nr,
- unsigned long flags, int const group_id, u64 *dirty_pgds,
- struct kbase_va_region *reg, bool ignore_page_migration)
+static int mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu_table *mmut,
+ const u64 start_vpfn, struct tagged_addr *phys, size_t nr,
+ unsigned long flags, int const group_id, u64 *dirty_pgds,
+ struct kbase_va_region *reg, bool ignore_page_migration)
{
phys_addr_t pgd;
u64 *pgd_page;
@@ -2378,7 +2396,8 @@ int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu
}
p = pfn_to_page(PFN_DOWN(pgd));
- pgd_page = kmap(p);
+ pgd_page = kbase_kmap(p);
+
if (!pgd_page) {
dev_err(kbdev->dev, "%s: kmap failure", __func__);
err = -ENOMEM;
@@ -2415,7 +2434,7 @@ int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu
/* If page migration is enabled, this is the right time
* to update the status of the page.
*/
- if (kbase_page_migration_enabled && !ignore_page_migration &&
+ if (kbase_is_page_migration_enabled() && !ignore_page_migration &&
!is_huge(phys[i]) && !is_partial(phys[i]))
kbase_mmu_progress_migration_on_insert(phys[i], reg, mmut,
insert_vpfn + i);
@@ -2450,7 +2469,7 @@ int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu
kbdev->mmu_mode->entries_invalidate(&pgd_page[vindex], count);
- kunmap(p);
+ kbase_kunmap(p, pgd_page);
goto fail_unlock_free_pgds;
}
}
@@ -2458,7 +2477,7 @@ int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu
phys += count;
insert_vpfn += count;
remain -= count;
- kunmap(p);
+ kbase_kunmap(p, pgd_page);
}
rt_mutex_unlock(&mmut->mmu_lock);
@@ -2485,6 +2504,23 @@ fail_unlock:
return err;
}
+int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu_table *mmut,
+ const u64 start_vpfn, struct tagged_addr *phys, size_t nr,
+ unsigned long flags, int const group_id, u64 *dirty_pgds,
+ struct kbase_va_region *reg)
+{
+ int err;
+
+ /* Early out if there is nothing to do */
+ if (nr == 0)
+ return 0;
+
+ err = mmu_insert_pages_no_flush(kbdev, mmut, start_vpfn, phys, nr, flags, group_id,
+ dirty_pgds, reg, false);
+
+ return err;
+}
+
/*
* Map 'nr' pages pointed to by 'phys' at GPU PFN 'vpfn' for GPU address space
* number 'as_nr'.
@@ -2492,7 +2528,7 @@ fail_unlock:
int kbase_mmu_insert_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, u64 vpfn,
struct tagged_addr *phys, size_t nr, unsigned long flags, int as_nr,
int const group_id, enum kbase_caller_mmu_sync_info mmu_sync_info,
- struct kbase_va_region *reg, bool ignore_page_migration)
+ struct kbase_va_region *reg)
{
int err;
u64 dirty_pgds = 0;
@@ -2501,8 +2537,8 @@ int kbase_mmu_insert_pages(struct kbase_device *kbdev, struct kbase_mmu_table *m
if (nr == 0)
return 0;
- err = kbase_mmu_insert_pages_no_flush(kbdev, mmut, vpfn, phys, nr, flags, group_id,
- &dirty_pgds, reg, ignore_page_migration);
+ err = mmu_insert_pages_no_flush(kbdev, mmut, vpfn, phys, nr, flags, group_id, &dirty_pgds,
+ reg, false);
if (err)
return err;
@@ -2513,11 +2549,12 @@ int kbase_mmu_insert_pages(struct kbase_device *kbdev, struct kbase_mmu_table *m
KBASE_EXPORT_TEST_API(kbase_mmu_insert_pages);
-int kbase_mmu_insert_imported_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut,
- u64 vpfn, struct tagged_addr *phys, size_t nr,
- unsigned long flags, int as_nr, int const group_id,
- enum kbase_caller_mmu_sync_info mmu_sync_info,
- struct kbase_va_region *reg)
+int kbase_mmu_insert_pages_skip_status_update(struct kbase_device *kbdev,
+ struct kbase_mmu_table *mmut, u64 vpfn,
+ struct tagged_addr *phys, size_t nr,
+ unsigned long flags, int as_nr, int const group_id,
+ enum kbase_caller_mmu_sync_info mmu_sync_info,
+ struct kbase_va_region *reg)
{
int err;
u64 dirty_pgds = 0;
@@ -2529,8 +2566,8 @@ int kbase_mmu_insert_imported_pages(struct kbase_device *kbdev, struct kbase_mmu
/* Imported allocations don't have metadata and therefore always ignore the
* page migration logic.
*/
- err = kbase_mmu_insert_pages_no_flush(kbdev, mmut, vpfn, phys, nr, flags, group_id,
- &dirty_pgds, reg, true);
+ err = mmu_insert_pages_no_flush(kbdev, mmut, vpfn, phys, nr, flags, group_id, &dirty_pgds,
+ reg, true);
if (err)
return err;
@@ -2555,8 +2592,8 @@ int kbase_mmu_insert_aliased_pages(struct kbase_device *kbdev, struct kbase_mmu_
/* Memory aliases are always built on top of existing allocations,
* therefore the state of physical pages shall be updated.
*/
- err = kbase_mmu_insert_pages_no_flush(kbdev, mmut, vpfn, phys, nr, flags, group_id,
- &dirty_pgds, reg, false);
+ err = mmu_insert_pages_no_flush(kbdev, mmut, vpfn, phys, nr, flags, group_id, &dirty_pgds,
+ reg, false);
if (err)
return err;
@@ -2771,7 +2808,8 @@ static void kbase_mmu_update_and_free_parent_pgds(struct kbase_device *kbdev,
current_level--) {
phys_addr_t current_pgd = pgds[current_level];
struct page *p = phys_to_page(current_pgd);
- u64 *current_page = kmap(p);
+
+ u64 *current_page = kbase_kmap(p);
unsigned int current_valid_entries =
kbdev->mmu_mode->get_num_valid_entries(current_page);
int index = (vpfn >> ((3 - current_level) * 9)) & 0x1FF;
@@ -2783,7 +2821,7 @@ static void kbase_mmu_update_and_free_parent_pgds(struct kbase_device *kbdev,
kbdev->mmu_mode->entries_invalidate(&current_page[index], 1);
if (current_valid_entries == 1 &&
current_level != MIDGARD_MMU_LEVEL(0)) {
- kunmap(p);
+ kbase_kunmap(p, current_page);
/* Ensure the cacheline containing the last valid entry
* of PGD is invalidated from the GPU cache, before the
@@ -2800,7 +2838,7 @@ static void kbase_mmu_update_and_free_parent_pgds(struct kbase_device *kbdev,
kbdev->mmu_mode->set_num_valid_entries(
current_page, current_valid_entries);
- kunmap(p);
+ kbase_kunmap(p, current_page);
kbase_mmu_sync_pgd(kbdev, mmut->kctx, current_pgd + (index * sizeof(u64)),
kbase_dma_addr(p) + (index * sizeof(u64)), sizeof(u64),
@@ -2856,7 +2894,7 @@ static void mmu_flush_invalidate_teardown_pages(struct kbase_device *kbdev,
for (i = 0; !flush_done && i < phys_page_nr; i++) {
spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags);
- if (kbdev->pm.backend.gpu_powered && (!kctx || kctx->as_nr >= 0))
+ if (kbdev->pm.backend.gpu_ready && (!kctx || kctx->as_nr >= 0))
mmu_flush_pa_range(kbdev, as_phys_addr_t(phys[i]), PAGE_SIZE,
KBASE_MMU_OP_FLUSH_MEM);
else
@@ -2897,7 +2935,7 @@ static int kbase_mmu_teardown_pgd_pages(struct kbase_device *kbdev, struct kbase
phys_addr_t next_pgd;
index = (vpfn >> ((3 - level) * 9)) & 0x1FF;
- page = kmap(p);
+ page = kbase_kmap(p);
if (mmu_mode->ate_is_valid(page[index], level))
break; /* keep the mapping */
else if (!mmu_mode->pte_is_valid(page[index], level)) {
@@ -2923,7 +2961,7 @@ static int kbase_mmu_teardown_pgd_pages(struct kbase_device *kbdev, struct kbase
next_pgd = mmu_mode->pte_to_phy_addr(
kbdev->mgm_dev->ops.mgm_pte_to_original_pte(
kbdev->mgm_dev, MGM_DEFAULT_PTE_GROUP, level, page[index]));
- kunmap(p);
+ kbase_kunmap(p, page);
pgds[level] = pgd;
pgd = next_pgd;
p = phys_to_page(pgd);
@@ -2934,7 +2972,7 @@ static int kbase_mmu_teardown_pgd_pages(struct kbase_device *kbdev, struct kbase
case MIDGARD_MMU_LEVEL(1):
dev_warn(kbdev->dev, "%s: No support for ATEs at level %d", __func__,
level);
- kunmap(p);
+ kbase_kunmap(p, page);
goto out;
case MIDGARD_MMU_LEVEL(2):
/* can only teardown if count >= 512 */
@@ -2972,7 +3010,7 @@ static int kbase_mmu_teardown_pgd_pages(struct kbase_device *kbdev, struct kbase
mmu_mode->entries_invalidate(&page[index], pcount);
if (!num_of_valid_entries) {
- kunmap(p);
+ kbase_kunmap(p, page);
/* Ensure the cacheline(s) containing the last valid entries
* of PGD is invalidated from the GPU cache, before the
@@ -2998,17 +3036,48 @@ static int kbase_mmu_teardown_pgd_pages(struct kbase_device *kbdev, struct kbase
kbase_dma_addr(p) + (index * sizeof(u64)), pcount * sizeof(u64),
flush_op);
next:
- kunmap(p);
- vpfn += count;
- nr -= count;
+ kbase_kunmap(p, page);
+ vpfn += count;
+ nr -= count;
}
out:
return 0;
}
-int kbase_mmu_teardown_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, u64 vpfn,
- struct tagged_addr *phys, size_t nr_phys_pages, size_t nr_virt_pages,
- int as_nr, bool ignore_page_migration)
+/**
+ * mmu_teardown_pages - Remove GPU virtual addresses from the MMU page table
+ *
+ * @kbdev: Pointer to kbase device.
+ * @mmut: Pointer to GPU MMU page table.
+ * @vpfn: Start page frame number of the GPU virtual pages to unmap.
+ * @phys: Array of physical pages currently mapped to the virtual
+ * pages to unmap, or NULL. This is used for GPU cache maintenance
+ * and page migration support.
+ * @nr_phys_pages: Number of physical pages to flush.
+ * @nr_virt_pages: Number of virtual pages whose PTEs should be destroyed.
+ * @as_nr: Address space number, for GPU cache maintenance operations
+ * that happen outside a specific kbase context.
+ * @ignore_page_migration: Whether page migration metadata should be ignored.
+ *
+ * We actually discard the ATE and free the page table pages if no valid entries
+ * exist in the PGD.
+ *
+ * IMPORTANT: This uses kbasep_js_runpool_release_ctx() when the context is
+ * currently scheduled into the runpool, and so potentially uses a lot of locks.
+ * These locks must be taken in the correct order with respect to others
+ * already held by the caller. Refer to kbasep_js_runpool_release_ctx() for more
+ * information.
+ *
+ * The @p phys pointer to physical pages is not necessary for unmapping virtual memory,
+ * but it is used for fine-grained GPU cache maintenance. If @p phys is NULL,
+ * GPU cache maintenance will be done as usual; that is, by invalidating the GPU caches in full
+ * instead of specific physical address ranges.
+ *
+ * Return: 0 on success, otherwise an error code.
+ */
+static int mmu_teardown_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, u64 vpfn,
+ struct tagged_addr *phys, size_t nr_phys_pages, size_t nr_virt_pages,
+ int as_nr, bool ignore_page_migration)
{
u64 start_vpfn = vpfn;
enum kbase_mmu_op_type flush_op = KBASE_MMU_OP_NONE;
@@ -3089,7 +3158,7 @@ int kbase_mmu_teardown_pages(struct kbase_device *kbdev, struct kbase_mmu_table
* updated before releasing the lock to protect against concurrent
* requests to migrate the pages, if they have been isolated.
*/
- if (kbase_page_migration_enabled && phys && !ignore_page_migration)
+ if (kbase_is_page_migration_enabled() && phys && !ignore_page_migration)
kbase_mmu_progress_migration_on_teardown(kbdev, phys, nr_phys_pages);
kbase_mmu_free_pgds_list(kbdev, mmut);
@@ -3098,7 +3167,22 @@ int kbase_mmu_teardown_pages(struct kbase_device *kbdev, struct kbase_mmu_table
return err;
}
-KBASE_EXPORT_TEST_API(kbase_mmu_teardown_pages);
+
+int kbase_mmu_teardown_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, u64 vpfn,
+ struct tagged_addr *phys, size_t nr_phys_pages, size_t nr_virt_pages,
+ int as_nr)
+{
+ return mmu_teardown_pages(kbdev, mmut, vpfn, phys, nr_phys_pages, nr_virt_pages, as_nr,
+ false);
+}
+
+int kbase_mmu_teardown_imported_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut,
+ u64 vpfn, struct tagged_addr *phys, size_t nr_phys_pages,
+ size_t nr_virt_pages, int as_nr)
+{
+ return mmu_teardown_pages(kbdev, mmut, vpfn, phys, nr_phys_pages, nr_virt_pages, as_nr,
+ true);
+}
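
With ignore_page_migration folded into the static mmu_teardown_pages(), callers now choose between the two thin wrappers above. A hypothetical call site (helper name and arguments are placeholders, not driver code):

static int example_unmap_region(struct kbase_device *kbdev, struct kbase_mmu_table *mmut,
				u64 vpfn, struct tagged_addr *phys, size_t nr_pages,
				int as_nr, bool imported)
{
	/* Imported memory carries no page-migration metadata, so its teardown
	 * path must skip the metadata update.
	 */
	if (imported)
		return kbase_mmu_teardown_imported_pages(kbdev, mmut, vpfn, phys,
							 nr_pages, nr_pages, as_nr);

	return kbase_mmu_teardown_pages(kbdev, mmut, vpfn, phys, nr_pages, nr_pages, as_nr);
}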
/**
* kbase_mmu_update_pages_no_flush() - Update phy pages and attributes data in GPU
@@ -3162,7 +3246,7 @@ int kbase_mmu_update_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu
goto fail_unlock;
p = pfn_to_page(PFN_DOWN(pgd));
- pgd_page = kmap(p);
+ pgd_page = kbase_kmap(p);
if (!pgd_page) {
dev_warn(kbdev->dev, "kmap failure on update_pages");
err = -ENOMEM;
@@ -3217,7 +3301,7 @@ int kbase_mmu_update_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu
vpfn += count;
nr -= count;
- kunmap(p);
+ kbase_kunmap(p, pgd_page);
}
rt_mutex_unlock(&mmut->mmu_lock);
@@ -3339,6 +3423,9 @@ int kbase_mmu_migrate_page(struct tagged_addr old_phys, struct tagged_addr new_p
unsigned int num_of_valid_entries;
u8 vmap_count = 0;
+ /* If page migration support is not compiled in, return with fault */
+ if (!IS_ENABLED(CONFIG_PAGE_MIGRATION_SUPPORT))
+ return -EINVAL;
/* Due to the hard binding of mmu_command_instr with kctx_id via kbase_mmu_hw_op_param,
* here we skip the no kctx case, which is only used with MCU's mmut.
*/
@@ -3356,21 +3443,21 @@ int kbase_mmu_migrate_page(struct tagged_addr old_phys, struct tagged_addr new_p
index = (vpfn >> ((3 - level) * 9)) & 0x1FF;
/* Create all mappings before copying content.
- * This is done as early as possible because is the only operation that may
+ * This is done as early as possible because it is the only operation that may
* fail. It is possible to do this before taking any locks because the
* pages to migrate are not going to change and even the parent PGD is not
* going to be affected by any other concurrent operation, since the page
* has been isolated before migration and therefore it cannot disappear in
* the middle of this function.
*/
- old_page = kmap(as_page(old_phys));
+ old_page = kbase_kmap(as_page(old_phys));
if (!old_page) {
dev_warn(kbdev->dev, "%s: kmap failure for old page.", __func__);
ret = -EINVAL;
goto old_page_map_error;
}
- new_page = kmap(as_page(new_phys));
+ new_page = kbase_kmap(as_page(new_phys));
if (!new_page) {
dev_warn(kbdev->dev, "%s: kmap failure for new page.", __func__);
ret = -EINVAL;
@@ -3457,14 +3544,13 @@ int kbase_mmu_migrate_page(struct tagged_addr old_phys, struct tagged_addr new_p
goto get_pgd_at_level_error;
}
- pgd_page = kmap(phys_to_page(pgd));
+ pgd_page = kbase_kmap(phys_to_page(pgd));
if (!pgd_page) {
dev_warn(kbdev->dev, "%s: kmap failure for PGD page.", __func__);
ret = -EINVAL;
goto pgd_page_map_error;
}
- rt_mutex_lock(&kbdev->pm.lock);
mutex_lock(&kbdev->mmu_hw_mutex);
/* Lock MMU region and flush GPU cache by using GPU control,
@@ -3475,14 +3561,13 @@ int kbase_mmu_migrate_page(struct tagged_addr old_phys, struct tagged_addr new_p
/* Defer the migration as L2 is in a transitional phase */
spin_unlock_irqrestore(&kbdev->hwaccess_lock, hwaccess_flags);
mutex_unlock(&kbdev->mmu_hw_mutex);
- rt_mutex_unlock(&kbdev->pm.lock);
dev_dbg(kbdev->dev, "%s: L2 in transition, abort PGD page migration", __func__);
ret = -EAGAIN;
goto l2_state_defer_out;
}
/* Prevent transitional phases in L2 by starting the transaction */
mmu_page_migration_transaction_begin(kbdev);
- if (kbdev->pm.backend.gpu_powered && mmut->kctx->as_nr >= 0) {
+ if (kbdev->pm.backend.gpu_ready && mmut->kctx->as_nr >= 0) {
int as_nr = mmut->kctx->as_nr;
struct kbase_as *as = &kbdev->as[as_nr];
@@ -3498,7 +3583,6 @@ int kbase_mmu_migrate_page(struct tagged_addr old_phys, struct tagged_addr new_p
if (ret < 0) {
mutex_unlock(&kbdev->mmu_hw_mutex);
- rt_mutex_unlock(&kbdev->pm.lock);
dev_err(kbdev->dev, "%s: failed to lock MMU region or flush GPU cache", __func__);
goto undo_mappings;
}
@@ -3574,7 +3658,7 @@ int kbase_mmu_migrate_page(struct tagged_addr old_phys, struct tagged_addr new_p
* won't have any effect on them.
*/
spin_lock_irqsave(&kbdev->hwaccess_lock, hwaccess_flags);
- if (kbdev->pm.backend.gpu_powered && mmut->kctx->as_nr >= 0) {
+ if (kbdev->pm.backend.gpu_ready && mmut->kctx->as_nr >= 0) {
int as_nr = mmut->kctx->as_nr;
struct kbase_as *as = &kbdev->as[as_nr];
@@ -3590,7 +3674,6 @@ int kbase_mmu_migrate_page(struct tagged_addr old_phys, struct tagged_addr new_p
spin_unlock_irqrestore(&kbdev->hwaccess_lock, hwaccess_flags);
/* Releasing locks before checking the migration transaction error state */
mutex_unlock(&kbdev->mmu_hw_mutex);
- rt_mutex_unlock(&kbdev->pm.lock);
spin_lock_irqsave(&kbdev->hwaccess_lock, hwaccess_flags);
/* Release the transition prevention in L2 by ending the transaction */
@@ -3623,24 +3706,24 @@ int kbase_mmu_migrate_page(struct tagged_addr old_phys, struct tagged_addr new_p
set_page_private(as_page(old_phys), 0);
l2_state_defer_out:
- kunmap(phys_to_page(pgd));
+ kbase_kunmap(phys_to_page(pgd), pgd_page);
pgd_page_map_error:
get_pgd_at_level_error:
page_state_change_out:
rt_mutex_unlock(&mmut->mmu_lock);
- kunmap(as_page(new_phys));
+ kbase_kunmap(as_page(new_phys), new_page);
new_page_map_error:
- kunmap(as_page(old_phys));
+ kbase_kunmap(as_page(old_phys), old_page);
old_page_map_error:
return ret;
undo_mappings:
/* Unlock the MMU table and undo mappings. */
rt_mutex_unlock(&mmut->mmu_lock);
- kunmap(phys_to_page(pgd));
- kunmap(as_page(new_phys));
- kunmap(as_page(old_phys));
+ kbase_kunmap(phys_to_page(pgd), pgd_page);
+ kbase_kunmap(as_page(new_phys), new_page);
+ kbase_kunmap(as_page(old_phys), old_page);
return ret;
}
@@ -3657,7 +3740,7 @@ static void mmu_teardown_level(struct kbase_device *kbdev, struct kbase_mmu_tabl
lockdep_assert_held(&mmut->mmu_lock);
- pgd_page = kmap_atomic(p);
+ pgd_page = kbase_kmap_atomic(p);
/* kmap_atomic should NEVER fail. */
if (WARN_ON_ONCE(pgd_page == NULL))
return;
@@ -3673,11 +3756,11 @@ static void mmu_teardown_level(struct kbase_device *kbdev, struct kbase_mmu_tabl
* there are no pages left mapped on the GPU for a context. Hence the count
* of valid entries is expected to be zero here.
*/
- if (kbase_page_migration_enabled && mmut->kctx)
+ if (kbase_is_page_migration_enabled() && mmut->kctx)
WARN_ON_ONCE(kbdev->mmu_mode->get_num_valid_entries(pgd_page));
/* Invalidate page after copying */
mmu_mode->entries_invalidate(pgd_page, KBASE_MMU_PAGE_ENTRIES);
- kunmap_atomic(pgd_page);
+ kbase_kunmap_atomic(pgd_page);
pgd_page = pgd_page_buffer;
if (level < MIDGARD_MMU_BOTTOMLEVEL) {
@@ -3696,6 +3779,24 @@ static void mmu_teardown_level(struct kbase_device *kbdev, struct kbase_mmu_tabl
kbase_mmu_free_pgd(kbdev, mmut, pgd);
}
+static void kbase_mmu_mark_non_movable(struct page *page)
+{
+ struct kbase_page_metadata *page_md;
+
+ if (!kbase_is_page_migration_enabled())
+ return;
+
+ page_md = kbase_page_private(page);
+
+ spin_lock(&page_md->migrate_lock);
+ page_md->status = PAGE_STATUS_SET(page_md->status, NOT_MOVABLE);
+
+ if (IS_PAGE_MOVABLE(page_md->status))
+ page_md->status = PAGE_MOVABLE_CLEAR(page_md->status);
+
+ spin_unlock(&page_md->migrate_lock);
+}
+
int kbase_mmu_init(struct kbase_device *const kbdev,
struct kbase_mmu_table *const mmut, struct kbase_context *const kctx,
int const group_id)
@@ -3729,11 +3830,10 @@ int kbase_mmu_init(struct kbase_device *const kbdev,
return -ENOMEM;
}
- rt_mutex_lock(&mmut->mmu_lock);
mmut->pgd = kbase_mmu_alloc_pgd(kbdev, mmut);
- rt_mutex_unlock(&mmut->mmu_lock);
}
+ kbase_mmu_mark_non_movable(pfn_to_page(PFN_DOWN(mmut->pgd)));
return 0;
}
@@ -3769,7 +3869,7 @@ void kbase_mmu_flush_pa_range(struct kbase_device *kbdev, struct kbase_context *
spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags);
if (mmu_flush_cache_on_gpu_ctrl(kbdev) && (flush_op != KBASE_MMU_OP_NONE) &&
- kbdev->pm.backend.gpu_powered && (!kctx || kctx->as_nr >= 0))
+ kbdev->pm.backend.gpu_ready && (!kctx || kctx->as_nr >= 0))
mmu_flush_pa_range(kbdev, phys, size, KBASE_MMU_OP_FLUSH_PT);
spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags);
#endif
@@ -3794,7 +3894,7 @@ static size_t kbasep_mmu_dump_level(struct kbase_context *kctx, phys_addr_t pgd,
kbdev = kctx->kbdev;
mmu_mode = kbdev->mmu_mode;
- pgd_page = kmap(pfn_to_page(PFN_DOWN(pgd)));
+ pgd_page = kbase_kmap(pfn_to_page(PFN_DOWN(pgd)));
if (!pgd_page) {
dev_warn(kbdev->dev, "%s: kmap failure", __func__);
return 0;
@@ -3829,7 +3929,7 @@ static size_t kbasep_mmu_dump_level(struct kbase_context *kctx, phys_addr_t pgd,
target_pgd, level + 1,
buffer, size_left);
if (!dump_size) {
- kunmap(pfn_to_page(PFN_DOWN(pgd)));
+ kbase_kunmap(pfn_to_page(PFN_DOWN(pgd)), pgd_page);
return 0;
}
size += dump_size;
@@ -3837,7 +3937,7 @@ static size_t kbasep_mmu_dump_level(struct kbase_context *kctx, phys_addr_t pgd,
}
}
- kunmap(pfn_to_page(PFN_DOWN(pgd)));
+ kbase_kunmap(pfn_to_page(PFN_DOWN(pgd)), pgd_page);
return size;
}
diff --git a/mali_kbase/mmu/mali_kbase_mmu.h b/mali_kbase/mmu/mali_kbase_mmu.h
index 861a5f4..e13e9b9 100644
--- a/mali_kbase/mmu/mali_kbase_mmu.h
+++ b/mali_kbase/mmu/mali_kbase_mmu.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -36,8 +36,8 @@ struct kbase_va_region;
* A pointer to this type is passed down from the outer-most callers in the kbase
* module - where the information resides as to the synchronous / asynchronous
* nature of the call flow, with respect to MMU operations. ie - does the call flow relate to
- * existing GPU work does it come from requests (like ioctl) from user-space, power management,
- * etc.
+ * existing GPU work or does it come from requests (like ioctl) from user-space, power
+ * management, etc.
*
* @CALLER_MMU_UNSET_SYNCHRONICITY: default value must be invalid to avoid accidental choice
* of a 'valid' value
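
For context, a hedged sketch of how an outer caller tags its flow before descending into the MMU code; CALLER_MMU_SYNC is an assumed member name (only the UNSET value is visible in this hunk) and the helper itself is hypothetical:

static int example_map_from_ioctl(struct kbase_device *kbdev, struct kbase_context *kctx,
				  u64 vpfn, struct tagged_addr *phys, size_t nr,
				  unsigned long flags, int group_id,
				  struct kbase_va_region *reg)
{
	/* Requests originating from user-space (e.g. an ioctl) are treated as
	 * synchronous with respect to MMU operations.
	 */
	const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_SYNC;

	return kbase_mmu_insert_pages(kbdev, &kctx->mmu, vpfn, phys, nr, flags,
				      kctx->as_nr, group_id, mmu_sync_info, reg);
}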
@@ -154,25 +154,43 @@ u64 kbase_mmu_create_ate(struct kbase_device *kbdev,
int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu_table *mmut,
u64 vpfn, struct tagged_addr *phys, size_t nr,
unsigned long flags, int group_id, u64 *dirty_pgds,
- struct kbase_va_region *reg, bool ignore_page_migration);
+ struct kbase_va_region *reg);
int kbase_mmu_insert_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, u64 vpfn,
struct tagged_addr *phys, size_t nr, unsigned long flags, int as_nr,
int group_id, enum kbase_caller_mmu_sync_info mmu_sync_info,
- struct kbase_va_region *reg, bool ignore_page_migration);
-int kbase_mmu_insert_imported_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut,
- u64 vpfn, struct tagged_addr *phys, size_t nr,
- unsigned long flags, int as_nr, int group_id,
- enum kbase_caller_mmu_sync_info mmu_sync_info,
- struct kbase_va_region *reg);
+ struct kbase_va_region *reg);
+
+/**
+ * kbase_mmu_insert_pages_skip_status_update - Map 'nr' pages pointed to by 'phys'
+ * at GPU PFN 'vpfn' for GPU address space number 'as_nr'.
+ *
+ * @kbdev: Instance of GPU platform device, allocated from the probe method.
+ * @mmut: GPU page tables.
+ * @vpfn: Start page frame number of the GPU virtual pages to map.
+ * @phys: Physical address of the page to be mapped.
+ * @nr: The number of pages to map.
+ * @flags: Bitmask of attributes of the GPU memory region being mapped.
+ * @as_nr: The GPU address space number.
+ * @group_id: The physical memory group in which the page was allocated.
+ * @mmu_sync_info: MMU-synchronous caller info.
+ * @reg: The region whose physical allocation is to be mapped.
+ *
+ * Similar to kbase_mmu_insert_pages() but skips updating each page's metadata
+ * for page migration.
+ *
+ * Return: 0 if successful, otherwise a negative error code.
+ */
+int kbase_mmu_insert_pages_skip_status_update(struct kbase_device *kbdev,
+ struct kbase_mmu_table *mmut, u64 vpfn,
+ struct tagged_addr *phys, size_t nr,
+ unsigned long flags, int as_nr, int group_id,
+ enum kbase_caller_mmu_sync_info mmu_sync_info,
+ struct kbase_va_region *reg);
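
A usage sketch for the renamed entry point (the helper below is hypothetical): imported or firmware-managed memory has no page-migration metadata, so it is mapped through the _skip_status_update variant rather than kbase_mmu_insert_pages().

static int example_map_imported(struct kbase_device *kbdev, struct kbase_context *kctx,
				u64 vpfn, struct tagged_addr *phys, size_t nr,
				unsigned long flags, int group_id,
				enum kbase_caller_mmu_sync_info mmu_sync_info,
				struct kbase_va_region *reg)
{
	/* No metadata to maintain on imported pages: skip the migration status update. */
	return kbase_mmu_insert_pages_skip_status_update(kbdev, &kctx->mmu, vpfn, phys, nr,
							 flags, kctx->as_nr, group_id,
							 mmu_sync_info, reg);
}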
int kbase_mmu_insert_aliased_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut,
u64 vpfn, struct tagged_addr *phys, size_t nr,
unsigned long flags, int as_nr, int group_id,
enum kbase_caller_mmu_sync_info mmu_sync_info,
struct kbase_va_region *reg);
-int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn, struct tagged_addr phys,
- size_t nr, unsigned long flags, int group_id,
- enum kbase_caller_mmu_sync_info mmu_sync_info,
- bool ignore_page_migration);
int kbase_mmu_insert_single_imported_page(struct kbase_context *kctx, u64 vpfn,
struct tagged_addr phys, size_t nr, unsigned long flags,
int group_id,
@@ -182,40 +200,16 @@ int kbase_mmu_insert_single_aliased_page(struct kbase_context *kctx, u64 vpfn,
int group_id,
enum kbase_caller_mmu_sync_info mmu_sync_info);
-/**
- * kbase_mmu_teardown_pages - Remove GPU virtual addresses from the MMU page table
- *
- * @kbdev: Pointer to kbase device.
- * @mmut: Pointer to GPU MMU page table.
- * @vpfn: Start page frame number of the GPU virtual pages to unmap.
- * @phys: Array of physical pages currently mapped to the virtual
- * pages to unmap, or NULL. This is used for GPU cache maintenance
- * and page migration support.
- * @nr_phys_pages: Number of physical pages to flush.
- * @nr_virt_pages: Number of virtual pages whose PTEs should be destroyed.
- * @as_nr: Address space number, for GPU cache maintenance operations
- * that happen outside a specific kbase context.
- * @ignore_page_migration: Whether page migration metadata should be ignored.
- *
- * We actually discard the ATE and free the page table pages if no valid entries
- * exist in PGD.
- *
- * IMPORTANT: This uses kbasep_js_runpool_release_ctx() when the context is
- * currently scheduled into the runpool, and so potentially uses a lot of locks.
- * These locks must be taken in the correct order with respect to others
- * already held by the caller. Refer to kbasep_js_runpool_release_ctx() for more
- * information.
- *
- * The @p phys pointer to physical pages is not necessary for unmapping virtual memory,
- * but it is used for fine-grained GPU cache maintenance. If @p phys is NULL,
- * GPU cache maintenance will be done as usual, that is invalidating the whole GPU caches
- * instead of specific physical address ranges.
- *
- * Return: 0 on success, otherwise an error code.
- */
int kbase_mmu_teardown_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, u64 vpfn,
struct tagged_addr *phys, size_t nr_phys_pages, size_t nr_virt_pages,
- int as_nr, bool ignore_page_migration);
+ int as_nr);
+int kbase_mmu_teardown_imported_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut,
+ u64 vpfn, struct tagged_addr *phys, size_t nr_phys_pages,
+ size_t nr_virt_pages, int as_nr);
+#define kbase_mmu_teardown_firmware_pages(kbdev, mmut, vpfn, phys, nr_phys_pages, nr_virt_pages, \
+ as_nr) \
+ kbase_mmu_teardown_imported_pages(kbdev, mmut, vpfn, phys, nr_phys_pages, nr_virt_pages, \
+ as_nr)
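
The firmware teardown helper is, for now, a plain alias. A call like the one in this hypothetical wrapper resolves to kbase_mmu_teardown_imported_pages() with identical arguments:

static int example_unmap_fw_pages(struct kbase_device *kbdev, struct kbase_mmu_table *fw_mmut,
				  u64 vpfn, struct tagged_addr *phys, size_t nr_pages,
				  int as_nr)
{
	/* Firmware pages, like imported pages, carry no migration metadata. */
	return kbase_mmu_teardown_firmware_pages(kbdev, fw_mmut, vpfn, phys,
						 nr_pages, nr_pages, as_nr);
}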
int kbase_mmu_update_pages(struct kbase_context *kctx, u64 vpfn,
struct tagged_addr *phys, size_t nr,
diff --git a/mali_kbase/mmu/mali_kbase_mmu_hw.h b/mali_kbase/mmu/mali_kbase_mmu_hw.h
index d53f928..49e050e 100644
--- a/mali_kbase/mmu/mali_kbase_mmu_hw.h
+++ b/mali_kbase/mmu/mali_kbase_mmu_hw.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2014-2015, 2018-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -58,7 +58,7 @@ enum kbase_mmu_fault_type {
* struct kbase_mmu_hw_op_param - parameters for kbase_mmu_hw_do_* functions
* @vpfn: MMU Virtual Page Frame Number to start the operation on.
* @nr: Number of pages to work on.
- * @op: Operation type (written to ASn_COMMAND).
+ * @op: Operation type (written to AS_COMMAND).
* @kctx_id: Kernel context ID for MMU command tracepoint.
* @mmu_sync_info: Indicates whether this call is synchronous wrt MMU ops.
* @flush_skip_levels: Page table levels to skip flushing. (Only
diff --git a/mali_kbase/mmu/mali_kbase_mmu_hw_direct.c b/mali_kbase/mmu/mali_kbase_mmu_hw_direct.c
index ecfa23d..ca9f060 100644
--- a/mali_kbase/mmu/mali_kbase_mmu_hw_direct.c
+++ b/mali_kbase/mmu/mali_kbase_mmu_hw_direct.c
@@ -170,10 +170,10 @@ static int lock_region(struct kbase_gpu_props const *gpu_props, u64 *lockaddr,
static int wait_ready(struct kbase_device *kbdev, unsigned int as_nr)
{
const ktime_t wait_loop_start = ktime_get_raw();
- const u32 mmu_as_inactive_wait_time_ms = kbdev->mmu_as_inactive_wait_time_ms;
+ const u32 mmu_as_inactive_wait_time_ms = kbdev->mmu_or_gpu_cache_op_wait_time_ms;
s64 diff;
- if (unlikely(kbdev->as[as_nr].is_unresponsive))
+ if (unlikely(kbdev->mmu_unresponsive))
return -EBUSY;
do {
@@ -181,7 +181,7 @@ static int wait_ready(struct kbase_device *kbdev, unsigned int as_nr)
for (i = 0; i < 1000; i++) {
/* Wait for the MMU status to indicate there is no active command */
- if (!(kbase_reg_read(kbdev, MMU_AS_REG(as_nr, AS_STATUS)) &
+ if (!(kbase_reg_read(kbdev, MMU_STAGE1_REG(MMU_AS_REG(as_nr, AS_STATUS))) &
AS_STATUS_AS_ACTIVE))
return 0;
}
@@ -192,7 +192,7 @@ static int wait_ready(struct kbase_device *kbdev, unsigned int as_nr)
dev_err(kbdev->dev,
"AS_ACTIVE bit stuck for as %u. Might be caused by unstable GPU clk/pwr or faulty system",
as_nr);
- kbdev->as[as_nr].is_unresponsive = true;
+ kbdev->mmu_unresponsive = true;
if (kbase_prepare_to_reset_gpu_locked(kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR))
kbase_reset_gpu_locked(kbdev);
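
wait_ready() now polls AS_STATUS through MMU_STAGE1_REG() against the device-wide mmu_or_gpu_cache_op_wait_time_ms budget and latches a single kbdev->mmu_unresponsive flag. The bounded-poll shape it relies on is roughly the following sketch (generic names, not the driver's exact code):

static int example_poll_until_idle(struct kbase_device *kbdev, u32 reg_offset, u32 busy_bit,
				   u32 timeout_ms)
{
	const ktime_t start = ktime_get_raw();

	do {
		unsigned int i;

		/* Poll in short bursts so ktime_get_raw() is not hammered. */
		for (i = 0; i < 1000; i++) {
			if (!(kbase_reg_read(kbdev, reg_offset) & busy_bit))
				return 0;
		}
	} while (ktime_to_ms(ktime_sub(ktime_get_raw(), start)) < timeout_ms);

	return -ETIMEDOUT;
}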
@@ -205,7 +205,7 @@ static int write_cmd(struct kbase_device *kbdev, int as_nr, u32 cmd)
const int status = wait_ready(kbdev, as_nr);
if (likely(status == 0))
- kbase_reg_write(kbdev, MMU_AS_REG(as_nr, AS_COMMAND), cmd);
+ kbase_reg_write(kbdev, MMU_STAGE1_REG(MMU_AS_REG(as_nr, AS_COMMAND)), cmd);
else if (status == -EBUSY) {
dev_dbg(kbdev->dev,
"Skipped the wait for AS_ACTIVE bit for as %u, before sending MMU command %u",
@@ -277,9 +277,8 @@ static int apply_hw_issue_GPU2019_3901_wa(struct kbase_device *kbdev, u32 *mmu_c
* the workaround can be safely skipped.
*/
if (kbdev->pm.backend.l2_state != KBASE_L2_OFF) {
- if (*mmu_cmd != AS_COMMAND_FLUSH_MEM) {
- dev_warn(kbdev->dev,
- "Unexpected mmu command received");
+ if (unlikely(*mmu_cmd != AS_COMMAND_FLUSH_MEM)) {
+ dev_warn(kbdev->dev, "Unexpected MMU command(%u) received", *mmu_cmd);
return -EINVAL;
}
@@ -341,19 +340,18 @@ void kbase_mmu_hw_configure(struct kbase_device *kbdev, struct kbase_as *as)
transcfg = (transcfg | AS_TRANSCFG_PTW_SH_OS);
}
- kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSCFG_LO),
- transcfg);
- kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSCFG_HI),
+ kbase_reg_write(kbdev, MMU_STAGE1_REG(MMU_AS_REG(as->number, AS_TRANSCFG_LO)), transcfg);
+ kbase_reg_write(kbdev, MMU_STAGE1_REG(MMU_AS_REG(as->number, AS_TRANSCFG_HI)),
(transcfg >> 32) & 0xFFFFFFFFUL);
- kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSTAB_LO),
+ kbase_reg_write(kbdev, MMU_STAGE1_REG(MMU_AS_REG(as->number, AS_TRANSTAB_LO)),
current_setup->transtab & 0xFFFFFFFFUL);
- kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSTAB_HI),
+ kbase_reg_write(kbdev, MMU_STAGE1_REG(MMU_AS_REG(as->number, AS_TRANSTAB_HI)),
(current_setup->transtab >> 32) & 0xFFFFFFFFUL);
- kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_MEMATTR_LO),
+ kbase_reg_write(kbdev, MMU_STAGE1_REG(MMU_AS_REG(as->number, AS_MEMATTR_LO)),
current_setup->memattr & 0xFFFFFFFFUL);
- kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_MEMATTR_HI),
+ kbase_reg_write(kbdev, MMU_STAGE1_REG(MMU_AS_REG(as->number, AS_MEMATTR_HI)),
(current_setup->memattr >> 32) & 0xFFFFFFFFUL);
KBASE_TLSTREAM_TL_ATTRIB_AS_CONFIG(kbdev, as,
@@ -401,9 +399,9 @@ static int mmu_hw_set_lock_addr(struct kbase_device *kbdev, int as_nr, u64 *lock
if (!ret) {
/* Set the region that needs to be updated */
- kbase_reg_write(kbdev, MMU_AS_REG(as_nr, AS_LOCKADDR_LO),
+ kbase_reg_write(kbdev, MMU_STAGE1_REG(MMU_AS_REG(as_nr, AS_LOCKADDR_LO)),
*lock_addr & 0xFFFFFFFFUL);
- kbase_reg_write(kbdev, MMU_AS_REG(as_nr, AS_LOCKADDR_HI),
+ kbase_reg_write(kbdev, MMU_STAGE1_REG(MMU_AS_REG(as_nr, AS_LOCKADDR_HI)),
(*lock_addr >> 32) & 0xFFFFFFFFUL);
}
return ret;
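
Both hunks above program 64-bit values (TRANSTAB, MEMATTR, TRANSCFG, LOCKADDR) as LO/HI register pairs, now addressed through MMU_STAGE1_REG(). The repeated pattern, condensed into a hypothetical helper:

static void example_write_as_reg64(struct kbase_device *kbdev, unsigned int as_nr,
				   u32 lo_offset, u32 hi_offset, u64 value)
{
	/* Low word first, then the high word, mirroring the writes above. */
	kbase_reg_write(kbdev, MMU_STAGE1_REG(MMU_AS_REG(as_nr, lo_offset)),
			value & 0xFFFFFFFFUL);
	kbase_reg_write(kbdev, MMU_STAGE1_REG(MMU_AS_REG(as_nr, hi_offset)),
			(value >> 32) & 0xFFFFFFFFUL);
}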
@@ -490,9 +488,11 @@ int kbase_mmu_hw_do_unlock_no_addr(struct kbase_device *kbdev, struct kbase_as *
if (likely(!ret)) {
u64 lock_addr = 0x0;
/* read MMU_AS_CONTROL.LOCKADDR register */
- lock_addr |= (u64)kbase_reg_read(kbdev, MMU_AS_REG(as->number, AS_LOCKADDR_HI))
+ lock_addr |= (u64)kbase_reg_read(
+ kbdev, MMU_STAGE1_REG(MMU_AS_REG(as->number, AS_LOCKADDR_HI)))
<< 32;
- lock_addr |= (u64)kbase_reg_read(kbdev, MMU_AS_REG(as->number, AS_LOCKADDR_LO));
+ lock_addr |= (u64)kbase_reg_read(
+ kbdev, MMU_STAGE1_REG(MMU_AS_REG(as->number, AS_LOCKADDR_LO)));
mmu_command_instr(kbdev, op_param->kctx_id, AS_COMMAND_UNLOCK,
lock_addr, op_param->mmu_sync_info);
@@ -572,8 +572,14 @@ static int mmu_hw_do_flush(struct kbase_device *kbdev, struct kbase_as *as,
ret = apply_hw_issue_GPU2019_3901_wa(kbdev, &mmu_cmd, as->number);
}
- if (ret)
- return ret;
+ if (ret) {
+ dev_warn(
+ kbdev->dev,
+ "Failed to apply WA for HW issue when doing MMU flush op on VA range %llx-%llx for AS %u",
+ op_param->vpfn << PAGE_SHIFT,
+ ((op_param->vpfn + op_param->nr) << PAGE_SHIFT) - 1, as->number);
+ /* Continue with the MMU flush operation */
+ }
}
#endif
@@ -664,7 +670,7 @@ void kbase_mmu_hw_clear_fault(struct kbase_device *kbdev, struct kbase_as *as,
type == KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED)
pf_bf_mask |= MMU_BUS_ERROR(as->number);
#endif
- kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), pf_bf_mask);
+ kbase_reg_write(kbdev, MMU_CONTROL_REG(MMU_IRQ_CLEAR), pf_bf_mask);
unlock:
spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags);
@@ -688,15 +694,15 @@ void kbase_mmu_hw_enable_fault(struct kbase_device *kbdev, struct kbase_as *as,
if (kbdev->irq_reset_flush)
goto unlock;
- irq_mask = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK)) |
- MMU_PAGE_FAULT(as->number);
+ irq_mask =
+ kbase_reg_read(kbdev, MMU_CONTROL_REG(MMU_IRQ_MASK)) | MMU_PAGE_FAULT(as->number);
#if !MALI_USE_CSF
if (type == KBASE_MMU_FAULT_TYPE_BUS ||
type == KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED)
irq_mask |= MMU_BUS_ERROR(as->number);
#endif
- kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), irq_mask);
+ kbase_reg_write(kbdev, MMU_CONTROL_REG(MMU_IRQ_MASK), irq_mask);
unlock:
spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags);
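
The fault-IRQ paths move from MMU_REG() to MMU_CONTROL_REG() for the IRQ mask and clear registers. Stripped of the irq_reset_flush check, the enable path is a read-modify-write under the mmu_mask_change lock, essentially:

static void example_enable_page_fault_irq(struct kbase_device *kbdev, unsigned int as_no)
{
	unsigned long flags;
	u32 irq_mask;

	/* Serialize against concurrent IRQ mask updates. */
	spin_lock_irqsave(&kbdev->mmu_mask_change, flags);
	irq_mask = kbase_reg_read(kbdev, MMU_CONTROL_REG(MMU_IRQ_MASK)) |
		   MMU_PAGE_FAULT(as_no);
	kbase_reg_write(kbdev, MMU_CONTROL_REG(MMU_IRQ_MASK), irq_mask);
	spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags);
}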
diff --git a/mali_kbase/platform/Kconfig b/mali_kbase/platform/Kconfig
index de4203c..b190e26 100644
--- a/mali_kbase/platform/Kconfig
+++ b/mali_kbase/platform/Kconfig
@@ -1,6 +1,6 @@
# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
#
-# (C) COPYRIGHT 2012-2013, 2017, 2021 ARM Limited. All rights reserved.
+# (C) COPYRIGHT 2012-2023 ARM Limited. All rights reserved.
#
# This program is free software and is provided to you under the terms of the
# GNU General Public License version 2 as published by the Free Software
@@ -20,7 +20,7 @@
# Add your platform specific Kconfig file here
#
-# "drivers/gpu/arm/midgard/platform/xxx/Kconfig"
+# "$(MALI_KCONFIG_EXT_PREFIX)drivers/gpu/arm/midgard/platform/xxx/Kconfig"
#
# Where xxx is the platform name set in MALI_PLATFORM_NAME
#
diff --git a/mali_kbase/platform/meson/mali_kbase_config_platform.h b/mali_kbase/platform/meson/mali_kbase_config_platform.h
index 06279e2..866a7de 100644
--- a/mali_kbase/platform/meson/mali_kbase_config_platform.h
+++ b/mali_kbase/platform/meson/mali_kbase_config_platform.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2014-2017, 2019-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2017, 2019-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -20,7 +20,7 @@
*/
/**
- * Power management configuration
+ * POWER_MANAGEMENT_CALLBACKS - Power management configuration
*
* Attached value: pointer to @ref kbase_pm_callback_conf
* Default value: See @ref kbase_pm_callback_conf
@@ -28,7 +28,7 @@
#define POWER_MANAGEMENT_CALLBACKS (&pm_callbacks)
/**
- * Platform specific configuration functions
+ * PLATFORM_FUNCS - Platform specific configuration functions
*
* Attached value: pointer to @ref kbase_platform_funcs_conf
* Default value: See @ref kbase_platform_funcs_conf
@@ -38,7 +38,7 @@
extern struct kbase_pm_callback_conf pm_callbacks;
/**
- * Autosuspend delay
+ * AUTO_SUSPEND_DELAY - Autosuspend delay
*
* The delay time (in milliseconds) to be used for autosuspend
*/
diff --git a/mali_kbase/platform/pixel/pixel_gpu_sscd.c b/mali_kbase/platform/pixel/pixel_gpu_sscd.c
index c65e6ce..75f3c2a 100644
--- a/mali_kbase/platform/pixel/pixel_gpu_sscd.c
+++ b/mali_kbase/platform/pixel/pixel_gpu_sscd.c
@@ -119,7 +119,7 @@ static void get_fw_trace(struct kbase_device *kbdev, struct sscd_segment *seg)
.version = 1,
};
- tb = kbase_csf_firmware_get_trace_buffer(kbdev, FIRMWARE_LOG_BUF_NAME);
+ tb = kbase_csf_firmware_get_trace_buffer(kbdev, KBASE_CSFFW_LOG_BUF_NAME);
if (tb == NULL) {
dev_err(kbdev->dev, "pixel: failed to open firmware trace buffer");
diff --git a/mali_kbase/tests/Kbuild b/mali_kbase/tests/Kbuild
index 38e4dd4..72ca70a 100644
--- a/mali_kbase/tests/Kbuild
+++ b/mali_kbase/tests/Kbuild
@@ -1,6 +1,6 @@
# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
#
-# (C) COPYRIGHT 2017, 2020-2022 ARM Limited. All rights reserved.
+# (C) COPYRIGHT 2017-2023 ARM Limited. All rights reserved.
#
# This program is free software and is provided to you under the terms of the
# GNU General Public License version 2 as published by the Free Software
@@ -17,6 +17,7 @@
# http://www.gnu.org/licenses/gpl-2.0.html.
#
#
+src:=$(if $(patsubst /%,,$(src)),$(srctree)/$(src),$(src))
ccflags-y += -I$(src)/include \
-I$(src)
@@ -29,3 +30,4 @@ obj-$(CONFIG_MALI_KUTF_IRQ_TEST) += mali_kutf_irq_test/
obj-$(CONFIG_MALI_KUTF_CLK_RATE_TRACE) += mali_kutf_clk_rate_trace/kernel/
obj-$(CONFIG_MALI_KUTF_MGM_INTEGRATION) += mali_kutf_mgm_integration_test/
+
diff --git a/mali_kbase/tests/Kconfig b/mali_kbase/tests/Kconfig
index e6f0376..f100901 100644
--- a/mali_kbase/tests/Kconfig
+++ b/mali_kbase/tests/Kconfig
@@ -1,6 +1,6 @@
# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
#
-# (C) COPYRIGHT 2017, 2020-2022 ARM Limited. All rights reserved.
+# (C) COPYRIGHT 2017, 2020-2023 ARM Limited. All rights reserved.
#
# This program is free software and is provided to you under the terms of the
# GNU General Public License version 2 as published by the Free Software
@@ -65,5 +65,6 @@ config MALI_KUTF_MGM_INTEGRATION_TEST
- mali_kutf_mgm_integration_test.ko
+
comment "Enable MALI_DEBUG for KUTF modules support"
depends on MALI_MIDGARD && !MALI_DEBUG && MALI_KUTF
diff --git a/mali_kbase/tests/Mconfig b/mali_kbase/tests/Mconfig
index d81c639..aa09274 100644
--- a/mali_kbase/tests/Mconfig
+++ b/mali_kbase/tests/Mconfig
@@ -1,6 +1,6 @@
# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
#
-# (C) COPYRIGHT 2018-2022 ARM Limited. All rights reserved.
+# (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved.
#
# This program is free software and is provided to you under the terms of the
# GNU General Public License version 2 as published by the Free Software
@@ -65,6 +65,7 @@ config MALI_KUTF_MGM_INTEGRATION_TEST
- mali_kutf_mgm_integration_test.ko
+
# Enable MALI_DEBUG for KUTF modules support
config UNIT_TEST_KERNEL_MODULES
diff --git a/mali_kbase/tests/mali_kutf_clk_rate_trace/kernel/mali_kutf_clk_rate_trace_test.c b/mali_kbase/tests/mali_kutf_clk_rate_trace/kernel/mali_kutf_clk_rate_trace_test.c
index a6f54b6..8b86fb0 100644
--- a/mali_kbase/tests/mali_kutf_clk_rate_trace/kernel/mali_kutf_clk_rate_trace_test.c
+++ b/mali_kbase/tests/mali_kutf_clk_rate_trace/kernel/mali_kutf_clk_rate_trace_test.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2020-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -442,8 +442,9 @@ static const char *kutf_clk_trace_do_get_platform(
#if defined(CONFIG_MALI_ARBITER_SUPPORT) && defined(CONFIG_OF)
struct kutf_clk_rate_trace_fixture_data *data = context->fixture;
- arbiter_if_node =
- of_get_property(data->kbdev->dev->of_node, "arbiter_if", NULL);
+ arbiter_if_node = of_get_property(data->kbdev->dev->of_node, "arbiter-if", NULL);
+ if (!arbiter_if_node)
+ arbiter_if_node = of_get_property(data->kbdev->dev->of_node, "arbiter_if", NULL);
#endif
if (arbiter_if_node) {
power_node = of_find_compatible_node(NULL, NULL,
diff --git a/mali_kbase/thirdparty/mali_kbase_mmap.c b/mali_kbase/thirdparty/mali_kbase_mmap.c
index 1e636b9..20f7496 100644
--- a/mali_kbase/thirdparty/mali_kbase_mmap.c
+++ b/mali_kbase/thirdparty/mali_kbase_mmap.c
@@ -303,8 +303,7 @@ unsigned long kbase_context_get_unmapped_area(struct kbase_context *const kctx,
* is no free region at the address found originally by too large a
* same_va_end_addr here, and will fail the allocation gracefully.
*/
- struct kbase_reg_zone *zone =
- kbase_ctx_reg_zone_get_nolock(kctx, KBASE_REG_ZONE_SAME_VA);
+ struct kbase_reg_zone *zone = kbase_ctx_reg_zone_get_nolock(kctx, SAME_VA_ZONE);
u64 same_va_end_addr = kbase_reg_zone_end_pfn(zone) << PAGE_SHIFT;
#if (KERNEL_VERSION(6, 1, 0) <= LINUX_VERSION_CODE)
const unsigned long mmap_end = arch_get_mmap_end(addr, len, flags);
@@ -386,7 +385,7 @@ unsigned long kbase_context_get_unmapped_area(struct kbase_context *const kctx,
#ifndef CONFIG_64BIT
} else {
return current->mm->get_unmapped_area(
- kctx->filp, addr, len, pgoff, flags);
+ kctx->kfile->filp, addr, len, pgoff, flags);
#endif
}