author    | Jörg Wagner <jorwag@google.com> | 2023-08-31 19:15:13 +0000
committer | Jörg Wagner <jorwag@google.com> | 2023-09-01 09:13:55 +0000
commit    | b6fd708b3a4da86a196a61592ea3585f1aca7313 (patch)
tree      | 1cbe3029a45bf9869c17a5b6954e5ae074b44ac8
parent    | 46edf1b5965d872c5f8a09c6dc3dcbff58f78a92 (diff)
parent    | e61eb93296e9f940b32d4ad4b0c3a5557cbeaf17 (diff)
Merge r44p1-00dev3 from partner/upstream into android13-gs-pixel-5.10-udc-qpr1
Bug: 290882327
Change-Id: I90723cbaa3f294431087587fd8025f0688e51bf2
129 files changed, 7422 insertions, 3458 deletions
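The 1.20 (CSF) and 11.39 (JM) interface notes below restrict a child process from using a Mali device file descriptor inherited from its parent. A minimal userspace sketch of the described behaviour; the device node name and the exact error returned are assumptions, not stated in this changelog:

#include <fcntl.h>
#include <stdio.h>
#include <sys/wait.h>
#include <unistd.h>

int main(void)
{
	char c;
	int fd = open("/dev/mali0", O_RDWR); /* device node name assumed */

	if (fd < 0)
		return 1;

	if (fork() == 0) {
		/* Child: mmap/ioctl/read/poll on the inherited fd are expected
		 * to be rejected once the driver reports UK 1.20 / 11.39.
		 */
		if (read(fd, &c, 1) < 0)
			perror("read on inherited Mali fd");
		_exit(0);
	}
	wait(NULL);
	close(fd);
	return 0;
}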
diff --git a/common/include/linux/version_compat_defs.h b/common/include/linux/version_compat_defs.h index c9b1f62..47551f2 100644 --- a/common/include/linux/version_compat_defs.h +++ b/common/include/linux/version_compat_defs.h @@ -23,6 +23,21 @@ #define _VERSION_COMPAT_DEFS_H_ #include <linux/version.h> +#include <linux/highmem.h> +#include <linux/timer.h> + +#if (KERNEL_VERSION(4, 4, 267) < LINUX_VERSION_CODE) +#include <linux/overflow.h> +#endif + +#include <linux/bitops.h> +#if (KERNEL_VERSION(4, 19, 0) <= LINUX_VERSION_CODE) +#include <linux/bits.h> +#endif + +#ifndef BITS_PER_TYPE +#define BITS_PER_TYPE(type) (sizeof(type) * BITS_PER_BYTE) +#endif #if KERNEL_VERSION(4, 16, 0) > LINUX_VERSION_CODE typedef unsigned int __poll_t; @@ -62,18 +77,167 @@ typedef unsigned int __poll_t; /* Replace the default definition with CONFIG_LSM_MMAP_MIN_ADDR */ #undef kbase_mmap_min_addr #define kbase_mmap_min_addr CONFIG_LSM_MMAP_MIN_ADDR -#pragma message "kbase_mmap_min_addr compiled to CONFIG_LSM_MMAP_MIN_ADDR, no runtime update!" +#define KBASE_COMPILED_MMAP_MIN_ADDR_MSG \ + "* MALI kbase_mmap_min_addr compiled to CONFIG_LSM_MMAP_MIN_ADDR, no runtime update possible! *" #endif /* (CONFIG_LSM_MMAP_MIN_ADDR > CONFIG_DEFAULT_MMAP_MIN_ADDR) */ #endif /* CONFIG_LSM_MMAP_MIN_ADDR */ #if (kbase_mmap_min_addr == CONFIG_DEFAULT_MMAP_MIN_ADDR) -#pragma message "kbase_mmap_min_addr compiled to CONFIG_DEFAULT_MMAP_MIN_ADDR, no runtime update!" +#define KBASE_COMPILED_MMAP_MIN_ADDR_MSG \ + "* MALI kbase_mmap_min_addr compiled to CONFIG_DEFAULT_MMAP_MIN_ADDR, no runtime update possible! *" #endif #else /* CONFIG_MMU */ #define kbase_mmap_min_addr (0UL) -#pragma message "kbase_mmap_min_addr compiled to (0UL), no runtime update!" +#define KBASE_COMPILED_MMAP_MIN_ADDR_MSG \ + "* MALI kbase_mmap_min_addr compiled to (0UL), no runtime update possible! *" #endif /* CONFIG_MMU */ #endif /* KERNEL_VERSION(6, 1, 0) <= LINUX_VERSION_CODE */ +static inline void kbase_timer_setup(struct timer_list *timer, + void (*callback)(struct timer_list *timer)) +{ +#if KERNEL_VERSION(4, 14, 0) > LINUX_VERSION_CODE + setup_timer(timer, (void (*)(unsigned long))callback, (unsigned long)timer); +#else + timer_setup(timer, callback, 0); +#endif +} + +#ifndef WRITE_ONCE +#ifdef ASSIGN_ONCE +#define WRITE_ONCE(x, val) ASSIGN_ONCE(val, x) +#else +#define WRITE_ONCE(x, val) (ACCESS_ONCE(x) = (val)) +#endif +#endif + +#ifndef READ_ONCE +#define READ_ONCE(x) ACCESS_ONCE(x) +#endif + +static inline void *kbase_kmap(struct page *p) +{ +#if KERNEL_VERSION(5, 11, 0) <= LINUX_VERSION_CODE + return kmap_local_page(p); +#else + return kmap(p); +#endif /* KERNEL_VERSION(5, 11, 0) */ +} + +static inline void *kbase_kmap_atomic(struct page *p) +{ +#if KERNEL_VERSION(5, 11, 0) <= LINUX_VERSION_CODE + return kmap_local_page(p); +#else + return kmap_atomic(p); +#endif /* KERNEL_VERSION(5, 11, 0) */ +} + +static inline void kbase_kunmap(struct page *p, void *address) +{ +#if KERNEL_VERSION(5, 11, 0) <= LINUX_VERSION_CODE + kunmap_local(address); +#else + kunmap(p); +#endif /* KERNEL_VERSION(5, 11, 0) */ +} + +static inline void kbase_kunmap_atomic(void *address) +{ +#if KERNEL_VERSION(5, 11, 0) <= LINUX_VERSION_CODE + kunmap_local(address); +#else + kunmap_atomic(address); +#endif /* KERNEL_VERSION(5, 11, 0) */ +} +
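/*
 * A minimal sketch (not part of this patch) of how a call site uses the
 * kbase_kmap*() wrappers above, so that the kmap_local_page()/kmap()
 * version check stays inside version_compat_defs.h. The zeroing helper
 * below is illustrative only.
 */
#include <linux/mm.h>
#include <linux/string.h>
#include <linux/version_compat_defs.h>

static void example_zero_page(struct page *p)
{
	void *va = kbase_kmap(p); /* kmap_local_page() on >= 5.11, kmap() otherwise */

	memset(va, 0, PAGE_SIZE);
	kbase_kunmap(p, va); /* kunmap_local() or kunmap(), matching the map above */
}

/* The atomic variants pair the same way: memory mapped with
 * kbase_kmap_atomic() must be unmapped with kbase_kunmap_atomic().
 */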
+#if (KERNEL_VERSION(4, 4, 267) >= LINUX_VERSION_CODE) +/* Some of the older 4.4 kernel patch versions do + * not contain the overflow check functions. However, + * they are based on compiler intrinsics, so they + * are simple to reproduce. + */ +#define check_mul_overflow(a, b, d) __builtin_mul_overflow(a, b, d) +#endif + +/* + * There was a big rename in the 4.10 kernel (fence* -> dma_fence*), + * with most of the related functions keeping the same signatures. + */ + +#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) + +#include <linux/fence.h> + +#define dma_fence fence +#define dma_fence_ops fence_ops +#define dma_fence_context_alloc(a) fence_context_alloc(a) +#define dma_fence_init(a, b, c, d, e) fence_init(a, b, c, d, e) +#define dma_fence_get(a) fence_get(a) +#define dma_fence_put(a) fence_put(a) +#define dma_fence_signal(a) fence_signal(a) +#define dma_fence_is_signaled(a) fence_is_signaled(a) +#define dma_fence_add_callback(a, b, c) fence_add_callback(a, b, c) +#define dma_fence_remove_callback(a, b) fence_remove_callback(a, b) +#define dma_fence_default_wait fence_default_wait + +#if (KERNEL_VERSION(4, 9, 68) <= LINUX_VERSION_CODE) +#define dma_fence_get_status(a) (fence_is_signaled(a) ? (a)->error ?: 1 : 0) +#else +#define dma_fence_get_status(a) (fence_is_signaled(a) ? (a)->status ?: 1 : 0) +#endif + +#else + +#include <linux/dma-fence.h> + +#if (KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE) +#define dma_fence_get_status(a) (dma_fence_is_signaled(a) ? (a)->status ?: 1 : 0) +#endif + +#endif /* < 4.10.0 */ + +static inline void dma_fence_set_error_helper( +#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) + struct fence *fence, +#else + struct dma_fence *fence, +#endif + int error) +{ +#if (KERNEL_VERSION(4, 11, 0) <= LINUX_VERSION_CODE) + dma_fence_set_error(fence, error); +#elif (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE && \ + KERNEL_VERSION(4, 9, 68) <= LINUX_VERSION_CODE) + fence_set_error(fence, error); +#else + fence->status = error; +#endif +} + +#include <linux/mm.h> +#if !((KERNEL_VERSION(6, 3, 0) <= LINUX_VERSION_CODE) || \ + ((KERNEL_VERSION(6, 1, 25) <= LINUX_VERSION_CODE) && defined(__ANDROID_COMMON_KERNEL__))) +static inline void vm_flags_set(struct vm_area_struct *vma, vm_flags_t flags) +{ + vma->vm_flags |= flags; +} +static inline void vm_flags_clear(struct vm_area_struct *vma, vm_flags_t flags) +{ + vma->vm_flags &= ~flags; +} +#endif + +#if (KERNEL_VERSION(6, 4, 0) <= LINUX_VERSION_CODE) +#define KBASE_CLASS_CREATE(owner, name) class_create(name) +#else +#define KBASE_CLASS_CREATE(owner, name) class_create(owner, name) +#endif + #endif /* _VERSION_COMPAT_DEFS_H_ */ diff --git a/common/include/uapi/gpu/arm/midgard/csf/mali_base_csf_kernel.h b/common/include/uapi/gpu/arm/midgard/csf/mali_base_csf_kernel.h index c6f6ff1..a8e5802 100644 --- a/common/include/uapi/gpu/arm/midgard/csf/mali_base_csf_kernel.h +++ b/common/include/uapi/gpu/arm/midgard/csf/mali_base_csf_kernel.h @@ -177,7 +177,7 @@ enum base_kcpu_command_type { BASE_KCPU_COMMAND_TYPE_JIT_ALLOC, BASE_KCPU_COMMAND_TYPE_JIT_FREE, BASE_KCPU_COMMAND_TYPE_GROUP_SUSPEND, - BASE_KCPU_COMMAND_TYPE_ERROR_BARRIER + BASE_KCPU_COMMAND_TYPE_ERROR_BARRIER, }; /** diff --git a/common/include/uapi/gpu/arm/midgard/csf/mali_kbase_csf_ioctl.h b/common/include/uapi/gpu/arm/midgard/csf/mali_kbase_csf_ioctl.h index 7c37cfc..c9de5fd 100644 --- a/common/include/uapi/gpu/arm/midgard/csf/mali_kbase_csf_ioctl.h +++ b/common/include/uapi/gpu/arm/midgard/csf/mali_kbase_csf_ioctl.h @@ -82,10 +82,18 @@ * - Relax the
requirement to create a mapping with BASE_MEM_MAP_TRACKING_HANDLE * before allocating GPU memory for the context. * - CPU mappings of USER_BUFFER imported memory handles must be cached. + * 1.19: + * - Add NE support in queue_group_create IOCTL fields + * - Previous version retained as KBASE_IOCTL_CS_QUEUE_GROUP_CREATE_1_18 for + * backward compatibility. + * 1.20: + * - Restrict child process from doing supported file operations (like mmap, ioctl, + * read, poll) on the file descriptor of mali device file that was inherited + * from the parent process. */ #define BASE_UK_VERSION_MAJOR 1 -#define BASE_UK_VERSION_MINOR 18 +#define BASE_UK_VERSION_MINOR 20 /** * struct kbase_ioctl_version_check - Check version compatibility between @@ -258,6 +266,56 @@ union kbase_ioctl_cs_queue_group_create_1_6 { _IOWR(KBASE_IOCTL_TYPE, 42, union kbase_ioctl_cs_queue_group_create_1_6) /** + * union kbase_ioctl_cs_queue_group_create_1_18 - Create a GPU command queue group + * @in: Input parameters + * @in.tiler_mask: Mask of tiler endpoints the group is allowed to use. + * @in.fragment_mask: Mask of fragment endpoints the group is allowed to use. + * @in.compute_mask: Mask of compute endpoints the group is allowed to use. + * @in.cs_min: Minimum number of CSs required. + * @in.priority: Queue group's priority within a process. + * @in.tiler_max: Maximum number of tiler endpoints the group is allowed + * to use. + * @in.fragment_max: Maximum number of fragment endpoints the group is + * allowed to use. + * @in.compute_max: Maximum number of compute endpoints the group is allowed + * to use. + * @in.csi_handlers: Flags to signal that the application intends to use CSI + * exception handlers in some linear buffers to deal with + * the given exception types. + * @in.padding: Currently unused, must be zero + * @out: Output parameters + * @out.group_handle: Handle of a newly created queue group. + * @out.padding: Currently unused, must be zero + * @out.group_uid: UID of the queue group available to base. + */ +union kbase_ioctl_cs_queue_group_create_1_18 { + struct { + __u64 tiler_mask; + __u64 fragment_mask; + __u64 compute_mask; + __u8 cs_min; + __u8 priority; + __u8 tiler_max; + __u8 fragment_max; + __u8 compute_max; + __u8 csi_handlers; + __u8 padding[2]; + /** + * @in.dvs_buf: buffer for deferred vertex shader + */ + __u64 dvs_buf; + } in; + struct { + __u8 group_handle; + __u8 padding[3]; + __u32 group_uid; + } out; +}; + +#define KBASE_IOCTL_CS_QUEUE_GROUP_CREATE_1_18 \ + _IOWR(KBASE_IOCTL_TYPE, 58, union kbase_ioctl_cs_queue_group_create_1_18) + +/** * union kbase_ioctl_cs_queue_group_create - Create a GPU command queue group * @in: Input parameters * @in.tiler_mask: Mask of tiler endpoints the group is allowed to use. @@ -291,11 +349,15 @@ union kbase_ioctl_cs_queue_group_create { __u8 fragment_max; __u8 compute_max; __u8 csi_handlers; - __u8 padding[2]; + /** + * @in.reserved: Reserved, currently unused, must be zero. 
+ */ + __u16 reserved; /** * @in.dvs_buf: buffer for deferred vertex shader */ __u64 dvs_buf; + __u64 padding[9]; } in; struct { __u8 group_handle; diff --git a/common/include/uapi/gpu/arm/midgard/gpu/backend/mali_kbase_gpu_regmap_csf.h b/common/include/uapi/gpu/arm/midgard/gpu/backend/mali_kbase_gpu_regmap_csf.h index 0ca5d90..eaa4b2d 100644 --- a/common/include/uapi/gpu/arm/midgard/gpu/backend/mali_kbase_gpu_regmap_csf.h +++ b/common/include/uapi/gpu/arm/midgard/gpu/backend/mali_kbase_gpu_regmap_csf.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2021-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -22,11 +22,6 @@ #ifndef _UAPI_KBASE_GPU_REGMAP_CSF_H_ #define _UAPI_KBASE_GPU_REGMAP_CSF_H_ -/* IPA control registers */ -#define IPA_CONTROL_BASE 0x40000 -#define IPA_CONTROL_REG(r) (IPA_CONTROL_BASE + (r)) -#define STATUS 0x004 /* (RO) Status register */ - /* USER base address */ #define USER_BASE 0x0010000 #define USER_REG(r) (USER_BASE + (r)) diff --git a/common/include/uapi/gpu/arm/midgard/gpu/backend/mali_kbase_gpu_regmap_jm.h b/common/include/uapi/gpu/arm/midgard/gpu/backend/mali_kbase_gpu_regmap_jm.h index 9bfd6d2..d24afcc 100644 --- a/common/include/uapi/gpu/arm/midgard/gpu/backend/mali_kbase_gpu_regmap_jm.h +++ b/common/include/uapi/gpu/arm/midgard/gpu/backend/mali_kbase_gpu_regmap_jm.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -22,29 +22,4 @@ #ifndef _UAPI_KBASE_GPU_REGMAP_JM_H_ #define _UAPI_KBASE_GPU_REGMAP_JM_H_ -/* GPU control registers */ - -#define LATEST_FLUSH 0x038 /* (RO) Flush ID of latest clean-and-invalidate operation */ - -/* Job control registers */ - -#define JS_HEAD_LO 0x00 /* (RO) Job queue head pointer for job slot n, low word */ -#define JS_HEAD_HI 0x04 /* (RO) Job queue head pointer for job slot n, high word */ -#define JS_TAIL_LO 0x08 /* (RO) Job queue tail pointer for job slot n, low word */ -#define JS_TAIL_HI 0x0C /* (RO) Job queue tail pointer for job slot n, high word */ -#define JS_AFFINITY_LO 0x10 /* (RO) Core affinity mask for job slot n, low word */ -#define JS_AFFINITY_HI 0x14 /* (RO) Core affinity mask for job slot n, high word */ -#define JS_CONFIG 0x18 /* (RO) Configuration settings for job slot n */ - -#define JS_HEAD_NEXT_LO 0x40 /* (RW) Next job queue head pointer for job slot n, low word */ -#define JS_HEAD_NEXT_HI 0x44 /* (RW) Next job queue head pointer for job slot n, high word */ -#define JS_AFFINITY_NEXT_LO 0x50 /* (RW) Next core affinity mask for job slot n, low word */ -#define JS_AFFINITY_NEXT_HI 0x54 /* (RW) Next core affinity mask for job slot n, high word */ -#define JS_CONFIG_NEXT 0x58 /* (RW) Next configuration settings for job slot n */ -#define JS_COMMAND_NEXT 0x60 /* (RW) Next command register for job slot n */ - -#define JOB_SLOT0 0x800 /* Configuration registers for job slot 0 */ - -#define JOB_SLOT_REG(n, r) (JOB_CONTROL_REG(JOB_SLOT0 + ((n) << 7)) + (r)) - #endif /* _UAPI_KBASE_GPU_REGMAP_JM_H_ */ diff --git a/common/include/uapi/gpu/arm/midgard/gpu/mali_kbase_gpu_regmap.h b/common/include/uapi/gpu/arm/midgard/gpu/mali_kbase_gpu_regmap.h index 1f33167..8256191 100644 --- a/common/include/uapi/gpu/arm/midgard/gpu/mali_kbase_gpu_regmap.h +++ b/common/include/uapi/gpu/arm/midgard/gpu/mali_kbase_gpu_regmap.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -28,71 +28,4 @@ #include "backend/mali_kbase_gpu_regmap_jm.h" #endif /* !MALI_USE_CSF */ -/* Begin Register Offsets */ -/* GPU control registers */ - -#define GPU_CONTROL_BASE 0x0000 -#define GPU_CONTROL_REG(r) (GPU_CONTROL_BASE + (r)) - -#define GPU_ID 0x000 /* (RO) GPU and revision identifier */ - -#define GPU_IRQ_CLEAR 0x024 /* (WO) */ -#define GPU_IRQ_STATUS 0x02C /* (RO) */ - -#define SHADER_READY_LO 0x140 /* (RO) Shader core ready bitmap, low word */ -#define SHADER_READY_HI 0x144 /* (RO) Shader core ready bitmap, high word */ - -#define TILER_READY_LO 0x150 /* (RO) Tiler core ready bitmap, low word */ -#define TILER_READY_HI 0x154 /* (RO) Tiler core ready bitmap, high word */ - -#define L2_READY_LO 0x160 /* (RO) Level 2 cache ready bitmap, low word */ -#define L2_READY_HI 0x164 /* (RO) Level 2 cache ready bitmap, high word */ - -#define SHADER_PWRON_LO 0x180 /* (WO) Shader core power on bitmap, low word */ -#define SHADER_PWRON_HI 0x184 /* (WO) Shader core power on bitmap, high word */ - -#define TILER_PWRON_LO 0x190 /* (WO) Tiler core power on bitmap, low word */ -#define TILER_PWRON_HI 0x194 /* (WO) Tiler core power on bitmap, high word */ - -#define L2_PWRON_LO 0x1A0 /* (WO) Level 2 cache power on bitmap, low word */ -#define L2_PWRON_HI 0x1A4 /* (WO) Level 2 cache power on bitmap, high word */ - -/* Job control registers */ - -#define JOB_CONTROL_BASE 0x1000 - -#define JOB_CONTROL_REG(r) (JOB_CONTROL_BASE + (r)) - -#define JOB_IRQ_CLEAR 0x004 /* Interrupt clear register */ -#define JOB_IRQ_MASK 0x008 /* Interrupt mask register */ -#define JOB_IRQ_STATUS 0x00C /* Interrupt status register */ - -/* MMU control registers */ - -#define MEMORY_MANAGEMENT_BASE 0x2000 - -#define MMU_REG(r) (MEMORY_MANAGEMENT_BASE + (r)) - -#define MMU_IRQ_RAWSTAT 0x000 /* (RW) Raw interrupt status register */ -#define MMU_IRQ_CLEAR 0x004 /* (WO) Interrupt clear register */ -#define MMU_IRQ_MASK 0x008 /* (RW) Interrupt mask register */ -#define MMU_IRQ_STATUS 0x00C /* (RO) Interrupt status register */ - -#define MMU_AS0 0x400 /* Configuration registers for address space 0 */ - -/* MMU address space control registers */ - -#define MMU_AS_REG(n, r) (MMU_REG(MMU_AS0 + ((n) << 6)) + (r)) - -#define AS_TRANSTAB_LO 0x00 /* (RW) Translation Table Base Address for address space n, low word */ -#define AS_TRANSTAB_HI 0x04 /* (RW) Translation Table Base Address for address space n, high word */ -#define AS_MEMATTR_LO 0x08 /* (RW) Memory attributes for address space n, low word. */ -#define AS_MEMATTR_HI 0x0C /* (RW) Memory attributes for address space n, high word. */ -#define AS_COMMAND 0x18 /* (WO) MMU command register for address space n */ - -/* (RW) Translation table configuration for address space n, low word */ -#define AS_TRANSCFG_LO 0x30 -/* (RW) Translation table configuration for address space n, high word */ -#define AS_TRANSCFG_HI 0x34 - #endif /* _UAPI_KBASE_GPU_REGMAP_H_ */ diff --git a/common/include/uapi/gpu/arm/midgard/jm/mali_kbase_jm_ioctl.h b/common/include/uapi/gpu/arm/midgard/jm/mali_kbase_jm_ioctl.h index ac6affe..f2329f9 100644 --- a/common/include/uapi/gpu/arm/midgard/jm/mali_kbase_jm_ioctl.h +++ b/common/include/uapi/gpu/arm/midgard/jm/mali_kbase_jm_ioctl.h @@ -143,9 +143,14 @@ * - Relax the requirement to create a mapping with BASE_MEM_MAP_TRACKING_HANDLE * before allocating GPU memory for the context. 
* - CPU mappings of USER_BUFFER imported memory handles must be cached. + * 11.39: + * - Restrict child process from doing supported file operations (like mmap, ioctl, + * read, poll) on the file descriptor of mali device file that was inherited + * from the parent process. */ + #define BASE_UK_VERSION_MAJOR 11 -#define BASE_UK_VERSION_MINOR 38 +#define BASE_UK_VERSION_MINOR 39 /** * struct kbase_ioctl_version_check - Check version compatibility between diff --git a/mali_kbase/BUILD.bazel b/mali_kbase/BUILD.bazel index e38f617..54dd437 100644 --- a/mali_kbase/BUILD.bazel +++ b/mali_kbase/BUILD.bazel @@ -1,27 +1,45 @@ -# NOTE: THIS FILE IS EXPERIMENTAL FOR THE BAZEL MIGRATION AND NOT USED FOR -# YOUR BUILDS CURRENTLY. +# This program is free software and is provided to you under the terms of the +# GNU General Public License version 2 as published by the Free Software +# Foundation, and any use by you of this program is subject to the terms +# of such GNU license. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, you can access it online at +# http://www.gnu.org/licenses/gpl-2.0.html. +# # -# It is not yet the source of truth for your build. If you're looking to modify -# the build file, modify the Android.bp file instead. Do *not* modify this file -# unless you have coordinated with the team managing the Soong to Bazel -# migration. -load("//build/kleaf:kernel.bzl", "kernel_module") +load( + "//build/kernel/kleaf:kernel.bzl", + "kernel_module", +) + +_midgard_modules = [ + "mali_kbase.ko", + "tests/kutf/mali_kutf.ko", + "tests/mali_kutf_clk_rate_trace/kernel/mali_kutf_clk_rate_trace_test_portal.ko", +] kernel_module( name = "mali_kbase.cloudripper", srcs = glob([ "**/*.c", "**/*.h", - "**/Kbuild", + "**/*Kbuild", + "**/*Makefile", ]) + [ + "//common:kernel_headers", + "//common-modules/mali:headers", + "//common-modules/mali/drivers/gpu/arm/arbitration", + "//common-modules/mali/drivers/xen/arm:xen", "//private/google-modules/gpu/common:headers", ], - outs = [ - "mali_kbase.ko", - "tests/kutf/mali_kutf.ko", - "tests/mali_kutf_clk_rate_trace/kernel/mali_kutf_clk_rate_trace_test_portal.ko", - ], + outs = _midgard_modules, kernel_build = "//private/gs-google:cloudripper", visibility = [ "//private/gs-google:__pkg__", @@ -30,3 +48,14 @@ kernel_module( "//private/google-modules/gpu/mali_pixel", ], ) + +filegroup( + name = "midgard_kconfig.cloudripper", + srcs = glob([ + "**/*Kconfig", + ]), + visibility = [ + "//common:__pkg__", + "//common-modules/mali:__subpackages__", + ], +) diff --git a/mali_kbase/Kbuild b/mali_kbase/Kbuild index 9da4141..ff0a0de 100644 --- a/mali_kbase/Kbuild +++ b/mali_kbase/Kbuild @@ -68,12 +68,11 @@ endif # Configurations # -# Driver version string which is returned to userspace via an ioctl -MALI_RELEASE_NAME ?= '"r43p0-01eac0"' - # We are building for Pixel CONFIG_MALI_PLATFORM_NAME="pixel" +# Driver version string which is returned to userspace via an ioctl +MALI_RELEASE_NAME ?= '"r44p1-00dev3"' # Set up defaults if not defined by build system ifeq ($(CONFIG_MALI_DEBUG), y) MALI_UNIT_TEST = 1 @@ -191,7 +190,6 @@ mali_kbase-y := \ mali_kbase_mem_pool.o \ mali_kbase_mem_pool_debugfs.o \ mali_kbase_debugfs_helper.o \ - mali_kbase_strings.o \ 
mali_kbase_as_fault_debugfs.o \ mali_kbase_regs_history_debugfs.o \ mali_kbase_dvfs_debugfs.o \ @@ -208,6 +206,10 @@ mali_kbase-$(CONFIG_SYNC_FILE) += \ mali_kbase_sync_file.o \ mali_kbase_sync_common.o +mali_kbase-$(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) += \ + mali_power_gpu_work_period_trace.o \ + mali_kbase_gpu_metrics.o + ifneq ($(CONFIG_MALI_CSF_SUPPORT),y) mali_kbase-y += \ mali_kbase_jm.o \ diff --git a/mali_kbase/Kconfig b/mali_kbase/Kconfig index 46e3546..bb25ef4 100644 --- a/mali_kbase/Kconfig +++ b/mali_kbase/Kconfig @@ -65,11 +65,18 @@ config MALI_NO_MALI All calls to the simulated hardware will complete immediately as if the hardware completed the task. +config MALI_NO_MALI_DEFAULT_GPU + string "Default GPU for No Mali" + depends on MALI_NO_MALI + default "tMIx" + help + This option sets the default GPU to identify as for No Mali builds. + endchoice menu "Platform specific options" -source "drivers/gpu/arm/midgard/platform/Kconfig" +source "$(MALI_KCONFIG_EXT_PREFIX)drivers/gpu/arm/midgard/platform/Kconfig" endmenu config MALI_CSF_SUPPORT @@ -193,6 +200,22 @@ config LARGE_PAGE_ALLOC If in doubt, say N +config PAGE_MIGRATION_SUPPORT + bool "Enable support for page migration" + depends on MALI_MIDGARD && MALI_EXPERT + default y + default n if ANDROID + help + Compile in support for page migration. + If set to disabled ('n') then page migration cannot + be enabled at all, and related symbols are not compiled in. + If not set, page migration is compiled in by default, and + if not explicitly enabled or disabled with the insmod parameter, + page migration becomes automatically enabled with large pages. + + If in doubt, say Y. To strip out page migration symbols and support, + say N. + config MALI_MEMORY_FULLY_BACKED bool "Enable memory fully physically-backed" depends on MALI_MIDGARD && MALI_EXPERT @@ -395,7 +418,16 @@ config MALI_ARBITRATION virtualization setup for Mali If unsure, say N. +config MALI_TRACE_POWER_GPU_WORK_PERIOD + bool "Enable per-application GPU metrics tracepoints" + depends on MALI_MIDGARD + default y + help + This option enables per-application GPU metrics tracepoints. + + If unsure, say N. + -source "drivers/gpu/arm/midgard/tests/Kconfig" +source "$(MALI_KCONFIG_EXT_PREFIX)drivers/gpu/arm/midgard/tests/Kconfig" endif diff --git a/mali_kbase/Makefile b/mali_kbase/Makefile index d851653..59b306b 100644 --- a/mali_kbase/Makefile +++ b/mali_kbase/Makefile @@ -20,8 +20,6 @@ KERNEL_SRC ?= /lib/modules/$(shell uname -r)/build KDIR ?= $(KERNEL_SRC) - -# Ensure build intermediates are in OUT_DIR instead of alongside the source M ?= $(shell pwd) ifeq ($(KDIR),) @@ -39,6 +37,7 @@ CONFIG_MALI_SYSTEM_TRACE=y # Core kbase configuration options CONFIG_MALI_EXPERT=y CONFIG_MALI_MIDGARD_DVFS=y +CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD = y # Pixel integration specific configuration options CONFIG_MALI_PLATFORM_NAME="pixel" @@ -54,164 +53,176 @@ CONFIG_MALI_PIXEL_GPU_SLC ?= y # Dependency resolution is done through statements as Kconfig # is not supported for out-of-tree builds. 
# +CONFIGS := +ifeq ($(MALI_KCONFIG_EXT_PREFIX),) + CONFIG_MALI_MIDGARD ?= m + ifeq ($(CONFIG_MALI_MIDGARD),m) + CONFIG_MALI_PLATFORM_NAME ?= "devicetree" + CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD ?= y + CONFIG_MALI_GATOR_SUPPORT ?= y + CONFIG_MALI_ARBITRATION ?= n + CONFIG_MALI_PARTITION_MANAGER ?= n + + ifneq ($(CONFIG_MALI_NO_MALI),y) + # Prevent misuse when CONFIG_MALI_NO_MALI=y + CONFIG_MALI_REAL_HW ?= y + CONFIG_MALI_CORESIGHT = n + endif -CONFIG_MALI_MIDGARD ?= m -ifeq ($(CONFIG_MALI_MIDGARD),m) - CONFIG_MALI_PLATFORM_NAME ?= "devicetree" - CONFIG_MALI_GATOR_SUPPORT ?= y - CONFIG_MALI_ARBITRATION ?= n - CONFIG_MALI_PARTITION_MANAGER ?= n - - ifneq ($(CONFIG_MALI_NO_MALI),y) - # Prevent misuse when CONFIG_MALI_NO_MALI=y - CONFIG_MALI_REAL_HW ?= y - CONFIG_MALI_CORESIGHT = n - endif - - ifeq ($(CONFIG_MALI_MIDGARD_DVFS),y) - # Prevent misuse when CONFIG_MALI_MIDGARD_DVFS=y - CONFIG_MALI_DEVFREQ ?= n - else - CONFIG_MALI_DEVFREQ ?= y - endif + ifeq ($(CONFIG_MALI_MIDGARD_DVFS),y) + # Prevent misuse when CONFIG_MALI_MIDGARD_DVFS=y + CONFIG_MALI_DEVFREQ ?= n + else + CONFIG_MALI_DEVFREQ ?= y + endif - ifeq ($(CONFIG_MALI_DMA_BUF_MAP_ON_DEMAND), y) - # Prevent misuse when CONFIG_MALI_DMA_BUF_MAP_ON_DEMAND=y - CONFIG_MALI_DMA_BUF_LEGACY_COMPAT = n - endif + ifeq ($(CONFIG_MALI_DMA_BUF_MAP_ON_DEMAND), y) + # Prevent misuse when CONFIG_MALI_DMA_BUF_MAP_ON_DEMAND=y + CONFIG_MALI_DMA_BUF_LEGACY_COMPAT = n + endif - ifeq ($(CONFIG_MALI_CSF_SUPPORT), y) - CONFIG_MALI_CORESIGHT ?= n - endif + ifeq ($(CONFIG_MALI_CSF_SUPPORT), y) + CONFIG_MALI_CORESIGHT ?= n + endif - # - # Expert/Debug/Test released configurations - # - ifeq ($(CONFIG_MALI_EXPERT), y) - ifeq ($(CONFIG_MALI_NO_MALI), y) - CONFIG_MALI_REAL_HW = n + # + # Expert/Debug/Test released configurations + # + ifeq ($(CONFIG_MALI_EXPERT), y) + ifeq ($(CONFIG_MALI_NO_MALI), y) + CONFIG_MALI_REAL_HW = n + CONFIG_MALI_NO_MALI_DEFAULT_GPU ?= "tMIx" - else - # Prevent misuse when CONFIG_MALI_NO_MALI=n - CONFIG_MALI_REAL_HW = y - CONFIG_MALI_ERROR_INJECT = n - endif + else + # Prevent misuse when CONFIG_MALI_NO_MALI=n + CONFIG_MALI_REAL_HW = y + CONFIG_MALI_ERROR_INJECT = n + endif - ifeq ($(CONFIG_MALI_HW_ERRATA_1485982_NOT_AFFECTED), y) - # Prevent misuse when CONFIG_MALI_HW_ERRATA_1485982_NOT_AFFECTED=y - CONFIG_MALI_HW_ERRATA_1485982_USE_CLOCK_ALTERNATIVE = n - endif + ifeq ($(CONFIG_MALI_HW_ERRATA_1485982_NOT_AFFECTED), y) + # Prevent misuse when CONFIG_MALI_HW_ERRATA_1485982_NOT_AFFECTED=y + CONFIG_MALI_HW_ERRATA_1485982_USE_CLOCK_ALTERNATIVE = n + endif - ifeq ($(CONFIG_MALI_DEBUG), y) - CONFIG_MALI_MIDGARD_ENABLE_TRACE ?= y - CONFIG_MALI_SYSTEM_TRACE ?= y + ifeq ($(CONFIG_MALI_DEBUG), y) + CONFIG_MALI_MIDGARD_ENABLE_TRACE ?= y + CONFIG_MALI_SYSTEM_TRACE ?= y - ifeq ($(CONFIG_SYNC_FILE), y) - CONFIG_MALI_FENCE_DEBUG ?= y + ifeq ($(CONFIG_SYNC_FILE), y) + CONFIG_MALI_FENCE_DEBUG ?= y + else + CONFIG_MALI_FENCE_DEBUG = n + endif else + # Prevent misuse when CONFIG_MALI_DEBUG=n + CONFIG_MALI_MIDGARD_ENABLE_TRACE = n + CONFIG_MALI_SYSTEM_TRACE = n CONFIG_MALI_FENCE_DEBUG = n endif else - # Prevent misuse when CONFIG_MALI_DEBUG=n + # Prevent misuse when CONFIG_MALI_EXPERT=n + CONFIG_MALI_CORESTACK = n + CONFIG_LARGE_PAGE_ALLOC_OVERRIDE = n + CONFIG_LARGE_PAGE_ALLOC = n + CONFIG_MALI_PWRSOFT_765 = n + CONFIG_MALI_MEMORY_FULLY_BACKED = n + CONFIG_MALI_JOB_DUMP = n + CONFIG_MALI_NO_MALI = n + CONFIG_MALI_REAL_HW = y + CONFIG_MALI_ERROR_INJECT = n + CONFIG_MALI_HW_ERRATA_1485982_NOT_AFFECTED = n + CONFIG_MALI_HW_ERRATA_1485982_USE_CLOCK_ALTERNATIVE 
= n + CONFIG_MALI_HOST_CONTROLS_SC_RAILS = n + CONFIG_MALI_PRFCNT_SET_SELECT_VIA_DEBUG_FS = n + CONFIG_MALI_DEBUG = n CONFIG_MALI_MIDGARD_ENABLE_TRACE = n CONFIG_MALI_FENCE_DEBUG = n endif - else - # Prevent misuse when CONFIG_MALI_EXPERT=n - CONFIG_MALI_CORESTACK = n - CONFIG_LARGE_PAGE_ALLOC_OVERRIDE = n - CONFIG_LARGE_PAGE_ALLOC = n - CONFIG_MALI_PWRSOFT_765 = n - CONFIG_MALI_MEMORY_FULLY_BACKED = n - CONFIG_MALI_JOB_DUMP = n - CONFIG_MALI_NO_MALI = n - CONFIG_MALI_REAL_HW = y - CONFIG_MALI_ERROR_INJECT = n - CONFIG_MALI_HW_ERRATA_1485982_NOT_AFFECTED = n - CONFIG_MALI_HW_ERRATA_1485982_USE_CLOCK_ALTERNATIVE = n - CONFIG_MALI_HOST_CONTROLS_SC_RAILS = n - CONFIG_MALI_PRFCNT_SET_SELECT_VIA_DEBUG_FS = n - CONFIG_MALI_DEBUG = n - CONFIG_MALI_MIDGARD_ENABLE_TRACE = n - CONFIG_MALI_FENCE_DEBUG = n - endif - ifeq ($(CONFIG_MALI_DEBUG), y) - CONFIG_MALI_KUTF ?= y - ifeq ($(CONFIG_MALI_KUTF), y) - CONFIG_MALI_KUTF_IRQ_TEST ?= y - CONFIG_MALI_KUTF_CLK_RATE_TRACE ?= y - CONFIG_MALI_KUTF_MGM_INTEGRATION_TEST ?= y + ifeq ($(CONFIG_MALI_DEBUG), y) + CONFIG_MALI_KUTF ?= y + ifeq ($(CONFIG_MALI_KUTF), y) + CONFIG_MALI_KUTF_IRQ_TEST ?= y + CONFIG_MALI_KUTF_CLK_RATE_TRACE ?= y + CONFIG_MALI_KUTF_MGM_INTEGRATION_TEST ?= y + ifeq ($(CONFIG_MALI_DEVFREQ), y) + ifeq ($(CONFIG_MALI_NO_MALI), y) + CONFIG_MALI_KUTF_IPA_UNIT_TEST ?= y + endif + endif + + else + # Prevent misuse when CONFIG_MALI_KUTF=n + CONFIG_MALI_KUTF_IRQ_TEST = n + CONFIG_MALI_KUTF_CLK_RATE_TRACE = n + CONFIG_MALI_KUTF_MGM_INTEGRATION_TEST = n + endif else - # Prevent misuse when CONFIG_MALI_KUTF=n + # Prevent misuse when CONFIG_MALI_DEBUG=n + CONFIG_MALI_KUTF = y CONFIG_MALI_KUTF_IRQ_TEST = n CONFIG_MALI_KUTF_CLK_RATE_TRACE = n CONFIG_MALI_KUTF_MGM_INTEGRATION_TEST = n endif else - # Prevent misuse when CONFIG_MALI_DEBUG=n - CONFIG_MALI_KUTF = y + # Prevent misuse when CONFIG_MALI_MIDGARD=n + CONFIG_MALI_ARBITRATION = n + CONFIG_MALI_KUTF = n CONFIG_MALI_KUTF_IRQ_TEST = n CONFIG_MALI_KUTF_CLK_RATE_TRACE = y CONFIG_MALI_KUTF_MGM_INTEGRATION_TEST = n endif -else - # Prevent misuse when CONFIG_MALI_MIDGARD=n - CONFIG_MALI_ARBITRATION = n - CONFIG_MALI_KUTF = n - CONFIG_MALI_KUTF_IRQ_TEST = n - CONFIG_MALI_KUTF_CLK_RATE_TRACE = n - CONFIG_MALI_KUTF_MGM_INTEGRATION_TEST = n -endif -# All Mali CONFIG should be listed here -CONFIGS := \ - CONFIG_MALI_MIDGARD \ - CONFIG_MALI_GATOR_SUPPORT \ - CONFIG_MALI_ARBITER_SUPPORT \ - CONFIG_MALI_ARBITRATION \ - CONFIG_MALI_PARTITION_MANAGER \ - CONFIG_MALI_REAL_HW \ - CONFIG_MALI_DEVFREQ \ - CONFIG_MALI_MIDGARD_DVFS \ - CONFIG_MALI_DMA_BUF_MAP_ON_DEMAND \ - CONFIG_MALI_DMA_BUF_LEGACY_COMPAT \ - CONFIG_MALI_EXPERT \ - CONFIG_MALI_CORESTACK \ - CONFIG_LARGE_PAGE_ALLOC_OVERRIDE \ - CONFIG_LARGE_PAGE_ALLOC \ - CONFIG_MALI_PWRSOFT_765 \ - CONFIG_MALI_MEMORY_FULLY_BACKED \ - CONFIG_MALI_JOB_DUMP \ - CONFIG_MALI_NO_MALI \ - CONFIG_MALI_ERROR_INJECT \ - CONFIG_MALI_HW_ERRATA_1485982_NOT_AFFECTED \ - CONFIG_MALI_HW_ERRATA_1485982_USE_CLOCK_ALTERNATIVE \ - CONFIG_MALI_HOST_CONTROLS_SC_RAILS \ - CONFIG_MALI_PRFCNT_SET_PRIMARY \ - CONFIG_MALI_PRFCNT_SET_SECONDARY \ - CONFIG_MALI_PRFCNT_SET_TERTIARY \ - CONFIG_MALI_PRFCNT_SET_SELECT_VIA_DEBUG_FS \ - CONFIG_MALI_DEBUG \ - CONFIG_MALI_MIDGARD_ENABLE_TRACE \ - CONFIG_MALI_SYSTEM_TRACE \ - CONFIG_MALI_FENCE_DEBUG \ - CONFIG_MALI_KUTF \ - CONFIG_MALI_KUTF_IRQ_TEST \ - CONFIG_MALI_KUTF_CLK_RATE_TRACE \ - CONFIG_MALI_KUTF_MGM_INTEGRATION_TEST \ - CONFIG_MALI_XEN \ - CONFIG_MALI_CORESIGHT - -# Pixel integration CONFIG options -CONFIGS += \ - CONFIG_MALI_PIXEL_GPU_QOS \ - 
CONFIG_MALI_PIXEL_GPU_BTS \ - CONFIG_MALI_PIXEL_GPU_THERMAL \ - CONFIG_MALI_PIXEL_GPU_SECURE_RENDERING \ - CONFIG_MALI_HOST_CONTROLS_SC_RAILS \ - CONFIG_MALI_PIXEL_GPU_SLC + # All Mali CONFIG should be listed here + CONFIGS := \ + CONFIG_MALI_MIDGARD \ + CONFIG_MALI_GATOR_SUPPORT \ + CONFIG_MALI_ARBITER_SUPPORT \ + CONFIG_MALI_ARBITRATION \ + CONFIG_MALI_PARTITION_MANAGER \ + CONFIG_MALI_REAL_HW \ + CONFIG_MALI_DEVFREQ \ + CONFIG_MALI_MIDGARD_DVFS \ + CONFIG_MALI_DMA_BUF_MAP_ON_DEMAND \ + CONFIG_MALI_DMA_BUF_LEGACY_COMPAT \ + CONFIG_MALI_EXPERT \ + CONFIG_MALI_CORESTACK \ + CONFIG_LARGE_PAGE_ALLOC_OVERRIDE \ + CONFIG_LARGE_PAGE_ALLOC \ + CONFIG_MALI_PWRSOFT_765 \ + CONFIG_MALI_MEMORY_FULLY_BACKED \ + CONFIG_MALI_JOB_DUMP \ + CONFIG_MALI_NO_MALI \ + CONFIG_MALI_ERROR_INJECT \ + CONFIG_MALI_HW_ERRATA_1485982_NOT_AFFECTED \ + CONFIG_MALI_HW_ERRATA_1485982_USE_CLOCK_ALTERNATIVE \ + CONFIG_MALI_HOST_CONTROLS_SC_RAILS \ + CONFIG_MALI_PRFCNT_SET_PRIMARY \ + CONFIG_MALI_PRFCNT_SET_SECONDARY \ + CONFIG_MALI_PRFCNT_SET_TERTIARY \ + CONFIG_MALI_PRFCNT_SET_SELECT_VIA_DEBUG_FS \ + CONFIG_MALI_DEBUG \ + CONFIG_MALI_MIDGARD_ENABLE_TRACE \ + CONFIG_MALI_SYSTEM_TRACE \ + CONFIG_MALI_FENCE_DEBUG \ + CONFIG_MALI_KUTF \ + CONFIG_MALI_KUTF_IRQ_TEST \ + CONFIG_MALI_KUTF_CLK_RATE_TRACE \ + CONFIG_MALI_KUTF_MGM_INTEGRATION_TEST \ + CONFIG_MALI_XEN \ + CONFIG_MALI_CORESIGHT \ + CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD + + # Pixel integration CONFIG options + CONFIGS += \ + CONFIG_MALI_PIXEL_GPU_QOS \ + CONFIG_MALI_PIXEL_GPU_BTS \ + CONFIG_MALI_PIXEL_GPU_THERMAL \ + CONFIG_MALI_PIXEL_GPU_SECURE_RENDERING \ + CONFIG_MALI_PIXEL_GPU_SLC + +endif THIS_DIR := $(dir $(lastword $(MAKEFILE_LIST))) -include $(THIS_DIR)/../arbitration/Makefile @@ -227,7 +238,9 @@ MAKE_ARGS := $(foreach config,$(CONFIGS), \ $(value config)=$(value $(value config)), \ $(value config)=n)) -MAKE_ARGS += CONFIG_MALI_PLATFORM_NAME=$(CONFIG_MALI_PLATFORM_NAME) +ifeq ($(MALI_KCONFIG_EXT_PREFIX),) + MAKE_ARGS += CONFIG_MALI_PLATFORM_NAME=$(CONFIG_MALI_PLATFORM_NAME) +endif # # EXTRA_CFLAGS to define the custom CONFIGs on out-of-tree build @@ -239,63 +252,66 @@ EXTRA_CFLAGS := $(foreach config,$(CONFIGS), \ $(if $(filter y m,$(value $(value config))), \ -D$(value config)=1)) -EXTRA_CFLAGS += -DCONFIG_MALI_PLATFORM_NAME=$(CONFIG_MALI_PLATFORM_NAME) +ifeq ($(MALI_KCONFIG_EXT_PREFIX),) + EXTRA_CFLAGS += -DCONFIG_MALI_PLATFORM_NAME='\"$(CONFIG_MALI_PLATFORM_NAME)\"' + EXTRA_CFLAGS += -DCONFIG_MALI_NO_MALI_DEFAULT_GPU='\"$(CONFIG_MALI_NO_MALI_DEFAULT_GPU)\"' +endif # # KBUILD_EXTRA_SYMBOLS to prevent warnings about unknown functions # EXTRA_SYMBOLS += $(OUT_DIR)/../google-modules/gpu/mali_pixel/Module.symvers -KBUILD_CFLAGS += -Wall -Werror +CFLAGS_MODULE += -Wall -Werror # The following were added to align with W=1 in scripts/Makefile.extrawarn # from the Linux source tree (v5.18.14) -KBUILD_CFLAGS += -Wextra -Wunused -Wno-unused-parameter -KBUILD_CFLAGS += -Wmissing-declarations -KBUILD_CFLAGS += -Wmissing-format-attribute -KBUILD_CFLAGS += -Wmissing-prototypes -KBUILD_CFLAGS += -Wold-style-definition +CFLAGS_MODULE += -Wextra -Wunused -Wno-unused-parameter +CFLAGS_MODULE += -Wmissing-declarations +CFLAGS_MODULE += -Wmissing-format-attribute +CFLAGS_MODULE += -Wmissing-prototypes +CFLAGS_MODULE += -Wold-style-definition # The -Wmissing-include-dirs cannot be enabled as the path to some of the # included directories change depending on whether it is an in-tree or # out-of-tree build. 
-KBUILD_CFLAGS += $(call cc-option, -Wunused-but-set-variable) -KBUILD_CFLAGS += $(call cc-option, -Wunused-const-variable) -KBUILD_CFLAGS += $(call cc-option, -Wpacked-not-aligned) -KBUILD_CFLAGS += $(call cc-option, -Wstringop-truncation) +CFLAGS_MODULE += $(call cc-option, -Wunused-but-set-variable) +CFLAGS_MODULE += $(call cc-option, -Wunused-const-variable) +CFLAGS_MODULE += $(call cc-option, -Wpacked-not-aligned) +CFLAGS_MODULE += $(call cc-option, -Wstringop-truncation) # The following turn off the warnings enabled by -Wextra -KBUILD_CFLAGS += -Wno-sign-compare -KBUILD_CFLAGS += -Wno-shift-negative-value +CFLAGS_MODULE += -Wno-sign-compare +CFLAGS_MODULE += -Wno-shift-negative-value # This flag is needed to avoid build errors on older kernels -KBUILD_CFLAGS += $(call cc-option, -Wno-cast-function-type) +CFLAGS_MODULE += $(call cc-option, -Wno-cast-function-type) KBUILD_CPPFLAGS += -DKBUILD_EXTRA_WARN1 # The following were added to align with W=2 in scripts/Makefile.extrawarn # from the Linux source tree (v5.18.14) -KBUILD_CFLAGS += -Wdisabled-optimization +CFLAGS_MODULE += -Wdisabled-optimization # The -Wshadow flag cannot be enabled unless upstream kernels are # patched to fix redefinitions of certain built-in functions and # global variables. -KBUILD_CFLAGS += $(call cc-option, -Wlogical-op) -KBUILD_CFLAGS += -Wmissing-field-initializers +CFLAGS_MODULE += $(call cc-option, -Wlogical-op) +CFLAGS_MODULE += -Wmissing-field-initializers # -Wtype-limits must be disabled due to build failures on kernel 5.x -KBUILD_CFLAGS += -Wno-type-limits -KBUILD_CFLAGS += $(call cc-option, -Wmaybe-uninitialized) -KBUILD_CFLAGS += $(call cc-option, -Wunused-macros) +CFLAGS_MODULE += -Wno-type-limits +CFLAGS_MODULE += $(call cc-option, -Wmaybe-uninitialized) +CFLAGS_MODULE += $(call cc-option, -Wunused-macros) KBUILD_CPPFLAGS += -DKBUILD_EXTRA_WARN2 # This warning is disabled to avoid build failures in some kernel versions -KBUILD_CFLAGS += -Wno-ignored-qualifiers +CFLAGS_MODULE += -Wno-ignored-qualifiers ifeq ($(CONFIG_GCOV_KERNEL),y) - KBUILD_CFLAGS += $(call cc-option, -ftest-coverage) - KBUILD_CFLAGS += $(call cc-option, -fprofile-arcs) + CFLAGS_MODULE += $(call cc-option, -ftest-coverage) + CFLAGS_MODULE += $(call cc-option, -fprofile-arcs) EXTRA_CFLAGS += -DGCOV_PROFILE=1 endif ifeq ($(CONFIG_MALI_KCOV),y) - KBUILD_CFLAGS += $(call cc-option, -fsanitize-coverage=trace-cmp) + CFLAGS_MODULE += $(call cc-option, -fsanitize-coverage=trace-cmp) EXTRA_CFLAGS += -DKCOV=1 EXTRA_CFLAGS += -DKCOV_ENABLE_COMPARISONS=1 endif diff --git a/mali_kbase/Mconfig b/mali_kbase/Mconfig index 77a528f..2d6fca0 100644 --- a/mali_kbase/Mconfig +++ b/mali_kbase/Mconfig @@ -196,6 +196,18 @@ config MALI_CORESTACK If unsure, say N. +config PAGE_MIGRATION_SUPPORT + bool "Compile with page migration support" + depends on BACKEND_KERNEL + default y + default n if ANDROID + help + Compile in support for page migration. + If set to disabled ('n') then page migration cannot + be enabled at all. If set to enabled, then page migration + support is explicitly compiled in. This has no effect when + PAGE_MIGRATION_OVERRIDE is disabled. + choice prompt "Error injection level" depends on MALI_MIDGARD && MALI_EXPERT @@ -352,5 +364,45 @@ config MALI_HOST_CONTROLS_SC_RAILS Adapter) inside the GPU to handshake with SoC PMU to control the power of cores. 
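# In the Makefile hunks above, each name in CONFIGS is expanded twice: once
# into MAKE_ARGS as a command-line override and once into EXTRA_CFLAGS as a
# preprocessor define. A worked example with assumed values (not from this
# patch): suppose CONFIGS contains CONFIG_MALI_DEVFREQ and CONFIG_MALI_NO_MALI,
# with CONFIG_MALI_DEVFREQ=y and CONFIG_MALI_NO_MALI unset. Then:
#
#   MAKE_ARGS    -> CONFIG_MALI_DEVFREQ=y CONFIG_MALI_NO_MALI=n
#   EXTRA_CFLAGS -> -DCONFIG_MALI_DEVFREQ=1
#
# i.e. configs set to y/m are passed through with their value and defined to 1
# for the C code; everything else is forced to n and gets no define.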
+config MALI_TRACE_POWER_GPU_WORK_PERIOD + bool "Enable per-application GPU metrics tracepoints" + depends on MALI_MIDGARD + default y + help + This option enables per-application GPU metrics tracepoints. + + If unsure, say N. + +choice + prompt "CSF Firmware trace mode" + depends on MALI_MIDGARD + default MALI_FW_TRACE_MODE_MANUAL + help + CSF Firmware log operating mode. + +config MALI_FW_TRACE_MODE_MANUAL + bool "manual mode" + depends on MALI_MIDGARD + help + firmware log can be read manually by the userspace (and it will + also be dumped automatically into dmesg on GPU reset). + +config MALI_FW_TRACE_MODE_AUTO_PRINT + bool "automatic printing mode" + depends on MALI_MIDGARD + help + firmware log will be periodically emptied into dmesg, manual + reading through debugfs is disabled. + +config MALI_FW_TRACE_MODE_AUTO_DISCARD + bool "automatic discarding mode" + depends on MALI_MIDGARD + help + firmware log will be periodically discarded, the remaining log can be + read manually by the userspace (and it will also be dumped + automatically into dmesg on GPU reset). + +endchoice + source "kernel/drivers/gpu/arm/arbitration/Mconfig" source "kernel/drivers/gpu/arm/midgard/tests/Mconfig" diff --git a/mali_kbase/backend/gpu/Kbuild b/mali_kbase/backend/gpu/Kbuild index 7df24c3..c37cc59 100644 --- a/mali_kbase/backend/gpu/Kbuild +++ b/mali_kbase/backend/gpu/Kbuild @@ -22,7 +22,6 @@ mali_kbase-y += \ backend/gpu/mali_kbase_cache_policy_backend.o \ backend/gpu/mali_kbase_gpuprops_backend.o \ backend/gpu/mali_kbase_irq_linux.o \ - backend/gpu/mali_kbase_js_backend.o \ backend/gpu/mali_kbase_pm_backend.o \ backend/gpu/mali_kbase_pm_driver.o \ backend/gpu/mali_kbase_pm_metrics.o \ @@ -42,7 +41,8 @@ ifeq ($(MALI_USE_CSF),0) backend/gpu/mali_kbase_jm_as.o \ backend/gpu/mali_kbase_debug_job_fault_backend.o \ backend/gpu/mali_kbase_jm_hw.o \ - backend/gpu/mali_kbase_jm_rb.o + backend/gpu/mali_kbase_jm_rb.o \ + backend/gpu/mali_kbase_js_backend.o endif diff --git a/mali_kbase/backend/gpu/mali_kbase_cache_policy_backend.c b/mali_kbase/backend/gpu/mali_kbase_cache_policy_backend.c index 7c0abba..86539d5 100644 --- a/mali_kbase/backend/gpu/mali_kbase_cache_policy_backend.c +++ b/mali_kbase/backend/gpu/mali_kbase_cache_policy_backend.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2014-2016, 2018, 2020-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -43,12 +43,12 @@ void kbase_cache_set_coherency_mode(struct kbase_device *kbdev, kbdev->current_gpu_coherency_mode = mode; if (kbasep_amba_register_present(kbdev)) { - u32 val = kbase_reg_read(kbdev, AMBA_ENABLE); + u32 val = kbase_reg_read(kbdev, GPU_CONTROL_REG(AMBA_ENABLE)); val = AMBA_ENABLE_COHERENCY_PROTOCOL_SET(val, mode); - kbase_reg_write(kbdev, AMBA_ENABLE, val); + kbase_reg_write(kbdev, GPU_CONTROL_REG(AMBA_ENABLE), val); } else - kbase_reg_write(kbdev, COHERENCY_ENABLE, mode); + kbase_reg_write(kbdev, GPU_CONTROL_REG(COHERENCY_ENABLE), mode); } u32 kbase_cache_get_coherency_features(struct kbase_device *kbdev) @@ -69,24 +69,12 @@ void kbase_amba_set_memory_cache_support(struct kbase_device *kbdev, bool enable) { if (kbasep_amba_register_present(kbdev)) { - u32 val = kbase_reg_read(kbdev, AMBA_ENABLE); + u32 val = kbase_reg_read(kbdev, GPU_CONTROL_REG(AMBA_ENABLE)); val = AMBA_ENABLE_MEMORY_CACHE_SUPPORT_SET(val, enable); - kbase_reg_write(kbdev, AMBA_ENABLE, val); + kbase_reg_write(kbdev, GPU_CONTROL_REG(AMBA_ENABLE), val); } else { WARN(1, "memory_cache_support not supported"); } } - -void kbase_amba_set_invalidate_hint(struct kbase_device *kbdev, bool enable) -{ - if (kbasep_amba_register_present(kbdev)) { - u32 val = kbase_reg_read(kbdev, AMBA_ENABLE); - - val = AMBA_ENABLE_INVALIDATE_HINT_SET(val, enable); - kbase_reg_write(kbdev, AMBA_ENABLE, val); - } else { - WARN(1, "invalidate_hint not supported"); - } -} diff --git a/mali_kbase/backend/gpu/mali_kbase_cache_policy_backend.h b/mali_kbase/backend/gpu/mali_kbase_cache_policy_backend.h index 8cd8090..0103695 100644 --- a/mali_kbase/backend/gpu/mali_kbase_cache_policy_backend.h +++ b/mali_kbase/backend/gpu/mali_kbase_cache_policy_backend.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2014-2016, 2020-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -53,13 +53,4 @@ u32 kbase_cache_get_coherency_features(struct kbase_device *kbdev); */ void kbase_amba_set_memory_cache_support(struct kbase_device *kbdev, bool enable); -/** - * kbase_amba_set_invalidate_hint() - Sets AMBA invalidate hint - * in the GPU. - * @kbdev: Device pointer - * @enable: true for enable. - * - * Note: Only for arch version 12.x.1 onwards. 
- */ -void kbase_amba_set_invalidate_hint(struct kbase_device *kbdev, bool enable); #endif /* _KBASE_CACHE_POLICY_BACKEND_H_ */ diff --git a/mali_kbase/backend/gpu/mali_kbase_clk_rate_trace_mgr.c b/mali_kbase/backend/gpu/mali_kbase_clk_rate_trace_mgr.c index 8d09347..cca4f74 100644 --- a/mali_kbase/backend/gpu/mali_kbase_clk_rate_trace_mgr.c +++ b/mali_kbase/backend/gpu/mali_kbase_clk_rate_trace_mgr.c @@ -58,8 +58,10 @@ get_clk_rate_trace_callbacks(__maybe_unused struct kbase_device *kbdev) if (WARN_ON(!kbdev) || WARN_ON(!kbdev->dev)) return callbacks; - arbiter_if_node = - of_get_property(kbdev->dev->of_node, "arbiter_if", NULL); + arbiter_if_node = of_get_property(kbdev->dev->of_node, "arbiter-if", NULL); + if (!arbiter_if_node) + arbiter_if_node = of_get_property(kbdev->dev->of_node, "arbiter_if", NULL); + /* Arbitration enabled, override the callback pointer.*/ if (arbiter_if_node) callbacks = &arb_clk_rate_trace_ops; @@ -241,8 +243,7 @@ void kbase_clk_rate_trace_manager_gpu_active(struct kbase_device *kbdev) if (!clk_rtm->clk_rate_trace_ops) return; - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - spin_lock(&clk_rtm->lock); + spin_lock_irqsave(&clk_rtm->lock, flags); for (i = 0; i < BASE_MAX_NR_CLOCKS_REGULATORS; i++) { struct kbase_clk_data *clk_data = clk_rtm->clks[i]; @@ -258,8 +259,7 @@ void kbase_clk_rate_trace_manager_gpu_active(struct kbase_device *kbdev) } clk_rtm->gpu_idle = false; - spin_unlock(&clk_rtm->lock); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + spin_unlock_irqrestore(&clk_rtm->lock, flags); } void kbase_clk_rate_trace_manager_gpu_idle(struct kbase_device *kbdev) diff --git a/mali_kbase/backend/gpu/mali_kbase_debug_job_fault_backend.c b/mali_kbase/backend/gpu/mali_kbase_debug_job_fault_backend.c index e121b41..cd3b29d 100644 --- a/mali_kbase/backend/gpu/mali_kbase_debug_job_fault_backend.c +++ b/mali_kbase/backend/gpu/mali_kbase_debug_job_fault_backend.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2012-2015, 2018-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2012-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -59,7 +59,7 @@ static int job_slot_reg_snapshot[] = { JS_CONFIG_NEXT }; -/*MMU_REG(r)*/ +/*MMU_CONTROL_REG(r)*/ static int mmu_reg_snapshot[] = { MMU_IRQ_MASK, MMU_IRQ_STATUS @@ -118,15 +118,14 @@ bool kbase_debug_job_fault_reg_snapshot_init(struct kbase_context *kctx, /* get the MMU registers*/ for (i = 0; i < sizeof(mmu_reg_snapshot)/4; i++) { - kctx->reg_dump[offset] = MMU_REG(mmu_reg_snapshot[i]); + kctx->reg_dump[offset] = MMU_CONTROL_REG(mmu_reg_snapshot[i]); offset += 2; } /* get the Address space registers*/ for (j = 0; j < as_number; j++) { for (i = 0; i < sizeof(as_reg_snapshot)/4; i++) { - kctx->reg_dump[offset] = - MMU_AS_REG(j, as_reg_snapshot[i]); + kctx->reg_dump[offset] = MMU_STAGE1_REG(MMU_AS_REG(j, as_reg_snapshot[i])); offset += 2; } } diff --git a/mali_kbase/backend/gpu/mali_kbase_irq_linux.c b/mali_kbase/backend/gpu/mali_kbase_irq_linux.c index ef09c6b..b95277c 100644 --- a/mali_kbase/backend/gpu/mali_kbase_irq_linux.c +++ b/mali_kbase/backend/gpu/mali_kbase_irq_linux.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2014-2016, 2018-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -99,7 +99,7 @@ static irqreturn_t kbase_mmu_irq_handler(int irq, void *data) atomic_inc(&kbdev->faults_pending); - val = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_STATUS)); + val = kbase_reg_read(kbdev, MMU_CONTROL_REG(MMU_IRQ_STATUS)); #ifdef CONFIG_MALI_DEBUG if (!kbdev->pm.backend.driver_ready_for_irqs) @@ -298,7 +298,7 @@ static irqreturn_t kbase_mmu_irq_test_handler(int irq, void *data) return IRQ_NONE; } - val = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_STATUS)); + val = kbase_reg_read(kbdev, MMU_CONTROL_REG(MMU_IRQ_STATUS)); spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); @@ -310,7 +310,7 @@ static irqreturn_t kbase_mmu_irq_test_handler(int irq, void *data) kbasep_irq_test_data.triggered = 1; wake_up(&kbasep_irq_test_data.wait); - kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), val); + kbase_reg_write(kbdev, MMU_CONTROL_REG(MMU_IRQ_CLEAR), val); return IRQ_HANDLED; } @@ -344,8 +344,8 @@ static int kbasep_common_test_interrupt( break; case MMU_IRQ_TAG: test_handler = kbase_mmu_irq_test_handler; - rawstat_offset = MMU_REG(MMU_IRQ_RAWSTAT); - mask_offset = MMU_REG(MMU_IRQ_MASK); + rawstat_offset = MMU_CONTROL_REG(MMU_IRQ_RAWSTAT); + mask_offset = MMU_CONTROL_REG(MMU_IRQ_MASK); break; case GPU_IRQ_TAG: /* already tested by pm_driver - bail out */ diff --git a/mali_kbase/backend/gpu/mali_kbase_jm_hw.c b/mali_kbase/backend/gpu/mali_kbase_jm_hw.c index 72926bc..dd8f4d9 100644 --- a/mali_kbase/backend/gpu/mali_kbase_jm_hw.c +++ b/mali_kbase/backend/gpu/mali_kbase_jm_hw.c @@ -585,7 +585,7 @@ void kbase_job_done(struct kbase_device *kbdev, u32 done) count += nr_done; while (nr_done) { - if (nr_done == 1) { + if (likely(nr_done == 1)) { kbase_gpu_complete_hw(kbdev, i, completion_code, job_tail, @@ -604,6 +604,14 @@ void kbase_job_done(struct kbase_device *kbdev, u32 done) BASE_JD_EVENT_DONE, 0, &end_timestamp); +#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) + /* Increment the end timestamp value by 1 ns to + * avoid having the same value for 'start_time_ns' + * and 'end_time_ns' for the 2nd atom whose job + * completion IRQ got merged with the 1st atom. 
+ */ + end_timestamp = ktime_add(end_timestamp, ns_to_ktime(1)); +#endif } nr_done--; } @@ -1061,12 +1069,12 @@ static void kbase_debug_dump_registers(struct kbase_device *kbdev) i, kbase_reg_read(kbdev, JOB_SLOT_REG(i, JS_HEAD_LO))); } dev_err(kbdev->dev, " MMU_IRQ_RAWSTAT=0x%08x GPU_FAULTSTATUS=0x%08x", - kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_RAWSTAT)), + kbase_reg_read(kbdev, MMU_CONTROL_REG(MMU_IRQ_RAWSTAT)), kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_FAULTSTATUS))); dev_err(kbdev->dev, " GPU_IRQ_MASK=0x%08x JOB_IRQ_MASK=0x%08x MMU_IRQ_MASK=0x%08x", kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK)), kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK)), - kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK))); + kbase_reg_read(kbdev, MMU_CONTROL_REG(MMU_IRQ_MASK))); dev_err(kbdev->dev, " PWR_OVERRIDE0=0x%08x PWR_OVERRIDE1=0x%08x", kbase_reg_read(kbdev, GPU_CONTROL_REG(PWR_OVERRIDE0)), kbase_reg_read(kbdev, GPU_CONTROL_REG(PWR_OVERRIDE1))); diff --git a/mali_kbase/backend/gpu/mali_kbase_jm_internal.h b/mali_kbase/backend/gpu/mali_kbase_jm_internal.h index bfd55a6..380a530 100644 --- a/mali_kbase/backend/gpu/mali_kbase_jm_internal.h +++ b/mali_kbase/backend/gpu/mali_kbase_jm_internal.h @@ -47,7 +47,7 @@ void kbase_job_done_slot(struct kbase_device *kbdev, int s, u32 completion_code, #if IS_ENABLED(CONFIG_GPU_TRACEPOINTS) static inline char *kbasep_make_job_slot_string(unsigned int js, char *js_string, size_t js_size) { - snprintf(js_string, js_size, "job_slot_%u", js); + (void)scnprintf(js_string, js_size, "job_slot_%u", js); return js_string; } #endif diff --git a/mali_kbase/backend/gpu/mali_kbase_jm_rb.c b/mali_kbase/backend/gpu/mali_kbase_jm_rb.c index f4094a3..66f068a 100644 --- a/mali_kbase/backend/gpu/mali_kbase_jm_rb.c +++ b/mali_kbase/backend/gpu/mali_kbase_jm_rb.c @@ -32,6 +32,9 @@ #include <hwcnt/mali_kbase_hwcnt_context.h> #include <mali_kbase_reset_gpu.h> #include <mali_kbase_kinstr_jm.h> +#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) +#include <mali_kbase_gpu_metrics.h> +#endif #include <backend/gpu/mali_kbase_cache_policy_backend.h> #include <device/mali_kbase_device.h> #include <backend/gpu/mali_kbase_jm_internal.h> @@ -274,6 +277,59 @@ int kbase_backend_slot_free(struct kbase_device *kbdev, unsigned int js) return SLOT_RB_SIZE - kbase_backend_nr_atoms_on_slot(kbdev, js); } +/** + * trace_atom_completion_for_gpu_metrics - Report the completion of atom for the + * purpose of emitting power/gpu_work_period + * tracepoint. + * + * @katom: Pointer to the atom that completed execution on GPU. + * @end_timestamp: Pointer to the timestamp of atom completion. May be NULL, in + * which case current time will be used. + * + * The function would also report the start for an atom that was in the HEAD_NEXT + * register. + * + * Note: Caller must hold the HW access lock. + */ +static inline void trace_atom_completion_for_gpu_metrics( + struct kbase_jd_atom *const katom, + ktime_t *end_timestamp) +{ +#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) + u64 complete_ns; + struct kbase_context *kctx = katom->kctx; + struct kbase_jd_atom *queued = + kbase_gpu_inspect(kctx->kbdev, katom->slot_nr, 1); + +#ifdef CONFIG_MALI_DEBUG + WARN_ON(!kbase_gpu_inspect(kctx->kbdev, katom->slot_nr, 0)); +#endif + + lockdep_assert_held(&kctx->kbdev->hwaccess_lock); + + if (unlikely(queued == katom)) + return; + + /* A protected atom and a non-protected atom cannot be in the RB_SUBMITTED + * state at the same time in the job slot ringbuffer. 
Atom submission state + * machine prevents the submission of a non-protected atom until all + * protected atoms have completed and GPU has exited the protected mode. + * This implies that if the queued atom is in RB_SUBMITTED state, it shall + * be a protected atom and so we can return early. + */ + if (unlikely(kbase_jd_katom_is_protected(katom))) + return; + + if (likely(end_timestamp)) + complete_ns = ktime_to_ns(*end_timestamp); + else + complete_ns = ktime_get_raw_ns(); + + kbase_gpu_metrics_ctx_end_activity(kctx, complete_ns); + if (queued && queued->gpu_rb_state == KBASE_ATOM_GPU_RB_SUBMITTED) + kbase_gpu_metrics_ctx_start_activity(queued->kctx, complete_ns); +#endif +} static void kbase_gpu_release_atom(struct kbase_device *kbdev, struct kbase_jd_atom *katom, @@ -290,6 +346,7 @@ static void kbase_gpu_release_atom(struct kbase_device *kbdev, break; case KBASE_ATOM_GPU_RB_SUBMITTED: + trace_atom_completion_for_gpu_metrics(katom, end_timestamp); kbase_kinstr_jm_atom_hw_release(katom); /* Inform power management at start/finish of atom so it can * update its GPU utilisation metrics. Mark atom as not @@ -865,6 +922,9 @@ void kbase_backend_slot_update(struct kbase_device *kbdev) for (idx = 0; idx < SLOT_RB_SIZE; idx++) { bool cores_ready; +#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) + bool trace_atom_submit_for_gpu_metrics = true; +#endif int ret; if (!katom[idx]) @@ -975,12 +1035,21 @@ void kbase_backend_slot_update(struct kbase_device *kbdev) case KBASE_ATOM_GPU_RB_READY: if (idx == 1) { + enum kbase_atom_gpu_rb_state atom_0_gpu_rb_state = + katom[0]->gpu_rb_state; + +#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) + trace_atom_submit_for_gpu_metrics = + (atom_0_gpu_rb_state == + KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB); +#endif + /* Only submit if head atom or previous * atom already submitted */ - if ((katom[0]->gpu_rb_state != + if ((atom_0_gpu_rb_state != KBASE_ATOM_GPU_RB_SUBMITTED && - katom[0]->gpu_rb_state != + atom_0_gpu_rb_state != KBASE_ATOM_GPU_RB_NOT_IN_SLOT_RB)) break; @@ -1017,7 +1086,15 @@ void kbase_backend_slot_update(struct kbase_device *kbdev) &katom[idx]->start_timestamp); /* Inform platform at start/finish of atom */ + kbasep_platform_event_work_begin(katom[idx]); +#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) + if (likely(trace_atom_submit_for_gpu_metrics && + !kbase_jd_katom_is_protected(katom[idx]))) + kbase_gpu_metrics_ctx_start_activity( + katom[idx]->kctx, + ktime_to_ns(katom[idx]->start_timestamp)); +#endif } else { if (katom[idx]->core_req & BASE_JD_REQ_PERMON) kbase_pm_release_gpu_cycle_counter_nolock(kbdev); @@ -1079,6 +1156,25 @@ kbase_rb_atom_might_depend(const struct kbase_jd_atom *katom_a, KBASE_KATOM_FLAG_FAIL_BLOCKER))); } +static inline void kbase_gpu_remove_atom(struct kbase_device *kbdev, + struct kbase_jd_atom *katom, + u32 action, + bool disjoint) +{ + struct kbase_context *kctx = katom->kctx; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + katom->event_code = BASE_JD_EVENT_REMOVED_FROM_NEXT; + kbase_gpu_mark_atom_for_return(kbdev, katom); + kbase_jsctx_slot_prio_blocked_set(kctx, katom->slot_nr, + katom->sched_priority); + + if (disjoint) + kbase_job_check_enter_disjoint(kbdev, action, katom->core_req, + katom); +} + /** * kbase_gpu_irq_evict - evict a slot's JSn_HEAD_NEXT atom from the HW if it is * related to a failed JSn_HEAD atom @@ -1129,9 +1225,9 @@ bool kbase_gpu_irq_evict(struct kbase_device *kbdev, unsigned int js, u32 comple kbase_reg_read(kbdev, JOB_SLOT_REG(js, JS_HEAD_NEXT_HI)) != 0)) { 
kbase_reg_write(kbdev, JOB_SLOT_REG(js, JS_COMMAND_NEXT), JS_COMMAND_NOP); - next_katom->gpu_rb_state = KBASE_ATOM_GPU_RB_READY; if (completion_code == BASE_JD_EVENT_STOPPED) { + kbase_gpu_remove_atom(kbdev, next_katom, JS_COMMAND_SOFT_STOP, false); KBASE_TLSTREAM_TL_NRET_ATOM_LPU(kbdev, next_katom, &kbdev->gpu_props.props.raw_props.js_features [next_katom->slot_nr]); @@ -1140,10 +1236,12 @@ bool kbase_gpu_irq_evict(struct kbase_device *kbdev, unsigned int js, u32 comple KBASE_TLSTREAM_TL_NRET_CTX_LPU(kbdev, next_katom->kctx, &kbdev->gpu_props.props.raw_props.js_features [next_katom->slot_nr]); - } + } else { + next_katom->gpu_rb_state = KBASE_ATOM_GPU_RB_READY; - if (next_katom->core_req & BASE_JD_REQ_PERMON) - kbase_pm_release_gpu_cycle_counter_nolock(kbdev); + if (next_katom->core_req & BASE_JD_REQ_PERMON) + kbase_pm_release_gpu_cycle_counter_nolock(kbdev); + } /* On evicting the next_katom, the last submission kctx on the * given job slot then reverts back to the one that owns katom. @@ -1528,25 +1626,6 @@ static inline void kbase_gpu_stop_atom(struct kbase_device *kbdev, unsigned int kbase_jsctx_slot_prio_blocked_set(kctx, js, katom->sched_priority); } -static inline void kbase_gpu_remove_atom(struct kbase_device *kbdev, - struct kbase_jd_atom *katom, - u32 action, - bool disjoint) -{ - struct kbase_context *kctx = katom->kctx; - - lockdep_assert_held(&kbdev->hwaccess_lock); - - katom->event_code = BASE_JD_EVENT_REMOVED_FROM_NEXT; - kbase_gpu_mark_atom_for_return(kbdev, katom); - kbase_jsctx_slot_prio_blocked_set(kctx, katom->slot_nr, - katom->sched_priority); - - if (disjoint) - kbase_job_check_enter_disjoint(kbdev, action, katom->core_req, - katom); -} - static int should_stop_x_dep_slot(struct kbase_jd_atom *katom) { if (katom->x_post_dep) { diff --git a/mali_kbase/backend/gpu/mali_kbase_js_backend.c b/mali_kbase/backend/gpu/mali_kbase_js_backend.c index 0ed04bb..ff4e114 100644 --- a/mali_kbase/backend/gpu/mali_kbase_js_backend.c +++ b/mali_kbase/backend/gpu/mali_kbase_js_backend.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2014-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -28,28 +28,18 @@ #include <mali_kbase_reset_gpu.h> #include <backend/gpu/mali_kbase_jm_internal.h> #include <backend/gpu/mali_kbase_js_internal.h> +#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) +#include <mali_kbase_gpu_metrics.h> + +#endif -#if !MALI_USE_CSF /* * Hold the runpool_mutex for this */ -static inline bool timer_callback_should_run(struct kbase_device *kbdev) +static inline bool timer_callback_should_run(struct kbase_device *kbdev, int nr_running_ctxs) { - struct kbase_backend_data *backend = &kbdev->hwaccess.backend; - int nr_running_ctxs; - lockdep_assert_held(&kbdev->js_data.runpool_mutex); - /* Timer must stop if we are suspending */ - if (backend->suspend_timer) - return false; - - /* nr_contexts_pullable is updated with the runpool_mutex. 
However, the - * locking in the caller gives us a barrier that ensures - * nr_contexts_pullable is up-to-date for reading - */ - nr_running_ctxs = atomic_read(&kbdev->js_data.nr_contexts_runnable); - #ifdef CONFIG_MALI_DEBUG if (kbdev->js_data.softstop_always) { /* Debug support for allowing soft-stop on a single context */ @@ -273,18 +263,20 @@ static enum hrtimer_restart timer_callback(struct hrtimer *timer) return HRTIMER_NORESTART; } -#endif /* !MALI_USE_CSF */ void kbase_backend_ctx_count_changed(struct kbase_device *kbdev) { -#if !MALI_USE_CSF struct kbasep_js_device_data *js_devdata = &kbdev->js_data; struct kbase_backend_data *backend = &kbdev->hwaccess.backend; unsigned long flags; + /* Timer must stop if we are suspending */ + const bool suspend_timer = backend->suspend_timer; + const int nr_running_ctxs = + atomic_read(&kbdev->js_data.nr_contexts_runnable); lockdep_assert_held(&js_devdata->runpool_mutex); - if (!timer_callback_should_run(kbdev)) { + if (suspend_timer || !timer_callback_should_run(kbdev, nr_running_ctxs)) { /* Take spinlock to force synchronisation with timer */ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); backend->timer_running = false; @@ -298,7 +290,8 @@ void kbase_backend_ctx_count_changed(struct kbase_device *kbdev) hrtimer_cancel(&backend->scheduling_timer); } - if (timer_callback_should_run(kbdev) && !backend->timer_running) { + if (!suspend_timer && timer_callback_should_run(kbdev, nr_running_ctxs) && + !backend->timer_running) { /* Take spinlock to force synchronisation with timer */ spin_lock_irqsave(&kbdev->hwaccess_lock, flags); backend->timer_running = true; @@ -309,36 +302,59 @@ void kbase_backend_ctx_count_changed(struct kbase_device *kbdev) KBASE_KTRACE_ADD_JM(kbdev, JS_POLICY_TIMER_START, NULL, NULL, 0u, 0u); } -#else /* !MALI_USE_CSF */ - CSTD_UNUSED(kbdev); -#endif /* !MALI_USE_CSF */ + +#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) + if (unlikely(suspend_timer)) { + js_devdata->gpu_metrics_timer_needed = false; + /* Cancel the timer as System suspend is happening */ + hrtimer_cancel(&js_devdata->gpu_metrics_timer); + js_devdata->gpu_metrics_timer_running = false; + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + /* Explicitly emit the tracepoint on System suspend */ + kbase_gpu_metrics_emit_tracepoint(kbdev, ktime_get_raw_ns()); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + return; + } + + if (!nr_running_ctxs) { + /* Just set the flag to not restart the timer on expiry */ + js_devdata->gpu_metrics_timer_needed = false; + return; + } + + /* There are runnable contexts so the timer is needed */ + if (!js_devdata->gpu_metrics_timer_needed) { + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + js_devdata->gpu_metrics_timer_needed = true; + /* No need to restart the timer if it is already running. 
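+ * The timer's expiry callback is not visible in this hunk; as a sketch
+ * only, it is assumed to emit the tracepoint and re-arm itself while
+ * gpu_metrics_timer_needed remains set, along these lines:
+ *
+ *   static enum hrtimer_restart gpu_metrics_timer_callback(struct hrtimer *timer)
+ *   {
+ *       struct kbasep_js_device_data *js_devdata = container_of(
+ *           timer, struct kbasep_js_device_data, gpu_metrics_timer);
+ *       struct kbase_device *kbdev =
+ *           container_of(js_devdata, struct kbase_device, js_data);
+ *       unsigned long flags;
+ *
+ *       spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+ *       kbase_gpu_metrics_emit_tracepoint(kbdev, ktime_get_raw_ns());
+ *       spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+ *
+ *       if (!js_devdata->gpu_metrics_timer_needed)
+ *           return HRTIMER_NORESTART;
+ *       hrtimer_forward_now(timer,
+ *           HR_TIMER_DELAY_NSEC(kbase_gpu_metrics_get_emit_interval()));
+ *       return HRTIMER_RESTART;
+ *   }
+ *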
*/ + if (!js_devdata->gpu_metrics_timer_running) { + hrtimer_start(&js_devdata->gpu_metrics_timer, + HR_TIMER_DELAY_NSEC(kbase_gpu_metrics_get_emit_interval()), + HRTIMER_MODE_REL); + js_devdata->gpu_metrics_timer_running = true; + } + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + } +#endif } int kbase_backend_timer_init(struct kbase_device *kbdev) { -#if !MALI_USE_CSF struct kbase_backend_data *backend = &kbdev->hwaccess.backend; hrtimer_init(&backend->scheduling_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); backend->scheduling_timer.function = timer_callback; backend->timer_running = false; -#else /* !MALI_USE_CSF */ - CSTD_UNUSED(kbdev); -#endif /* !MALI_USE_CSF */ return 0; } void kbase_backend_timer_term(struct kbase_device *kbdev) { -#if !MALI_USE_CSF struct kbase_backend_data *backend = &kbdev->hwaccess.backend; hrtimer_cancel(&backend->scheduling_timer); -#else /* !MALI_USE_CSF */ - CSTD_UNUSED(kbdev); -#endif /* !MALI_USE_CSF */ } void kbase_backend_timer_suspend(struct kbase_device *kbdev) diff --git a/mali_kbase/backend/gpu/mali_kbase_l2_mmu_config.c b/mali_kbase/backend/gpu/mali_kbase_l2_mmu_config.c index 9ce5075..6eedc00 100644 --- a/mali_kbase/backend/gpu/mali_kbase_l2_mmu_config.c +++ b/mali_kbase/backend/gpu/mali_kbase_l2_mmu_config.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -19,8 +19,9 @@ * */ +#include <linux/version_compat_defs.h> + #include <mali_kbase.h> -#include <mali_kbase_bits.h> #include <mali_kbase_config_defaults.h> #include <device/mali_kbase_device.h> #include "mali_kbase_l2_mmu_config.h" diff --git a/mali_kbase/backend/gpu/mali_kbase_model_dummy.c b/mali_kbase/backend/gpu/mali_kbase_model_dummy.c index dd16fb2..46bcdc7 100644 --- a/mali_kbase/backend/gpu/mali_kbase_model_dummy.c +++ b/mali_kbase/backend/gpu/mali_kbase_model_dummy.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2014-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -484,13 +484,6 @@ void *gpu_device_get_data(void *model) #define signal_int(m, s) m->slots[(s)].job_complete_irq_asserted = 1 -/* SCons should pass in a default GPU, but other ways of building (e.g. - * in-tree) won't, so define one here in case. 
- */ -#ifndef CONFIG_MALI_NO_MALI_DEFAULT_GPU -#define CONFIG_MALI_NO_MALI_DEFAULT_GPU "tMIx" -#endif - static char *no_mali_gpu = CONFIG_MALI_NO_MALI_DEFAULT_GPU; module_param(no_mali_gpu, charp, 0000); MODULE_PARM_DESC(no_mali_gpu, "GPU to identify as"); @@ -1378,10 +1371,10 @@ void midgard_model_write_reg(void *h, u32 addr, u32 value) dummy->l2_config = value; } #if MALI_USE_CSF - else if (addr >= GPU_CONTROL_REG(CSF_HW_DOORBELL_PAGE_OFFSET) && - addr < GPU_CONTROL_REG(CSF_HW_DOORBELL_PAGE_OFFSET + - (CSF_NUM_DOORBELL * CSF_HW_DOORBELL_PAGE_SIZE))) { - if (addr == GPU_CONTROL_REG(CSF_HW_DOORBELL_PAGE_OFFSET)) + else if (addr >= CSF_HW_DOORBELL_PAGE_OFFSET && + addr < CSF_HW_DOORBELL_PAGE_OFFSET + + (CSF_NUM_DOORBELL * CSF_HW_DOORBELL_PAGE_SIZE)) { + if (addr == CSF_HW_DOORBELL_PAGE_OFFSET) hw_error_status.job_irq_status = JOB_IRQ_GLOBAL_IF; } else if ((addr >= GPU_CONTROL_REG(SYSC_ALLOC0)) && (addr < GPU_CONTROL_REG(SYSC_ALLOC(SYSC_ALLOC_COUNT)))) { @@ -1409,13 +1402,13 @@ void midgard_model_write_reg(void *h, u32 addr, u32 value) } } #endif - else if (addr == MMU_REG(MMU_IRQ_MASK)) { + else if (addr == MMU_CONTROL_REG(MMU_IRQ_MASK)) { hw_error_status.mmu_irq_mask = value; - } else if (addr == MMU_REG(MMU_IRQ_CLEAR)) { + } else if (addr == MMU_CONTROL_REG(MMU_IRQ_CLEAR)) { hw_error_status.mmu_irq_rawstat &= (~value); - } else if ((addr >= MMU_AS_REG(0, AS_TRANSTAB_LO)) && (addr <= MMU_AS_REG(15, AS_STATUS))) { - int mem_addr_space = (addr - MMU_AS_REG(0, AS_TRANSTAB_LO)) - >> 6; + } else if ((addr >= MMU_STAGE1_REG(MMU_AS_REG(0, AS_TRANSTAB_LO))) && + (addr <= MMU_STAGE1_REG(MMU_AS_REG(15, AS_STATUS)))) { + int mem_addr_space = (addr - MMU_STAGE1_REG(MMU_AS_REG(0, AS_TRANSTAB_LO))) >> 6; switch (addr & 0x3F) { case AS_COMMAND: @@ -1926,10 +1919,9 @@ void midgard_model_read_reg(void *h, u32 addr, u32 *const value) } else if (addr >= GPU_CONTROL_REG(CYCLE_COUNT_LO) && addr <= GPU_CONTROL_REG(TIMESTAMP_HI)) { *value = 0; - } else if (addr >= MMU_AS_REG(0, AS_TRANSTAB_LO) - && addr <= MMU_AS_REG(15, AS_STATUS)) { - int mem_addr_space = (addr - MMU_AS_REG(0, AS_TRANSTAB_LO)) - >> 6; + } else if (addr >= MMU_STAGE1_REG(MMU_AS_REG(0, AS_TRANSTAB_LO)) && + addr <= MMU_STAGE1_REG(MMU_AS_REG(15, AS_STATUS))) { + int mem_addr_space = (addr - MMU_STAGE1_REG(MMU_AS_REG(0, AS_TRANSTAB_LO))) >> 6; switch (addr & 0x3F) { case AS_TRANSTAB_LO: @@ -1973,11 +1965,11 @@ void midgard_model_read_reg(void *h, u32 addr, u32 *const value) *value = 0; break; } - } else if (addr == MMU_REG(MMU_IRQ_MASK)) { + } else if (addr == MMU_CONTROL_REG(MMU_IRQ_MASK)) { *value = hw_error_status.mmu_irq_mask; - } else if (addr == MMU_REG(MMU_IRQ_RAWSTAT)) { + } else if (addr == MMU_CONTROL_REG(MMU_IRQ_RAWSTAT)) { *value = hw_error_status.mmu_irq_rawstat; - } else if (addr == MMU_REG(MMU_IRQ_STATUS)) { + } else if (addr == MMU_CONTROL_REG(MMU_IRQ_STATUS)) { *value = hw_error_status.mmu_irq_mask & hw_error_status.mmu_irq_rawstat; } @@ -1985,8 +1977,7 @@ void midgard_model_read_reg(void *h, u32 addr, u32 *const value) else if (addr == IPA_CONTROL_REG(STATUS)) { *value = (ipa_control_timer_enabled << 31); } else if ((addr >= IPA_CONTROL_REG(VALUE_CSHW_REG_LO(0))) && - (addr <= IPA_CONTROL_REG(VALUE_CSHW_REG_HI( - IPA_CTL_MAX_VAL_CNT_IDX)))) { + (addr <= IPA_CONTROL_REG(VALUE_CSHW_REG_HI(IPA_CTL_MAX_VAL_CNT_IDX)))) { u32 counter_index = (addr - IPA_CONTROL_REG(VALUE_CSHW_REG_LO(0))) >> 3; bool is_low_word = @@ -1995,8 +1986,7 @@ void midgard_model_read_reg(void *h, u32 addr, u32 *const value) *value = 
gpu_model_get_prfcnt_value(KBASE_IPA_CORE_TYPE_CSHW, counter_index, is_low_word); } else if ((addr >= IPA_CONTROL_REG(VALUE_MEMSYS_REG_LO(0))) && - (addr <= IPA_CONTROL_REG(VALUE_MEMSYS_REG_HI( - IPA_CTL_MAX_VAL_CNT_IDX)))) { + (addr <= IPA_CONTROL_REG(VALUE_MEMSYS_REG_HI(IPA_CTL_MAX_VAL_CNT_IDX)))) { u32 counter_index = (addr - IPA_CONTROL_REG(VALUE_MEMSYS_REG_LO(0))) >> 3; bool is_low_word = @@ -2005,8 +1995,7 @@ void midgard_model_read_reg(void *h, u32 addr, u32 *const value) *value = gpu_model_get_prfcnt_value(KBASE_IPA_CORE_TYPE_MEMSYS, counter_index, is_low_word); } else if ((addr >= IPA_CONTROL_REG(VALUE_TILER_REG_LO(0))) && - (addr <= IPA_CONTROL_REG(VALUE_TILER_REG_HI( - IPA_CTL_MAX_VAL_CNT_IDX)))) { + (addr <= IPA_CONTROL_REG(VALUE_TILER_REG_HI(IPA_CTL_MAX_VAL_CNT_IDX)))) { u32 counter_index = (addr - IPA_CONTROL_REG(VALUE_TILER_REG_LO(0))) >> 3; bool is_low_word = @@ -2015,8 +2004,7 @@ void midgard_model_read_reg(void *h, u32 addr, u32 *const value) *value = gpu_model_get_prfcnt_value(KBASE_IPA_CORE_TYPE_TILER, counter_index, is_low_word); } else if ((addr >= IPA_CONTROL_REG(VALUE_SHADER_REG_LO(0))) && - (addr <= IPA_CONTROL_REG(VALUE_SHADER_REG_HI( - IPA_CTL_MAX_VAL_CNT_IDX)))) { + (addr <= IPA_CONTROL_REG(VALUE_SHADER_REG_HI(IPA_CTL_MAX_VAL_CNT_IDX)))) { u32 counter_index = (addr - IPA_CONTROL_REG(VALUE_SHADER_REG_LO(0))) >> 3; bool is_low_word = @@ -2214,16 +2202,3 @@ int gpu_model_control(void *model, return 0; } - -/** - * kbase_is_gpu_removed - Has the GPU been removed. - * @kbdev: Kbase device pointer - * - * This function would return true if the GPU has been removed. - * It is stubbed here - * Return: Always false - */ -bool kbase_is_gpu_removed(struct kbase_device *kbdev) -{ - return false; -} diff --git a/mali_kbase/backend/gpu/mali_kbase_model_linux.c b/mali_kbase/backend/gpu/mali_kbase_model_linux.c index e90e4df..67e00e9 100644 --- a/mali_kbase/backend/gpu/mali_kbase_model_linux.c +++ b/mali_kbase/backend/gpu/mali_kbase_model_linux.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2010, 2012-2015, 2017-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -95,8 +95,7 @@ static void serve_mmu_irq(struct work_struct *work) if (atomic_cmpxchg(&kbdev->serving_mmu_irq, 1, 0) == 1) { u32 val; - while ((val = kbase_reg_read(kbdev, - MMU_REG(MMU_IRQ_STATUS)))) { + while ((val = kbase_reg_read(kbdev, MMU_CONTROL_REG(MMU_IRQ_STATUS)))) { /* Handle the IRQ */ kbase_mmu_interrupt(kbdev, val); } @@ -156,7 +155,7 @@ KBASE_EXPORT_TEST_API(kbase_reg_write); u32 kbase_reg_read(struct kbase_device *kbdev, u32 offset) { unsigned long flags; - u32 val; + u32 val = 0; spin_lock_irqsave(&kbdev->reg_op_lock, flags); midgard_model_read_reg(kbdev->model, offset, &val); diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_backend.c b/mali_kbase/backend/gpu/mali_kbase_pm_backend.c index abbb9c8..46c5ffd 100644 --- a/mali_kbase/backend/gpu/mali_kbase_pm_backend.c +++ b/mali_kbase/backend/gpu/mali_kbase_pm_backend.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -169,6 +169,7 @@ int kbase_hwaccess_pm_init(struct kbase_device *kbdev) kbdev->pm.backend.gpu_powered = false; kbdev->pm.backend.gpu_ready = false; kbdev->pm.suspending = false; + kbdev->pm.resuming = false; #ifdef CONFIG_MALI_ARBITER_SUPPORT kbase_pm_set_gpu_lost(kbdev, false); #endif @@ -590,11 +591,13 @@ static int kbase_pm_do_poweroff_sync(struct kbase_device *kbdev) { struct kbase_pm_backend_data *backend = &kbdev->pm.backend; unsigned long flags; - int ret = 0; + int ret; WARN_ON(kbdev->pm.active_count); - kbase_pm_wait_for_poweroff_work_complete(kbdev); + ret = kbase_pm_wait_for_poweroff_work_complete(kbdev); + if (ret) + return ret; kbase_pm_lock(kbdev); spin_lock_irqsave(&kbdev->hwaccess_lock, flags); @@ -679,60 +682,6 @@ unlock_hwaccess: spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); } -static bool is_poweroff_in_progress(struct kbase_device *kbdev) -{ - bool ret; - unsigned long flags; - - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - ret = (kbdev->pm.backend.poweroff_wait_in_progress == false); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - - return ret; -} - -void kbase_pm_wait_for_poweroff_work_complete(struct kbase_device *kbdev) -{ -#define POWEROFF_TIMEOUT_MSEC 500 - long remaining = msecs_to_jiffies(POWEROFF_TIMEOUT_MSEC); - remaining = wait_event_killable_timeout(kbdev->pm.backend.poweroff_wait, - is_poweroff_in_progress(kbdev), remaining); - if (!remaining) { - /* If work is now pending, kbase_pm_gpu_poweroff_wait_wq() will - * definitely be called, so it's safe to continue waiting for it. - */ - if (!work_pending(&kbdev->pm.backend.gpu_poweroff_wait_work)) { - unsigned long flags; - kbasep_platform_event_core_dump(kbdev, "poweroff work timeout"); - dev_err(kbdev->dev, "failed to wait for poweroff worker after %ims", - POWEROFF_TIMEOUT_MSEC); - kbase_gpu_timeout_debug_message(kbdev); -#if MALI_USE_CSF - //csf.scheduler.state should be accessed with scheduler lock! - //callchains go through this function though holding that lock - //so just print without locking. - dev_err(kbdev->dev, "scheduler.state %d", kbdev->csf.scheduler.state); - dev_err(kbdev->dev, "Firmware ping %d", kbase_csf_firmware_ping_wait(kbdev, 0)); -#endif - //Attempt another state machine transition prompt. - dev_err(kbdev->dev, "Attempt to prompt state machine"); - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - kbase_pm_update_state(kbdev); - spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); - - dev_err(kbdev->dev, "GPU state after re-prompt of state machine"); - kbase_gpu_timeout_debug_message(kbdev); - - dev_err(kbdev->dev, "retrying wait, this is likely to still hang. 
%d", - is_poweroff_in_progress(kbdev)); - } - wait_event_killable(kbdev->pm.backend.poweroff_wait, - is_poweroff_in_progress(kbdev)); - } -#undef POWEROFF_TIMEOUT_MSEC -} -KBASE_EXPORT_TEST_API(kbase_pm_wait_for_poweroff_work_complete); - /** * is_gpu_powered_down - Check whether GPU is powered down * @@ -986,7 +935,13 @@ int kbase_hwaccess_pm_suspend(struct kbase_device *kbdev) kbase_pm_unlock(kbdev); - kbase_pm_wait_for_poweroff_work_complete(kbdev); + ret = kbase_pm_wait_for_poweroff_work_complete(kbdev); + if (ret) { +#if !MALI_USE_CSF + kbase_backend_timer_resume(kbdev); +#endif /* !MALI_USE_CSF */ + return ret; + } #endif WARN_ON(kbdev->pm.backend.gpu_powered); @@ -1002,6 +957,8 @@ void kbase_hwaccess_pm_resume(struct kbase_device *kbdev) { kbase_pm_lock(kbdev); + /* System resume callback has begun */ + kbdev->pm.resuming = true; kbdev->pm.suspending = false; #ifdef CONFIG_MALI_ARBITER_SUPPORT if (kbase_pm_is_gpu_lost(kbdev)) { @@ -1016,7 +973,6 @@ void kbase_hwaccess_pm_resume(struct kbase_device *kbdev) kbase_backend_timer_resume(kbdev); #endif /* !MALI_USE_CSF */ - wake_up_all(&kbdev->pm.resume_wait); kbase_pm_unlock(kbdev); } diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_driver.c b/mali_kbase/backend/gpu/mali_kbase_pm_driver.c index 2c69ac9..7c891c1 100644 --- a/mali_kbase/backend/gpu/mali_kbase_pm_driver.c +++ b/mali_kbase/backend/gpu/mali_kbase_pm_driver.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -51,9 +51,6 @@ #ifdef CONFIG_MALI_ARBITER_SUPPORT #include <arbiter/mali_kbase_arbiter_pm.h> #endif /* CONFIG_MALI_ARBITER_SUPPORT */ -#if MALI_USE_CSF -#include <csf/ipa_control/mali_kbase_csf_ipa_control.h> -#endif #if MALI_USE_CSF #include <linux/delay.h> @@ -699,8 +696,8 @@ static void wait_mcu_as_inactive(struct kbase_device *kbdev) /* Wait for the AS_ACTIVE_INT bit to become 0 for the AS used by MCU FW */ while (--max_loops && - kbase_reg_read(kbdev, MMU_AS_REG(MCU_AS_NR, AS_STATUS)) & - AS_STATUS_AS_ACTIVE_INT) + kbase_reg_read(kbdev, MMU_STAGE1_REG(MMU_AS_REG(MCU_AS_NR, AS_STATUS))) & + AS_STATUS_AS_ACTIVE_INT) ; if (!WARN_ON_ONCE(max_loops == 0)) @@ -2442,26 +2439,29 @@ void kbase_pm_reset_complete(struct kbase_device *kbdev) #define PM_TIMEOUT_MS (5000) /* 5s */ #endif -void kbase_gpu_timeout_debug_message(struct kbase_device *kbdev) { +void kbase_gpu_timeout_debug_message(struct kbase_device *kbdev, const char *timeout_msg) +{ unsigned long flags; + + dev_err(kbdev->dev, "%s", timeout_msg); #if !MALI_USE_CSF CSTD_UNUSED(flags); dev_err(kbdev->dev, "Desired state :\n"); - dev_err(kbdev->dev, " Shader=%016llx\n", + dev_err(kbdev->dev, "\tShader=%016llx\n", kbdev->pm.backend.shaders_desired ? 
kbdev->pm.backend.shaders_avail : 0); #else dev_err(kbdev->dev, "GPU pm state :\n"); spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - dev_err(kbdev->dev, " scheduler.pm_active_count = %d", kbdev->csf.scheduler.pm_active_count); - dev_err(kbdev->dev, " poweron_required %d pm.active_count %d invoke_poweroff_wait_wq_when_l2_off %d", + dev_err(kbdev->dev, "\tscheduler.pm_active_count = %d", kbdev->csf.scheduler.pm_active_count); + dev_err(kbdev->dev, "\tpoweron_required %d pm.active_count %d invoke_poweroff_wait_wq_when_l2_off %d", kbdev->pm.backend.poweron_required, kbdev->pm.active_count, kbdev->pm.backend.invoke_poweroff_wait_wq_when_l2_off); - dev_err(kbdev->dev, " gpu_poweroff_wait_work pending %d", + dev_err(kbdev->dev, "\tgpu_poweroff_wait_work pending %d", work_pending(&kbdev->pm.backend.gpu_poweroff_wait_work)); - dev_err(kbdev->dev, " MCU desired = %d\n", + dev_err(kbdev->dev, "\tMCU desired = %d\n", kbase_pm_is_mcu_desired(kbdev)); - dev_err(kbdev->dev, " MCU sw state = %d\n", + dev_err(kbdev->dev, "\tMCU sw state = %d\n", kbdev->pm.backend.mcu_state); dev_err(kbdev->dev, "\tL2 desired = %d (locked_off: %d)\n", kbase_pm_is_l2_desired(kbdev), kbdev->pm.backend.policy_change_clamp_state_to_off); @@ -2474,17 +2474,17 @@ void kbase_gpu_timeout_debug_message(struct kbase_device *kbdev) { spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); #endif dev_err(kbdev->dev, "Current state :\n"); - dev_err(kbdev->dev, " Shader=%08x%08x\n", + dev_err(kbdev->dev, "\tShader=%08x%08x\n", kbase_reg_read(kbdev, GPU_CONTROL_REG(SHADER_READY_HI)), kbase_reg_read(kbdev, GPU_CONTROL_REG(SHADER_READY_LO))); - dev_err(kbdev->dev, " Tiler =%08x%08x\n", + dev_err(kbdev->dev, "\tTiler =%08x%08x\n", kbase_reg_read(kbdev, GPU_CONTROL_REG(TILER_READY_HI)), kbase_reg_read(kbdev, GPU_CONTROL_REG(TILER_READY_LO))); - dev_err(kbdev->dev, " L2 =%08x%08x\n", + dev_err(kbdev->dev, "\tL2 =%08x%08x\n", kbase_reg_read(kbdev, GPU_CONTROL_REG(L2_READY_HI)), kbase_reg_read(kbdev, @@ -2493,17 +2493,17 @@ void kbase_gpu_timeout_debug_message(struct kbase_device *kbdev) { kbase_csf_debug_dump_registers(kbdev); #endif dev_err(kbdev->dev, "Cores transitioning :\n"); - dev_err(kbdev->dev, " Shader=%08x%08x\n", + dev_err(kbdev->dev, "\tShader=%08x%08x\n", kbase_reg_read(kbdev, GPU_CONTROL_REG( SHADER_PWRTRANS_HI)), kbase_reg_read(kbdev, GPU_CONTROL_REG( SHADER_PWRTRANS_LO))); - dev_err(kbdev->dev, " Tiler =%08x%08x\n", + dev_err(kbdev->dev, "\tTiler =%08x%08x\n", kbase_reg_read(kbdev, GPU_CONTROL_REG( TILER_PWRTRANS_HI)), kbase_reg_read(kbdev, GPU_CONTROL_REG( TILER_PWRTRANS_LO))); - dev_err(kbdev->dev, " L2 =%08x%08x\n", + dev_err(kbdev->dev, "\tL2 =%08x%08x\n", kbase_reg_read(kbdev, GPU_CONTROL_REG( L2_PWRTRANS_HI)), kbase_reg_read(kbdev, GPU_CONTROL_REG( @@ -2512,12 +2512,9 @@ void kbase_gpu_timeout_debug_message(struct kbase_device *kbdev) { dump_stack(); } -static void kbase_pm_timed_out(struct kbase_device *kbdev) +static void kbase_pm_timed_out(struct kbase_device *kbdev, const char *timeout_msg) { - dev_err(kbdev->dev, "Power transition timed out unexpectedly\n"); - kbase_gpu_timeout_debug_message(kbdev); - dev_err(kbdev->dev, "Sending reset to GPU - all running jobs will be lost\n"); - + kbase_gpu_timeout_debug_message(kbdev, timeout_msg); /* pixel: If either: * 1. L2/MCU power transition timed out, or, * 2. 
kbase state machine fell out of sync with the hw state, @@ -2530,6 +2527,7 @@ static void kbase_pm_timed_out(struct kbase_device *kbdev) * We have already lost work if we end up here, so send a powercycle to reset the hw, * which is more reliable. */ + dev_err(kbdev->dev, "Sending reset to GPU - all running jobs will be lost\n"); if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR | RESET_FLAGS_FORCE_PM_HW_RESET)) @@ -2570,7 +2568,7 @@ int kbase_pm_wait_for_l2_powered(struct kbase_device *kbdev) .info = GPU_UEVENT_INFO_L2_PM_TIMEOUT }; pixel_gpu_uevent_send(kbdev, &evt); - kbase_pm_timed_out(kbdev); + kbase_pm_timed_out(kbdev, "Wait for desired PM state with L2 powered timed out"); err = -ETIMEDOUT; } else if (remaining < 0) { dev_info( @@ -2582,7 +2580,7 @@ int kbase_pm_wait_for_l2_powered(struct kbase_device *kbdev) return err; } -int kbase_pm_wait_for_desired_state(struct kbase_device *kbdev) +static int pm_wait_for_desired_state(struct kbase_device *kbdev, bool killable_wait) { unsigned long flags; long remaining; @@ -2600,31 +2598,42 @@ int kbase_pm_wait_for_desired_state(struct kbase_device *kbdev) /* Wait for cores */ #if KERNEL_VERSION(4, 13, 1) <= LINUX_VERSION_CODE - remaining = wait_event_killable_timeout( - kbdev->pm.backend.gpu_in_desired_state_wait, - kbase_pm_is_in_desired_state(kbdev), timeout); + if (killable_wait) + remaining = wait_event_killable_timeout(kbdev->pm.backend.gpu_in_desired_state_wait, + kbase_pm_is_in_desired_state(kbdev), + timeout); #else - remaining = wait_event_timeout( - kbdev->pm.backend.gpu_in_desired_state_wait, - kbase_pm_is_in_desired_state(kbdev), timeout); + killable_wait = false; #endif - + if (!killable_wait) + remaining = wait_event_timeout(kbdev->pm.backend.gpu_in_desired_state_wait, + kbase_pm_is_in_desired_state(kbdev), timeout); if (!remaining) { const struct gpu_uevent evt = { .type = GPU_UEVENT_TYPE_KMD_ERROR, .info = GPU_UEVENT_INFO_PM_TIMEOUT }; pixel_gpu_uevent_send(kbdev, &evt); - kbase_pm_timed_out(kbdev); + kbase_pm_timed_out(kbdev, "Wait for power transition timed out"); err = -ETIMEDOUT; } else if (remaining < 0) { - dev_info(kbdev->dev, - "Wait for desired PM state got interrupted"); + WARN_ON_ONCE(!killable_wait); + dev_info(kbdev->dev, "Wait for power transition got interrupted"); err = (int)remaining; } return err; } + +int kbase_pm_killable_wait_for_desired_state(struct kbase_device *kbdev) +{ + return pm_wait_for_desired_state(kbdev, true); +} + +int kbase_pm_wait_for_desired_state(struct kbase_device *kbdev) +{ + return pm_wait_for_desired_state(kbdev, false); +} KBASE_EXPORT_TEST_API(kbase_pm_wait_for_desired_state); #if MALI_USE_CSF @@ -2674,7 +2683,7 @@ int kbase_pm_wait_for_cores_down_scale(struct kbase_device *kbdev) #endif if (!remaining) { - kbase_pm_timed_out(kbdev); + kbase_pm_timed_out(kbdev, "Wait for cores down scaling timed out"); err = -ETIMEDOUT; } else if (remaining < 0) { dev_info( @@ -2687,6 +2696,96 @@ int kbase_pm_wait_for_cores_down_scale(struct kbase_device *kbdev) } #endif +static bool is_poweroff_wait_in_progress(struct kbase_device *kbdev) +{ + bool ret; + unsigned long flags; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + ret = kbdev->pm.backend.poweroff_wait_in_progress; + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + return ret; +} + +static int pm_wait_for_poweroff_work_complete(struct kbase_device *kbdev, bool killable_wait) +{ + long remaining; +#if MALI_USE_CSF + /* gpu_poweroff_wait_work would be subjected to the kernel scheduling + * and so the 
wait time cannot be a function of GPU frequency alone. + */ + const unsigned int extra_wait_time_ms = 2000; + const long timeout = kbase_csf_timeout_in_jiffies( + kbase_get_timeout_ms(kbdev, CSF_PM_TIMEOUT) + extra_wait_time_ms); +#else +#ifdef CONFIG_MALI_ARBITER_SUPPORT + /* Handling of timeout error isn't supported for arbiter builds */ + const long timeout = MAX_SCHEDULE_TIMEOUT; +#else + const long timeout = msecs_to_jiffies(PM_TIMEOUT_MS); +#endif +#endif + int err = 0; + +#if KERNEL_VERSION(4, 13, 1) <= LINUX_VERSION_CODE + if (killable_wait) + remaining = wait_event_killable_timeout(kbdev->pm.backend.poweroff_wait, + !is_poweroff_wait_in_progress(kbdev), + timeout); +#else + killable_wait = false; +#endif + + if (!killable_wait) + remaining = wait_event_timeout(kbdev->pm.backend.poweroff_wait, + !is_poweroff_wait_in_progress(kbdev), timeout); + if (!remaining) { + /* If work is now pending, kbase_pm_gpu_poweroff_wait_wq() will + * definitely be called, so it's safe to continue waiting for it. + */ + if (work_pending(&kbdev->pm.backend.gpu_poweroff_wait_work)) { + wait_event_killable(kbdev->pm.backend.poweroff_wait, + !is_poweroff_wait_in_progress(kbdev)); + } else { + unsigned long flags; + kbasep_platform_event_core_dump(kbdev, "poweroff work timeout"); + kbase_gpu_timeout_debug_message(kbdev, "failed to wait for poweroff worker"); +#if MALI_USE_CSF + /* csf.scheduler.state should be accessed with the scheduler lock! + * Callchains reach this function while holding that lock, + * so just print without locking. + */ + dev_err(kbdev->dev, "scheduler.state %d", kbdev->csf.scheduler.state); + dev_err(kbdev->dev, "Firmware ping %d", kbase_csf_firmware_ping_wait(kbdev, 0)); +#endif + /* Attempt another state machine transition prompt. */ + dev_err(kbdev->dev, "Attempt to prompt state machine"); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kbase_pm_update_state(kbdev); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + kbase_gpu_timeout_debug_message(kbdev, "GPU state after re-prompt of state machine"); + err = -ETIMEDOUT; + } + } else if (remaining < 0) { + WARN_ON_ONCE(!killable_wait); + dev_info(kbdev->dev, "Wait for poweroff work got interrupted"); + err = (int)remaining; + } + return err; +} + +int kbase_pm_killable_wait_for_poweroff_work_complete(struct kbase_device *kbdev) +{ + return pm_wait_for_poweroff_work_complete(kbdev, true); +} + +int kbase_pm_wait_for_poweroff_work_complete(struct kbase_device *kbdev) +{ + return pm_wait_for_poweroff_work_complete(kbdev, false); +} +KBASE_EXPORT_TEST_API(kbase_pm_wait_for_poweroff_work_complete); + void kbase_pm_enable_interrupts(struct kbase_device *kbdev) { unsigned long flags; @@ -2704,12 +2803,12 @@ void kbase_pm_enable_interrupts(struct kbase_device *kbdev) kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), 0xFFFFFFFF); kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK), 0xFFFFFFFF); - kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), 0xFFFFFFFF); + kbase_reg_write(kbdev, MMU_CONTROL_REG(MMU_IRQ_CLEAR), 0xFFFFFFFF); #if MALI_USE_CSF /* Enable only the Page fault bits part */ - kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), 0xFFFF); + kbase_reg_write(kbdev, MMU_CONTROL_REG(MMU_IRQ_MASK), 0xFFFF); #else - kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), 0xFFFFFFFF); + kbase_reg_write(kbdev, MMU_CONTROL_REG(MMU_IRQ_MASK), 0xFFFFFFFF); #endif } @@ -2729,8 +2828,8 @@ void kbase_pm_disable_interrupts_nolock(struct kbase_device *kbdev) kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK), 0); kbase_reg_write(kbdev,
JOB_CONTROL_REG(JOB_IRQ_CLEAR), 0xFFFFFFFF); - kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), 0); - kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), 0xFFFFFFFF); + kbase_reg_write(kbdev, MMU_CONTROL_REG(MMU_IRQ_MASK), 0); + kbase_reg_write(kbdev, MMU_CONTROL_REG(MMU_IRQ_CLEAR), 0xFFFFFFFF); } void kbase_pm_disable_interrupts(struct kbase_device *kbdev) @@ -3147,9 +3246,13 @@ static int kbase_pm_hw_issues_detect(struct kbase_device *kbdev) kbdev->hw_quirks_tiler = 0; kbdev->hw_quirks_mmu = 0; - if (!of_property_read_u32(np, "quirks_gpu", &kbdev->hw_quirks_gpu)) { - dev_info(kbdev->dev, - "Found quirks_gpu = [0x%x] in Devicetree\n", + /* Read the "-" versions of the properties and fall back to + * the "_" versions if these are not found + */ + + if (!of_property_read_u32(np, "quirks-gpu", &kbdev->hw_quirks_gpu) || + !of_property_read_u32(np, "quirks_gpu", &kbdev->hw_quirks_gpu)) { + dev_info(kbdev->dev, "Found quirks_gpu = [0x%x] in Devicetree\n", kbdev->hw_quirks_gpu); } else { error = kbase_set_gpu_quirks(kbdev, prod_id); @@ -3157,33 +3260,30 @@ static int kbase_pm_hw_issues_detect(struct kbase_device *kbdev) return error; } - if (!of_property_read_u32(np, "quirks_sc", - &kbdev->hw_quirks_sc)) { - dev_info(kbdev->dev, - "Found quirks_sc = [0x%x] in Devicetree\n", - kbdev->hw_quirks_sc); + if (!of_property_read_u32(np, "quirks-sc", &kbdev->hw_quirks_sc) || + !of_property_read_u32(np, "quirks_sc", &kbdev->hw_quirks_sc)) { + dev_info(kbdev->dev, "Found quirks_sc = [0x%x] in Devicetree\n", + kbdev->hw_quirks_sc); } else { error = kbase_set_sc_quirks(kbdev, prod_id); if (error) return error; } - if (!of_property_read_u32(np, "quirks_tiler", - &kbdev->hw_quirks_tiler)) { - dev_info(kbdev->dev, - "Found quirks_tiler = [0x%x] in Devicetree\n", - kbdev->hw_quirks_tiler); + if (!of_property_read_u32(np, "quirks-tiler", &kbdev->hw_quirks_tiler) || + !of_property_read_u32(np, "quirks_tiler", &kbdev->hw_quirks_tiler)) { + dev_info(kbdev->dev, "Found quirks_tiler = [0x%x] in Devicetree\n", + kbdev->hw_quirks_tiler); } else { error = kbase_set_tiler_quirks(kbdev); if (error) return error; } - if (!of_property_read_u32(np, "quirks_mmu", - &kbdev->hw_quirks_mmu)) { - dev_info(kbdev->dev, - "Found quirks_mmu = [0x%x] in Devicetree\n", - kbdev->hw_quirks_mmu); + if (!of_property_read_u32(np, "quirks-mmu", &kbdev->hw_quirks_mmu) || + !of_property_read_u32(np, "quirks_mmu", &kbdev->hw_quirks_mmu)) { + dev_info(kbdev->dev, "Found quirks_mmu = [0x%x] in Devicetree\n", + kbdev->hw_quirks_mmu); } else { error = kbase_set_mmu_quirks(kbdev); } diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_internal.h b/mali_kbase/backend/gpu/mali_kbase_pm_internal.h index 9e29236..d7f19fb 100644 --- a/mali_kbase/backend/gpu/mali_kbase_pm_internal.h +++ b/mali_kbase/backend/gpu/mali_kbase_pm_internal.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -224,7 +224,7 @@ void kbase_pm_reset_done(struct kbase_device *kbdev); * power off in progress and kbase_pm_context_active() was called instead of * kbase_csf_scheduler_pm_active(). * - * Return: 0 on success, error code on error + * Return: 0 on success, or -ETIMEDOUT code on timeout error. 
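+ *
+ * A minimal illustrative call pattern, as a sketch only (it assumes the
+ * caller has already published its desired PM state under the
+ * hwaccess_lock):
+ *
+ *   spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+ *   kbase_pm_update_state(kbdev);
+ *   spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+ *   err = kbase_pm_wait_for_desired_state(kbdev);
+ *
+ * On -ETIMEDOUT the PM code has already dumped its debug state and
+ * requested a GPU reset, so callers typically just propagate the error.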
*/ int kbase_pm_wait_for_desired_state(struct kbase_device *kbdev); #else @@ -247,12 +247,27 @@ int kbase_pm_wait_for_desired_state(struct kbase_device *kbdev); * must ensure that this is not the case by, for example, calling * kbase_pm_wait_for_poweroff_work_complete() * - * Return: 0 on success, error code on error + * Return: 0 on success, or -ETIMEDOUT on timeout. */ int kbase_pm_wait_for_desired_state(struct kbase_device *kbdev); #endif /** + * kbase_pm_killable_wait_for_desired_state - Wait for the desired power state to be + * reached in a killable state. + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + * This function is the same as kbase_pm_wait_for_desired_state(), except that it + * allows the SIGKILL signal to interrupt the wait. + * This function is supposed to be called from code that executes in ioctl or other + * Userspace context, wherever it is safe to do so. + * + * Return: 0 on success, -ETIMEDOUT on timeout, or -ERESTARTSYS if the + * wait was interrupted. + */ +int kbase_pm_killable_wait_for_desired_state(struct kbase_device *kbdev); + +/** * kbase_pm_wait_for_l2_powered - Wait for the L2 cache to be powered on * * @kbdev: The kbase device structure for the device (must be a valid pointer) @@ -467,8 +482,26 @@ void kbase_pm_release_gpu_cycle_counter_nolock(struct kbase_device *kbdev); * This function effectively just waits for the @gpu_poweroff_wait_work work * item to complete, if it was enqueued. GPU may not have been powered down * before this function returns. + * + * Return: 0 on success, or a negative error code on failure. */ -void kbase_pm_wait_for_poweroff_work_complete(struct kbase_device *kbdev); +int kbase_pm_wait_for_poweroff_work_complete(struct kbase_device *kbdev); + +/** + * kbase_pm_killable_wait_for_poweroff_work_complete - Wait for the poweroff workqueue to + * complete in a killable state. + * + * @kbdev: The kbase device structure for the device (must be a valid pointer) + * + * This function is the same as kbase_pm_wait_for_poweroff_work_complete(), except + * that it allows the SIGKILL signal to interrupt the wait. + * This function is supposed to be called from code that executes in ioctl or other + * Userspace context, wherever it is safe to do so. + * + * Return: 0 on success, -ETIMEDOUT on timeout, or -ERESTARTSYS if the + * wait was interrupted. + */ +int kbase_pm_killable_wait_for_poweroff_work_complete(struct kbase_device *kbdev); /** * kbase_pm_wait_for_gpu_power_down - Wait for the GPU power down to complete @@ -857,6 +890,8 @@ static inline bool kbase_pm_mcu_is_in_desired_state(struct kbase_device *kbdev) { bool in_desired_state = true; + lockdep_assert_held(&kbdev->hwaccess_lock); + if (kbase_pm_is_mcu_desired(kbdev) && kbdev->pm.backend.mcu_state != KBASE_MCU_ON) in_desired_state = false; else if (!kbase_pm_is_mcu_desired(kbdev) && diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_policy.c b/mali_kbase/backend/gpu/mali_kbase_pm_policy.c index f5dc008..7d7650c 100644 --- a/mali_kbase/backend/gpu/mali_kbase_pm_policy.c +++ b/mali_kbase/backend/gpu/mali_kbase_pm_policy.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -54,7 +54,9 @@ void kbase_pm_policy_init(struct kbase_device *kbdev) unsigned long flags; int i; - if (of_property_read_string(np, "power_policy", &power_policy_name) == 0) { + /* Read "power-policy" property and fallback to "power_policy" if not found */ + if ((of_property_read_string(np, "power-policy", &power_policy_name) == 0) || + (of_property_read_string(np, "power_policy", &power_policy_name) == 0)) { for (i = 0; i < ARRAY_SIZE(all_policy_list); i++) if (sysfs_streq(all_policy_list[i]->name, power_policy_name)) { default_policy = all_policy_list[i]; @@ -298,6 +300,8 @@ void kbase_pm_set_policy(struct kbase_device *kbdev, bool reset_gpu = false; bool reset_op_prevented = true; struct kbase_csf_scheduler *scheduler = NULL; + u32 pwroff; + bool switching_to_always_on; #endif KBASE_DEBUG_ASSERT(kbdev != NULL); @@ -306,6 +310,16 @@ void kbase_pm_set_policy(struct kbase_device *kbdev, KBASE_KTRACE_ADD(kbdev, PM_SET_POLICY, NULL, new_policy->id); #if MALI_USE_CSF + pwroff = kbase_csf_firmware_get_mcu_core_pwroff_time(kbdev); + switching_to_always_on = new_policy == &kbase_pm_always_on_policy_ops; + if (pwroff == 0 && !switching_to_always_on) { + dev_warn(kbdev->dev, + "power_policy: cannot switch away from always_on with mcu_shader_pwroff_timeout set to 0\n"); + dev_warn(kbdev->dev, + "power_policy: resetting mcu_shader_pwroff_timeout to default value to switch policy from always_on\n"); + kbase_csf_firmware_reset_mcu_core_pwroff_time(kbdev); + } + scheduler = &kbdev->csf.scheduler; KBASE_DEBUG_ASSERT(scheduler != NULL); diff --git a/mali_kbase/backend/gpu/mali_kbase_time.c b/mali_kbase/backend/gpu/mali_kbase_time.c index 7a4d662..28365c0 100644 --- a/mali_kbase/backend/gpu/mali_kbase_time.c +++ b/mali_kbase/backend/gpu/mali_kbase_time.c @@ -29,6 +29,39 @@ #include <device/mali_kbase_device.h> #include <backend/gpu/mali_kbase_pm_internal.h> #include <mali_kbase_config_defaults.h> +#include <linux/version_compat_defs.h> + +struct kbase_timeout_info { + char *selector_str; + u64 timeout_cycles; +}; + +#if MALI_USE_CSF +static struct kbase_timeout_info timeout_info[KBASE_TIMEOUT_SELECTOR_COUNT] = { + [CSF_FIRMWARE_TIMEOUT] = { "CSF_FIRMWARE_TIMEOUT", MIN(CSF_FIRMWARE_TIMEOUT_CYCLES, + CSF_FIRMWARE_PING_TIMEOUT_CYCLES) }, + [CSF_PM_TIMEOUT] = { "CSF_PM_TIMEOUT", CSF_PM_TIMEOUT_CYCLES }, + [CSF_GPU_RESET_TIMEOUT] = { "CSF_GPU_RESET_TIMEOUT", CSF_GPU_RESET_TIMEOUT_CYCLES }, + [CSF_CSG_SUSPEND_TIMEOUT] = { "CSF_CSG_SUSPEND_TIMEOUT", CSF_CSG_SUSPEND_TIMEOUT_CYCLES }, + [CSF_FIRMWARE_BOOT_TIMEOUT] = { "CSF_FIRMWARE_BOOT_TIMEOUT", + CSF_FIRMWARE_BOOT_TIMEOUT_CYCLES }, + [CSF_FIRMWARE_PING_TIMEOUT] = { "CSF_FIRMWARE_PING_TIMEOUT", + CSF_FIRMWARE_PING_TIMEOUT_CYCLES }, + [CSF_SCHED_PROTM_PROGRESS_TIMEOUT] = { "CSF_SCHED_PROTM_PROGRESS_TIMEOUT", + DEFAULT_PROGRESS_TIMEOUT_CYCLES }, + [MMU_AS_INACTIVE_WAIT_TIMEOUT] = { "MMU_AS_INACTIVE_WAIT_TIMEOUT", + MMU_AS_INACTIVE_WAIT_TIMEOUT_CYCLES }, + [KCPU_FENCE_SIGNAL_TIMEOUT] = { "KCPU_FENCE_SIGNAL_TIMEOUT", + KCPU_FENCE_SIGNAL_TIMEOUT_CYCLES }, +}; +#else +static struct kbase_timeout_info timeout_info[KBASE_TIMEOUT_SELECTOR_COUNT] = { + [MMU_AS_INACTIVE_WAIT_TIMEOUT] = { "MMU_AS_INACTIVE_WAIT_TIMEOUT", + MMU_AS_INACTIVE_WAIT_TIMEOUT_CYCLES }, + [JM_DEFAULT_JS_FREE_TIMEOUT] = { "JM_DEFAULT_JS_FREE_TIMEOUT", + JM_DEFAULT_JS_FREE_TIMEOUT_CYCLES }, +}; +#endif void kbase_backend_get_gpu_time_norequest(struct 
kbase_device *kbdev, u64 *cycle_counter, @@ -108,94 +141,130 @@ void kbase_backend_get_gpu_time(struct kbase_device *kbdev, u64 *cycle_counter, #endif } -unsigned int kbase_get_timeout_ms(struct kbase_device *kbdev, - enum kbase_timeout_selector selector) +static u64 kbase_device_get_scaling_frequency(struct kbase_device *kbdev) +{ + u64 freq_khz = kbdev->lowest_gpu_freq_khz; + + if (!freq_khz) { + dev_dbg(kbdev->dev, + "Lowest frequency uninitialized! Using reference frequency for scaling"); + return DEFAULT_REF_TIMEOUT_FREQ_KHZ; + } + + return freq_khz; +} + +void kbase_device_set_timeout_ms(struct kbase_device *kbdev, enum kbase_timeout_selector selector, + unsigned int timeout_ms) { + char *selector_str; + + if (unlikely(selector >= KBASE_TIMEOUT_SELECTOR_COUNT)) { + selector = KBASE_DEFAULT_TIMEOUT; + selector_str = timeout_info[selector].selector_str; + dev_warn(kbdev->dev, + "Unknown timeout selector passed, falling back to default: %s\n", + timeout_info[selector].selector_str); + } + selector_str = timeout_info[selector].selector_str; + + kbdev->backend_time.device_scaled_timeouts[selector] = timeout_ms; + dev_dbg(kbdev->dev, "\t%-35s: %ums\n", selector_str, timeout_ms); +} + +void kbase_device_set_timeout(struct kbase_device *kbdev, enum kbase_timeout_selector selector, + u64 timeout_cycles, u32 cycle_multiplier) +{ + u64 final_cycles; + u64 timeout; + u64 freq_khz = kbase_device_get_scaling_frequency(kbdev); + + if (unlikely(selector >= KBASE_TIMEOUT_SELECTOR_COUNT)) { + selector = KBASE_DEFAULT_TIMEOUT; + dev_warn(kbdev->dev, + "Unknown timeout selector passed, falling back to default: %s\n", + timeout_info[selector].selector_str); + } + + /* If the multiplication overflows, we will have unsigned wrap-around, and so might + * end up with a shorter timeout. In those cases, we then want to have the largest + * timeout possible that will not run into these issues. Note that this will not + * wait for U64_MAX/frequency ms, as it will be clamped to a max of UINT_MAX + * milliseconds by subsequent steps. + */ + if (check_mul_overflow(timeout_cycles, (u64)cycle_multiplier, &final_cycles)) + final_cycles = U64_MAX; + /* Timeout calculation: * dividing number of cycles by freq in KHz automatically gives value * in milliseconds. nr_cycles will have to be multiplied by 1e3 to * get result in microseconds, and 1e6 to get result in nanoseconds. */ + timeout = div_u64(final_cycles, freq_khz); + + if (unlikely(timeout > UINT_MAX)) { + dev_dbg(kbdev->dev, + "Capping excessive timeout %llums for %s at freq %llukHz to UINT_MAX ms", + timeout, timeout_info[selector].selector_str, + kbase_device_get_scaling_frequency(kbdev)); + timeout = UINT_MAX; + } - u64 timeout, nr_cycles = 0; - u64 freq_khz; + kbase_device_set_timeout_ms(kbdev, selector, (unsigned int)timeout); +} - /* Only for debug messages, safe default in case it's mis-maintained */ - const char *selector_str = "(unknown)"; +/** + * kbase_timeout_scaling_init - Initialize the table of scaled timeout + * values associated with a @kbase_device. + * + * @kbdev: KBase device pointer. + * + * Return: 0 on success, negative error code otherwise. + */ +static int kbase_timeout_scaling_init(struct kbase_device *kbdev) +{ + int err; + enum kbase_timeout_selector selector; - if (!kbdev->lowest_gpu_freq_khz) { - dev_dbg(kbdev->dev, - "Lowest frequency uninitialized! 
Using reference frequency for scaling"); - freq_khz = DEFAULT_REF_TIMEOUT_FREQ_KHZ; - } else { - freq_khz = kbdev->lowest_gpu_freq_khz; + /* First, we initialize the minimum and maximum device frequencies, which + * are used to compute the timeouts. + */ + err = kbase_pm_gpu_freq_init(kbdev); + if (unlikely(err < 0)) { + dev_dbg(kbdev->dev, "Could not initialize GPU frequency\n"); + return err; } - switch (selector) { - case MMU_AS_INACTIVE_WAIT_TIMEOUT: - selector_str = "MMU_AS_INACTIVE_WAIT_TIMEOUT"; - nr_cycles = MMU_AS_INACTIVE_WAIT_TIMEOUT_CYCLES; - break; - case KBASE_TIMEOUT_SELECTOR_COUNT: - default: -#if !MALI_USE_CSF - WARN(1, "Invalid timeout selector used! Using default value"); - nr_cycles = JM_DEFAULT_TIMEOUT_CYCLES; - break; - case JM_DEFAULT_JS_FREE_TIMEOUT: - selector_str = "JM_DEFAULT_JS_FREE_TIMEOUT"; - nr_cycles = JM_DEFAULT_JS_FREE_TIMEOUT_CYCLES; - break; -#else - /* Use Firmware timeout if invalid selection */ - WARN(1, - "Invalid timeout selector used! Using CSF Firmware timeout"); - fallthrough; - case CSF_FIRMWARE_TIMEOUT: - selector_str = "CSF_FIRMWARE_TIMEOUT"; - /* Any FW timeout cannot be longer than the FW ping interval, after which - * the firmware_aliveness_monitor will be triggered and may restart - * the GPU if the FW is unresponsive. + dev_dbg(kbdev->dev, "Scaling kbase timeouts:\n"); + for (selector = 0; selector < KBASE_TIMEOUT_SELECTOR_COUNT; selector++) { + u32 cycle_multiplier = 1; + u64 nr_cycles = timeout_info[selector].timeout_cycles; +#if MALI_USE_CSF + /* Special case: the scheduler progress timeout can be set manually, + * and does not have a canonical length defined in the headers. Hence, + * we query it once upon startup to get a baseline, and change it upon + * every invocation of the appropriate functions */ - nr_cycles = min(CSF_FIRMWARE_PING_TIMEOUT_CYCLES, CSF_FIRMWARE_TIMEOUT_CYCLES); - - if (nr_cycles == CSF_FIRMWARE_PING_TIMEOUT_CYCLES) - dev_warn(kbdev->dev, "Capping %s to CSF_FIRMWARE_PING_TIMEOUT\n", - selector_str); - break; - case CSF_PM_TIMEOUT: - selector_str = "CSF_PM_TIMEOUT"; - nr_cycles = CSF_PM_TIMEOUT_CYCLES; - break; - case CSF_GPU_RESET_TIMEOUT: - selector_str = "CSF_GPU_RESET_TIMEOUT"; - nr_cycles = CSF_GPU_RESET_TIMEOUT_CYCLES; - break; - case CSF_CSG_SUSPEND_TIMEOUT: - selector_str = "CSF_CSG_SUSPEND_TIMEOUT"; - nr_cycles = CSF_CSG_SUSPEND_TIMEOUT_CYCLES; - break; - case CSF_FIRMWARE_BOOT_TIMEOUT: - selector_str = "CSF_FIRMWARE_BOOT_TIMEOUT"; - nr_cycles = CSF_FIRMWARE_BOOT_TIMEOUT_CYCLES; - break; - case CSF_FIRMWARE_PING_TIMEOUT: - selector_str = "CSF_FIRMWARE_PING_TIMEOUT"; - nr_cycles = CSF_FIRMWARE_PING_TIMEOUT_CYCLES; - break; - case CSF_SCHED_PROTM_PROGRESS_TIMEOUT: - selector_str = "CSF_SCHED_PROTM_PROGRESS_TIMEOUT"; - nr_cycles = kbase_csf_timeout_get(kbdev); - break; + if (selector == CSF_SCHED_PROTM_PROGRESS_TIMEOUT) + nr_cycles = kbase_csf_timeout_get(kbdev); #endif + + /* Since we are in control of the iteration bounds for the selector, + * we don't have to worry about bounds checking when setting the timeout. 
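+ *
+ * As a worked example of the scaling done by kbase_device_set_timeout():
+ * with nr_cycles = 100000000, cycle_multiplier = 1 and a lowest GPU
+ * frequency of 100000 kHz (100 MHz), the value stored for the selector is
+ * 100000000 / 100000 = 1000 ms.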
+ */ + kbase_device_set_timeout(kbdev, selector, nr_cycles, cycle_multiplier); } + return 0; +} - timeout = div_u64(nr_cycles, freq_khz); - if (WARN(timeout > UINT_MAX, - "Capping excessive timeout %llums for %s at freq %llukHz to UINT_MAX ms", - (unsigned long long)timeout, selector_str, (unsigned long long)freq_khz)) - timeout = UINT_MAX; - return (unsigned int)timeout; +unsigned int kbase_get_timeout_ms(struct kbase_device *kbdev, enum kbase_timeout_selector selector) +{ + if (unlikely(selector >= KBASE_TIMEOUT_SELECTOR_COUNT)) { + dev_warn(kbdev->dev, "Querying wrong selector, falling back to default\n"); + selector = KBASE_DEFAULT_TIMEOUT; + } + + return kbdev->backend_time.device_scaled_timeouts[selector]; } KBASE_EXPORT_TEST_API(kbase_get_timeout_ms); @@ -247,18 +316,21 @@ static void get_cpu_gpu_time(struct kbase_device *kbdev, u64 *cpu_ts, u64 *gpu_t int kbase_backend_time_init(struct kbase_device *kbdev) { + int err = 0; #if MALI_USE_CSF u64 cpu_ts = 0; u64 gpu_ts = 0; u64 freq; u64 common_factor; + kbase_pm_register_access_enable(kbdev); get_cpu_gpu_time(kbdev, &cpu_ts, &gpu_ts, NULL); freq = arch_timer_get_cntfrq(); if (!freq) { dev_warn(kbdev->dev, "arch_timer_get_rate() is zero!"); - return -EINVAL; + err = -EINVAL; + goto disable_registers; } common_factor = gcd(NSEC_PER_SEC, freq); @@ -268,12 +340,23 @@ int kbase_backend_time_init(struct kbase_device *kbdev) if (!kbdev->backend_time.divisor) { dev_warn(kbdev->dev, "CPU to GPU divisor is zero!"); - return -EINVAL; + err = -EINVAL; + goto disable_registers; } kbdev->backend_time.offset = cpu_ts - div64_u64(gpu_ts * kbdev->backend_time.multiplier, kbdev->backend_time.divisor); #endif - return 0; + if (kbase_timeout_scaling_init(kbdev)) { + dev_warn(kbdev->dev, "Could not initialize timeout scaling"); + err = -EINVAL; + } + +#if MALI_USE_CSF +disable_registers: + kbase_pm_register_access_disable(kbdev); +#endif + + return err; } diff --git a/mali_kbase/build.bp b/mali_kbase/build.bp index e82dd12..381b1fe 100644 --- a/mali_kbase/build.bp +++ b/mali_kbase/build.bp @@ -68,6 +68,9 @@ bob_defaults { large_page_alloc: { kbuild_options: ["CONFIG_LARGE_PAGE_ALLOC=y"], }, + page_migration_support: { + kbuild_options: ["CONFIG_PAGE_MIGRATION_SUPPORT=y"], + }, mali_memory_fully_backed: { kbuild_options: ["CONFIG_MALI_MEMORY_FULLY_BACKED=y"], }, @@ -143,6 +146,18 @@ bob_defaults { mali_coresight: { kbuild_options: ["CONFIG_MALI_CORESIGHT=y"], }, + mali_fw_trace_mode_manual: { + kbuild_options: ["CONFIG_MALI_FW_TRACE_MODE_MANUAL=y"], + }, + mali_fw_trace_mode_auto_print: { + kbuild_options: ["CONFIG_MALI_FW_TRACE_MODE_AUTO_PRINT=y"], + }, + mali_fw_trace_mode_auto_discard: { + kbuild_options: ["CONFIG_MALI_FW_TRACE_MODE_AUTO_DISCARD=y"], + }, + mali_trace_power_gpu_work_period: { + kbuild_options: ["CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD=y"], + }, kbuild_options: [ "CONFIG_MALI_PLATFORM_NAME={{.mali_platform_name}}", "MALI_CUSTOMER_RELEASE={{.release}}", diff --git a/mali_kbase/context/backend/mali_kbase_context_csf.c b/mali_kbase/context/backend/mali_kbase_context_csf.c index 9aa661a..45a5a6c 100644 --- a/mali_kbase/context/backend/mali_kbase_context_csf.c +++ b/mali_kbase/context/backend/mali_kbase_context_csf.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -124,7 +124,7 @@ struct kbase_context *kbase_create_context(struct kbase_device *kbdev, bool is_compat, base_context_create_flags const flags, unsigned long const api_version, - struct file *const filp) + struct kbase_file *const kfile) { struct kbase_context *kctx; unsigned int i = 0; @@ -143,9 +143,11 @@ struct kbase_context *kbase_create_context(struct kbase_device *kbdev, kctx->kbdev = kbdev; kctx->api_version = api_version; - kctx->filp = filp; + kctx->kfile = kfile; kctx->create_flags = flags; + memcpy(kctx->comm, current->comm, sizeof(current->comm)); + if (is_compat) kbase_ctx_flag_set(kctx, KCTX_COMPAT); #if defined(CONFIG_64BIT) @@ -213,6 +215,16 @@ void kbase_destroy_context(struct kbase_context *kctx) kctx->tgid, kctx->id); } + /* Have synchronized against the System suspend and incremented the + * pm.active_count. So any subsequent invocation of System suspend + * callback would get blocked. + * If System suspend callback was already in progress then the above loop + * would have waited till the System resume callback has begun. + * So wait for the System resume callback to also complete as we want to + * avoid context termination during System resume also. + */ + wait_event(kbdev->pm.resume_wait, !kbase_pm_is_resuming(kbdev)); + kbase_mem_pool_group_mark_dying(&kctx->mem_pools); kbase_context_term_partial(kctx, ARRAY_SIZE(context_init)); diff --git a/mali_kbase/context/backend/mali_kbase_context_jm.c b/mali_kbase/context/backend/mali_kbase_context_jm.c index 7acb3f6..39595d9 100644 --- a/mali_kbase/context/backend/mali_kbase_context_jm.c +++ b/mali_kbase/context/backend/mali_kbase_context_jm.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -179,7 +179,7 @@ struct kbase_context *kbase_create_context(struct kbase_device *kbdev, bool is_compat, base_context_create_flags const flags, unsigned long const api_version, - struct file *const filp) + struct kbase_file *const kfile) { struct kbase_context *kctx; unsigned int i = 0; @@ -198,7 +198,7 @@ struct kbase_context *kbase_create_context(struct kbase_device *kbdev, kctx->kbdev = kbdev; kctx->api_version = api_version; - kctx->filp = filp; + kctx->kfile = kfile; kctx->create_flags = flags; if (is_compat) @@ -258,6 +258,17 @@ void kbase_destroy_context(struct kbase_context *kctx) wait_event(kbdev->pm.resume_wait, !kbase_pm_is_suspending(kbdev)); } + + /* Have synchronized against the System suspend and incremented the + * pm.active_count. So any subsequent invocation of System suspend + * callback would get blocked. + * If System suspend callback was already in progress then the above loop + * would have waited till the System resume callback has begun. + * So wait for the System resume callback to also complete as we want to + * avoid context termination during System resume also. 
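+ *
+ * As a sketch, the handshake assumed by the wait below is (the clearing of
+ * pm.resuming and the wake-up are not visible in this hunk and are assumed
+ * to happen once System resume has fully completed):
+ *
+ *   kbdev->pm.resuming = true;               on entry to the resume callback
+ *   kbdev->pm.resuming = false;              once resume has completed
+ *   wake_up_all(&kbdev->pm.resume_wait);     releases the waiters
+ *
+ * kbase_pm_is_resuming() is taken to read pm.resuming, so the wait_event()
+ * below returns only after System resume is done.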
+ */ + wait_event(kbdev->pm.resume_wait, !kbase_pm_is_resuming(kbdev)); + #ifdef CONFIG_MALI_ARBITER_SUPPORT atomic_dec(&kbdev->pm.gpu_users_waiting); #endif /* CONFIG_MALI_ARBITER_SUPPORT */ diff --git a/mali_kbase/context/mali_kbase_context.c b/mali_kbase/context/mali_kbase_context.c index 84d56f7..70941ef 100644 --- a/mali_kbase/context/mali_kbase_context.c +++ b/mali_kbase/context/mali_kbase_context.c @@ -190,7 +190,7 @@ int kbase_context_common_init(struct kbase_context *kctx) kctx->pid = current->pid; /* Check if this is a Userspace created context */ - if (likely(kctx->filp)) { + if (likely(kctx->kfile)) { struct pid *pid_struct; rcu_read_lock(); @@ -264,7 +264,7 @@ int kbase_context_common_init(struct kbase_context *kctx) if (err) { dev_err(kctx->kbdev->dev, "(err:%d) failed to insert kctx to kbase_process", err); - if (likely(kctx->filp)) { + if (likely(kctx->kfile)) { mmdrop(kctx->process_mm); put_task_struct(kctx->task); } @@ -356,7 +356,7 @@ void kbase_context_common_term(struct kbase_context *kctx) kbase_remove_kctx_from_process(kctx); mutex_unlock(&kctx->kbdev->kctx_list_lock); - if (likely(kctx->filp)) { + if (likely(kctx->kfile)) { mmdrop(kctx->process_mm); put_task_struct(kctx->task); } diff --git a/mali_kbase/context/mali_kbase_context.h b/mali_kbase/context/mali_kbase_context.h index 7c90e27..22cb00c 100644 --- a/mali_kbase/context/mali_kbase_context.h +++ b/mali_kbase/context/mali_kbase_context.h @@ -56,8 +56,9 @@ void kbase_context_debugfs_term(struct kbase_context *const kctx); * BASEP_CONTEXT_CREATE_KERNEL_FLAGS. * @api_version: Application program interface version, as encoded in * a single integer by the KBASE_API_VERSION macro. - * @filp: Pointer to the struct file corresponding to device file - * /dev/malixx instance, passed to the file's open method. + * @kfile: Pointer to the object representing the /dev/malixx device + * file instance. Shall be passed as NULL for internally created + * contexts. * * Up to one context can be created for each client that opens the device file * /dev/malixx. Context creation is deferred until a special ioctl() system call @@ -69,7 +70,7 @@ struct kbase_context * kbase_create_context(struct kbase_device *kbdev, bool is_compat, base_context_create_flags const flags, unsigned long api_version, - struct file *filp); + struct kbase_file *const kfile); /** * kbase_destroy_context - Destroy a kernel base context. diff --git a/mali_kbase/csf/Kbuild b/mali_kbase/csf/Kbuild index c5438f0..c626092 100644 --- a/mali_kbase/csf/Kbuild +++ b/mali_kbase/csf/Kbuild @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note # -# (C) COPYRIGHT 2018-2022 ARM Limited. All rights reserved. +# (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved. 
# # This program is free software and is provided to you under the terms of the # GNU General Public License version 2 as published by the Free Software @@ -32,6 +32,7 @@ mali_kbase-y += \ csf/mali_kbase_csf_csg_debugfs.o \ csf/mali_kbase_csf_kcpu_debugfs.o \ csf/mali_kbase_csf_sync_debugfs.o \ + csf/mali_kbase_csf_kcpu_fence_debugfs.o \ csf/mali_kbase_csf_protected_memory.o \ csf/mali_kbase_csf_tiler_heap_debugfs.o \ csf/mali_kbase_csf_cpu_queue_debugfs.o \ diff --git a/mali_kbase/csf/ipa_control/mali_kbase_csf_ipa_control.c b/mali_kbase/csf/ipa_control/mali_kbase_csf_ipa_control.c index 4336705..bbf2e4e 100644 --- a/mali_kbase/csf/ipa_control/mali_kbase_csf_ipa_control.c +++ b/mali_kbase/csf/ipa_control/mali_kbase_csf_ipa_control.c @@ -64,12 +64,19 @@ * struct kbase_ipa_control_listener_data - Data for the GPU clock frequency * listener * - * @listener: GPU clock frequency listener. - * @kbdev: Pointer to kbase device. + * @listener: GPU clock frequency listener. + * @kbdev: Pointer to kbase device. + * @clk_chg_wq: Dedicated workqueue to process the work item corresponding to + * a clock rate notification. + * @clk_chg_work: Work item to process the clock rate change + * @rate: The latest notified rate change, in unit of Hz */ struct kbase_ipa_control_listener_data { struct kbase_clk_rate_listener listener; struct kbase_device *kbdev; + struct workqueue_struct *clk_chg_wq; + struct work_struct clk_chg_work; + atomic_t rate; }; static u32 timer_value(u32 gpu_rate) @@ -271,52 +278,61 @@ kbase_ipa_control_rate_change_notify(struct kbase_clk_rate_listener *listener, u32 clk_index, u32 clk_rate_hz) { if ((clk_index == KBASE_CLOCK_DOMAIN_TOP) && (clk_rate_hz != 0)) { - size_t i; struct kbase_ipa_control_listener_data *listener_data = - container_of(listener, - struct kbase_ipa_control_listener_data, - listener); - struct kbase_device *kbdev = listener_data->kbdev; - struct kbase_ipa_control *ipa_ctrl = &kbdev->csf.ipa_control; - - lockdep_assert_held(&kbdev->hwaccess_lock); - if (!kbdev->pm.backend.gpu_ready) { - dev_err(kbdev->dev, - "%s: GPU frequency cannot change while GPU is off", - __func__); - return; - } + container_of(listener, struct kbase_ipa_control_listener_data, listener); + + /* Save the rate and delegate the job to a work item */ + atomic_set(&listener_data->rate, clk_rate_hz); + queue_work(listener_data->clk_chg_wq, &listener_data->clk_chg_work); + } +} - /* Interrupts are already disabled and interrupt state is also saved */ - spin_lock(&ipa_ctrl->lock); +static void kbase_ipa_ctrl_rate_change_worker(struct work_struct *data) +{ + struct kbase_ipa_control_listener_data *listener_data = + container_of(data, struct kbase_ipa_control_listener_data, clk_chg_work); + struct kbase_device *kbdev = listener_data->kbdev; + struct kbase_ipa_control *ipa_ctrl = &kbdev->csf.ipa_control; + unsigned long flags; + u32 rate; + size_t i; - for (i = 0; i < KBASE_IPA_CONTROL_MAX_SESSIONS; i++) { - struct kbase_ipa_control_session *session = &ipa_ctrl->sessions[i]; + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - if (session->active) { - size_t j; + if (!kbdev->pm.backend.gpu_ready) { + dev_err(kbdev->dev, "%s: GPU frequency cannot change while GPU is off", __func__); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + return; + } - for (j = 0; j < session->num_prfcnts; j++) { - struct kbase_ipa_control_prfcnt *prfcnt = - &session->prfcnts[j]; + spin_lock(&ipa_ctrl->lock); + /* Picking up the latest notified rate */ + rate = (u32)atomic_read(&listener_data->rate); - if 
(prfcnt->gpu_norm) - calc_prfcnt_delta(kbdev, prfcnt, true); - } - } - } + for (i = 0; i < KBASE_IPA_CONTROL_MAX_SESSIONS; i++) { + struct kbase_ipa_control_session *session = &ipa_ctrl->sessions[i]; - ipa_ctrl->cur_gpu_rate = clk_rate_hz; + if (session->active) { + size_t j; - /* Update the timer for automatic sampling if active sessions - * are present. Counters have already been manually sampled. - */ - if (ipa_ctrl->num_active_sessions > 0) { - kbase_reg_write(kbdev, IPA_CONTROL_REG(TIMER), - timer_value(ipa_ctrl->cur_gpu_rate)); + for (j = 0; j < session->num_prfcnts; j++) { + struct kbase_ipa_control_prfcnt *prfcnt = &session->prfcnts[j]; + + if (prfcnt->gpu_norm) + calc_prfcnt_delta(kbdev, prfcnt, true); + } } - spin_unlock(&ipa_ctrl->lock); } + + ipa_ctrl->cur_gpu_rate = rate; + /* Update the timer for automatic sampling if active sessions + * are present. Counters have already been manually sampled. + */ + if (ipa_ctrl->num_active_sessions > 0) + kbase_reg_write(kbdev, IPA_CONTROL_REG(TIMER), timer_value(rate)); + + spin_unlock(&ipa_ctrl->lock); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); } void kbase_ipa_control_init(struct kbase_device *kbdev) @@ -344,11 +360,27 @@ void kbase_ipa_control_init(struct kbase_device *kbdev) listener_data = kmalloc(sizeof(struct kbase_ipa_control_listener_data), GFP_KERNEL); if (listener_data) { - listener_data->listener.notify = - kbase_ipa_control_rate_change_notify; - listener_data->kbdev = kbdev; - ipa_ctrl->rtm_listener_data = listener_data; - } + listener_data->clk_chg_wq = + alloc_workqueue("ipa_ctrl_wq", WQ_HIGHPRI | WQ_UNBOUND, 1); + if (listener_data->clk_chg_wq) { + INIT_WORK(&listener_data->clk_chg_work, kbase_ipa_ctrl_rate_change_worker); + listener_data->listener.notify = kbase_ipa_control_rate_change_notify; + listener_data->kbdev = kbdev; + ipa_ctrl->rtm_listener_data = listener_data; + /* Initialise to 0, which is out of normal notified rates */ + atomic_set(&listener_data->rate, 0); + } else { + dev_warn(kbdev->dev, + "%s: failed to allocate workqueue, clock rate update disabled", + __func__); + kfree(listener_data); + listener_data = NULL; + } + } else + dev_warn(kbdev->dev, + "%s: failed to allocate memory, IPA control clock rate update disabled", + __func__); + spin_lock_irqsave(&clk_rtm->lock, flags); if (clk_rtm->clks[KBASE_CLOCK_DOMAIN_TOP]) ipa_ctrl->cur_gpu_rate = @@ -370,8 +402,10 @@ void kbase_ipa_control_term(struct kbase_device *kbdev) WARN_ON(ipa_ctrl->num_active_sessions); - if (listener_data) + if (listener_data) { kbase_clk_rate_trace_manager_unsubscribe(clk_rtm, &listener_data->listener); + destroy_workqueue(listener_data->clk_chg_wq); + } kfree(ipa_ctrl->rtm_listener_data); spin_lock_irqsave(&kbdev->hwaccess_lock, flags); @@ -997,14 +1031,11 @@ void kbase_ipa_control_rate_change_notify_test(struct kbase_device *kbdev, u32 clk_index, u32 clk_rate_hz) { struct kbase_ipa_control *ipa_ctrl = &kbdev->csf.ipa_control; - struct kbase_ipa_control_listener_data *listener_data = - ipa_ctrl->rtm_listener_data; - unsigned long flags; + struct kbase_ipa_control_listener_data *listener_data = ipa_ctrl->rtm_listener_data; - spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - kbase_ipa_control_rate_change_notify(&listener_data->listener, - clk_index, clk_rate_hz); - spin_lock_irqrestore(&kbdev->hwaccess_lock, flags); + kbase_ipa_control_rate_change_notify(&listener_data->listener, clk_index, clk_rate_hz); + /* Ensure the callback has taken effect before returning back to the test caller */ + 
flush_work(&listener_data->clk_chg_work); } KBASE_EXPORT_TEST_API(kbase_ipa_control_rate_change_notify_test); #endif @@ -1057,4 +1088,3 @@ void kbase_ipa_control_protm_exited(struct kbase_device *kbdev) } } } - diff --git a/mali_kbase/csf/mali_kbase_csf.c b/mali_kbase/csf/mali_kbase_csf.c index 2e3ced3..8eaedde 100644 --- a/mali_kbase/csf/mali_kbase_csf.c +++ b/mali_kbase/csf/mali_kbase_csf.c @@ -38,6 +38,7 @@ #include <linux/protected_memory_allocator.h> #include <tl/mali_kbase_tracepoints.h> #include "mali_kbase_csf_mcu_shared_reg.h" +#include <linux/version_compat_defs.h> #define CS_REQ_EXCEPTION_MASK (CS_REQ_FAULT_MASK | CS_REQ_FATAL_MASK) #define CS_ACK_EXCEPTION_MASK (CS_ACK_FAULT_MASK | CS_ACK_FATAL_MASK) @@ -171,19 +172,19 @@ static int get_user_pages_mmap_handle(struct kbase_context *kctx, static void init_user_io_pages(struct kbase_queue *queue) { - u32 *input_addr = (u32 *)(queue->user_io_addr); - u32 *output_addr = (u32 *)(queue->user_io_addr + PAGE_SIZE); + u64 *input_addr = queue->user_io_addr; + u64 *output_addr64 = queue->user_io_addr + PAGE_SIZE / sizeof(u64); + u32 *output_addr32 = (u32 *)(queue->user_io_addr + PAGE_SIZE / sizeof(u64)); - input_addr[CS_INSERT_LO/4] = 0; - input_addr[CS_INSERT_HI/4] = 0; - - input_addr[CS_EXTRACT_INIT_LO/4] = 0; - input_addr[CS_EXTRACT_INIT_HI/4] = 0; - - output_addr[CS_EXTRACT_LO/4] = 0; - output_addr[CS_EXTRACT_HI/4] = 0; - - output_addr[CS_ACTIVE/4] = 0; + /* + * CS_INSERT and CS_EXTRACT registers contain 64-bit memory addresses which + * should be accessed atomically. Here we update them a full 64 bits at a + * time and, as this is initialisation code, non-atomic accesses would be + * safe in any case. + */ + input_addr[CS_INSERT_LO / sizeof(*input_addr)] = 0; + input_addr[CS_EXTRACT_INIT_LO / sizeof(*input_addr)] = 0; + output_addr64[CS_EXTRACT_LO / sizeof(*output_addr64)] = 0; + output_addr32[CS_ACTIVE / sizeof(*output_addr32)] = 0; } static void kernel_unmap_user_io_pages(struct kbase_context *kctx, @@ -205,7 +206,7 @@ static int kernel_map_user_io_pages(struct kbase_context *kctx, struct page *page_list[2]; pgprot_t cpu_map_prot; unsigned long flags; - char *user_io_addr; + uint64_t *user_io_addr; int ret = 0; size_t i; @@ -246,7 +247,7 @@ unlock: static void term_queue_group(struct kbase_queue_group *group); static void get_queue(struct kbase_queue *queue); -static void release_queue(struct kbase_queue *queue); +static bool release_queue(struct kbase_queue *queue); /** * kbase_csf_free_command_stream_user_pages() - Free the resources allocated @@ -400,7 +401,16 @@ static void get_queue(struct kbase_queue *queue) { WARN_ON(!kbase_refcount_inc_not_zero(&queue->refcount)); } -static void release_queue(struct kbase_queue *queue) +/** + * release_queue() - Release a reference to a GPU queue + * + * @queue: The queue to release. + * + * Return: true if the queue has been released. + * + * The queue will be released when its reference count reaches zero. + */ +static bool release_queue(struct kbase_queue *queue) { lockdep_assert_held(&queue->kctx->csf.lock); if (kbase_refcount_dec_and_test(&queue->refcount)) { @@ -410,7 +420,6 @@ static void release_queue(struct kbase_queue *queue) dev_dbg(queue->kctx->kbdev->dev, "Remove any pending command queue fatal from ctx %d_%d", queue->kctx->tgid, queue->kctx->id); - kbase_csf_event_remove_error(queue->kctx, &queue->error); /* After this the Userspace would be able to free the * memory for GPU queue. 
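The u32-to-u64 switch for the user I/O pages above (init_user_io_pages(), kernel_map_user_io_pages(), and the debugfs reader later in this patch) closes a torn-read window: CS_INSERT and CS_EXTRACT hold 64-bit ring-buffer offsets that the firmware updates concurrently. A hedged sketch of the hazard the old layout allowed, not code from this patch:

	/* Two separate 32-bit loads can straddle a firmware update of
	 * CS_EXTRACT, combining an old low word with a new high word:
	 */
	u32 lo = output_addr[CS_EXTRACT_LO / 4]; /* old value */
	/* ... firmware advances CS_EXTRACT across a 4GiB boundary ... */
	u32 hi = output_addr[CS_EXTRACT_HI / 4]; /* new value */
	u64 torn = lo | ((u64)hi << 32);         /* never a real offset */

	/* A single aligned 64-bit load cannot tear on a 64-bit kernel: */
	u64 ok = output_addr64[CS_EXTRACT_LO / sizeof(u64)];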
In case the Userspace missed @@ -423,7 +432,11 @@ static void release_queue(struct kbase_queue *queue) kbase_gpu_vm_unlock(queue->kctx); kfree(queue); + + return true; } + + return false; } static void oom_event_worker(struct work_struct *data); @@ -531,37 +544,25 @@ static int csf_queue_register_internal(struct kbase_context *kctx, queue->size = (queue_size << PAGE_SHIFT); queue->csi_index = KBASEP_IF_NR_INVALID; - queue->enabled = false; queue->priority = reg->priority; + /* Default to a safe value, this would be updated on binding */ + queue->group_priority = KBASE_QUEUE_GROUP_PRIORITY_LOW; kbase_refcount_set(&queue->refcount, 1); - queue->group = NULL; queue->bind_state = KBASE_CSF_QUEUE_UNBOUND; queue->handle = BASEP_MEM_INVALID_HANDLE; queue->doorbell_nr = KBASEP_USER_DB_NR_INVALID; - queue->status_wait = 0; - queue->sync_ptr = 0; - queue->sync_value = 0; - -#if IS_ENABLED(CONFIG_DEBUG_FS) - queue->saved_cmd_ptr = 0; -#endif - - queue->sb_status = 0; queue->blocked_reason = CS_STATUS_BLOCKED_REASON_REASON_UNBLOCKED; - atomic_set(&queue->pending, 0); - INIT_LIST_HEAD(&queue->link); - INIT_LIST_HEAD(&queue->error.link); + atomic_set(&queue->pending_kick, 0); + INIT_LIST_HEAD(&queue->pending_kick_link); INIT_WORK(&queue->oom_event_work, oom_event_worker); INIT_WORK(&queue->cs_error_work, cs_error_worker); list_add(&queue->link, &kctx->csf.queue_list); - queue->extract_ofs = 0; - region->user_data = queue; /* Initialize the cs_trace configuration parameters, When buffer_size @@ -636,6 +637,22 @@ int kbase_csf_queue_register_ex(struct kbase_context *kctx, static void unbind_queue(struct kbase_context *kctx, struct kbase_queue *queue); +static void wait_pending_queue_kick(struct kbase_queue *queue) +{ + struct kbase_context *const kctx = queue->kctx; + + /* Drain a pending queue kick if any. It should no longer be + * possible to issue further queue kicks at this point: either the + * queue has been unbound, or the context is being terminated. + * + * Signal kbase_csf_scheduler_kthread() to allow for the + * eventual completion of the current iteration. Once it's done the + * event_wait wait queue shall be signalled. + */ + complete(&kctx->kbdev->csf.scheduler.kthread_signal); + wait_event(kctx->kbdev->csf.event_wait, atomic_read(&queue->pending_kick) == 0); +} + void kbase_csf_queue_terminate(struct kbase_context *kctx, struct kbase_ioctl_cs_queue_terminate *term) { @@ -673,6 +690,18 @@ void kbase_csf_queue_terminate(struct kbase_context *kctx, queue->queue_reg->user_data = NULL; kbase_gpu_vm_unlock(kctx); + rt_mutex_unlock(&kctx->csf.lock); + /* The GPU reset can be allowed now as the queue has been unbound. 
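wait_pending_queue_kick() above is the drain side of a small handshake on queue->pending_kick that this patch threads through the kick, process and terminate paths. Condensed from the code in this patch (the scheduler kthread itself is outside this diff):

	/* Kick (kbase_csf_queue_kick(), under pending_gpuq_kicks_lock): */
	if (list_empty(&queue->pending_kick_link)) {
		atomic_inc(&queue->pending_kick);
		list_add_tail(&queue->pending_kick_link,
			      &kbdev->csf.pending_gpuq_kicks[queue->group_priority]);
		complete(&kbdev->csf.scheduler.kthread_signal);
	}

	/* Process: the scheduler kthread pops pending_kick_link and calls
	 * kbase_csf_process_queue_kick(), which drops pending_kick; the
	 * kthread then wakes kbdev->csf.event_wait.
	 */

	/* Drain (queue termination, after releasing kctx->csf.lock so the
	 * kthread and the work items can make progress without deadlock):
	 */
	wait_event(kctx->kbdev->csf.event_wait,
		   atomic_read(&queue->pending_kick) == 0);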
*/ + if (reset_prevented) { + kbase_reset_gpu_allow(kbdev); + reset_prevented = false; + } + wait_pending_queue_kick(queue); + /* The work items can be cancelled as Userspace is terminating the queue */ + cancel_work_sync(&queue->oom_event_work); + cancel_work_sync(&queue->cs_error_work); + rt_mutex_lock(&kctx->csf.lock); + release_queue(queue); } @@ -717,6 +746,7 @@ int kbase_csf_queue_bind(struct kbase_context *kctx, union kbase_ioctl_cs_queue_ bind->out.mmap_handle = queue->handle; group->bound_queues[bind->in.csi_index] = queue; queue->group = group; + queue->group_priority = group->priority; queue->csi_index = bind->in.csi_index; queue->bind_state = KBASE_CSF_QUEUE_BIND_IN_PROGRESS; @@ -726,12 +756,20 @@ out: return ret; } -static struct kbase_queue_group *get_bound_queue_group( - struct kbase_queue *queue) +/** + * get_bound_queue_group - Get the group to which a queue was bound + * + * @queue: Pointer to the queue for this group + * + * Return: The group to which this queue was bound, or NULL on error. + */ +static struct kbase_queue_group *get_bound_queue_group(struct kbase_queue *queue) { struct kbase_context *kctx = queue->kctx; struct kbase_queue_group *group; + lockdep_assert_held(&kctx->csf.lock); + if (queue->bind_state == KBASE_CSF_QUEUE_UNBOUND) return NULL; @@ -753,63 +791,6 @@ static struct kbase_queue_group *get_bound_queue_group( return group; } -static void enqueue_gpu_submission_work(struct kbase_context *const kctx) -{ - kthread_queue_work(&kctx->csf.pending_submission_worker, &kctx->csf.pending_submission_work); -} - -/** - * pending_submission_worker() - Work item to process pending kicked GPU command queues. - * - * @work: Pointer to pending_submission_work. - * - * This function starts all pending queues, for which the work - * was previously submitted via ioctl call from application thread. - * If the queue is already scheduled and resident, it will be started - * right away, otherwise once the group is made resident. - */ -static void pending_submission_worker(struct kthread_work *work) -{ - struct kbase_context *kctx = - container_of(work, struct kbase_context, csf.pending_submission_work); - struct kbase_device *kbdev = kctx->kbdev; - struct kbase_queue *queue; - int err = kbase_reset_gpu_prevent_and_wait(kbdev); - - if (err) { - dev_err(kbdev->dev, "Unsuccessful GPU reset detected when kicking queue "); - return; - } - - rt_mutex_lock(&kctx->csf.lock); - - /* Iterate through the queue list and schedule the pending ones for submission. 
*/ - list_for_each_entry(queue, &kctx->csf.queue_list, link) { - if (atomic_cmpxchg(&queue->pending, 1, 0) == 1) { - struct kbase_queue_group *group = get_bound_queue_group(queue); - int ret; - - if (!group || queue->bind_state != KBASE_CSF_QUEUE_BOUND) { - dev_dbg(kbdev->dev, "queue is not bound to a group"); - continue; - } - - ret = kbase_csf_scheduler_queue_start(queue); - if (unlikely(ret)) { - dev_dbg(kbdev->dev, "Failed to start queue"); - if (ret == -EBUSY) { - atomic_cmpxchg(&queue->pending, 0, 1); - enqueue_gpu_submission_work(kctx); - } - } - } - } - - rt_mutex_unlock(&kctx->csf.lock); - - kbase_reset_gpu_allow(kbdev); -} - void kbase_csf_ring_csg_doorbell(struct kbase_device *kbdev, int slot) { if (WARN_ON(slot < 0)) @@ -902,7 +883,6 @@ int kbase_csf_queue_kick(struct kbase_context *kctx, struct kbase_ioctl_cs_queue_kick *kick) { struct kbase_device *kbdev = kctx->kbdev; - bool trigger_submission = false; struct kbase_va_region *region; int err = 0; @@ -920,9 +900,19 @@ int kbase_csf_queue_kick(struct kbase_context *kctx, if (!kbase_is_region_invalid_or_free(region)) { struct kbase_queue *queue = region->user_data; - if (queue) { - atomic_cmpxchg(&queue->pending, 0, 1); - trigger_submission = true; + if (queue && (queue->bind_state == KBASE_CSF_QUEUE_BOUND)) { + spin_lock(&kbdev->csf.pending_gpuq_kicks_lock); + if (list_empty(&queue->pending_kick_link)) { + /* Queue termination shall block until this + * kick has been handled. + */ + atomic_inc(&queue->pending_kick); + list_add_tail( + &queue->pending_kick_link, + &kbdev->csf.pending_gpuq_kicks[queue->group_priority]); + complete(&kbdev->csf.scheduler.kthread_signal); + } + spin_unlock(&kbdev->csf.pending_gpuq_kicks_lock); } } else { dev_dbg(kbdev->dev, @@ -931,9 +921,6 @@ int kbase_csf_queue_kick(struct kbase_context *kctx, } kbase_gpu_vm_unlock(kctx); - if (likely(trigger_submission)) - enqueue_gpu_submission_work(kctx); - return err; } @@ -1222,6 +1209,9 @@ static int create_queue_group(struct kbase_context *const kctx, } else { int err = 0; +#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) + group->prev_act = false; +#endif group->kctx = kctx; group->handle = group_handle; group->csg_nr = KBASEP_CSG_NR_INVALID; @@ -1246,6 +1236,7 @@ static int create_queue_group(struct kbase_context *const kctx, group->dvs_buf = create->in.dvs_buf; + #if IS_ENABLED(CONFIG_DEBUG_FS) group->deschedule_deferred_cnt = 0; #endif @@ -1256,8 +1247,6 @@ static int create_queue_group(struct kbase_context *const kctx, INIT_LIST_HEAD(&group->link); INIT_LIST_HEAD(&group->link_to_schedule); INIT_LIST_HEAD(&group->error_fatal.link); - INIT_LIST_HEAD(&group->error_timeout.link); - INIT_LIST_HEAD(&group->error_tiler_oom.link); INIT_WORK(&group->timer_event_work, timer_event_worker); kthread_init_work(&group->protm_event_work, protm_event_worker); bitmap_zero(group->protm_pending_bitmap, @@ -1307,7 +1296,7 @@ int kbase_csf_queue_group_create(struct kbase_context *const kctx, const u32 compute_count = hweight64(create->in.compute_mask); size_t i; - for (i = 0; i < sizeof(create->in.padding); i++) { + for (i = 0; i < ARRAY_SIZE(create->in.padding); i++) { if (create->in.padding[i] != 0) { dev_warn(kctx->kbdev->dev, "Invalid padding not 0 in queue group create\n"); return -EINVAL; @@ -1316,8 +1305,7 @@ int kbase_csf_queue_group_create(struct kbase_context *const kctx, rt_mutex_lock(&kctx->csf.lock); - if ((create->in.tiler_max > tiler_count) || - (create->in.fragment_max > fragment_count) || + if ((create->in.tiler_max > tiler_count) || 
(create->in.fragment_max > fragment_count) || (create->in.compute_max > compute_count)) { dev_dbg(kctx->kbdev->dev, "Invalid maximum number of endpoints for a queue group"); @@ -1335,8 +1323,7 @@ int kbase_csf_queue_group_create(struct kbase_context *const kctx, dev_warn(kctx->kbdev->dev, "Unknown exception handler flags set: %u", create->in.csi_handlers & ~BASE_CSF_EXCEPTION_HANDLER_FLAGS_MASK); err = -EINVAL; - } else if (!dvs_supported(kctx->kbdev->csf.global_iface.version) && - create->in.dvs_buf) { + } else if (!dvs_supported(kctx->kbdev->csf.global_iface.version) && create->in.dvs_buf) { dev_warn( kctx->kbdev->dev, "GPU does not support DVS but userspace is trying to use it"); @@ -1512,8 +1499,6 @@ static void remove_pending_group_fatal_error(struct kbase_queue_group *group) "Remove any pending group fatal error from context %pK\n", (void *)group->kctx); - kbase_csf_event_remove_error(kctx, &group->error_tiler_oom); - kbase_csf_event_remove_error(kctx, &group->error_timeout); kbase_csf_event_remove_error(kctx, &group->error_fatal); } @@ -1681,61 +1666,79 @@ int kbase_csf_ctx_init(struct kbase_context *kctx) kctx->csf.wq = alloc_workqueue("mali_kbase_csf_wq", WQ_UNBOUND, 1); - if (unlikely(!kctx->csf.wq)) - goto out; - err = kbase_create_realtime_thread(kctx->kbdev, kthread_worker_fn, - &kctx->csf.pending_submission_worker, "mali_submit"); - if (err) { - dev_err(kctx->kbdev->dev, "error initializing pending submission worker thread"); - goto out_err_submission_kthread; - } + if (likely(kctx->csf.wq)) { + err = kbase_csf_scheduler_context_init(kctx); - err = kbase_create_realtime_thread(kctx->kbdev, kthread_worker_fn, - &kctx->csf.protm_event_worker, "mali_protm_event"); - if (err) { - dev_err(kctx->kbdev->dev, "error initializing protm event worker thread"); - goto out_err_protm_kthread; - } + if (likely(!err)) { + err = kbase_csf_kcpu_queue_context_init(kctx); - err = kbase_csf_scheduler_context_init(kctx); - if (unlikely(err)) - goto out_err_scheduler_context; + if (likely(!err)) { + err = kbase_csf_tiler_heap_context_init(kctx); - err = kbase_csf_kcpu_queue_context_init(kctx); - if (unlikely(err)) - goto out_err_kcpu_queue_context; + if (likely(!err)) { + rt_mutex_init(&kctx->csf.lock); - err = kbase_csf_tiler_heap_context_init(kctx); - if (unlikely(err)) - goto out_err_tiler_heap_context; + err = kbasep_ctx_user_reg_page_mapping_init(kctx); - rt_mutex_init(&kctx->csf.lock); - kthread_init_work(&kctx->csf.pending_submission_work, - pending_submission_worker); + if (likely(!err)) { + err = kbase_create_realtime_thread(kctx->kbdev, kthread_worker_fn, + &kctx->csf.protm_event_worker, "mali_protm_event"); + if (unlikely(err)) { + dev_err(kctx->kbdev->dev, "error initializing protm event worker thread"); + kbasep_ctx_user_reg_page_mapping_term(kctx); + } + } - err = kbasep_ctx_user_reg_page_mapping_init(kctx); - if (unlikely(err)) - goto out_err_user_reg_page_mapping_init; + if (unlikely(err)) + kbase_csf_tiler_heap_context_term(kctx); + } - return err; + if (unlikely(err)) + kbase_csf_kcpu_queue_context_term(kctx); + } + + if (unlikely(err)) + kbase_csf_scheduler_context_term(kctx); + } + + if (unlikely(err)) + destroy_workqueue(kctx->csf.wq); + } -out_err_user_reg_page_mapping_init: - kbase_csf_tiler_heap_context_term(kctx); -out_err_tiler_heap_context: - kbase_csf_kcpu_queue_context_term(kctx); -out_err_kcpu_queue_context: - kbase_csf_scheduler_context_term(kctx); -out_err_scheduler_context: - kbase_destroy_kworker_stack(&kctx->csf.protm_event_worker); -out_err_protm_kthread: - 
kbase_destroy_kworker_stack(&kctx->csf.pending_submission_worker); -out_err_submission_kthread: - destroy_workqueue(kctx->csf.wq); -out: return err; } +void kbase_csf_ctx_report_page_fault_for_active_groups(struct kbase_context *kctx, + struct kbase_fault *fault) +{ + struct base_gpu_queue_group_error err_payload = + (struct base_gpu_queue_group_error){ .error_type = BASE_GPU_QUEUE_GROUP_ERROR_FATAL, + .payload = { .fatal_group = { + .sideband = fault->addr, + .status = fault->status, + } } }; + struct kbase_device *kbdev = kctx->kbdev; + const u32 num_groups = kbdev->csf.global_iface.group_num; + unsigned long flags; + int csg_nr; + + lockdep_assert_held(&kbdev->hwaccess_lock); + + kbase_csf_scheduler_spin_lock(kbdev, &flags); + for (csg_nr = 0; csg_nr < num_groups; csg_nr++) { + struct kbase_queue_group *const group = + kbdev->csf.scheduler.csg_slots[csg_nr].resident_group; + + if (!group || (group->kctx != kctx)) + continue; + + group->faulted = true; + kbase_csf_add_group_fatal_error(group, &err_payload); + } + kbase_csf_scheduler_spin_unlock(kbdev, flags); +} + void kbase_csf_ctx_handle_fault(struct kbase_context *kctx, struct kbase_fault *fault) { @@ -1777,6 +1780,9 @@ void kbase_csf_ctx_handle_fault(struct kbase_context *kctx, if (group && group->run_state != KBASE_CSF_GROUP_TERMINATED) { term_queue_group(group); + /* This would effectively be a NOP if the fatal error was already added to + * the error_list by kbase_csf_ctx_report_page_fault_for_active_groups(). + */ kbase_csf_add_group_fatal_error(group, &err_payload); reported = true; } @@ -1833,8 +1839,6 @@ void kbase_csf_ctx_term(struct kbase_context *kctx) if (reset_prevented) kbase_reset_gpu_allow(kbdev); - kthread_cancel_work_sync(&kctx->csf.pending_submission_work); - /* Now that all queue groups have been terminated, there can be no * more OoM or timer event interrupts but there can be inflight work * items. Destroying the wq will implicitly flush those work items. @@ -1873,6 +1877,12 @@ void kbase_csf_ctx_term(struct kbase_context *kctx) queue = list_first_entry(&kctx->csf.queue_list, struct kbase_queue, link); + list_del_init(&queue->link); + + rt_mutex_unlock(&kctx->csf.lock); + wait_pending_queue_kick(queue); + rt_mutex_lock(&kctx->csf.lock); + /* The reference held when the IO mapping was created on bind * would have been dropped otherwise the termination of Kbase * context itself wouldn't have kicked-in. So there shall be @@ -1880,15 +1890,13 @@ void kbase_csf_ctx_term(struct kbase_context *kctx) * registered. 
*/ WARN_ON(kbase_refcount_read(&queue->refcount) != 1); - list_del_init(&queue->link); + release_queue(queue); } rt_mutex_unlock(&kctx->csf.lock); - kbase_destroy_kworker_stack(&kctx->csf.pending_submission_worker); kbase_destroy_kworker_stack(&kctx->csf.protm_event_worker); - kbasep_ctx_user_reg_page_mapping_term(kctx); kbase_csf_tiler_heap_context_term(kctx); kbase_csf_kcpu_queue_context_term(kctx); @@ -1992,16 +2000,13 @@ static void report_tiler_oom_error(struct kbase_queue_group *group) } } } }; kbase_csf_event_add_error(group->kctx, - &group->error_tiler_oom, + &group->error_fatal, &error); kbase_event_wakeup_sync(group->kctx); } static void flush_gpu_cache_on_fatal_error(struct kbase_device *kbdev) { - int err; - const unsigned int cache_flush_wait_timeout_ms = 2000; - kbase_pm_lock(kbdev); /* With the advent of partial cache flush, dirty cache lines could * be left in the GPU L2 caches by terminating the queue group here @@ -2011,17 +2016,12 @@ static void flush_gpu_cache_on_fatal_error(struct kbase_device *kbdev) */ if (kbdev->pm.backend.gpu_powered) { kbase_gpu_start_cache_clean(kbdev, GPU_COMMAND_CACHE_CLN_INV_L2_LSC); - err = kbase_gpu_wait_cache_clean_timeout(kbdev, cache_flush_wait_timeout_ms); - - if (err) { + if (kbase_gpu_wait_cache_clean_timeout(kbdev, + kbdev->mmu_or_gpu_cache_op_wait_time_ms)) dev_warn( kbdev->dev, - "[%llu] Timeout waiting for cache clean to complete after fatal error", + "[%llu] Timeout waiting for CACHE_CLN_INV_L2_LSC to complete after fatal error", kbase_backend_get_cycle_cnt(kbdev)); - - if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR)) - kbase_reset_gpu(kbdev); - } } kbase_pm_unlock(kbdev); @@ -2153,7 +2153,6 @@ static void oom_event_worker(struct work_struct *data) rt_mutex_lock(&kctx->csf.lock); kbase_queue_oom_event(queue); - release_queue(queue); rt_mutex_unlock(&kctx->csf.lock); kbase_reset_gpu_allow(kbdev); @@ -2180,7 +2179,7 @@ static void report_group_timeout_error(struct kbase_queue_group *const group) "Notify the event notification thread, forward progress timeout (%llu cycles)\n", kbase_csf_timeout_get(group->kctx->kbdev)); - kbase_csf_event_add_error(group->kctx, &group->error_timeout, &error); + kbase_csf_event_add_error(group->kctx, &group->error_fatal, &error); kbase_event_wakeup_sync(group->kctx); } @@ -2406,12 +2405,10 @@ handle_fault_event(struct kbase_queue *const queue, const u32 cs_ack) if ((cs_fault_exception_type != CS_FAULT_EXCEPTION_TYPE_CS_INHERIT_FAULT) && (cs_fault_exception_type != CS_FAULT_EXCEPTION_TYPE_CS_RESOURCE_TERMINATED)) { if (unlikely(kbase_debug_csf_fault_notify(kbdev, queue->kctx, DF_CS_FAULT))) { - get_queue(queue); queue->cs_error = cs_fault; queue->cs_error_info = cs_fault_info; queue->cs_error_fatal = false; - if (!queue_work(queue->kctx->csf.wq, &queue->cs_error_work)) - release_queue(queue); + queue_work(queue->kctx->csf.wq, &queue->cs_error_work); return; } } @@ -2422,31 +2419,29 @@ handle_fault_event(struct kbase_queue *const queue, const u32 cs_ack) kbase_csf_ring_cs_kernel_doorbell(kbdev, queue->csi_index, queue->group->csg_nr, true); } -static void report_queue_fatal_error(struct kbase_queue *const queue, - u32 cs_fatal, u64 cs_fatal_info, - u8 group_handle) +static void report_queue_fatal_error(struct kbase_queue *const queue, u32 cs_fatal, + u64 cs_fatal_info, struct kbase_queue_group *group) { - struct base_csf_notification error = { - .type = BASE_CSF_NOTIFICATION_GPU_QUEUE_GROUP_ERROR, - .payload = { - .csg_error = { - .handle = group_handle, - .error = { - 
.error_type = - BASE_GPU_QUEUE_GROUP_QUEUE_ERROR_FATAL, - .payload = { - .fatal_queue = { - .sideband = cs_fatal_info, - .status = cs_fatal, - .csi_index = queue->csi_index, - } - } - } - } - } - }; + struct base_csf_notification + error = { .type = BASE_CSF_NOTIFICATION_GPU_QUEUE_GROUP_ERROR, + .payload = { + .csg_error = { + .error = { .error_type = + BASE_GPU_QUEUE_GROUP_QUEUE_ERROR_FATAL, + .payload = { .fatal_queue = { + .sideband = cs_fatal_info, + .status = cs_fatal, + } } } } } }; + + if (!queue) + return; + + if (WARN_ON_ONCE(!group)) + return; - kbase_csf_event_add_error(queue->kctx, &queue->error, &error); + error.payload.csg_error.handle = group->handle; + error.payload.csg_error.error.payload.fatal_queue.csi_index = queue->csi_index; + kbase_csf_event_add_error(queue->kctx, &group->error_fatal, &error); kbase_event_wakeup_sync(queue->kctx); } @@ -2461,10 +2456,10 @@ static void cs_error_worker(struct work_struct *const data) { struct kbase_queue *const queue = container_of(data, struct kbase_queue, cs_error_work); + const u32 cs_fatal_exception_type = CS_FATAL_EXCEPTION_TYPE_GET(queue->cs_error); struct kbase_context *const kctx = queue->kctx; struct kbase_device *const kbdev = kctx->kbdev; struct kbase_queue_group *group; - u8 group_handle; bool reset_prevented = false; int err; @@ -2511,14 +2506,22 @@ static void cs_error_worker(struct work_struct *const data) } #endif - group_handle = group->handle; term_queue_group(group); flush_gpu_cache_on_fatal_error(kbdev); - report_queue_fatal_error(queue, queue->cs_error, queue->cs_error_info, - group_handle); + /* For an invalid GPU page fault, CS_BUS_FAULT fatal error is expected after the + * page fault handler disables the AS of faulty context. Need to skip reporting the + * CS_BUS_FAULT fatal error to the Userspace as it doesn't have the full fault info. + * Page fault handler will report the fatal error with full page fault info. 
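The per-object error notifications are consolidated here: queue->error, group->error_timeout and group->error_tiler_oom all collapse into the single group->error_fatal. This relies on kbase_csf_event_add_error() queuing a notification object only if it is not already queued, so one object per group suffices. A sketch of that behaviour as assumed by this patch (the helper lives in mali_kbase_csf_event.c, outside this diff):

	spin_lock_irqsave(&kctx->csf.event.lock, flags);
	if (list_empty(&error->link)) { /* not already on the error list */
		error->data = *data;
		list_add_tail(&error->link, &kctx->csf.event.error_list);
	}
	spin_unlock_irqrestore(&kctx->csf.event.lock, flags);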
+ */ + if ((cs_fatal_exception_type == CS_FATAL_EXCEPTION_TYPE_CS_BUS_FAULT) && group->faulted) { + dev_dbg(kbdev->dev, + "Skipped reporting CS_BUS_FAULT for queue %d of group %d of ctx %d_%d", + queue->csi_index, group->handle, kctx->tgid, kctx->id); + } else { + report_queue_fatal_error(queue, queue->cs_error, queue->cs_error_info, group); + } unlock: - release_queue(queue); rt_mutex_unlock(&kctx->csf.lock); if (reset_prevented) kbase_reset_gpu_allow(kbdev); @@ -2580,12 +2583,10 @@ handle_fatal_event(struct kbase_queue *const queue, if (kbase_prepare_to_reset_gpu(queue->kctx->kbdev, RESET_FLAGS_NONE)) kbase_reset_gpu(queue->kctx->kbdev); } - get_queue(queue); queue->cs_error = cs_fatal; queue->cs_error_info = cs_fatal_info; queue->cs_error_fatal = true; - if (!queue_work(queue->kctx->csf.wq, &queue->cs_error_work)) - release_queue(queue); + queue_work(queue->kctx->csf.wq, &queue->cs_error_work); } kbase_csf_firmware_cs_input_mask(stream, CS_REQ, cs_ack, @@ -2672,7 +2673,6 @@ static void process_cs_interrupts(struct kbase_queue_group *const group, if (((cs_req & CS_REQ_TILER_OOM_MASK) ^ (cs_ack & CS_ACK_TILER_OOM_MASK))) { - get_queue(queue); KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_INTERRUPT_TILER_OOM, group, queue, cs_req ^ cs_ack); if (!queue_work(wq, &queue->oom_event_work)) { @@ -2686,7 +2686,6 @@ static void process_cs_interrupts(struct kbase_queue_group *const group, "Tiler OOM work pending: queue %d group %d (ctx %d_%d)", queue->csi_index, group->handle, queue->kctx->tgid, queue->kctx->id); - release_queue(queue); } } @@ -2797,17 +2796,7 @@ static void process_csg_interrupts(struct kbase_device *const kbdev, int const c KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_INTERRUPT_PROCESS_START, group, csg_nr); - if ((req ^ ack) & CSG_REQ_SYNC_UPDATE_MASK) { - kbase_csf_firmware_csg_input_mask(ginfo, - CSG_REQ, ack, CSG_REQ_SYNC_UPDATE_MASK); - - KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_INTERRUPT_SYNC_UPDATE, group, req ^ ack); - - /* SYNC_UPDATE events shall invalidate GPU idle event */ - atomic_set(&kbdev->csf.scheduler.gpu_no_longer_idle, true); - - kbase_csf_event_signal_cpu_only(group->kctx); - } + kbase_csf_handle_csg_sync_update(kbdev, ginfo, group, req, ack); if ((req ^ ack) & CSG_REQ_IDLE_MASK) { struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; @@ -3117,13 +3106,16 @@ void kbase_csf_interrupt(struct kbase_device *kbdev, u32 val) do { unsigned long flags; u32 csg_interrupts = val & ~JOB_IRQ_GLOBAL_IF; - struct irq_idle_and_protm_track track = { .protm_grp = NULL, .idle_seq = U32_MAX }; bool glb_idle_irq_received = false; kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), val); order_job_irq_clear_with_iface_mem_read(); if (csg_interrupts != 0) { + struct irq_idle_and_protm_track track = { .protm_grp = NULL, + .idle_seq = U32_MAX, + .idle_slot = S8_MAX }; + kbase_csf_scheduler_spin_lock(kbdev, &flags); /* Looping through and track the highest idle and protm groups */ while (csg_interrupts != 0) { @@ -3220,6 +3212,24 @@ void kbase_csf_interrupt(struct kbase_device *kbdev, u32 val) KBASE_KTRACE_ADD(kbdev, CSF_INTERRUPT_END, NULL, val); } +void kbase_csf_handle_csg_sync_update(struct kbase_device *const kbdev, + struct kbase_csf_cmd_stream_group_info *ginfo, + struct kbase_queue_group *group, u32 req, u32 ack) +{ + kbase_csf_scheduler_spin_lock_assert_held(kbdev); + + if ((req ^ ack) & CSG_REQ_SYNC_UPDATE_MASK) { + kbase_csf_firmware_csg_input_mask(ginfo, CSG_REQ, ack, CSG_REQ_SYNC_UPDATE_MASK); + + KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_INTERRUPT_SYNC_UPDATE, group, req ^ ack); + + /* 
SYNC_UPDATE events shall invalidate GPU idle event */ + atomic_set(&kbdev->csf.scheduler.gpu_no_longer_idle, true); + + kbase_csf_event_signal_cpu_only(group->kctx); + } +} + void kbase_csf_doorbell_mapping_term(struct kbase_device *kbdev) { if (kbdev->csf.db_filp) { @@ -3258,6 +3268,28 @@ int kbase_csf_doorbell_mapping_init(struct kbase_device *kbdev) return 0; } +void kbase_csf_pending_gpuq_kicks_init(struct kbase_device *kbdev) +{ + size_t i; + + for (i = 0; i != ARRAY_SIZE(kbdev->csf.pending_gpuq_kicks); ++i) + INIT_LIST_HEAD(&kbdev->csf.pending_gpuq_kicks[i]); + spin_lock_init(&kbdev->csf.pending_gpuq_kicks_lock); +} + +void kbase_csf_pending_gpuq_kicks_term(struct kbase_device *kbdev) +{ + size_t i; + + spin_lock(&kbdev->csf.pending_gpuq_kicks_lock); + for (i = 0; i != ARRAY_SIZE(kbdev->csf.pending_gpuq_kicks); ++i) { + if (!list_empty(&kbdev->csf.pending_gpuq_kicks[i])) + dev_warn(kbdev->dev, + "Some GPU queue kicks for priority %zu were not handled", i); + } + spin_unlock(&kbdev->csf.pending_gpuq_kicks_lock); +} + void kbase_csf_free_dummy_user_reg_page(struct kbase_device *kbdev) { if (kbdev->csf.user_reg.filp) { @@ -3290,7 +3322,7 @@ int kbase_csf_setup_dummy_user_reg_page(struct kbase_device *kbdev) } page = as_page(phys); - addr = kmap_atomic(page); + addr = kbase_kmap_atomic(page); /* Write a special value for the latest flush register inside the * dummy page @@ -3299,7 +3331,7 @@ int kbase_csf_setup_dummy_user_reg_page(struct kbase_device *kbdev) kbase_sync_single_for_device(kbdev, kbase_dma_addr(page) + LATEST_FLUSH, sizeof(u32), DMA_BIDIRECTIONAL); - kunmap_atomic(addr); + kbase_kunmap_atomic(addr); kbdev->csf.user_reg.filp = filp; kbdev->csf.user_reg.dummy_page = phys; @@ -3320,3 +3352,60 @@ u8 kbase_csf_priority_check(struct kbase_device *kbdev, u8 req_priority) return out_priority; } + +void kbase_csf_process_queue_kick(struct kbase_queue *queue) +{ + struct kbase_context *kctx = queue->kctx; + struct kbase_device *kbdev = kctx->kbdev; + bool retry_kick = false; + int err = kbase_reset_gpu_prevent_and_wait(kbdev); + + if (err) { + dev_err(kbdev->dev, "Unsuccessful GPU reset detected when kicking queue"); + goto out_release_queue; + } + + rt_mutex_lock(&kctx->csf.lock); + + if (queue->bind_state != KBASE_CSF_QUEUE_BOUND) + goto out_allow_gpu_reset; + + err = kbase_csf_scheduler_queue_start(queue); + if (unlikely(err)) { + dev_dbg(kbdev->dev, "Failed to start queue"); + if (err == -EBUSY) { + retry_kick = true; + + spin_lock(&kbdev->csf.pending_gpuq_kicks_lock); + if (list_empty(&queue->pending_kick_link)) { + /* A failed queue kick shall be pushed to the + * back of the queue to avoid potential abuse. 
+ */ + list_add_tail( + &queue->pending_kick_link, + &kbdev->csf.pending_gpuq_kicks[queue->group_priority]); + spin_unlock(&kbdev->csf.pending_gpuq_kicks_lock); + } else { + spin_unlock(&kbdev->csf.pending_gpuq_kicks_lock); + WARN_ON(atomic_read(&queue->pending_kick) == 0); + } + + complete(&kbdev->csf.scheduler.kthread_signal); + } + } + +out_allow_gpu_reset: + if (likely(!retry_kick)) { + WARN_ON(atomic_read(&queue->pending_kick) == 0); + atomic_dec(&queue->pending_kick); + } + + rt_mutex_unlock(&kctx->csf.lock); + + kbase_reset_gpu_allow(kbdev); + + return; +out_release_queue: + WARN_ON(atomic_read(&queue->pending_kick) == 0); + atomic_dec(&queue->pending_kick); +} diff --git a/mali_kbase/csf/mali_kbase_csf.h b/mali_kbase/csf/mali_kbase_csf.h index 35d0331..29119e1 100644 --- a/mali_kbase/csf/mali_kbase_csf.h +++ b/mali_kbase/csf/mali_kbase_csf.h @@ -49,8 +49,8 @@ #define KBASEP_TICK_PROTM_PEND_SCAN_SEQ_NR_INVALID (U32_MAX) /* 60ms optimizes power while minimizing latency impact for UI test cases. */ -#define MALI_HOST_CONTROLS_SC_RAILS_IDLE_TIMER_US (600) -#define FIRMWARE_IDLE_HYSTERESIS_TIME_USEC (60000) /* Default 60 milliseconds */ +#define MALI_HOST_CONTROLS_SC_RAILS_IDLE_TIMER_NS (600 * 1000) +#define FIRMWARE_IDLE_HYSTERESIS_TIME_NS (60 * 1000 * 1000) /* Default 60 milliseconds */ /* Idle hysteresis time can be scaled down when GPU sleep feature is used */ #define FIRMWARE_IDLE_HYSTERESIS_GPU_SLEEP_SCALER (5) @@ -78,6 +78,18 @@ void kbase_csf_ctx_handle_fault(struct kbase_context *kctx, struct kbase_fault *fault); /** + * kbase_csf_ctx_report_page_fault_for_active_groups - Notify Userspace about GPU page fault + * for active groups of the faulty context. + * + * @kctx: Pointer to faulty kbase context. + * @fault: Pointer to the fault. + * + * This function notifies the event notification thread of the GPU page fault. + */ +void kbase_csf_ctx_report_page_fault_for_active_groups(struct kbase_context *kctx, + struct kbase_fault *fault); + +/** * kbase_csf_ctx_term - Terminate the CSF interface for a GPU address space. * * @kctx: Pointer to the kbase context which is being terminated. @@ -315,6 +327,19 @@ void kbase_csf_add_group_fatal_error( void kbase_csf_interrupt(struct kbase_device *kbdev, u32 val); /** + * kbase_csf_handle_csg_sync_update - Handle SYNC_UPDATE notification for the group. + * + * @kbdev: The kbase device to handle the SYNC_UPDATE interrupt. + * @ginfo: Pointer to the CSG interface used by the @group. + * @group: Pointer to the GPU command queue group. + * @req: CSG_REQ register value corresponding to @group. + * @ack: CSG_ACK register value corresponding to @group. + */ +void kbase_csf_handle_csg_sync_update(struct kbase_device *const kbdev, + struct kbase_csf_cmd_stream_group_info *ginfo, + struct kbase_queue_group *group, u32 req, u32 ack); + +/** * kbase_csf_doorbell_mapping_init - Initialize the fields that facilitate * the update of userspace mapping of HW * doorbell page. * * @kbdev: Instance of a GPU platform device that implements a CSF interface. */ @@ -363,6 +388,22 @@ int kbase_csf_setup_dummy_user_reg_page(struct kbase_device *kbdev); void kbase_csf_free_dummy_user_reg_page(struct kbase_device *kbdev); /** + * kbase_csf_pending_gpuq_kicks_init - Initialize the data used for handling + * GPU queue kicks. + * + * @kbdev: Instance of a GPU platform device that implements a CSF interface. + */ +void kbase_csf_pending_gpuq_kicks_init(struct kbase_device *kbdev); + +/** + * kbase_csf_pending_gpuq_kicks_term - De-initialize the data used for handling + * GPU queue kicks. 
+ * + * @kbdev: Instance of a GPU platform device that implements a CSF interface. + */ +void kbase_csf_pending_gpuq_kicks_term(struct kbase_device *kbdev); + +/** * kbase_csf_ring_csg_doorbell - ring the doorbell for a CSG interface. * * @kbdev: Instance of a GPU platform device that implements a CSF interface. @@ -505,4 +546,17 @@ static inline u64 kbase_csf_ktrace_gpu_cycle_cnt(struct kbase_device *kbdev) #endif } +/** + * kbase_csf_process_queue_kick() - Process a pending kicked GPU command queue. + * + * @queue: Pointer to the queue to process. + * + * This function starts the pending queue, for which the work + * was previously submitted via ioctl call from application thread. + * If the queue is already scheduled and resident, it will be started + * right away, otherwise once the group is made resident. + */ +void kbase_csf_process_queue_kick(struct kbase_queue *queue); + + #endif /* _KBASE_CSF_H_ */ diff --git a/mali_kbase/csf/mali_kbase_csf_cpu_queue_debugfs.c b/mali_kbase/csf/mali_kbase_csf_cpu_queue_debugfs.c index 44221b0..a319a4a 100644 --- a/mali_kbase/csf/mali_kbase_csf_cpu_queue_debugfs.c +++ b/mali_kbase/csf/mali_kbase_csf_cpu_queue_debugfs.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2020-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2020-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -126,30 +126,24 @@ void kbase_csf_cpu_queue_debugfs_init(struct kbase_context *kctx) int kbase_csf_cpu_queue_dump(struct kbase_context *kctx, u64 buffer, size_t buf_size) { - int err = 0; - size_t alloc_size = buf_size; char *dump_buffer; if (!buffer || !alloc_size) - goto done; + return 0; alloc_size = (alloc_size + PAGE_SIZE) & ~(PAGE_SIZE - 1); dump_buffer = kzalloc(alloc_size, GFP_KERNEL); - if (ZERO_OR_NULL_PTR(dump_buffer)) { - err = -ENOMEM; - goto done; - } + if (!dump_buffer) + return -ENOMEM; WARN_ON(kctx->csf.cpu_queue.buffer != NULL); - err = copy_from_user(dump_buffer, + if (copy_from_user(dump_buffer, u64_to_user_ptr(buffer), - buf_size); - if (err) { + buf_size)) { kfree(dump_buffer); - err = -EFAULT; - goto done; + return -EFAULT; } rt_mutex_lock(&kctx->csf.lock); @@ -161,13 +155,12 @@ int kbase_csf_cpu_queue_dump(struct kbase_context *kctx, kctx->csf.cpu_queue.buffer = dump_buffer; kctx->csf.cpu_queue.buffer_size = buf_size; complete_all(&kctx->csf.cpu_queue.dump_cmp); - } else { + } else kfree(dump_buffer); - } rt_mutex_unlock(&kctx->csf.lock); -done: - return err; + + return 0; } #else /* diff --git a/mali_kbase/csf/mali_kbase_csf_csg_debugfs.c b/mali_kbase/csf/mali_kbase_csf_csg_debugfs.c index a45b588..c94e656 100644 --- a/mali_kbase/csf/mali_kbase_csf_csg_debugfs.c +++ b/mali_kbase/csf/mali_kbase_csf_csg_debugfs.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -287,7 +287,8 @@ static void kbasep_csf_scheduler_dump_active_cs_trace(struct seq_file *file, static void kbasep_csf_scheduler_dump_active_queue(struct seq_file *file, struct kbase_queue *queue) { - u32 *addr; + u64 *addr; + u32 *addr32; u64 cs_extract; u64 cs_insert; u32 cs_active; @@ -309,12 +310,14 @@ static void kbasep_csf_scheduler_dump_active_queue(struct seq_file *file, !queue->group)) return; - addr = (u32 *)queue->user_io_addr; - cs_insert = addr[CS_INSERT_LO/4] | ((u64)addr[CS_INSERT_HI/4] << 32); + addr = queue->user_io_addr; + cs_insert = addr[CS_INSERT_LO / sizeof(*addr)]; - addr = (u32 *)(queue->user_io_addr + PAGE_SIZE); - cs_extract = addr[CS_EXTRACT_LO/4] | ((u64)addr[CS_EXTRACT_HI/4] << 32); - cs_active = addr[CS_ACTIVE/4]; + addr = queue->user_io_addr + PAGE_SIZE / sizeof(*addr); + cs_extract = addr[CS_EXTRACT_LO / sizeof(*addr)]; + + addr32 = (u32 *)(queue->user_io_addr + PAGE_SIZE / sizeof(*addr)); + cs_active = addr32[CS_ACTIVE / sizeof(*addr32)]; #define KBASEP_CSF_DEBUGFS_CS_HEADER_USER_IO \ "Bind Idx, Ringbuf addr, Size, Prio, Insert offset, Extract offset, Active, Doorbell\n" @@ -446,22 +449,20 @@ static void kbasep_csf_scheduler_dump_active_group(struct seq_file *file, group->csg_nr); seq_puts(file, "*** The following group-record is likely stale\n"); } + seq_puts( + file, + "GroupID, CSG NR, CSG Prio, Run State, Priority, C_EP(Alloc/Req), F_EP(Alloc/Req), T_EP(Alloc/Req), Exclusive, Idle\n"); + seq_printf( + file, + "%7d, %6d, %8d, %9d, %8d, %11d/%3d, %11d/%3d, %11d/%3d, %9c, %4c\n", + group->handle, group->csg_nr, slot_priority, group->run_state, + group->priority, CSG_STATUS_EP_CURRENT_COMPUTE_EP_GET(ep_c), + CSG_STATUS_EP_REQ_COMPUTE_EP_GET(ep_r), + CSG_STATUS_EP_CURRENT_FRAGMENT_EP_GET(ep_c), + CSG_STATUS_EP_REQ_FRAGMENT_EP_GET(ep_r), + CSG_STATUS_EP_CURRENT_TILER_EP_GET(ep_c), + CSG_STATUS_EP_REQ_TILER_EP_GET(ep_r), exclusive, idle); - seq_puts(file, "GroupID, CSG NR, CSG Prio, Run State, Priority, C_EP(Alloc/Req), F_EP(Alloc/Req), T_EP(Alloc/Req), Exclusive, Idle\n"); - seq_printf(file, "%7d, %6d, %8d, %9d, %8d, %11d/%3d, %11d/%3d, %11d/%3d, %9c, %4c\n", - group->handle, - group->csg_nr, - slot_priority, - group->run_state, - group->priority, - CSG_STATUS_EP_CURRENT_COMPUTE_EP_GET(ep_c), - CSG_STATUS_EP_REQ_COMPUTE_EP_GET(ep_r), - CSG_STATUS_EP_CURRENT_FRAGMENT_EP_GET(ep_c), - CSG_STATUS_EP_REQ_FRAGMENT_EP_GET(ep_r), - CSG_STATUS_EP_CURRENT_TILER_EP_GET(ep_c), - CSG_STATUS_EP_REQ_TILER_EP_GET(ep_r), - exclusive, - idle); } else { seq_puts(file, "GroupID, CSG NR, Run State, Priority\n"); seq_printf(file, "%7d, %6d, %9d, %8d\n", diff --git a/mali_kbase/csf/mali_kbase_csf_defs.h b/mali_kbase/csf/mali_kbase_csf_defs.h index cb4e5eb..ef973b7 100644 --- a/mali_kbase/csf/mali_kbase_csf_defs.h +++ b/mali_kbase/csf/mali_kbase_csf_defs.h @@ -265,15 +265,18 @@ enum kbase_queue_group_priority { * @CSF_PM_TIMEOUT: Timeout for GPU Power Management to reach the desired * Shader, L2 and MCU state. * @CSF_GPU_RESET_TIMEOUT: Waiting timeout for GPU reset to complete. - * @CSF_CSG_SUSPEND_TIMEOUT: Timeout given for all active CSGs to be suspended. + * @CSF_CSG_SUSPEND_TIMEOUT: Timeout given for a CSG to be suspended. * @CSF_FIRMWARE_BOOT_TIMEOUT: Maximum time to wait for firmware to boot. * @CSF_FIRMWARE_PING_TIMEOUT: Maximum time to wait for firmware to respond * to a ping from KBase. 
* @CSF_SCHED_PROTM_PROGRESS_TIMEOUT: Timeout used to prevent protected mode execution hang. * @MMU_AS_INACTIVE_WAIT_TIMEOUT: Maximum waiting time in ms for the completion - * of a MMU operation + * of a MMU operation. + * @KCPU_FENCE_SIGNAL_TIMEOUT: Waiting time in ms for triggering a KCPU queue sync state dump * @KBASE_TIMEOUT_SELECTOR_COUNT: Number of timeout selectors. Must be last in * the enum. + * @KBASE_DEFAULT_TIMEOUT: Default timeout used when an invalid selector is passed + * to the pre-computed timeout getter. */ enum kbase_timeout_selector { CSF_FIRMWARE_TIMEOUT, @@ -284,9 +287,11 @@ enum kbase_timeout_selector { CSF_FIRMWARE_PING_TIMEOUT, CSF_SCHED_PROTM_PROGRESS_TIMEOUT, MMU_AS_INACTIVE_WAIT_TIMEOUT, + KCPU_FENCE_SIGNAL_TIMEOUT, /* Must be the last in the enum */ - KBASE_TIMEOUT_SELECTOR_COUNT + KBASE_TIMEOUT_SELECTOR_COUNT, + KBASE_DEFAULT_TIMEOUT = CSF_FIRMWARE_TIMEOUT }; /** @@ -324,6 +329,14 @@ struct kbase_csf_notification { * It is in page units. * @link: Link to the linked list of GPU command queues created per * GPU address space. + * @pending_kick: Indicates whether there is a pending kick to be handled. + * @pending_kick_link: Link to the linked list of GPU command queues that have + * been kicked, but the kick has not yet been processed. + * This link would be deleted right before the kick is + * handled to allow for future kicks to occur in the mean + * time. For this reason, this must not be used to check + * for the presence of a pending queue kick. @pending_kick + * should be used instead. * @refcount: Reference count, stands for the number of times the queue * has been referenced. The reference is taken when it is * created, when it is bound to the group and also when the @@ -336,6 +349,7 @@ struct kbase_csf_notification { * @base_addr: Base address of the CS buffer. * @size: Size of the CS buffer. * @priority: Priority of this queue within the group. + * @group_priority: Priority of the group to which this queue has been bound. * @bind_state: Bind state of the queue as enum @kbase_csf_queue_bind_state * @csi_index: The ID of the assigned CS hardware interface. * @enabled: Indicating whether the CS is running, or not. @@ -363,7 +377,6 @@ struct kbase_csf_notification { * @trace_offset_ptr: Pointer to the CS trace buffer offset variable. * @trace_buffer_size: CS trace buffer size for the queue. * @trace_cfg: CS trace configuration parameters. - * @error: GPU command queue fatal information to pass to user space. * @cs_error_work: Work item to handle the CS fatal event reported for this * queue or the CS fault event if dump on fault is enabled * and acknowledgment for CS fault event needs to be done @@ -373,7 +386,6 @@ struct kbase_csf_notification { * @cs_error: Records information about the CS fatal event or * about CS fault event if dump on fault is enabled. * @cs_error_fatal: Flag to track if the CS fault or CS fatal event occurred. - * @pending: Indicating whether the queue has new submitted work. * @extract_ofs: The current EXTRACT offset, this is only updated when handling * the GLB IDLE IRQ if the idle timeout value is non-0 in order * to help detect a queue's true idle status. 
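@pending_kick and @pending_kick_link documented above feed the new GPU queue submission kthread. The kthread body is outside this diff; a simplified sketch of its consume loop, where pick_pending_kick() is a hypothetical helper that scans kbdev->csf.pending_gpuq_kicks[] from highest to lowest group priority:

	static int kbase_csf_scheduler_kthread(void *data)
	{
		struct kbase_device *kbdev = data;
		struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;

		while (scheduler->kthread_running) {
			struct kbase_queue *queue;

			wait_for_completion(&scheduler->kthread_signal);

			/* Pop one kicked queue, highest group priority first */
			spin_lock(&kbdev->csf.pending_gpuq_kicks_lock);
			queue = pick_pending_kick(kbdev); /* hypothetical */
			if (queue)
				list_del_init(&queue->pending_kick_link);
			spin_unlock(&kbdev->csf.pending_gpuq_kicks_lock);

			if (queue)
				kbase_csf_process_queue_kick(queue);

			/* Lets wait_pending_queue_kick() observe pending_kick == 0 */
			wake_up_all(&kbdev->csf.event_wait);
		}
		return 0;
	}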
@@ -386,11 +398,13 @@ struct kbase_queue { struct kbase_context *kctx; u64 user_io_gpu_va; struct tagged_addr phys[2]; - char *user_io_addr; + u64 *user_io_addr; u64 handle; int doorbell_nr; unsigned long db_file_offset; struct list_head link; + atomic_t pending_kick; + struct list_head pending_kick_link; kbase_refcount_t refcount; struct kbase_queue_group *group; struct kbase_va_region *queue_reg; @@ -398,6 +412,7 @@ struct kbase_queue { u64 base_addr; u32 size; u8 priority; + u8 group_priority; s8 csi_index; enum kbase_csf_queue_bind_state bind_state; bool enabled; @@ -410,12 +425,10 @@ struct kbase_queue { u64 trace_offset_ptr; u32 trace_buffer_size; u32 trace_cfg; - struct kbase_csf_notification error; struct work_struct cs_error_work; u64 cs_error_info; u32 cs_error; bool cs_error_fatal; - atomic_t pending; u64 extract_ofs; #if IS_ENABLED(CONFIG_DEBUG_FS) u64 saved_cmd_ptr; @@ -514,10 +527,6 @@ struct kbase_protected_suspend_buffer { * have pending protected mode entry requests. * @error_fatal: An error of type BASE_GPU_QUEUE_GROUP_ERROR_FATAL to be * returned to userspace if such an error has occurred. - * @error_timeout: An error of type BASE_GPU_QUEUE_GROUP_ERROR_TIMEOUT - * to be returned to userspace if such an error has occurred. - * @error_tiler_oom: An error of type BASE_GPU_QUEUE_GROUP_ERROR_TILER_HEAP_OOM - * to be returned to userspace if such an error has occurred. * @timer_event_work: Work item to handle the progress timeout fatal event * for the group. * @deschedule_deferred_cnt: Counter keeping a track of the number of threads @@ -544,6 +553,7 @@ struct kbase_queue_group { u8 compute_max; u8 csi_handlers; + u64 tiler_mask; u64 fragment_mask; u64 compute_mask; @@ -566,8 +576,6 @@ struct kbase_queue_group { DECLARE_BITMAP(protm_pending_bitmap, MAX_SUPPORTED_STREAMS_PER_GROUP); struct kbase_csf_notification error_fatal; - struct kbase_csf_notification error_timeout; - struct kbase_csf_notification error_tiler_oom; struct work_struct timer_event_work; @@ -582,6 +590,12 @@ struct kbase_queue_group { #endif void *csg_reg; u8 csg_reg_bind_retries; +#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) + /** + * @prev_act: Previous CSG activity transition in a GPU metrics. + */ + bool prev_act; +#endif }; /** @@ -834,8 +848,6 @@ struct kbase_csf_user_reg_context { * @link: Link to this csf context in the 'runnable_kctxs' list of * the scheduler instance * @sched: Object representing the scheduler's context - * @pending_submission_worker: Worker for the pending submission work item - * @pending_submission_work: Work item to process pending kicked GPU command queues. * @protm_event_worker: Worker to process requests to enter protected mode. * @cpu_queue: CPU queue information. Only be available when DEBUG_FS * is enabled. @@ -855,8 +867,6 @@ struct kbase_csf_context { struct workqueue_struct *wq; struct list_head link; struct kbase_csf_scheduler_context sched; - struct kthread_worker pending_submission_worker; - struct kthread_work pending_submission_work; struct kthread_worker protm_event_worker; #if IS_ENABLED(CONFIG_DEBUG_FS) struct kbase_csf_cpu_queue_context cpu_queue; @@ -1004,21 +1014,19 @@ struct kbase_csf_mcu_shared_regions { * "tock" schedule operation concluded. Used for * evaluating the exclusion window for in-cycle * schedule operation. + * @csf_worker: Dedicated kthread_worker to execute the @tick_work. * @timer_enabled: Whether the CSF scheduler wakes itself up for * periodic scheduling tasks. 
If this value is 0 * then it will only perform scheduling under the * influence of external factors e.g., IRQs, IOCTLs. - * @csf_worker: Dedicated kthread_worker to execute the @tick_work. * @tick_timer: High-resolution timer employed to schedule tick * workqueue items (kernel-provided delayed_work * items do not use hrtimer and for some reason do * not provide sufficiently reliable periodicity). - * @tick_work: Work item that performs the "schedule on tick" - * operation to implement timeslice-based scheduling. - * @tock_work: Work item that would perform the schedule on tock - * operation to implement the asynchronous scheduling. - * @pending_tock_work: Indicates that the tock work item should re-execute - * once it's finished instead of going back to sleep. + * @pending_tick_work: Indicates that kbase_csf_scheduler_kthread() should perform + * a scheduling tick. + * @pending_tock_work: Indicates that kbase_csf_scheduler_kthread() should perform + * a scheduling tock. * @ping_work: Work item that would ping the firmware at regular * intervals, only if there is a single active CSG * slot, to check if firmware is alive and would @@ -1064,13 +1072,6 @@ struct kbase_csf_mcu_shared_regions { * after GPU and L2 cache have been powered up. So when * this count is zero, MCU will not be powered up. * @csg_scheduling_period_ms: Duration of Scheduling tick in milliseconds. - * @tick_timer_active: Indicates whether the @tick_timer is effectively - * active or not, as the callback function of - * @tick_timer will enqueue @tick_work only if this - * flag is true. This is mainly useful for the case - * when scheduling tick needs to be advanced from - * interrupt context, without actually deactivating - * the @tick_timer first and then enqueing @tick_work. * @tick_protm_pending_seq: Scan out sequence number of the group that has * protected mode execution pending for the queue(s) * bound to it and will be considered first for the @@ -1097,6 +1098,12 @@ struct kbase_csf_mcu_shared_regions { * @mcu_regs_data: Scheduler MCU shared regions data for managing the * shared interface mappings for on-slot queues and * CSG suspend buffers. + * @kthread_signal: Used to wake up the GPU queue submission + * thread when a queue needs attention. + * @kthread_running: Whether the GPU queue submission thread should keep + * executing. + * @gpuq_kthread: High-priority thread used to handle GPU queue + * submissions. 
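The tick machinery moves in the same direction: @tick_work/@tock_work and @tick_timer_active give way to the @pending_tick_work/@pending_tock_work atomics, so a tick can be requested from hard-IRQ context (such as the @tick_timer callback) without a workqueue hop. A hedged sketch of the request side, not the driver's exact helper:

	static void scheduler_invoke_tick(struct kbase_device *kbdev)
	{
		struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;

		/* Coalesce requests: only signal on the false -> true edge */
		if (!atomic_cmpxchg(&scheduler->pending_tick_work, false, true))
			complete(&scheduler->kthread_signal);
	}

The kthread would then clear the flag and run the tick before servicing further queue kicks, which is why a dedicated tick_timer_active flag is no longer needed.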
*/ struct kbase_csf_scheduler { struct rt_mutex lock; @@ -1118,11 +1125,10 @@ struct kbase_csf_scheduler { DECLARE_BITMAP(csg_slots_idle_mask, MAX_SUPPORTED_CSGS); DECLARE_BITMAP(csg_slots_prio_update, MAX_SUPPORTED_CSGS); unsigned long last_schedule; - bool timer_enabled; struct kthread_worker csf_worker; + atomic_t timer_enabled; struct hrtimer tick_timer; - struct kthread_work tick_work; - struct kthread_delayed_work tock_work; + atomic_t pending_tick_work; atomic_t pending_tock_work; struct delayed_work ping_work; struct kbase_context *top_ctx; @@ -1140,7 +1146,6 @@ struct kbase_csf_scheduler { u32 non_idle_scanout_grps; u32 pm_active_count; unsigned int csg_scheduling_period_ms; - bool tick_timer_active; u32 tick_protm_pending_seq; #ifdef CONFIG_MALI_HOST_CONTROLS_SC_RAILS struct work_struct sc_rails_off_work; @@ -1151,6 +1156,15 @@ struct kbase_csf_scheduler { ktime_t protm_enter_time; struct kbase_csf_sched_heap_reclaim_mgr reclaim_mgr; struct kbase_csf_mcu_shared_regions mcu_regs_data; + struct completion kthread_signal; + bool kthread_running; + struct task_struct *gpuq_kthread; +#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) + /** + * @gpu_metrics_tb: Handler of firmware trace buffer for gpu_metrics + */ + struct firmware_trace_buffer *gpu_metrics_tb; +#endif /* CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD */ }; /* @@ -1167,9 +1181,9 @@ struct kbase_csf_scheduler { GLB_PROGRESS_TIMER_TIMEOUT_SCALE) /* - * Default GLB_PWROFF_TIMER_TIMEOUT value in unit of micro-seconds. + * Default GLB_PWROFF_TIMER_TIMEOUT value in unit of nanosecond. */ -#define DEFAULT_GLB_PWROFF_TIMEOUT_US (800) +#define DEFAULT_GLB_PWROFF_TIMEOUT_NS (800 * 1000) /* * In typical operations, the management of the shader core power transitions @@ -1389,7 +1403,7 @@ struct kbase_csf_mcu_fw { /* * Firmware log polling period. */ -#define KBASE_CSF_FIRMWARE_LOG_POLL_PERIOD_MS 25 +#define KBASE_CSF_FIRMWARE_LOG_POLL_PERIOD_MS_DEFAULT 25 /** * enum kbase_csf_firmware_log_mode - Firmware log operating mode @@ -1401,10 +1415,16 @@ struct kbase_csf_mcu_fw { * @KBASE_CSF_FIRMWARE_LOG_MODE_AUTO_PRINT: Automatic printing mode, firmware log * will be periodically emptied into dmesg, manual reading through debugfs is * disabled. + * + * @KBASE_CSF_FIRMWARE_LOG_MODE_AUTO_DISCARD: Automatic discarding mode, firmware + * log will be periodically discarded, the remaining log can be read manually by + * the userspace (and it will also be dumped automatically into dmesg on GPU + * reset). */ enum kbase_csf_firmware_log_mode { KBASE_CSF_FIRMWARE_LOG_MODE_MANUAL, - KBASE_CSF_FIRMWARE_LOG_MODE_AUTO_PRINT + KBASE_CSF_FIRMWARE_LOG_MODE_AUTO_PRINT, + KBASE_CSF_FIRMWARE_LOG_MODE_AUTO_DISCARD }; /** @@ -1418,6 +1438,7 @@ enum kbase_csf_firmware_log_mode { * @dump_buf: Buffer used for dumping the log. * @func_call_list_va_start: Virtual address of the start of the call list of FW log functions. * @func_call_list_va_end: Virtual address of the end of the call list of FW log functions. + * @poll_period_ms: Firmware log polling period in milliseconds. */ struct kbase_csf_firmware_log { enum kbase_csf_firmware_log_mode mode; @@ -1426,6 +1447,7 @@ struct kbase_csf_firmware_log { u8 *dump_buf; u32 func_call_list_va_start; u32 func_call_list_va_end; + atomic_t poll_period_ms; }; /** @@ -1521,7 +1543,7 @@ struct kbase_csf_user_reg { * image. * @shared_interface: Pointer to the interface object containing info for * the memory area shared between firmware & host. 
- * @shared_reg_rbtree: RB tree of the memory regions allocated from the + * @mcu_shared_zone: Memory zone tracking memory regions allocated from the * shared interface segment in MCU firmware address * space. * @db_filp: Pointer to a dummy file, that along with @@ -1584,22 +1606,28 @@ struct kbase_csf_user_reg { * fatal event. * @coredump_work: Work item for initiating a platform core dump. * @ipa_control: IPA Control component manager. - * @mcu_core_pwroff_dur_us: Sysfs attribute for the glb_pwroff timeout input - * in unit of micro-seconds. The firmware does not use + * @mcu_core_pwroff_dur_ns: Sysfs attribute for the glb_pwroff timeout input + * in units of nanoseconds. The firmware does not use * it directly. * @mcu_core_pwroff_dur_count: The counterpart of the glb_pwroff timeout input * in interface required format, ready to be used * directly in the firmware. + * @mcu_core_pwroff_dur_count_modifier: Update csffw_glb_req_cfg_pwroff_timer + * to make the shr(10) modifier conditional + * on the new flag in GLB_PWROFF_TIMER_CONFIG. * @mcu_core_pwroff_reg_shadow: The actual value that has been programmed into * the glb_pwroff register. This is separated from * the @p mcu_core_pwroff_dur_count as an update * to the latter is asynchronous. - * @gpu_idle_hysteresis_us: Sysfs attribute for the idle hysteresis time - * window in unit of microseconds. The firmware does not + * @gpu_idle_hysteresis_ns: Sysfs attribute for the idle hysteresis time + * window in units of nanoseconds. The firmware does not * use it directly. * @gpu_idle_dur_count: The counterpart of the hysteresis time window in * interface required format, ready to be used * directly in the firmware. + * @gpu_idle_dur_count_modifier: Update csffw_glb_req_idle_enable to make the shr(10) + * modifier conditional on the new flag + * in GLB_IDLE_TIMER_CONFIG. * @fw_timeout_ms: Timeout value (in milliseconds) used when waiting * for any request sent to the firmware. * @hwcnt: Contains members required for handling the dump of @@ -1611,6 +1639,12 @@ struct kbase_csf_user_reg { * @dof: Structure for dump on fault. * @user_reg: Collective information to support the mapping to * USER Register page for user processes. + * @pending_gpuq_kicks: Lists of GPU queues that have been kicked but not + * yet processed, categorised by queue group's priority. + * @pending_gpuq_kicks_lock: Protect @pending_gpuq_kicks and + * kbase_queue.pending_kick_link. + * @quirks_ext: Pointer to an allocated buffer containing the firmware + * workarounds configuration. 
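The new *_dur_count_modifier fields pair with the *_ns inputs above: the nanosecond value is converted to a raw timer count, and the legacy shr(10)-scaled encoding is kept only as a fallback, with the modifier flag telling the firmware which encoding was used. A hedged sketch of such a conversion; the helper name and the overflow policy are illustrative, not lifted from this patch:

	static u32 convert_dur_ns_to_count(u64 dur_ns, u32 *no_modifier)
	{
		u64 freq = arch_timer_get_cntfrq(); /* timestamp source rate */
		u64 count = div_u64(dur_ns * freq, NSEC_PER_SEC);

		if (count > U32_MAX) {
			count >>= 10;     /* legacy pre-scaled encoding */
			*no_modifier = 0;
		} else {
			*no_modifier = 1; /* raw count, new flag set */
		}

		return (u32)min_t(u64, count, U32_MAX);
	}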
*/ struct kbase_csf_device { struct kbase_mmu_table mcu_mmu; @@ -1620,7 +1654,7 @@ struct kbase_csf_device { struct kobject *fw_cfg_kobj; struct kbase_csf_trace_buffers firmware_trace_buffers; void *shared_interface; - struct rb_root shared_reg_rbtree; + struct kbase_reg_zone mcu_shared_zone; struct file *db_filp; u32 db_file_offsets; struct tagged_addr dummy_db_page; @@ -1642,11 +1676,13 @@ struct kbase_csf_device { struct work_struct fw_error_work; struct work_struct coredump_work; struct kbase_ipa_control ipa_control; - u32 mcu_core_pwroff_dur_us; + u32 mcu_core_pwroff_dur_ns; u32 mcu_core_pwroff_dur_count; + u32 mcu_core_pwroff_dur_count_modifier; u32 mcu_core_pwroff_reg_shadow; - u32 gpu_idle_hysteresis_us; + u32 gpu_idle_hysteresis_ns; u32 gpu_idle_dur_count; + u32 gpu_idle_dur_count_modifier; unsigned int fw_timeout_ms; struct kbase_csf_hwcnt hwcnt; struct kbase_csf_mcu_fw fw; @@ -1662,6 +1698,9 @@ struct kbase_csf_device { struct kbase_debug_coresight_device coresight; #endif /* IS_ENABLED(CONFIG_MALI_CORESIGHT) */ struct kbase_csf_user_reg user_reg; + struct list_head pending_gpuq_kicks[KBASE_QUEUE_GROUP_PRIORITY_COUNT]; + spinlock_t pending_gpuq_kicks_lock; + u32 *quirks_ext; }; /** @@ -1678,10 +1717,6 @@ struct kbase_csf_device { * @bf_data: Data relating to Bus fault. * @gf_data: Data relating to GPU fault. * @current_setup: Stores the MMU configuration for this address space. - * @is_unresponsive: Flag to indicate MMU is not responding. - * Set if a MMU command isn't completed within - * &kbase_device:mmu_as_inactive_wait_time_ms. - * Clear by kbase_ctx_sched_restore_all_as() after GPU reset completes. */ struct kbase_as { int number; @@ -1693,7 +1728,6 @@ struct kbase_as { struct kbase_fault bf_data; struct kbase_fault gf_data; struct kbase_mmu_setup current_setup; - bool is_unresponsive; }; #endif /* _KBASE_CSF_DEFS_H_ */ diff --git a/mali_kbase/csf/mali_kbase_csf_firmware.c b/mali_kbase/csf/mali_kbase_csf_firmware.c index a4f561b..22f9aeb 100644 --- a/mali_kbase/csf/mali_kbase_csf_firmware.c +++ b/mali_kbase/csf/mali_kbase_csf_firmware.c @@ -52,11 +52,12 @@ #include <mmu/mali_kbase_mmu.h> #include <asm/arch_timer.h> #include <linux/delay.h> +#include <linux/version_compat_defs.h> -#define MALI_MAX_FIRMWARE_NAME_LEN ((size_t)20) +#define MALI_MAX_DEFAULT_FIRMWARE_NAME_LEN ((size_t)20) -static char fw_name[MALI_MAX_FIRMWARE_NAME_LEN] = "mali_csffw.bin"; -module_param_string(fw_name, fw_name, sizeof(fw_name), 0644); +static char default_fw_name[MALI_MAX_DEFAULT_FIRMWARE_NAME_LEN] = "mali_csffw.bin"; +module_param_string(fw_name, default_fw_name, sizeof(default_fw_name), 0644); MODULE_PARM_DESC(fw_name, "firmware image"); /* The waiting time for firmware to boot */ @@ -78,7 +79,6 @@ MODULE_PARM_DESC(fw_debug, "Enables effective use of a debugger for debugging firmware code."); #endif - #define FIRMWARE_HEADER_MAGIC (0xC3F13A6Eul) #define FIRMWARE_HEADER_VERSION_MAJOR (0ul) #define FIRMWARE_HEADER_VERSION_MINOR (3ul) @@ -188,7 +188,7 @@ struct firmware_timeline_metadata { /* The shared interface area, used for communicating with firmware, is managed * like a virtual memory zone. Reserve the virtual space from that zone * corresponding to shared interface entry parsed from the firmware image. - * The shared_reg_rbtree should have been initialized before calling this + * The MCU_SHARED_ZONE should have been initialized before calling this * function. 
*/ static int setup_shared_iface_static_region(struct kbase_device *kbdev) @@ -201,8 +201,7 @@ static int setup_shared_iface_static_region(struct kbase_device *kbdev) if (!interface) return -EINVAL; - reg = kbase_alloc_free_region(kbdev, &kbdev->csf.shared_reg_rbtree, 0, - interface->num_pages_aligned, KBASE_REG_ZONE_MCU_SHARED); + reg = kbase_alloc_free_region(&kbdev->csf.mcu_shared_zone, 0, interface->num_pages_aligned); if (reg) { mutex_lock(&kbdev->csf.reg_lock); ret = kbase_add_va_region_rbtree(kbdev, reg, @@ -308,7 +307,7 @@ static void boot_csf_firmware(struct kbase_device *kbdev) static int wait_ready(struct kbase_device *kbdev) { const ktime_t wait_loop_start = ktime_get_raw(); - const u32 mmu_as_inactive_wait_time_ms = kbdev->mmu_as_inactive_wait_time_ms; + const u32 mmu_as_inactive_wait_time_ms = kbdev->mmu_or_gpu_cache_op_wait_time_ms; s64 diff; do { @@ -316,7 +315,8 @@ static int wait_ready(struct kbase_device *kbdev) for (i = 0; i < 1000; i++) { /* Wait for the MMU status to indicate there is no active command */ - if (!(kbase_reg_read(kbdev, MMU_AS_REG(MCU_AS_NR, AS_STATUS)) & + if (!(kbase_reg_read(kbdev, + MMU_STAGE1_REG(MMU_AS_REG(MCU_AS_NR, AS_STATUS))) & AS_STATUS_AS_ACTIVE)) return 0; } @@ -449,7 +449,7 @@ static void load_fw_image_section(struct kbase_device *kbdev, const u8 *data, for (page_num = 0; page_num < page_limit; ++page_num) { struct page *const page = as_page(phys[page_num]); - char *const p = kmap_atomic(page); + char *const p = kbase_kmap_atomic(page); u32 const copy_len = min_t(u32, PAGE_SIZE, data_len); if (copy_len > 0) { @@ -466,7 +466,7 @@ static void load_fw_image_section(struct kbase_device *kbdev, const u8 *data, kbase_sync_single_for_device(kbdev, kbase_dma_addr_from_tagged(phys[page_num]), PAGE_SIZE, DMA_TO_DEVICE); - kunmap_atomic(p); + kbase_kunmap_atomic(p); } } @@ -533,6 +533,7 @@ out: * within the 2MB pages aligned allocation. * @is_small_page: This is an output flag used to select between the small and large page * to be used for the FW entry allocation. + * @force_small_page: Use 4kB pages to allocate memory needed for FW loading * * Go through all the already initialized interfaces and find if a previously * allocated large page can be used to store contents of new FW interface entry. @@ -544,7 +545,7 @@ static inline bool entry_find_large_page_to_reuse(struct kbase_device *kbdev, const u32 flags, struct tagged_addr **phys, struct protected_memory_allocation ***pma, u32 num_pages, u32 *num_pages_aligned, - bool *is_small_page) + bool *is_small_page, bool force_small_page) { struct kbase_csf_firmware_interface *interface = NULL; struct kbase_csf_firmware_interface *target_interface = NULL; @@ -560,6 +561,8 @@ static inline bool entry_find_large_page_to_reuse(struct kbase_device *kbdev, *phys = NULL; *pma = NULL; + if (force_small_page) + goto out; /* If the section starts at 2MB aligned boundary, * then use 2MB page(s) for it. 
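Taken together with the @force_small_page parameter documented above, the next hunk turns FW section allocation into a two-pass strategy: attempt a 2MB-aligned large-page allocation first, then retry with plain 4kB pages before giving up. A minimal standalone sketch of that fallback shape, assuming hypothetical stand-ins alloc_pages() and load_entry() rather than the real driver entry points:

#include <stdbool.h>
#include <stdio.h>

/* Pretend allocator: large (2MB) pages are exhausted, small (4kB) pages work. */
static bool alloc_pages(unsigned int num_pages, bool force_small)
{
        return force_small;
}

static int load_entry(unsigned int num_pages)
{
        bool force_small = false;

retry_alloc:
        if (!alloc_pages(num_pages, force_small)) {
                if (!force_small) {
                        /* Mirrors the driver's "Retrying by using small pages" path. */
                        force_small = true;
                        goto retry_alloc;
                }
                return -1; /* both page sizes failed */
        }
        printf("mapped %u pages using %s pages\n", num_pages,
               force_small ? "small" : "large");
        return 0;
}

int main(void)
{
        return load_entry(512) ? 1 : 0;
}

The retry_alloc label in the hunk below plays exactly this role: protected-mode sections set force_small_page up front, everything else only falls back to it after a failed large-page attempt.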
@@ -653,7 +656,7 @@ static int parse_memory_setup_entry(struct kbase_device *kbdev, struct protected_memory_allocation **pma = NULL; bool reuse_pages = false; bool is_small_page = true; - bool ignore_page_migration = true; + bool force_small_page = false; if (data_end < data_start) { dev_err(kbdev->dev, "Firmware corrupt, data_end < data_start (0x%x<0x%x)\n", @@ -696,16 +699,15 @@ static int parse_memory_setup_entry(struct kbase_device *kbdev, num_pages = (virtual_end - virtual_start) >> PAGE_SHIFT; - if(!protected_mode) { - reuse_pages = entry_find_large_page_to_reuse( - kbdev, virtual_start, virtual_end, flags, &phys, &pma, - num_pages, &num_pages_aligned, &is_small_page); - } - else { - num_pages_aligned = num_pages; + if (protected_mode) { + force_small_page = true; dev_warn(kbdev->dev, "Protected memory allocation requested for %u bytes (%u pages), serving with small pages and tight allocation.", (virtual_end - virtual_start), num_pages); } +retry_alloc: + reuse_pages = entry_find_large_page_to_reuse(kbdev, virtual_start, virtual_end, flags, + &phys, &pma, num_pages, &num_pages_aligned, + &is_small_page, force_small_page); if (!reuse_pages) phys = kmalloc_array(num_pages_aligned, sizeof(*phys), GFP_KERNEL); @@ -716,16 +718,18 @@ static int parse_memory_setup_entry(struct kbase_device *kbdev, if (!reuse_pages) { pma = kbase_csf_protected_memory_alloc( kbdev, phys, num_pages_aligned, is_small_page); - } - - if (!pma) { - /* If we can't allocate sufficient memory for FW - bail out and leave protected execution unsupported by termintating the allocator. */ - dev_warn(kbdev->dev, - "Protected memory allocation failed during FW initialization - Firmware protected mode entry will not be supported"); - kbase_csf_protected_memory_term(kbdev); - kbdev->csf.pma_dev = NULL; - kfree(phys); - return 0; + if (!pma) { + /* If we can't allocate sufficient memory for FW - bail out and leave protected execution unsupported by terminating the allocator. */ + dev_warn(kbdev->dev, + "Protected memory allocation failed during FW initialization - Firmware protected mode entry will not be supported"); + kbase_csf_protected_memory_term(kbdev); + kbdev->csf.pma_dev = NULL; + kfree(phys); + return 0; + } + } else if (WARN_ON(!pma)) { + ret = -EINVAL; + goto out; } } else { if (!reuse_pages) { @@ -733,14 +737,22 @@ ret = kbase_mem_pool_alloc_pages( kbase_mem_pool_group_select(kbdev, KBASE_MEM_GROUP_CSF_FW, is_small_page), num_pages_aligned, phys, false, NULL); - ignore_page_migration = false; } } if (ret < 0) { - dev_err(kbdev->dev, - "Failed to allocate %u physical pages for the firmware interface entry at VA 0x%x\n", - num_pages_aligned, virtual_start); + dev_warn( + kbdev->dev, + "Failed to allocate %u physical pages for the firmware interface entry at VA 0x%x using %s", + num_pages_aligned, virtual_start, + is_small_page ? 
"small pages" : "large page"); + WARN_ON(reuse_pages); + if (!is_small_page) { + dev_warn(kbdev->dev, "Retrying by using small pages"); + force_small_page = true; + kfree(phys); + goto retry_alloc; + } goto out; } @@ -843,8 +855,7 @@ static int parse_memory_setup_entry(struct kbase_device *kbdev, ret = kbase_mmu_insert_pages_no_flush(kbdev, &kbdev->csf.mcu_mmu, virtual_start >> PAGE_SHIFT, phys, num_pages_aligned, mem_flags, - KBASE_MEM_GROUP_CSF_FW, NULL, NULL, - ignore_page_migration); + KBASE_MEM_GROUP_CSF_FW, NULL, NULL); if (ret != 0) { dev_err(kbdev->dev, "Failed to insert firmware pages\n"); @@ -1316,7 +1327,7 @@ static inline void access_firmware_memory_common(struct kbase_device *kbdev, u32 page_num = offset_bytes >> PAGE_SHIFT; u32 offset_in_page = offset_bytes & ~PAGE_MASK; struct page *target_page = as_page(interface->phys[page_num]); - uintptr_t cpu_addr = (uintptr_t)kmap_atomic(target_page); + uintptr_t cpu_addr = (uintptr_t)kbase_kmap_atomic(target_page); u32 *addr = (u32 *)(cpu_addr + offset_in_page); if (read) { @@ -1331,7 +1342,7 @@ static inline void access_firmware_memory_common(struct kbase_device *kbdev, sizeof(u32), DMA_BIDIRECTIONAL); } - kunmap_atomic((u32 *)cpu_addr); + kbase_kunmap_atomic((u32 *)cpu_addr); } static inline void access_firmware_memory(struct kbase_device *kbdev, @@ -1713,6 +1724,11 @@ static void enable_shader_poweroff_timer(struct kbase_device *const kbdev, kbase_csf_firmware_global_input(global_iface, GLB_PWROFF_TIMER, pwroff_reg); + + kbase_csf_firmware_global_input_mask(global_iface, GLB_PWROFF_TIMER_CONFIG, + kbdev->csf.mcu_core_pwroff_dur_count_modifier, + GLB_PWROFF_TIMER_CONFIG_NO_MODIFIER_MASK); + set_global_request(global_iface, GLB_REQ_CFG_PWROFF_TIMER_MASK); /* Save the programed reg value in its shadow field */ @@ -1739,6 +1755,11 @@ static void enable_gpu_idle_timer(struct kbase_device *const kbdev) kbase_csf_firmware_global_input(global_iface, GLB_IDLE_TIMER, kbdev->csf.gpu_idle_dur_count); + + kbase_csf_firmware_global_input_mask(global_iface, GLB_IDLE_TIMER_CONFIG, + kbdev->csf.gpu_idle_dur_count_modifier, + GLB_IDLE_TIMER_CONFIG_NO_MODIFIER_MASK); + kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ, GLB_REQ_REQ_IDLE_ENABLE, GLB_REQ_IDLE_ENABLE_MASK); dev_dbg(kbdev->dev, "Enabling GPU idle timer with count-value: 0x%.8x", @@ -2000,6 +2021,10 @@ static void kbase_csf_firmware_reload_worker(struct work_struct *work) return; #endif + err = kbase_csf_firmware_cfg_fw_wa_enable(kbdev); + if (WARN_ON(err)) + return; + /* Reboot the firmware */ kbase_csf_firmware_enable_mcu(kbdev); } @@ -2042,13 +2067,13 @@ void kbase_csf_firmware_reload_completed(struct kbase_device *kbdev) kbase_pm_update_state(kbdev); } -static u32 convert_dur_to_idle_count(struct kbase_device *kbdev, const u32 dur_us) +static u32 convert_dur_to_idle_count(struct kbase_device *kbdev, const u32 dur_ns, u32 *modifier) { #define MICROSECONDS_PER_SECOND 1000000u #define HYSTERESIS_VAL_UNIT_SHIFT (10) /* Get the cntfreq_el0 value, which drives the SYSTEM_TIMESTAMP */ u64 freq = arch_timer_get_cntfrq(); - u64 dur_val = dur_us; + u64 dur_val = dur_ns; u32 cnt_val_u32, reg_val_u32; bool src_system_timestamp = freq > 0; @@ -2066,21 +2091,24 @@ static u32 convert_dur_to_idle_count(struct kbase_device *kbdev, const u32 dur_u "Can't get the timestamp frequency, use cycle counter format with firmware idle hysteresis!"); } - /* Formula for dur_val = ((dur_us/1000000) * freq_HZ) >> 10) */ - dur_val = (dur_val * freq) >> HYSTERESIS_VAL_UNIT_SHIFT; - dur_val = div_u64(dur_val, 
1000000); + /* Formula for dur_val = (dur/1e9) * freq_HZ) */ + dur_val = dur_val * freq; + dur_val = div_u64(dur_val, NSEC_PER_SEC); + if (dur_val < S32_MAX) { + *modifier = 1; + } else { + dur_val = dur_val >> HYSTERESIS_VAL_UNIT_SHIFT; + *modifier = 0; + } /* Interface limits the value field to S32_MAX */ cnt_val_u32 = (dur_val > S32_MAX) ? S32_MAX : (u32)dur_val; reg_val_u32 = GLB_IDLE_TIMER_TIMEOUT_SET(0, cnt_val_u32); /* add the source flag */ - if (src_system_timestamp) - reg_val_u32 = GLB_IDLE_TIMER_TIMER_SOURCE_SET(reg_val_u32, - GLB_IDLE_TIMER_TIMER_SOURCE_SYSTEM_TIMESTAMP); - else - reg_val_u32 = GLB_IDLE_TIMER_TIMER_SOURCE_SET(reg_val_u32, - GLB_IDLE_TIMER_TIMER_SOURCE_GPU_COUNTER); + reg_val_u32 = GLB_IDLE_TIMER_TIMER_SOURCE_SET( + reg_val_u32, (src_system_timestamp ? GLB_IDLE_TIMER_TIMER_SOURCE_SYSTEM_TIMESTAMP : + GLB_IDLE_TIMER_TIMER_SOURCE_GPU_COUNTER)); return reg_val_u32; } @@ -2091,19 +2119,21 @@ u32 kbase_csf_firmware_get_gpu_idle_hysteresis_time(struct kbase_device *kbdev) u32 dur; kbase_csf_scheduler_spin_lock(kbdev, &flags); - dur = kbdev->csf.gpu_idle_hysteresis_us; + dur = kbdev->csf.gpu_idle_hysteresis_ns; kbase_csf_scheduler_spin_unlock(kbdev, flags); return dur; } -u32 kbase_csf_firmware_set_gpu_idle_hysteresis_time(struct kbase_device *kbdev, u32 dur) +u32 kbase_csf_firmware_set_gpu_idle_hysteresis_time(struct kbase_device *kbdev, u32 dur_ns) { unsigned long flags; + u32 modifier = 0; + #ifdef CONFIG_MALI_HOST_CONTROLS_SC_RAILS - const u32 hysteresis_val = convert_dur_to_idle_count(kbdev, MALI_HOST_CONTROLS_SC_RAILS_IDLE_TIMER_US); + const u32 hysteresis_val = convert_dur_to_idle_count(kbdev, MALI_HOST_CONTROLS_SC_RAILS_IDLE_TIMER_NS, &modifier); #else - const u32 hysteresis_val = convert_dur_to_idle_count(kbdev, dur); + const u32 hysteresis_val = convert_dur_to_idle_count(kbdev, dur_ns, &modifier); #endif /* The 'fw_load_lock' is taken to synchronize against the deferred @@ -2112,19 +2142,28 @@ u32 kbase_csf_firmware_set_gpu_idle_hysteresis_time(struct kbase_device *kbdev, mutex_lock(&kbdev->fw_load_lock); if (unlikely(!kbdev->csf.firmware_inited)) { kbase_csf_scheduler_spin_lock(kbdev, &flags); - kbdev->csf.gpu_idle_hysteresis_us = dur; + kbdev->csf.gpu_idle_hysteresis_ns = dur_ns; kbdev->csf.gpu_idle_dur_count = hysteresis_val; + kbdev->csf.gpu_idle_dur_count_modifier = modifier; kbase_csf_scheduler_spin_unlock(kbdev, flags); mutex_unlock(&kbdev->fw_load_lock); goto end; } mutex_unlock(&kbdev->fw_load_lock); + if (kbase_reset_gpu_prevent_and_wait(kbdev)) { + dev_warn(kbdev->dev, + "Failed to prevent GPU reset when updating idle_hysteresis_time"); + return kbdev->csf.gpu_idle_dur_count; + } + kbase_csf_scheduler_pm_active(kbdev); - if (kbase_csf_scheduler_wait_mcu_active(kbdev)) { + if (kbase_csf_scheduler_killable_wait_mcu_active(kbdev)) { dev_err(kbdev->dev, "Unable to activate the MCU, the idle hysteresis value shall remain unchanged"); kbase_csf_scheduler_pm_idle(kbdev); + kbase_reset_gpu_allow(kbdev); + return kbdev->csf.gpu_idle_dur_count; } @@ -2153,8 +2192,9 @@ u32 kbase_csf_firmware_set_gpu_idle_hysteresis_time(struct kbase_device *kbdev, wait_for_global_request(kbdev, GLB_REQ_IDLE_DISABLE_MASK); kbase_csf_scheduler_spin_lock(kbdev, &flags); - kbdev->csf.gpu_idle_hysteresis_us = dur; + kbdev->csf.gpu_idle_hysteresis_ns = dur_ns; kbdev->csf.gpu_idle_dur_count = hysteresis_val; + kbdev->csf.gpu_idle_dur_count_modifier = modifier; kbase_csf_firmware_enable_gpu_idle_timer(kbdev); kbase_csf_scheduler_spin_unlock(kbdev, flags); 
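For reference, the conversion introduced above is count = dur_ns * freq / NSEC_PER_SEC: the legacy shr(10) scaling is now applied only when the raw tick count would overflow the signed 32-bit timeout field, and the NO_MODIFIER flag written to GLB_IDLE_TIMER_CONFIG (and GLB_PWROFF_TIMER_CONFIG) tells the firmware which interpretation is in use. A self-contained sketch with a worked number; dur_ns_to_count() is a hypothetical name, not a driver symbol, and the 800 * 1000 ns figure is the new DEFAULT_GLB_PWROFF_TIMEOUT_NS, which keeps the old 800 us default:

#include <stdint.h>
#include <stdio.h>

#define HYSTERESIS_VAL_UNIT_SHIFT 10
#define NSEC_PER_SEC 1000000000ULL

static uint32_t dur_ns_to_count(uint64_t dur_ns, uint64_t freq_hz, uint32_t *no_modifier)
{
        /* count = (dur_ns / 1e9) * freq_hz, computed in 64 bits so a u32
         * duration times a GHz-range counter frequency cannot overflow. */
        uint64_t dur_val = dur_ns * freq_hz / NSEC_PER_SEC;

        if (dur_val < INT32_MAX) {
                *no_modifier = 1;                       /* raw ticks, shr(10) not applied */
        } else {
                dur_val >>= HYSTERESIS_VAL_UNIT_SHIFT;  /* 1024-tick units */
                *no_modifier = 0;
        }
        return dur_val > INT32_MAX ? INT32_MAX : (uint32_t)dur_val;
}

int main(void)
{
        uint32_t no_mod;
        /* 800000 ns (800 us) at a 50 MHz timestamp source:
         * 800000 * 50e6 / 1e9 = 40000 ticks, no_modifier = 1. */
        printf("count=%u no_modifier=%u\n",
               (unsigned)dur_ns_to_count(800000, 50000000, &no_mod), (unsigned)no_mod);
        return 0;
}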
wait_for_global_request(kbdev, GLB_REQ_IDLE_ENABLE_MASK); @@ -2164,8 +2204,9 @@ u32 kbase_csf_firmware_set_gpu_idle_hysteresis_time(struct kbase_device *kbdev, * enabled */ kbase_csf_scheduler_spin_lock(kbdev, &flags); - kbdev->csf.gpu_idle_hysteresis_us = dur; + kbdev->csf.gpu_idle_hysteresis_ns = dur_ns; kbdev->csf.gpu_idle_dur_count = hysteresis_val; + kbdev->csf.gpu_idle_dur_count_modifier = modifier; kbase_csf_scheduler_spin_unlock(kbdev, flags); } kbase_csf_scheduler_unlock(kbdev); @@ -2173,11 +2214,11 @@ u32 kbase_csf_firmware_set_gpu_idle_hysteresis_time(struct kbase_device *kbdev, mutex_unlock(&kbdev->csf.reg_lock); #endif - dev_dbg(kbdev->dev, "GPU suspend timeout updated: %i us (0x%.8x)", - kbdev->csf.gpu_idle_hysteresis_us, + dev_dbg(kbdev->dev, "GPU suspend timeout updated: %i ns (0x%.8x)", + kbdev->csf.gpu_idle_hysteresis_ns, kbdev->csf.gpu_idle_dur_count); kbase_csf_scheduler_pm_idle(kbdev); - + kbase_reset_gpu_allow(kbdev); end: dev_dbg(kbdev->dev, "CSF set firmware idle hysteresis count-value: 0x%.8x", hysteresis_val); @@ -2185,14 +2226,18 @@ end: return hysteresis_val; } -static u32 convert_dur_to_core_pwroff_count(struct kbase_device *kbdev, const u32 dur_us) +static u32 convert_dur_to_core_pwroff_count(struct kbase_device *kbdev, const u32 dur_ns, + u32 *modifier) { /* Get the cntfreq_el0 value, which drives the SYSTEM_TIMESTAMP */ u64 freq = arch_timer_get_cntfrq(); - u64 dur_val = dur_us; + u64 dur_val = dur_ns; u32 cnt_val_u32, reg_val_u32; bool src_system_timestamp = freq > 0; + const struct kbase_pm_policy *current_policy = kbase_pm_get_policy(kbdev); + bool always_on = current_policy == &kbase_pm_always_on_policy_ops; + if (!src_system_timestamp) { /* Get the cycle_counter source alternative */ spin_lock(&kbdev->pm.clk_rtm.lock); @@ -2207,21 +2252,32 @@ static u32 convert_dur_to_core_pwroff_count(struct kbase_device *kbdev, const u3 "Can't get the timestamp frequency, use cycle counter with MCU shader Core Poweroff timer!"); } - /* Formula for dur_val = ((dur_us/1e6) * freq_HZ) >> 10) */ - dur_val = (dur_val * freq) >> HYSTERESIS_VAL_UNIT_SHIFT; - dur_val = div_u64(dur_val, 1000000); + /* Formula for dur_val = (dur/1e9) * freq_HZ) */ + dur_val = dur_val * freq; + dur_val = div_u64(dur_val, NSEC_PER_SEC); + if (dur_val < S32_MAX) { + *modifier = 1; + } else { + dur_val = dur_val >> HYSTERESIS_VAL_UNIT_SHIFT; + *modifier = 0; + } - /* Interface limits the value field to S32_MAX */ - cnt_val_u32 = (dur_val > S32_MAX) ? S32_MAX : (u32)dur_val; + if (dur_val == 0 && !always_on) { + /* Lower Bound - as 0 disables timeout and host controls shader-core power management. */ + cnt_val_u32 = 1; + } else if (dur_val > S32_MAX) { + /* Upper Bound - as interface limits the field to S32_MAX */ + cnt_val_u32 = S32_MAX; + } else { + cnt_val_u32 = (u32)dur_val; + } reg_val_u32 = GLB_PWROFF_TIMER_TIMEOUT_SET(0, cnt_val_u32); /* add the source flag */ - if (src_system_timestamp) - reg_val_u32 = GLB_PWROFF_TIMER_TIMER_SOURCE_SET(reg_val_u32, - GLB_PWROFF_TIMER_TIMER_SOURCE_SYSTEM_TIMESTAMP); - else - reg_val_u32 = GLB_PWROFF_TIMER_TIMER_SOURCE_SET(reg_val_u32, - GLB_PWROFF_TIMER_TIMER_SOURCE_GPU_COUNTER); + reg_val_u32 = GLB_PWROFF_TIMER_TIMER_SOURCE_SET( + reg_val_u32, + (src_system_timestamp ? 
GLB_PWROFF_TIMER_TIMER_SOURCE_SYSTEM_TIMESTAMP : + GLB_PWROFF_TIMER_TIMER_SOURCE_GPU_COUNTER)); return reg_val_u32; } @@ -2232,20 +2288,23 @@ u32 kbase_csf_firmware_get_mcu_core_pwroff_time(struct kbase_device *kbdev) unsigned long flags; spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - pwroff = kbdev->csf.mcu_core_pwroff_dur_us; + pwroff = kbdev->csf.mcu_core_pwroff_dur_ns; spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); return pwroff; } -u32 kbase_csf_firmware_set_mcu_core_pwroff_time(struct kbase_device *kbdev, u32 dur) +u32 kbase_csf_firmware_set_mcu_core_pwroff_time(struct kbase_device *kbdev, u32 dur_ns) { unsigned long flags; - const u32 pwroff = convert_dur_to_core_pwroff_count(kbdev, dur); + u32 modifier = 0; + + const u32 pwroff = convert_dur_to_core_pwroff_count(kbdev, dur_ns, &modifier); spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - kbdev->csf.mcu_core_pwroff_dur_us = dur; + kbdev->csf.mcu_core_pwroff_dur_ns = dur_ns; kbdev->csf.mcu_core_pwroff_dur_count = pwroff; + kbdev->csf.mcu_core_pwroff_dur_count_modifier = modifier; spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); dev_dbg(kbdev->dev, "MCU shader Core Poweroff input update: 0x%.8x", pwroff); @@ -2253,6 +2312,11 @@ u32 kbase_csf_firmware_set_mcu_core_pwroff_time(struct kbase_device *kbdev, u32 return pwroff; } +u32 kbase_csf_firmware_reset_mcu_core_pwroff_time(struct kbase_device *kbdev) +{ + return kbase_csf_firmware_set_mcu_core_pwroff_time(kbdev, DEFAULT_GLB_PWROFF_TIMEOUT_NS); +} + /** * kbase_device_csf_iterator_trace_init - Send request to enable iterator * trace port. @@ -2264,19 +2328,25 @@ u32 kbase_csf_firmware_set_mcu_core_pwroff_time(struct kbase_device *kbdev, u32 static int kbase_device_csf_iterator_trace_init(struct kbase_device *kbdev) { /* Enable the iterator trace port if supported by the GPU. - * It requires the GPU to have a nonzero "iter_trace_enable" + * It requires the GPU to have a nonzero "iter-trace-enable" * property in the device tree, and the FW must advertise * this feature in GLB_FEATURES. 
*/ if (kbdev->pm.backend.gpu_powered) { - /* check device tree for iterator trace enable property */ + /* check device tree for iterator trace enable property + * and fallback to "iter_trace_enable" if it is not found + */ const void *iter_trace_param = of_get_property( kbdev->dev->of_node, - "iter_trace_enable", NULL); + "iter-trace-enable", NULL); const struct kbase_csf_global_iface *iface = &kbdev->csf.global_iface; + if (!iter_trace_param) + iter_trace_param = + of_get_property(kbdev->dev->of_node, "iter_trace_enable", NULL); + if (iter_trace_param) { u32 iter_trace_value = be32_to_cpup(iter_trace_param); @@ -2324,6 +2394,8 @@ static void coredump_worker(struct work_struct *data) int kbase_csf_firmware_early_init(struct kbase_device *kbdev) { + u32 modifier = 0; + init_waitqueue_head(&kbdev->csf.event_wait); kbdev->csf.interrupt_received = false; @@ -2336,11 +2408,13 @@ int kbase_csf_firmware_early_init(struct kbase_device *kbdev) */ kbdev->csf.mcu_core_pwroff_dur_count = 1; #else - kbdev->csf.mcu_core_pwroff_dur_us = DEFAULT_GLB_PWROFF_TIMEOUT_US; + kbdev->csf.mcu_core_pwroff_dur_ns = DEFAULT_GLB_PWROFF_TIMEOUT_NS; kbdev->csf.mcu_core_pwroff_dur_count = convert_dur_to_core_pwroff_count( - kbdev, DEFAULT_GLB_PWROFF_TIMEOUT_US); + kbdev, DEFAULT_GLB_PWROFF_TIMEOUT_NS, &modifier); + kbdev->csf.mcu_core_pwroff_dur_count_modifier = modifier; #endif + kbase_csf_firmware_reset_mcu_core_pwroff_time(kbdev); INIT_LIST_HEAD(&kbdev->csf.firmware_interfaces); INIT_LIST_HEAD(&kbdev->csf.firmware_config); INIT_LIST_HEAD(&kbdev->csf.firmware_timeline_metadata); @@ -2352,6 +2426,7 @@ int kbase_csf_firmware_early_init(struct kbase_device *kbdev) INIT_WORK(&kbdev->csf.coredump_work, coredump_worker); mutex_init(&kbdev->csf.reg_lock); + kbase_csf_pending_gpuq_kicks_init(kbdev); kbdev->csf.fw = (struct kbase_csf_mcu_fw){ .data = NULL }; @@ -2360,21 +2435,25 @@ int kbase_csf_firmware_early_init(struct kbase_device *kbdev) void kbase_csf_firmware_early_term(struct kbase_device *kbdev) { + kbase_csf_pending_gpuq_kicks_term(kbdev); mutex_destroy(&kbdev->csf.reg_lock); } int kbase_csf_firmware_late_init(struct kbase_device *kbdev) { - kbdev->csf.gpu_idle_hysteresis_us = FIRMWARE_IDLE_HYSTERESIS_TIME_USEC; + u32 modifier = 0; + + kbdev->csf.gpu_idle_hysteresis_ns = FIRMWARE_IDLE_HYSTERESIS_TIME_NS; + #ifdef KBASE_PM_RUNTIME if (kbase_pm_gpu_sleep_allowed(kbdev)) - kbdev->csf.gpu_idle_hysteresis_us /= FIRMWARE_IDLE_HYSTERESIS_GPU_SLEEP_SCALER; + kbdev->csf.gpu_idle_hysteresis_ns /= FIRMWARE_IDLE_HYSTERESIS_GPU_SLEEP_SCALER; #endif - WARN_ON(!kbdev->csf.gpu_idle_hysteresis_us); + WARN_ON(!kbdev->csf.gpu_idle_hysteresis_ns); #ifdef CONFIG_MALI_HOST_CONTROLS_SC_RAILS kbdev->csf.gpu_idle_dur_count = convert_dur_to_idle_count( - kbdev, MALI_HOST_CONTROLS_SC_RAILS_IDLE_TIMER_US); + kbdev, MALI_HOST_CONTROLS_SC_RAILS_IDLE_TIMER_NS, &modifier); /* Set to the lowest possible value for FW to immediately write * to the power off register to disable the cores. 
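The iterator-trace lookup just above tries the conventional dashed property name first and only then the legacy underscore spelling; the same idiom reappears later in this patch for the quirks-ext firmware workaround property. A runnable sketch of the policy, where dt_lookup() is merely a stand-in for of_get_property() and the fake table deliberately carries only the legacy spelling:

#include <stdio.h>
#include <string.h>

/* Stand-in for of_get_property(): pretend this DTB only carries the
 * legacy underscore spelling of the property. */
static const unsigned int *dt_lookup(const char *name)
{
        static const unsigned int one = 1;

        if (strcmp(name, "iter_trace_enable") == 0)
                return &one;
        return NULL;
}

int main(void)
{
        const unsigned int *prop = dt_lookup("iter-trace-enable");

        if (!prop) /* fall back to the legacy spelling, as the driver does */
                prop = dt_lookup("iter_trace_enable");

        printf("iterator trace %s\n", prop && *prop ? "enabled" : "disabled");
        return 0;
}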
@@ -2382,10 +2461,12 @@ int kbase_csf_firmware_late_init(struct kbase_device *kbdev) kbdev->csf.mcu_core_pwroff_dur_count = 1; #else kbdev->csf.gpu_idle_dur_count = convert_dur_to_idle_count( - kbdev, kbdev->csf.gpu_idle_hysteresis_us); - kbdev->csf.mcu_core_pwroff_dur_us = DEFAULT_GLB_PWROFF_TIMEOUT_US; + kbdev, kbdev->csf.gpu_idle_hysteresis_ns, &modifier); + kbdev->csf.gpu_idle_dur_count_modifier = modifier; + kbdev->csf.mcu_core_pwroff_dur_ns = DEFAULT_GLB_PWROFF_TIMEOUT_NS; kbdev->csf.mcu_core_pwroff_dur_count = convert_dur_to_core_pwroff_count( - kbdev, DEFAULT_GLB_PWROFF_TIMEOUT_US); + kbdev, DEFAULT_GLB_PWROFF_TIMEOUT_NS, &modifier); + kbdev->csf.mcu_core_pwroff_dur_count_modifier = modifier; #endif return 0; @@ -2401,6 +2482,7 @@ int kbase_csf_firmware_load_init(struct kbase_device *kbdev) u32 entry_end_offset; u32 entry_offset; int ret; + const char *fw_name = default_fw_name; lockdep_assert_held(&kbdev->fw_load_lock); @@ -2424,6 +2506,33 @@ int kbase_csf_firmware_load_init(struct kbase_device *kbdev) goto err_out; } +#if IS_ENABLED(CONFIG_OF) + /* If we can't read CSF firmware name from DTB, + * fw_name is not modified and remains the default. + */ + ret = of_property_read_string(kbdev->dev->of_node, "firmware-name", &fw_name); + if (ret == -EINVAL) { + /* Property doesn't exist in DTB, and fw_name already points to default FW name + * so just reset return value and continue. + */ + ret = 0; + } else if (ret == -ENODATA) { + dev_warn(kbdev->dev, + "\"firmware-name\" DTB property contains no data, using default FW name"); + /* Reset return value so FW does not fail to load */ + ret = 0; + } else if (ret == -EILSEQ) { + /* This is reached when the size of the fw_name buffer is too small for the string + * stored in the DTB and the null terminator. 
+ */ + dev_warn(kbdev->dev, + "\"firmware-name\" DTB property value too long, using default FW name."); + /* Reset return value so FW does not fail to load */ + ret = 0; + } + +#endif /* IS_ENABLED(CONFIG_OF) */ + if (request_firmware(&firmware, fw_name, kbdev->dev) != 0) { dev_err(kbdev->dev, "Failed to load firmware image '%s'\n", @@ -2534,6 +2643,12 @@ int kbase_csf_firmware_load_init(struct kbase_device *kbdev) } #endif + ret = kbase_csf_firmware_cfg_fw_wa_init(kbdev); + if (ret != 0) { + dev_err(kbdev->dev, "Failed to initialize firmware workarounds"); + goto err_out; + } + /* Make sure L2 cache is powered up */ kbase_pm_wait_for_l2_powered(kbdev); @@ -2568,6 +2683,12 @@ int kbase_csf_firmware_load_init(struct kbase_device *kbdev) if (ret != 0) goto err_out; + ret = kbase_csf_firmware_log_init(kbdev); + if (ret != 0) { + dev_err(kbdev->dev, "Failed to initialize FW trace (err %d)", ret); + goto err_out; + } + ret = kbase_csf_firmware_cfg_init(kbdev); if (ret != 0) goto err_out; @@ -2576,12 +2697,6 @@ int kbase_csf_firmware_load_init(struct kbase_device *kbdev) if (ret != 0) goto err_out; - ret = kbase_csf_firmware_log_init(kbdev); - if (ret != 0) { - dev_err(kbdev->dev, "Failed to initialize FW trace (err %d)", ret); - goto err_out; - } - if (kbdev->csf.fw_core_dump.available) kbase_csf_firmware_core_dump_init(kbdev); @@ -2607,10 +2722,10 @@ void kbase_csf_firmware_unload_term(struct kbase_device *kbdev) WARN(ret, "failed to wait for GPU reset"); - kbase_csf_firmware_log_term(kbdev); - kbase_csf_firmware_cfg_term(kbdev); + kbase_csf_firmware_log_term(kbdev); + kbase_csf_timeout_term(kbdev); kbase_csf_free_dummy_user_reg_page(kbdev); @@ -2638,6 +2753,8 @@ void kbase_csf_firmware_unload_term(struct kbase_device *kbdev) unload_mmu_tables(kbdev); + kbase_csf_firmware_cfg_fw_wa_term(kbdev); + kbase_csf_firmware_trace_buffers_term(kbdev); while (!list_empty(&kbdev->csf.firmware_interfaces)) { @@ -3014,7 +3131,9 @@ int kbase_csf_trigger_firmware_config_update(struct kbase_device *kbdev) /* Ensure GPU is powered-up until we complete config update.*/ kbase_csf_scheduler_pm_active(kbdev); - kbase_csf_scheduler_wait_mcu_active(kbdev); + err = kbase_csf_scheduler_killable_wait_mcu_active(kbdev); + if (err) + goto exit; /* The 'reg_lock' is also taken and is held till the update is * complete, to ensure the config update gets serialized. 
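The "firmware-name" handling above deliberately downgrades the three expected device-tree outcomes, so that request_firmware() still runs with the built-in default image instead of failing the whole FW load. A compact sketch of that policy; pick_fw_name() is a hypothetical helper, not driver code:

#include <errno.h>
#include <stdio.h>

/* Only a clean read replaces the default name; the three expected DT
 * errors are cleared so firmware loading continues with the default. */
static int pick_fw_name(int dt_err, const char **fw_name, const char *dt_value)
{
        switch (dt_err) {
        case 0:
                *fw_name = dt_value;    /* DTB supplied a usable string */
                return 0;
        case -EINVAL:                   /* property absent: silently keep default */
        case -ENODATA:                  /* property empty: warn, keep default */
        case -EILSEQ:                   /* string unterminated/too long: warn, keep default */
                return 0;
        default:
                return dt_err;          /* anything else is a real failure */
        }
}

int main(void)
{
        const char *fw_name = "mali_csffw.bin";

        if (pick_fw_name(-EILSEQ, &fw_name, "custom_fw.bin") == 0)
                printf("loading %s\n", fw_name);
        return 0;
}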
@@ -3031,6 +3150,7 @@ int kbase_csf_trigger_firmware_config_update(struct kbase_device *kbdev) GLB_REQ_FIRMWARE_CONFIG_UPDATE_MASK); mutex_unlock(&kbdev->csf.reg_lock); +exit: kbase_csf_scheduler_pm_idle(kbdev); return err; } @@ -3176,8 +3296,7 @@ int kbase_csf_firmware_mcu_shared_mapping_init( if (!cpu_addr) goto vmap_error; - va_reg = kbase_alloc_free_region(kbdev, &kbdev->csf.shared_reg_rbtree, 0, num_pages, - KBASE_REG_ZONE_MCU_SHARED); + va_reg = kbase_alloc_free_region(&kbdev->csf.mcu_shared_zone, 0, num_pages); if (!va_reg) goto va_region_alloc_error; @@ -3193,7 +3312,7 @@ int kbase_csf_firmware_mcu_shared_mapping_init( ret = kbase_mmu_insert_pages_no_flush(kbdev, &kbdev->csf.mcu_mmu, va_reg->start_pfn, &phys[0], num_pages, gpu_map_properties, - KBASE_MEM_GROUP_CSF_FW, NULL, NULL, false); + KBASE_MEM_GROUP_CSF_FW, NULL, NULL); if (ret) goto mmu_insert_pages_error; diff --git a/mali_kbase/csf/mali_kbase_csf_firmware.h b/mali_kbase/csf/mali_kbase_csf_firmware.h index 9e85c1d..d8ed8d6 100644 --- a/mali_kbase/csf/mali_kbase_csf_firmware.h +++ b/mali_kbase/csf/mali_kbase_csf_firmware.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2018-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -56,7 +56,7 @@ #define CSF_NUM_DOORBELL ((u8)24) /* Offset to the first HW doorbell page */ -#define CSF_HW_DOORBELL_PAGE_OFFSET ((u32)0x80000) +#define CSF_HW_DOORBELL_PAGE_OFFSET ((u32)DOORBELLS_BASE) /* Size of HW Doorbell page, used to calculate the offset to subsequent pages */ #define CSF_HW_DOORBELL_PAGE_SIZE ((u32)0x10000) @@ -870,6 +870,22 @@ u32 kbase_csf_firmware_get_mcu_core_pwroff_time(struct kbase_device *kbdev); u32 kbase_csf_firmware_set_mcu_core_pwroff_time(struct kbase_device *kbdev, u32 dur); /** + * kbase_csf_firmware_reset_mcu_core_pwroff_time - Reset the MCU shader Core power-off + * time value + * + * @kbdev: Instance of a GPU platform device that implements a CSF interface. + * + * Sets the MCU Shader Core power-off time value to the default. + * + * The configured MCU shader Core power-off timer will only have effect when the host + * driver has delegated the shader cores' power management to MCU. + * + * Return: the actual internal core power-off timer value in register defined + * format. + */ +u32 kbase_csf_firmware_reset_mcu_core_pwroff_time(struct kbase_device *kbdev); + +/** * kbase_csf_interface_version - Helper function to build the full firmware * interface version in a format compatible with * GLB_VERSION register diff --git a/mali_kbase/csf/mali_kbase_csf_firmware_cfg.c b/mali_kbase/csf/mali_kbase_csf_firmware_cfg.c index 13a816b..48ddbb5 100644 --- a/mali_kbase/csf/mali_kbase_csf_firmware_cfg.c +++ b/mali_kbase/csf/mali_kbase_csf_firmware_cfg.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2020-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -35,6 +35,7 @@ #define HOST_CONTROLS_SC_RAILS_CFG_ENTRY_NAME "Host controls SC rails" #endif +#define CSF_FIRMWARE_CFG_WA_CFG0_ENTRY_NAME "WA_CFG0" /** * struct firmware_config - Configuration item within the MCU firmware @@ -117,7 +118,7 @@ static ssize_t show_fw_cfg(struct kobject *kobj, return -EINVAL; } - return snprintf(buf, PAGE_SIZE, "%u\n", val); + return scnprintf(buf, PAGE_SIZE, "%u\n", val); } static ssize_t store_fw_cfg(struct kobject *kobj, @@ -150,6 +151,9 @@ static ssize_t store_fw_cfg(struct kobject *kobj, HOST_CONTROLS_SC_RAILS_CFG_ENTRY_NAME)) return -EPERM; #endif + if (!strcmp(config->name, + CSF_FIRMWARE_CFG_WA_CFG0_ENTRY_NAME)) + return -EPERM; if ((val < config->min) || (val > config->max)) return -EINVAL; @@ -275,6 +279,19 @@ int kbase_csf_firmware_cfg_init(struct kbase_device *kbdev) kbase_csf_read_firmware_memory(kbdev, config->address, &config->cur_val); + if (!strcmp(config->name, CSF_FIRMWARE_CFG_LOG_VERBOSITY_ENTRY_NAME) && + (config->cur_val)) { + err = kbase_csf_firmware_log_toggle_logging_calls(config->kbdev, + config->cur_val); + + if (err) { + kobject_put(&config->kobj); + dev_err(kbdev->dev, "Failed to enable logging (result: %d)", err); + return err; + } + } + + err = kobject_init_and_add(&config->kobj, &fw_cfg_kobj_type, kbdev->csf.fw_cfg_kobj, "%s", config->name); if (err) { @@ -361,6 +378,25 @@ int kbase_csf_firmware_cfg_find_config_address(struct kbase_device *kbdev, const return -ENOENT; } +int kbase_csf_firmware_cfg_fw_wa_enable(struct kbase_device *kbdev) +{ + struct firmware_config *config; + + /* "quirks_ext" property is optional */ + if (!kbdev->csf.quirks_ext) + return 0; + + list_for_each_entry(config, &kbdev->csf.firmware_config, node) { + if (strcmp(config->name, CSF_FIRMWARE_CFG_WA_CFG0_ENTRY_NAME)) + continue; + dev_info(kbdev->dev, "External quirks 0: 0x%08x", kbdev->csf.quirks_ext[0]); + kbase_csf_update_firmware_memory(kbdev, config->address, kbdev->csf.quirks_ext[0]); + return 0; + } + + return -ENOENT; +} + #ifdef CONFIG_MALI_HOST_CONTROLS_SC_RAILS int kbase_csf_firmware_cfg_enable_host_ctrl_sc_rails(struct kbase_device *kbdev) { @@ -379,6 +415,54 @@ int kbase_csf_firmware_cfg_enable_host_ctrl_sc_rails(struct kbase_device *kbdev) } #endif +int kbase_csf_firmware_cfg_fw_wa_init(struct kbase_device *kbdev) +{ + int ret; + int entry_count; + size_t entry_bytes; + + /* "quirks-ext" property is optional and may have no value. + * Also try fallback "quirks_ext" property if it doesn't exist. + */ + entry_count = of_property_count_u32_elems(kbdev->dev->of_node, "quirks-ext"); + + if (entry_count == -EINVAL) + entry_count = of_property_count_u32_elems(kbdev->dev->of_node, "quirks_ext"); + + if (entry_count == -EINVAL || entry_count == -ENODATA) + return 0; + + entry_bytes = entry_count * sizeof(u32); + kbdev->csf.quirks_ext = kzalloc(entry_bytes, GFP_KERNEL); + if (!kbdev->csf.quirks_ext) + return -ENOMEM; + + ret = of_property_read_u32_array(kbdev->dev->of_node, "quirks-ext", kbdev->csf.quirks_ext, + entry_count); + + if (ret == -EINVAL) + ret = of_property_read_u32_array(kbdev->dev->of_node, "quirks_ext", + kbdev->csf.quirks_ext, entry_count); + + if (ret == -EINVAL || ret == -ENODATA) { + /* This is unexpected since the property is already accessed for counting the number + * of its elements. 
+ */ + dev_err(kbdev->dev, "\"quirks_ext\" DTB property data read failed"); + return ret; + } + if (ret == -EOVERFLOW) { + dev_err(kbdev->dev, "\"quirks_ext\" DTB property data size exceeds 32 bits"); + return ret; + } + + return kbase_csf_firmware_cfg_fw_wa_enable(kbdev); +} + +void kbase_csf_firmware_cfg_fw_wa_term(struct kbase_device *kbdev) +{ + kfree(kbdev->csf.quirks_ext); +} #else int kbase_csf_firmware_cfg_init(struct kbase_device *kbdev) @@ -404,4 +488,15 @@ int kbase_csf_firmware_cfg_enable_host_ctrl_sc_rails(struct kbase_device *kbdev) return 0; } #endif + +int kbase_csf_firmware_cfg_fw_wa_enable(struct kbase_device *kbdev) +{ + return 0; +} + +int kbase_csf_firmware_cfg_fw_wa_init(struct kbase_device *kbdev) +{ + return 0; +} + #endif /* CONFIG_SYSFS */ diff --git a/mali_kbase/csf/mali_kbase_csf_firmware_cfg.h b/mali_kbase/csf/mali_kbase_csf_firmware_cfg.h index bf99c46..f565290 100644 --- a/mali_kbase/csf/mali_kbase_csf_firmware_cfg.h +++ b/mali_kbase/csf/mali_kbase_csf_firmware_cfg.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2020-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -97,5 +97,37 @@ int kbase_csf_firmware_cfg_enable_host_ctrl_sc_rails(struct kbase_device *kbdev) */ int kbase_csf_firmware_cfg_find_config_address(struct kbase_device *kbdev, const char *name, u32 *addr); +/** + * kbase_csf_firmware_cfg_fw_wa_enable() - Enable firmware workarounds configuration. + * + * @kbdev: Kbase device structure + * + * Look for the config entry that enables support in FW for workarounds and set it according to + * the firmware workaround configuration before the initial boot or reload of firmware. + * + * Return: 0 if successful, negative error code on failure + */ +int kbase_csf_firmware_cfg_fw_wa_enable(struct kbase_device *kbdev); + +/** + * kbase_csf_firmware_cfg_fw_wa_init() - Initialize firmware workarounds configuration. + * + * @kbdev: Kbase device structure + * + * Retrieve and save the firmware workarounds configuration from device-tree "quirks_ext" property. + * Then, look for the config entry that enables support in FW for workarounds and set it according + * to the configuration before the initial firmware boot. + * + * Return: 0 if successful, negative error code on failure + */ +int kbase_csf_firmware_cfg_fw_wa_init(struct kbase_device *kbdev); + +/** + * kbase_csf_firmware_cfg_fw_wa_term - Delete local cache for firmware workarounds configuration. + * + * @kbdev: Pointer to the Kbase device + * + */ +void kbase_csf_firmware_cfg_fw_wa_term(struct kbase_device *kbdev); #endif /* _KBASE_CSF_FIRMWARE_CFG_H_ */ diff --git a/mali_kbase/csf/mali_kbase_csf_firmware_core_dump.c b/mali_kbase/csf/mali_kbase_csf_firmware_core_dump.c index ce8e4af..493e1c8 100644 --- a/mali_kbase/csf/mali_kbase_csf_firmware_core_dump.c +++ b/mali_kbase/csf/mali_kbase_csf_firmware_core_dump.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2021-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -25,6 +25,7 @@ #include <linux/file.h> #include <linux/elf.h> #include <linux/elfcore.h> +#include <linux/version_compat_defs.h> #include "mali_kbase.h" #include "mali_kbase_csf_firmware_core_dump.h" @@ -507,7 +508,7 @@ static int fw_core_dump_create(struct kbase_device *kbdev) /* Ensure MCU is active before requesting the core dump. */ kbase_csf_scheduler_pm_active(kbdev); - err = kbase_csf_scheduler_wait_mcu_active(kbdev); + err = kbase_csf_scheduler_killable_wait_mcu_active(kbdev); if (!err) err = kbase_csf_firmware_req_core_dump(kbdev); @@ -666,9 +667,9 @@ static int fw_core_dump_seq_show(struct seq_file *m, void *v) /* Write the current page. */ page = as_page(data->interface->phys[data->page_num]); - p = kmap_atomic(page); + p = kbase_kmap_atomic(page); seq_write(m, p, FW_PAGE_SIZE); - kunmap_atomic(p); + kbase_kunmap_atomic(p); return 0; } diff --git a/mali_kbase/csf/mali_kbase_csf_firmware_log.c b/mali_kbase/csf/mali_kbase_csf_firmware_log.c index 77d3b1e..89df839 100644 --- a/mali_kbase/csf/mali_kbase_csf_firmware_log.c +++ b/mali_kbase/csf/mali_kbase_csf_firmware_log.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2021-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -55,7 +55,7 @@ static int kbase_csf_firmware_log_enable_mask_read(void *data, u64 *val) { struct kbase_device *kbdev = (struct kbase_device *)data; struct firmware_trace_buffer *tb = - kbase_csf_firmware_get_trace_buffer(kbdev, FIRMWARE_LOG_BUF_NAME); + kbase_csf_firmware_get_trace_buffer(kbdev, KBASE_CSFFW_LOG_BUF_NAME); if (tb == NULL) { dev_err(kbdev->dev, "Couldn't get the firmware trace buffer"); @@ -70,7 +70,7 @@ static int kbase_csf_firmware_log_enable_mask_write(void *data, u64 val) { struct kbase_device *kbdev = (struct kbase_device *)data; struct firmware_trace_buffer *tb = - kbase_csf_firmware_get_trace_buffer(kbdev, FIRMWARE_LOG_BUF_NAME); + kbase_csf_firmware_get_trace_buffer(kbdev, KBASE_CSFFW_LOG_BUF_NAME); u64 new_mask; unsigned int enable_bits_count; @@ -115,7 +115,7 @@ static ssize_t kbasep_csf_firmware_log_debugfs_read(struct file *file, char __us int ret; struct firmware_trace_buffer *tb = - kbase_csf_firmware_get_trace_buffer(kbdev, FIRMWARE_LOG_BUF_NAME); + kbase_csf_firmware_get_trace_buffer(kbdev, KBASE_CSFFW_LOG_BUF_NAME); if (tb == NULL) { dev_err(kbdev->dev, "Couldn't get the firmware trace buffer"); @@ -125,8 +125,9 @@ static ssize_t kbasep_csf_firmware_log_debugfs_read(struct file *file, char __us if (atomic_cmpxchg(&fw_log->busy, 0, 1) != 0) return -EBUSY; - /* Reading from userspace is only allowed in manual mode */ - if (fw_log->mode != KBASE_CSF_FIRMWARE_LOG_MODE_MANUAL) { + /* Reading from userspace is only allowed in manual mode or auto-discard mode */ + if (fw_log->mode != KBASE_CSF_FIRMWARE_LOG_MODE_MANUAL && + fw_log->mode != KBASE_CSF_FIRMWARE_LOG_MODE_AUTO_DISCARD) { ret = -EINVAL; goto out; } @@ -176,8 +177,9 @@ static int kbase_csf_firmware_log_mode_write(void *data, u64 val) cancel_delayed_work_sync(&fw_log->poll_work); break; case KBASE_CSF_FIRMWARE_LOG_MODE_AUTO_PRINT: + case KBASE_CSF_FIRMWARE_LOG_MODE_AUTO_DISCARD: schedule_delayed_work(&fw_log->poll_work, - 
msecs_to_jiffies(KBASE_CSF_FIRMWARE_LOG_POLL_PERIOD_MS)); + msecs_to_jiffies(atomic_read(&fw_log->poll_period_ms))); break; default: ret = -EINVAL; @@ -191,6 +193,24 @@ out: return ret; } +static int kbase_csf_firmware_log_poll_period_read(void *data, u64 *val) +{ + struct kbase_device *kbdev = (struct kbase_device *)data; + struct kbase_csf_firmware_log *fw_log = &kbdev->csf.fw_log; + + *val = atomic_read(&fw_log->poll_period_ms); + return 0; +} + +static int kbase_csf_firmware_log_poll_period_write(void *data, u64 val) +{ + struct kbase_device *kbdev = (struct kbase_device *)data; + struct kbase_csf_firmware_log *fw_log = &kbdev->csf.fw_log; + + atomic_set(&fw_log->poll_period_ms, val); + return 0; +} + DEFINE_DEBUGFS_ATTRIBUTE(kbase_csf_firmware_log_enable_mask_fops, kbase_csf_firmware_log_enable_mask_read, kbase_csf_firmware_log_enable_mask_write, "%llx\n"); @@ -204,56 +224,135 @@ static const struct file_operations kbasep_csf_firmware_log_debugfs_fops = { DEFINE_DEBUGFS_ATTRIBUTE(kbase_csf_firmware_log_mode_fops, kbase_csf_firmware_log_mode_read, kbase_csf_firmware_log_mode_write, "%llu\n"); +DEFINE_DEBUGFS_ATTRIBUTE(kbase_csf_firmware_log_poll_period_fops, + kbase_csf_firmware_log_poll_period_read, + kbase_csf_firmware_log_poll_period_write, "%llu\n"); #endif /* CONFIG_DEBUG_FS */ +static void kbase_csf_firmware_log_discard_buffer(struct kbase_device *kbdev) +{ + struct kbase_csf_firmware_log *fw_log = &kbdev->csf.fw_log; + struct firmware_trace_buffer *tb = + kbase_csf_firmware_get_trace_buffer(kbdev, KBASE_CSFFW_LOG_BUF_NAME); + + if (tb == NULL) { + dev_dbg(kbdev->dev, "Can't get the trace buffer, firmware log discard skipped"); + return; + } + + if (atomic_cmpxchg(&fw_log->busy, 0, 1) != 0) + return; + + kbase_csf_firmware_trace_buffer_discard(tb); + + atomic_set(&fw_log->busy, 0); +} + static void kbase_csf_firmware_log_poll(struct work_struct *work) { struct kbase_device *kbdev = container_of(work, struct kbase_device, csf.fw_log.poll_work.work); struct kbase_csf_firmware_log *fw_log = &kbdev->csf.fw_log; - schedule_delayed_work(&fw_log->poll_work, - msecs_to_jiffies(KBASE_CSF_FIRMWARE_LOG_POLL_PERIOD_MS)); + if (fw_log->mode == KBASE_CSF_FIRMWARE_LOG_MODE_AUTO_PRINT) + kbase_csf_firmware_log_dump_buffer(kbdev); + else if (fw_log->mode == KBASE_CSF_FIRMWARE_LOG_MODE_AUTO_DISCARD) + kbase_csf_firmware_log_discard_buffer(kbdev); + else + return; - kbase_csf_firmware_log_dump_buffer(kbdev); + schedule_delayed_work(&fw_log->poll_work, + msecs_to_jiffies(atomic_read(&fw_log->poll_period_ms))); } int kbase_csf_firmware_log_init(struct kbase_device *kbdev) { struct kbase_csf_firmware_log *fw_log = &kbdev->csf.fw_log; + int err = 0; +#if defined(CONFIG_DEBUG_FS) + struct dentry *dentry; +#endif /* CONFIG_DEBUG_FS */ /* Add one byte for null-termination */ fw_log->dump_buf = kmalloc(FIRMWARE_LOG_DUMP_BUF_SIZE + 1, GFP_KERNEL); - if (fw_log->dump_buf == NULL) - return -ENOMEM; + if (fw_log->dump_buf == NULL) { + err = -ENOMEM; + goto out; + } /* Ensure null-termination for all strings */ fw_log->dump_buf[FIRMWARE_LOG_DUMP_BUF_SIZE] = 0; + /* Set default log polling period */ + atomic_set(&fw_log->poll_period_ms, KBASE_CSF_FIRMWARE_LOG_POLL_PERIOD_MS_DEFAULT); + + INIT_DEFERRABLE_WORK(&fw_log->poll_work, kbase_csf_firmware_log_poll); +#ifdef CONFIG_MALI_FW_TRACE_MODE_AUTO_DISCARD + fw_log->mode = KBASE_CSF_FIRMWARE_LOG_MODE_AUTO_DISCARD; + schedule_delayed_work(&fw_log->poll_work, + msecs_to_jiffies(KBASE_CSF_FIRMWARE_LOG_POLL_PERIOD_MS_DEFAULT)); +#elif 
defined(CONFIG_MALI_FW_TRACE_MODE_AUTO_PRINT) + fw_log->mode = KBASE_CSF_FIRMWARE_LOG_MODE_AUTO_PRINT; + schedule_delayed_work(&fw_log->poll_work, + msecs_to_jiffies(KBASE_CSF_FIRMWARE_LOG_POLL_PERIOD_MS_DEFAULT)); +#else /* CONFIG_MALI_FW_TRACE_MODE_MANUAL */ fw_log->mode = KBASE_CSF_FIRMWARE_LOG_MODE_MANUAL; +#endif atomic_set(&fw_log->busy, 0); - INIT_DEFERRABLE_WORK(&fw_log->poll_work, kbase_csf_firmware_log_poll); -#if defined(CONFIG_DEBUG_FS) - debugfs_create_file("fw_trace_enable_mask", 0644, kbdev->mali_debugfs_directory, kbdev, - &kbase_csf_firmware_log_enable_mask_fops); - debugfs_create_file("fw_traces", 0444, kbdev->mali_debugfs_directory, kbdev, - &kbasep_csf_firmware_log_debugfs_fops); - debugfs_create_file("fw_trace_mode", 0644, kbdev->mali_debugfs_directory, kbdev, - &kbase_csf_firmware_log_mode_fops); -#endif /* CONFIG_DEBUG_FS */ +#if !defined(CONFIG_DEBUG_FS) + return 0; +#else /* !CONFIG_DEBUG_FS */ + dentry = debugfs_create_file("fw_trace_enable_mask", 0644, kbdev->mali_debugfs_directory, + kbdev, &kbase_csf_firmware_log_enable_mask_fops); + if (IS_ERR_OR_NULL(dentry)) { + dev_err(kbdev->dev, "Unable to create fw_trace_enable_mask\n"); + err = -ENOENT; + goto free_out; + } + dentry = debugfs_create_file("fw_traces", 0444, kbdev->mali_debugfs_directory, kbdev, + &kbasep_csf_firmware_log_debugfs_fops); + if (IS_ERR_OR_NULL(dentry)) { + dev_err(kbdev->dev, "Unable to create fw_traces\n"); + err = -ENOENT; + goto free_out; + } + dentry = debugfs_create_file("fw_trace_mode", 0644, kbdev->mali_debugfs_directory, kbdev, + &kbase_csf_firmware_log_mode_fops); + if (IS_ERR_OR_NULL(dentry)) { + dev_err(kbdev->dev, "Unable to create fw_trace_mode\n"); + err = -ENOENT; + goto free_out; + } + dentry = debugfs_create_file("fw_trace_poll_period_ms", 0644, kbdev->mali_debugfs_directory, + kbdev, &kbase_csf_firmware_log_poll_period_fops); + if (IS_ERR_OR_NULL(dentry)) { + dev_err(kbdev->dev, "Unable to create fw_trace_poll_period_ms"); + err = -ENOENT; + goto free_out; + } return 0; + +free_out: + kfree(fw_log->dump_buf); + fw_log->dump_buf = NULL; +#endif /* CONFIG_DEBUG_FS */ +out: + return err; } void kbase_csf_firmware_log_term(struct kbase_device *kbdev) { struct kbase_csf_firmware_log *fw_log = &kbdev->csf.fw_log; - cancel_delayed_work_sync(&fw_log->poll_work); - kfree(fw_log->dump_buf); + if (fw_log->dump_buf) { + cancel_delayed_work_sync(&fw_log->poll_work); + kfree(fw_log->dump_buf); + fw_log->dump_buf = NULL; + } } void kbase_csf_firmware_log_dump_buffer(struct kbase_device *kbdev) @@ -262,7 +361,7 @@ void kbase_csf_firmware_log_dump_buffer(struct kbase_device *kbdev) u8 *buf = fw_log->dump_buf, *p, *pnewline, *pend, *pendbuf; unsigned int read_size, remaining_size; struct firmware_trace_buffer *tb = - kbase_csf_firmware_get_trace_buffer(kbdev, FIRMWARE_LOG_BUF_NAME); + kbase_csf_firmware_get_trace_buffer(kbdev, KBASE_CSFFW_LOG_BUF_NAME); if (tb == NULL) { dev_dbg(kbdev->dev, "Can't get the trace buffer, firmware trace dump skipped"); @@ -415,7 +514,7 @@ int kbase_csf_firmware_log_toggle_logging_calls(struct kbase_device *kbdev, u32 /* Wait for the MCU to get disabled */ dev_info(kbdev->dev, "Wait for the MCU to get disabled"); - ret = kbase_pm_wait_for_desired_state(kbdev); + ret = kbase_pm_killable_wait_for_desired_state(kbdev); if (ret) { dev_err(kbdev->dev, "wait for PM state failed when toggling FW logging calls"); diff --git a/mali_kbase/csf/mali_kbase_csf_firmware_no_mali.c b/mali_kbase/csf/mali_kbase_csf_firmware_no_mali.c index 514492c..764c18d 100644 --- 
a/mali_kbase/csf/mali_kbase_csf_firmware_no_mali.c +++ b/mali_kbase/csf/mali_kbase_csf_firmware_no_mali.c @@ -936,7 +936,7 @@ void kbase_csf_firmware_reload_completed(struct kbase_device *kbdev) kbase_pm_update_state(kbdev); } -static u32 convert_dur_to_idle_count(struct kbase_device *kbdev, const u32 dur_ms) +static u32 convert_dur_to_idle_count(struct kbase_device *kbdev, const u32 dur_ms, u32 *modifier) { #define HYSTERESIS_VAL_UNIT_SHIFT (10) /* Get the cntfreq_el0 value, which drives the SYSTEM_TIMESTAMP */ @@ -963,6 +963,8 @@ static u32 convert_dur_to_idle_count(struct kbase_device *kbdev, const u32 dur_m dur_val = (dur_val * freq) >> HYSTERESIS_VAL_UNIT_SHIFT; dur_val = div_u64(dur_val, 1000); + *modifier = 0; + /* Interface limits the value field to S32_MAX */ cnt_val_u32 = (dur_val > S32_MAX) ? S32_MAX : (u32)dur_val; @@ -984,7 +986,7 @@ u32 kbase_csf_firmware_get_gpu_idle_hysteresis_time(struct kbase_device *kbdev) u32 dur; kbase_csf_scheduler_spin_lock(kbdev, &flags); - dur = kbdev->csf.gpu_idle_hysteresis_us; + dur = kbdev->csf.gpu_idle_hysteresis_ns; kbase_csf_scheduler_spin_unlock(kbdev, flags); return dur; @@ -993,7 +995,9 @@ u32 kbase_csf_firmware_get_gpu_idle_hysteresis_time(struct kbase_device *kbdev) u32 kbase_csf_firmware_set_gpu_idle_hysteresis_time(struct kbase_device *kbdev, u32 dur) { unsigned long flags; - const u32 hysteresis_val = convert_dur_to_idle_count(kbdev, dur); + u32 modifier = 0; + + const u32 hysteresis_val = convert_dur_to_idle_count(kbdev, dur, &modifier); /* The 'fw_load_lock' is taken to synchronize against the deferred * loading of FW, where the idle timer will be enabled. @@ -1001,19 +1005,28 @@ u32 kbase_csf_firmware_set_gpu_idle_hysteresis_time(struct kbase_device *kbdev, mutex_lock(&kbdev->fw_load_lock); if (unlikely(!kbdev->csf.firmware_inited)) { kbase_csf_scheduler_spin_lock(kbdev, &flags); - kbdev->csf.gpu_idle_hysteresis_us = dur; + kbdev->csf.gpu_idle_hysteresis_ns = dur; kbdev->csf.gpu_idle_dur_count = hysteresis_val; + kbdev->csf.gpu_idle_dur_count_modifier = modifier; kbase_csf_scheduler_spin_unlock(kbdev, flags); mutex_unlock(&kbdev->fw_load_lock); goto end; } mutex_unlock(&kbdev->fw_load_lock); + if (kbase_reset_gpu_prevent_and_wait(kbdev)) { + dev_warn(kbdev->dev, + "Failed to prevent GPU reset when updating idle_hysteresis_time"); + return kbdev->csf.gpu_idle_dur_count; + } + kbase_csf_scheduler_pm_active(kbdev); - if (kbase_csf_scheduler_wait_mcu_active(kbdev)) { + if (kbase_csf_scheduler_killable_wait_mcu_active(kbdev)) { dev_err(kbdev->dev, "Unable to activate the MCU, the idle hysteresis value shall remain unchanged"); kbase_csf_scheduler_pm_idle(kbdev); + kbase_reset_gpu_allow(kbdev); + return kbdev->csf.gpu_idle_dur_count; } @@ -1041,6 +1054,7 @@ u32 kbase_csf_firmware_set_gpu_idle_hysteresis_time(struct kbase_device *kbdev, kbase_csf_scheduler_spin_lock(kbdev, &flags); kbdev->csf.gpu_idle_hysteresis_us = dur; kbdev->csf.gpu_idle_dur_count = hysteresis_val; + kbdev->csf.gpu_idle_dur_count_modifier = modifier; kbase_csf_firmware_enable_gpu_idle_timer(kbdev); kbase_csf_scheduler_spin_unlock(kbdev, flags); wait_for_global_request(kbdev, GLB_REQ_IDLE_ENABLE_MASK); @@ -1052,6 +1066,7 @@ u32 kbase_csf_firmware_set_gpu_idle_hysteresis_time(struct kbase_device *kbdev, kbase_csf_scheduler_spin_lock(kbdev, &flags); kbdev->csf.gpu_idle_hysteresis_us = dur; kbdev->csf.gpu_idle_dur_count = hysteresis_val; + kbdev->csf.gpu_idle_dur_count_modifier = modifier; kbase_csf_scheduler_spin_unlock(kbdev, flags); } 
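Both the real and NO_MALI copies of kbase_csf_firmware_set_gpu_idle_hysteresis_time() now follow the same bracketing discipline: block GPU reset for the whole firmware handshake and re-allow it on every exit path, returning the cached count when the MCU cannot be activated. The control flow, as a standalone sketch in which the stub functions stand in for the kbase_* calls:

#include <stdio.h>

static int reset_prevent(void) { return 0; }        /* kbase_reset_gpu_prevent_and_wait() */
static void reset_allow(void) { printf("reset allowed again\n"); }
static int wait_mcu_active(void) { return 0; }      /* 0 = MCU came up */

static unsigned int update_timeout(unsigned int cached, unsigned int new_count)
{
        if (reset_prevent())
                return cached;          /* couldn't block reset: keep old value */
        if (wait_mcu_active()) {
                reset_allow();          /* re-allow before bailing out */
                return cached;
        }
        cached = new_count;             /* ... firmware handshake happens here ... */
        reset_allow();
        return cached;
}

int main(void)
{
        printf("count=%u\n", update_timeout(100, 250));
        return 0;
}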
kbase_csf_scheduler_unlock(kbdev); @@ -1060,7 +1075,7 @@ u32 kbase_csf_firmware_set_gpu_idle_hysteresis_time(struct kbase_device *kbdev, #endif kbase_csf_scheduler_pm_idle(kbdev); - + kbase_reset_gpu_allow(kbdev); end: dev_dbg(kbdev->dev, "CSF set firmware idle hysteresis count-value: 0x%.8x", hysteresis_val); @@ -1068,7 +1083,8 @@ end: return hysteresis_val; } -static u32 convert_dur_to_core_pwroff_count(struct kbase_device *kbdev, const u32 dur_us) +static u32 convert_dur_to_core_pwroff_count(struct kbase_device *kbdev, const u32 dur_us, + u32 *modifier) { /* Get the cntfreq_el0 value, which drives the SYSTEM_TIMESTAMP */ u64 freq = arch_timer_get_cntfrq(); @@ -1094,6 +1110,8 @@ static u32 convert_dur_to_core_pwroff_count(struct kbase_device *kbdev, const u3 dur_val = (dur_val * freq) >> HYSTERESIS_VAL_UNIT_SHIFT; dur_val = div_u64(dur_val, 1000000); + *modifier = 0; + /* Interface limits the value field to S32_MAX */ cnt_val_u32 = (dur_val > S32_MAX) ? S32_MAX : (u32)dur_val; @@ -1115,7 +1133,7 @@ u32 kbase_csf_firmware_get_mcu_core_pwroff_time(struct kbase_device *kbdev) unsigned long flags; spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - pwroff = kbdev->csf.mcu_core_pwroff_dur_us; + pwroff = kbdev->csf.mcu_core_pwroff_dur_ns; spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); return pwroff; @@ -1124,11 +1142,14 @@ u32 kbase_csf_firmware_get_mcu_core_pwroff_time(struct kbase_device *kbdev) u32 kbase_csf_firmware_set_mcu_core_pwroff_time(struct kbase_device *kbdev, u32 dur) { unsigned long flags; - const u32 pwroff = convert_dur_to_core_pwroff_count(kbdev, dur); + u32 modifier = 0; + + const u32 pwroff = convert_dur_to_core_pwroff_count(kbdev, dur, &modifier); spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - kbdev->csf.mcu_core_pwroff_dur_us = dur; + kbdev->csf.mcu_core_pwroff_dur_ns = dur; kbdev->csf.mcu_core_pwroff_dur_count = pwroff; + kbdev->csf.mcu_core_pwroff_dur_count_modifier = modifier; spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); dev_dbg(kbdev->dev, "MCU shader Core Poweroff input update: 0x%.8x", pwroff); @@ -1136,6 +1157,11 @@ u32 kbase_csf_firmware_set_mcu_core_pwroff_time(struct kbase_device *kbdev, u32 return pwroff; } +u32 kbase_csf_firmware_reset_mcu_core_pwroff_time(struct kbase_device *kbdev) +{ + return kbase_csf_firmware_set_mcu_core_pwroff_time(kbdev, DEFAULT_GLB_PWROFF_TIMEOUT_NS); +} + int kbase_csf_firmware_early_init(struct kbase_device *kbdev) { init_waitqueue_head(&kbdev->csf.event_wait); @@ -1144,6 +1170,7 @@ int kbase_csf_firmware_early_init(struct kbase_device *kbdev) kbdev->csf.fw_timeout_ms = kbase_get_timeout_ms(kbdev, CSF_FIRMWARE_TIMEOUT); + kbase_csf_firmware_reset_mcu_core_pwroff_time(kbdev); INIT_LIST_HEAD(&kbdev->csf.firmware_interfaces); INIT_LIST_HEAD(&kbdev->csf.firmware_config); INIT_LIST_HEAD(&kbdev->csf.firmware_trace_buffers.list); @@ -1153,25 +1180,30 @@ int kbase_csf_firmware_early_init(struct kbase_device *kbdev) INIT_WORK(&kbdev->csf.fw_error_work, firmware_error_worker); mutex_init(&kbdev->csf.reg_lock); + kbase_csf_pending_gpuq_kicks_init(kbdev); return 0; } void kbase_csf_firmware_early_term(struct kbase_device *kbdev) { + kbase_csf_pending_gpuq_kicks_term(kbdev); mutex_destroy(&kbdev->csf.reg_lock); } int kbase_csf_firmware_late_init(struct kbase_device *kbdev) { - kbdev->csf.gpu_idle_hysteresis_us = FIRMWARE_IDLE_HYSTERESIS_TIME_USEC; + u32 modifier = 0; + + kbdev->csf.gpu_idle_hysteresis_ns = FIRMWARE_IDLE_HYSTERESIS_TIME_NS; #ifdef KBASE_PM_RUNTIME if (kbase_pm_gpu_sleep_allowed(kbdev)) - 
kbdev->csf.gpu_idle_hysteresis_us /= FIRMWARE_IDLE_HYSTERESIS_GPU_SLEEP_SCALER; + kbdev->csf.gpu_idle_hysteresis_ns /= FIRMWARE_IDLE_HYSTERESIS_GPU_SLEEP_SCALER; #endif - WARN_ON(!kbdev->csf.gpu_idle_hysteresis_us); + WARN_ON(!kbdev->csf.gpu_idle_hysteresis_ns); kbdev->csf.gpu_idle_dur_count = - convert_dur_to_idle_count(kbdev, kbdev->csf.gpu_idle_hysteresis_us); + convert_dur_to_idle_count(kbdev, kbdev->csf.gpu_idle_hysteresis_ns, &modifier); + kbdev->csf.gpu_idle_dur_count_modifier = modifier; return 0; } @@ -1254,10 +1286,10 @@ void kbase_csf_firmware_unload_term(struct kbase_device *kbdev) /* NO_MALI: Don't stop firmware or unload MMU tables */ - kbase_csf_scheduler_term(kbdev); - kbase_csf_free_dummy_user_reg_page(kbdev); + kbase_csf_scheduler_term(kbdev); + kbase_csf_doorbell_mapping_term(kbdev); free_global_iface(kbdev); @@ -1604,8 +1636,7 @@ int kbase_csf_firmware_mcu_shared_mapping_init( if (!cpu_addr) goto vmap_error; - va_reg = kbase_alloc_free_region(kbdev, &kbdev->csf.shared_reg_rbtree, 0, num_pages, - KBASE_REG_ZONE_MCU_SHARED); + va_reg = kbase_alloc_free_region(&kbdev->csf.mcu_shared_zone, 0, num_pages); if (!va_reg) goto va_region_alloc_error; @@ -1621,7 +1652,7 @@ int kbase_csf_firmware_mcu_shared_mapping_init( ret = kbase_mmu_insert_pages_no_flush(kbdev, &kbdev->csf.mcu_mmu, va_reg->start_pfn, &phys[0], num_pages, gpu_map_properties, - KBASE_MEM_GROUP_CSF_FW, NULL, NULL, false); + KBASE_MEM_GROUP_CSF_FW, NULL, NULL); if (ret) goto mmu_insert_pages_error; diff --git a/mali_kbase/csf/mali_kbase_csf_kcpu.c b/mali_kbase/csf/mali_kbase_csf_kcpu.c index 6cb6733..08d82d2 100644 --- a/mali_kbase/csf/mali_kbase_csf_kcpu.c +++ b/mali_kbase/csf/mali_kbase_csf_kcpu.c @@ -24,7 +24,9 @@ #include <mali_kbase_ctx_sched.h> #include "device/mali_kbase_device.h" #include "mali_kbase_csf.h" +#include "mali_kbase_csf_sync_debugfs.h" #include <linux/export.h> +#include <linux/version_compat_defs.h> #if IS_ENABLED(CONFIG_SYNC_FILE) #include "mali_kbase_fence.h" @@ -679,7 +681,7 @@ static int kbase_csf_queue_group_suspend_prepare( struct tagged_addr *page_array; u64 start, end, i; - if (((reg->flags & KBASE_REG_ZONE_MASK) != KBASE_REG_ZONE_SAME_VA) || + if ((kbase_bits_to_zone(reg->flags) != SAME_VA_ZONE) || (kbase_reg_current_backed_size(reg) < nr_pages) || !(reg->flags & KBASE_REG_CPU_WR) || (reg->gpu_alloc->type != KBASE_MEM_TYPE_NATIVE) || @@ -1343,6 +1345,7 @@ static void kbase_csf_fence_wait_callback(struct dma_fence *fence, /* Fence gets signaled. 
Deactivate the timer for fence-wait timeout */ del_timer(&kcpu_queue->fence_timeout); #endif + KBASE_KTRACE_ADD_CSF_KCPU(kctx->kbdev, KCPU_FENCE_WAIT_END, kcpu_queue, fence->context, fence->seqno); @@ -1445,14 +1448,14 @@ static void fence_timeout_callback(struct timer_list *timer) } /** - * fence_timeout_start() - Start a timer to check fence-wait timeout + * fence_wait_timeout_start() - Start a timer to check fence-wait timeout * * @cmd: KCPU command queue * * Activate a timer to check whether a fence-wait command in the queue * gets completed within FENCE_WAIT_TIMEOUT_MS */ -static void fence_timeout_start(struct kbase_kcpu_command_queue *cmd) +static void fence_wait_timeout_start(struct kbase_kcpu_command_queue *cmd) { mod_timer(&cmd->fence_timeout, jiffies + msecs_to_jiffies(FENCE_WAIT_TIMEOUT_MS)); } @@ -1489,18 +1492,20 @@ static int kbase_kcpu_fence_wait_process( if (kcpu_queue->fence_wait_processed) { fence_status = dma_fence_get_status(fence); } else { - int cb_err = dma_fence_add_callback(fence, + int cb_err; + + KBASE_KTRACE_ADD_CSF_KCPU(kctx->kbdev, KCPU_FENCE_WAIT_START, kcpu_queue, + fence->context, fence->seqno); + + cb_err = dma_fence_add_callback(fence, &fence_info->fence_cb, kbase_csf_fence_wait_callback); - KBASE_KTRACE_ADD_CSF_KCPU(kctx->kbdev, - KCPU_FENCE_WAIT_START, kcpu_queue, - fence->context, fence->seqno); fence_status = cb_err; if (cb_err == 0) { kcpu_queue->fence_wait_processed = true; #ifdef CONFIG_MALI_FENCE_DEBUG - fence_timeout_start(kcpu_queue); + fence_wait_timeout_start(kcpu_queue); #endif } else if (cb_err == -ENOENT) { fence_status = dma_fence_get_status(fence); @@ -1512,14 +1517,12 @@ static int kbase_kcpu_fence_wait_process( "Unexpected status for fence %s of ctx:%d_%d kcpu queue:%u", info.name, kctx->tgid, kctx->id, kcpu_queue->id); } - /* - * At this point the fence in question is already signalled without - * any error. Its useful to print a FENCE_WAIT_END trace here to - * indicate completion. - */ - KBASE_KTRACE_ADD_CSF_KCPU(kctx->kbdev, - KCPU_FENCE_WAIT_END, kcpu_queue, - fence->context, fence->seqno); + + KBASE_KTRACE_ADD_CSF_KCPU(kctx->kbdev, KCPU_FENCE_WAIT_END, kcpu_queue, + fence->context, fence->seqno); + } else { + KBASE_KTRACE_ADD_CSF_KCPU(kctx->kbdev, KCPU_FENCE_WAIT_END, kcpu_queue, + fence->context, fence->seqno); } } @@ -1565,12 +1568,193 @@ static int kbase_kcpu_fence_wait_prepare(struct kbase_kcpu_command_queue *kcpu_q return 0; } +/** + * fence_signal_timeout_start() - Start a timer to check enqueued fence-signal command is + * blocked for too long a duration + * + * @kcpu_queue: KCPU command queue + * + * Activate the queue's fence_signal_timeout timer to check whether a fence-signal command + * enqueued has been blocked for longer than a configured wait duration. 
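+ * The timer is armed only while the fence_signal_timeout_enabled flag is set;
+ * the wait duration is resolved via kbase_get_timeout_ms(kbdev, KCPU_FENCE_SIGNAL_TIMEOUT) at arming time.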
+ */ +static void fence_signal_timeout_start(struct kbase_kcpu_command_queue *kcpu_queue) +{ + struct kbase_device *kbdev = kcpu_queue->kctx->kbdev; + unsigned int wait_ms = kbase_get_timeout_ms(kbdev, KCPU_FENCE_SIGNAL_TIMEOUT); + + if (atomic_read(&kbdev->fence_signal_timeout_enabled)) + mod_timer(&kcpu_queue->fence_signal_timeout, jiffies + msecs_to_jiffies(wait_ms)); +} + +static void kbase_kcpu_command_fence_force_signaled_set( + struct kbase_kcpu_command_fence_info *fence_info, + bool has_force_signaled) +{ + fence_info->fence_has_force_signaled = has_force_signaled; +} + +bool kbase_kcpu_command_fence_has_force_signaled(struct kbase_kcpu_command_fence_info *fence_info) +{ + return fence_info->fence_has_force_signaled; +} + +static int kbase_kcpu_fence_force_signal_process( + struct kbase_kcpu_command_queue *kcpu_queue, + struct kbase_kcpu_command_fence_info *fence_info) +{ + struct kbase_context *const kctx = kcpu_queue->kctx; + int ret; + + /* Already force-signalled, just return */ + if (kbase_kcpu_command_fence_has_force_signaled(fence_info)) + return 0; + + if (WARN_ON(!fence_info->fence)) + return -EINVAL; + + ret = dma_fence_signal(fence_info->fence); + if (unlikely(ret < 0)) { + dev_warn(kctx->kbdev->dev, "dma_fence(%d) has been signalled already\n", ret); + /* Treated as a success */ + ret = 0; + } + + KBASE_KTRACE_ADD_CSF_KCPU(kctx->kbdev, KCPU_FENCE_SIGNAL, kcpu_queue, + fence_info->fence->context, + fence_info->fence->seqno); + +#if (KERNEL_VERSION(5, 1, 0) > LINUX_VERSION_CODE) + dev_info(kctx->kbdev->dev, + "ctx:%d_%d kcpu queue[%pK]:%u signal fence[%pK] context#seqno:%llu#%u\n", + kctx->tgid, kctx->id, kcpu_queue, kcpu_queue->id, fence_info->fence, + fence_info->fence->context, fence_info->fence->seqno); +#else + dev_info(kctx->kbdev->dev, + "ctx:%d_%d kcpu queue[%pK]:%u signal fence[%pK] context#seqno:%llu#%llu\n", + kctx->tgid, kctx->id, kcpu_queue, kcpu_queue->id, fence_info->fence, + fence_info->fence->context, fence_info->fence->seqno); +#endif + + /* dma_fence refcount needs to be decreased to release it. */ + dma_fence_put(fence_info->fence); + fence_info->fence = NULL; + + return ret; +} + +static void kcpu_force_signal_fence(struct kbase_kcpu_command_queue *kcpu_queue) +{ + int status; + int i; +#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) + struct fence *fence; +#else + struct dma_fence *fence; +#endif + struct kbase_context *const kctx = kcpu_queue->kctx; +#ifdef CONFIG_MALI_FENCE_DEBUG + int del; +#endif + + /* Force trigger all pending fence-signal commands */ + for (i = 0; i != kcpu_queue->num_pending_cmds; ++i) { + struct kbase_kcpu_command *cmd = + &kcpu_queue->commands[(u8)(kcpu_queue->start_offset + i)]; + + if (cmd->type == BASE_KCPU_COMMAND_TYPE_FENCE_SIGNAL) { + /* If a fence has already been force-signalled previously, + * just skip it in this round of force signalling.
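+ * (A force-signalled command has already dropped its dma_fence reference,
+ * so it must not be processed a second time.)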
+ */ + if (kbase_kcpu_command_fence_has_force_signaled(&cmd->info.fence)) + continue; + + fence = kbase_fence_get(&cmd->info.fence); + + dev_info(kctx->kbdev->dev, "kbase KCPU[%pK] cmd%d fence[%pK] force signaled\n", + kcpu_queue, i + 1, fence); + + /* Set the ETIMEDOUT error flag before signalling the fence */ + dma_fence_set_error_helper(fence, -ETIMEDOUT); + + /* Force-signal the fence */ + status = kbase_kcpu_fence_force_signal_process( + kcpu_queue, &cmd->info.fence); + if (status < 0) + dev_err(kctx->kbdev->dev, "kbase signal failed\n"); + else + kbase_kcpu_command_fence_force_signaled_set(&cmd->info.fence, true); + + kcpu_queue->has_error = true; + } + } + + /* Set fence_signal_pending_cnt to 0 + * and delete the kcpu_queue's timer, + * because all the pending fences in the queue have been signalled + */ + atomic_set(&kcpu_queue->fence_signal_pending_cnt, 0); +#ifdef CONFIG_MALI_FENCE_DEBUG + del = del_timer_sync(&kcpu_queue->fence_signal_timeout); + dev_info(kctx->kbdev->dev, "kbase KCPU [%pK] delete fence signal timeout timer ret: %d", + kcpu_queue, del); +#else + del_timer_sync(&kcpu_queue->fence_signal_timeout); +#endif +} + +static void kcpu_queue_force_fence_signal(struct kbase_kcpu_command_queue *kcpu_queue) +{ + struct kbase_context *const kctx = kcpu_queue->kctx; + char buff[] = "surfaceflinger"; + + /* Force-signal unsignalled fences, except for surfaceflinger */ + if (memcmp(kctx->comm, buff, sizeof(buff))) { + mutex_lock(&kcpu_queue->lock); + kcpu_force_signal_fence(kcpu_queue); + mutex_unlock(&kcpu_queue->lock); + } +} + +/** + * fence_signal_timeout_cb() - Timeout callback function for fence-signal-wait + * + * @timer: Timer struct + * + * Callback function invoked when an enqueued fence-signal command has exceeded its + * configured wait duration. At the moment it is a simple placeholder, deferring the + * actual sync state dump to a bottom-half workqueue item. + */ +static void fence_signal_timeout_cb(struct timer_list *timer) +{ + struct kbase_kcpu_command_queue *kcpu_queue = + container_of(timer, struct kbase_kcpu_command_queue, fence_signal_timeout); + struct kbase_context *const kctx = kcpu_queue->kctx; +#ifdef CONFIG_MALI_FENCE_DEBUG + dev_warn(kctx->kbdev->dev, "kbase KCPU fence signal timeout callback triggered"); +#endif + + /* If we have additional pending fence-signal commands in the queue, re-arm for the + * remaining fence-signal commands, and dump the work to dmesg, only if the + * global configuration option is set.
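+ * The heavier work (the sync state dump and any force-signalling) is deferred to
+ * kcpu_queue_timeout_worker(), since the queue mutex cannot be taken from this
+ * atomic timer context.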
+ */ + if (atomic_read(&kctx->kbdev->fence_signal_timeout_enabled)) { + if (atomic_read(&kcpu_queue->fence_signal_pending_cnt) > 1) + fence_signal_timeout_start(kcpu_queue); + + kthread_queue_work(&kcpu_queue->csf_kcpu_worker, &kcpu_queue->timeout_work); + } +} + static int kbasep_kcpu_fence_signal_process(struct kbase_kcpu_command_queue *kcpu_queue, struct kbase_kcpu_command_fence_info *fence_info) { struct kbase_context *const kctx = kcpu_queue->kctx; int ret; + /* already force signaled */ + if (kbase_kcpu_command_fence_has_force_signaled(fence_info)) + return 0; + if (WARN_ON(!fence_info->fence)) return -EINVAL; @@ -1586,6 +1770,25 @@ static int kbasep_kcpu_fence_signal_process(struct kbase_kcpu_command_queue *kcp fence_info->fence->context, fence_info->fence->seqno); + /* If one has multiple enqueued fence signal commands, re-arm the timer */ + if (atomic_dec_return(&kcpu_queue->fence_signal_pending_cnt) > 0) { + fence_signal_timeout_start(kcpu_queue); +#ifdef CONFIG_MALI_FENCE_DEBUG + dev_dbg(kctx->kbdev->dev, + "kbase re-arm KCPU fence signal timeout timer for next signal command"); +#endif + } else { +#ifdef CONFIG_MALI_FENCE_DEBUG + int del = del_timer_sync(&kcpu_queue->fence_signal_timeout); + + dev_dbg(kctx->kbdev->dev, "kbase KCPU delete fence signal timeout timer ret: %d", + del); + CSTD_UNUSED(del); +#else + del_timer_sync(&kcpu_queue->fence_signal_timeout); +#endif + } + /* dma_fence refcount needs to be decreased to release it. */ kbase_fence_put(fence_info->fence); fence_info->fence = NULL; @@ -1614,6 +1817,10 @@ static int kbasep_kcpu_fence_signal_init(struct kbase_kcpu_command_queue *kcpu_q /* Set reference to KCPU metadata */ kcpu_fence->metadata = kcpu_queue->metadata; + /* Set reference to KCPU metadata and increment refcount */ + kcpu_fence->metadata = kcpu_queue->metadata; + WARN_ON(!kbase_refcount_inc_not_zero(&kcpu_fence->metadata->refcount)); + #if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) fence_out = (struct fence *)kcpu_fence; #else @@ -1635,8 +1842,6 @@ static int kbasep_kcpu_fence_signal_init(struct kbase_kcpu_command_queue *kcpu_q dma_fence_get(fence_out); #endif - WARN_ON(!kbase_refcount_inc_not_zero(&kcpu_fence->metadata->refcount)); - /* create a sync_file fd representing the fence */ *sync_file = sync_file_create(fence_out); if (!(*sync_file)) { @@ -1654,6 +1859,7 @@ static int kbasep_kcpu_fence_signal_init(struct kbase_kcpu_command_queue *kcpu_q current_command->type = BASE_KCPU_COMMAND_TYPE_FENCE_SIGNAL; current_command->info.fence.fence = fence_out; + kbase_kcpu_command_fence_force_signaled_set(¤t_command->info.fence, false); return 0; @@ -1700,6 +1906,10 @@ static int kbase_kcpu_fence_signal_prepare(struct kbase_kcpu_command_queue *kcpu * before returning success. 
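* Note that fd_install() publishes the file to the process' fd table and transfers * ownership of the file reference, so no failure path may run past this point.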
*/ fd_install(fd, sync_file->file); + + if (atomic_inc_return(&kcpu_queue->fence_signal_pending_cnt) == 1) + fence_signal_timeout_start(kcpu_queue); + return 0; fail: @@ -1732,6 +1942,90 @@ int kbase_kcpu_fence_signal_init(struct kbase_kcpu_command_queue *kcpu_queue, KBASE_EXPORT_TEST_API(kbase_kcpu_fence_signal_init); #endif /* CONFIG_SYNC_FILE */ +static void kcpu_queue_dump(struct kbase_kcpu_command_queue *queue) +{ + struct kbase_context *kctx = queue->kctx; + struct kbase_kcpu_command *cmd; + struct kbase_kcpu_command_fence_info *fence_info; + struct kbase_kcpu_dma_fence *kcpu_fence; +#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) + struct fence *fence; +#else + struct dma_fence *fence; +#endif + struct kbase_sync_fence_info info; + size_t i; + + mutex_lock(&queue->lock); + + /* Find the next fence signal command in the queue */ + for (i = 0; i != queue->num_pending_cmds; ++i) { + cmd = &queue->commands[(u8)(queue->start_offset + i)]; + if (cmd->type == BASE_KCPU_COMMAND_TYPE_FENCE_SIGNAL) { + fence_info = &cmd->info.fence; + /* find the first unforce signaled fence */ + if (!kbase_kcpu_command_fence_has_force_signaled(fence_info)) + break; + } + } + + if (i == queue->num_pending_cmds) { + dev_err(kctx->kbdev->dev, + "%s: No fence signal command found in ctx:%d_%d kcpu queue:%u", __func__, + kctx->tgid, kctx->id, queue->id); + mutex_unlock(&queue->lock); + return; + } + + + fence = kbase_fence_get(fence_info); + if (!fence) { + dev_err(kctx->kbdev->dev, "no fence found in ctx:%d_%d kcpu queue:%u", kctx->tgid, + kctx->id, queue->id); + mutex_unlock(&queue->lock); + return; + } + + kcpu_fence = kbase_kcpu_dma_fence_get(fence); + if (!kcpu_fence) { + dev_err(kctx->kbdev->dev, "no fence metadata found in ctx:%d_%d kcpu queue:%u", + kctx->tgid, kctx->id, queue->id); + kbase_fence_put(fence); + mutex_unlock(&queue->lock); + return; + } + + kbase_sync_fence_info_get(fence, &info); + + dev_warn(kctx->kbdev->dev, "------------------------------------------------\n"); + dev_warn(kctx->kbdev->dev, "KCPU Fence signal timeout detected for ctx:%d_%d\n", kctx->tgid, + kctx->id); + dev_warn(kctx->kbdev->dev, "------------------------------------------------\n"); + dev_warn(kctx->kbdev->dev, "Kcpu queue:%u still waiting for fence[%pK] context#seqno:%s\n", + queue->id, fence, info.name); + dev_warn(kctx->kbdev->dev, "Fence metadata timeline name: %s\n", + kcpu_fence->metadata->timeline_name); + + kbase_fence_put(fence); + mutex_unlock(&queue->lock); + + mutex_lock(&kctx->csf.kcpu_queues.lock); + kbasep_csf_sync_kcpu_dump_locked(kctx, NULL); + mutex_unlock(&kctx->csf.kcpu_queues.lock); + + dev_warn(kctx->kbdev->dev, "-----------------------------------------------\n"); +} + +static void kcpu_queue_timeout_worker(struct kthread_work *data) +{ + struct kbase_kcpu_command_queue *queue = + container_of(data, struct kbase_kcpu_command_queue, timeout_work); + + kcpu_queue_dump(queue); + + kcpu_queue_force_fence_signal(queue); +} + static void kcpu_queue_process_worker(struct kthread_work *data) { struct kbase_kcpu_command_queue *queue = container_of(data, @@ -2087,6 +2381,7 @@ static void kcpu_queue_process(struct kbase_kcpu_command_queue *queue, status = kbase_csf_queue_group_suspend_process( queue->kctx, sus_buf, cmd->info.suspend_buf_copy.group_handle); + if (status) queue->has_error = true; @@ -2579,6 +2874,7 @@ int kbase_csf_kcpu_queue_new(struct kbase_context *kctx, INIT_LIST_HEAD(&queue->jit_blocked); queue->has_error = false; kthread_init_work(&queue->work, kcpu_queue_process_worker); + 
kthread_init_work(&queue->timeout_work, kcpu_queue_timeout_worker); queue->id = idx; newq->id = idx; @@ -2594,9 +2890,96 @@ int kbase_csf_kcpu_queue_new(struct kbase_context *kctx, #ifdef CONFIG_MALI_FENCE_DEBUG kbase_timer_setup(&queue->fence_timeout, fence_timeout_callback); #endif + +#if IS_ENABLED(CONFIG_SYNC_FILE) + atomic_set(&queue->fence_signal_pending_cnt, 0); + kbase_timer_setup(&queue->fence_signal_timeout, fence_signal_timeout_cb); +#endif out: mutex_unlock(&kctx->csf.kcpu_queues.lock); return ret; } KBASE_EXPORT_TEST_API(kbase_csf_kcpu_queue_new); + +int kbase_csf_kcpu_queue_halt_timers(struct kbase_device *kbdev) +{ + struct kbase_context *kctx; + + list_for_each_entry(kctx, &kbdev->kctx_list, kctx_list_link) { + unsigned long queue_idx; + struct kbase_csf_kcpu_queue_context *kcpu_ctx = &kctx->csf.kcpu_queues; + + mutex_lock(&kcpu_ctx->lock); + + for_each_set_bit(queue_idx, kcpu_ctx->in_use, KBASEP_MAX_KCPU_QUEUES) { + struct kbase_kcpu_command_queue *kcpu_queue = kcpu_ctx->array[queue_idx]; + + if (unlikely(!kcpu_queue)) + continue; + + mutex_lock(&kcpu_queue->lock); + + if (atomic_read(&kcpu_queue->fence_signal_pending_cnt)) { + int ret = del_timer_sync(&kcpu_queue->fence_signal_timeout); + + dev_dbg(kbdev->dev, + "Fence signal timeout on KCPU queue(%lu), kctx (%d_%d) was %s on suspend", + queue_idx, kctx->tgid, kctx->id, + ret ? "pending" : "not pending"); + } + +#ifdef CONFIG_MALI_FENCE_DEBUG + if (kcpu_queue->fence_wait_processed) { + int ret = del_timer_sync(&kcpu_queue->fence_timeout); + + dev_dbg(kbdev->dev, + "Fence wait timeout on KCPU queue(%lu), kctx (%d_%d) was %s on suspend", + queue_idx, kctx->tgid, kctx->id, + ret ? "pending" : "not pending"); + } +#endif + mutex_unlock(&kcpu_queue->lock); + } + mutex_unlock(&kcpu_ctx->lock); + } + return 0; +} + +void kbase_csf_kcpu_queue_resume_timers(struct kbase_device *kbdev) +{ + struct kbase_context *kctx; + + list_for_each_entry(kctx, &kbdev->kctx_list, kctx_list_link) { + unsigned long queue_idx; + struct kbase_csf_kcpu_queue_context *kcpu_ctx = &kctx->csf.kcpu_queues; + + mutex_lock(&kcpu_ctx->lock); + + for_each_set_bit(queue_idx, kcpu_ctx->in_use, KBASEP_MAX_KCPU_QUEUES) { + struct kbase_kcpu_command_queue *kcpu_queue = kcpu_ctx->array[queue_idx]; + + if (unlikely(!kcpu_queue)) + continue; + + mutex_lock(&kcpu_queue->lock); +#ifdef CONFIG_MALI_FENCE_DEBUG + if (kcpu_queue->fence_wait_processed) { + fence_wait_timeout_start(kcpu_queue); + dev_dbg(kbdev->dev, + "Fence wait timeout on KCPU queue(%lu), kctx (%d_%d) has been resumed on system resume", + queue_idx, kctx->tgid, kctx->id); + } +#endif + if (atomic_read(&kbdev->fence_signal_timeout_enabled) && + atomic_read(&kcpu_queue->fence_signal_pending_cnt)) { + fence_signal_timeout_start(kcpu_queue); + dev_dbg(kbdev->dev, + "Fence signal timeout on KCPU queue(%lu), kctx (%d_%d) has been resumed on system resume", + queue_idx, kctx->tgid, kctx->id); + } + mutex_unlock(&kcpu_queue->lock); + } + mutex_unlock(&kcpu_ctx->lock); + } +} diff --git a/mali_kbase/csf/mali_kbase_csf_kcpu.h b/mali_kbase/csf/mali_kbase_csf_kcpu.h index 41c6e07..4a8d937 100644 --- a/mali_kbase/csf/mali_kbase_csf_kcpu.h +++ b/mali_kbase/csf/mali_kbase_csf_kcpu.h @@ -53,6 +53,7 @@ struct kbase_kcpu_command_import_info { * @fence_cb: Fence callback * @fence: Fence * @kcpu_queue: kcpu command queue + * @fence_has_force_signaled: fence has forced signaled after fence timeouted */ struct kbase_kcpu_command_fence_info { #if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) @@ -63,6 +64,7 @@ struct 
kbase_kcpu_command_fence_info { struct dma_fence *fence; #endif /* LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0) */ struct kbase_kcpu_command_queue *kcpu_queue; + bool fence_has_force_signaled; }; /** @@ -249,10 +251,13 @@ struct kbase_kcpu_command { * enqueued to this command queue. * @csf_kcpu_worker: Dedicated worker for processing kernel CPU command * queues. - * @work: struct work_struct which contains a pointer to + * @work: struct kthread_work which contains a pointer to * the function which handles processing of kcpu * commands enqueued into a kcpu command queue; * part of kernel API for processing workqueues + * @timeout_work: struct kthread_work which contains a pointer to the + * function which handles post-timeout actions + * queue when a fence signal timeout occurs. * @start_offset: Index of the command to be executed next * @id: KCPU command queue ID. * @num_pending_cmds: The number of commands enqueued but not yet @@ -284,6 +289,9 @@ struct kbase_kcpu_command { * @fence_timeout: Timer used to detect the fence wait timeout. * @metadata: Metadata structure containing basic information about * this queue for any fence objects associated with this queue. + * @fence_signal_timeout: Timer used for detect a fence signal command has + * been blocked for too long. + * @fence_signal_pending_cnt: Enqueued fence signal commands in the queue. */ struct kbase_kcpu_command_queue { struct mutex lock; @@ -291,6 +299,7 @@ struct kbase_kcpu_command_queue { struct kbase_kcpu_command commands[KBASEP_KCPU_QUEUE_SIZE]; struct kthread_worker csf_kcpu_worker; struct kthread_work work; + struct kthread_work timeout_work; u8 start_offset; u8 id; u16 num_pending_cmds; @@ -308,6 +317,8 @@ struct kbase_kcpu_command_queue { #if IS_ENABLED(CONFIG_SYNC_FILE) struct kbase_kcpu_dma_fence_meta *metadata; #endif /* CONFIG_SYNC_FILE */ + struct timer_list fence_signal_timeout; + atomic_t fence_signal_pending_cnt; }; /** @@ -382,4 +393,32 @@ int kbase_kcpu_fence_signal_init(struct kbase_kcpu_command_queue *kcpu_queue, struct base_fence *fence, struct sync_file **sync_file, int *fd); #endif /* CONFIG_SYNC_FILE */ +/* + * kbase_csf_kcpu_queue_halt_timers - Halt the KCPU fence timers associated with + * the kbase device. + * + * @kbdev: Kbase device + * + * Note that this function assumes that the caller has ensured that the + * kbase_device::kctx_list does not get updated during this function's runtime. + * At the moment, the function is only safe to call during system suspend, when + * the device PM active count has reached zero. + * + * Return: 0 on success, negative value otherwise. + */ +int kbase_csf_kcpu_queue_halt_timers(struct kbase_device *kbdev); + +/* + * kbase_csf_kcpu_queue_resume_timers - Resume the KCPU fence timers associated + * with the kbase device. + * + * @kbdev: Kbase device + * + * Note that this function assumes that the caller has ensured that the + * kbase_device::kctx_list does not get updated during this function's runtime. + * At the moment, the function is only safe to call during system resume. 
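+ * Timers are re-armed only for queues that still have a fence-wait in flight
+ * (when fence debugging is enabled) or pending fence-signal commands, the latter
+ * only while the fence-signal timeout is enabled.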
+ */ +void kbase_csf_kcpu_queue_resume_timers(struct kbase_device *kbdev); + +bool kbase_kcpu_command_fence_has_force_signaled(struct kbase_kcpu_command_fence_info *fence_info); #endif /* _KBASE_CSF_KCPU_H_ */ diff --git a/mali_kbase/csf/mali_kbase_csf_kcpu_fence_debugfs.c b/mali_kbase/csf/mali_kbase_csf_kcpu_fence_debugfs.c new file mode 100644 index 0000000..cd55f62 --- /dev/null +++ b/mali_kbase/csf/mali_kbase_csf_kcpu_fence_debugfs.c @@ -0,0 +1,151 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * + * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ +#include <linux/fs.h> +#include <linux/version.h> +#include <linux/module.h> +#if IS_ENABLED(CONFIG_DEBUG_FS) +#include <linux/debugfs.h> +#endif + +#include <mali_kbase.h> +#include <csf/mali_kbase_csf_kcpu_fence_debugfs.h> +#include <mali_kbase_hwaccess_time.h> + +#define BUF_SIZE 10 + +#if IS_ENABLED(CONFIG_DEBUG_FS) +static ssize_t kbase_csf_kcpu_queue_fence_signal_enabled_get(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + int ret; + struct kbase_device *kbdev = file->private_data; + + if (atomic_read(&kbdev->fence_signal_timeout_enabled)) + ret = simple_read_from_buffer(buf, count, ppos, "1\n", 2); + else + ret = simple_read_from_buffer(buf, count, ppos, "0\n", 2); + + return ret; +}; + +static ssize_t kbase_csf_kcpu_queue_fence_signal_enabled_set(struct file *file, + const char __user *buf, size_t count, + loff_t *ppos) +{ + int ret; + unsigned int enabled; + struct kbase_device *kbdev = file->private_data; + + ret = kstrtouint_from_user(buf, count, 10, &enabled); + if (ret < 0) + return ret; + + atomic_set(&kbdev->fence_signal_timeout_enabled, enabled); + + return count; +} + +static const struct file_operations kbase_csf_kcpu_queue_fence_signal_fops = { + .owner = THIS_MODULE, + .read = kbase_csf_kcpu_queue_fence_signal_enabled_get, + .write = kbase_csf_kcpu_queue_fence_signal_enabled_set, + .open = simple_open, + .llseek = default_llseek, +}; + +static ssize_t kbase_csf_kcpu_queue_fence_signal_timeout_get(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + int size; + char buffer[BUF_SIZE]; + struct kbase_device *kbdev = file->private_data; + unsigned int timeout_ms = kbase_get_timeout_ms(kbdev, KCPU_FENCE_SIGNAL_TIMEOUT); + + size = scnprintf(buffer, sizeof(buffer), "%u\n", timeout_ms); + return simple_read_from_buffer(buf, count, ppos, buffer, size); +} + +static ssize_t kbase_csf_kcpu_queue_fence_signal_timeout_set(struct file *file, + const char __user *buf, size_t count, + loff_t *ppos) +{ + int ret; + unsigned int timeout_ms; + struct kbase_device *kbdev = file->private_data; + + ret = kstrtouint_from_user(buf, count, 10, &timeout_ms); + if (ret < 0) + return ret; + + /* The timeout passed by the user is bounded when trying to insert it into + * the 
precomputed timeout table, so we don't need to do any more validation + * before-hand. + */ + kbase_device_set_timeout_ms(kbdev, KCPU_FENCE_SIGNAL_TIMEOUT, timeout_ms); + + return count; +} + +static const struct file_operations kbase_csf_kcpu_queue_fence_signal_timeout_fops = { + .owner = THIS_MODULE, + .read = kbase_csf_kcpu_queue_fence_signal_timeout_get, + .write = kbase_csf_kcpu_queue_fence_signal_timeout_set, + .open = simple_open, + .llseek = default_llseek, +}; + +int kbase_csf_fence_timer_debugfs_init(struct kbase_device *kbdev) +{ + struct dentry *file; + const mode_t mode = 0644; + + if (WARN_ON(IS_ERR_OR_NULL(kbdev->mali_debugfs_directory))) + return -1; + + file = debugfs_create_file("fence_signal_timeout_enable", mode, + kbdev->mali_debugfs_directory, kbdev, + &kbase_csf_kcpu_queue_fence_signal_fops); + + if (IS_ERR_OR_NULL(file)) { + dev_warn(kbdev->dev, "Unable to create fence signal timer toggle entry"); + return -1; + } + + file = debugfs_create_file("fence_signal_timeout_ms", mode, kbdev->mali_debugfs_directory, + kbdev, &kbase_csf_kcpu_queue_fence_signal_timeout_fops); + + if (IS_ERR_OR_NULL(file)) { + dev_warn(kbdev->dev, "Unable to create fence signal timeout entry"); + return -1; + } + return 0; +} + +#else +int kbase_csf_fence_timer_debugfs_init(struct kbase_device *kbdev) +{ + return 0; +} + +#endif +void kbase_csf_fence_timer_debugfs_term(struct kbase_device *kbdev) +{ +} diff --git a/mali_kbase/mali_kbase_bits.h b/mali_kbase/csf/mali_kbase_csf_kcpu_fence_debugfs.h index a085fd8..e3799fb 100644 --- a/mali_kbase/mali_kbase_bits.h +++ b/mali_kbase/csf/mali_kbase_csf_kcpu_fence_debugfs.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -18,14 +18,25 @@ * http://www.gnu.org/licenses/gpl-2.0.html. * */ +#ifndef _KBASE_CSF_KCPU_FENCE_SIGNAL_DEBUGFS_H_ +#define _KBASE_CSF_KCPU_FENCE_SIGNAL_DEBUGFS_H_ -#ifndef _KBASE_BITS_H_ -#define _KBASE_BITS_H_ +struct kbase_device; -#if (KERNEL_VERSION(4, 19, 0) <= LINUX_VERSION_CODE) -#include <linux/bits.h> -#else -#include <linux/bitops.h> -#endif +/* + * kbase_csf_fence_timer_debugfs_init - Initialize fence signal timeout debugfs + * entries. + * @kbdev: Kbase device. + * + * Return: 0 on success, -1 on failure. + */ +int kbase_csf_fence_timer_debugfs_init(struct kbase_device *kbdev); + +/* + * kbase_csf_fence_timer_debugfs_term - Terminate fence signal timeout debugfs + * entries. + * @kbdev: Kbase device. 
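+ * Currently a no-op: the entries are expected to be removed recursively together
+ * with the parent Mali debugfs directory.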
+ */ +void kbase_csf_fence_timer_debugfs_term(struct kbase_device *kbdev); -#endif /* _KBASE_BITS_H_ */ +#endif /* _KBASE_CSF_KCPU_FENCE_SIGNAL_DEBUGFS_H_ */ diff --git a/mali_kbase/csf/mali_kbase_csf_mcu_shared_reg.c b/mali_kbase/csf/mali_kbase_csf_mcu_shared_reg.c index bb5a092..863cf10 100644 --- a/mali_kbase/csf/mali_kbase_csf_mcu_shared_reg.c +++ b/mali_kbase/csf/mali_kbase_csf_mcu_shared_reg.c @@ -83,7 +83,7 @@ static unsigned long get_userio_mmu_flags(struct kbase_device *kbdev) static void set_page_meta_status_not_movable(struct tagged_addr phy) { - if (kbase_page_migration_enabled) { + if (kbase_is_page_migration_enabled()) { struct kbase_page_metadata *page_md = kbase_page_private(as_page(phy)); if (page_md) { @@ -117,7 +117,7 @@ static inline int insert_dummy_pages(struct kbase_device *kbdev, u64 vpfn, u32 n return kbase_mmu_insert_pages(kbdev, &kbdev->csf.mcu_mmu, vpfn, shared_regs->dummy_phys, nr_pages, mem_flags, MCU_AS_NR, KBASE_MEM_GROUP_CSF_FW, - mmu_sync_info, NULL, false); + mmu_sync_info, NULL); } /* Reset consecutive retry count to zero */ @@ -613,8 +613,7 @@ static int shared_mcu_csg_reg_init(struct kbase_device *kbdev, int err, i; INIT_LIST_HEAD(&csg_reg->link); - reg = kbase_alloc_free_region(kbdev, &kbdev->csf.shared_reg_rbtree, 0, nr_csg_reg_pages, - KBASE_REG_ZONE_MCU_SHARED); + reg = kbase_alloc_free_region(&kbdev->csf.mcu_shared_zone, 0, nr_csg_reg_pages); if (!reg) { dev_err(kbdev->dev, "%s: Failed to allocate a MCU shared region for %zu pages\n", @@ -667,18 +666,19 @@ static int shared_mcu_csg_reg_init(struct kbase_device *kbdev, fail_userio_pages_map_fail: while (i-- > 0) { vpfn = CSG_REG_USERIO_VPFN(reg, i, nr_susp_pages); - kbase_mmu_teardown_pages(kbdev, &kbdev->csf.mcu_mmu, vpfn, shared_regs->dummy_phys, - KBASEP_NUM_CS_USER_IO_PAGES, KBASEP_NUM_CS_USER_IO_PAGES, - MCU_AS_NR, true); + kbase_mmu_teardown_firmware_pages(kbdev, &kbdev->csf.mcu_mmu, vpfn, + shared_regs->dummy_phys, + KBASEP_NUM_CS_USER_IO_PAGES, + KBASEP_NUM_CS_USER_IO_PAGES, MCU_AS_NR); } vpfn = CSG_REG_PMOD_BUF_VPFN(reg, nr_susp_pages); - kbase_mmu_teardown_pages(kbdev, &kbdev->csf.mcu_mmu, vpfn, shared_regs->dummy_phys, - nr_susp_pages, nr_susp_pages, MCU_AS_NR, true); + kbase_mmu_teardown_firmware_pages(kbdev, &kbdev->csf.mcu_mmu, vpfn, shared_regs->dummy_phys, + nr_susp_pages, nr_susp_pages, MCU_AS_NR); fail_pmod_map_fail: vpfn = CSG_REG_SUSP_BUF_VPFN(reg, nr_susp_pages); - kbase_mmu_teardown_pages(kbdev, &kbdev->csf.mcu_mmu, vpfn, shared_regs->dummy_phys, - nr_susp_pages, nr_susp_pages, MCU_AS_NR, true); + kbase_mmu_teardown_firmware_pages(kbdev, &kbdev->csf.mcu_mmu, vpfn, shared_regs->dummy_phys, + nr_susp_pages, nr_susp_pages, MCU_AS_NR); fail_susp_map_fail: mutex_lock(&kbdev->csf.reg_lock); kbase_remove_va_region(kbdev, reg); @@ -701,17 +701,18 @@ static void shared_mcu_csg_reg_term(struct kbase_device *kbdev, for (i = 0; i < nr_csis; i++) { vpfn = CSG_REG_USERIO_VPFN(reg, i, nr_susp_pages); - kbase_mmu_teardown_pages(kbdev, &kbdev->csf.mcu_mmu, vpfn, shared_regs->dummy_phys, - KBASEP_NUM_CS_USER_IO_PAGES, KBASEP_NUM_CS_USER_IO_PAGES, - MCU_AS_NR, true); + kbase_mmu_teardown_firmware_pages(kbdev, &kbdev->csf.mcu_mmu, vpfn, + shared_regs->dummy_phys, + KBASEP_NUM_CS_USER_IO_PAGES, + KBASEP_NUM_CS_USER_IO_PAGES, MCU_AS_NR); } vpfn = CSG_REG_PMOD_BUF_VPFN(reg, nr_susp_pages); - kbase_mmu_teardown_pages(kbdev, &kbdev->csf.mcu_mmu, vpfn, shared_regs->dummy_phys, - nr_susp_pages, nr_susp_pages, MCU_AS_NR, true); + kbase_mmu_teardown_firmware_pages(kbdev, &kbdev->csf.mcu_mmu, vpfn, 
shared_regs->dummy_phys, + nr_susp_pages, nr_susp_pages, MCU_AS_NR); vpfn = CSG_REG_SUSP_BUF_VPFN(reg, nr_susp_pages); - kbase_mmu_teardown_pages(kbdev, &kbdev->csf.mcu_mmu, vpfn, shared_regs->dummy_phys, - nr_susp_pages, nr_susp_pages, MCU_AS_NR, true); + kbase_mmu_teardown_firmware_pages(kbdev, &kbdev->csf.mcu_mmu, vpfn, shared_regs->dummy_phys, + nr_susp_pages, nr_susp_pages, MCU_AS_NR); mutex_lock(&kbdev->csf.reg_lock); kbase_remove_va_region(kbdev, reg); diff --git a/mali_kbase/csf/mali_kbase_csf_registers.h b/mali_kbase/csf/mali_kbase_csf_registers.h index b5bf7bb..b5ca885 100644 --- a/mali_kbase/csf/mali_kbase_csf_registers.h +++ b/mali_kbase/csf/mali_kbase_csf_registers.h @@ -143,12 +143,15 @@ #define CSG_ACK_IRQ_MASK 0x0004 /* () Global acknowledge interrupt mask */ #define CSG_DB_REQ 0x0008 /* () Global doorbell request */ #define CSG_IRQ_ACK 0x000C /* () CS IRQ acknowledge */ + + #define CSG_ALLOW_COMPUTE_LO 0x0020 /* () Allowed compute endpoints, low word */ #define CSG_ALLOW_COMPUTE_HI 0x0024 /* () Allowed compute endpoints, high word */ #define CSG_ALLOW_FRAGMENT_LO 0x0028 /* () Allowed fragment endpoints, low word */ #define CSG_ALLOW_FRAGMENT_HI 0x002C /* () Allowed fragment endpoints, high word */ #define CSG_ALLOW_OTHER 0x0030 /* () Allowed other endpoints */ -#define CSG_EP_REQ 0x0034 /* () Maximum number of endpoints allowed */ +#define CSG_EP_REQ_LO 0x0034 /* () Maximum number of endpoints allowed, low word */ +#define CSG_EP_REQ_HI 0x0038 /* () Maximum number of endpoints allowed, high word */ #define CSG_SUSPEND_BUF_LO 0x0040 /* () Normal mode suspend buffer, low word */ #define CSG_SUSPEND_BUF_HI 0x0044 /* () Normal mode suspend buffer, high word */ #define CSG_PROTM_SUSPEND_BUF_LO 0x0048 /* () Protected mode suspend buffer, low word */ @@ -645,6 +648,7 @@ (((reg_val) & ~CS_STATUS_REQ_RESOURCE_IDVS_RESOURCES_MASK) | \ (((value) << CS_STATUS_REQ_RESOURCE_IDVS_RESOURCES_SHIFT) & CS_STATUS_REQ_RESOURCE_IDVS_RESOURCES_MASK)) + /* CS_STATUS_WAIT_SYNC_POINTER register */ #define CS_STATUS_WAIT_SYNC_POINTER_POINTER_SHIFT 0 #define CS_STATUS_WAIT_SYNC_POINTER_POINTER_MASK \ @@ -953,41 +957,46 @@ /* CSG_EP_REQ register */ #define CSG_EP_REQ_COMPUTE_EP_SHIFT 0 -#define CSG_EP_REQ_COMPUTE_EP_MASK (0xFF << CSG_EP_REQ_COMPUTE_EP_SHIFT) +#define CSG_EP_REQ_COMPUTE_EP_MASK ((u64)0xFF << CSG_EP_REQ_COMPUTE_EP_SHIFT) #define CSG_EP_REQ_COMPUTE_EP_GET(reg_val) (((reg_val)&CSG_EP_REQ_COMPUTE_EP_MASK) >> CSG_EP_REQ_COMPUTE_EP_SHIFT) -#define CSG_EP_REQ_COMPUTE_EP_SET(reg_val, value) \ - (((reg_val) & ~CSG_EP_REQ_COMPUTE_EP_MASK) | \ - (((value) << CSG_EP_REQ_COMPUTE_EP_SHIFT) & CSG_EP_REQ_COMPUTE_EP_MASK)) +#define CSG_EP_REQ_COMPUTE_EP_SET(reg_val, value) \ + (((reg_val) & ~CSG_EP_REQ_COMPUTE_EP_MASK) | \ + ((((u64)value) << CSG_EP_REQ_COMPUTE_EP_SHIFT) & CSG_EP_REQ_COMPUTE_EP_MASK)) #define CSG_EP_REQ_FRAGMENT_EP_SHIFT 8 -#define CSG_EP_REQ_FRAGMENT_EP_MASK (0xFF << CSG_EP_REQ_FRAGMENT_EP_SHIFT) +#define CSG_EP_REQ_FRAGMENT_EP_MASK ((u64)0xFF << CSG_EP_REQ_FRAGMENT_EP_SHIFT) #define CSG_EP_REQ_FRAGMENT_EP_GET(reg_val) (((reg_val)&CSG_EP_REQ_FRAGMENT_EP_MASK) >> CSG_EP_REQ_FRAGMENT_EP_SHIFT) -#define CSG_EP_REQ_FRAGMENT_EP_SET(reg_val, value) \ - (((reg_val) & ~CSG_EP_REQ_FRAGMENT_EP_MASK) | \ - (((value) << CSG_EP_REQ_FRAGMENT_EP_SHIFT) & CSG_EP_REQ_FRAGMENT_EP_MASK)) +#define CSG_EP_REQ_FRAGMENT_EP_SET(reg_val, value) \ + (((reg_val) & ~CSG_EP_REQ_FRAGMENT_EP_MASK) | \ + ((((u64)value) << CSG_EP_REQ_FRAGMENT_EP_SHIFT) & CSG_EP_REQ_FRAGMENT_EP_MASK)) #define CSG_EP_REQ_TILER_EP_SHIFT 
16 -#define CSG_EP_REQ_TILER_EP_MASK (0xF << CSG_EP_REQ_TILER_EP_SHIFT) +#define CSG_EP_REQ_TILER_EP_MASK ((u64)0xF << CSG_EP_REQ_TILER_EP_SHIFT) #define CSG_EP_REQ_TILER_EP_GET(reg_val) (((reg_val)&CSG_EP_REQ_TILER_EP_MASK) >> CSG_EP_REQ_TILER_EP_SHIFT) -#define CSG_EP_REQ_TILER_EP_SET(reg_val, value) \ - (((reg_val) & ~CSG_EP_REQ_TILER_EP_MASK) | (((value) << CSG_EP_REQ_TILER_EP_SHIFT) & CSG_EP_REQ_TILER_EP_MASK)) +#define CSG_EP_REQ_TILER_EP_SET(reg_val, value) \ + (((reg_val) & ~CSG_EP_REQ_TILER_EP_MASK) | \ + ((((u64)value) << CSG_EP_REQ_TILER_EP_SHIFT) & CSG_EP_REQ_TILER_EP_MASK)) #define CSG_EP_REQ_EXCLUSIVE_COMPUTE_SHIFT 20 -#define CSG_EP_REQ_EXCLUSIVE_COMPUTE_MASK (0x1 << CSG_EP_REQ_EXCLUSIVE_COMPUTE_SHIFT) +#define CSG_EP_REQ_EXCLUSIVE_COMPUTE_MASK ((u64)0x1 << CSG_EP_REQ_EXCLUSIVE_COMPUTE_SHIFT) #define CSG_EP_REQ_EXCLUSIVE_COMPUTE_GET(reg_val) \ (((reg_val)&CSG_EP_REQ_EXCLUSIVE_COMPUTE_MASK) >> CSG_EP_REQ_EXCLUSIVE_COMPUTE_SHIFT) -#define CSG_EP_REQ_EXCLUSIVE_COMPUTE_SET(reg_val, value) \ - (((reg_val) & ~CSG_EP_REQ_EXCLUSIVE_COMPUTE_MASK) | \ - (((value) << CSG_EP_REQ_EXCLUSIVE_COMPUTE_SHIFT) & CSG_EP_REQ_EXCLUSIVE_COMPUTE_MASK)) +#define CSG_EP_REQ_EXCLUSIVE_COMPUTE_SET(reg_val, value) \ + (((reg_val) & ~CSG_EP_REQ_EXCLUSIVE_COMPUTE_MASK) | \ + ((((u64)value) << CSG_EP_REQ_EXCLUSIVE_COMPUTE_SHIFT) & \ + CSG_EP_REQ_EXCLUSIVE_COMPUTE_MASK)) #define CSG_EP_REQ_EXCLUSIVE_FRAGMENT_SHIFT 21 -#define CSG_EP_REQ_EXCLUSIVE_FRAGMENT_MASK (0x1 << CSG_EP_REQ_EXCLUSIVE_FRAGMENT_SHIFT) +#define CSG_EP_REQ_EXCLUSIVE_FRAGMENT_MASK ((u64)0x1 << CSG_EP_REQ_EXCLUSIVE_FRAGMENT_SHIFT) #define CSG_EP_REQ_EXCLUSIVE_FRAGMENT_GET(reg_val) \ (((reg_val)&CSG_EP_REQ_EXCLUSIVE_FRAGMENT_MASK) >> CSG_EP_REQ_EXCLUSIVE_FRAGMENT_SHIFT) -#define CSG_EP_REQ_EXCLUSIVE_FRAGMENT_SET(reg_val, value) \ - (((reg_val) & ~CSG_EP_REQ_EXCLUSIVE_FRAGMENT_MASK) | \ - (((value) << CSG_EP_REQ_EXCLUSIVE_FRAGMENT_SHIFT) & CSG_EP_REQ_EXCLUSIVE_FRAGMENT_MASK)) +#define CSG_EP_REQ_EXCLUSIVE_FRAGMENT_SET(reg_val, value) \ + (((reg_val) & ~CSG_EP_REQ_EXCLUSIVE_FRAGMENT_MASK) | \ + ((((u64)value) << CSG_EP_REQ_EXCLUSIVE_FRAGMENT_SHIFT) & \ + CSG_EP_REQ_EXCLUSIVE_FRAGMENT_MASK)) #define CSG_EP_REQ_PRIORITY_SHIFT 28 -#define CSG_EP_REQ_PRIORITY_MASK (0xF << CSG_EP_REQ_PRIORITY_SHIFT) +#define CSG_EP_REQ_PRIORITY_MASK ((u64)0xF << CSG_EP_REQ_PRIORITY_SHIFT) #define CSG_EP_REQ_PRIORITY_GET(reg_val) (((reg_val)&CSG_EP_REQ_PRIORITY_MASK) >> CSG_EP_REQ_PRIORITY_SHIFT) -#define CSG_EP_REQ_PRIORITY_SET(reg_val, value) \ - (((reg_val) & ~CSG_EP_REQ_PRIORITY_MASK) | (((value) << CSG_EP_REQ_PRIORITY_SHIFT) & CSG_EP_REQ_PRIORITY_MASK)) +#define CSG_EP_REQ_PRIORITY_SET(reg_val, value) \ + (((reg_val) & ~CSG_EP_REQ_PRIORITY_MASK) | \ + ((((u64)value) << CSG_EP_REQ_PRIORITY_SHIFT) & CSG_EP_REQ_PRIORITY_MASK)) + /* CSG_SUSPEND_BUF register */ #define CSG_SUSPEND_BUF_POINTER_SHIFT 0 @@ -1096,6 +1105,7 @@ (((reg_val) & ~CSG_STATUS_EP_CURRENT_TILER_EP_MASK) | \ (((value) << CSG_STATUS_EP_CURRENT_TILER_EP_SHIFT) & CSG_STATUS_EP_CURRENT_TILER_EP_MASK)) + /* CSG_STATUS_EP_REQ register */ #define CSG_STATUS_EP_REQ_COMPUTE_EP_SHIFT 0 #define CSG_STATUS_EP_REQ_COMPUTE_EP_MASK (0xFF << CSG_STATUS_EP_REQ_COMPUTE_EP_SHIFT) @@ -1133,6 +1143,7 @@ (((reg_val) & ~CSG_STATUS_EP_REQ_EXCLUSIVE_FRAGMENT_MASK) | \ (((value) << CSG_STATUS_EP_REQ_EXCLUSIVE_FRAGMENT_SHIFT) & CSG_STATUS_EP_REQ_EXCLUSIVE_FRAGMENT_MASK)) + /* End of CSG_OUTPUT_BLOCK register set definitions */ /* STREAM_CONTROL_BLOCK register set definitions */ @@ -1481,6 +1492,20 @@ #define 
GLB_PWROFF_TIMER_TIMER_SOURCE_GPU_COUNTER 0x1 /* End of GLB_PWROFF_TIMER_TIMER_SOURCE values */ +/* GLB_PWROFF_TIMER_CONFIG register */ +#ifndef GLB_PWROFF_TIMER_CONFIG +#define GLB_PWROFF_TIMER_CONFIG 0x0088 /* () Configuration fields for GLB_PWROFF_TIMER */ +#define GLB_PWROFF_TIMER_CONFIG_NO_MODIFIER_SHIFT 0 +#define GLB_PWROFF_TIMER_CONFIG_NO_MODIFIER_MASK (0x1 << GLB_PWROFF_TIMER_CONFIG_NO_MODIFIER_SHIFT) +#define GLB_PWROFF_TIMER_CONFIG_NO_MODIFIER_GET(reg_val) \ + (((reg_val)&GLB_PWROFF_TIMER_CONFIG_NO_MODIFIER_MASK) >> \ + GLB_PWROFF_TIMER_CONFIG_NO_MODIFIER_SHIFT) +#define GLB_PWROFF_TIMER_CONFIG_NO_MODIFIER_SET(reg_val, value) \ + (((reg_val) & ~GLB_PWROFF_TIMER_CONFIG_NO_MODIFIER_MASK) | \ + (((value) << GLB_PWROFF_TIMER_CONFIG_NO_MODIFIER_SHIFT) & \ + GLB_PWROFF_TIMER_CONFIG_NO_MODIFIER_MASK)) +#endif /* End of GLB_PWROFF_TIMER_CONFIG values */ + /* GLB_ALLOC_EN register */ #define GLB_ALLOC_EN_MASK_SHIFT 0 #define GLB_ALLOC_EN_MASK_MASK (GPU_ULL(0xFFFFFFFFFFFFFFFF) << GLB_ALLOC_EN_MASK_SHIFT) @@ -1546,6 +1571,20 @@ #define GLB_IDLE_TIMER_TIMER_SOURCE_GPU_COUNTER 0x1 /* End of GLB_IDLE_TIMER_TIMER_SOURCE values */ +/* GLB_IDLE_TIMER_CONFIG values */ +#ifndef GLB_IDLE_TIMER_CONFIG +#define GLB_IDLE_TIMER_CONFIG 0x0084 /* () Configuration fields for GLB_IDLE_TIMER */ +#define GLB_IDLE_TIMER_CONFIG_NO_MODIFIER_SHIFT 0 +#define GLB_IDLE_TIMER_CONFIG_NO_MODIFIER_MASK (0x1 << GLB_IDLE_TIMER_CONFIG_NO_MODIFIER_SHIFT) +#define GLB_IDLE_TIMER_CONFIG_NO_MODIFIER_GET(reg_val) \ + (((reg_val)&GLB_IDLE_TIMER_CONFIG_NO_MODIFIER_MASK) >> \ + GLB_IDLE_TIMER_CONFIG_NO_MODIFIER_SHIFT) +#define GLB_IDLE_TIMER_CONFIG_NO_MODIFIER_SET(reg_val, value) \ + (((reg_val) & ~GLB_IDLE_TIMER_CONFIG_NO_MODIFIER_MASK) | \ + (((value) << GLB_IDLE_TIMER_CONFIG_NO_MODIFIER_SHIFT) & \ + GLB_IDLE_TIMER_CONFIG_NO_MODIFIER_MASK)) +#endif /* End of GLB_IDLE_TIMER_CONFIG values */ + /* GLB_INSTR_FEATURES register */ #define GLB_INSTR_FEATURES_OFFSET_UPDATE_RATE_SHIFT (0) #define GLB_INSTR_FEATURES_OFFSET_UPDATE_RATE_MASK ((u32)0xF << GLB_INSTR_FEATURES_OFFSET_UPDATE_RATE_SHIFT) @@ -1670,6 +1709,7 @@ (((reg_val) & ~GLB_DEBUG_ACK_RUN_MODE_MASK) | \ (((value) << GLB_DEBUG_ACK_RUN_MODE_SHIFT) & GLB_DEBUG_ACK_RUN_MODE_MASK)) + /* RUN_MODE values */ #define GLB_DEBUG_RUN_MODE_TYPE_NOP 0x0 #define GLB_DEBUG_RUN_MODE_TYPE_CORE_DUMP 0x1 diff --git a/mali_kbase/csf/mali_kbase_csf_reset_gpu.c b/mali_kbase/csf/mali_kbase_csf_reset_gpu.c index d076f3d..b8ad3a4 100644 --- a/mali_kbase/csf/mali_kbase_csf_reset_gpu.c +++ b/mali_kbase/csf/mali_kbase_csf_reset_gpu.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -196,7 +196,7 @@ static void kbase_csf_reset_begin_hw_access_sync( */ spin_lock_irqsave(&kbdev->hwaccess_lock, hwaccess_lock_flags); kbase_csf_scheduler_spin_lock(kbdev, &scheduler_spin_lock_flags); - atomic_set(&kbdev->csf.reset.state, KBASE_RESET_GPU_HAPPENING); + atomic_set(&kbdev->csf.reset.state, KBASE_CSF_RESET_GPU_HAPPENING); kbase_csf_scheduler_spin_unlock(kbdev, scheduler_spin_lock_flags); spin_unlock_irqrestore(&kbdev->hwaccess_lock, hwaccess_lock_flags); } @@ -257,14 +257,15 @@ void kbase_csf_debug_dump_registers(struct kbase_device *kbdev) kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT)), kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_STATUS)), kbase_reg_read(kbdev, GPU_CONTROL_REG(MCU_STATUS))); - dev_err(kbdev->dev, " JOB_IRQ_RAWSTAT=0x%08x MMU_IRQ_RAWSTAT=0x%08x GPU_FAULTSTATUS=0x%08x", + dev_err(kbdev->dev, + " JOB_IRQ_RAWSTAT=0x%08x MMU_IRQ_RAWSTAT=0x%08x GPU_FAULTSTATUS=0x%08x", kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_RAWSTAT)), - kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_RAWSTAT)), + kbase_reg_read(kbdev, MMU_CONTROL_REG(MMU_IRQ_RAWSTAT)), kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_FAULTSTATUS))); dev_err(kbdev->dev, " GPU_IRQ_MASK=0x%08x JOB_IRQ_MASK=0x%08x MMU_IRQ_MASK=0x%08x", kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK)), kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK)), - kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK))); + kbase_reg_read(kbdev, MMU_CONTROL_REG(MMU_IRQ_MASK))); dev_err(kbdev->dev, " PWR_OVERRIDE0=0x%08x PWR_OVERRIDE1=0x%08x", kbase_reg_read(kbdev, GPU_CONTROL_REG(PWR_OVERRIDE0)), kbase_reg_read(kbdev, GPU_CONTROL_REG(PWR_OVERRIDE1))); @@ -388,10 +389,12 @@ static enum kbasep_soft_reset_status kbase_csf_reset_gpu_once(struct kbase_devic rt_mutex_unlock(&kbdev->pm.lock); if (err) { + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); if (!kbase_pm_l2_is_in_desired_state(kbdev)) ret = L2_ON_FAILED; else if (!kbase_pm_mcu_is_in_desired_state(kbdev)) ret = MCU_REINIT_FAILED; + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); } return ret; diff --git a/mali_kbase/csf/mali_kbase_csf_scheduler.c b/mali_kbase/csf/mali_kbase_csf_scheduler.c index f21067f..2573e3f 100644 --- a/mali_kbase/csf/mali_kbase_csf_scheduler.c +++ b/mali_kbase/csf/mali_kbase_csf_scheduler.c @@ -19,6 +19,8 @@ * */ +#include <linux/kthread.h> + #include <mali_kbase.h> #include "mali_kbase_config_defaults.h" #include <mali_kbase_ctx_sched.h> @@ -36,6 +38,11 @@ #include "mali_kbase_csf_tiler_heap.h" #include "mali_kbase_csf_tiler_heap_reclaim.h" #include "mali_kbase_csf_mcu_shared_reg.h" +#include <linux/version_compat_defs.h> +#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) +#include <mali_kbase_gpu_metrics.h> +#include <csf/mali_kbase_csf_trace_buffer.h> +#endif /* CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD */ /* Value to indicate that a queue group is not groups_to_schedule list */ #define KBASEP_GROUP_PREPARED_SEQ_NUM_INVALID (U32_MAX) @@ -202,6 +209,222 @@ static bool queue_empty_or_blocked(struct kbase_queue *queue) } #endif +#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) +/** + * gpu_metrics_ctx_init() - Take a reference on GPU metrics context if it exists, + * otherwise allocate and initialise one. + * + * @kctx: Pointer to the Kbase context. + * + * The GPU metrics context represents an "Application" for the purposes of GPU metrics + * reporting. 
There may be multiple kbase_contexts contributing data to a single GPU + * metrics context. + * This function takes a reference on GPU metrics context if it already exists + * corresponding to the Application that is creating the Kbase context, otherwise + * memory is allocated for it and initialised. + * + * Return: 0 on success, or negative on failure. + */ +static inline int gpu_metrics_ctx_init(struct kbase_context *kctx) +{ + struct kbase_gpu_metrics_ctx *gpu_metrics_ctx; + struct kbase_device *kbdev = kctx->kbdev; + int ret = 0; + + const struct cred *cred = get_current_cred(); + const unsigned int aid = cred->euid.val; + + put_cred(cred); + + /* Return early if this is not a Userspace created context */ + if (unlikely(!kctx->kfile)) + return 0; + + /* Serialize against the other threads trying to create/destroy Kbase contexts. */ + mutex_lock(&kbdev->kctx_list_lock); + rt_mutex_lock(&kbdev->csf.scheduler.lock); + gpu_metrics_ctx = kbase_gpu_metrics_ctx_get(kbdev, aid); + rt_mutex_unlock(&kbdev->csf.scheduler.lock); + + if (!gpu_metrics_ctx) { + gpu_metrics_ctx = kmalloc(sizeof(*gpu_metrics_ctx), GFP_KERNEL); + + if (gpu_metrics_ctx) { + rt_mutex_lock(&kbdev->csf.scheduler.lock); + kbase_gpu_metrics_ctx_init(kbdev, gpu_metrics_ctx, aid); + rt_mutex_unlock(&kbdev->csf.scheduler.lock); + } else { + dev_err(kbdev->dev, "Allocation for gpu_metrics_ctx failed"); + ret = -ENOMEM; + } + } + + kctx->gpu_metrics_ctx = gpu_metrics_ctx; + mutex_unlock(&kbdev->kctx_list_lock); + + return ret; +} + +/** + * gpu_metrics_ctx_term() - Drop a reference on a GPU metrics context and free it + * if the refcount becomes 0. + * + * @kctx: Pointer to the Kbase context. + */ +static inline void gpu_metrics_ctx_term(struct kbase_context *kctx) +{ + /* Return early if this is not a Userspace created context */ + if (unlikely(!kctx->kfile)) + return; + + /* Serialize against the other threads trying to create/destroy Kbase contexts. */ + mutex_lock(&kctx->kbdev->kctx_list_lock); + rt_mutex_lock(&kctx->kbdev->csf.scheduler.lock); + kbase_gpu_metrics_ctx_put(kctx->kbdev, kctx->gpu_metrics_ctx); + rt_mutex_unlock(&kctx->kbdev->csf.scheduler.lock); + mutex_unlock(&kctx->kbdev->kctx_list_lock); +} + +/** + * struct gpu_metrics_event - A GPU metrics event recorded in trace buffer. + * + * @csg_slot_act: The 32bit data consisting of a GPU metrics event. + * 5 bits[4:0] represents CSG slot number. + * 1 bit [5] represents the transition of the CSG group on the slot. + * '1' means idle->active whilst '0' does active->idle. + * @timestamp: 64bit timestamp consisting of a GPU metrics event. + * + * Note: It's packed and word-aligned as agreed layout with firmware. + */ +struct gpu_metrics_event { + u32 csg_slot_act; + u64 timestamp; +} __packed __aligned(4); +#define GPU_METRICS_EVENT_SIZE sizeof(struct gpu_metrics_event) + +#define GPU_METRICS_ACT_SHIFT 5 +#define GPU_METRICS_ACT_MASK (0x1 << GPU_METRICS_ACT_SHIFT) +#define GPU_METRICS_ACT_GET(val) (((val)&GPU_METRICS_ACT_MASK) >> GPU_METRICS_ACT_SHIFT) + +#define GPU_METRICS_CSG_MASK 0x1f +#define GPU_METRICS_CSG_GET(val) ((val)&GPU_METRICS_CSG_MASK) + +/** + * gpu_metrics_read_event() - Read a GPU metrics trace from trace buffer + * + * @kbdev: Pointer to the device + * @kctx: Kcontext that is derived from CSG slot field of a GPU metrics. + * @prev_act: Previous CSG activity transition in a GPU metrics. + * @cur_act: Current CSG activity transition in a GPU metrics. + * @ts: CSG activity transition timestamp in a GPU metrics. 
+ * + * This function reads the firmware trace buffer named 'gpu_metrics' and + * parses one 12-byte data packet into the following information: + * - The CSG slot number on which the CSG transitioned to active or idle. + * - The activity transition (1: idle->active, 0: active->idle). + * - The timestamp in nanoseconds at which the transition occurred. + * + * Return: true on success. + */ +static bool gpu_metrics_read_event(struct kbase_device *kbdev, struct kbase_context **kctx, + bool *prev_act, bool *cur_act, uint64_t *ts) +{ + struct firmware_trace_buffer *tb = kbdev->csf.scheduler.gpu_metrics_tb; + struct gpu_metrics_event e; + + if (kbase_csf_firmware_trace_buffer_read_data(tb, (u8 *)&e, GPU_METRICS_EVENT_SIZE) == + GPU_METRICS_EVENT_SIZE) { + const u8 slot = GPU_METRICS_CSG_GET(e.csg_slot_act); + struct kbase_queue_group *group = + kbdev->csf.scheduler.csg_slots[slot].resident_group; + + if (unlikely(!group)) { + dev_err(kbdev->dev, "failed to find CSG group from CSG slot(%u)", slot); + return false; + } + + *cur_act = GPU_METRICS_ACT_GET(e.csg_slot_act); + *ts = kbase_backend_time_convert_gpu_to_cpu(kbdev, e.timestamp); + *kctx = group->kctx; + + *prev_act = group->prev_act; + group->prev_act = *cur_act; + + return true; + } + + dev_err(kbdev->dev, "failed to read a GPU metrics from trace buffer"); + + return false; +} + +/** + * emit_gpu_metrics_to_frontend() - Emit GPU metrics events to the frontend. + * + * @kbdev: Pointer to the device + * + * This function emits GPU metrics data to the frontend whenever required. + * Calls to this function are serialized by the scheduler lock. + * + * Kbase reports invalid activity traces when they are detected. + */ +static void emit_gpu_metrics_to_frontend(struct kbase_device *kbdev) +{ + u64 system_time = 0; + u64 ts_before_drain; + u64 ts = 0; + + lockdep_assert_held(&kbdev->csf.scheduler.lock); + +#if IS_ENABLED(CONFIG_MALI_NO_MALI) + return; +#endif + + if (WARN_ON_ONCE(kbdev->csf.scheduler.state == SCHED_SUSPENDED)) + return; + + kbase_backend_get_gpu_time_norequest(kbdev, NULL, &system_time, NULL); + ts_before_drain = kbase_backend_time_convert_gpu_to_cpu(kbdev, system_time); + + while (!kbase_csf_firmware_trace_buffer_is_empty(kbdev->csf.scheduler.gpu_metrics_tb)) { + struct kbase_context *kctx; + bool prev_act; + bool cur_act; + + if (gpu_metrics_read_event(kbdev, &kctx, &prev_act, &cur_act, &ts)) { + if (prev_act == cur_act) { + /* Error handling + * + * For an active CSG, Kbase will try to recover the + * lost event by ending the previously active event and + * starting a new one. + * + * For an inactive CSG, the event is dropped as Kbase + * cannot recover. + */ + dev_err(kbdev->dev, + "Invalid activity state transition. (prev_act = %u, cur_act = %u)", + prev_act, cur_act); + if (cur_act) { + kbase_gpu_metrics_ctx_end_activity(kctx, ts); + kbase_gpu_metrics_ctx_start_activity(kctx, ts); + } + } else { + /* Normal handling */ + if (cur_act) + kbase_gpu_metrics_ctx_start_activity(kctx, ts); + else + kbase_gpu_metrics_ctx_end_activity(kctx, ts); + } + } else + break; + } + + kbase_gpu_metrics_emit_tracepoint(kbdev, ts >= ts_before_drain ? ts + 1 : ts_before_drain); +} +#endif /* CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD */ + /** * wait_for_dump_complete_on_group_deschedule() - Wait for dump on fault and * scheduling tick/tock to complete before the group deschedule.
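As an aside, a minimal userspace sketch of how one of these packed 12-byte gpu_metrics_event packets decodes, using the field layout fixed above (bits [4:0] CSG slot, bit 5 transition direction, then a 64-bit timestamp); the struct and function names here are illustrative and not part of the patch:

#include <inttypes.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Same 12-byte layout as struct gpu_metrics_event above. */
struct gpu_metrics_event_sketch {
	uint32_t csg_slot_act;
	uint64_t timestamp;
} __attribute__((__packed__, __aligned__(4)));

static void decode_gpu_metrics_event(const struct gpu_metrics_event_sketch *e)
{
	const unsigned int slot = e->csg_slot_act & 0x1f; /* GPU_METRICS_CSG_GET() */
	const bool active = (e->csg_slot_act >> 5) & 0x1; /* GPU_METRICS_ACT_GET() */

	printf("CSG slot %u: %s at %" PRIu64 " ns\n", slot,
	       active ? "idle->active" : "active->idle", e->timestamp);
}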
@@ -424,79 +647,20 @@ out: * * @timer: Pointer to the scheduling tick hrtimer * - * This function will enqueue the scheduling tick work item for immediate - * execution, if it has not been queued already. + * This function will wake up kbase_csf_scheduler_kthread() to process a + * pending scheduling tick. It will be restarted manually once a tick has been + * processed if appropriate. * * Return: enum value to indicate that timer should not be restarted. */ static enum hrtimer_restart tick_timer_callback(struct hrtimer *timer) { - struct kbase_device *kbdev = container_of(timer, struct kbase_device, - csf.scheduler.tick_timer); - - kbase_csf_scheduler_tick_advance(kbdev); - return HRTIMER_NORESTART; -} - -/** - * start_tick_timer() - Start the scheduling tick hrtimer. - * - * @kbdev: Pointer to the device - * - * This function will start the scheduling tick hrtimer and is supposed to - * be called only from the tick work item function. The tick hrtimer should - * not be active already. - */ -static void start_tick_timer(struct kbase_device *kbdev) -{ - struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; - unsigned long flags; - - lockdep_assert_held(&scheduler->lock); - - spin_lock_irqsave(&scheduler->interrupt_lock, flags); - if (likely(!scheduler->tick_timer_active)) { - scheduler->tick_timer_active = true; - - hrtimer_start(&scheduler->tick_timer, - HR_TIMER_DELAY_MSEC(scheduler->csg_scheduling_period_ms), - HRTIMER_MODE_REL); - } - spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); -} - -/** - * cancel_tick_timer() - Cancel the scheduling tick hrtimer - * - * @kbdev: Pointer to the device - */ -static void cancel_tick_timer(struct kbase_device *kbdev) -{ - struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; - unsigned long flags; - - spin_lock_irqsave(&scheduler->interrupt_lock, flags); - scheduler->tick_timer_active = false; - spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); - hrtimer_cancel(&scheduler->tick_timer); -} - -/** - * enqueue_tick_work() - Enqueue the scheduling tick work item - * - * @kbdev: Pointer to the device - * - * This function will queue the scheduling tick work item for immediate - * execution. This shall only be called when both the tick hrtimer and tick - * work item are not active/pending. - */ -static void enqueue_tick_work(struct kbase_device *kbdev) -{ - struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; - - lockdep_assert_held(&scheduler->lock); + struct kbase_device *kbdev = + container_of(timer, struct kbase_device, csf.scheduler.tick_timer); kbase_csf_scheduler_invoke_tick(kbdev); + + return HRTIMER_NORESTART; } static void release_doorbell(struct kbase_device *kbdev, int doorbell_nr) @@ -642,8 +806,14 @@ static void update_on_slot_queues_offsets(struct kbase_device *kbdev) if (queue && queue->user_io_addr) { u64 const *const output_addr = - (u64 const *)(queue->user_io_addr + PAGE_SIZE); + (u64 const *)(queue->user_io_addr + + PAGE_SIZE / sizeof(u64)); + /* + * This 64-bit read will be atomic on a 64-bit kernel but may not + * be atomic on 32-bit kernels. Support for 32-bit kernels is + * limited to build-only. + */ queue->extract_ofs = output_addr[CS_EXTRACT_LO / sizeof(u64)]; } } @@ -698,7 +868,7 @@ bool kbase_csf_scheduler_process_gpu_idle_event(struct kbase_device *kbdev) * updated whilst gpu_idle_worker() is executing. 
*/ scheduler->fast_gpu_idle_handling = - (kbdev->csf.gpu_idle_hysteresis_us == 0) || + (kbdev->csf.gpu_idle_hysteresis_ns == 0) || !kbase_csf_scheduler_all_csgs_idle(kbdev); /* The GPU idle worker relies on update_on_slot_queues_offsets() to have @@ -713,8 +883,8 @@ bool kbase_csf_scheduler_process_gpu_idle_event(struct kbase_device *kbdev) } #endif } else { - /* Advance the scheduling tick to get the non-idle suspended groups loaded soon */ - kbase_csf_scheduler_tick_advance_nolock(kbdev); + /* Invoke the scheduling tick to get the non-idle suspended groups loaded soon */ + kbase_csf_scheduler_invoke_tick(kbdev); } return ack_gpu_idle_event; @@ -806,6 +976,14 @@ static bool queue_group_scheduled_locked(struct kbase_queue_group *group) return queue_group_scheduled(group); } +static void update_idle_protm_group_state_to_runnable(struct kbase_queue_group *group) +{ + lockdep_assert_held(&group->kctx->kbdev->csf.scheduler.lock); + + group->run_state = KBASE_CSF_GROUP_RUNNABLE; + KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, CSF_GROUP_RUNNABLE, group, group->run_state); +} + /** * scheduler_protm_wait_quit() - Wait for GPU to exit protected mode. * @@ -889,24 +1067,6 @@ static void scheduler_force_protm_exit(struct kbase_device *kbdev) } /** - * scheduler_timer_is_enabled_nolock() - Check if the scheduler wakes up - * automatically for periodic tasks. - * - * @kbdev: Pointer to the device - * - * This is a variant of kbase_csf_scheduler_timer_is_enabled() that assumes the - * CSF scheduler lock to already have been held. - * - * Return: true if the scheduler is configured to wake up periodically - */ -static bool scheduler_timer_is_enabled_nolock(struct kbase_device *kbdev) -{ - lockdep_assert_held(&kbdev->csf.scheduler.lock); - - return kbdev->csf.scheduler.timer_enabled; -} - -/** * scheduler_pm_active_handle_suspend() - Acquire the PM reference count for * Scheduler * @@ -1694,9 +1854,9 @@ static void update_hw_active(struct kbase_queue *queue, bool active) { #if IS_ENABLED(CONFIG_MALI_NO_MALI) if (queue && queue->enabled) { - u32 *output_addr = (u32 *)(queue->user_io_addr + PAGE_SIZE); + u64 *output_addr = queue->user_io_addr + PAGE_SIZE / sizeof(u64); - output_addr[CS_ACTIVE / sizeof(u32)] = active; + output_addr[CS_ACTIVE / sizeof(*output_addr)] = active; } #else CSTD_UNUSED(queue); @@ -1706,11 +1866,16 @@ static void update_hw_active(struct kbase_queue *queue, bool active) static void program_cs_extract_init(struct kbase_queue *queue) { - u64 *input_addr = (u64 *)queue->user_io_addr; - u64 *output_addr = (u64 *)(queue->user_io_addr + PAGE_SIZE); + u64 *input_addr = queue->user_io_addr; + u64 *output_addr = queue->user_io_addr + PAGE_SIZE / sizeof(u64); - input_addr[CS_EXTRACT_INIT_LO / sizeof(u64)] = - output_addr[CS_EXTRACT_LO / sizeof(u64)]; + /* + * These 64-bit reads and writes will be atomic on a 64-bit kernel but may + * not be atomic on 32-bit kernels. Support for 32-bit kernels is limited to + * build-only. 
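+ * CS_EXTRACT_INIT is seeded from the firmware's current CS_EXTRACT so that the
+ * stream resumes from the last extracted offset when it is re-enabled.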
+ */ + input_addr[CS_EXTRACT_INIT_LO / sizeof(*input_addr)] = + output_addr[CS_EXTRACT_LO / sizeof(*output_addr)]; } static void program_cs_trace_cfg(struct kbase_csf_cmd_stream_info *stream, @@ -1930,7 +2095,7 @@ int kbase_csf_scheduler_queue_start(struct kbase_queue *queue) kbase_reset_gpu_assert_prevented(kbdev); lockdep_assert_held(&queue->kctx->csf.lock); - if (WARN_ON(!group || queue->bind_state != KBASE_CSF_QUEUE_BOUND)) + if (WARN_ON_ONCE(!group || queue->bind_state != KBASE_CSF_QUEUE_BOUND)) return -EINVAL; rt_mutex_lock(&kbdev->csf.scheduler.lock); @@ -2402,7 +2567,7 @@ static void schedule_in_cycle(struct kbase_queue_group *group, bool force) * of work needs to be enforced in situation such as entering into * protected mode). */ - if (likely(scheduler_timer_is_enabled_nolock(kbdev)) || force) { + if (likely(kbase_csf_scheduler_timer_is_enabled(kbdev)) || force) { dev_dbg(kbdev->dev, "Kicking async for group %d\n", group->handle); kbase_csf_scheduler_invoke_tock(kbdev); @@ -2485,13 +2650,12 @@ void insert_group_to_runnable(struct kbase_csf_scheduler *const scheduler, scheduler->total_runnable_grps++; - if (likely(scheduler_timer_is_enabled_nolock(kbdev)) && - (scheduler->total_runnable_grps == 1 || - scheduler->state == SCHED_SUSPENDED || + if (likely(kbase_csf_scheduler_timer_is_enabled(kbdev)) && + (scheduler->total_runnable_grps == 1 || scheduler->state == SCHED_SUSPENDED || scheduler->state == SCHED_SLEEPING)) { dev_dbg(kbdev->dev, "Kicking scheduler on first runnable group\n"); /* Fire a scheduling to start the time-slice */ - enqueue_tick_work(kbdev); + kbase_csf_scheduler_invoke_tick(kbdev); } else schedule_in_cycle(group, false); @@ -2501,6 +2665,17 @@ void insert_group_to_runnable(struct kbase_csf_scheduler *const scheduler, scheduler_wakeup(kbdev, false); } +static void cancel_tick_work(struct kbase_csf_scheduler *const scheduler) +{ + hrtimer_cancel(&scheduler->tick_timer); + atomic_set(&scheduler->pending_tick_work, false); +} + +static void cancel_tock_work(struct kbase_csf_scheduler *const scheduler) +{ + atomic_set(&scheduler->pending_tock_work, false); +} + static void remove_group_from_runnable(struct kbase_csf_scheduler *const scheduler, struct kbase_queue_group *group, @@ -2595,7 +2770,7 @@ void remove_group_from_runnable(struct kbase_csf_scheduler *const scheduler, scheduler->total_runnable_grps--; if (!scheduler->total_runnable_grps) { dev_dbg(kctx->kbdev->dev, "Scheduler idle has no runnable groups"); - cancel_tick_timer(kctx->kbdev); + cancel_tick_work(scheduler); WARN_ON(atomic_read(&scheduler->non_idle_offslot_grps)); if (scheduler->state != SCHED_SUSPENDED) enqueue_gpu_idle_work(scheduler, 0); @@ -2741,7 +2916,7 @@ static bool confirm_cmd_buf_empty(struct kbase_queue const *queue) u32 glb_version = iface->version; u64 const *input_addr = (u64 const *)queue->user_io_addr; - u64 const *output_addr = (u64 const *)(queue->user_io_addr + PAGE_SIZE); + u64 const *output_addr = (u64 const *)(queue->user_io_addr + PAGE_SIZE / sizeof(u64)); if (glb_version >= kbase_csf_interface_version(1, 0, 0)) { /* CS_STATUS_SCOREBOARD supported from CSF 1.0 */ @@ -2755,6 +2930,11 @@ static bool confirm_cmd_buf_empty(struct kbase_queue const *queue) CS_STATUS_SCOREBOARDS)); } + /* + * These 64-bit reads and writes will be atomic on a 64-bit kernel but may + * not be atomic on 32-bit kernels. Support for 32-bit kernels is limited to + * build-only. 
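+	 *
+	 * (Editor's illustration of the tearing hazard referred to above: a
+	 * 32-bit kernel may implement a u64 load as two 32-bit loads, so a
+	 * concurrent firmware update of CS_INSERT/CS_EXTRACT could be observed
+	 * half-old, half-new, whereas on a 64-bit kernel an aligned load such as
+	 *
+	 *     u64 insert = input_addr[CS_INSERT_LO / sizeof(u64)];
+	 *
+	 * is a single access and cannot tear.)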
+ */ cs_empty = (input_addr[CS_INSERT_LO / sizeof(u64)] == output_addr[CS_EXTRACT_LO / sizeof(u64)]); cs_idle = cs_empty && (!sb_status); @@ -2858,7 +3038,7 @@ static bool cleanup_csg_slot(struct kbase_queue_group *group) s8 slot; struct kbase_csf_csg_slot *csg_slot; unsigned long flags; - u32 i; + u32 csg_req, csg_ack, i; bool as_fault = false; lockdep_assert_held(&kbdev->csf.scheduler.lock); @@ -2898,8 +3078,16 @@ static bool cleanup_csg_slot(struct kbase_queue_group *group) as_fault = true; spin_unlock_irqrestore(&kctx->kbdev->hwaccess_lock, flags); +#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) + emit_gpu_metrics_to_frontend(kbdev); +#endif /* CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD */ + /* now marking the slot is vacant */ spin_lock_irqsave(&kbdev->csf.scheduler.interrupt_lock, flags); + /* Process pending SYNC_UPDATE, if any */ + csg_req = kbase_csf_firmware_csg_input_read(ginfo, CSG_REQ); + csg_ack = kbase_csf_firmware_csg_output(ginfo, CSG_ACK); + kbase_csf_handle_csg_sync_update(kbdev, ginfo, group, csg_req, csg_ack); kbdev->csf.scheduler.csg_slots[slot].resident_group = NULL; clear_bit(slot, kbdev->csf.scheduler.csg_slots_idle_mask); @@ -2962,10 +3150,10 @@ static void update_csg_slot_priority(struct kbase_queue_group *group, u8 prio) return; /* Read the csg_ep_cfg back for updating the priority field */ - ep_cfg = kbase_csf_firmware_csg_input_read(ginfo, CSG_EP_REQ); + ep_cfg = kbase_csf_firmware_csg_input_read(ginfo, CSG_EP_REQ_LO); prev_prio = CSG_EP_REQ_PRIORITY_GET(ep_cfg); ep_cfg = CSG_EP_REQ_PRIORITY_SET(ep_cfg, prio); - kbase_csf_firmware_csg_input(ginfo, CSG_EP_REQ, ep_cfg); + kbase_csf_firmware_csg_input(ginfo, CSG_EP_REQ_LO, ep_cfg); spin_lock_irqsave(&kbdev->csf.scheduler.interrupt_lock, flags); csg_req = kbase_csf_firmware_csg_output(ginfo, CSG_ACK); @@ -2999,12 +3187,11 @@ static void program_csg_slot(struct kbase_queue_group *group, s8 slot, const u64 compute_mask = shader_core_mask & group->compute_mask; const u64 fragment_mask = shader_core_mask & group->fragment_mask; const u64 tiler_mask = tiler_core_mask & group->tiler_mask; - const u8 num_cores = kbdev->gpu_props.num_cores; - const u8 compute_max = min(num_cores, group->compute_max); - const u8 fragment_max = min(num_cores, group->fragment_max); + const u8 compute_max = min(kbdev->gpu_props.num_cores, group->compute_max); + const u8 fragment_max = min(kbdev->gpu_props.num_cores, group->fragment_max); const u8 tiler_max = min(CSG_TILER_MAX, group->tiler_max); struct kbase_csf_cmd_stream_group_info *ginfo; - u32 ep_cfg = 0; + u64 ep_cfg = 0; u32 csg_req; u32 state; int i; @@ -3078,6 +3265,7 @@ static void program_csg_slot(struct kbase_queue_group *group, s8 slot, fragment_mask & U32_MAX); kbase_csf_firmware_csg_input(ginfo, CSG_ALLOW_FRAGMENT_HI, fragment_mask >> 32); + kbase_csf_firmware_csg_input(ginfo, CSG_ALLOW_OTHER, tiler_mask & U32_MAX); @@ -3089,7 +3277,7 @@ static void program_csg_slot(struct kbase_queue_group *group, s8 slot, ep_cfg = CSG_EP_REQ_FRAGMENT_EP_SET(ep_cfg, fragment_max); ep_cfg = CSG_EP_REQ_TILER_EP_SET(ep_cfg, tiler_max); ep_cfg = CSG_EP_REQ_PRIORITY_SET(ep_cfg, prio); - kbase_csf_firmware_csg_input(ginfo, CSG_EP_REQ, ep_cfg); + kbase_csf_firmware_csg_input(ginfo, CSG_EP_REQ_LO, ep_cfg & U32_MAX); /* Program the address space number assigned to the context */ kbase_csf_firmware_csg_input(ginfo, CSG_CONFIG, kctx->as_nr); @@ -3719,7 +3907,6 @@ static void program_suspending_csg_slots(struct kbase_device *kbdev) DECLARE_BITMAP(slot_mask, MAX_SUPPORTED_CSGS); 
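	/*
	 * (Editor's note, not part of the patch: the suspend timeout is now
	 * re-derived on every iteration of the wait loop below instead of once
	 * up front, so each remaining batch of slots gets a fresh budget; a
	 * minimal sketch of the pattern, using only helpers named in this diff:
	 *
	 *     while (!bitmap_empty(slot_mask, MAX_SUPPORTED_CSGS)) {
	 *             long remaining = kbase_csf_timeout_in_jiffies(
	 *                     kbase_get_timeout_ms(kbdev, CSF_CSG_SUSPEND_TIMEOUT));
	 *             remaining = wait_event_timeout(kbdev->csf.event_wait, ...,
	 *                                            remaining);
	 *     }
	 * )
	 */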
DECLARE_BITMAP(evicted_mask, MAX_SUPPORTED_CSGS) = {0}; bool suspend_wait_failed = false; - long remaining = kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms); lockdep_assert_held(&kbdev->csf.scheduler.lock); @@ -3731,6 +3918,7 @@ static void program_suspending_csg_slots(struct kbase_device *kbdev) while (!bitmap_empty(slot_mask, MAX_SUPPORTED_CSGS)) { DECLARE_BITMAP(changed, MAX_SUPPORTED_CSGS); + long remaining = kbase_csf_timeout_in_jiffies(kbase_get_timeout_ms(kbdev, CSF_CSG_SUSPEND_TIMEOUT)); bitmap_copy(changed, slot_mask, MAX_SUPPORTED_CSGS); @@ -3752,15 +3940,18 @@ static void program_suspending_csg_slots(struct kbase_device *kbdev) /* The on slot csg is now stopped */ clear_bit(i, slot_mask); - KBASE_TLSTREAM_TL_KBASE_DEVICE_SUSPEND_CSG( - kbdev, kbdev->gpu_props.props.raw_props.gpu_id, i); - if (likely(group)) { bool as_fault; /* Only do save/cleanup if the * group is not terminated during * the sleep. */ + + /* Only emit suspend, if there was no AS fault */ + if (kctx_as_enabled(group->kctx) && !group->faulted) + KBASE_TLSTREAM_TL_KBASE_DEVICE_SUSPEND_CSG( + kbdev, + kbdev->gpu_props.props.raw_props.gpu_id, i); save_csg_slot(group); as_fault = cleanup_csg_slot(group); /* If AS fault detected, evict it */ @@ -4258,16 +4449,13 @@ static void protm_enter_set_next_pending_seq(struct kbase_device *const kbdev) struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; u32 num_groups = kbdev->csf.global_iface.group_num; u32 num_csis = kbdev->csf.global_iface.groups[0].stream_num; - DECLARE_BITMAP(active_csgs, MAX_SUPPORTED_CSGS) = { 0 }; u32 i; kbase_csf_scheduler_spin_lock_assert_held(kbdev); - bitmap_xor(active_csgs, scheduler->csg_slots_idle_mask, scheduler->csg_inuse_bitmap, - num_groups); /* Reset the tick's pending protm seq number to invalid initially */ scheduler->tick_protm_pending_seq = KBASEP_TICK_PROTM_PEND_SCAN_SEQ_NR_INVALID; - for_each_set_bit(i, active_csgs, num_groups) { + for_each_set_bit(i, scheduler->csg_inuse_bitmap, num_groups) { struct kbase_queue_group *group = scheduler->csg_slots[i].resident_group; /* Set to the next pending protm group's scan_seq_number */ @@ -4508,8 +4696,9 @@ static void scheduler_apply(struct kbase_device *kbdev) program_suspending_csg_slots(kbdev); } -static void scheduler_ctx_scan_groups(struct kbase_device *kbdev, - struct kbase_context *kctx, int priority) +static void scheduler_ctx_scan_groups(struct kbase_device *kbdev, struct kbase_context *kctx, + int priority, struct list_head *privileged_groups, + struct list_head *active_groups) { struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; struct kbase_queue_group *group; @@ -4523,8 +4712,9 @@ static void scheduler_ctx_scan_groups(struct kbase_device *kbdev, if (!kctx_as_enabled(kctx)) return; - list_for_each_entry(group, &kctx->csf.sched.runnable_groups[priority], - link) { + list_for_each_entry(group, &kctx->csf.sched.runnable_groups[priority], link) { + bool protm_req; + if (WARN_ON(!list_empty(&group->link_to_schedule))) /* This would be a bug */ list_del_init(&group->link_to_schedule); @@ -4535,33 +4725,30 @@ static void scheduler_ctx_scan_groups(struct kbase_device *kbdev, /* Set the scanout sequence number, starting from 0 */ group->scan_seq_num = scheduler->csg_scan_count_for_tick++; + protm_req = !bitmap_empty(group->protm_pending_bitmap, + kbdev->csf.global_iface.groups[0].stream_num); + if (scheduler->tick_protm_pending_seq == - KBASEP_TICK_PROTM_PEND_SCAN_SEQ_NR_INVALID) { - if (!bitmap_empty(group->protm_pending_bitmap, - 
kbdev->csf.global_iface.groups[0].stream_num))
-				scheduler->tick_protm_pending_seq =
-					group->scan_seq_num;
+		    KBASEP_TICK_PROTM_PEND_SCAN_SEQ_NR_INVALID) {
+			if (protm_req)
+				scheduler->tick_protm_pending_seq = group->scan_seq_num;
 		}
-		if (queue_group_idle_locked(group)) {
+		if (protm_req && on_slot_group_idle_locked(group))
+			update_idle_protm_group_state_to_runnable(group);
+		else if (queue_group_idle_locked(group)) {
 			if (can_schedule_idle_group(group))
 				list_add_tail(&group->link_to_schedule,
 					&scheduler->idle_groups_to_schedule);
 			continue;
 		}
-		if (!scheduler->ngrp_to_schedule) {
-			/* keep the top csg's origin */
-			scheduler->top_ctx = kctx;
-			scheduler->top_grp = group;
+		if (protm_req && (group->priority == KBASE_QUEUE_GROUP_PRIORITY_REALTIME)) {
+			list_add_tail(&group->link_to_schedule, privileged_groups);
+			continue;
 		}
-		list_add_tail(&group->link_to_schedule,
-			&scheduler->groups_to_schedule);
-		group->prepared_seq_num = scheduler->ngrp_to_schedule++;
-
-		kctx->csf.sched.ngrp_to_schedule++;
-		count_active_address_space(kbdev, kctx);
+		list_add_tail(&group->link_to_schedule, active_groups);
 	}
 }
@@ -4891,18 +5078,15 @@ static void scheduler_handle_idle_slots(struct kbase_device *kbdev)
 	spin_unlock_irqrestore(&scheduler->interrupt_lock, flags);
 }
-static void scheduler_scan_idle_groups(struct kbase_device *kbdev)
+static void scheduler_scan_group_list(struct kbase_device *kbdev, struct list_head *groups)
 {
 	struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
 	struct kbase_queue_group *group, *n;
-	list_for_each_entry_safe(group, n, &scheduler->idle_groups_to_schedule,
-				 link_to_schedule) {
-		WARN_ON(!can_schedule_idle_group(group));
-
+	list_for_each_entry_safe(group, n, groups, link_to_schedule) {
 		if (!scheduler->ngrp_to_schedule) {
 			/* keep the top csg's origin */
 			scheduler->top_ctx = group->kctx;
 			scheduler->top_grp = group;
 		}
@@ -5049,7 +5234,12 @@ static bool all_on_slot_groups_remained_idle(struct kbase_device *kbdev)
 		if (!queue || !queue->user_io_addr)
 			continue;
-		output_addr = (u64 const *)(queue->user_io_addr + PAGE_SIZE);
+		output_addr = (u64 const *)(queue->user_io_addr + PAGE_SIZE / sizeof(u64));
+		/*
+		 * These 64-bit reads and writes will be atomic on a 64-bit kernel
+		 * but may not be atomic on 32-bit kernels. Support for 32-bit
+		 * kernels is limited to build-only.
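+		 *
+		 * (Editor's note: idleness is re-confirmed by comparing this
+		 * freshly read extract offset with the snapshot recorded when
+		 * the idle event was originally seen, e.g.
+		 *
+		 *     if (output_addr[CS_EXTRACT_LO / sizeof(u64)] != queue->extract_ofs)
+		 *             the group has made progress and is not idle,
+		 *
+		 * which is exactly the check performed just below.)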
+ */ cur_extract_ofs = output_addr[CS_EXTRACT_LO / sizeof(u64)]; if (cur_extract_ofs != queue->extract_ofs) { /* More work has been executed since the idle @@ -5141,10 +5331,13 @@ static void scheduler_sleep_on_idle(struct kbase_device *kbdev) dev_dbg(kbdev->dev, "Scheduler to be put to sleep on GPU becoming idle"); - cancel_tick_timer(kbdev); + cancel_tick_work(scheduler); scheduler_pm_idle_before_sleep(kbdev); scheduler->state = SCHED_SLEEPING; KBASE_KTRACE_ADD(kbdev, SCHED_SLEEPING, NULL, scheduler->state); +#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) + emit_gpu_metrics_to_frontend(kbdev); +#endif /* CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD */ } #endif @@ -5162,6 +5355,7 @@ static void scheduler_sleep_on_idle(struct kbase_device *kbdev) */ static bool scheduler_suspend_on_idle(struct kbase_device *kbdev) { + struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; int ret = suspend_active_groups_on_powerdown(kbdev, false); if (ret) { @@ -5169,7 +5363,7 @@ static bool scheduler_suspend_on_idle(struct kbase_device *kbdev) atomic_read( &kbdev->csf.scheduler.non_idle_offslot_grps)); /* Bring forward the next tick */ - kbase_csf_scheduler_tick_advance(kbdev); + kbase_csf_scheduler_invoke_tick(kbdev); return false; } @@ -5180,7 +5374,7 @@ static bool scheduler_suspend_on_idle(struct kbase_device *kbdev) dev_dbg(kbdev->dev, "Scheduler to be suspended on GPU becoming idle"); scheduler_suspend(kbdev); - cancel_tick_timer(kbdev); + cancel_tick_work(scheduler); return true; } @@ -5514,6 +5708,7 @@ static void sc_rails_off_worker(struct work_struct *work) static int scheduler_prepare(struct kbase_device *kbdev) { struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; + struct list_head privileged_groups, active_groups; unsigned long flags; int i; @@ -5539,6 +5734,8 @@ static int scheduler_prepare(struct kbase_device *kbdev) scheduler->num_active_address_spaces = 0; scheduler->num_csg_slots_for_tick = 0; bitmap_zero(scheduler->csg_slots_prio_update, MAX_SUPPORTED_CSGS); + INIT_LIST_HEAD(&privileged_groups); + INIT_LIST_HEAD(&active_groups); spin_lock_irqsave(&scheduler->interrupt_lock, flags); scheduler->tick_protm_pending_seq = @@ -5548,10 +5745,17 @@ static int scheduler_prepare(struct kbase_device *kbdev) struct kbase_context *kctx; list_for_each_entry(kctx, &scheduler->runnable_kctxs, csf.link) - scheduler_ctx_scan_groups(kbdev, kctx, i); + scheduler_ctx_scan_groups(kbdev, kctx, i, &privileged_groups, + &active_groups); } spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); + /* Adds privileged (RT + p.mode) groups to the scanout list */ + scheduler_scan_group_list(kbdev, &privileged_groups); + + /* Adds remainder of active groups to the scanout list */ + scheduler_scan_group_list(kbdev, &active_groups); + /* Update this tick's non-idle groups */ scheduler->non_idle_scanout_grps = scheduler->ngrp_to_schedule; @@ -5566,7 +5770,7 @@ static int scheduler_prepare(struct kbase_device *kbdev) scheduler->non_idle_scanout_grps); /* Adds those idle but runnable groups to the scanout list */ - scheduler_scan_idle_groups(kbdev); + scheduler_scan_group_list(kbdev, &scheduler->idle_groups_to_schedule); WARN_ON(scheduler->csg_scan_count_for_tick < scheduler->ngrp_to_schedule); @@ -5668,11 +5872,9 @@ static int prepare_fast_local_tock(struct kbase_device *kbdev) return bitmap_weight(csg_bitmap, num_groups); } -static int wait_csg_slots_suspend(struct kbase_device *kbdev, unsigned long *slot_mask, - unsigned int timeout_ms) +static int wait_csg_slots_suspend(struct 
kbase_device *kbdev, unsigned long *slot_mask) { struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; - long remaining = kbase_csf_timeout_in_jiffies(timeout_ms); u32 num_groups = kbdev->csf.global_iface.group_num; int err = 0; DECLARE_BITMAP(slot_mask_local, MAX_SUPPORTED_CSGS); @@ -5681,11 +5883,11 @@ static int wait_csg_slots_suspend(struct kbase_device *kbdev, unsigned long *slo bitmap_copy(slot_mask_local, slot_mask, MAX_SUPPORTED_CSGS); - while (!bitmap_empty(slot_mask_local, MAX_SUPPORTED_CSGS) && remaining) { + while (!bitmap_empty(slot_mask_local, MAX_SUPPORTED_CSGS)) { + long remaining = kbase_csf_timeout_in_jiffies(kbase_get_timeout_ms(kbdev, CSF_CSG_SUSPEND_TIMEOUT)); DECLARE_BITMAP(changed, MAX_SUPPORTED_CSGS); bitmap_copy(changed, slot_mask_local, MAX_SUPPORTED_CSGS); - remaining = wait_event_timeout( kbdev->csf.event_wait, slots_state_changed(kbdev, changed, csg_slot_stopped_locked), remaining); @@ -5702,18 +5904,23 @@ static int wait_csg_slots_suspend(struct kbase_device *kbdev, unsigned long *slo /* The on slot csg is now stopped */ clear_bit(i, slot_mask_local); - KBASE_TLSTREAM_TL_KBASE_DEVICE_SUSPEND_CSG( - kbdev, kbdev->gpu_props.props.raw_props.gpu_id, i); - group = scheduler->csg_slots[i].resident_group; if (likely(group)) { /* Only do save/cleanup if the * group is not terminated during * the sleep. */ + + /* Only emit suspend, if there was no AS fault */ + if (kctx_as_enabled(group->kctx) && !group->faulted) + KBASE_TLSTREAM_TL_KBASE_DEVICE_SUSPEND_CSG( + kbdev, + kbdev->gpu_props.props.raw_props.gpu_id, i); + save_csg_slot(group); - if (cleanup_csg_slot(group)) + if (cleanup_csg_slot(group)) { sched_evict_group(group, true, true); + } } } } else { @@ -5724,8 +5931,8 @@ static int wait_csg_slots_suspend(struct kbase_device *kbdev, unsigned long *slo slot_mask_local[0]); /* Return the bitmask of the timed out slots to the caller */ bitmap_copy(slot_mask, slot_mask_local, MAX_SUPPORTED_CSGS); - err = -ETIMEDOUT; + break; } } @@ -5787,7 +5994,7 @@ static void evict_lru_or_blocked_csg(struct kbase_device *kbdev) * idle. 
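 *
 * (Editor's note: prepared_seq_num is assigned in scan-out order, so among
 * idle, non-realtime groups the one with the largest value was ranked last
 * in the most recent scan and is treated as least recently used:
 *
 *     if (group->run_state == KBASE_CSF_GROUP_IDLE &&
 *         group->priority != KBASE_QUEUE_GROUP_PRIORITY_REALTIME &&
 *         (!lru_idle_group ||
 *          lru_idle_group->prepared_seq_num < group->prepared_seq_num))
 *             lru_idle_group = group;
 *
 * mirroring the selection performed in the code that follows.)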
*/ if ((group->run_state == KBASE_CSF_GROUP_IDLE) && - (group->priority != BASE_QUEUE_GROUP_PRIORITY_REALTIME) && + (group->priority != KBASE_QUEUE_GROUP_PRIORITY_REALTIME) && ((lru_idle_group == NULL) || (lru_idle_group->prepared_seq_num < group->prepared_seq_num))) { if (WARN_ON(group->kctx->as_nr < 0)) @@ -5809,7 +6016,7 @@ static void evict_lru_or_blocked_csg(struct kbase_device *kbdev) lru_idle_group->handle, lru_idle_group->kctx->tgid, lru_idle_group->kctx->id, lru_idle_group->csg_nr); suspend_queue_group(lru_idle_group); - if (wait_csg_slots_suspend(kbdev, &slot_mask, kbdev->csf.fw_timeout_ms)) { + if (wait_csg_slots_suspend(kbdev, &slot_mask)) { enum dumpfault_error_type error_type = DF_CSG_SUSPEND_TIMEOUT; dev_warn( @@ -6033,10 +6240,8 @@ static bool can_skip_scheduling(struct kbase_device *kbdev) return false; } -static void schedule_on_tock(struct kthread_work *work) +static void schedule_on_tock(struct kbase_device *kbdev) { - struct kbase_device *kbdev = - container_of(work, struct kbase_device, csf.scheduler.tock_work.work); struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; int err; @@ -6071,12 +6276,12 @@ static void schedule_on_tock(struct kthread_work *work) KBASE_KTRACE_ADD(kbdev, SCHED_INACTIVE, NULL, scheduler->state); if (!scheduler->total_runnable_grps) enqueue_gpu_idle_work(scheduler, 0); +#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) + emit_gpu_metrics_to_frontend(kbdev); +#endif /* CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD */ rt_mutex_unlock(&scheduler->lock); kbase_reset_gpu_allow(kbdev); - dev_dbg(kbdev->dev, - "Waking up for event after schedule-on-tock completes."); - wake_up_all(&kbdev->csf.event_wait); KBASE_KTRACE_ADD(kbdev, SCHEDULER_TOCK_END, NULL, 0u); return; @@ -6085,10 +6290,8 @@ exit_no_schedule_unlock: kbase_reset_gpu_allow(kbdev); } -static void schedule_on_tick(struct kthread_work *work) +static void schedule_on_tick(struct kbase_device *kbdev) { - struct kbase_device *kbdev = - container_of(work, struct kbase_device, csf.scheduler.tick_work); struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; int err = kbase_reset_gpu_try_prevent(kbdev); @@ -6115,23 +6318,25 @@ static void schedule_on_tick(struct kthread_work *work) scheduler->last_schedule = jiffies; /* Kicking next scheduling if needed */ - if (likely(scheduler_timer_is_enabled_nolock(kbdev)) && - (scheduler->total_runnable_grps > 0)) { - start_tick_timer(kbdev); - dev_dbg(kbdev->dev, - "scheduling for next tick, num_runnable_groups:%u\n", + if (likely(kbase_csf_scheduler_timer_is_enabled(kbdev)) && + (scheduler->total_runnable_grps > 0)) { + hrtimer_start(&scheduler->tick_timer, + HR_TIMER_DELAY_MSEC(scheduler->csg_scheduling_period_ms), + HRTIMER_MODE_REL); + dev_dbg(kbdev->dev, "scheduling for next tick, num_runnable_groups:%u\n", scheduler->total_runnable_grps); } else if (!scheduler->total_runnable_grps) { enqueue_gpu_idle_work(scheduler, 0); } scheduler->state = SCHED_INACTIVE; +#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) + emit_gpu_metrics_to_frontend(kbdev); +#endif /* CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD */ rt_mutex_unlock(&scheduler->lock); KBASE_KTRACE_ADD(kbdev, SCHED_INACTIVE, NULL, scheduler->state); kbase_reset_gpu_allow(kbdev); - dev_dbg(kbdev->dev, "Waking up for event after schedule-on-tick completes."); - wake_up_all(&kbdev->csf.event_wait); KBASE_KTRACE_ADD(kbdev, SCHEDULER_TICK_END, NULL, scheduler->total_runnable_grps); return; @@ -6161,7 +6366,7 @@ static int suspend_active_queue_groups(struct kbase_device *kbdev, 
} } - ret = wait_csg_slots_suspend(kbdev, slot_mask, kbdev->reset_timeout_ms); + ret = wait_csg_slots_suspend(kbdev, slot_mask); return ret; } @@ -6180,7 +6385,7 @@ static int suspend_active_queue_groups_on_reset(struct kbase_device *kbdev) dev_warn(kbdev->dev, "Timeout waiting for CSG slots to suspend before reset, slot_mask: 0x%*pb\n", kbdev->csf.global_iface.group_num, slot_mask); //TODO: should introduce SSCD report if this happens. - kbase_gpu_timeout_debug_message(kbdev); + kbase_gpu_timeout_debug_message(kbdev, ""); dev_warn(kbdev->dev, "[%llu] Firmware ping %d", kbase_backend_get_cycle_cnt(kbdev), kbase_csf_firmware_ping_wait(kbdev, 0)); @@ -6201,11 +6406,10 @@ static int suspend_active_queue_groups_on_reset(struct kbase_device *kbdev) * overflow. */ kbase_gpu_start_cache_clean(kbdev, GPU_COMMAND_CACHE_CLN_INV_L2_LSC); - ret2 = kbase_gpu_wait_cache_clean_timeout(kbdev, - kbdev->reset_timeout_ms); + ret2 = kbase_gpu_wait_cache_clean_timeout(kbdev, kbdev->mmu_or_gpu_cache_op_wait_time_ms); if (ret2) { - dev_warn(kbdev->dev, "[%llu] Timeout waiting for cache clean to complete before reset", - kbase_backend_get_cycle_cnt(kbdev)); + dev_err(kbdev->dev, "[%llu] Timeout waiting for CACHE_CLN_INV_L2_LSC", + kbase_backend_get_cycle_cnt(kbdev)); if (!ret) ret = ret2; } @@ -6323,17 +6527,6 @@ unlock: return suspend_on_slot_groups; } -static void cancel_tick_work(struct kbase_csf_scheduler *const scheduler) -{ - kthread_cancel_work_sync(&scheduler->tick_work); -} - -static void cancel_tock_work(struct kbase_csf_scheduler *const scheduler) -{ - atomic_set(&scheduler->pending_tock_work, false); - kthread_cancel_delayed_work_sync(&scheduler->tock_work); -} - static void scheduler_inner_reset(struct kbase_device *kbdev) { u32 const num_groups = kbdev->csf.global_iface.group_num; @@ -6348,7 +6541,6 @@ static void scheduler_inner_reset(struct kbase_device *kbdev) #else cancel_work_sync(&kbdev->csf.scheduler.gpu_idle_work); #endif - cancel_tick_timer(kbdev); cancel_tick_work(scheduler); cancel_tock_work(scheduler); cancel_delayed_work_sync(&scheduler->ping_work); @@ -6547,8 +6739,7 @@ int kbase_csf_scheduler_group_copy_suspend_buf(struct kbase_queue_group *group, if (!WARN_ON(scheduler->state == SCHED_SUSPENDED)) suspend_queue_group(group); - err = wait_csg_slots_suspend(kbdev, slot_mask, - kbdev->csf.fw_timeout_ms); + err = wait_csg_slots_suspend(kbdev, slot_mask); if (err) { const struct gpu_uevent evt = { .type = GPU_UEVENT_TYPE_KMD_ERROR, @@ -6593,7 +6784,7 @@ int kbase_csf_scheduler_group_copy_suspend_buf(struct kbase_queue_group *group, target_page_nr < sus_buf->nr_pages; i++) { struct page *pg = as_page(group->normal_suspend_buf.phy[i]); - void *sus_page = kmap(pg); + void *sus_page = kbase_kmap(pg); if (sus_page) { kbase_sync_single_for_cpu(kbdev, @@ -6604,7 +6795,7 @@ int kbase_csf_scheduler_group_copy_suspend_buf(struct kbase_queue_group *group, sus_buf->pages, sus_page, &to_copy, sus_buf->nr_pages, &target_page_nr, offset); - kunmap(pg); + kbase_kunmap(pg, sus_page); if (err) break; } else { @@ -6720,12 +6911,21 @@ static struct kbase_queue_group *scheduler_get_protm_enter_async_group( spin_lock_irqsave(&scheduler->interrupt_lock, flags); - if (kbase_csf_scheduler_protected_mode_in_use(kbdev) || - bitmap_empty(pending, ginfo->stream_num)) + if (bitmap_empty(pending, ginfo->stream_num)) { + dev_dbg(kbdev->dev, + "Pmode requested for group %d of ctx %d_%d with no pending queues", + input_grp->handle, input_grp->kctx->tgid, input_grp->kctx->id); + input_grp = NULL; + } else if 
(kbase_csf_scheduler_protected_mode_in_use(kbdev)) { + kbase_csf_scheduler_invoke_tock(kbdev); input_grp = NULL; + } spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); } else { + if (group && (group->priority == KBASE_QUEUE_GROUP_PRIORITY_REALTIME)) + kbase_csf_scheduler_invoke_tock(kbdev); + input_grp = NULL; } @@ -6753,11 +6953,8 @@ void kbase_csf_scheduler_group_protm_enter(struct kbase_queue_group *group) rt_mutex_lock(&scheduler->lock); - if (group->run_state == KBASE_CSF_GROUP_IDLE) { - group->run_state = KBASE_CSF_GROUP_RUNNABLE; - KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSF_GROUP_RUNNABLE, group, - group->run_state); - } + if (on_slot_group_idle_locked(group)) + update_idle_protm_group_state_to_runnable(group); /* Check if the group is now eligible for execution in protected mode. */ if (scheduler_get_protm_enter_async_group(kbdev, group)) scheduler_group_check_protm_enter(kbdev, group); @@ -7084,6 +7281,13 @@ int kbase_csf_scheduler_context_init(struct kbase_context *kctx) { int priority; int err; + struct kbase_device *kbdev = kctx->kbdev; + +#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) + err = gpu_metrics_ctx_init(kctx); + if (err) + return err; +#endif /* CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD */ kbase_ctx_sched_init_ctx(kctx); @@ -7115,8 +7319,7 @@ int kbase_csf_scheduler_context_init(struct kbase_context *kctx) err = kbase_csf_event_wait_add(kctx, check_group_sync_update_cb, kctx); if (err) { - dev_err(kctx->kbdev->dev, - "Failed to register a sync update callback"); + dev_err(kbdev->dev, "Failed to register a sync update callback"); goto event_wait_add_failed; } @@ -7126,6 +7329,9 @@ event_wait_add_failed: kbase_destroy_kworker_stack(&kctx->csf.sched.sync_update_worker); alloc_wq_failed: kbase_ctx_sched_remove_ctx(kctx); +#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) + gpu_metrics_ctx_term(kctx); +#endif /* CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD */ return err; } @@ -7136,6 +7342,74 @@ void kbase_csf_scheduler_context_term(struct kbase_context *kctx) kbase_destroy_kworker_stack(&kctx->csf.sched.sync_update_worker); kbase_ctx_sched_remove_ctx(kctx); +#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) + gpu_metrics_ctx_term(kctx); +#endif /* CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD */ +} + +static int kbase_csf_scheduler_kthread(void *data) +{ + struct kbase_device *const kbdev = data; + struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; + + while (scheduler->kthread_running) { + struct kbase_queue *queue; + + if (wait_for_completion_interruptible(&scheduler->kthread_signal) != 0) + continue; + reinit_completion(&scheduler->kthread_signal); + + /* Iterate through queues with pending kicks */ + do { + u8 prio; + + spin_lock(&kbdev->csf.pending_gpuq_kicks_lock); + queue = NULL; + for (prio = 0; prio != KBASE_QUEUE_GROUP_PRIORITY_COUNT; ++prio) { + if (!list_empty(&kbdev->csf.pending_gpuq_kicks[prio])) { + queue = list_first_entry( + &kbdev->csf.pending_gpuq_kicks[prio], + struct kbase_queue, pending_kick_link); + list_del_init(&queue->pending_kick_link); + break; + } + } + spin_unlock(&kbdev->csf.pending_gpuq_kicks_lock); + + if (queue != NULL) { + WARN_ONCE( + prio != queue->group_priority, + "Queue %pK has priority %hhu but instead its kick was handled at priority %hhu", + (void *)queue, queue->group_priority, prio); + + kbase_csf_process_queue_kick(queue); + + /* Perform a scheduling tock for high-priority queue groups if + * required. 
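+				 *
+				 * (Editor's sketch of the wakeup protocol, not part
+				 * of the patch: tick/tock producers perform
+				 *
+				 *     if (atomic_cmpxchg(&scheduler->pending_tock_work,
+				 *                        false, true) == false)
+				 *             complete(&scheduler->kthread_signal);
+				 *
+				 * so only the first request after a drain signals the
+				 * completion, which this thread consumes with
+				 * wait_for_completion_interruptible() followed by
+				 * reinit_completion(). The BUILD_BUG_ONs below pin the
+				 * assumption that REALTIME and HIGH are priority levels
+				 * 0 and 1, which the prio <= KBASE_QUEUE_GROUP_PRIORITY_HIGH
+				 * test relies on.)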
+ */ + BUILD_BUG_ON(KBASE_QUEUE_GROUP_PRIORITY_REALTIME != 0); + BUILD_BUG_ON(KBASE_QUEUE_GROUP_PRIORITY_HIGH != 1); + if ((prio <= KBASE_QUEUE_GROUP_PRIORITY_HIGH) && + atomic_read(&scheduler->pending_tock_work)) + schedule_on_tock(kbdev); + } + } while (queue != NULL); + + /* Check if we need to perform a scheduling tick/tock. A tick + * event shall override a tock event but not vice-versa. + */ + if (atomic_cmpxchg(&scheduler->pending_tick_work, true, false) == true) { + atomic_set(&scheduler->pending_tock_work, false); + schedule_on_tick(kbdev); + } else if (atomic_read(&scheduler->pending_tock_work)) { + schedule_on_tock(kbdev); + } + + dev_dbg(kbdev->dev, "Waking up for event after a scheduling iteration."); + wake_up_all(&kbdev->csf.event_wait); + } + + return 0; } int kbase_csf_scheduler_init(struct kbase_device *kbdev) @@ -7154,33 +7428,51 @@ int kbase_csf_scheduler_init(struct kbase_device *kbdev) return -ENOMEM; } + init_completion(&scheduler->kthread_signal); + scheduler->kthread_running = true; + scheduler->gpuq_kthread = + kthread_run(&kbase_csf_scheduler_kthread, kbdev, "mali-gpuq-kthread"); + if (!scheduler->gpuq_kthread) { + kfree(scheduler->csg_slots); + scheduler->csg_slots = NULL; + + dev_err(kbdev->dev, "Failed to spawn the GPU queue submission worker thread"); + return -ENOMEM; + } +#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) && !IS_ENABLED(CONFIG_MALI_NO_MALI) + scheduler->gpu_metrics_tb = + kbase_csf_firmware_get_trace_buffer(kbdev, KBASE_CSFFW_GPU_METRICS_BUF_NAME); + if (!scheduler->gpu_metrics_tb) { + scheduler->kthread_running = false; + complete(&scheduler->kthread_signal); + kthread_stop(scheduler->gpuq_kthread); + scheduler->gpuq_kthread = NULL; + + kfree(scheduler->csg_slots); + scheduler->csg_slots = NULL; + + dev_err(kbdev->dev, "Failed to get the handler of gpu_metrics from trace buffer"); + return -ENOENT; + } +#endif /* CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD */ + return kbase_csf_mcu_shared_regs_data_init(kbdev); } int kbase_csf_scheduler_early_init(struct kbase_device *kbdev) { - int err; struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; - scheduler->timer_enabled = true; + atomic_set(&scheduler->timer_enabled, true); - err = kbase_create_realtime_thread(kbdev, kthread_worker_fn, &scheduler->csf_worker, - "csf_scheduler"); - if (err) { - dev_err(kbdev->dev, "Failed to allocate scheduler kworker\n"); - return -ENOMEM; - } scheduler->idle_wq = alloc_ordered_workqueue( "csf_scheduler_gpu_idle_wq", WQ_HIGHPRI); if (!scheduler->idle_wq) { - dev_err(kbdev->dev, - "Failed to allocate GPU idle scheduler workqueue\n"); - kbase_destroy_kworker_stack(&kbdev->csf.scheduler.csf_worker); + dev_err(kbdev->dev, "Failed to allocate GPU idle scheduler workqueue\n"); return -ENOMEM; } - kthread_init_work(&scheduler->tick_work, schedule_on_tick); - kthread_init_delayed_work(&scheduler->tock_work, schedule_on_tock); + atomic_set(&scheduler->pending_tick_work, false); atomic_set(&scheduler->pending_tock_work, false); INIT_DEFERRABLE_WORK(&scheduler->ping_work, firmware_aliveness_monitor); @@ -7223,7 +7515,6 @@ int kbase_csf_scheduler_early_init(struct kbase_device *kbdev) hrtimer_init(&scheduler->tick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); scheduler->tick_timer.function = tick_timer_callback; - scheduler->tick_timer_active = false; kbase_csf_tiler_heap_reclaim_mgr_init(kbdev); @@ -7232,6 +7523,14 @@ int kbase_csf_scheduler_early_init(struct kbase_device *kbdev) void kbase_csf_scheduler_term(struct kbase_device *kbdev) { + struct 
kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; + + if (scheduler->gpuq_kthread) { + scheduler->kthread_running = false; + complete(&scheduler->kthread_signal); + kthread_stop(scheduler->gpuq_kthread); + } + if (kbdev->csf.scheduler.csg_slots) { WARN_ON(atomic_read(&kbdev->csf.scheduler.non_idle_offslot_grps)); /* The unload of Driver can take place only when all contexts have @@ -7261,9 +7560,6 @@ void kbase_csf_scheduler_term(struct kbase_device *kbdev) rt_mutex_unlock(&kbdev->csf.scheduler.lock); cancel_delayed_work_sync(&kbdev->csf.scheduler.ping_work); - cancel_tick_timer(kbdev); - cancel_tick_work(&kbdev->csf.scheduler); - cancel_tock_work(&kbdev->csf.scheduler); kfree(kbdev->csf.scheduler.csg_slots); kbdev->csf.scheduler.csg_slots = NULL; } @@ -7277,8 +7573,6 @@ void kbase_csf_scheduler_early_term(struct kbase_device *kbdev) { if (kbdev->csf.scheduler.idle_wq) destroy_workqueue(kbdev->csf.scheduler.idle_wq); - if (kbdev->csf.scheduler.csf_worker.task) - kbase_destroy_kworker_stack(&kbdev->csf.scheduler.csf_worker); kbase_csf_tiler_heap_reclaim_mgr_term(kbdev); } @@ -7299,7 +7593,7 @@ static void scheduler_enable_tick_timer_nolock(struct kbase_device *kbdev) lockdep_assert_held(&kbdev->csf.scheduler.lock); - if (unlikely(!scheduler_timer_is_enabled_nolock(kbdev))) + if (unlikely(!kbase_csf_scheduler_timer_is_enabled(kbdev))) return; WARN_ON((scheduler->state != SCHED_INACTIVE) && @@ -7307,7 +7601,7 @@ static void scheduler_enable_tick_timer_nolock(struct kbase_device *kbdev) (scheduler->state != SCHED_SLEEPING)); if (scheduler->total_runnable_grps > 0) { - enqueue_tick_work(kbdev); + kbase_csf_scheduler_invoke_tick(kbdev); dev_dbg(kbdev->dev, "Re-enabling the scheduler timer\n"); } else if (scheduler->state != SCHED_SUSPENDED) { enqueue_gpu_idle_work(scheduler, 0); @@ -7321,43 +7615,24 @@ void kbase_csf_scheduler_enable_tick_timer(struct kbase_device *kbdev) rt_mutex_unlock(&kbdev->csf.scheduler.lock); } -bool kbase_csf_scheduler_timer_is_enabled(struct kbase_device *kbdev) -{ - struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; - bool enabled; - - rt_mutex_lock(&scheduler->lock); - enabled = scheduler_timer_is_enabled_nolock(kbdev); - rt_mutex_unlock(&scheduler->lock); - - return enabled; -} - void kbase_csf_scheduler_timer_set_enabled(struct kbase_device *kbdev, bool enable) { struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; bool currently_enabled; + /* This lock is taken to prevent this code being executed concurrently + * by userspace. + */ rt_mutex_lock(&scheduler->lock); - currently_enabled = scheduler_timer_is_enabled_nolock(kbdev); + currently_enabled = kbase_csf_scheduler_timer_is_enabled(kbdev); if (currently_enabled && !enable) { - scheduler->timer_enabled = false; - cancel_tick_timer(kbdev); - rt_mutex_unlock(&scheduler->lock); - /* The non-sync version to cancel the normal work item is not - * available, so need to drop the lock before cancellation. 
- */ + atomic_set(&scheduler->timer_enabled, false); cancel_tick_work(scheduler); - cancel_tock_work(scheduler); - return; - } - - if (!currently_enabled && enable) { - scheduler->timer_enabled = true; - - scheduler_enable_tick_timer_nolock(kbdev); + } else if (!currently_enabled && enable) { + atomic_set(&scheduler->timer_enabled, true); + kbase_csf_scheduler_invoke_tick(kbdev); } rt_mutex_unlock(&scheduler->lock); @@ -7367,17 +7642,17 @@ void kbase_csf_scheduler_kick(struct kbase_device *kbdev) { struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler; - rt_mutex_lock(&scheduler->lock); + if (unlikely(kbase_csf_scheduler_timer_is_enabled(kbdev))) + return; - if (unlikely(scheduler_timer_is_enabled_nolock(kbdev))) - goto out; + /* This lock is taken to prevent this code being executed concurrently + * by userspace. + */ + rt_mutex_lock(&scheduler->lock); - if (scheduler->total_runnable_grps > 0) { - enqueue_tick_work(kbdev); - dev_dbg(kbdev->dev, "Kicking the scheduler manually\n"); - } + kbase_csf_scheduler_invoke_tick(kbdev); + dev_dbg(kbdev->dev, "Kicking the scheduler manually\n"); -out: rt_mutex_unlock(&scheduler->lock); } @@ -7414,7 +7689,7 @@ int kbase_csf_scheduler_pm_suspend_no_lock(struct kbase_device *kbdev) } else { dev_dbg(kbdev->dev, "Scheduler PM suspend"); scheduler_suspend(kbdev); - cancel_tick_timer(kbdev); + cancel_tick_work(scheduler); } } @@ -7492,7 +7767,7 @@ void kbase_csf_scheduler_pm_idle(struct kbase_device *kbdev) } KBASE_EXPORT_TEST_API(kbase_csf_scheduler_pm_idle); -int kbase_csf_scheduler_wait_mcu_active(struct kbase_device *kbdev) +static int scheduler_wait_mcu_active(struct kbase_device *kbdev, bool killable_wait) { struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; unsigned long flags; @@ -7505,9 +7780,17 @@ int kbase_csf_scheduler_wait_mcu_active(struct kbase_device *kbdev) spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); kbase_pm_unlock(kbdev); - kbase_pm_wait_for_poweroff_work_complete(kbdev); + if (killable_wait) + err = kbase_pm_killable_wait_for_poweroff_work_complete(kbdev); + else + err = kbase_pm_wait_for_poweroff_work_complete(kbdev); + if (err) + return err; - err = kbase_pm_wait_for_desired_state(kbdev); + if (killable_wait) + err = kbase_pm_killable_wait_for_desired_state(kbdev); + else + err = kbase_pm_wait_for_desired_state(kbdev); if (!err) { spin_lock_irqsave(&kbdev->hwaccess_lock, flags); WARN_ON(kbdev->pm.backend.mcu_state != KBASE_MCU_ON); @@ -7516,6 +7799,17 @@ int kbase_csf_scheduler_wait_mcu_active(struct kbase_device *kbdev) return err; } + +int kbase_csf_scheduler_killable_wait_mcu_active(struct kbase_device *kbdev) +{ + return scheduler_wait_mcu_active(kbdev, true); +} + +int kbase_csf_scheduler_wait_mcu_active(struct kbase_device *kbdev) +{ + return scheduler_wait_mcu_active(kbdev, false); +} + KBASE_EXPORT_TEST_API(kbase_csf_scheduler_wait_mcu_active); #ifdef KBASE_PM_RUNTIME @@ -7594,8 +7888,7 @@ void kbase_csf_scheduler_force_sleep(struct kbase_device *kbdev) struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; rt_mutex_lock(&scheduler->lock); - if (kbase_pm_gpu_sleep_allowed(kbdev) && - (scheduler->state == SCHED_INACTIVE)) + if (kbase_pm_gpu_sleep_allowed(kbdev) && (scheduler->state == SCHED_INACTIVE)) scheduler_sleep_on_idle(kbdev); rt_mutex_unlock(&scheduler->lock); } diff --git a/mali_kbase/csf/mali_kbase_csf_scheduler.h b/mali_kbase/csf/mali_kbase_csf_scheduler.h index 4062d78..88521f0 100644 --- a/mali_kbase/csf/mali_kbase_csf_scheduler.h +++ 
b/mali_kbase/csf/mali_kbase_csf_scheduler.h
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 /*
 *
- * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved.
 *
 * This program is free software and is provided to you under the terms of the
 * GNU General Public License version 2 as published by the Free Software
@@ -338,7 +338,10 @@ kbase_csf_scheduler_spin_lock_assert_held(struct kbase_device *kbdev)
 *
 * Return: true if the scheduler is configured to wake up periodically
 */
-bool kbase_csf_scheduler_timer_is_enabled(struct kbase_device *kbdev);
+static inline bool kbase_csf_scheduler_timer_is_enabled(struct kbase_device *kbdev)
+{
+	return atomic_read(&kbdev->csf.scheduler.timer_enabled);
+}
 /**
 * kbase_csf_scheduler_timer_set_enabled() - Enable/disable periodic
@@ -412,6 +415,22 @@ void kbase_csf_scheduler_pm_idle(struct kbase_device *kbdev);
 int kbase_csf_scheduler_wait_mcu_active(struct kbase_device *kbdev);
 /**
+ * kbase_csf_scheduler_killable_wait_mcu_active - Wait for the MCU to actually become
+ *                                                active in killable state.
+ *
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
+ *
+ * This function is the same as kbase_csf_scheduler_wait_mcu_active(), except that
+ * it would allow the SIGKILL signal to interrupt the wait.
+ * This function is supposed to be called from the code that is executed in ioctl or
+ * Userspace context, wherever it is safe to do so.
+ *
+ * Return: 0 if the MCU was successfully activated, or -ETIMEDOUT code on timeout error or
+ * -ERESTARTSYS if the wait was interrupted.
+ */
+int kbase_csf_scheduler_killable_wait_mcu_active(struct kbase_device *kbdev);
+
+/**
 * kbase_csf_scheduler_pm_resume_no_lock - Reactivate the scheduler on system resume
 *
 * @kbdev: Instance of a GPU platform device that implements a CSF interface.
@@ -474,69 +493,24 @@ static inline bool kbase_csf_scheduler_all_csgs_idle(struct kbase_device *kbdev)
 }
 /**
- * kbase_csf_scheduler_tick_advance_nolock() - Advance the scheduling tick
- *
- * @kbdev: Pointer to the device
- *
- * This function advances the scheduling tick by enqueing the tick work item for
- * immediate execution, but only if the tick hrtimer is active. If the timer
- * is inactive then the tick work item is already in flight.
- * The caller must hold the interrupt lock.
- */
-static inline void
-kbase_csf_scheduler_tick_advance_nolock(struct kbase_device *kbdev)
-{
-	struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
-
-	lockdep_assert_held(&scheduler->interrupt_lock);
-
-	if (scheduler->tick_timer_active) {
-		KBASE_KTRACE_ADD(kbdev, SCHEDULER_TICK_ADVANCE, NULL, 0u);
-		scheduler->tick_timer_active = false;
-		kthread_queue_work(&scheduler->csf_worker, &scheduler->tick_work);
-	} else {
-		KBASE_KTRACE_ADD(kbdev, SCHEDULER_TICK_NOADVANCE, NULL, 0u);
-	}
-}
-
-/**
- * kbase_csf_scheduler_tick_advance() - Advance the scheduling tick
- *
- * @kbdev: Pointer to the device
- *
- * This function advances the scheduling tick by enqueing the tick work item for
- * immediate execution, but only if the tick hrtimer is active. If the timer
- * is inactive then the tick work item is already in flight.
- */ -static inline void kbase_csf_scheduler_tick_advance(struct kbase_device *kbdev) -{ - struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; - unsigned long flags; - - spin_lock_irqsave(&scheduler->interrupt_lock, flags); - kbase_csf_scheduler_tick_advance_nolock(kbdev); - spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); -} - -/** * kbase_csf_scheduler_invoke_tick() - Invoke the scheduling tick * * @kbdev: Pointer to the device * - * This function will queue the scheduling tick work item for immediate - * execution if tick timer is not active. This can be called from interrupt - * context to resume the scheduling after GPU was put to sleep. + * This function wakes up kbase_csf_scheduler_kthread() to perform a scheduling + * tick regardless of whether the tick timer is enabled. This can be called + * from interrupt context to resume the scheduling after GPU was put to sleep. + * + * Caller is expected to check kbase_csf_scheduler.timer_enabled as required + * to see whether it is appropriate before calling this function. */ static inline void kbase_csf_scheduler_invoke_tick(struct kbase_device *kbdev) { struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler; - unsigned long flags; KBASE_KTRACE_ADD(kbdev, SCHEDULER_TICK_INVOKE, NULL, 0u); - spin_lock_irqsave(&scheduler->interrupt_lock, flags); - if (!scheduler->tick_timer_active) - kthread_queue_work(&scheduler->csf_worker, &scheduler->tick_work); - spin_unlock_irqrestore(&scheduler->interrupt_lock, flags); + if (atomic_cmpxchg(&scheduler->pending_tick_work, false, true) == false) + complete(&scheduler->kthread_signal); } /** @@ -544,8 +518,11 @@ static inline void kbase_csf_scheduler_invoke_tick(struct kbase_device *kbdev) * * @kbdev: Pointer to the device * - * This function will queue the scheduling tock work item for immediate - * execution. + * This function wakes up kbase_csf_scheduler_kthread() to perform a scheduling + * tock. + * + * Caller is expected to check kbase_csf_scheduler.timer_enabled as required + * to see whether it is appropriate before calling this function. */ static inline void kbase_csf_scheduler_invoke_tock(struct kbase_device *kbdev) { @@ -553,7 +530,7 @@ static inline void kbase_csf_scheduler_invoke_tock(struct kbase_device *kbdev) KBASE_KTRACE_ADD(kbdev, SCHEDULER_TOCK_INVOKE, NULL, 0u); if (atomic_cmpxchg(&scheduler->pending_tock_work, false, true) == false) - kthread_mod_delayed_work(&scheduler->csf_worker, &scheduler->tock_work, 0); + complete(&scheduler->kthread_signal); } /** diff --git a/mali_kbase/csf/mali_kbase_csf_sync_debugfs.c b/mali_kbase/csf/mali_kbase_csf_sync_debugfs.c index a5e0ab5..72c0b6f 100644 --- a/mali_kbase/csf/mali_kbase_csf_sync_debugfs.c +++ b/mali_kbase/csf/mali_kbase_csf_sync_debugfs.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2022-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -23,49 +23,46 @@ #include "mali_kbase_csf_csg_debugfs.h" #include <mali_kbase.h> #include <linux/seq_file.h> +#include <linux/version_compat_defs.h> #if IS_ENABLED(CONFIG_SYNC_FILE) #include "mali_kbase_sync.h" #endif -#if IS_ENABLED(CONFIG_DEBUG_FS) - #define CQS_UNREADABLE_LIVE_VALUE "(unavailable)" -/* GPU queue related values */ -#define GPU_CSF_MOVE_OPCODE ((u64)0x1) -#define GPU_CSF_MOVE32_OPCODE ((u64)0x2) -#define GPU_CSF_SYNC_ADD_OPCODE ((u64)0x25) -#define GPU_CSF_SYNC_SET_OPCODE ((u64)0x26) -#define GPU_CSF_SYNC_WAIT_OPCODE ((u64)0x27) -#define GPU_CSF_SYNC_ADD64_OPCODE ((u64)0x33) -#define GPU_CSF_SYNC_SET64_OPCODE ((u64)0x34) -#define GPU_CSF_SYNC_WAIT64_OPCODE ((u64)0x35) -#define GPU_CSF_CALL_OPCODE ((u64)0x20) +#define CSF_SYNC_DUMP_SIZE 256 -#define MAX_NR_GPU_CALLS (5) -#define INSTR_OPCODE_MASK ((u64)0xFF << 56) -#define INSTR_OPCODE_GET(value) ((value & INSTR_OPCODE_MASK) >> 56) -#define MOVE32_IMM_MASK ((u64)0xFFFFFFFFFUL) -#define MOVE_DEST_MASK ((u64)0xFF << 48) -#define MOVE_DEST_GET(value) ((value & MOVE_DEST_MASK) >> 48) -#define MOVE_IMM_MASK ((u64)0xFFFFFFFFFFFFUL) -#define SYNC_SRC0_MASK ((u64)0xFF << 40) -#define SYNC_SRC1_MASK ((u64)0xFF << 32) -#define SYNC_SRC0_GET(value) (u8)((value & SYNC_SRC0_MASK) >> 40) -#define SYNC_SRC1_GET(value) (u8)((value & SYNC_SRC1_MASK) >> 32) -#define SYNC_WAIT_CONDITION_MASK ((u64)0xF << 28) -#define SYNC_WAIT_CONDITION_GET(value) (u8)((value & SYNC_WAIT_CONDITION_MASK) >> 28) - -/* Enumeration for types of GPU queue sync events for - * the purpose of dumping them through debugfs. +/** + * kbasep_print() - Helper function to print to either debugfs file or dmesg. + * + * @kctx: The kbase context + * @file: The seq_file for printing to. This is NULL if printing to dmesg. + * @fmt: The message to print. + * @...: Arguments to format the message. */ -enum debugfs_gpu_sync_type { - DEBUGFS_GPU_SYNC_WAIT, - DEBUGFS_GPU_SYNC_SET, - DEBUGFS_GPU_SYNC_ADD, - NUM_DEBUGFS_GPU_SYNC_TYPES -}; +__attribute__((format(__printf__, 3, 4))) static void +kbasep_print(struct kbase_context *kctx, struct seq_file *file, const char *fmt, ...) +{ + int len = 0; + char buffer[CSF_SYNC_DUMP_SIZE]; + va_list arglist; + + va_start(arglist, fmt); + len = vsnprintf(buffer, CSF_SYNC_DUMP_SIZE, fmt, arglist); + if (len <= 0) { + pr_err("message write to the buffer failed"); + goto exit; + } + + if (file) + seq_printf(file, buffer); + else + dev_warn(kctx->kbdev->dev, buffer); + +exit: + va_end(arglist); +} /** * kbasep_csf_debugfs_get_cqs_live_u32() - Obtain live (u32) value for a CQS object. @@ -120,11 +117,12 @@ static int kbasep_csf_debugfs_get_cqs_live_u64(struct kbase_context *kctx, u64 o * or Fence Signal command, contained in a * KCPU queue. * - * @file: The seq_file for printing to. + * @buffer: The buffer to write to. + * @length: The length of text in the buffer. * @cmd: The KCPU Command to be printed. * @cmd_name: The name of the command: indicates either a fence SIGNAL or WAIT. 
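 *
 * (Editor's note on the pattern shared by the print helpers below: output is
 * staged into a fixed-size line buffer and appended piecewise with
 *
 *     *length += snprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length, ...);
 *
 * where snprintf caps each write at the space remaining and returns the
 * untruncated length; the assembled line is finally routed by kbasep_print()
 * to either the debugfs seq_file or dmesg.)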
*/ -static void kbasep_csf_sync_print_kcpu_fence_wait_or_signal(struct seq_file *file, +static void kbasep_csf_sync_print_kcpu_fence_wait_or_signal(char *buffer, int *length, struct kbase_kcpu_command *cmd, const char *cmd_name) { @@ -133,38 +131,46 @@ static void kbasep_csf_sync_print_kcpu_fence_wait_or_signal(struct seq_file *fil #else struct dma_fence *fence = NULL; #endif /* LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0) */ - + struct kbase_kcpu_command_fence_info *fence_info; struct kbase_sync_fence_info info; const char *timeline_name = NULL; bool is_signaled = false; - fence = cmd->info.fence.fence; + fence_info = &cmd->info.fence; + if (kbase_kcpu_command_fence_has_force_signaled(fence_info)) + return; + + fence = kbase_fence_get(fence_info); if (WARN_ON(!fence)) return; - kbase_sync_fence_info_get(cmd->info.fence.fence, &info); + kbase_sync_fence_info_get(fence, &info); timeline_name = fence->ops->get_timeline_name(fence); is_signaled = info.status > 0; - seq_printf(file, "cmd:%s obj:0x%pK live_value:0x%.8x | ", cmd_name, cmd->info.fence.fence, - is_signaled); + *length += snprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length, + "cmd:%s obj:0x%pK live_value:0x%.8x | ", cmd_name, fence, is_signaled); /* Note: fence->seqno was u32 until 5.1 kernel, then u64 */ - seq_printf(file, "timeline_name:%s timeline_context:0x%.16llx fence_seqno:0x%.16llx", - timeline_name, fence->context, (u64)fence->seqno); + *length += snprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length, + "timeline_name:%s timeline_context:0x%.16llx fence_seqno:0x%.16llx", + timeline_name, fence->context, (u64)fence->seqno); + + kbase_fence_put(fence); } /** * kbasep_csf_sync_print_kcpu_cqs_wait() - Print details of a CSF SYNC CQS Wait command, * contained in a KCPU queue. * - * @file: The seq_file for printing to. - * @cmd: The KCPU Command to be printed. + * @kctx: The kbase context. + * @buffer: The buffer to write to. + * @length: The length of text in the buffer. + * @cmd: The KCPU Command to be printed. */ -static void kbasep_csf_sync_print_kcpu_cqs_wait(struct seq_file *file, - struct kbase_kcpu_command *cmd) +static void kbasep_csf_sync_print_kcpu_cqs_wait(struct kbase_context *kctx, char *buffer, + int *length, struct kbase_kcpu_command *cmd) { - struct kbase_context *kctx = file->private; size_t i; for (i = 0; i < cmd->info.cqs_wait.nr_objs; i++) { @@ -174,14 +180,19 @@ static void kbasep_csf_sync_print_kcpu_cqs_wait(struct seq_file *file, int ret = kbasep_csf_debugfs_get_cqs_live_u32(kctx, cqs_obj->addr, &live_val); bool live_val_valid = (ret >= 0); - seq_printf(file, "cmd:CQS_WAIT_OPERATION obj:0x%.16llx live_value:", cqs_obj->addr); + *length += + snprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length, + "cmd:CQS_WAIT_OPERATION obj:0x%.16llx live_value:", cqs_obj->addr); if (live_val_valid) - seq_printf(file, "0x%.16llx", (u64)live_val); + *length += snprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length, + "0x%.16llx", (u64)live_val); else - seq_puts(file, CQS_UNREADABLE_LIVE_VALUE); + *length += snprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length, + CQS_UNREADABLE_LIVE_VALUE); - seq_printf(file, " | op:gt arg_value:0x%.8x", cqs_obj->val); + *length += snprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length, + " | op:gt arg_value:0x%.8x", cqs_obj->val); } } @@ -189,13 +200,14 @@ static void kbasep_csf_sync_print_kcpu_cqs_wait(struct seq_file *file, * kbasep_csf_sync_print_kcpu_cqs_set() - Print details of a CSF SYNC CQS * Set command, contained in a KCPU queue. 
* - * @file: The seq_file for printing to. - * @cmd: The KCPU Command to be printed. + * @kctx: The kbase context. + * @buffer: The buffer to write to. + * @length: The length of text in the buffer. + * @cmd: The KCPU Command to be printed. */ -static void kbasep_csf_sync_print_kcpu_cqs_set(struct seq_file *file, - struct kbase_kcpu_command *cmd) +static void kbasep_csf_sync_print_kcpu_cqs_set(struct kbase_context *kctx, char *buffer, + int *length, struct kbase_kcpu_command *cmd) { - struct kbase_context *kctx = file->private; size_t i; for (i = 0; i < cmd->info.cqs_set.nr_objs; i++) { @@ -205,14 +217,19 @@ static void kbasep_csf_sync_print_kcpu_cqs_set(struct seq_file *file, int ret = kbasep_csf_debugfs_get_cqs_live_u32(kctx, cqs_obj->addr, &live_val); bool live_val_valid = (ret >= 0); - seq_printf(file, "cmd:CQS_SET_OPERATION obj:0x%.16llx live_value:", cqs_obj->addr); + *length += + snprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length, + "cmd:CQS_SET_OPERATION obj:0x%.16llx live_value:", cqs_obj->addr); if (live_val_valid) - seq_printf(file, "0x%.16llx", (u64)live_val); + *length += snprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length, + "0x%.16llx", (u64)live_val); else - seq_puts(file, CQS_UNREADABLE_LIVE_VALUE); + *length += snprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length, + CQS_UNREADABLE_LIVE_VALUE); - seq_printf(file, " | op:add arg_value:0x%.8x", 1); + *length += snprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length, + " | op:add arg_value:0x%.8x", 1); } } @@ -271,14 +288,15 @@ static const char *kbasep_csf_sync_get_set_op_name(basep_cqs_set_operation_op op * Wait Operation command, contained * in a KCPU queue. * - * @file: The seq_file for printing to. - * @cmd: The KCPU Command to be printed. + * @kctx: The kbase context. + * @buffer: The buffer to write to. + * @length: The length of text in the buffer. + * @cmd: The KCPU Command to be printed. */ -static void kbasep_csf_sync_print_kcpu_cqs_wait_op(struct seq_file *file, - struct kbase_kcpu_command *cmd) +static void kbasep_csf_sync_print_kcpu_cqs_wait_op(struct kbase_context *kctx, char *buffer, + int *length, struct kbase_kcpu_command *cmd) { size_t i; - struct kbase_context *kctx = file->private; for (i = 0; i < cmd->info.cqs_wait.nr_objs; i++) { struct base_cqs_wait_operation_info *wait_op = @@ -290,14 +308,19 @@ static void kbasep_csf_sync_print_kcpu_cqs_wait_op(struct seq_file *file, bool live_val_valid = (ret >= 0); - seq_printf(file, "cmd:CQS_WAIT_OPERATION obj:0x%.16llx live_value:", wait_op->addr); + *length += + snprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length, + "cmd:CQS_WAIT_OPERATION obj:0x%.16llx live_value:", wait_op->addr); if (live_val_valid) - seq_printf(file, "0x%.16llx", live_val); + *length += snprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length, + "0x%.16llx", live_val); else - seq_puts(file, CQS_UNREADABLE_LIVE_VALUE); + *length += snprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length, + CQS_UNREADABLE_LIVE_VALUE); - seq_printf(file, " | op:%s arg_value:0x%.16llx", op_name, wait_op->val); + *length += snprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length, + " | op:%s arg_value:0x%.16llx", op_name, wait_op->val); } } @@ -306,14 +329,15 @@ static void kbasep_csf_sync_print_kcpu_cqs_wait_op(struct seq_file *file, * Set Operation command, contained * in a KCPU queue. * - * @file: The seq_file for printing to. - * @cmd: The KCPU Command to be printed. + * @kctx: The kbase context. + * @buffer: The buffer to write to. + * @length: The length of text in the buffer. 
+ * @cmd: The KCPU Command to be printed. */ -static void kbasep_csf_sync_print_kcpu_cqs_set_op(struct seq_file *file, - struct kbase_kcpu_command *cmd) +static void kbasep_csf_sync_print_kcpu_cqs_set_op(struct kbase_context *kctx, char *buffer, + int *length, struct kbase_kcpu_command *cmd) { size_t i; - struct kbase_context *kctx = file->private; for (i = 0; i < cmd->info.cqs_set_operation.nr_objs; i++) { struct base_cqs_set_operation_info *set_op = &cmd->info.cqs_set_operation.objs[i]; @@ -325,29 +349,35 @@ static void kbasep_csf_sync_print_kcpu_cqs_set_op(struct seq_file *file, bool live_val_valid = (ret >= 0); - seq_printf(file, "cmd:CQS_SET_OPERATION obj:0x%.16llx live_value:", set_op->addr); + *length += + snprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length, + "cmd:CQS_SET_OPERATION obj:0x%.16llx live_value:", set_op->addr); if (live_val_valid) - seq_printf(file, "0x%.16llx", live_val); + *length += snprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length, + "0x%.16llx", live_val); else - seq_puts(file, CQS_UNREADABLE_LIVE_VALUE); + *length += snprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length, + CQS_UNREADABLE_LIVE_VALUE); - seq_printf(file, " | op:%s arg_value:0x%.16llx", op_name, set_op->val); + *length += snprintf(buffer + *length, CSF_SYNC_DUMP_SIZE - *length, + " | op:%s arg_value:0x%.16llx", op_name, set_op->val); } } /** * kbasep_csf_kcpu_debugfs_print_queue() - Print debug data for a KCPU queue * + * @kctx: The kbase context. * @file: The seq_file to print to. * @queue: Pointer to the KCPU queue. */ -static void kbasep_csf_sync_kcpu_debugfs_print_queue(struct seq_file *file, +static void kbasep_csf_sync_kcpu_debugfs_print_queue(struct kbase_context *kctx, + struct seq_file *file, struct kbase_kcpu_command_queue *queue) { char started_or_pending; struct kbase_kcpu_command *cmd; - struct kbase_context *kctx = file->private; size_t i; if (WARN_ON(!queue)) @@ -357,72 +387,115 @@ static void kbasep_csf_sync_kcpu_debugfs_print_queue(struct seq_file *file, mutex_lock(&queue->lock); for (i = 0; i != queue->num_pending_cmds; ++i) { + char buffer[CSF_SYNC_DUMP_SIZE]; + int length = 0; started_or_pending = ((i == 0) && queue->command_started) ? 
'S' : 'P'; - seq_printf(file, "queue:KCPU-%u-%u exec:%c ", kctx->id, queue->id, - started_or_pending); + length += snprintf(buffer, CSF_SYNC_DUMP_SIZE, "queue:KCPU-%d-%d exec:%c ", + kctx->id, queue->id, started_or_pending); - cmd = &queue->commands[queue->start_offset + i]; + cmd = &queue->commands[(u8)(queue->start_offset + i)]; switch (cmd->type) { #if IS_ENABLED(CONFIG_SYNC_FILE) case BASE_KCPU_COMMAND_TYPE_FENCE_SIGNAL: - kbasep_csf_sync_print_kcpu_fence_wait_or_signal(file, cmd, "FENCE_SIGNAL"); + kbasep_csf_sync_print_kcpu_fence_wait_or_signal(buffer, &length, cmd, + "FENCE_SIGNAL"); break; case BASE_KCPU_COMMAND_TYPE_FENCE_WAIT: - kbasep_csf_sync_print_kcpu_fence_wait_or_signal(file, cmd, "FENCE_WAIT"); + kbasep_csf_sync_print_kcpu_fence_wait_or_signal(buffer, &length, cmd, + "FENCE_WAIT"); break; #endif case BASE_KCPU_COMMAND_TYPE_CQS_WAIT: - kbasep_csf_sync_print_kcpu_cqs_wait(file, cmd); + kbasep_csf_sync_print_kcpu_cqs_wait(kctx, buffer, &length, cmd); break; case BASE_KCPU_COMMAND_TYPE_CQS_SET: - kbasep_csf_sync_print_kcpu_cqs_set(file, cmd); + kbasep_csf_sync_print_kcpu_cqs_set(kctx, buffer, &length, cmd); break; case BASE_KCPU_COMMAND_TYPE_CQS_WAIT_OPERATION: - kbasep_csf_sync_print_kcpu_cqs_wait_op(file, cmd); + kbasep_csf_sync_print_kcpu_cqs_wait_op(kctx, buffer, &length, cmd); break; case BASE_KCPU_COMMAND_TYPE_CQS_SET_OPERATION: - kbasep_csf_sync_print_kcpu_cqs_set_op(file, cmd); + kbasep_csf_sync_print_kcpu_cqs_set_op(kctx, buffer, &length, cmd); break; default: - seq_puts(file, ", U, Unknown blocking command"); + length += snprintf(buffer + length, CSF_SYNC_DUMP_SIZE - length, + ", U, Unknown blocking command"); break; } - seq_puts(file, "\n"); + length += snprintf(buffer + length, CSF_SYNC_DUMP_SIZE - length, "\n"); + kbasep_print(kctx, file, buffer); } mutex_unlock(&queue->lock); } -/** - * kbasep_csf_sync_kcpu_debugfs_show() - Print CSF KCPU queue sync info - * - * @file: The seq_file for printing to. - * - * Return: Negative error code or 0 on success. 
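(Aside, illustrative only: the refactor above moves from seq_printf() to a fixed-size line buffer that each helper appends to via *length += snprintf(...). A minimal userspace sketch of that bounded-append pattern, where BUF_SIZE stands in for CSF_SYNC_DUMP_SIZE and buf_appendf() is an invented helper; note that snprintf() returns the length that would have been written, so the cursor must be guarded once truncation has occurred:)

#include <stdarg.h>
#include <stdio.h>

#define BUF_SIZE 256 /* stands in for CSF_SYNC_DUMP_SIZE */

static void buf_appendf(char *buf, int *len, const char *fmt, ...)
{
    va_list ap;
    int ret;

    /* After truncation *len may already exceed BUF_SIZE; stop appending. */
    if (*len < 0 || *len >= BUF_SIZE)
        return;

    va_start(ap, fmt);
    ret = vsnprintf(buf + *len, BUF_SIZE - *len, fmt, ap);
    va_end(ap);

    if (ret > 0)
        *len += ret;
}

int main(void)
{
    char line[BUF_SIZE];
    int len = 0;

    buf_appendf(line, &len, "queue:KCPU-%u-%u ", 0u, 1u);
    buf_appendf(line, &len, "live_value:0x%.16llx\n", 0x1ULL);
    fputs(line, stdout);
    return 0;
}

(The driver sizes CSF_SYNC_DUMP_SIZE so that one dump line fits; the guard above is only needed in the general case.)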
- */
-static int kbasep_csf_sync_kcpu_debugfs_show(struct seq_file *file)
+int kbasep_csf_sync_kcpu_dump_locked(struct kbase_context *kctx, struct seq_file *file)
 {
-    struct kbase_context *kctx = file->private;
     unsigned long queue_idx;
 
-    mutex_lock(&kctx->csf.kcpu_queues.lock);
-    seq_printf(file, "KCPU queues for ctx %u:\n", kctx->id);
+    lockdep_assert_held(&kctx->csf.kcpu_queues.lock);
+
+    kbasep_print(kctx, file, "KCPU queues for ctx %d:\n", kctx->id);
 
     queue_idx = find_first_bit(kctx->csf.kcpu_queues.in_use, KBASEP_MAX_KCPU_QUEUES);
 
     while (queue_idx < KBASEP_MAX_KCPU_QUEUES) {
-        kbasep_csf_sync_kcpu_debugfs_print_queue(file,
+        kbasep_csf_sync_kcpu_debugfs_print_queue(kctx, file,
                              kctx->csf.kcpu_queues.array[queue_idx]);
 
         queue_idx = find_next_bit(kctx->csf.kcpu_queues.in_use, KBASEP_MAX_KCPU_QUEUES,
                       queue_idx + 1);
     }
 
+    return 0;
+}
+
+int kbasep_csf_sync_kcpu_dump(struct kbase_context *kctx, struct seq_file *file)
+{
+    mutex_lock(&kctx->csf.kcpu_queues.lock);
+    kbasep_csf_sync_kcpu_dump_locked(kctx, file);
     mutex_unlock(&kctx->csf.kcpu_queues.lock);
     return 0;
 }
 
+#if IS_ENABLED(CONFIG_DEBUG_FS)
+
+/* GPU queue related values */
+#define GPU_CSF_MOVE_OPCODE ((u64)0x1)
+#define GPU_CSF_MOVE32_OPCODE ((u64)0x2)
+#define GPU_CSF_SYNC_ADD_OPCODE ((u64)0x25)
+#define GPU_CSF_SYNC_SET_OPCODE ((u64)0x26)
+#define GPU_CSF_SYNC_WAIT_OPCODE ((u64)0x27)
+#define GPU_CSF_SYNC_ADD64_OPCODE ((u64)0x33)
+#define GPU_CSF_SYNC_SET64_OPCODE ((u64)0x34)
+#define GPU_CSF_SYNC_WAIT64_OPCODE ((u64)0x35)
+#define GPU_CSF_CALL_OPCODE ((u64)0x20)
+
+#define MAX_NR_GPU_CALLS (5)
+#define INSTR_OPCODE_MASK ((u64)0xFF << 56)
+#define INSTR_OPCODE_GET(value) ((value & INSTR_OPCODE_MASK) >> 56)
+#define MOVE32_IMM_MASK ((u64)0xFFFFFFFFUL)
+#define MOVE_DEST_MASK ((u64)0xFF << 48)
+#define MOVE_DEST_GET(value) ((value & MOVE_DEST_MASK) >> 48)
+#define MOVE_IMM_MASK ((u64)0xFFFFFFFFFFFFUL)
+#define SYNC_SRC0_MASK ((u64)0xFF << 40)
+#define SYNC_SRC1_MASK ((u64)0xFF << 32)
+#define SYNC_SRC0_GET(value) (u8)((value & SYNC_SRC0_MASK) >> 40)
+#define SYNC_SRC1_GET(value) (u8)((value & SYNC_SRC1_MASK) >> 32)
+#define SYNC_WAIT_CONDITION_MASK ((u64)0xF << 28)
+#define SYNC_WAIT_CONDITION_GET(value) (u8)((value & SYNC_WAIT_CONDITION_MASK) >> 28)
+
+/* Enumeration for types of GPU queue sync events for
+ * the purpose of dumping them through debugfs.
+ */
+enum debugfs_gpu_sync_type {
+    DEBUGFS_GPU_SYNC_WAIT,
+    DEBUGFS_GPU_SYNC_SET,
+    DEBUGFS_GPU_SYNC_ADD,
+    NUM_DEBUGFS_GPU_SYNC_TYPES
+};
+
 /**
 * kbasep_csf_get_move_immediate_value() - Get the immediate values for sync operations
 * from a MOVE instruction.
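(Aside, illustrative only: the masks defined above carve fixed-width fields out of a 64-bit CSF instruction word, with the opcode in bits 63..56. A self-contained sketch of the decode, using a fabricated SYNC_WAIT word; the macro definitions are copied from above with u64 spelled as uint64_t:)

#include <stdint.h>
#include <stdio.h>

#define INSTR_OPCODE_MASK ((uint64_t)0xFF << 56)
#define INSTR_OPCODE_GET(v) (((v) & INSTR_OPCODE_MASK) >> 56)
#define SYNC_SRC0_MASK ((uint64_t)0xFF << 40)
#define SYNC_SRC0_GET(v) (uint8_t)(((v) & SYNC_SRC0_MASK) >> 40)
#define SYNC_WAIT_CONDITION_MASK ((uint64_t)0xF << 28)
#define SYNC_WAIT_CONDITION_GET(v) (uint8_t)(((v) & SYNC_WAIT_CONDITION_MASK) >> 28)

int main(void)
{
    /* Fabricated SYNC_WAIT instruction: opcode 0x27, src0 reg 0x10, condition 0x1 */
    uint64_t instr = ((uint64_t)0x27 << 56) | ((uint64_t)0x10 << 40) | ((uint64_t)0x1 << 28);

    printf("opcode:0x%llx src0:%u cond:%u\n",
           (unsigned long long)INSTR_OPCODE_GET(instr),
           SYNC_SRC0_GET(instr), SYNC_WAIT_CONDITION_GET(instr));
    return 0;
}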
@@ -476,10 +549,21 @@ static u64 kbasep_csf_read_ringbuffer_value(struct kbase_queue *queue, u32 ringb
     u64 page_off = ringbuff_offset >> PAGE_SHIFT;
     u64 offset_within_page = ringbuff_offset & ~PAGE_MASK;
     struct page *page = as_page(queue->queue_reg->gpu_alloc->pages[page_off]);
-    u64 *ringbuffer = kmap_atomic(page);
-    u64 value = ringbuffer[offset_within_page / sizeof(u64)];
+    u64 *ringbuffer = vmap(&page, 1, VM_MAP, pgprot_noncached(PAGE_KERNEL));
+    u64 value;
+
+    if (!ringbuffer) {
+        struct kbase_context *kctx = queue->kctx;
+
+        dev_err(kctx->kbdev->dev, "%s failed to map the buffer page to read a command!",
+            __func__);
+        /* Return an alternative 0 for the dumping operation */
+        value = 0;
+    } else {
+        value = ringbuffer[offset_within_page / sizeof(u64)];
+        vunmap(ringbuffer);
+    }
 
-    kunmap_atomic(ringbuffer);
     return value;
 }
 
@@ -559,24 +643,25 @@ static void kbasep_csf_print_gpu_sync_op(struct seq_file *file, struct kbase_con
         return;
 
     /* 5. Print info */
-    seq_printf(file, "queue:GPU-%u-%u-%u exec:%c cmd:%s ", kctx->id, queue->group->handle,
-           queue->csi_index, queue->enabled && !follows_wait ? 'S' : 'P',
-           gpu_sync_type_name[type]);
+    kbasep_print(kctx, file, "queue:GPU-%u-%u-%u exec:%c cmd:%s ", kctx->id,
+             queue->group->handle, queue->csi_index,
+             queue->enabled && !follows_wait ? 'S' : 'P', gpu_sync_type_name[type]);
 
     if (queue->group->csg_nr == KBASEP_CSG_NR_INVALID)
-        seq_puts(file, "slot:-");
+        kbasep_print(kctx, file, "slot:-");
     else
-        seq_printf(file, "slot:%d", (int)queue->group->csg_nr);
+        kbasep_print(kctx, file, "slot:%d", (int)queue->group->csg_nr);
 
-    seq_printf(file, " obj:0x%.16llx live_value:0x%.16llx | ", sync_addr, live_val);
+    kbasep_print(kctx, file, " obj:0x%.16llx live_value:0x%.16llx | ", sync_addr, live_val);
 
     if (type == DEBUGFS_GPU_SYNC_WAIT) {
         wait_condition = SYNC_WAIT_CONDITION_GET(sync_cmd);
-        seq_printf(file, "op:%s ", kbasep_csf_sync_get_wait_op_name(wait_condition));
+        kbasep_print(kctx, file, "op:%s ",
+                 kbasep_csf_sync_get_wait_op_name(wait_condition));
     } else
-        seq_printf(file, "op:%s ", gpu_sync_type_op[type]);
+        kbasep_print(kctx, file, "op:%s ", gpu_sync_type_op[type]);
 
-    seq_printf(file, "arg_value:0x%.16llx\n", compare_val);
+    kbasep_print(kctx, file, "arg_value:0x%.16llx\n", compare_val);
 }
 
 /**
@@ -595,7 +680,7 @@ static void kbasep_csf_print_gpu_sync_op(struct seq_file *file, struct kbase_con
 static void kbasep_csf_dump_active_queue_sync_info(struct seq_file *file, struct kbase_queue *queue)
 {
     struct kbase_context *kctx;
-    u32 *addr;
+    u64 *addr;
     u64 cs_extract, cs_insert, instr, cursor;
     bool follows_wait = false;
     int nr_calls = 0;
@@ -605,11 +690,11 @@ static void kbasep_csf_dump_active_queue_sync_info(struct seq_file *file, struct
     kctx = queue->kctx;
 
-    addr = (u32 *)queue->user_io_addr;
-    cs_insert = addr[CS_INSERT_LO / 4] | ((u64)addr[CS_INSERT_HI / 4] << 32);
+    addr = queue->user_io_addr;
+    cs_insert = addr[CS_INSERT_LO / sizeof(*addr)];
 
-    addr = (u32 *)(queue->user_io_addr + PAGE_SIZE);
-    cs_extract = addr[CS_EXTRACT_LO / 4] | ((u64)addr[CS_EXTRACT_HI / 4] << 32);
+    addr = queue->user_io_addr + PAGE_SIZE / sizeof(*addr);
+    cs_extract = addr[CS_EXTRACT_LO / sizeof(*addr)];
 
     cursor = cs_extract;
 
@@ -637,6 +722,7 @@ static void kbasep_csf_dump_active_queue_sync_info(struct seq_file *file, struct
         case GPU_CSF_SYNC_SET64_OPCODE:
         case GPU_CSF_SYNC_WAIT64_OPCODE:
             instr_is_64_bit = true;
+            break;
         default:
             break;
         }
@@ -663,7 +749,7 @@ static void kbasep_csf_dump_active_queue_sync_info(struct seq_file *file, struct
             break;
         case GPU_CSF_CALL_OPCODE:
nr_calls++; - /* Fallthrough */ + break; default: /* Unrecognized command, skip past it */ break; @@ -677,36 +763,37 @@ static void kbasep_csf_dump_active_queue_sync_info(struct seq_file *file, struct * kbasep_csf_dump_active_group_sync_state() - Prints SYNC commands in all GPU queues of * the provided queue group. * + * @kctx: The kbase context * @file: seq_file for printing to. * @group: Address of a GPU command group to iterate through. * * This function will iterate through each queue in the provided GPU queue group and * print its SYNC related commands. */ -static void kbasep_csf_dump_active_group_sync_state(struct seq_file *file, +static void kbasep_csf_dump_active_group_sync_state(struct kbase_context *kctx, + struct seq_file *file, struct kbase_queue_group *const group) { - struct kbase_context *kctx = file->private; unsigned int i; - seq_printf(file, "GPU queues for group %u (slot %d) of ctx %d_%d\n", group->handle, - group->csg_nr, kctx->tgid, kctx->id); + kbasep_print(kctx, file, "GPU queues for group %u (slot %d) of ctx %d_%d\n", group->handle, + group->csg_nr, kctx->tgid, kctx->id); for (i = 0; i < MAX_SUPPORTED_STREAMS_PER_GROUP; i++) kbasep_csf_dump_active_queue_sync_info(file, group->bound_queues[i]); } /** - * kbasep_csf_sync_gpu_debugfs_show() - Print CSF GPU queue sync info + * kbasep_csf_sync_gpu_dump() - Print CSF GPU queue sync info * + * @kctx: The kbase context * @file: The seq_file for printing to. * * Return: Negative error code or 0 on success. */ -static int kbasep_csf_sync_gpu_debugfs_show(struct seq_file *file) +static int kbasep_csf_sync_gpu_dump(struct kbase_context *kctx, struct seq_file *file) { u32 gr; - struct kbase_context *kctx = file->private; struct kbase_device *kbdev; if (WARN_ON(!kctx)) @@ -721,7 +808,7 @@ static int kbasep_csf_sync_gpu_debugfs_show(struct seq_file *file) kbdev->csf.scheduler.csg_slots[gr].resident_group; if (!group || group->kctx != kctx) continue; - kbasep_csf_dump_active_group_sync_state(file, group); + kbasep_csf_dump_active_group_sync_state(kctx, file, group); } kbase_csf_scheduler_unlock(kbdev); @@ -738,10 +825,13 @@ static int kbasep_csf_sync_gpu_debugfs_show(struct seq_file *file) */ static int kbasep_csf_sync_debugfs_show(struct seq_file *file, void *data) { - seq_printf(file, "MALI_CSF_SYNC_DEBUGFS_VERSION: v%u\n", MALI_CSF_SYNC_DEBUGFS_VERSION); + struct kbase_context *kctx = file->private; + + kbasep_print(kctx, file, "MALI_CSF_SYNC_DEBUGFS_VERSION: v%u\n", + MALI_CSF_SYNC_DEBUGFS_VERSION); - kbasep_csf_sync_kcpu_debugfs_show(file); - kbasep_csf_sync_gpu_debugfs_show(file); + kbasep_csf_sync_kcpu_dump(kctx, file); + kbasep_csf_sync_gpu_dump(kctx, file); return 0; } diff --git a/mali_kbase/csf/mali_kbase_csf_sync_debugfs.h b/mali_kbase/csf/mali_kbase_csf_sync_debugfs.h index 177e15d..2fe5060 100644 --- a/mali_kbase/csf/mali_kbase_csf_sync_debugfs.h +++ b/mali_kbase/csf/mali_kbase_csf_sync_debugfs.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2022-2023 ARM Limited. All rights reserved. 
 *
 * This program is free software and is provided to you under the terms of the
 * GNU General Public License version 2 as published by the Free Software
@@ -22,6 +22,8 @@
 #ifndef _KBASE_CSF_SYNC_DEBUGFS_H_
 #define _KBASE_CSF_SYNC_DEBUGFS_H_
 
+#include <linux/seq_file.h>
+
 /* Forward declaration */
 struct kbase_context;
 
@@ -34,4 +36,27 @@ struct kbase_context;
  */
 void kbase_csf_sync_debugfs_init(struct kbase_context *kctx);
 
+/**
+ * kbasep_csf_sync_kcpu_dump() - Print CSF KCPU queue sync info
+ *
+ * @kctx: The kbase context.
+ * @file: The seq_file for printing to.
+ *
+ * Return: Negative error code or 0 on success.
+ *
+ * Note: This function should not be used if kcpu_queues.lock is held. Use
+ * kbasep_csf_sync_kcpu_dump_locked() instead.
+ */
+int kbasep_csf_sync_kcpu_dump(struct kbase_context *kctx, struct seq_file *file);
+
+/**
+ * kbasep_csf_sync_kcpu_dump_locked() - Print CSF KCPU queue sync info (caller holds kcpu_queues.lock)
+ *
+ * @kctx: The kbase context.
+ * @file: The seq_file for printing to.
+ *
+ * Return: Negative error code or 0 on success.
+ */
+int kbasep_csf_sync_kcpu_dump_locked(struct kbase_context *kctx, struct seq_file *file);
+
 #endif /* _KBASE_CSF_SYNC_DEBUGFS_H_ */
diff --git a/mali_kbase/csf/mali_kbase_csf_tiler_heap.c b/mali_kbase/csf/mali_kbase_csf_tiler_heap.c
index 8072a8b..85d8018 100644
--- a/mali_kbase/csf/mali_kbase_csf_tiler_heap.c
+++ b/mali_kbase/csf/mali_kbase_csf_tiler_heap.c
@@ -362,7 +362,7 @@ static struct kbase_csf_tiler_heap_chunk *alloc_new_chunk(struct kbase_context *
     /* If page migration is enabled, we don't want to migrate tiler heap pages.
      * This does not change if the constituent pages are already marked as isolated.
      */
-    if (kbase_page_migration_enabled)
+    if (kbase_is_page_migration_enabled())
         kbase_set_phy_alloc_page_status(chunk->region->gpu_alloc, NOT_MOVABLE);
 
     return chunk;
@@ -748,7 +748,7 @@ int kbase_csf_tiler_heap_init(struct kbase_context *const kctx, u32 const chunk_
                      KBASE_REG_CPU_RD, &heap->buf_desc_map,
                      KBASE_VMAP_FLAG_PERMANENT_MAP_ACCOUNTING);
 
-        if (kbase_page_migration_enabled)
+        if (kbase_is_page_migration_enabled())
             kbase_set_phy_alloc_page_status(buf_desc_reg->gpu_alloc, NOT_MOVABLE);
 
         kbase_gpu_vm_unlock(kctx);
diff --git a/mali_kbase/csf/mali_kbase_csf_timeout.c b/mali_kbase/csf/mali_kbase_csf_timeout.c
index ea6c116..f7fcbb1 100644
--- a/mali_kbase/csf/mali_kbase_csf_timeout.c
+++ b/mali_kbase/csf/mali_kbase_csf_timeout.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 /*
  *
- * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved.
 *
 * This program is free software and is provided to you under the terms of the
 * GNU General Public License version 2 as published by the Free Software
@@ -52,6 +52,7 @@ static int set_timeout(struct kbase_device *const kbdev, u64 const timeout)
     dev_dbg(kbdev->dev, "New progress timeout: %llu cycles\n", timeout);
 
     atomic64_set(&kbdev->csf.progress_timeout, timeout);
+    kbase_device_set_timeout(kbdev, CSF_SCHED_PROTM_PROGRESS_TIMEOUT, timeout, 1);
 
     return 0;
 }
@@ -100,7 +101,7 @@ static ssize_t progress_timeout_store(struct device * const dev,
     if (!err) {
         kbase_csf_scheduler_pm_active(kbdev);
-        err = kbase_csf_scheduler_wait_mcu_active(kbdev);
+        err = kbase_csf_scheduler_killable_wait_mcu_active(kbdev);
         if (!err)
             err = kbase_csf_firmware_set_timeout(kbdev, timeout);
 
@@ -147,8 +148,14 @@ int kbase_csf_timeout_init(struct kbase_device *const kbdev)
     int err;
 
 #if IS_ENABLED(CONFIG_OF)
-    err = of_property_read_u64(kbdev->dev->of_node,
-                   "progress_timeout", &timeout);
+    /* Read "progress-timeout" property and fall back to "progress_timeout"
+     * if not found.
+     */
+    err = of_property_read_u64(kbdev->dev->of_node, "progress-timeout", &timeout);
+
+    if (err == -EINVAL)
+        err = of_property_read_u64(kbdev->dev->of_node, "progress_timeout", &timeout);
+
     if (!err)
         dev_info(kbdev->dev, "Found progress_timeout = %llu in Devicetree\n", timeout);
diff --git a/mali_kbase/csf/mali_kbase_csf_tl_reader.c b/mali_kbase/csf/mali_kbase_csf_tl_reader.c
index 910ba22..ce50683 100644
--- a/mali_kbase/csf/mali_kbase_csf_tl_reader.c
+++ b/mali_kbase/csf/mali_kbase_csf_tl_reader.c
@@ -39,8 +39,6 @@
 #include <linux/version_compat_defs.h>
 #endif
 
-/* Name of the CSFFW timeline tracebuffer. */
-#define KBASE_CSFFW_TRACEBUFFER_NAME "timeline"
 /* Name of the timeline header metadata */
 #define KBASE_CSFFW_TIMELINE_HEADER_NAME "timeline_header"
 
@@ -299,16 +297,13 @@ static int tl_reader_init_late(
     if (self->kbdev)
         return 0;
 
-    tb = kbase_csf_firmware_get_trace_buffer(
-        kbdev, KBASE_CSFFW_TRACEBUFFER_NAME);
+    tb = kbase_csf_firmware_get_trace_buffer(kbdev, KBASE_CSFFW_TIMELINE_BUF_NAME);
     hdr = kbase_csf_firmware_get_timeline_metadata(
         kbdev, KBASE_CSFFW_TIMELINE_HEADER_NAME, &hdr_size);
 
     if (!tb) {
-        dev_warn(
-            kbdev->dev,
-            "'%s' tracebuffer is not present in the firmware image.",
-            KBASE_CSFFW_TRACEBUFFER_NAME);
+        dev_warn(kbdev->dev, "'%s' tracebuffer is not present in the firmware image.",
+             KBASE_CSFFW_TIMELINE_BUF_NAME);
         return -1;
     }
diff --git a/mali_kbase/csf/mali_kbase_csf_trace_buffer.c b/mali_kbase/csf/mali_kbase_csf_trace_buffer.c
index 9ce6776..2b63f19 100644
--- a/mali_kbase/csf/mali_kbase_csf_trace_buffer.c
+++ b/mali_kbase/csf/mali_kbase_csf_trace_buffer.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 /*
  *
- * (C) COPYRIGHT 2018-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved.
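(Aside, illustrative only: the hyphen/underscore handling above is a common devicetree-compatibility shape: try the preferred "progress-timeout" name first and, only on -EINVAL (property absent), retry the legacy "progress_timeout". A kernel-style sketch of the same logic, not a standalone program; read_timeout_with_fallback() is an invented name:)

#include <linux/of.h>

/* Prefer the hyphenated property name; fall back to the legacy
 * underscore form only when the preferred one does not exist.
 */
static int read_timeout_with_fallback(const struct device_node *np, u64 *timeout)
{
    int err = of_property_read_u64(np, "progress-timeout", timeout);

    if (err == -EINVAL) /* property not found: try the legacy name */
        err = of_property_read_u64(np, "progress_timeout", timeout);

    return err;
}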
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -89,7 +89,7 @@ struct firmware_trace_buffer { } cpu_va; u32 num_pages; u32 trace_enable_init_mask[CSF_FIRMWARE_TRACE_ENABLE_INIT_MASK_MAX]; - char name[1]; /* this field must be last */ + char name[]; /* this field must be last */ }; /** @@ -118,16 +118,19 @@ struct firmware_trace_buffer_data { */ static const struct firmware_trace_buffer_data trace_buffer_data[] = { #if MALI_UNIT_TEST - { "fwutf", { 0 }, 1 }, + { KBASE_CSFFW_UTF_BUF_NAME, { 0 }, 1 }, #endif #ifdef CONFIG_MALI_PIXEL_GPU_SSCD /* Enable all the logs */ - { FIRMWARE_LOG_BUF_NAME, { 0xFFFFFFFF }, FW_TRACE_BUF_NR_PAGES }, + { KBASE_CSFFW_LOG_BUF_NAME, { 0xFFFFFFFF }, FW_TRACE_BUF_NR_PAGES }, #else - { FIRMWARE_LOG_BUF_NAME, { 0 }, FW_TRACE_BUF_NR_PAGES }, + { KBASE_CSFFW_LOG_BUF_NAME, { 0 }, FW_TRACE_BUF_NR_PAGES }, #endif /* CONFIG_MALI_PIXEL_GPU_SSCD */ - { "benchmark", { 0 }, 2 }, - { "timeline", { 0 }, KBASE_CSF_TL_BUFFER_NR_PAGES }, + { KBASE_CSFFW_BENCHMARK_BUF_NAME, { 0 }, 2 }, + { KBASE_CSFFW_TIMELINE_BUF_NAME, { 0 }, KBASE_CSF_TL_BUFFER_NR_PAGES }, +#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) + { KBASE_CSFFW_GPU_METRICS_BUF_NAME, { 0 }, 8 }, +#endif /* CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD */ }; int kbase_csf_firmware_trace_buffers_init(struct kbase_device *kbdev) @@ -265,7 +268,7 @@ int kbase_csf_firmware_parse_trace_buffer_entry(struct kbase_device *kbdev, * trace buffer name (with NULL termination). */ trace_buffer = - kmalloc(sizeof(*trace_buffer) + name_len + 1, GFP_KERNEL); + kmalloc(struct_size(trace_buffer, name, name_len + 1), GFP_KERNEL); if (!trace_buffer) return -ENOMEM; @@ -512,6 +515,37 @@ unsigned int kbase_csf_firmware_trace_buffer_read_data( } EXPORT_SYMBOL(kbase_csf_firmware_trace_buffer_read_data); +void kbase_csf_firmware_trace_buffer_discard(struct firmware_trace_buffer *trace_buffer) +{ + unsigned int bytes_discarded; + u32 buffer_size = trace_buffer->num_pages << PAGE_SHIFT; + u32 extract_offset = *(trace_buffer->cpu_va.extract_cpu_va); + u32 insert_offset = *(trace_buffer->cpu_va.insert_cpu_va); + unsigned int trace_size; + + if (insert_offset >= extract_offset) { + trace_size = insert_offset - extract_offset; + if (trace_size > buffer_size / 2) { + bytes_discarded = trace_size - buffer_size / 2; + extract_offset += bytes_discarded; + *(trace_buffer->cpu_va.extract_cpu_va) = extract_offset; + } + } else { + unsigned int bytes_tail; + + bytes_tail = buffer_size - extract_offset; + trace_size = bytes_tail + insert_offset; + if (trace_size > buffer_size / 2) { + bytes_discarded = trace_size - buffer_size / 2; + extract_offset += bytes_discarded; + if (extract_offset >= buffer_size) + extract_offset = extract_offset - buffer_size; + *(trace_buffer->cpu_va.extract_cpu_va) = extract_offset; + } + } +} +EXPORT_SYMBOL(kbase_csf_firmware_trace_buffer_discard); + static void update_trace_buffer_active_mask64(struct firmware_trace_buffer *tb, u64 mask) { unsigned int i; diff --git a/mali_kbase/csf/mali_kbase_csf_trace_buffer.h b/mali_kbase/csf/mali_kbase_csf_trace_buffer.h index 037dc22..c0a42ca 100644 --- a/mali_kbase/csf/mali_kbase_csf_trace_buffer.h +++ b/mali_kbase/csf/mali_kbase_csf_trace_buffer.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2018-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved. 
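(Aside, illustrative only: kbase_csf_firmware_trace_buffer_discard() above advances the extract offset so that at most half of the ring buffer stays unread, including the wrapped case where insert sits behind extract. The same arithmetic modelled in plain C; discard_to_half() is an invented name and offsets are byte positions within a ring of buffer_size bytes:)

#include <stdint.h>
#include <stdio.h>

static uint32_t discard_to_half(uint32_t extract, uint32_t insert, uint32_t buffer_size)
{
    /* Bytes currently unread, accounting for wrap-around. */
    uint32_t used = (insert >= extract) ? insert - extract
                        : (buffer_size - extract) + insert;

    if (used > buffer_size / 2) {
        extract += used - buffer_size / 2;
        if (extract >= buffer_size)
            extract -= buffer_size; /* wrap the extract pointer */
    }
    return extract;
}

int main(void)
{
    /* Linear case: 800 unread bytes in a 1024-byte ring -> keep 512. */
    printf("%u\n", discard_to_half(100, 900, 1024)); /* prints 388 */
    /* Wrapped case: 824 unread bytes -> keep 512, extract wraps. */
    printf("%u\n", discard_to_half(900, 700, 1024)); /* prints 188 */
    return 0;
}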
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -25,8 +25,16 @@ #include <linux/types.h> #define CSF_FIRMWARE_TRACE_ENABLE_INIT_MASK_MAX (4) -#define FIRMWARE_LOG_BUF_NAME "fwlog" #define FW_TRACE_BUF_NR_PAGES 4 +#if MALI_UNIT_TEST +#define KBASE_CSFFW_UTF_BUF_NAME "fwutf" +#endif +#define KBASE_CSFFW_LOG_BUF_NAME "fwlog" +#define KBASE_CSFFW_BENCHMARK_BUF_NAME "benchmark" +#define KBASE_CSFFW_TIMELINE_BUF_NAME "timeline" +#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) +#define KBASE_CSFFW_GPU_METRICS_BUF_NAME "gpu_metrics" +#endif /* CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD */ /* Forward declarations */ struct firmware_trace_buffer; @@ -117,7 +125,8 @@ struct firmware_trace_buffer *kbase_csf_firmware_get_trace_buffer( struct kbase_device *kbdev, const char *name); /** - * kbase_csf_firmware_trace_buffer_get_trace_enable_bits_count - Get number of trace enable bits for a trace buffer + * kbase_csf_firmware_trace_buffer_get_trace_enable_bits_count - Get number of trace enable bits + * for a trace buffer * * @trace_buffer: Trace buffer handle * @@ -167,6 +176,15 @@ unsigned int kbase_csf_firmware_trace_buffer_read_data( struct firmware_trace_buffer *trace_buffer, u8 *data, unsigned int num_bytes); /** + * kbase_csf_firmware_trace_buffer_discard - Discard data from a trace buffer + * + * @trace_buffer: Trace buffer handle + * + * Discard part of the data in the trace buffer to reduce its utilization to half of its size. + */ +void kbase_csf_firmware_trace_buffer_discard(struct firmware_trace_buffer *trace_buffer); + +/** * kbase_csf_firmware_trace_buffer_get_active_mask64 - Get trace buffer active mask * * @tb: Trace buffer handle diff --git a/mali_kbase/device/backend/mali_kbase_device_csf.c b/mali_kbase/device/backend/mali_kbase_device_csf.c index 492684f..571761f 100644 --- a/mali_kbase/device/backend/mali_kbase_device_csf.c +++ b/mali_kbase/device/backend/mali_kbase_device_csf.c @@ -34,13 +34,16 @@ #include <mali_kbase.h> #include <backend/gpu/mali_kbase_irq_internal.h> #include <backend/gpu/mali_kbase_pm_internal.h> -#include <backend/gpu/mali_kbase_js_internal.h> #include <backend/gpu/mali_kbase_clk_rate_trace_mgr.h> #include <csf/mali_kbase_csf_csg_debugfs.h> +#include <csf/mali_kbase_csf_kcpu_fence_debugfs.h> #include <hwcnt/mali_kbase_hwcnt_virtualizer.h> #include <mali_kbase_kinstr_prfcnt.h> #include <mali_kbase_vinstr.h> #include <tl/mali_kbase_timeline.h> +#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) +#include <mali_kbase_gpu_metrics.h> +#endif /** * kbase_device_firmware_hwcnt_term - Terminate CSF firmware and HWC @@ -84,10 +87,6 @@ static int kbase_backend_late_init(struct kbase_device *kbdev) if (err) goto fail_pm_powerup; - err = kbase_backend_timer_init(kbdev); - if (err) - goto fail_timer; - #ifdef CONFIG_MALI_DEBUG #if IS_ENABLED(CONFIG_MALI_REAL_HW) if (kbasep_common_test_interrupt_handlers(kbdev) != 0) { @@ -123,10 +122,6 @@ static int kbase_backend_late_init(struct kbase_device *kbdev) if (err) goto fail_update_l2_features; - err = kbase_backend_time_init(kbdev); - if (err) - goto fail_update_l2_features; - init_waitqueue_head(&kbdev->hwaccess.backend.reset_wait); kbase_pm_context_idle(kbdev); @@ -148,8 +143,6 @@ fail_interrupt_test: #endif /* IS_ENABLED(CONFIG_MALI_REAL_HW) */ #endif /* CONFIG_MALI_DEBUG */ - kbase_backend_timer_term(kbdev); -fail_timer: kbase_pm_context_idle(kbdev); kbase_hwaccess_pm_halt(kbdev); fail_pm_powerup: @@ -285,20 
+278,21 @@ static void kbase_device_hwcnt_backend_csf_term(struct kbase_device *kbdev) static const struct kbase_device_init dev_init[] = { #if !IS_ENABLED(CONFIG_MALI_REAL_HW) - { kbase_gpu_device_create, kbase_gpu_device_destroy, - "Dummy model initialization failed" }, + { kbase_gpu_device_create, kbase_gpu_device_destroy, "Dummy model initialization failed" }, #else /* !IS_ENABLED(CONFIG_MALI_REAL_HW) */ { assign_irqs, NULL, "IRQ search failed" }, #endif /* !IS_ENABLED(CONFIG_MALI_REAL_HW) */ #if !IS_ENABLED(CONFIG_MALI_NO_MALI) { registers_map, registers_unmap, "Register map failed" }, #endif /* !IS_ENABLED(CONFIG_MALI_NO_MALI) */ +#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) + { kbase_gpu_metrics_init, kbase_gpu_metrics_term, "GPU metrics initialization failed" }, +#endif /* IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) */ { power_control_init, power_control_term, "Power control initialization failed" }, { kbase_device_io_history_init, kbase_device_io_history_term, "Register access history initialization failed" }, { kbase_device_early_init, kbase_device_early_term, "Early device initialization failed" }, - { kbase_device_populate_max_freq, NULL, "Populating max frequency failed" }, - { kbase_pm_lowest_gpu_freq_init, NULL, "Lowest freq initialization failed" }, + { kbase_backend_time_init, NULL, "Time backend initialization failed" }, { kbase_device_misc_init, kbase_device_misc_term, "Miscellaneous device initialization failed" }, { kbase_device_pcm_dev_init, kbase_device_pcm_dev_term, @@ -330,6 +324,8 @@ static const struct kbase_device_init dev_init[] = { { kbase_debug_csf_fault_init, kbase_debug_csf_fault_term, "CSF fault debug initialization failed" }, { kbase_device_debugfs_init, kbase_device_debugfs_term, "DebugFS initialization failed" }, + { kbase_csf_fence_timer_debugfs_init, kbase_csf_fence_timer_debugfs_term, + "Fence timeout DebugFS initialization failed" }, /* Sysfs init needs to happen before registering the device with * misc_register(), otherwise it causes a race condition between * registering the device and a uevent event being generated for @@ -522,4 +518,4 @@ out: return ret; } -KBASE_EXPORT_TEST_API(kbase_device_firmware_init_once); +KBASE_EXPORT_TEST_API(kbase_device_firmware_init_once);
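(Aside, illustrative only: dev_init[] above is kbase's table-driven bring-up: each entry pairs an init function with its matching term function and an error message, the entries run in order, and on failure everything already initialised is torn down in reverse. A compact model of that control flow with invented step names:)

#include <stdio.h>

struct init_step {
    int (*init)(void);
    void (*term)(void);
    const char *err_mes;
};

static int ok_init(void) { return 0; }
static void ok_term(void) { puts("rolled back"); }
static int bad_init(void) { return -1; }

static const struct init_step steps[] = {
    { ok_init, ok_term, "step 0 failed" },
    { bad_init, NULL, "step 1 failed" },
};

int main(void)
{
    int i, err = 0;
    const int n = (int)(sizeof(steps) / sizeof(steps[0]));

    for (i = 0; i < n; i++) {
        err = steps[i].init();
        if (err) {
            printf("%s\n", steps[i].err_mes);
            break;
        }
    }
    /* Unwind the steps that did initialise, in reverse order. */
    for (i--; err && i >= 0; i--)
        if (steps[i].term)
            steps[i].term();
    return err ? 1 : 0;
}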
\ No newline at end of file diff --git a/mali_kbase/device/backend/mali_kbase_device_hw_csf.c b/mali_kbase/device/backend/mali_kbase_device_hw_csf.c index 5e27094..c837f5a 100644 --- a/mali_kbase/device/backend/mali_kbase_device_hw_csf.c +++ b/mali_kbase/device/backend/mali_kbase_device_hw_csf.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2020-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -58,7 +58,7 @@ static void kbase_gpu_fault_interrupt(struct kbase_device *kbdev) { const u32 status = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_FAULTSTATUS)); - const bool as_valid = status & GPU_FAULTSTATUS_JASID_VALID_FLAG; + const bool as_valid = status & GPU_FAULTSTATUS_JASID_VALID_MASK; const u32 as_nr = (status & GPU_FAULTSTATUS_JASID_MASK) >> GPU_FAULTSTATUS_JASID_SHIFT; bool bus_fault = (status & GPU_FAULTSTATUS_EXCEPTION_TYPE_MASK) == diff --git a/mali_kbase/device/backend/mali_kbase_device_hw_jm.c b/mali_kbase/device/backend/mali_kbase_device_hw_jm.c index 38223af..8f7b39b 100644 --- a/mali_kbase/device/backend/mali_kbase_device_hw_jm.c +++ b/mali_kbase/device/backend/mali_kbase_device_hw_jm.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2020-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -124,9 +124,10 @@ KBASE_EXPORT_TEST_API(kbase_reg_write); u32 kbase_reg_read(struct kbase_device *kbdev, u32 offset) { - u32 val; + u32 val = 0; - WARN_ON(!kbdev->pm.backend.gpu_powered); + if (WARN_ON(!kbdev->pm.backend.gpu_powered)) + return val; val = readl(kbdev->reg + offset); diff --git a/mali_kbase/device/backend/mali_kbase_device_jm.c b/mali_kbase/device/backend/mali_kbase_device_jm.c index 14b5602..89635b5 100644 --- a/mali_kbase/device/backend/mali_kbase_device_jm.c +++ b/mali_kbase/device/backend/mali_kbase_device_jm.c @@ -45,6 +45,9 @@ #include <backend/gpu/mali_kbase_pm_internal.h> #include <mali_kbase_dummy_job_wa.h> #include <backend/gpu/mali_kbase_clk_rate_trace_mgr.h> +#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) +#include <mali_kbase_gpu_metrics.h> +#endif /** * kbase_backend_late_init - Perform any backend-specific initialization. 
@@ -102,10 +105,6 @@ static int kbase_backend_late_init(struct kbase_device *kbdev) if (err) goto fail_update_l2_features; - err = kbase_backend_time_init(kbdev); - if (err) - goto fail_update_l2_features; - init_waitqueue_head(&kbdev->hwaccess.backend.reset_wait); /* Idle the GPU and/or cores, if the policy wants it to */ @@ -224,12 +223,14 @@ static const struct kbase_device_init dev_init[] = { #if !IS_ENABLED(CONFIG_MALI_NO_MALI) { registers_map, registers_unmap, "Register map failed" }, #endif /* !IS_ENABLED(CONFIG_MALI_NO_MALI) */ +#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) + { kbase_gpu_metrics_init, kbase_gpu_metrics_term, "GPU metrics initialization failed" }, +#endif /* IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) */ { kbase_device_io_history_init, kbase_device_io_history_term, "Register access history initialization failed" }, { kbase_device_pm_init, kbase_device_pm_term, "Power management initialization failed" }, { kbase_device_early_init, kbase_device_early_term, "Early device initialization failed" }, - { kbase_device_populate_max_freq, NULL, "Populating max frequency failed" }, - { kbase_pm_lowest_gpu_freq_init, NULL, "Lowest freq initialization failed" }, + { kbase_backend_time_init, NULL, "Time backend initialization failed" }, { kbase_device_misc_init, kbase_device_misc_term, "Miscellaneous device initialization failed" }, { kbase_device_pcm_dev_init, kbase_device_pcm_dev_term, @@ -363,4 +364,4 @@ int kbase_device_firmware_init_once(struct kbase_device *kbdev) mutex_unlock(&kbdev->fw_load_lock); return ret; -} +}
\ No newline at end of file
diff --git a/mali_kbase/device/mali_kbase_device.c b/mali_kbase/device/mali_kbase_device.c
index e90e791..e5b3e2b 100644
--- a/mali_kbase/device/mali_kbase_device.c
+++ b/mali_kbase/device/mali_kbase_device.c
@@ -230,11 +230,14 @@ int kbase_device_misc_init(struct kbase_device * const kbdev)
     kbdev->cci_snoop_enabled = false;
     np = kbdev->dev->of_node;
     if (np != NULL) {
-        if (of_property_read_u32(np, "snoop_enable_smc",
-                     &kbdev->snoop_enable_smc))
+        /* Read "-" versions of the properties and fall back to "_"
+         * if these are not found
+         */
+        if (of_property_read_u32(np, "snoop-enable-smc", &kbdev->snoop_enable_smc) &&
+            of_property_read_u32(np, "snoop_enable_smc", &kbdev->snoop_enable_smc))
             kbdev->snoop_enable_smc = 0;
-        if (of_property_read_u32(np, "snoop_disable_smc",
-                     &kbdev->snoop_disable_smc))
+        if (of_property_read_u32(np, "snoop-disable-smc", &kbdev->snoop_disable_smc) &&
+            of_property_read_u32(np, "snoop_disable_smc", &kbdev->snoop_disable_smc))
             kbdev->snoop_disable_smc = 0;
         /* Either both or none of the calls should be provided. */
         if (!((kbdev->snoop_disable_smc == 0
@@ -306,13 +309,13 @@ int kbase_device_misc_init(struct kbase_device * const kbdev)
     kbdev->pm.dvfs_period = DEFAULT_PM_DVFS_PERIOD;
 
 #if MALI_USE_CSF
-    kbdev->reset_timeout_ms = kbase_get_timeout_ms(kbdev, CSF_CSG_SUSPEND_TIMEOUT);
-#else
+    kbdev->reset_timeout_ms = kbase_get_timeout_ms(kbdev, CSF_GPU_RESET_TIMEOUT);
+#else /* MALI_USE_CSF */
     kbdev->reset_timeout_ms = JM_DEFAULT_RESET_TIMEOUT_MS;
-#endif /* MALI_USE_CSF */
+#endif /* !MALI_USE_CSF */
 
     kbdev->mmu_mode = kbase_mmu_mode_get_aarch64();
-    kbdev->mmu_as_inactive_wait_time_ms =
+    kbdev->mmu_or_gpu_cache_op_wait_time_ms =
         kbase_get_timeout_ms(kbdev, MMU_AS_INACTIVE_WAIT_TIMEOUT);
     mutex_init(&kbdev->kctx_list_lock);
     INIT_LIST_HEAD(&kbdev->kctx_list);
@@ -327,9 +330,13 @@ int kbase_device_misc_init(struct kbase_device * const kbdev)
         kbdev->oom_notifier_block.notifier_call = NULL;
     }
 
-#if MALI_USE_CSF && IS_ENABLED(CONFIG_SYNC_FILE)
+#if MALI_USE_CSF
+#if IS_ENABLED(CONFIG_SYNC_FILE)
     atomic_set(&kbdev->live_fence_metadata, 0);
+#endif /* IS_ENABLED(CONFIG_SYNC_FILE) */
+    atomic_set(&kbdev->fence_signal_timeout_enabled, 1);
 #endif
+
     return 0;
 
 term_as:
@@ -367,8 +374,7 @@ void kbase_device_free(struct kbase_device *kbdev)
 
 void kbase_device_id_init(struct kbase_device *kbdev)
 {
-    scnprintf(kbdev->devname, DEVNAME_SIZE, "%s%d", kbase_drv_name,
-          kbase_dev_nr);
+    scnprintf(kbdev->devname, DEVNAME_SIZE, "%s%d", KBASE_DRV_NAME, kbase_dev_nr);
     kbdev->id = kbase_dev_nr;
 }
 
diff --git a/mali_kbase/device/mali_kbase_device.h b/mali_kbase/device/mali_kbase_device.h
index f025011..e9cb5c2 100644
--- a/mali_kbase/device/mali_kbase_device.h
+++ b/mali_kbase/device/mali_kbase_device.h
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 /*
  *
- * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved.
 *
 * This program is free software and is provided to you under the terms of the
 * GNU General Public License version 2 as published by the Free Software
@@ -191,6 +191,7 @@ void kbase_gpu_wait_cache_clean(struct kbase_device *kbdev);
 * called from paths (like GPU reset) where an indefinite wait for the
 * completion of cache clean operation can cause deadlock, as the operation may
 * never complete.
+ * If the cache clean times out, the GPU is reset to recover.
 *
 * Return: 0 if successful or a negative error code on failure.
 */
diff --git a/mali_kbase/device/mali_kbase_device_hw.c b/mali_kbase/device/mali_kbase_device_hw.c
index 8b4588e..8126b9b 100644
--- a/mali_kbase/device/mali_kbase_device_hw.c
+++ b/mali_kbase/device/mali_kbase_device_hw.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 /*
  *
- * (C) COPYRIGHT 2014-2016, 2018-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved.
 *
 * This program is free software and is provided to you under the terms of the
 * GNU General Public License version 2 as published by the Free Software
@@ -27,34 +27,47 @@
 #include <mali_kbase_reset_gpu.h>
 #include <mmu/mali_kbase_mmu.h>
 
-#if !IS_ENABLED(CONFIG_MALI_NO_MALI)
 bool kbase_is_gpu_removed(struct kbase_device *kbdev)
 {
-    u32 val;
+    if (!IS_ENABLED(CONFIG_MALI_ARBITER_SUPPORT))
+        return false;
 
-    val = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_ID));
-
-    return val == 0;
+    return (kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_ID)) == 0);
 }
-#endif /* !IS_ENABLED(CONFIG_MALI_NO_MALI) */
 
-static int busy_wait_on_irq(struct kbase_device *kbdev, u32 irq_bit)
+/**
+ * busy_wait_cache_operation - Wait for a pending cache flush to complete
+ *
+ * @kbdev: Pointer to kbase device.
+ * @irq_bit: IRQ bit of the cache flush operation to wait on.
+ *
+ * It will reset the GPU if the wait fails.
+ *
+ * Return: 0 on success, error code otherwise.
+ */
+static int busy_wait_cache_operation(struct kbase_device *kbdev, u32 irq_bit)
 {
-    char *irq_flag_name;
-    /* Previously MMU-AS command was used for L2 cache flush on page-table update.
-     * And we're using the same max-loops count for GPU command, because amount of
-     * L2 cache flush overhead are same between them.
-     */
-    unsigned int max_loops = KBASE_AS_INACTIVE_MAX_LOOPS;
+    const ktime_t wait_loop_start = ktime_get_raw();
+    const u32 wait_time_ms = kbdev->mmu_or_gpu_cache_op_wait_time_ms;
+    bool completed = false;
+    s64 diff;
+
+    do {
+        unsigned int i;
+
+        for (i = 0; i < 1000; i++) {
+            if (kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT)) & irq_bit) {
+                completed = true;
+                break;
+            }
+        }
 
-    /* Wait for the GPU cache clean operation to complete */
-    while (--max_loops &&
-           !(kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_RAWSTAT)) & irq_bit)) {
-        ;
-    }
+        diff = ktime_to_ms(ktime_sub(ktime_get_raw(), wait_loop_start));
+    } while ((diff < wait_time_ms) && !completed);
+
+    if (!completed) {
+        char *irq_flag_name;
 
-    /* reset gpu if time-out occurred */
-    if (max_loops == 0) {
         switch (irq_bit) {
         case CLEAN_CACHES_COMPLETED:
             irq_flag_name = "CLEAN_CACHES_COMPLETED";
@@ -68,15 +81,15 @@ static int busy_wait_on_irq(struct kbase_device *kbdev, u32 irq_bit)
         }
 
         dev_err(kbdev->dev,
-            "Stuck waiting on %s bit, might be caused by slow/unstable GPU clock or possible faulty FPGA connector\n",
+            "Stuck waiting on %s bit, might be due to unstable GPU clk/pwr or possible faulty FPGA connector\n",
             irq_flag_name);
 
         if (kbase_prepare_to_reset_gpu_locked(kbdev, RESET_FLAGS_NONE))
             kbase_reset_gpu_locked(kbdev);
+        return -EBUSY;
     }
 
-    /* Clear the interrupt bit. */
     KBASE_KTRACE_ADD(kbdev, CORE_GPU_IRQ_CLEAR, NULL, irq_bit);
     kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), irq_bit);
 
@@ -110,7 +123,7 @@ int kbase_gpu_cache_flush_pa_range_and_busy_wait(struct kbase_device *kbdev, phy
     kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), flush_op);
 
     /* 3. Busy-wait irq status to be enabled.
     */
-    ret = busy_wait_on_irq(kbdev, (u32)FLUSH_PA_RANGE_COMPLETED);
+    ret = busy_wait_cache_operation(kbdev, (u32)FLUSH_PA_RANGE_COMPLETED);
 
     return ret;
 }
@@ -143,7 +156,7 @@ int kbase_gpu_cache_flush_and_busy_wait(struct kbase_device *kbdev,
             irq_mask & ~CLEAN_CACHES_COMPLETED);
 
     /* busy wait irq status to be enabled */
-    ret = busy_wait_on_irq(kbdev, (u32)CLEAN_CACHES_COMPLETED);
+    ret = busy_wait_cache_operation(kbdev, (u32)CLEAN_CACHES_COMPLETED);
     if (ret)
         return ret;
 
@@ -164,7 +177,7 @@ int kbase_gpu_cache_flush_and_busy_wait(struct kbase_device *kbdev,
     kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_COMMAND), flush_op);
 
     /* 3. Busy-wait irq status to be enabled. */
-    ret = busy_wait_on_irq(kbdev, (u32)CLEAN_CACHES_COMPLETED);
+    ret = busy_wait_cache_operation(kbdev, (u32)CLEAN_CACHES_COMPLETED);
     if (ret)
         return ret;
 
@@ -271,8 +284,9 @@ static inline bool get_cache_clean_flag(struct kbase_device *kbdev)
 void kbase_gpu_wait_cache_clean(struct kbase_device *kbdev)
 {
     while (get_cache_clean_flag(kbdev)) {
-        wait_event_interruptible(kbdev->cache_clean_wait,
-                     !kbdev->cache_clean_in_progress);
+        if (wait_event_interruptible(kbdev->cache_clean_wait,
+                         !kbdev->cache_clean_in_progress))
+            dev_warn(kbdev->dev, "Wait for cache clean was interrupted");
     }
 }
 
@@ -280,6 +294,7 @@ int kbase_gpu_wait_cache_clean_timeout(struct kbase_device *kbdev,
                        unsigned int wait_timeout_ms)
 {
     long remaining = msecs_to_jiffies(wait_timeout_ms);
+    int result = 0;
 
     while (remaining && get_cache_clean_flag(kbdev)) {
         remaining = wait_event_timeout(kbdev->cache_clean_wait,
@@ -287,5 +302,15 @@ int kbase_gpu_wait_cache_clean_timeout(struct kbase_device *kbdev,
                            remaining);
     }
 
-    return (remaining ? 0 : -ETIMEDOUT);
+    if (!remaining) {
+        dev_err(kbdev->dev,
+            "Cache clean timed out. Might be caused by unstable GPU clk/pwr or faulty system");
+
+        if (kbase_prepare_to_reset_gpu_locked(kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR))
+            kbase_reset_gpu_locked(kbdev);
+
+        result = -ETIMEDOUT;
+    }
+
+    return result;
 }
diff --git a/mali_kbase/gpu/backend/mali_kbase_gpu_regmap_csf.h b/mali_kbase/gpu/backend/mali_kbase_gpu_regmap_csf.h
index e7457dd..ab989e0 100644
--- a/mali_kbase/gpu/backend/mali_kbase_gpu_regmap_csf.h
+++ b/mali_kbase/gpu/backend/mali_kbase_gpu_regmap_csf.h
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 /*
  *
- * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved.
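(Aside, illustrative only: busy_wait_cache_operation() above replaces a fixed iteration budget with a wall-clock budget: it polls the IRQ status in bursts of 1000 reads and only re-checks elapsed time between bursts. The same shape in portable C; poll_until(), never_done() and the 100 ms budget are invented:)

#include <stdbool.h>
#include <stdio.h>
#include <time.h>

static long long now_ms(void)
{
    struct timespec ts;

    clock_gettime(CLOCK_MONOTONIC, &ts);
    return (long long)ts.tv_sec * 1000 + ts.tv_nsec / 1000000;
}

static bool poll_until(bool (*done)(void), long long budget_ms)
{
    const long long start = now_ms();

    do {
        int i;

        /* Poll in bursts so the clock is read once per 1000 checks. */
        for (i = 0; i < 1000; i++)
            if (done())
                return true;
    } while (now_ms() - start < budget_ms);

    return false; /* caller decides how to recover, e.g. reset */
}

static bool never_done(void) { return false; }

int main(void)
{
    printf("%s\n", poll_until(never_done, 100) ? "completed" : "timed out");
    return 0;
}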
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -28,6 +28,17 @@ #error "Cannot be compiled with JM" #endif +/* GPU control registers */ +#define MCU_CONTROL 0x700 + +#define L2_CONFIG_PBHA_HWU_SHIFT GPU_U(12) +#define L2_CONFIG_PBHA_HWU_MASK (GPU_U(0xF) << L2_CONFIG_PBHA_HWU_SHIFT) +#define L2_CONFIG_PBHA_HWU_GET(reg_val) \ + (((reg_val)&L2_CONFIG_PBHA_HWU_MASK) >> L2_CONFIG_PBHA_HWU_SHIFT) +#define L2_CONFIG_PBHA_HWU_SET(reg_val, value) \ + (((reg_val) & ~L2_CONFIG_PBHA_HWU_MASK) | \ + (((value) << L2_CONFIG_PBHA_HWU_SHIFT) & L2_CONFIG_PBHA_HWU_MASK)) + /* GPU_CONTROL_MCU base address */ #define GPU_CONTROL_MCU_BASE 0x3000 @@ -35,35 +46,39 @@ #define MCU_SUBSYSTEM_BASE 0x20000 /* IPA control registers */ -#define COMMAND 0x000 /* (WO) Command register */ -#define TIMER 0x008 /* (RW) Timer control register */ - -#define SELECT_CSHW_LO 0x010 /* (RW) Counter select for CS hardware, low word */ -#define SELECT_CSHW_HI 0x014 /* (RW) Counter select for CS hardware, high word */ -#define SELECT_MEMSYS_LO 0x018 /* (RW) Counter select for Memory system, low word */ -#define SELECT_MEMSYS_HI 0x01C /* (RW) Counter select for Memory system, high word */ -#define SELECT_TILER_LO 0x020 /* (RW) Counter select for Tiler cores, low word */ -#define SELECT_TILER_HI 0x024 /* (RW) Counter select for Tiler cores, high word */ -#define SELECT_SHADER_LO 0x028 /* (RW) Counter select for Shader cores, low word */ -#define SELECT_SHADER_HI 0x02C /* (RW) Counter select for Shader cores, high word */ +#define IPA_CONTROL_BASE 0x40000 +#define IPA_CONTROL_REG(r) (IPA_CONTROL_BASE + (r)) + +#define COMMAND 0x000 /* (WO) Command register */ +#define STATUS 0x004 /* (RO) Status register */ +#define TIMER 0x008 /* (RW) Timer control register */ + +#define SELECT_CSHW_LO 0x010 /* (RW) Counter select for CS hardware, low word */ +#define SELECT_CSHW_HI 0x014 /* (RW) Counter select for CS hardware, high word */ +#define SELECT_MEMSYS_LO 0x018 /* (RW) Counter select for Memory system, low word */ +#define SELECT_MEMSYS_HI 0x01C /* (RW) Counter select for Memory system, high word */ +#define SELECT_TILER_LO 0x020 /* (RW) Counter select for Tiler cores, low word */ +#define SELECT_TILER_HI 0x024 /* (RW) Counter select for Tiler cores, high word */ +#define SELECT_SHADER_LO 0x028 /* (RW) Counter select for Shader cores, low word */ +#define SELECT_SHADER_HI 0x02C /* (RW) Counter select for Shader cores, high word */ /* Accumulated counter values for CS hardware */ -#define VALUE_CSHW_BASE 0x100 -#define VALUE_CSHW_REG_LO(n) (VALUE_CSHW_BASE + ((n) << 3)) /* (RO) Counter value #n, low word */ -#define VALUE_CSHW_REG_HI(n) (VALUE_CSHW_BASE + ((n) << 3) + 4) /* (RO) Counter value #n, high word */ +#define VALUE_CSHW_BASE 0x100 +#define VALUE_CSHW_REG_LO(n) (VALUE_CSHW_BASE + ((n) << 3)) /* (RO) Counter value #n, low word */ +#define VALUE_CSHW_REG_HI(n) (VALUE_CSHW_BASE + ((n) << 3) + 4) /* (RO) Counter value #n, high word */ /* Accumulated counter values for memory system */ -#define VALUE_MEMSYS_BASE 0x140 -#define VALUE_MEMSYS_REG_LO(n) (VALUE_MEMSYS_BASE + ((n) << 3)) /* (RO) Counter value #n, low word */ -#define VALUE_MEMSYS_REG_HI(n) (VALUE_MEMSYS_BASE + ((n) << 3) + 4) /* (RO) Counter value #n, high word */ +#define VALUE_MEMSYS_BASE 0x140 +#define VALUE_MEMSYS_REG_LO(n) (VALUE_MEMSYS_BASE + ((n) << 3)) /* (RO) Counter value #n, low word */ +#define VALUE_MEMSYS_REG_HI(n) (VALUE_MEMSYS_BASE + ((n) << 3) + 4) /* (RO) 
Counter value #n, high word */ -#define VALUE_TILER_BASE 0x180 -#define VALUE_TILER_REG_LO(n) (VALUE_TILER_BASE + ((n) << 3)) /* (RO) Counter value #n, low word */ -#define VALUE_TILER_REG_HI(n) (VALUE_TILER_BASE + ((n) << 3) + 4) /* (RO) Counter value #n, high word */ +#define VALUE_TILER_BASE 0x180 +#define VALUE_TILER_REG_LO(n) (VALUE_TILER_BASE + ((n) << 3)) /* (RO) Counter value #n, low word */ +#define VALUE_TILER_REG_HI(n) (VALUE_TILER_BASE + ((n) << 3) + 4) /* (RO) Counter value #n, high word */ -#define VALUE_SHADER_BASE 0x1C0 -#define VALUE_SHADER_REG_LO(n) (VALUE_SHADER_BASE + ((n) << 3)) /* (RO) Counter value #n, low word */ -#define VALUE_SHADER_REG_HI(n) (VALUE_SHADER_BASE + ((n) << 3) + 4) /* (RO) Counter value #n, high word */ +#define VALUE_SHADER_BASE 0x1C0 +#define VALUE_SHADER_REG_LO(n) (VALUE_SHADER_BASE + ((n) << 3)) /* (RO) Counter value #n, low word */ +#define VALUE_SHADER_REG_HI(n) (VALUE_SHADER_BASE + ((n) << 3) + 4) /* (RO) Counter value #n, high word */ #define AS_STATUS_AS_ACTIVE_INT 0x2 @@ -112,7 +127,6 @@ /* GPU control registers */ #define CORE_FEATURES 0x008 /* () Shader Core Features */ -#define MCU_CONTROL 0x700 #define MCU_STATUS 0x704 #define MCU_CNTRL_ENABLE (1 << 0) @@ -122,15 +136,7 @@ #define MCU_CNTRL_DOORBELL_DISABLE_SHIFT (31) #define MCU_CNTRL_DOORBELL_DISABLE_MASK (1 << MCU_CNTRL_DOORBELL_DISABLE_SHIFT) -#define MCU_STATUS_HALTED (1 << 1) - -#define L2_CONFIG_PBHA_HWU_SHIFT GPU_U(12) -#define L2_CONFIG_PBHA_HWU_MASK (GPU_U(0xF) << L2_CONFIG_PBHA_HWU_SHIFT) -#define L2_CONFIG_PBHA_HWU_GET(reg_val) \ - (((reg_val)&L2_CONFIG_PBHA_HWU_MASK) >> L2_CONFIG_PBHA_HWU_SHIFT) -#define L2_CONFIG_PBHA_HWU_SET(reg_val, value) \ - (((reg_val) & ~L2_CONFIG_PBHA_HWU_MASK) | \ - (((value) << L2_CONFIG_PBHA_HWU_SHIFT) & L2_CONFIG_PBHA_HWU_MASK)) +#define MCU_STATUS_HALTED (1 << 1) /* JOB IRQ flags */ #define JOB_IRQ_GLOBAL_IF (1u << 31) /* Global interface interrupt received */ @@ -292,13 +298,13 @@ #define GPU_FAULTSTATUS_ACCESS_TYPE_MASK \ (0x3ul << GPU_FAULTSTATUS_ACCESS_TYPE_SHIFT) -#define GPU_FAULTSTATUS_ADDR_VALID_SHIFT 10 -#define GPU_FAULTSTATUS_ADDR_VALID_FLAG \ - (1ul << GPU_FAULTSTATUS_ADDR_VALID_SHIFT) +#define GPU_FAULTSTATUS_ADDRESS_VALID_SHIFT GPU_U(10) +#define GPU_FAULTSTATUS_ADDRESS_VALID_MASK \ + (GPU_U(0x1) << GPU_FAULTSTATUS_ADDRESS_VALID_SHIFT) -#define GPU_FAULTSTATUS_JASID_VALID_SHIFT 11 -#define GPU_FAULTSTATUS_JASID_VALID_FLAG \ - (1ul << GPU_FAULTSTATUS_JASID_VALID_SHIFT) +#define GPU_FAULTSTATUS_JASID_VALID_SHIFT GPU_U(11) +#define GPU_FAULTSTATUS_JASID_VALID_MASK \ + (GPU_U(0x1) << GPU_FAULTSTATUS_JASID_VALID_SHIFT) #define GPU_FAULTSTATUS_JASID_SHIFT 12 #define GPU_FAULTSTATUS_JASID_MASK (0xF << GPU_FAULTSTATUS_JASID_SHIFT) diff --git a/mali_kbase/gpu/backend/mali_kbase_gpu_regmap_jm.h b/mali_kbase/gpu/backend/mali_kbase_gpu_regmap_jm.h index f86f493..387cd50 100644 --- a/mali_kbase/gpu/backend/mali_kbase_gpu_regmap_jm.h +++ b/mali_kbase/gpu/backend/mali_kbase_gpu_regmap_jm.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -59,28 +59,27 @@ #define CORE_FEATURES 0x008 /* (RO) Shader Core Features */ #define JS_PRESENT 0x01C /* (RO) Job slots present */ - -#define PRFCNT_BASE_LO 0x060 /* (RW) Performance counter memory - * region base address, low word - */ -#define PRFCNT_BASE_HI 0x064 /* (RW) Performance counter memory - * region base address, high word - */ -#define PRFCNT_CONFIG 0x068 /* (RW) Performance counter - * configuration - */ -#define PRFCNT_JM_EN 0x06C /* (RW) Performance counter enable - * flags for Job Manager - */ -#define PRFCNT_SHADER_EN 0x070 /* (RW) Performance counter enable - * flags for shader cores - */ -#define PRFCNT_TILER_EN 0x074 /* (RW) Performance counter enable - * flags for tiler - */ -#define PRFCNT_MMU_L2_EN 0x07C /* (RW) Performance counter enable - * flags for MMU/L2 cache - */ +#define LATEST_FLUSH 0x038 /* (RO) Flush ID of latest + * clean-and-invalidate operation + */ +#define PRFCNT_BASE_LO 0x060 /* (RW) Performance counter memory + * region base address, low word + */ +#define PRFCNT_BASE_HI 0x064 /* (RW) Performance counter memory + * region base address, high word + */ +#define PRFCNT_CONFIG 0x068 /* (RW) Performance counter configuration */ +#define PRFCNT_JM_EN 0x06C /* (RW) Performance counter enable + * flags for Job Manager + */ +#define PRFCNT_SHADER_EN 0x070 /* (RW) Performance counter enable + * flags for shader cores */ +#define PRFCNT_TILER_EN 0x074 /* (RW) Performance counter enable + * flags for tiler + */ +#define PRFCNT_MMU_L2_EN 0x07C /* (RW) Performance counter enable + * flags for MMU/L2 cache + */ #define JS0_FEATURES 0x0C0 /* (RO) Features of job slot 0 */ #define JS1_FEATURES 0x0C4 /* (RO) Features of job slot 1 */ @@ -108,6 +107,8 @@ #define JOB_IRQ_JS_STATE 0x010 /* status==active and _next == busy snapshot from last JOB_IRQ_CLEAR */ #define JOB_IRQ_THROTTLE 0x014 /* cycles to delay delivering an interrupt externally. The JOB_IRQ_STATUS is NOT affected by this, just the delivery of the interrupt. 
*/ +#define JOB_SLOT0 0x800 /* Configuration registers for job slot 0 */ +#define JOB_SLOT_REG(n, r) (JOB_CONTROL_REG(JOB_SLOT0 + ((n) << 7)) + (r)) #define JOB_SLOT1 0x880 /* Configuration registers for job slot 1 */ #define JOB_SLOT2 0x900 /* Configuration registers for job slot 2 */ #define JOB_SLOT3 0x980 /* Configuration registers for job slot 3 */ @@ -124,29 +125,41 @@ #define JOB_SLOT14 0xF00 /* Configuration registers for job slot 14 */ #define JOB_SLOT15 0xF80 /* Configuration registers for job slot 15 */ -#define JS_XAFFINITY 0x1C /* (RO) Extended affinity mask for job slot n*/ - -#define JS_COMMAND 0x20 /* (WO) Command register for job slot n */ -#define JS_STATUS 0x24 /* (RO) Status register for job slot n */ - -#define JS_XAFFINITY_NEXT 0x5C /* (RW) Next extended affinity mask for job slot n */ - -#define JS_FLUSH_ID_NEXT 0x70 /* (RW) Next job slot n cache flush ID */ +/* JM Job control register definitions for mali_kbase_debug_job_fault */ +#define JS_HEAD_LO 0x00 /* (RO) Job queue head pointer for job slot n, low word */ +#define JS_HEAD_HI 0x04 /* (RO) Job queue head pointer for job slot n, high word */ +#define JS_TAIL_LO 0x08 /* (RO) Job queue tail pointer for job slot n, low word */ +#define JS_TAIL_HI 0x0C /* (RO) Job queue tail pointer for job slot n, high word */ +#define JS_AFFINITY_LO 0x10 /* (RO) Core affinity mask for job slot n, low word */ +#define JS_AFFINITY_HI 0x14 /* (RO) Core affinity mask for job slot n, high word */ +#define JS_CONFIG 0x18 /* (RO) Configuration settings for job slot n */ +#define JS_XAFFINITY 0x1C /* (RO) Extended affinity mask for job slot n*/ +#define JS_COMMAND 0x20 /* (WO) Command register for job slot n */ +#define JS_STATUS 0x24 /* (RO) Status register for job slot n */ +#define JS_HEAD_NEXT_LO 0x40 /* (RW) Next job queue head pointer for job slot n, low word */ +#define JS_HEAD_NEXT_HI 0x44 /* (RW) Next job queue head pointer for job slot n, high word */ +#define JS_AFFINITY_NEXT_LO 0x50 /* (RW) Next core affinity mask for job slot n, low word */ +#define JS_AFFINITY_NEXT_HI 0x54 /* (RW) Next core affinity mask for job slot n, high word */ +#define JS_CONFIG_NEXT 0x58 /* (RW) Next configuration settings for job slot n */ +#define JS_XAFFINITY_NEXT 0x5C /* (RW) Next extended affinity mask for job slot n */ +#define JS_COMMAND_NEXT 0x60 /* (RW) Next command register for job slot n */ + +#define JS_FLUSH_ID_NEXT 0x70 /* (RW) Next job slot n cache flush ID */ /* No JM-specific MMU control registers */ /* No JM-specific MMU address space control registers */ /* JS_COMMAND register commands */ -#define JS_COMMAND_NOP 0x00 /* NOP Operation. Writing this value is ignored */ -#define JS_COMMAND_START 0x01 /* Start processing a job chain. Writing this value is ignored */ -#define JS_COMMAND_SOFT_STOP 0x02 /* Gently stop processing a job chain */ -#define JS_COMMAND_HARD_STOP 0x03 /* Rudely stop processing a job chain */ -#define JS_COMMAND_SOFT_STOP_0 0x04 /* Execute SOFT_STOP if JOB_CHAIN_FLAG is 0 */ -#define JS_COMMAND_HARD_STOP_0 0x05 /* Execute HARD_STOP if JOB_CHAIN_FLAG is 0 */ -#define JS_COMMAND_SOFT_STOP_1 0x06 /* Execute SOFT_STOP if JOB_CHAIN_FLAG is 1 */ -#define JS_COMMAND_HARD_STOP_1 0x07 /* Execute HARD_STOP if JOB_CHAIN_FLAG is 1 */ - -#define JS_COMMAND_MASK 0x07 /* Mask of bits currently in use by the HW */ +#define JS_COMMAND_NOP 0x00 /* NOP Operation. Writing this value is ignored */ +#define JS_COMMAND_START 0x01 /* Start processing a job chain. 
Writing this value is ignored */ +#define JS_COMMAND_SOFT_STOP 0x02 /* Gently stop processing a job chain */ +#define JS_COMMAND_HARD_STOP 0x03 /* Rudely stop processing a job chain */ +#define JS_COMMAND_SOFT_STOP_0 0x04 /* Execute SOFT_STOP if JOB_CHAIN_FLAG is 0 */ +#define JS_COMMAND_HARD_STOP_0 0x05 /* Execute HARD_STOP if JOB_CHAIN_FLAG is 0 */ +#define JS_COMMAND_SOFT_STOP_1 0x06 /* Execute SOFT_STOP if JOB_CHAIN_FLAG is 1 */ +#define JS_COMMAND_HARD_STOP_1 0x07 /* Execute HARD_STOP if JOB_CHAIN_FLAG is 1 */ + +#define JS_COMMAND_MASK 0x07 /* Mask of bits currently in use by the HW */ /* Possible values of JS_CONFIG and JS_CONFIG_NEXT registers */ #define JS_CONFIG_START_FLUSH_NO_ACTION (0u << 0) diff --git a/mali_kbase/gpu/mali_kbase_gpu.c b/mali_kbase/gpu/mali_kbase_gpu.c index 8a84ef5..eee670f 100644 --- a/mali_kbase/gpu/mali_kbase_gpu.c +++ b/mali_kbase/gpu/mali_kbase_gpu.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -32,7 +32,7 @@ const char *kbase_gpu_access_type_name(u32 fault_status) return "READ"; case AS_FAULTSTATUS_ACCESS_TYPE_WRITE: return "WRITE"; - case AS_FAULTSTATUS_ACCESS_TYPE_EX: + case AS_FAULTSTATUS_ACCESS_TYPE_EXECUTE: return "EXECUTE"; default: WARN_ON(1); diff --git a/mali_kbase/gpu/mali_kbase_gpu_regmap.h b/mali_kbase/gpu/mali_kbase_gpu_regmap.h index e51791f..a92b498 100644 --- a/mali_kbase/gpu/mali_kbase_gpu_regmap.h +++ b/mali_kbase/gpu/mali_kbase_gpu_regmap.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -25,6 +25,7 @@ #include <uapi/gpu/arm/midgard/gpu/mali_kbase_gpu_regmap.h> #include <uapi/gpu/arm/midgard/gpu/mali_kbase_gpu_coherency.h> #include <uapi/gpu/arm/midgard/gpu/mali_kbase_gpu_id.h> + #if MALI_USE_CSF #include "backend/mali_kbase_gpu_regmap_csf.h" #else @@ -42,19 +43,29 @@ #define GPU_ULL(x) x##ull #endif /* __ASSEMBLER__ */ + /* Begin Register Offsets */ /* GPU control registers */ +#define GPU_CONTROL_BASE 0x0000 +#define GPU_CONTROL_REG(r) (GPU_CONTROL_BASE + (r)) + +#define GPU_ID 0x000 /* (RO) GPU and revision identifier */ #define L2_FEATURES 0x004 /* (RO) Level 2 cache features */ #define TILER_FEATURES 0x00C /* (RO) Tiler Features */ #define MEM_FEATURES 0x010 /* (RO) Memory system features */ #define MMU_FEATURES 0x014 /* (RO) MMU features */ #define AS_PRESENT 0x018 /* (RO) Address space slots present */ #define GPU_IRQ_RAWSTAT 0x020 /* (RW) */ +#define GPU_IRQ_CLEAR 0x024 /* (WO) */ #define GPU_IRQ_MASK 0x028 /* (RW) */ - +#define GPU_IRQ_STATUS 0x02C /* (RO) */ #define GPU_COMMAND 0x030 /* (WO) */ + #define GPU_STATUS 0x034 /* (RO) */ +#define GPU_STATUS_PRFCNT_ACTIVE (1 << 2) /* Set if the performance counters are active. */ +#define GPU_STATUS_CYCLE_COUNT_ACTIVE (1 << 6) /* Set if the cycle counter is active. 
*/ +#define GPU_STATUS_PROTECTED_MODE_ACTIVE (1 << 7) /* Set if protected mode is active */ #define GPU_DBGEN (1 << 8) /* DBGEN wire status */ @@ -64,10 +75,9 @@ #define L2_CONFIG 0x048 /* (RW) Level 2 cache configuration */ -#define GROUPS_L2_COHERENT (1 << 0) /* Cores groups are l2 coherent */ -#define SUPER_L2_COHERENT (1 << 1) /* Shader cores within a core - * supergroup are l2 coherent - */ +/* Cores groups are l2 coherent */ +#define MEM_FEATURES_COHERENT_CORE_GROUP_SHIFT GPU_U(0) +#define MEM_FEATURES_COHERENT_CORE_GROUP_MASK (GPU_U(0x1) << MEM_FEATURES_COHERENT_CORE_GROUP_SHIFT) #define PWR_KEY 0x050 /* (WO) Power manager key register */ #define PWR_OVERRIDE0 0x054 /* (RW) Power manager override settings */ @@ -95,10 +105,10 @@ #define TEXTURE_FEATURES_REG(n) GPU_CONTROL_REG(TEXTURE_FEATURES_0 + ((n) << 2)) -#define GPU_COMMAND_ARG0_LO 0x0D0 /* (RW) Additional parameter 0 for GPU commands, low word */ -#define GPU_COMMAND_ARG0_HI 0x0D4 /* (RW) Additional parameter 0 for GPU commands, high word */ -#define GPU_COMMAND_ARG1_LO 0x0D8 /* (RW) Additional parameter 1 for GPU commands, low word */ -#define GPU_COMMAND_ARG1_HI 0x0DC /* (RW) Additional parameter 1 for GPU commands, high word */ +#define GPU_COMMAND_ARG0_LO 0x0D0 /* (RW) Additional parameter 0 for GPU commands, low word */ +#define GPU_COMMAND_ARG0_HI 0x0D4 /* (RW) Additional parameter 0 for GPU commands, high word */ +#define GPU_COMMAND_ARG1_LO 0x0D8 /* (RW) Additional parameter 1 for GPU commands, low word */ +#define GPU_COMMAND_ARG1_HI 0x0DC /* (RW) Additional parameter 1 for GPU commands, high word */ #define SHADER_PRESENT_LO 0x100 /* (RO) Shader core present bitmap, low word */ #define SHADER_PRESENT_HI 0x104 /* (RO) Shader core present bitmap, high word */ @@ -109,14 +119,32 @@ #define L2_PRESENT_LO 0x120 /* (RO) Level 2 cache present bitmap, low word */ #define L2_PRESENT_HI 0x124 /* (RO) Level 2 cache present bitmap, high word */ +#define SHADER_READY_LO 0x140 /* (RO) Shader core ready bitmap, low word */ +#define SHADER_READY_HI 0x144 /* (RO) Shader core ready bitmap, high word */ + +#define TILER_READY_LO 0x150 /* (RO) Tiler core ready bitmap, low word */ +#define TILER_READY_HI 0x154 /* (RO) Tiler core ready bitmap, high word */ + +#define L2_READY_LO 0x160 /* (RO) Level 2 cache ready bitmap, low word */ +#define L2_READY_HI 0x164 /* (RO) Level 2 cache ready bitmap, high word */ + +#define SHADER_PWRON_LO 0x180 /* (WO) Shader core power on bitmap, low word */ +#define SHADER_PWRON_HI 0x184 /* (WO) Shader core power on bitmap, high word */ + +#define SHADER_PWRFEATURES 0x188 /* (RW) Shader core power features */ + +#define TILER_PWRON_LO 0x190 /* (WO) Tiler core power on bitmap, low word */ +#define TILER_PWRON_HI 0x194 /* (WO) Tiler core power on bitmap, high word */ + +#define L2_PWRON_LO 0x1A0 /* (WO) Level 2 cache power on bitmap, low word */ +#define L2_PWRON_HI 0x1A4 /* (WO) Level 2 cache power on bitmap, high word */ + #define STACK_PRESENT_LO 0xE00 /* (RO) Core stack present bitmap, low word */ #define STACK_PRESENT_HI 0xE04 /* (RO) Core stack present bitmap, high word */ #define STACK_READY_LO 0xE10 /* (RO) Core stack ready bitmap, low word */ #define STACK_READY_HI 0xE14 /* (RO) Core stack ready bitmap, high word */ -#define SHADER_PWRFEATURES 0x188 /* (RW) Shader core power features */ - #define STACK_PWRON_LO 0xE20 /* (RO) Core stack power on bitmap, low word */ #define STACK_PWRON_HI 0xE24 /* (RO) Core stack power on bitmap, high word */ @@ -173,10 +201,25 @@ /* Job control registers */ +#define 
JOB_CONTROL_BASE 0x1000 +#define JOB_CONTROL_REG(r) (JOB_CONTROL_BASE + (r)) + #define JOB_IRQ_RAWSTAT 0x000 /* Raw interrupt status register */ +#define JOB_IRQ_CLEAR 0x004 /* Interrupt clear register */ +#define JOB_IRQ_MASK 0x008 /* Interrupt mask register */ +#define JOB_IRQ_STATUS 0x00C /* Interrupt status register */ /* MMU control registers */ +#define MMU_CONTROL_BASE 0x2000 +#define MMU_CONTROL_REG(r) (MMU_CONTROL_BASE + (r)) + +#define MMU_IRQ_RAWSTAT 0x000 /* (RW) Raw interrupt status register */ +#define MMU_IRQ_CLEAR 0x004 /* (WO) Interrupt clear register */ +#define MMU_IRQ_MASK 0x008 /* (RW) Interrupt mask register */ +#define MMU_IRQ_STATUS 0x00C /* (RO) Interrupt status register */ + +#define MMU_AS0 0x400 /* Configuration registers for address space 0 */ #define MMU_AS1 0x440 /* Configuration registers for address space 1 */ #define MMU_AS2 0x480 /* Configuration registers for address space 2 */ #define MMU_AS3 0x4C0 /* Configuration registers for address space 3 */ @@ -194,17 +237,27 @@ #define MMU_AS15 0x7C0 /* Configuration registers for address space 15 */ /* MMU address space control registers */ -#define AS_LOCKADDR_LO 0x10 /* (RW) Lock region address for address space n, low word */ -#define AS_LOCKADDR_HI 0x14 /* (RW) Lock region address for address space n, high word */ -#define AS_FAULTSTATUS 0x1C /* (RO) MMU fault status register for address space n */ -#define AS_FAULTADDRESS_LO 0x20 /* (RO) Fault Address for address space n, low word */ -#define AS_FAULTADDRESS_HI 0x24 /* (RO) Fault Address for address space n, high word */ -#define AS_STATUS 0x28 /* (RO) Status flags for address space n */ - -/* (RO) Secondary fault address for address space n, low word */ -#define AS_FAULTEXTRA_LO 0x38 -/* (RO) Secondary fault address for address space n, high word */ -#define AS_FAULTEXTRA_HI 0x3C + +#define MMU_STAGE1 0x2000 /* () MMU control registers */ +#define MMU_STAGE1_REG(r) (MMU_STAGE1 + (r)) + +#define MMU_AS_REG(n, r) (MMU_AS0 + ((n) << 6) + (r)) + +#define AS_TRANSTAB_LO 0x00 /* (RW) Translation Table Base Address for address space n, low word */ +#define AS_TRANSTAB_HI 0x04 /* (RW) Translation Table Base Address for address space n, high word */ +#define AS_MEMATTR_LO 0x08 /* (RW) Memory attributes for address space n, low word. */ +#define AS_MEMATTR_HI 0x0C /* (RW) Memory attributes for address space n, high word. 
*/ +#define AS_LOCKADDR_LO 0x10 /* (RW) Lock region address for address space n, low word */ +#define AS_LOCKADDR_HI 0x14 /* (RW) Lock region address for address space n, high word */ +#define AS_COMMAND 0x18 /* (WO) MMU command register for address space n */ +#define AS_FAULTSTATUS 0x1C /* (RO) MMU fault status register for address space n */ +#define AS_FAULTADDRESS_LO 0x20 /* (RO) Fault Address for address space n, low word */ +#define AS_FAULTADDRESS_HI 0x24 /* (RO) Fault Address for address space n, high word */ +#define AS_STATUS 0x28 /* (RO) Status flags for address space n */ +#define AS_TRANSCFG_LO 0x30 /* (RW) Translation table configuration for address space n, low word */ +#define AS_TRANSCFG_HI 0x34 /* (RW) Translation table configuration for address space n, high word */ +#define AS_FAULTEXTRA_LO 0x38 /* (RO) Secondary fault address for address space n, low word */ +#define AS_FAULTEXTRA_HI 0x3C /* (RO) Secondary fault address for address space n, high word */ /* End Register Offsets */ @@ -254,7 +307,7 @@ (((reg_val)&AS_FAULTSTATUS_ACCESS_TYPE_MASK) >> AS_FAULTSTATUS_ACCESS_TYPE_SHIFT) #define AS_FAULTSTATUS_ACCESS_TYPE_ATOMIC (0x0) -#define AS_FAULTSTATUS_ACCESS_TYPE_EX (0x1) +#define AS_FAULTSTATUS_ACCESS_TYPE_EXECUTE (0x1) #define AS_FAULTSTATUS_ACCESS_TYPE_READ (0x2) #define AS_FAULTSTATUS_ACCESS_TYPE_WRITE (0x3) @@ -336,11 +389,6 @@ (((reg_val) & ~AS_LOCKADDR_FLUSH_SKIP_LEVELS_MASK) | \ ((value << AS_LOCKADDR_FLUSH_SKIP_LEVELS_SHIFT) & AS_LOCKADDR_FLUSH_SKIP_LEVELS_MASK)) -/* GPU_STATUS values */ -#define GPU_STATUS_PRFCNT_ACTIVE (1 << 2) /* Set if the performance counters are active. */ -#define GPU_STATUS_CYCLE_COUNT_ACTIVE (1 << 6) /* Set if the cycle counter is active. */ -#define GPU_STATUS_PROTECTED_MODE_ACTIVE (1 << 7) /* Set if protected mode is active */ - /* PRFCNT_CONFIG register values */ #define PRFCNT_CONFIG_MODE_SHIFT 0 /* Counter mode position. */ #define PRFCNT_CONFIG_AS_SHIFT 4 /* Address space bitmap position. 
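Expanding the MMU address-space macros above for one concrete case may help: each address space occupies a 0x40-byte window starting at MMU_AS0 (0x400), and the whole block sits at MMU_CONTROL_BASE (0x2000). A worked example (not code from the patch):

/* AS 3, STATUS register:
 *   MMU_AS_REG(3, AS_STATUS) = 0x400 + (3 << 6) + 0x28 = 0x4E8
 * which matches MMU_AS3 (0x4C0) + AS_STATUS (0x28); the absolute GPU
 * register offset is then
 *   MMU_CONTROL_REG(MMU_AS_REG(3, AS_STATUS)) = 0x2000 + 0x4E8 = 0x24E8
 */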
*/ @@ -452,16 +500,6 @@ (((reg_val) & ~AMBA_FEATURES_MEMORY_CACHE_SUPPORT_MASK) | \ (((value) << AMBA_FEATURES_MEMORY_CACHE_SUPPORT_SHIFT) & \ AMBA_FEATURES_MEMORY_CACHE_SUPPORT_MASK)) -#define AMBA_FEATURES_INVALIDATE_HINT_SHIFT GPU_U(6) -#define AMBA_FEATURES_INVALIDATE_HINT_MASK \ - (GPU_U(0x1) << AMBA_FEATURES_INVALIDATE_HINT_SHIFT) -#define AMBA_FEATURES_INVALIDATE_HINT_GET(reg_val) \ - (((reg_val)&AMBA_FEATURES_INVALIDATE_HINT_MASK) >> \ - AMBA_FEATURES_INVALIDATE_HINT_SHIFT) -#define AMBA_FEATURES_INVALIDATE_HINT_SET(reg_val, value) \ - (((reg_val) & ~AMBA_FEATURES_INVALIDATE_HINT_MASK) | \ - (((value) << AMBA_FEATURES_INVALIDATE_HINT_SHIFT) & \ - AMBA_FEATURES_INVALIDATE_HINT_MASK)) /* AMBA_ENABLE register */ #define AMBA_ENABLE_COHERENCY_PROTOCOL_SHIFT GPU_U(0) @@ -489,16 +527,6 @@ (((reg_val) & ~AMBA_ENABLE_MEMORY_CACHE_SUPPORT_MASK) | \ (((value) << AMBA_ENABLE_MEMORY_CACHE_SUPPORT_SHIFT) & \ AMBA_ENABLE_MEMORY_CACHE_SUPPORT_MASK)) -#define AMBA_ENABLE_INVALIDATE_HINT_SHIFT GPU_U(6) -#define AMBA_ENABLE_INVALIDATE_HINT_MASK \ - (GPU_U(0x1) << AMBA_ENABLE_INVALIDATE_HINT_SHIFT) -#define AMBA_ENABLE_INVALIDATE_HINT_GET(reg_val) \ - (((reg_val)&AMBA_ENABLE_INVALIDATE_HINT_MASK) >> \ - AMBA_ENABLE_INVALIDATE_HINT_SHIFT) -#define AMBA_ENABLE_INVALIDATE_HINT_SET(reg_val, value) \ - (((reg_val) & ~AMBA_ENABLE_INVALIDATE_HINT_MASK) | \ - (((value) << AMBA_ENABLE_INVALIDATE_HINT_SHIFT) & \ - AMBA_ENABLE_INVALIDATE_HINT_MASK)) /* IDVS_GROUP register */ #define IDVS_GROUP_SIZE_SHIFT (16) diff --git a/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.c b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.c index 9a409f6..c8cf934 100644 --- a/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.c +++ b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.c @@ -347,7 +347,7 @@ static int kbasep_hwcnt_backend_csf_if_fw_ring_buf_alloc( /* Update MMU table */ ret = kbase_mmu_insert_pages(kbdev, &kbdev->csf.mcu_mmu, gpu_va_base >> PAGE_SHIFT, phys, num_pages, flags, MCU_AS_NR, KBASE_MEM_GROUP_CSF_FW, - mmu_sync_info, NULL, false); + mmu_sync_info, NULL); if (ret) goto mmu_insert_failed; @@ -480,10 +480,10 @@ kbasep_hwcnt_backend_csf_if_fw_ring_buf_free(struct kbase_hwcnt_backend_csf_if_c if (fw_ring_buf->phys) { u64 gpu_va_base = KBASE_HWC_CSF_RING_BUFFER_VA_START; - WARN_ON(kbase_mmu_teardown_pages(fw_ctx->kbdev, &fw_ctx->kbdev->csf.mcu_mmu, - gpu_va_base >> PAGE_SHIFT, fw_ring_buf->phys, - fw_ring_buf->num_pages, fw_ring_buf->num_pages, - MCU_AS_NR, true)); + WARN_ON(kbase_mmu_teardown_firmware_pages( + fw_ctx->kbdev, &fw_ctx->kbdev->csf.mcu_mmu, gpu_va_base >> PAGE_SHIFT, + fw_ring_buf->phys, fw_ring_buf->num_pages, fw_ring_buf->num_pages, + MCU_AS_NR)); vunmap(fw_ring_buf->cpu_dump_base); diff --git a/mali_kbase/jm/mali_kbase_jm_defs.h b/mali_kbase/jm/mali_kbase_jm_defs.h index 639b35f..e694f9f 100644 --- a/mali_kbase/jm/mali_kbase_jm_defs.h +++ b/mali_kbase/jm/mali_kbase_jm_defs.h @@ -140,15 +140,17 @@ * @JM_DEFAULT_JS_FREE_TIMEOUT: Maximum timeout to wait for JS_COMMAND_NEXT * to be updated on HW side so a Job Slot is * considered free. - * @KBASE_TIMEOUT_SELECTOR_COUNT: Number of timeout selectors. Must be last in - * the enum. + * @KBASE_TIMEOUT_SELECTOR_COUNT: Number of timeout selectors. + * @KBASE_DEFAULT_TIMEOUT: Fallthrough in case an invalid timeout is + * passed. 
*/ enum kbase_timeout_selector { MMU_AS_INACTIVE_WAIT_TIMEOUT, JM_DEFAULT_JS_FREE_TIMEOUT, /* Must be the last in the enum */ - KBASE_TIMEOUT_SELECTOR_COUNT + KBASE_TIMEOUT_SELECTOR_COUNT, + KBASE_DEFAULT_TIMEOUT = JM_DEFAULT_JS_FREE_TIMEOUT }; #if IS_ENABLED(CONFIG_DEBUG_FS) @@ -862,10 +864,6 @@ struct jsctx_queue { * @pf_data: Data relating to Page fault. * @bf_data: Data relating to Bus fault. * @current_setup: Stores the MMU configuration for this address space. - * @is_unresponsive: Flag to indicate MMU is not responding. - * Set if a MMU command isn't completed within - * &kbase_device:mmu_as_inactive_wait_time_ms. - * Clear by kbase_ctx_sched_restore_all_as() after GPU reset completes. */ struct kbase_as { int number; @@ -875,7 +873,6 @@ struct kbase_as { struct kbase_fault pf_data; struct kbase_fault bf_data; struct kbase_mmu_setup current_setup; - bool is_unresponsive; }; #endif /* _KBASE_JM_DEFS_H_ */ diff --git a/mali_kbase/jm/mali_kbase_js_defs.h b/mali_kbase/jm/mali_kbase_js_defs.h index 5023eaa..009ff02 100644 --- a/mali_kbase/jm/mali_kbase_js_defs.h +++ b/mali_kbase/jm/mali_kbase_js_defs.h @@ -342,6 +342,30 @@ struct kbasep_js_device_data { * * the kbasep_js_kctx_info::runpool substructure */ struct mutex runpool_mutex; + +#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) + /** + * @gpu_metrics_timer: High-resolution timer used to periodically emit the GPU metrics + * tracepoints for applications that are using the GPU. The timer is + * needed for the long duration handling so that the length of work + * period is within the allowed limit. + */ + struct hrtimer gpu_metrics_timer; + + /** + * @gpu_metrics_timer_needed: Flag to indicate if the @gpu_metrics_timer is needed. + * The timer won't be started after the expiry if the flag + * isn't set. + */ + bool gpu_metrics_timer_needed; + + /** + * @gpu_metrics_timer_running: Flag to indicate if the @gpu_metrics_timer is running. + * The flag is set to false when the timer is cancelled or + * is not restarted after the expiry. + */ + bool gpu_metrics_timer_running; +#endif }; /** diff --git a/mali_kbase/mali_base_hwconfig_features.h b/mali_kbase/mali_base_hwconfig_features.h index 11aedef..724145f 100644 --- a/mali_kbase/mali_base_hwconfig_features.h +++ b/mali_kbase/mali_base_hwconfig_features.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2014-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -21,7 +21,7 @@ /* AUTOMATICALLY GENERATED FILE. If you want to amend the issues/features, * please update base/tools/hwconfig_generator/hwc_{issues,features}.py - * For more information see base/tools/hwconfig_generator/README + * For more information see base/tools/docs/hwconfig_generator.md */ #ifndef _BASE_HWCONFIG_FEATURES_H_ diff --git a/mali_kbase/mali_base_hwconfig_issues.h b/mali_kbase/mali_base_hwconfig_issues.h index 0fbdec0..91b9b83 100644 --- a/mali_kbase/mali_base_hwconfig_issues.h +++ b/mali_kbase/mali_base_hwconfig_issues.h @@ -21,7 +21,7 @@ /* AUTOMATICALLY GENERATED FILE. 
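Stepping back to the kbase_timeout_selector change above: the new KBASE_DEFAULT_TIMEOUT alias lets lookup code degrade gracefully when handed an out-of-range selector instead of indexing past the table. A minimal sketch of that pattern; the table and function are illustrative only, the driver's real accessor being kbase_get_timeout_ms():

static u64 timeout_cycles_sketch(enum kbase_timeout_selector selector)
{
	/* Hypothetical per-selector table, using the cycle counts from
	 * mali_kbase_config_defaults.h.
	 */
	static const u64 timeout_cycles[KBASE_TIMEOUT_SELECTOR_COUNT] = {
		[MMU_AS_INACTIVE_WAIT_TIMEOUT] = MMU_AS_INACTIVE_WAIT_TIMEOUT_CYCLES,
		[JM_DEFAULT_JS_FREE_TIMEOUT] = JM_DEFAULT_JS_FREE_TIMEOUT_CYCLES,
	};

	if (WARN_ON(selector >= KBASE_TIMEOUT_SELECTOR_COUNT))
		selector = KBASE_DEFAULT_TIMEOUT; /* == JM_DEFAULT_JS_FREE_TIMEOUT */

	return timeout_cycles[selector];
}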
If you want to amend the issues/features, * please update base/tools/hwconfig_generator/hwc_{issues,features}.py - * For more information see base/tools/hwconfig_generator/README + * For more information see base/tools/docs/hwconfig_generator.md */ #ifndef _BASE_HWCONFIG_ISSUES_H_ diff --git a/mali_kbase/mali_kbase.h b/mali_kbase/mali_kbase.h index 7de793c..c39ba99 100644 --- a/mali_kbase/mali_kbase.h +++ b/mali_kbase/mali_kbase.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -52,6 +52,7 @@ #include <uapi/gpu/arm/midgard/mali_base_kernel.h> #include <mali_kbase_linux.h> +#include <linux/version_compat_defs.h> /* * Include mali_kbase_defs.h first as this provides types needed by other local @@ -61,9 +62,7 @@ #include "debug/mali_kbase_debug_ktrace.h" #include "context/mali_kbase_context.h" -#include "mali_kbase_strings.h" #include "mali_kbase_mem_lowlevel.h" -#include "mali_kbase_utility.h" #include "mali_kbase_mem.h" #include "mmu/mali_kbase_mmu.h" #include "mali_kbase_gpu_memory_debugfs.h" @@ -87,6 +86,9 @@ #include "mali_linux_trace.h" +#define KBASE_DRV_NAME "mali" +#define KBASE_TIMELINE_NAME KBASE_DRV_NAME ".timeline" + #if MALI_USE_CSF #include "csf/mali_kbase_csf.h" @@ -462,9 +464,9 @@ void kbasep_as_do_poke(struct work_struct *work); * * @kbdev: The kbase device structure for the device * - * The caller should ensure that either kbdev->pm.active_count_lock is held, or - * a dmb was executed recently (to ensure the value is most - * up-to-date). However, without a lock the value could change afterwards. + * The caller should ensure that either kbase_device::kbase_pm_device_data::lock is held, + * or a dmb was executed recently (to ensure the value is most up-to-date). + * However, without a lock the value could change afterwards. * * Return: * * false if a suspend is not in progress @@ -475,6 +477,22 @@ static inline bool kbase_pm_is_suspending(struct kbase_device *kbdev) return kbdev->pm.suspending; } +/** + * kbase_pm_is_resuming - Check whether System resume of GPU device is in progress. + * + * @kbdev: The kbase device structure for the device + * + * The caller should ensure that either kbase_device::kbase_pm_device_data::lock is held, + * or a dmb was executed recently (to ensure the value is most up-to-date). + * However, without a lock the value could change afterwards. + * + * Return: true if System resume is in progress, otherwise false. + */ +static inline bool kbase_pm_is_resuming(struct kbase_device *kbdev) +{ + return kbdev->pm.resuming; +} + #ifdef CONFIG_MALI_ARBITER_SUPPORT /* * Check whether a gpu lost is in progress @@ -528,9 +546,11 @@ static inline bool kbase_pm_is_active(struct kbase_device *kbdev) } /** - * kbase_pm_lowest_gpu_freq_init() - Find the lowest frequency that the GPU can - * run as using the device tree, and save this - * within kbdev. + * kbase_pm_gpu_freq_init() - Find the lowest frequency that the GPU can + * run as using the device tree, then query the + * GPU properties to find out the highest GPU + * frequency and store both of them within the + * @kbase_device. * @kbdev: Pointer to kbase device. 
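A short sketch of the calling convention documented for kbase_pm_is_suspending() and kbase_pm_is_resuming() above, assuming kbase_pm_device_data::lock is the mutex the kernel-doc refers to (the helper is illustrative, not from the patch):

static bool pm_transition_in_flight(struct kbase_device *kbdev)
{
	bool busy;

	mutex_lock(&kbdev->pm.lock);
	busy = kbase_pm_is_suspending(kbdev) || kbase_pm_is_resuming(kbdev);
	mutex_unlock(&kbdev->pm.lock);

	/* Once the lock is dropped this answer can become stale immediately,
	 * exactly as the kernel-doc warns.
	 */
	return busy;
}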
* * This function could be called from kbase_clk_rate_trace_manager_init, @@ -538,9 +558,9 @@ static inline bool kbase_pm_is_active(struct kbase_device *kbdev) * dev_pm_opp_of_add_table() has been called to initialize the OPP table, * which occurs in power_control_init(). * - * Return: 0 in any case. + * Return: 0 on success, negative error code on failure. */ -int kbase_pm_lowest_gpu_freq_init(struct kbase_device *kbdev); +int kbase_pm_gpu_freq_init(struct kbase_device *kbdev); /** * kbase_pm_metrics_start - Start the utilization metrics timer @@ -808,4 +828,108 @@ void kbase_destroy_kworker_stack(struct kthread_worker *worker); #define UINT64_MAX ((uint64_t)0xFFFFFFFFFFFFFFFFULL) #endif +/** + * kbase_file_fops_count() - Get the kfile::fops_count value + * + * @kfile: Pointer to the object representing the mali device file. + * + * The value is read with kfile::lock held. + * + * Return: sampled value of kfile::fops_count. + */ +static inline u32 kbase_file_fops_count(struct kbase_file *kfile) +{ + u32 fops_count; + + spin_lock(&kfile->lock); + fops_count = kfile->fops_count; + spin_unlock(&kfile->lock); + + return fops_count; +} + +/** + * kbase_file_inc_fops_count_unless_closed() - Increment the kfile::fops_count value if the + * kfile::owner is still set. + * + * @kfile: Pointer to the object representing the /dev/malixx device file instance. + * + * Return: true if the increment was done otherwise false. + */ +static inline bool kbase_file_inc_fops_count_unless_closed(struct kbase_file *kfile) +{ + bool count_incremented = false; + + spin_lock(&kfile->lock); + if (kfile->owner) { + kfile->fops_count++; + count_incremented = true; + } + spin_unlock(&kfile->lock); + + return count_incremented; +} + +/** + * kbase_file_dec_fops_count() - Decrement the kfile::fops_count value + * + * @kfile: Pointer to the object representing the /dev/malixx device file instance. + * + * This function shall only be called to decrement kfile::fops_count if a successful call + * to kbase_file_inc_fops_count_unless_closed() was made previously by the current thread. + * + * The function would enqueue the kfile::destroy_kctx_work if the process that originally + * created the file instance has closed its copy and no Kbase handled file operations are + * in progress and no memory mappings are present for the file instance. + */ +static inline void kbase_file_dec_fops_count(struct kbase_file *kfile) +{ + spin_lock(&kfile->lock); + WARN_ON_ONCE(kfile->fops_count <= 0); + kfile->fops_count--; + if (unlikely(!kfile->fops_count && !kfile->owner && !kfile->map_count)) { + queue_work(system_wq, &kfile->destroy_kctx_work); +#if IS_ENABLED(CONFIG_DEBUG_FS) + wake_up(&kfile->zero_fops_count_wait); +#endif + } + spin_unlock(&kfile->lock); +} + +/** + * kbase_file_inc_cpu_mapping_count() - Increment the kfile::map_count value. + * + * @kfile: Pointer to the object representing the /dev/malixx device file instance. + * + * This function shall be called when the memory mapping on /dev/malixx device file + * instance is created. The kbase_file::setup_state shall be KBASE_FILE_COMPLETE. + */ +static inline void kbase_file_inc_cpu_mapping_count(struct kbase_file *kfile) +{ + spin_lock(&kfile->lock); + kfile->map_count++; + spin_unlock(&kfile->lock); +} + +/** + * kbase_file_dec_cpu_mapping_count() - Decrement the kfile::map_count value + * + * @kfile: Pointer to the object representing the /dev/malixx device file instance. 
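Taken together, the fops_count helpers above give every Kbase file operation one guard/unguard shape; the patch applies it to ioctl, read, poll and mmap further down. A stripped-down version of the pattern, with the operation body left as a placeholder:

static long example_guarded_fop(struct kbase_file *kfile)
{
	long ret = 0;

	/* Refuse if the owning process has already closed its copy of the file. */
	if (unlikely(!kbase_file_inc_fops_count_unless_closed(kfile)))
		return -EPERM;

	/* ... the actual file operation runs here ... */

	/* May queue kfile::destroy_kctx_work if this was the last user. */
	kbase_file_dec_fops_count(kfile);
	return ret;
}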
+ * + * This function is called to decrement kfile::map_count value when the memory mapping + * on /dev/malixx device file is closed. + * The function would enqueue the kfile::destroy_kctx_work if the process that originally + * created the file instance has closed its copy and there are no mappings present and no + * Kbase handled file operations are in progress for the file instance. + */ +static inline void kbase_file_dec_cpu_mapping_count(struct kbase_file *kfile) +{ + spin_lock(&kfile->lock); + WARN_ON_ONCE(kfile->map_count <= 0); + kfile->map_count--; + if (unlikely(!kfile->map_count && !kfile->owner && !kfile->fops_count)) + queue_work(system_wq, &kfile->destroy_kctx_work); + spin_unlock(&kfile->lock); +} + #endif diff --git a/mali_kbase/mali_kbase_config_defaults.h b/mali_kbase/mali_kbase_config_defaults.h index c99ad52..fa73612 100644 --- a/mali_kbase/mali_kbase_config_defaults.h +++ b/mali_kbase/mali_kbase_config_defaults.h @@ -183,6 +183,7 @@ enum { * * This is also the default timeout to be used when an invalid timeout * selector is used to retrieve the timeout on CSF GPUs. + * This shouldn't be used as a timeout for the CSG suspend request. * * Based on 75000ms timeout at nominal 100MHz, as is required for Android - based * on scaling from a 50MHz GPU system. @@ -196,17 +197,16 @@ enum { */ #define CSF_PM_TIMEOUT_CYCLES (250000000) -/* Waiting timeout in clock cycles for GPU reset to complete. +/* Waiting timeout in clock cycles for a CSG to be suspended. * - * Based on 2500ms timeout at 100MHz, scaled from a 50MHz GPU system + * Based on 30s timeout at 100MHz, scaled from 5s at 600Mhz GPU frequency. + * More cycles (1s @ 100Mhz = 100000000) are added up to ensure that + * host timeout is always bigger than FW timeout. */ -#define CSF_GPU_RESET_TIMEOUT_CYCLES (250000000) +#define CSF_CSG_SUSPEND_TIMEOUT_CYCLES (3100000000ull) -/* Waiting timeout in clock cycles for all active CSGs to be suspended. - * - * Based on 1500ms timeout at 100MHz, scaled from a 50MHz GPU system. - */ -#define CSF_CSG_SUSPEND_TIMEOUT_CYCLES (150000000) +/* Waiting timeout in clock cycles for GPU reset to complete. */ +#define CSF_GPU_RESET_TIMEOUT_CYCLES (CSF_CSG_SUSPEND_TIMEOUT_CYCLES * 2) /* Waiting timeout in clock cycles for GPU firmware to boot. * @@ -220,6 +220,19 @@ enum { */ #define CSF_FIRMWARE_PING_TIMEOUT_CYCLES (600000000ull) +/* Waiting timeout for a KCPU queue's fence signal blocked to long, in clock cycles. + * + * Based on 10s timeout at 100MHz, scaled from a 50MHz GPU system. + */ +#define KCPU_FENCE_SIGNAL_TIMEOUT_CYCLES (1000000000ull) + +/* Waiting timeout for task execution on an endpoint. Based on the + * DEFAULT_PROGRESS_TIMEOUT. + * + * Based on 25s timeout at 100Mhz, scaled from a 500MHz GPU system. + */ +#define DEFAULT_PROGRESS_TIMEOUT_CYCLES (2500000000ull) + #else /* MALI_USE_CSF */ /* A default timeout in clock cycles to be used when an invalid timeout @@ -242,7 +255,7 @@ enum { */ #define JM_DEFAULT_JS_FREE_TIMEOUT_CYCLES (100000) -#endif /* MALI_USE_CSF */ +#endif /* !MALI_USE_CSF */ /* Default timeslice that a context is scheduled in for, in nanoseconds. * @@ -286,4 +299,10 @@ enum { * It corresponds to 0.5s in GPU @ 100Mhz. */ #define MMU_AS_INACTIVE_WAIT_TIMEOUT_CYCLES ((u64)50 * 1024 * 1024) + +#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) +/* Default value of the time interval at which GPU metrics tracepoints are emitted. 
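Writing out the arithmetic behind the new suspend/reset figures (at the 100 MHz reference clock the comments above use; not code from the patch):

/* CSF_CSG_SUSPEND_TIMEOUT_CYCLES:
 *   30 s * 100,000,000 cycles/s = 3,000,000,000
 * +  1 s * 100,000,000 cycles/s =   100,000,000   (host-over-FW margin)
 *                               = 3,100,000,000   (3100000000ull)
 *
 * CSF_GPU_RESET_TIMEOUT_CYCLES = 2 * 3,100,000,000 = 6,200,000,000 cycles,
 * i.e. 62 s at the 100 MHz reference; actual waits are derived from these
 * cycle counts using the GPU frequency recorded by kbase_pm_gpu_freq_init().
 */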
*/ +#define DEFAULT_GPU_METRICS_TP_EMIT_INTERVAL_NS (500000000u) /* 500 ms */ +#endif + #endif /* _KBASE_CONFIG_DEFAULTS_H_ */ diff --git a/mali_kbase/mali_kbase_core_linux.c b/mali_kbase/mali_kbase_core_linux.c index c31994c..28cbcdb 100644 --- a/mali_kbase/mali_kbase_core_linux.c +++ b/mali_kbase/mali_kbase_core_linux.c @@ -170,6 +170,8 @@ static const struct mali_kbase_capability_def kbase_caps_table[MALI_KBASE_NUM_CA static struct mutex kbase_probe_mutex; #endif +static void kbase_file_destroy_kctx_worker(struct work_struct *work); + /** * mali_kbase_supports_cap - Query whether a kbase capability is supported * @@ -274,6 +276,8 @@ void kbase_destroy_kworker_stack(struct kthread_worker *worker) * * Return: Address of an object representing a simulated device file, or NULL * on failure. + * + * Note: This function always gets called in Userspace context. */ static struct kbase_file *kbase_file_new(struct kbase_device *const kbdev, struct file *const filp) @@ -286,6 +290,16 @@ static struct kbase_file *kbase_file_new(struct kbase_device *const kbdev, kfile->kctx = NULL; kfile->api_version = 0; atomic_set(&kfile->setup_state, KBASE_FILE_NEED_VSN); + /* Store the pointer to the file table structure of current process. */ + kfile->owner = current->files; + INIT_WORK(&kfile->destroy_kctx_work, kbase_file_destroy_kctx_worker); + spin_lock_init(&kfile->lock); + kfile->fops_count = 0; + kfile->map_count = 0; + typecheck(typeof(kfile->map_count), typeof(current->mm->map_count)); +#if IS_ENABLED(CONFIG_DEBUG_FS) + init_waitqueue_head(&kfile->zero_fops_count_wait); +#endif } return kfile; } @@ -366,6 +380,33 @@ static int kbase_file_create_kctx(struct kbase_file *kfile, base_context_create_flags flags); /** + * kbase_file_inc_fops_count_if_allowed - Increment the kfile::fops_count value if the file + * operation is allowed for the current process. + * + * @kfile: Pointer to the object representing the /dev/malixx device file instance. + * + * The function shall be called at the beginning of certain file operation methods + * implemented for @kbase_fops, like ioctl, poll, read and mmap. + * + * kbase_file_dec_fops_count() shall be called if the increment was done. + * + * Return: true if the increment was done otherwise false. + * + * Note: This function shall always be called in Userspace context. + */ +static bool kbase_file_inc_fops_count_if_allowed(struct kbase_file *const kfile) +{ + /* Disallow file operations from the other process that shares the instance + * of /dev/malixx file i.e. 'kfile' or disallow file operations if parent + * process has closed the file instance. + */ + if (unlikely(kfile->owner != current->files)) + return false; + + return kbase_file_inc_fops_count_unless_closed(kfile); +} + +/** * kbase_file_get_kctx_if_setup_complete - Get a kernel base context * pointer from a device file * @@ -377,6 +418,8 @@ static int kbase_file_create_kctx(struct kbase_file *kfile, * * Return: Address of the kernel base context associated with the @kfile, or * NULL if no context exists. + * + * Note: This function shall always be called in Userspace context. */ static struct kbase_context *kbase_file_get_kctx_if_setup_complete( struct kbase_file *const kfile) @@ -390,37 +433,103 @@ static struct kbase_context *kbase_file_get_kctx_if_setup_complete( } /** - * kbase_file_delete - Destroy an object representing a device file + * kbase_file_destroy_kctx - Destroy the Kbase context created for @kfile. 
* * @kfile: A device file created by kbase_file_new() - * - * If any context was created for the @kfile then it is destroyed. */ -static void kbase_file_delete(struct kbase_file *const kfile) +static void kbase_file_destroy_kctx(struct kbase_file *const kfile) { - struct kbase_device *kbdev = NULL; - - if (WARN_ON(!kfile)) + if (atomic_cmpxchg(&kfile->setup_state, KBASE_FILE_COMPLETE, + KBASE_FILE_DESTROY_CTX) != KBASE_FILE_COMPLETE) return; - kfile->filp->private_data = NULL; - kbdev = kfile->kbdev; - - if (atomic_read(&kfile->setup_state) == KBASE_FILE_COMPLETE) { - struct kbase_context *kctx = kfile->kctx; - #if IS_ENABLED(CONFIG_DEBUG_FS) - kbasep_mem_profile_debugfs_remove(kctx); + kbasep_mem_profile_debugfs_remove(kfile->kctx); + kbase_context_debugfs_term(kfile->kctx); #endif - kbase_context_debugfs_term(kctx); - kbase_destroy_context(kctx); + kbase_destroy_context(kfile->kctx); + dev_dbg(kfile->kbdev->dev, "Deleted kbase context"); +} + +/** + * kbase_file_destroy_kctx_worker - Work item to destroy the Kbase context. + * + * @work: Pointer to the kfile::destroy_kctx_work. + * + * The work item shall only be enqueued if the context termination could not + * be done from @kbase_flush(). + */ +static void kbase_file_destroy_kctx_worker(struct work_struct *work) +{ + struct kbase_file *kfile = + container_of(work, struct kbase_file, destroy_kctx_work); + + WARN_ON_ONCE(kfile->owner); + WARN_ON_ONCE(kfile->map_count); + WARN_ON_ONCE(kfile->fops_count); + + kbase_file_destroy_kctx(kfile); +} + +/** + * kbase_file_destroy_kctx_on_flush - Try destroy the Kbase context from the flush() + * method of @kbase_fops. + * + * @kfile: A device file created by kbase_file_new() + */ +static void kbase_file_destroy_kctx_on_flush(struct kbase_file *const kfile) +{ + bool can_destroy_context = false; + + spin_lock(&kfile->lock); + kfile->owner = NULL; + /* To destroy the context from flush() method, unlike the release() + * method, need to synchronize manually against the other threads in + * the current process that could be operating on the /dev/malixx file. + * + * Only destroy the context if all the memory mappings on the + * /dev/malixx file instance have been closed. If there are mappings + * present then the context would be destroyed later when the last + * mapping is closed. + * Also, only destroy the context if no file operations are in progress. + */ + can_destroy_context = !kfile->map_count && !kfile->fops_count; + spin_unlock(&kfile->lock); - dev_dbg(kbdev->dev, "deleted base context\n"); + if (likely(can_destroy_context)) { + WARN_ON_ONCE(work_pending(&kfile->destroy_kctx_work)); + kbase_file_destroy_kctx(kfile); } +} - kbase_release_device(kbdev); +/** + * kbase_file_delete - Destroy an object representing a device file + * + * @kfile: A device file created by kbase_file_new() + * + * If any context was created for the @kfile and is still alive, then it is destroyed. 
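One detail worth spelling out: kbase_file_destroy_kctx() can now be reached from three paths (flush(), the destroy worker, and kbase_file_delete()), and the atomic_cmpxchg() on setup_state is what keeps the teardown single-shot. A minimal model of that guard, using the state values from the patch:

static void destroy_once_sketch(struct kbase_file *kfile)
{
	/* Only the first caller moves COMPLETE -> DESTROY_CTX and tears
	 * down; every later caller observes a different state and returns.
	 */
	if (atomic_cmpxchg(&kfile->setup_state, KBASE_FILE_COMPLETE,
			   KBASE_FILE_DESTROY_CTX) != KBASE_FILE_COMPLETE)
		return;

	/* ... single-shot context teardown runs here ... */
}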
+ */ +static void kbase_file_delete(struct kbase_file *const kfile) +{ + if (WARN_ON(!kfile)) + return; + + /* All the CPU mappings on the device file should have been closed */ + WARN_ON_ONCE(kfile->map_count); +#if IS_ENABLED(CONFIG_DEBUG_FS) + /* There could still be file operations due to the debugfs file (mem_view) */ + wait_event(kfile->zero_fops_count_wait, !kbase_file_fops_count(kfile)); +#else + /* There shall not be any file operations in progress on the device file */ + WARN_ON_ONCE(kfile->fops_count); +#endif + kfile->filp->private_data = NULL; + cancel_work_sync(&kfile->destroy_kctx_work); + /* Destroy the context if it wasn't done earlier from the flush() method. */ + kbase_file_destroy_kctx(kfile); + kbase_release_device(kfile->kbdev); + kfree(kfile); } @@ -676,7 +785,7 @@ static int kbase_file_create_kctx(struct kbase_file *const kfile, kbdev = kfile->kbdev; kctx = kbase_create_context(kbdev, in_compat_syscall(), - flags, kfile->api_version, kfile->filp); + flags, kfile->api_version, kfile); /* if bad flags, will stay stuck in setup mode */ if (!kctx) @@ -762,6 +871,36 @@ static int kbase_release(struct inode *inode, struct file *filp) return 0; } +/** + * kbase_flush - Function implementing the flush() method of @kbase_fops. + * + * @filp: Pointer to the /dev/malixx device file instance. + * @id: Pointer to the file table structure of current process. + * If @filp is being shared by multiple processes then @id can differ + * from kfile::owner. + * + * This function is called every time a copy of @filp is closed. So if 3 processes + * are sharing @filp then this function is called 3 times, and only after + * that is kbase_release() called. + * + * Return: 0 if successful, otherwise a negative error code. + * + * Note: This function always gets called in Userspace context when the + * file is closed. + */ +static int kbase_flush(struct file *filp, fl_owner_t id) +{ + struct kbase_file *const kfile = filp->private_data; + + /* Try to destroy the context if the flush() method has been called for the + * process that created the instance of /dev/malixx file i.e. 'kfile'.
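A timeline makes the flush()/release() split concrete (illustrative ordering): one process opens /dev/malixx and forks, so two file-table references share the same struct file:

/*   child exits  -> kbase_flush(id != kfile->owner) -> context kept
 *   parent exits -> kbase_flush(id == kfile->owner) -> destroy attempted
 *   last fput()  -> kbase_release()                 -> kbase_file_delete()
 */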
+ */ + if (kfile->owner == id) + kbase_file_destroy_kctx_on_flush(kfile); + + return 0; +} + static int kbase_api_set_flags(struct kbase_file *kfile, struct kbase_ioctl_set_flags *flags) { @@ -1485,6 +1624,7 @@ static int kbasep_cs_queue_group_create_1_6( struct kbase_context *kctx, union kbase_ioctl_cs_queue_group_create_1_6 *create) { + int ret, i; union kbase_ioctl_cs_queue_group_create new_create = { .in = { .tiler_mask = create->in.tiler_mask, @@ -1498,16 +1638,61 @@ static int kbasep_cs_queue_group_create_1_6( .compute_max = create->in.compute_max, } }; - int ret = kbase_csf_queue_group_create(kctx, &new_create); + for (i = 0; i < ARRAY_SIZE(create->in.padding); i++) { + if (create->in.padding[i] != 0) { + dev_warn(kctx->kbdev->dev, "Invalid padding not 0 in queue group create\n"); + return -EINVAL; + } + } + + ret = kbase_csf_queue_group_create(kctx, &new_create); + + create->out.group_handle = new_create.out.group_handle; + create->out.group_uid = new_create.out.group_uid; + + return ret; +} + +static int kbasep_cs_queue_group_create_1_18(struct kbase_context *kctx, + union kbase_ioctl_cs_queue_group_create_1_18 *create) +{ + int ret, i; + union kbase_ioctl_cs_queue_group_create + new_create = { .in = { + .tiler_mask = create->in.tiler_mask, + .fragment_mask = create->in.fragment_mask, + .compute_mask = create->in.compute_mask, + .cs_min = create->in.cs_min, + .priority = create->in.priority, + .tiler_max = create->in.tiler_max, + .fragment_max = create->in.fragment_max, + .compute_max = create->in.compute_max, + .csi_handlers = create->in.csi_handlers, + .dvs_buf = create->in.dvs_buf, + } }; + + for (i = 0; i < ARRAY_SIZE(create->in.padding); i++) { + if (create->in.padding[i] != 0) { + dev_warn(kctx->kbdev->dev, "Invalid padding not 0 in queue group create\n"); + return -EINVAL; + } + } + + ret = kbase_csf_queue_group_create(kctx, &new_create); create->out.group_handle = new_create.out.group_handle; create->out.group_uid = new_create.out.group_uid; return ret; } + static int kbasep_cs_queue_group_create(struct kbase_context *kctx, union kbase_ioctl_cs_queue_group_create *create) { + if (create->in.reserved != 0) { + dev_warn(kctx->kbdev->dev, "Invalid reserved field not 0 in queue group create\n"); + return -EINVAL; + } return kbase_csf_queue_group_create(kctx, create); } @@ -1765,9 +1950,8 @@ static int kbasep_ioctl_set_limited_core_count(struct kbase_context *kctx, return 0; } -static long kbase_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) +static long kbase_kfile_ioctl(struct kbase_file *kfile, unsigned int cmd, unsigned long arg) { - struct kbase_file *const kfile = filp->private_data; struct kbase_context *kctx = NULL; struct kbase_device *kbdev = kfile->kbdev; void __user *uarg = (void __user *)arg; @@ -2081,6 +2265,11 @@ static long kbase_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) kbasep_cs_queue_group_create_1_6, union kbase_ioctl_cs_queue_group_create_1_6, kctx); break; + case KBASE_IOCTL_CS_QUEUE_GROUP_CREATE_1_18: + KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_CS_QUEUE_GROUP_CREATE_1_18, + kbasep_cs_queue_group_create_1_18, + union kbase_ioctl_cs_queue_group_create_1_18, kctx); + break; case KBASE_IOCTL_CS_QUEUE_GROUP_CREATE: KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_CS_QUEUE_GROUP_CREATE, kbasep_cs_queue_group_create, @@ -2179,22 +2368,44 @@ static long kbase_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) return -ENOIOCTLCMD; } +static long kbase_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) +{ + struct kbase_file 
*const kfile = filp->private_data; + long ioctl_ret; + + if (unlikely(!kbase_file_inc_fops_count_if_allowed(kfile))) + return -EPERM; + + ioctl_ret = kbase_kfile_ioctl(kfile, cmd, arg); + kbase_file_dec_fops_count(kfile); + + return ioctl_ret; +} + #if MALI_USE_CSF static ssize_t kbase_read(struct file *filp, char __user *buf, size_t count, loff_t *f_pos) { struct kbase_file *const kfile = filp->private_data; - struct kbase_context *const kctx = - kbase_file_get_kctx_if_setup_complete(kfile); + struct kbase_context *kctx; struct base_csf_notification event_data = { .type = BASE_CSF_NOTIFICATION_EVENT }; const size_t data_size = sizeof(event_data); bool read_event = false, read_error = false; + ssize_t err = 0; - if (unlikely(!kctx)) + if (unlikely(!kbase_file_inc_fops_count_if_allowed(kfile))) return -EPERM; - if (count < data_size) - return -ENOBUFS; + kctx = kbase_file_get_kctx_if_setup_complete(kfile); + if (unlikely(!kctx)) { + err = -EPERM; + goto out; + } + + if (count < data_size) { + err = -ENOBUFS; + goto out; + } if (atomic_read(&kctx->event_count)) read_event = true; @@ -2218,28 +2429,39 @@ static ssize_t kbase_read(struct file *filp, char __user *buf, size_t count, lof if (copy_to_user(buf, &event_data, data_size) != 0) { dev_warn(kctx->kbdev->dev, "Failed to copy data\n"); - return -EFAULT; + err = -EFAULT; + goto out; } if (read_event) atomic_set(&kctx->event_count, 0); - return data_size; +out: + kbase_file_dec_fops_count(kfile); + return err ? err : data_size; } #else /* MALI_USE_CSF */ static ssize_t kbase_read(struct file *filp, char __user *buf, size_t count, loff_t *f_pos) { struct kbase_file *const kfile = filp->private_data; - struct kbase_context *const kctx = - kbase_file_get_kctx_if_setup_complete(kfile); + struct kbase_context *kctx; struct base_jd_event_v2 uevent; int out_count = 0; + ssize_t err = 0; - if (unlikely(!kctx)) + if (unlikely(!kbase_file_inc_fops_count_if_allowed(kfile))) return -EPERM; - if (count < sizeof(uevent)) - return -ENOBUFS; + kctx = kbase_file_get_kctx_if_setup_complete(kfile); + if (unlikely(!kctx)) { + err = -EPERM; + goto out; + } + + if (count < sizeof(uevent)) { + err = -ENOBUFS; + goto out; + } memset(&uevent, 0, sizeof(uevent)); @@ -2248,56 +2470,78 @@ static ssize_t kbase_read(struct file *filp, char __user *buf, size_t count, lof if (out_count > 0) goto out; - if (filp->f_flags & O_NONBLOCK) - return -EAGAIN; + if (filp->f_flags & O_NONBLOCK) { + err = -EAGAIN; + goto out; + } if (wait_event_interruptible(kctx->event_queue, - kbase_event_pending(kctx)) != 0) - return -ERESTARTSYS; + kbase_event_pending(kctx)) != 0) { + err = -ERESTARTSYS; + goto out; + } } if (uevent.event_code == BASE_JD_EVENT_DRV_TERMINATED) { - if (out_count == 0) - return -EPIPE; + if (out_count == 0) { + err = -EPIPE; + goto out; + } goto out; } - if (copy_to_user(buf, &uevent, sizeof(uevent)) != 0) - return -EFAULT; + if (copy_to_user(buf, &uevent, sizeof(uevent)) != 0) { + err = -EFAULT; + goto out; + } buf += sizeof(uevent); out_count++; count -= sizeof(uevent); } while (count >= sizeof(uevent)); - out: - return out_count * sizeof(uevent); +out: + kbase_file_dec_fops_count(kfile); + return err ? 
err : (out_count * sizeof(uevent)); } #endif /* MALI_USE_CSF */ static __poll_t kbase_poll(struct file *filp, poll_table *wait) { struct kbase_file *const kfile = filp->private_data; - struct kbase_context *const kctx = - kbase_file_get_kctx_if_setup_complete(kfile); + struct kbase_context *kctx; + __poll_t ret = 0; + + if (unlikely(!kbase_file_inc_fops_count_if_allowed(kfile))) { +#if (KERNEL_VERSION(4, 19, 0) > LINUX_VERSION_CODE) + ret = POLLNVAL; +#else + ret = EPOLLNVAL; +#endif + return ret; + } + kctx = kbase_file_get_kctx_if_setup_complete(kfile); if (unlikely(!kctx)) { #if (KERNEL_VERSION(4, 19, 0) > LINUX_VERSION_CODE) - return POLLERR; + ret = POLLERR; #else - return EPOLLERR; + ret = EPOLLERR; #endif + goto out; } poll_wait(filp, &kctx->event_queue, wait); if (kbase_event_pending(kctx)) { #if (KERNEL_VERSION(4, 19, 0) > LINUX_VERSION_CODE) - return POLLIN | POLLRDNORM; + ret = POLLIN | POLLRDNORM; #else - return EPOLLIN | EPOLLRDNORM; + ret = EPOLLIN | EPOLLRDNORM; #endif } - return 0; +out: + kbase_file_dec_fops_count(kfile); + return ret; } void _kbase_event_wakeup(struct kbase_context *kctx, bool sync) @@ -2347,13 +2591,20 @@ KBASE_EXPORT_TEST_API(kbase_event_pending); static int kbase_mmap(struct file *const filp, struct vm_area_struct *const vma) { struct kbase_file *const kfile = filp->private_data; - struct kbase_context *const kctx = - kbase_file_get_kctx_if_setup_complete(kfile); + struct kbase_context *kctx; + int ret; - if (unlikely(!kctx)) + if (unlikely(!kbase_file_inc_fops_count_if_allowed(kfile))) return -EPERM; - return kbase_context_mmap(kctx, vma); + kctx = kbase_file_get_kctx_if_setup_complete(kfile); + if (likely(kctx)) + ret = kbase_context_mmap(kctx, vma); + else + ret = -EPERM; + + kbase_file_dec_fops_count(kfile); + return ret; } static int kbase_check_flags(int flags) @@ -2372,18 +2623,26 @@ static unsigned long kbase_get_unmapped_area(struct file *const filp, const unsigned long pgoff, const unsigned long flags) { struct kbase_file *const kfile = filp->private_data; - struct kbase_context *const kctx = - kbase_file_get_kctx_if_setup_complete(kfile); + struct kbase_context *kctx; + unsigned long address; - if (unlikely(!kctx)) + if (unlikely(!kbase_file_inc_fops_count_if_allowed(kfile))) return -EPERM; - return kbase_context_get_unmapped_area(kctx, addr, len, pgoff, flags); + kctx = kbase_file_get_kctx_if_setup_complete(kfile); + if (likely(kctx)) + address = kbase_context_get_unmapped_area(kctx, addr, len, pgoff, flags); + else + address = -EPERM; + + kbase_file_dec_fops_count(kfile); + return address; } static const struct file_operations kbase_fops = { .owner = THIS_MODULE, .open = kbase_open, + .flush = kbase_flush, .release = kbase_release, .read = kbase_read, .poll = kbase_poll, @@ -3306,10 +3565,8 @@ static ssize_t gpuinfo_show(struct device *dev, .name = "Mali-G510" }, { .id = GPU_ID2_PRODUCT_TVAX >> KBASE_GPU_ID_VERSION_PRODUCT_ID_SHIFT, .name = "Mali-G310" }, - { .id = GPU_ID2_PRODUCT_TTIX >> KBASE_GPU_ID_VERSION_PRODUCT_ID_SHIFT, - .name = "Mali-TTIX" }, { .id = GPU_ID2_PRODUCT_LTIX >> KBASE_GPU_ID_VERSION_PRODUCT_ID_SHIFT, - .name = "Mali-LTIX" }, + .name = "Mali-G620" }, }; const char *product_name = "(Unknown Mali GPU)"; struct kbase_device *kbdev; @@ -3361,6 +3618,21 @@ static ssize_t gpuinfo_show(struct device *dev, dev_dbg(kbdev->dev, "GPU ID_Name: %s, nr_cores(%u)\n", product_name, nr_cores); } + + if ((product_id & product_id_mask) == + ((GPU_ID2_PRODUCT_TTIX >> KBASE_GPU_ID_VERSION_PRODUCT_ID_SHIFT) & product_id_mask)) { + const bool 
rt_supported = + GPU_FEATURES_RAY_TRACING_GET(gpu_props->props.raw_props.gpu_features); + const u8 nr_cores = gpu_props->num_cores; + + if ((nr_cores >= 10) && rt_supported) + product_name = "Mali-G720-Immortalis"; + else + product_name = (nr_cores >= 6) ? "Mali-G720" : "Mali-G620"; + + dev_dbg(kbdev->dev, "GPU ID_Name: %s (ID: 0x%x), nr_cores(%u)\n", product_name, + nr_cores, product_id & product_id_mask); + } #endif /* MALI_USE_CSF */ return scnprintf(buf, PAGE_SIZE, "%s %d cores r%dp%d 0x%04X\n", product_name, @@ -3435,8 +3707,9 @@ static ssize_t dvfs_period_show(struct device *dev, static DEVICE_ATTR_RW(dvfs_period); -int kbase_pm_lowest_gpu_freq_init(struct kbase_device *kbdev) +int kbase_pm_gpu_freq_init(struct kbase_device *kbdev) { + int err; /* Uses default reference frequency defined in below macro */ u64 lowest_freq_khz = DEFAULT_REF_TIMEOUT_FREQ_KHZ; @@ -3471,7 +3744,16 @@ int kbase_pm_lowest_gpu_freq_init(struct kbase_device *kbdev) #endif kbdev->lowest_gpu_freq_khz = lowest_freq_khz; + + err = kbase_device_populate_max_freq(kbdev); + if (unlikely(err < 0)) + return -1; + dev_dbg(kbdev->dev, "Lowest frequency identified is %llu kHz", kbdev->lowest_gpu_freq_khz); + dev_dbg(kbdev->dev, + "Setting default highest frequency to %u kHz (pending devfreq initialization", + kbdev->gpu_props.props.core_props.gpu_freq_khz_max); + return 0; } @@ -3584,21 +3866,32 @@ static ssize_t reset_timeout_store(struct device *dev, { struct kbase_device *kbdev; int ret; - int reset_timeout; + u32 reset_timeout; + u32 default_reset_timeout; kbdev = to_kbase_device(dev); if (!kbdev) return -ENODEV; - ret = kstrtoint(buf, 0, &reset_timeout); - if (ret || reset_timeout <= 0) { + ret = kstrtou32(buf, 0, &reset_timeout); + if (ret || reset_timeout == 0) { dev_err(kbdev->dev, "Couldn't process reset_timeout write operation.\n" "Use format <reset_timeout_ms>\n"); return -EINVAL; } +#if MALI_USE_CSF + default_reset_timeout = kbase_get_timeout_ms(kbdev, CSF_GPU_RESET_TIMEOUT); +#else /* MALI_USE_CSF */ + default_reset_timeout = JM_DEFAULT_RESET_TIMEOUT_MS; +#endif /* !MALI_USE_CSF */ + + if (reset_timeout < default_reset_timeout) + dev_warn(kbdev->dev, "requested reset_timeout(%u) is smaller than default(%u)", + reset_timeout, default_reset_timeout); + kbdev->reset_timeout_ms = reset_timeout; - dev_dbg(kbdev->dev, "Reset timeout: %dms\n", reset_timeout); + dev_dbg(kbdev->dev, "Reset timeout: %ums\n", reset_timeout); return count; } @@ -4482,8 +4775,10 @@ static bool kbase_is_pm_enabled(const struct device_node *gpu_node) const void *operating_point_node; bool is_pm_enable = false; - power_model_node = of_get_child_by_name(gpu_node, - "power_model"); + power_model_node = of_get_child_by_name(gpu_node, "power-model"); + if (!power_model_node) + power_model_node = of_get_child_by_name(gpu_node, "power_model"); + if (power_model_node) is_pm_enable = true; @@ -4504,8 +4799,9 @@ static bool kbase_is_pv_enabled(const struct device_node *gpu_node) { const void *arbiter_if_node; - arbiter_if_node = of_get_property(gpu_node, - "arbiter_if", NULL); + arbiter_if_node = of_get_property(gpu_node, "arbiter-if", NULL); + if (!arbiter_if_node) + arbiter_if_node = of_get_property(gpu_node, "arbiter_if", NULL); return arbiter_if_node ? true : false; } @@ -5409,7 +5705,10 @@ static ssize_t idle_hysteresis_time_store(struct device *dev, return -EINVAL; } - kbase_csf_firmware_set_gpu_idle_hysteresis_time(kbdev, dur); + /* In sysFs, The unit of the input value of idle_hysteresis_time is us. 
+ * But the unit of the input parameter of this function is ns, so multiply by 1000 + */ + kbase_csf_firmware_set_gpu_idle_hysteresis_time(kbdev, dur * NSEC_PER_USEC); return count; } @@ -5436,7 +5735,8 @@ static ssize_t idle_hysteresis_time_show(struct device *dev, if (!kbdev) return -ENODEV; - dur = kbase_csf_firmware_get_gpu_idle_hysteresis_time(kbdev); + /* The unit of return value of idle_hysteresis_time_show is us, So divide by 1000.*/ + dur = kbase_csf_firmware_get_gpu_idle_hysteresis_time(kbdev) / NSEC_PER_USEC; ret = scnprintf(buf, PAGE_SIZE, "%u\n", dur); return ret; @@ -5445,6 +5745,74 @@ static ssize_t idle_hysteresis_time_show(struct device *dev, static DEVICE_ATTR_RW(idle_hysteresis_time); /** + * idle_hysteresis_time_ns_store - Store callback for CSF + * idle_hysteresis_time_ns sysfs file. + * + * @dev: The device with sysfs file is for + * @attr: The attributes of the sysfs file + * @buf: The value written to the sysfs file + * @count: The number of bytes written to the sysfs file + * + * This function is called when the idle_hysteresis_time_ns sysfs + * file is written to. + * + * This file contains values of the idle hysteresis duration in ns. + * + * Return: @count if the function succeeded. An error code on failure. + */ +static ssize_t idle_hysteresis_time_ns_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct kbase_device *kbdev; + u32 dur = 0; + + kbdev = to_kbase_device(dev); + if (!kbdev) + return -ENODEV; + + if (kstrtou32(buf, 0, &dur)) { + dev_err(kbdev->dev, "Couldn't process idle_hysteresis_time_ns write operation.\n" + "Use format <idle_hysteresis_time_ns>\n"); + return -EINVAL; + } + + kbase_csf_firmware_set_gpu_idle_hysteresis_time(kbdev, dur); + + return count; +} + +/** + * idle_hysteresis_time_ns_show - Show callback for CSF + * idle_hysteresis_time_ns sysfs entry. + * + * @dev: The device this sysfs file is for. + * @attr: The attributes of the sysfs file. + * @buf: The output buffer to receive the GPU information. + * + * This function is called to get the current idle hysteresis duration in ns. + * + * Return: The number of bytes output to @buf. + */ +static ssize_t idle_hysteresis_time_ns_show(struct device *dev, struct device_attribute *attr, + char *const buf) +{ + struct kbase_device *kbdev; + ssize_t ret; + u32 dur; + + kbdev = to_kbase_device(dev); + if (!kbdev) + return -ENODEV; + + dur = kbase_csf_firmware_get_gpu_idle_hysteresis_time(kbdev); + ret = scnprintf(buf, PAGE_SIZE, "%u\n", dur); + + return ret; +} + +static DEVICE_ATTR_RW(idle_hysteresis_time_ns); + +/** * mcu_shader_pwroff_timeout_show - Get the MCU shader Core power-off time value. * * @dev: The device this sysfs file is for. 
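Because the legacy idle_hysteresis_time file keeps microseconds while the new idle_hysteresis_time_ns twin takes nanoseconds, it is easy to be off by a factor of 1000. A usage sketch; the sysfs path is platform-dependent, with /sys/class/misc/mali0/device being typical for kbase:

/* Two equivalent ways to request a 10 ms idle hysteresis:
 *
 *   echo 10000    > .../idle_hysteresis_time      (microseconds)
 *   echo 10000000 > .../idle_hysteresis_time_ns   (nanoseconds)
 *
 * Both reach kbase_csf_firmware_set_gpu_idle_hysteresis_time(), which
 * takes nanoseconds; the legacy path multiplies by NSEC_PER_USEC first.
 * The mcu_shader_pwroff_timeout / *_ns pair added below follows the
 * same convention.
 */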
@@ -5466,7 +5834,8 @@ static ssize_t mcu_shader_pwroff_timeout_show(struct device *dev, struct device_ if (!kbdev) return -ENODEV; - pwroff = kbase_csf_firmware_get_mcu_core_pwroff_time(kbdev); + /* The unit of return value of the function is us, So divide by 1000.*/ + pwroff = kbase_csf_firmware_get_mcu_core_pwroff_time(kbdev) / NSEC_PER_USEC; return scnprintf(buf, PAGE_SIZE, "%u\n", pwroff); } @@ -5490,19 +5859,97 @@ static ssize_t mcu_shader_pwroff_timeout_store(struct device *dev, struct device struct kbase_device *kbdev = dev_get_drvdata(dev); u32 dur; + const struct kbase_pm_policy *current_policy; + bool always_on; + if (!kbdev) return -ENODEV; if (kstrtouint(buf, 0, &dur)) return -EINVAL; - kbase_csf_firmware_set_mcu_core_pwroff_time(kbdev, dur); + current_policy = kbase_pm_get_policy(kbdev); + always_on = current_policy == &kbase_pm_always_on_policy_ops; + if (dur == 0 && !always_on) + return -EINVAL; + + /* In sysFs, The unit of the input value of mcu_shader_pwroff_timeout is us. + * But the unit of the input parameter of this function is ns, so multiply by 1000 + */ + kbase_csf_firmware_set_mcu_core_pwroff_time(kbdev, dur * NSEC_PER_USEC); return count; } static DEVICE_ATTR_RW(mcu_shader_pwroff_timeout); +/** + * mcu_shader_pwroff_timeout_ns_show - Get the MCU shader Core power-off time value. + * + * @dev: The device this sysfs file is for. + * @attr: The attributes of the sysfs file. + * @buf: The output buffer for the sysfs file contents + * + * Get the internally recorded MCU shader Core power-off (nominal) timeout value. + * The unit of the value is in nanoseconds. + * + * Return: The number of bytes output to @buf if the + * function succeeded. A Negative value on failure. + */ +static ssize_t mcu_shader_pwroff_timeout_ns_show(struct device *dev, struct device_attribute *attr, + char *const buf) +{ + struct kbase_device *kbdev = dev_get_drvdata(dev); + u32 pwroff; + + if (!kbdev) + return -ENODEV; + + pwroff = kbase_csf_firmware_get_mcu_core_pwroff_time(kbdev); + return scnprintf(buf, PAGE_SIZE, "%u\n", pwroff); +} + +/** + * mcu_shader_pwroff_timeout_ns_store - Set the MCU shader core power-off time value. + * + * @dev: The device with sysfs file is for + * @attr: The attributes of the sysfs file + * @buf: The value written to the sysfs file + * @count: The number of bytes to write to the sysfs file + * + * The duration value (unit: nanoseconds) for configuring MCU Shader Core + * timer, when the shader cores' power transitions are delegated to the + * MCU (normal operational mode) + * + * Return: @count if the function succeeded. An error code on failure. 
+ */ +static ssize_t mcu_shader_pwroff_timeout_ns_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct kbase_device *kbdev = dev_get_drvdata(dev); + u32 dur; + + const struct kbase_pm_policy *current_policy; + bool always_on; + + if (!kbdev) + return -ENODEV; + + if (kstrtouint(buf, 0, &dur)) + return -EINVAL; + + current_policy = kbase_pm_get_policy(kbdev); + always_on = current_policy == &kbase_pm_always_on_policy_ops; + if (dur == 0 && !always_on) + return -EINVAL; + + kbase_csf_firmware_set_mcu_core_pwroff_time(kbdev, dur); + + return count; +} + +static DEVICE_ATTR_RW(mcu_shader_pwroff_timeout_ns); + #endif /* MALI_USE_CSF */ static struct attribute *kbase_scheduling_attrs[] = { @@ -5563,7 +6010,9 @@ static struct attribute *kbase_attrs[] = { &dev_attr_csg_scheduling_period.attr, &dev_attr_fw_timeout.attr, &dev_attr_idle_hysteresis_time.attr, + &dev_attr_idle_hysteresis_time_ns.attr, &dev_attr_mcu_shader_pwroff_timeout.attr, + &dev_attr_mcu_shader_pwroff_timeout_ns.attr, #endif /* !MALI_USE_CSF */ &dev_attr_power_policy.attr, &dev_attr_core_mask.attr, @@ -5725,6 +6174,14 @@ static int kbase_platform_device_probe(struct platform_device *pdev) mutex_unlock(&kbase_probe_mutex); #endif } else { +#if (KERNEL_VERSION(6, 1, 0) <= LINUX_VERSION_CODE) + /* Since upstream is not exporting mmap_min_addr, kbase at the + * moment is unable to track possible kernel changes via sysfs. + * Flag this out in a device info message. + */ + dev_info(kbdev->dev, KBASE_COMPILED_MMAP_MIN_ADDR_MSG); +#endif + dev_info(kbdev->dev, "Probed as %s\n", dev_name(kbdev->mdev.this_device)); kbase_increment_device_id(); @@ -5950,7 +6407,7 @@ static struct platform_driver kbase_platform_driver = { .probe = kbase_platform_device_probe, .remove = kbase_platform_device_remove, .driver = { - .name = kbase_drv_name, + .name = KBASE_DRV_NAME, .pm = &kbase_pm_ops, .of_match_table = of_match_ptr(kbase_dt_ids), .probe_type = PROBE_PREFER_ASYNCHRONOUS, diff --git a/mali_kbase/mali_kbase_ctx_sched.c b/mali_kbase/mali_kbase_ctx_sched.c index dc6feb9..ea4f300 100644 --- a/mali_kbase/mali_kbase_ctx_sched.c +++ b/mali_kbase/mali_kbase_ctx_sched.c @@ -239,10 +239,11 @@ void kbase_ctx_sched_restore_all_as(struct kbase_device *kbdev) WARN_ON(!kbdev->pm.backend.gpu_powered); + kbdev->mmu_unresponsive = false; + for (i = 0; i != kbdev->nr_hw_address_spaces; ++i) { struct kbase_context *kctx; - kbdev->as[i].is_unresponsive = false; #if MALI_USE_CSF if ((i == MCU_AS_NR) && kbdev->csf.firmware_inited) { kbase_mmu_update(kbdev, &kbdev->csf.mcu_mmu, @@ -292,7 +293,7 @@ struct kbase_context *kbase_ctx_sched_as_to_ctx_refcount( found_kctx = kbdev->as_to_kctx[as_nr]; - if (!WARN_ON(found_kctx == NULL)) + if (found_kctx) kbase_ctx_sched_retain_ctx_refcount(found_kctx); spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); diff --git a/mali_kbase/mali_kbase_debug_mem_allocs.c b/mali_kbase/mali_kbase_debug_mem_allocs.c index 418bb19..0592187 100644 --- a/mali_kbase/mali_kbase_debug_mem_allocs.c +++ b/mali_kbase/mali_kbase_debug_mem_allocs.c @@ -34,8 +34,7 @@ /** * debug_zone_mem_allocs_show - Show information from specific rbtree - * @zone: Name of GPU virtual memory zone - * @rbtree: Pointer to the root of the rbtree associated with @zone + * @zone: The memory zone to be displayed * @sfile: The debugfs entry * * This function is called to show information about all the GPU allocations of a @@ -43,9 +42,10 @@ * The information like the start virtual address and size (in bytes) is shown for * every GPU 
allocation mapped in the zone. */ -static void debug_zone_mem_allocs_show(char *zone, struct rb_root *rbtree, struct seq_file *sfile) +static void debug_zone_mem_allocs_show(struct kbase_reg_zone *zone, struct seq_file *sfile) { struct rb_node *p; + struct rb_root *rbtree = &zone->reg_rbtree; struct kbase_va_region *reg; const char *type_names[5] = { "Native", @@ -57,7 +57,7 @@ static void debug_zone_mem_allocs_show(char *zone, struct rb_root *rbtree, struc #define MEM_ALLOCS_HEADER \ " VA, VA size, Commit size, Flags, Mem type\n" - seq_printf(sfile, "Zone name: %s\n:", zone); + seq_printf(sfile, "Zone name: %s\n:", kbase_reg_zone_get_name(zone->id)); seq_printf(sfile, MEM_ALLOCS_HEADER); for (p = rb_first(rbtree); p; p = rb_next(p)) { reg = rb_entry(p, struct kbase_va_region, rblink); @@ -82,18 +82,15 @@ static void debug_zone_mem_allocs_show(char *zone, struct rb_root *rbtree, struc static int debug_ctx_mem_allocs_show(struct seq_file *sfile, void *data) { struct kbase_context *const kctx = sfile->private; + enum kbase_memory_zone zone_idx; kbase_gpu_vm_lock(kctx); + for (zone_idx = 0; zone_idx < CONTEXT_ZONE_MAX; zone_idx++) { + struct kbase_reg_zone *zone; - debug_zone_mem_allocs_show("SAME_VA:", &kctx->reg_rbtree_same, sfile); - debug_zone_mem_allocs_show("CUSTOM_VA:", &kctx->reg_rbtree_custom, sfile); - debug_zone_mem_allocs_show("EXEC_VA:", &kctx->reg_rbtree_exec, sfile); - -#if MALI_USE_CSF - debug_zone_mem_allocs_show("EXEC_VA_FIXED:", &kctx->reg_rbtree_exec_fixed, sfile); - debug_zone_mem_allocs_show("FIXED_VA:", &kctx->reg_rbtree_fixed, sfile); -#endif /* MALI_USE_CSF */ - + zone = &kctx->reg_zone[zone_idx]; + debug_zone_mem_allocs_show(zone, sfile); + } kbase_gpu_vm_unlock(kctx); return 0; } diff --git a/mali_kbase/mali_kbase_debug_mem_view.c b/mali_kbase/mali_kbase_debug_mem_view.c index ce87a00..7086c6b 100644 --- a/mali_kbase/mali_kbase_debug_mem_view.c +++ b/mali_kbase/mali_kbase_debug_mem_view.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2013-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2013-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -189,13 +189,13 @@ static const struct seq_operations ops = { .show = debug_mem_show, }; -static int debug_mem_zone_open(struct rb_root *rbtree, - struct debug_mem_data *mem_data) +static int debug_mem_zone_open(struct kbase_reg_zone *zone, struct debug_mem_data *mem_data) { int ret = 0; struct rb_node *p; struct kbase_va_region *reg; struct debug_mem_mapping *mapping; + struct rb_root *rbtree = &zone->reg_rbtree; for (p = rb_first(rbtree); p; p = rb_next(p)) { reg = rb_entry(p, struct kbase_va_region, rblink); @@ -233,8 +233,9 @@ static int debug_mem_open(struct inode *i, struct file *file) struct kbase_context *const kctx = i->i_private; struct debug_mem_data *mem_data; int ret; + enum kbase_memory_zone idx; - if (get_file_rcu(kctx->filp) == 0) + if (!kbase_file_inc_fops_count_unless_closed(kctx->kfile)) return -ENOENT; /* Check if file was opened in write mode. 
GPU memory contents @@ -263,37 +264,15 @@ static int debug_mem_open(struct inode *i, struct file *file) mem_data->column_width = kctx->mem_view_column_width; - ret = debug_mem_zone_open(&kctx->reg_rbtree_same, mem_data); - if (ret != 0) { - kbase_gpu_vm_unlock(kctx); - goto out; - } - - ret = debug_mem_zone_open(&kctx->reg_rbtree_custom, mem_data); - if (ret != 0) { - kbase_gpu_vm_unlock(kctx); - goto out; - } - - ret = debug_mem_zone_open(&kctx->reg_rbtree_exec, mem_data); - if (ret != 0) { - kbase_gpu_vm_unlock(kctx); - goto out; - } + for (idx = 0; idx < CONTEXT_ZONE_MAX; idx++) { + struct kbase_reg_zone *zone = &kctx->reg_zone[idx]; -#if MALI_USE_CSF - ret = debug_mem_zone_open(&kctx->reg_rbtree_exec_fixed, mem_data); - if (ret != 0) { - kbase_gpu_vm_unlock(kctx); - goto out; - } - - ret = debug_mem_zone_open(&kctx->reg_rbtree_fixed, mem_data); - if (ret != 0) { - kbase_gpu_vm_unlock(kctx); - goto out; + ret = debug_mem_zone_open(zone, mem_data); + if (ret != 0) { + kbase_gpu_vm_unlock(kctx); + goto out; + } } -#endif kbase_gpu_vm_unlock(kctx); @@ -316,7 +295,7 @@ out: } seq_release(i, file); open_fail: - fput(kctx->filp); + kbase_file_dec_fops_count(kctx->kfile); return ret; } @@ -346,7 +325,7 @@ static int debug_mem_release(struct inode *inode, struct file *file) kfree(mem_data); } - fput(kctx->filp); + kbase_file_dec_fops_count(kctx->kfile); return 0; } diff --git a/mali_kbase/mali_kbase_debug_mem_zones.c b/mali_kbase/mali_kbase_debug_mem_zones.c index 1f8db32..115c9c3 100644 --- a/mali_kbase/mali_kbase_debug_mem_zones.c +++ b/mali_kbase/mali_kbase_debug_mem_zones.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2022-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -47,30 +47,29 @@ static int debug_mem_zones_show(struct seq_file *sfile, void *data) { struct kbase_context *const kctx = sfile->private; - size_t i; - - const char *zone_names[KBASE_REG_ZONE_MAX] = { - "SAME_VA", - "CUSTOM_VA", - "EXEC_VA" -#if MALI_USE_CSF - , - "MCU_SHARED_VA", - "EXEC_FIXED_VA", - "FIXED_VA" -#endif - }; + struct kbase_reg_zone *reg_zone; + enum kbase_memory_zone zone_idx; kbase_gpu_vm_lock(kctx); - for (i = 0; i < KBASE_REG_ZONE_MAX; i++) { - struct kbase_reg_zone *reg_zone = &kctx->reg_zone[i]; + for (zone_idx = 0; zone_idx < CONTEXT_ZONE_MAX; zone_idx++) { + reg_zone = &kctx->reg_zone[zone_idx]; if (reg_zone->base_pfn) { - seq_printf(sfile, "%15s %zu 0x%.16llx 0x%.16llx\n", zone_names[i], i, - reg_zone->base_pfn, reg_zone->va_size_pages); + seq_printf(sfile, "%15s %u 0x%.16llx 0x%.16llx\n", + kbase_reg_zone_get_name(zone_idx), zone_idx, reg_zone->base_pfn, + reg_zone->va_size_pages); } } +#if MALI_USE_CSF + reg_zone = &kctx->kbdev->csf.mcu_shared_zone; + + if (reg_zone && reg_zone->base_pfn) { + seq_printf(sfile, "%15s %u 0x%.16llx 0x%.16llx\n", + kbase_reg_zone_get_name(MCU_SHARED_ZONE), MCU_SHARED_ZONE, + reg_zone->base_pfn, reg_zone->va_size_pages); + } +#endif kbase_gpu_vm_unlock(kctx); return 0; diff --git a/mali_kbase/mali_kbase_defs.h b/mali_kbase/mali_kbase_defs.h index 12e90ac..efe690d 100644 --- a/mali_kbase/mali_kbase_defs.h +++ b/mali_kbase/mali_kbase_defs.h @@ -183,6 +183,60 @@ struct kbase_as; struct kbase_mmu_setup; struct kbase_kinstr_jm; +#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) +/** + * struct kbase_gpu_metrics - Object containing members that are used to emit + * GPU metrics tracepoints for all applications that + * created Kbase context(s) for a GPU. + * + * @active_list: List of applications that did some GPU activity in the recent work period. + * @inactive_list: List of applications that didn't do any GPU activity in the recent work period. + */ +struct kbase_gpu_metrics { + struct list_head active_list; + struct list_head inactive_list; +}; + +/** + * struct kbase_gpu_metrics_ctx - Object created for every application that created + * Kbase context(s), containing members that are used + * to emit GPU metrics tracepoints for the application. + * + * @link: Links the object in kbase_device::gpu_metrics::active_list + * or kbase_device::gpu_metrics::inactive_list. + * @first_active_start_time: Records the time at which the application first became + * active in the current work period. + * @last_active_start_time: Records the time at which the application last became + * active in the current work period. + * @last_active_end_time: Records the time at which the application last became + * inactive in the current work period. + * @total_active: Tracks the time for which the application has been active + * in the current work period. + * @prev_wp_active_end_time: Records the time at which the application last became + * inactive in the previous work period. + * @aid: Unique identifier for an application. + * @kctx_count: Counter to keep track of the number of Kbase contexts + * created for an application. There may be multiple Kbase + * contexts contributing GPU activity data to a single GPU + * metrics context. + * @active_cnt: Counter that is updated every time the GPU activity starts + * and ends in the current work period for an application.
+ * @flags: Flags to track the state of GPU metrics context. + */ +struct kbase_gpu_metrics_ctx { + struct list_head link; + u64 first_active_start_time; + u64 last_active_start_time; + u64 last_active_end_time; + u64 total_active; + u64 prev_wp_active_end_time; + unsigned int aid; + unsigned int kctx_count; + u8 active_cnt; + u8 flags; +}; +#endif + /** * struct kbase_io_access - holds information about 1 register access * @@ -317,7 +371,7 @@ struct kbase_mmu_table { u64 levels[MIDGARD_MMU_BOTTOMLEVEL][PAGE_SIZE / sizeof(u64)]; } teardown_pages; /** - * @free_pgds: Scratch memory user for insertion, update and teardown + * @free_pgds: Scratch memory used for insertion, update and teardown * operations to store a temporary list of PGDs to be freed * at the end of the operation. */ @@ -331,18 +385,69 @@ struct kbase_mmu_table { }; /** - * struct kbase_reg_zone - Information about GPU memory region zones + * enum kbase_memory_zone - Kbase memory zone identifier + * @SAME_VA_ZONE: Memory zone for allocations where the GPU and CPU VA coincide. + * @CUSTOM_VA_ZONE: When operating in compatibility mode, this zone is used to + * allow 32-bit userspace (either on a 32-bit device or a + * 32-bit application on a 64-bit device) to address the entirety + * of the GPU address space. The @CUSTOM_VA_ZONE is also used + * for JIT allocations: on 64-bit systems, the zone is created + * by reducing the size of the SAME_VA zone by a user-controlled + * amount, whereas on 32-bit systems, it is created as part of + * the existing CUSTOM_VA_ZONE + * @EXEC_VA_ZONE: Memory zone used to track GPU-executable memory. The start + * and end of this zone depend on the individual platform, + * and it is initialized upon user process request. + * @EXEC_FIXED_VA_ZONE: Memory zone used to contain GPU-executable memory + * that also permits FIXED/FIXABLE allocations. + * @FIXED_VA_ZONE: Memory zone used to allocate memory at userspace-supplied + * addresses. + * @MCU_SHARED_ZONE: Memory zone created for mappings shared between the MCU + * and Kbase. Currently this is the only zone type that is + * created on a per-device, rather than a per-context + * basis. + * @MEMORY_ZONE_MAX: Sentinel value used for iterating over all the memory zone + * identifiers. + * @CONTEXT_ZONE_MAX: Sentinel value used to keep track of the last per-context + * zone for iteration. + */ +enum kbase_memory_zone { + SAME_VA_ZONE, + CUSTOM_VA_ZONE, + EXEC_VA_ZONE, +#if IS_ENABLED(MALI_USE_CSF) + EXEC_FIXED_VA_ZONE, + FIXED_VA_ZONE, + MCU_SHARED_ZONE, +#endif + MEMORY_ZONE_MAX, +#if IS_ENABLED(MALI_USE_CSF) + CONTEXT_ZONE_MAX = FIXED_VA_ZONE + 1 +#else + CONTEXT_ZONE_MAX = EXEC_VA_ZONE + 1 +#endif +}; + +/** + * struct kbase_reg_zone - GPU memory zone information and region tracking + * @reg_rbtree: RB tree used to track kbase memory regions. * @base_pfn: Page Frame Number in GPU virtual address space for the start of * the Zone * @va_size_pages: Size of the Zone in pages + * @id: Memory zone identifier + * @cache: Pointer to a per-device slab allocator to allow for quickly allocating + * new regions * * Track information about a zone KBASE_REG_ZONE() and related macros. * In future, this could also store the &rb_root that are currently in * &kbase_context and &kbase_csf_device. 
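
As a concrete note on the two sentinels above: per-context code iterates zones with CONTEXT_ZONE_MAX, while MEMORY_ZONE_MAX is only meaningful for walks that would also cover the per-device MCU_SHARED_ZONE. A minimal sketch of the per-context iteration this patch adopts (the sketch_ helper is illustrative; kctx->reg_zone[] is introduced below):

static void sketch_for_each_context_zone(struct kbase_context *kctx,
					 void (*fn)(struct kbase_reg_zone *zone))
{
	enum kbase_memory_zone zone_idx;

	/* Stop at CONTEXT_ZONE_MAX: MCU_SHARED_ZONE is per-device on CSF
	 * builds and is therefore not part of kctx->reg_zone[].
	 */
	for (zone_idx = 0; zone_idx < CONTEXT_ZONE_MAX; zone_idx++)
		fn(&kctx->reg_zone[zone_idx]);
}

The same loop shape appears in debug_mem_zones_show() and debug_mem_open() earlier in this merge.
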
*/ struct kbase_reg_zone { + struct rb_root reg_rbtree; u64 base_pfn; u64 va_size_pages; + enum kbase_memory_zone id; + struct kmem_cache *cache; }; #if MALI_USE_CSF @@ -439,7 +544,15 @@ struct kbase_clk_rate_trace_manager { * Note that some code paths keep shaders/the tiler * powered whilst this is 0. * Use kbase_pm_is_active() instead to check for such cases. - * @suspending: Flag indicating suspending/suspended + * @suspending: Flag set to true when System suspend of GPU device begins and + * set to false only when System resume of GPU device starts. + * So GPU device could be in suspended state while the flag is set. + * The flag is updated with @lock held. + * @resuming: Flag set to true when System resume of GPU device starts and is set + * to false when resume ends. The flag is set to true at the same time + * when @suspending is set to false with @lock held. + * The flag is currently used only to prevent Kbase context termination + * during System resume of GPU device. * @runtime_active: Flag to track if the GPU is in runtime suspended or active * state. This ensures that runtime_put and runtime_get * functions are called in pairs. For example if runtime_get @@ -450,7 +563,7 @@ struct kbase_clk_rate_trace_manager { * This structure contains data for the power management framework. * There is one instance of this structure per device in the system. * @zero_active_count_wait: Wait queue set when active_count == 0 - * @resume_wait: system resume of GPU device. + * @resume_wait: Wait queue to wait for the System suspend/resume of GPU device. * @debug_core_mask: Bit masks identifying the available shader cores that are * specified via sysfs. One mask per job slot. * @debug_core_mask_all: Bit masks identifying the available shader cores that @@ -471,6 +584,7 @@ struct kbase_pm_device_data { struct rt_mutex lock; int active_count; bool suspending; + bool resuming; #if MALI_USE_CSF bool runtime_active; #endif @@ -823,10 +937,14 @@ struct kbase_mem_migrate { * to the GPU device. This points to an internal memory * group manager if no platform-specific memory group * manager was retrieved through device tree. + * @mmu_unresponsive: Flag to indicate MMU is not responding. + * Set if a MMU command isn't completed within + * &kbase_device:mmu_or_gpu_cache_op_wait_time_ms. + * Clear by kbase_ctx_sched_restore_all_as() after GPU reset completes. * @as: Array of objects representing address spaces of GPU. - * @as_free: Bitpattern of free/available GPU address spaces. * @as_to_kctx: Array of pointers to struct kbase_context, having * GPU adrress spaces assigned to them. + * @as_free: Bitpattern of free/available GPU address spaces. * @mmu_mask_change: Lock to serialize the access to MMU interrupt mask * register used in the handling of Bus & Page faults. * @pagesize_2mb: Boolean to determine whether 2MiB page sizes are @@ -1082,9 +1200,11 @@ struct kbase_mem_migrate { * KCPU queue. These structures may outlive kbase module * itself. Therefore, in such a case, a warning should be * be produced. - * @mmu_as_inactive_wait_time_ms: Maximum waiting time in ms for the completion of - * a MMU operation + * @mmu_or_gpu_cache_op_wait_time_ms: Maximum waiting time in ms for the completion of + * a cache operation via MMU_AS_CONTROL or GPU_CONTROL. * @va_region_slab: kmem_cache (slab) for allocated kbase_va_region structures. + * @fence_signal_timeout_enabled: Global flag for whether fence signal timeout tracking + * is enabled. 
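
With @reg_rbtree and @id embedded in struct kbase_reg_zone above, finding the zone for a GPU address reduces to one bounds check per zone. A minimal sketch of that check, assuming kbase_reg_zone_end_pfn() (used later in this patch) is simply base plus size:

/* A GPU PFN belongs to a zone iff it lies in [base_pfn, base_pfn + va_size_pages). */
static bool sketch_zone_contains_pfn(const struct kbase_reg_zone *zone, u64 gpu_pfn)
{
	const u64 end_pfn = zone->base_pfn + zone->va_size_pages;

	return (gpu_pfn >= zone->base_pfn) && (gpu_pfn < end_pfn);
}

This is the test kbase_gpu_pfn_to_rbtree() performs further down in mali_kbase_mem.c.
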
*/ struct kbase_device { u32 hw_quirks_sc; @@ -1135,9 +1255,10 @@ struct kbase_device { struct memory_group_manager_device *mgm_dev; + bool mmu_unresponsive; struct kbase_as as[BASE_MAX_NR_AS]; - u16 as_free; struct kbase_context *as_to_kctx[BASE_MAX_NR_AS]; + u16 as_free; spinlock_t mmu_mask_change; @@ -1196,9 +1317,7 @@ struct kbase_device { u64 lowest_gpu_freq_khz; -#if MALI_USE_CSF struct kbase_backend_time backend_time; -#endif bool cache_clean_in_progress; u32 cache_clean_queued; @@ -1396,8 +1515,18 @@ struct kbase_device { #if MALI_USE_CSF && IS_ENABLED(CONFIG_SYNC_FILE) atomic_t live_fence_metadata; #endif - u32 mmu_as_inactive_wait_time_ms; + u32 mmu_or_gpu_cache_op_wait_time_ms; struct kmem_cache *va_region_slab; + +#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) + /** + * @gpu_metrics: GPU device wide structure used for emitting GPU metrics tracepoints. + */ + struct kbase_gpu_metrics gpu_metrics; +#endif +#if MALI_USE_CSF + atomic_t fence_signal_timeout_enabled; +#endif }; /** @@ -1414,6 +1543,9 @@ struct kbase_device { * @KBASE_FILE_COMPLETE: Indicates if the setup for context has * completed, i.e. flags have been set for the * context. + * @KBASE_FILE_DESTROY_CTX: Indicates that destroying of context has begun or + * is complete. This state can only be reached after + * @KBASE_FILE_COMPLETE. * * The driver allows only limited interaction with user-space until setup * is complete. @@ -1423,7 +1555,8 @@ enum kbase_file_state { KBASE_FILE_VSN_IN_PROGRESS, KBASE_FILE_NEED_CTX, KBASE_FILE_CTX_IN_PROGRESS, - KBASE_FILE_COMPLETE + KBASE_FILE_COMPLETE, + KBASE_FILE_DESTROY_CTX }; /** @@ -1433,6 +1566,12 @@ enum kbase_file_state { * allocated from the probe method of the Mali driver. * @filp: Pointer to the struct file corresponding to device file * /dev/malixx instance, passed to the file's open method. + * @owner: Pointer to the file table structure of a process that + * created the instance of /dev/malixx device file. Set to + * NULL when that process closes the file instance. No more + * file operations would be allowed once set to NULL. + * It would be updated only in the Userspace context, i.e. + * when @kbase_open or @kbase_flush is called. * @kctx: Object representing an entity, among which GPU is * scheduled and which gets its own GPU address space. * Invalid until @setup_state is KBASE_FILE_COMPLETE. @@ -1441,13 +1580,40 @@ enum kbase_file_state { * @setup_state is KBASE_FILE_NEED_CTX. * @setup_state: Initialization state of the file. Values come from * the kbase_file_state enumeration. + * @destroy_kctx_work: Work item for destroying the @kctx, enqueued only when + * @fops_count and @map_count become zero after /dev/malixx + * file was previously closed by the @owner. + * @lock: Lock to serialize the access to members like @owner, @fops_count, + * @map_count. + * @fops_count: Counter that is incremented at the beginning of a method + * defined for @kbase_fops and is decremented at the end. + * So the counter keeps track of the file operations in progress + * for /dev/malixx file, that are being handled by the Kbase. + * The counter is needed to defer the context termination as + * Userspace can close the /dev/malixx file and flush() method + * can get called when some other file operation is in progress. + * @map_count: Counter to keep track of the memory mappings present on + * /dev/malixx file instance.
The counter is needed to defer the + * context termination as Userspace can close the /dev/malixx + * file and flush() method can get called when mappings are still + * present. + * @zero_fops_count_wait: Waitqueue used to wait for the @fops_count to become 0. + * Currently needed only for the "mem_view" debugfs file. */ struct kbase_file { struct kbase_device *kbdev; struct file *filp; + fl_owner_t owner; struct kbase_context *kctx; unsigned long api_version; atomic_t setup_state; + struct work_struct destroy_kctx_work; + spinlock_t lock; + int fops_count; + int map_count; +#if IS_ENABLED(CONFIG_DEBUG_FS) + wait_queue_head_t zero_fops_count_wait; +#endif }; #if MALI_JIT_PRESSURE_LIMIT_BASE /** @@ -1617,8 +1783,8 @@ struct kbase_sub_alloc { /** * struct kbase_context - Kernel base context * - * @filp: Pointer to the struct file corresponding to device file - * /dev/malixx instance, passed to the file's open method. + * @kfile: Pointer to the object representing the /dev/malixx device + * file instance. * @kbdev: Pointer to the Kbase device for which the context is created. * @kctx_list_link: Node into Kbase device list of contexts. * @mmu: Structure holding details of the MMU tables for this @@ -1653,22 +1819,6 @@ struct kbase_sub_alloc { * for the allocations >= 2 MB in size. * @reg_lock: Lock used for GPU virtual address space management operations, * like adding/freeing a memory region in the address space. - * Can be converted to a rwlock ?. - * @reg_rbtree_same: RB tree of the memory regions allocated from the SAME_VA - * zone of the GPU virtual address space. Used for allocations - * having the same value for GPU & CPU virtual address. - * @reg_rbtree_custom: RB tree of the memory regions allocated from the CUSTOM_VA - * zone of the GPU virtual address space. - * @reg_rbtree_exec: RB tree of the memory regions allocated from the EXEC_VA - * zone of the GPU virtual address space. Used for GPU-executable - * allocations which don't need the SAME_VA property. - * @reg_rbtree_exec_fixed: RB tree of the memory regions allocated from the - * EXEC_FIXED_VA zone of the GPU virtual address space. Used for - * GPU-executable allocations with FIXED/FIXABLE GPU virtual - * addresses. - * @reg_rbtree_fixed: RB tree of the memory regions allocated from the FIXED_VA zone - * of the GPU virtual address space. Used for allocations with - * FIXED/FIXABLE GPU virtual addresses. * @num_fixable_allocs: A count for the number of memory allocations with the * BASE_MEM_FIXABLE property. * @num_fixed_allocs: A count for the number of memory allocations with the @@ -1881,6 +2031,7 @@ struct kbase_sub_alloc { * that created the Kbase context. It would be set only for the * contexts created by the Userspace and not for the contexts * created internally by the Kbase. + * @comm: Record the process name * * A kernel base context is an entity among which the GPU is scheduled. * Each context has its own GPU address space. @@ -1889,7 +2040,7 @@ struct kbase_sub_alloc { * is made on the device file. 
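
The @fops_count/@map_count scheme documented above is a guarded-entry pattern around every file operation. The bodies of kbase_file_inc_fops_count_unless_closed() and kbase_file_dec_fops_count() are not part of this hunk, so the following is only a sketch of their likely shape, inferred from the member documentation (the lock choice and wake-up details are assumptions):

static bool sketch_inc_fops_count_unless_closed(struct kbase_file *kfile)
{
	bool allowed;

	spin_lock(&kfile->lock);
	/* Refuse new file operations once the owner closed /dev/malixx. */
	allowed = (kfile->owner != NULL);
	if (allowed)
		kfile->fops_count++;
	spin_unlock(&kfile->lock);

	return allowed;
}

static void sketch_dec_fops_count(struct kbase_file *kfile)
{
	spin_lock(&kfile->lock);
	/* Defer context destruction until no fops or mappings remain;
	 * the real code presumably also wakes @zero_fops_count_wait here.
	 */
	if (--kfile->fops_count == 0 && kfile->map_count == 0 && !kfile->owner)
		queue_work(system_wq, &kfile->destroy_kctx_work);
	spin_unlock(&kfile->lock);
}

This pairing is why debug_mem_open() above moved from get_file_rcu() to the inc/dec helpers: an in-flight mem_view operation defers context termination instead of racing with it.
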
*/ struct kbase_context { - struct file *filp; + struct kbase_file *kfile; struct kbase_device *kbdev; struct list_head kctx_list_link; struct kbase_mmu_table mmu; @@ -1914,17 +2065,11 @@ struct kbase_context { struct list_head mem_partials; struct mutex reg_lock; - - struct rb_root reg_rbtree_same; - struct rb_root reg_rbtree_custom; - struct rb_root reg_rbtree_exec; #if MALI_USE_CSF - struct rb_root reg_rbtree_exec_fixed; - struct rb_root reg_rbtree_fixed; atomic64_t num_fixable_allocs; atomic64_t num_fixed_allocs; #endif - struct kbase_reg_zone reg_zone[KBASE_REG_ZONE_MAX]; + struct kbase_reg_zone reg_zone[CONTEXT_ZONE_MAX]; #if MALI_USE_CSF struct kbase_csf_context csf; @@ -2031,6 +2176,16 @@ struct kbase_context { void *platform_data; struct task_struct *task; + +#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) + /** + * @gpu_metrics_ctx: Pointer to the GPU metrics context corresponding to the + * application that created the Kbase context. + */ + struct kbase_gpu_metrics_ctx *gpu_metrics_ctx; +#endif + + char comm[TASK_COMM_LEN]; }; #ifdef CONFIG_MALI_CINSTR_GWT diff --git a/mali_kbase/mali_kbase_dummy_job_wa.c b/mali_kbase/mali_kbase_dummy_job_wa.c index 35934b9..c3c6046 100644 --- a/mali_kbase/mali_kbase_dummy_job_wa.c +++ b/mali_kbase/mali_kbase_dummy_job_wa.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -183,9 +183,9 @@ int kbase_dummy_job_wa_execute(struct kbase_device *kbdev, u64 cores) if (kbdev->dummy_job_wa.flags & KBASE_DUMMY_JOB_WA_FLAG_WAIT_POWERUP) { /* wait for power-ups */ - wait(kbdev, SHADER_READY_LO, (cores & U32_MAX), true); + wait(kbdev, GPU_CONTROL_REG(SHADER_READY_LO), (cores & U32_MAX), true); if (cores >> 32) - wait(kbdev, SHADER_READY_HI, (cores >> 32), true); + wait(kbdev, GPU_CONTROL_REG(SHADER_READY_HI), (cores >> 32), true); } if (kbdev->dummy_job_wa.flags & KBASE_DUMMY_JOB_WA_FLAG_SERIALIZE) { @@ -218,11 +218,11 @@ int kbase_dummy_job_wa_execute(struct kbase_device *kbdev, u64 cores) kbase_reg_write(kbdev, SHADER_PWROFF_HI, (cores >> 32)); /* wait for power off complete */ - wait(kbdev, SHADER_READY_LO, (cores & U32_MAX), false); - wait(kbdev, SHADER_PWRTRANS_LO, (cores & U32_MAX), false); + wait(kbdev, GPU_CONTROL_REG(SHADER_READY_LO), (cores & U32_MAX), false); + wait(kbdev, GPU_CONTROL_REG(SHADER_PWRTRANS_LO), (cores & U32_MAX), false); if (cores >> 32) { - wait(kbdev, SHADER_READY_HI, (cores >> 32), false); - wait(kbdev, SHADER_PWRTRANS_HI, (cores >> 32), false); + wait(kbdev, GPU_CONTROL_REG(SHADER_READY_HI), (cores >> 32), false); + wait(kbdev, GPU_CONTROL_REG(SHADER_PWRTRANS_HI), (cores >> 32), false); } kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), U32_MAX); } diff --git a/mali_kbase/mali_kbase_fence.h b/mali_kbase/mali_kbase_fence.h index f4507ac..ea2ac34 100644 --- a/mali_kbase/mali_kbase_fence.h +++ b/mali_kbase/mali_kbase_fence.h @@ -33,6 +33,7 @@ #include "mali_kbase_fence_defs.h" #include "mali_kbase.h" #include "mali_kbase_refcount_defs.h" +#include <linux/version_compat_defs.h> #if MALI_USE_CSF /* Maximum number of characters in DMA fence timeline name. 
*/ @@ -160,16 +161,8 @@ static inline bool kbase_fence_out_is_ours(struct kbase_jd_atom *katom) static inline int kbase_fence_out_signal(struct kbase_jd_atom *katom, int status) { - if (status) { -#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE && \ - KERNEL_VERSION(4, 9, 68) <= LINUX_VERSION_CODE) - fence_set_error(katom->dma_fence.fence, status); -#elif (KERNEL_VERSION(4, 11, 0) <= LINUX_VERSION_CODE) - dma_fence_set_error(katom->dma_fence.fence, status); -#else - katom->dma_fence.fence->status = status; -#endif - } + if (status) + dma_fence_set_error_helper(katom->dma_fence.fence, status); return dma_fence_signal(katom->dma_fence.fence); } diff --git a/mali_kbase/mali_kbase_fence_ops.c b/mali_kbase/mali_kbase_fence_ops.c index 25b4c9c..f14a55e 100644 --- a/mali_kbase/mali_kbase_fence_ops.c +++ b/mali_kbase/mali_kbase_fence_ops.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2020-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -31,7 +31,7 @@ kbase_fence_get_driver_name(struct fence *fence) kbase_fence_get_driver_name(struct dma_fence *fence) #endif { - return kbase_drv_name; + return KBASE_DRV_NAME; } static const char * @@ -46,7 +46,7 @@ kbase_fence_get_timeline_name(struct dma_fence *fence) return kcpu_fence->metadata->timeline_name; #else - return kbase_timeline_name; + return KBASE_TIMELINE_NAME; #endif /* MALI_USE_CSF */ } diff --git a/mali_kbase/mali_kbase_gpu_metrics.c b/mali_kbase/mali_kbase_gpu_metrics.c new file mode 100644 index 0000000..af3a08d --- /dev/null +++ b/mali_kbase/mali_kbase_gpu_metrics.c @@ -0,0 +1,260 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +/* + * + * (C) COPYRIGHT 2023 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) +#include "mali_power_gpu_work_period_trace.h" +#include <mali_kbase_gpu_metrics.h> + +/** + * enum gpu_metrics_ctx_flags - Flags for the GPU metrics context + * + * @ACTIVE_INTERVAL_IN_WP: Flag set when the application first becomes active in + * the current work period. + * + * @INSIDE_ACTIVE_LIST: Flag to track if object is in kbase_device::gpu_metrics::active_list + * + * All members need to be separate bits. This enum is intended for use in a + * bitmask where multiple values get OR-ed together. 
+ */ +enum gpu_metrics_ctx_flags { + ACTIVE_INTERVAL_IN_WP = 1 << 0, + INSIDE_ACTIVE_LIST = 1 << 1, +}; + +static inline bool gpu_metrics_ctx_flag(struct kbase_gpu_metrics_ctx *gpu_metrics_ctx, + enum gpu_metrics_ctx_flags flag) +{ + return (gpu_metrics_ctx->flags & flag); +} + +static inline void gpu_metrics_ctx_flag_set(struct kbase_gpu_metrics_ctx *gpu_metrics_ctx, + enum gpu_metrics_ctx_flags flag) +{ + gpu_metrics_ctx->flags |= flag; +} + +static inline void gpu_metrics_ctx_flag_clear(struct kbase_gpu_metrics_ctx *gpu_metrics_ctx, + enum gpu_metrics_ctx_flags flag) +{ + gpu_metrics_ctx->flags &= ~flag; +} + +static inline void validate_tracepoint_data(struct kbase_gpu_metrics_ctx *gpu_metrics_ctx, + u64 start_time, u64 end_time, u64 total_active) +{ +#ifdef CONFIG_MALI_DEBUG + WARN(total_active > NSEC_PER_SEC, + "total_active %llu > 1 second for aid %u active_cnt %u", + total_active, gpu_metrics_ctx->aid, gpu_metrics_ctx->active_cnt); + + WARN(start_time >= end_time, + "start_time %llu >= end_time %llu for aid %u active_cnt %u", + start_time, end_time, gpu_metrics_ctx->aid, gpu_metrics_ctx->active_cnt); + + WARN(total_active > (end_time - start_time), + "total_active %llu > end_time %llu - start_time %llu for aid %u active_cnt %u", + total_active, end_time, start_time, + gpu_metrics_ctx->aid, gpu_metrics_ctx->active_cnt); + + WARN(gpu_metrics_ctx->prev_wp_active_end_time > start_time, + "prev_wp_active_end_time %llu > start_time %llu for aid %u active_cnt %u", + gpu_metrics_ctx->prev_wp_active_end_time, start_time, + gpu_metrics_ctx->aid, gpu_metrics_ctx->active_cnt); +#endif +} + +static void emit_tracepoint_for_active_gpu_metrics_ctx(struct kbase_device *kbdev, + struct kbase_gpu_metrics_ctx *gpu_metrics_ctx, u64 current_time) +{ + const u64 start_time = gpu_metrics_ctx->first_active_start_time; + u64 total_active = gpu_metrics_ctx->total_active; + u64 end_time; + + /* Check if the GPU activity is currently ongoing */ + if (gpu_metrics_ctx->active_cnt) { + end_time = current_time; + total_active += + end_time - gpu_metrics_ctx->last_active_start_time; + + gpu_metrics_ctx->first_active_start_time = current_time; + gpu_metrics_ctx->last_active_start_time = current_time; + } else { + end_time = gpu_metrics_ctx->last_active_end_time; + gpu_metrics_ctx_flag_clear(gpu_metrics_ctx, ACTIVE_INTERVAL_IN_WP); + } + + trace_gpu_work_period(kbdev->id, gpu_metrics_ctx->aid, + start_time, end_time, total_active); + + validate_tracepoint_data(gpu_metrics_ctx, start_time, end_time, total_active); + gpu_metrics_ctx->prev_wp_active_end_time = end_time; + gpu_metrics_ctx->total_active = 0; +} + +void kbase_gpu_metrics_ctx_put(struct kbase_device *kbdev, + struct kbase_gpu_metrics_ctx *gpu_metrics_ctx) +{ + WARN_ON(list_empty(&gpu_metrics_ctx->link)); + WARN_ON(!gpu_metrics_ctx->kctx_count); + + gpu_metrics_ctx->kctx_count--; + if (gpu_metrics_ctx->kctx_count) + return; + + if (gpu_metrics_ctx_flag(gpu_metrics_ctx, ACTIVE_INTERVAL_IN_WP)) + emit_tracepoint_for_active_gpu_metrics_ctx(kbdev, + gpu_metrics_ctx, ktime_get_raw_ns()); + + list_del_init(&gpu_metrics_ctx->link); + kfree(gpu_metrics_ctx); +} + +struct kbase_gpu_metrics_ctx *kbase_gpu_metrics_ctx_get(struct kbase_device *kbdev, u32 aid) +{ + struct kbase_gpu_metrics *gpu_metrics = &kbdev->gpu_metrics; + struct kbase_gpu_metrics_ctx *gpu_metrics_ctx; + + list_for_each_entry(gpu_metrics_ctx, &gpu_metrics->active_list, link) { + if (gpu_metrics_ctx->aid == aid) { + WARN_ON(!gpu_metrics_ctx->kctx_count); + gpu_metrics_ctx->kctx_count++; + return 
gpu_metrics_ctx; + } + } + + list_for_each_entry(gpu_metrics_ctx, &gpu_metrics->inactive_list, link) { + if (gpu_metrics_ctx->aid == aid) { + WARN_ON(!gpu_metrics_ctx->kctx_count); + gpu_metrics_ctx->kctx_count++; + return gpu_metrics_ctx; + } + } + + return NULL; +} + +void kbase_gpu_metrics_ctx_init(struct kbase_device *kbdev, + struct kbase_gpu_metrics_ctx *gpu_metrics_ctx, unsigned int aid) +{ + gpu_metrics_ctx->aid = aid; + gpu_metrics_ctx->total_active = 0; + gpu_metrics_ctx->kctx_count = 1; + gpu_metrics_ctx->active_cnt = 0; + gpu_metrics_ctx->prev_wp_active_end_time = 0; + gpu_metrics_ctx->flags = 0; + list_add_tail(&gpu_metrics_ctx->link, &kbdev->gpu_metrics.inactive_list); +} + +void kbase_gpu_metrics_ctx_start_activity(struct kbase_context *kctx, u64 timestamp_ns) +{ + struct kbase_gpu_metrics_ctx *gpu_metrics_ctx = kctx->gpu_metrics_ctx; + + gpu_metrics_ctx->active_cnt++; + if (gpu_metrics_ctx->active_cnt == 1) + gpu_metrics_ctx->last_active_start_time = timestamp_ns; + + if (!gpu_metrics_ctx_flag(gpu_metrics_ctx, ACTIVE_INTERVAL_IN_WP)) { + gpu_metrics_ctx->first_active_start_time = timestamp_ns; + gpu_metrics_ctx_flag_set(gpu_metrics_ctx, ACTIVE_INTERVAL_IN_WP); + } + + if (!gpu_metrics_ctx_flag(gpu_metrics_ctx, INSIDE_ACTIVE_LIST)) { + list_move_tail(&gpu_metrics_ctx->link, &kctx->kbdev->gpu_metrics.active_list); + gpu_metrics_ctx_flag_set(gpu_metrics_ctx, INSIDE_ACTIVE_LIST); + } +} + +void kbase_gpu_metrics_ctx_end_activity(struct kbase_context *kctx, u64 timestamp_ns) +{ + struct kbase_gpu_metrics_ctx *gpu_metrics_ctx = kctx->gpu_metrics_ctx; + + if (WARN_ON_ONCE(!gpu_metrics_ctx->active_cnt)) + return; + + if (--gpu_metrics_ctx->active_cnt) + return; + + if (likely(timestamp_ns > gpu_metrics_ctx->last_active_start_time)) { + gpu_metrics_ctx->last_active_end_time = timestamp_ns; + gpu_metrics_ctx->total_active += + timestamp_ns - gpu_metrics_ctx->last_active_start_time; + return; + } + + /* Due to conversion from system timestamp to CPU timestamp (which involves rounding) + * the value for start and end timestamp could come as same. + */ + if (timestamp_ns == gpu_metrics_ctx->last_active_start_time) { + gpu_metrics_ctx->last_active_end_time = timestamp_ns + 1; + gpu_metrics_ctx->total_active += 1; + return; + } + + /* The following check is to detect the situation where 'ACT=0' event was not visible to + * the Kbase even though the system timestamp value sampled by FW was less than the system + * timestamp value sampled by Kbase just before the draining of trace buffer. 
+ */ + if (gpu_metrics_ctx->last_active_start_time == gpu_metrics_ctx->first_active_start_time && + gpu_metrics_ctx->prev_wp_active_end_time == gpu_metrics_ctx->first_active_start_time) { + WARN_ON_ONCE(gpu_metrics_ctx->total_active); + gpu_metrics_ctx->last_active_end_time = + gpu_metrics_ctx->prev_wp_active_end_time + 1; + gpu_metrics_ctx->total_active = 1; + return; + } + + WARN_ON_ONCE(1); +} + +void kbase_gpu_metrics_emit_tracepoint(struct kbase_device *kbdev, u64 ts) +{ + struct kbase_gpu_metrics *gpu_metrics = &kbdev->gpu_metrics; + struct kbase_gpu_metrics_ctx *gpu_metrics_ctx, *tmp; + + list_for_each_entry_safe(gpu_metrics_ctx, tmp, &gpu_metrics->active_list, link) { + if (!gpu_metrics_ctx_flag(gpu_metrics_ctx, ACTIVE_INTERVAL_IN_WP)) { + WARN_ON(!gpu_metrics_ctx_flag(gpu_metrics_ctx, INSIDE_ACTIVE_LIST)); + WARN_ON(gpu_metrics_ctx->active_cnt); + list_move_tail(&gpu_metrics_ctx->link, &gpu_metrics->inactive_list); + gpu_metrics_ctx_flag_clear(gpu_metrics_ctx, INSIDE_ACTIVE_LIST); + continue; + } + + emit_tracepoint_for_active_gpu_metrics_ctx(kbdev, gpu_metrics_ctx, ts); + } +} + +int kbase_gpu_metrics_init(struct kbase_device *kbdev) +{ + INIT_LIST_HEAD(&kbdev->gpu_metrics.active_list); + INIT_LIST_HEAD(&kbdev->gpu_metrics.inactive_list); + + dev_info(kbdev->dev, "GPU metrics tracepoint support enabled"); + return 0; +} + +void kbase_gpu_metrics_term(struct kbase_device *kbdev) +{ + WARN_ON_ONCE(!list_empty(&kbdev->gpu_metrics.active_list)); + WARN_ON_ONCE(!list_empty(&kbdev->gpu_metrics.inactive_list)); +} + +#endif diff --git a/mali_kbase/mali_kbase_gpu_metrics.h b/mali_kbase/mali_kbase_gpu_metrics.h new file mode 100644 index 0000000..adc8816 --- /dev/null +++ b/mali_kbase/mali_kbase_gpu_metrics.h @@ -0,0 +1,167 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * + * (C) COPYRIGHT 2023 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +/** + * DOC: GPU metrics frontend APIs + */ + +#ifndef _KBASE_GPU_METRICS_H_ +#define _KBASE_GPU_METRICS_H_ + +#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) +#include <mali_kbase.h> + +/** + * kbase_gpu_metrics_get_emit_interval() - Return the trace point emission interval. + * + * Return: The time interval in nanosecond for GPU metrics trace point emission. + */ +unsigned long kbase_gpu_metrics_get_emit_interval(void); + +/** + * kbase_gpu_metrics_ctx_put() - Decrement the Kbase context count for the GPU metrics + * context and free it if the count becomes 0. + * + * @kbdev: Pointer to the GPU device. + * @gpu_metrics_ctx: Pointer to the GPU metrics context. + * + * This function must be called when a Kbase context is destroyed. + * The function would decrement the Kbase context count for the GPU metrics context and + * free the memory if the count becomes 0. 
+ * The function would emit a power/gpu_work_period tracepoint for the GPU metrics context + * if there was some GPU activity done for it since the last tracepoint was emitted. + * + * Note: The caller must appropriately serialize the call to this function with the + * call to other GPU metrics functions declared in this file. + */ +void kbase_gpu_metrics_ctx_put(struct kbase_device *kbdev, + struct kbase_gpu_metrics_ctx *gpu_metrics_ctx); + +/** + * kbase_gpu_metrics_ctx_get() - Increment the Kbase context count for the GPU metrics + * context if it exists. + * + * @kbdev: Pointer to the GPU device. + * @aid: Unique identifier of the Application that is creating the Kbase context. + * + * This function must be called when a Kbase context is created. + * The function would increment the Kbase context count for the GPU metrics context, + * corresponding to the @aid, if it exists. + * + * Return: Pointer to the GPU metrics context corresponding to the @aid if it already + * exists otherwise NULL. + * + * Note: The caller must appropriately serialize the call to this function with the + * call to other GPU metrics functions declared in this file. + * The caller shall allocate memory for GPU metrics context structure if the + * function returns NULL. + */ +struct kbase_gpu_metrics_ctx *kbase_gpu_metrics_ctx_get(struct kbase_device *kbdev, u32 aid); + +/** + * kbase_gpu_metrics_ctx_init() - Initialise the GPU metrics context + * + * @kbdev: Pointer to the GPU device. + * @gpu_metrics_ctx: Pointer to the GPU metrics context. + * @aid: Unique identifier of the Application for which GPU metrics + * context needs to be initialized. + * + * This function must be called when a Kbase context is created, after the call to + * kbase_gpu_metrics_ctx_get() returned NULL and memory for the GPU metrics context + * structure was allocated. + * + * Note: The caller must appropriately serialize the call to this function with the + * call to other GPU metrics functions declared in this file. + */ +void kbase_gpu_metrics_ctx_init(struct kbase_device *kbdev, + struct kbase_gpu_metrics_ctx *gpu_metrics_ctx, u32 aid); + +/** + * kbase_gpu_metrics_ctx_start_activity() - Report the start of some GPU activity + * for GPU metrics context. + * + * @kctx: Pointer to the Kbase context contributing data to the GPU metrics context. + * @timestamp_ns: CPU timestamp at which the GPU activity started. + * + * The provided timestamp would be later used as the "start_time_ns" for the + * power/gpu_work_period tracepoint if this is the first GPU activity for the GPU + * metrics context in the current work period. + * + * Note: The caller must appropriately serialize the call to this function with the + * call to other GPU metrics functions declared in this file. + */ +void kbase_gpu_metrics_ctx_start_activity(struct kbase_context *kctx, u64 timestamp_ns); + +/** + * kbase_gpu_metrics_ctx_end_activity() - Report the end of some GPU activity + * for GPU metrics context. + * + * @kctx: Pointer to the Kbase context contributing data to the GPU metrics context. + * @timestamp_ns: CPU timestamp at which the GPU activity ended. + * + * The provided timestamp would be later used as the "end_time_ns" for the + * power/gpu_work_period tracepoint if this is the last GPU activity for the GPU + * metrics context in the current work period. + * + * Note: The caller must appropriately serialize the call to this function with the + * call to other GPU metrics functions declared in this file. 
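
A usage sketch tying the two hooks above together (illustrative only: the real call sites live in the job manager/CSF backends and derive timestamps from the GPU trace stream rather than calling ktime_get_raw_ns() directly):

static void sketch_bracket_gpu_work(struct kbase_context *kctx)
{
	kbase_gpu_metrics_ctx_start_activity(kctx, ktime_get_raw_ns());

	/* ... the GPU executes work on behalf of this context ... */

	kbase_gpu_metrics_ctx_end_activity(kctx, ktime_get_raw_ns());
}
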
+ */ +void kbase_gpu_metrics_ctx_end_activity(struct kbase_context *kctx, u64 timestamp_ns); + +/** + * kbase_gpu_metrics_emit_tracepoint() - Emit power/gpu_work_period tracepoint + * for active GPU metrics contexts. + * + * @kbdev: Pointer to the GPU device. + * @ts: Timestamp at which the tracepoint is being emitted. + * + * This function would loop through all the active GPU metrics contexts and emit a + * power/gpu_work_period tracepoint for them. + * The GPU metrics context that is found to be inactive since the last tracepoint + * was emitted would be moved to the inactive list. + * The current work period would be considered as over and a new work period would + * begin whenever any application does the GPU activity. + * + * Note: The caller must appropriately serialize the call to this function with the + * call to other GPU metrics functions declared in this file. + */ +void kbase_gpu_metrics_emit_tracepoint(struct kbase_device *kbdev, u64 ts); + +/** + * kbase_gpu_metrics_init() - Initialise a gpu_metrics instance for a GPU + * + * @kbdev: Pointer to the GPU device. + * + * This function is called once for each @kbdev. + * + * Return: 0 on success, or negative on failure. + */ +int kbase_gpu_metrics_init(struct kbase_device *kbdev); + +/** + * kbase_gpu_metrics_term() - Terminate a gpu_metrics instance + * + * @kbdev: Pointer to the GPU device. + */ +void kbase_gpu_metrics_term(struct kbase_device *kbdev); + +#endif +#endif /* _KBASE_GPU_METRICS_H_ */ diff --git a/mali_kbase/mali_kbase_gpuprops.c b/mali_kbase/mali_kbase_gpuprops.c index afbba3d..02d6bb2 100644 --- a/mali_kbase/mali_kbase_gpuprops.c +++ b/mali_kbase/mali_kbase_gpuprops.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2011-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -49,7 +49,7 @@ static void kbase_gpuprops_construct_coherent_groups( props->coherency_info.coherency = props->raw_props.mem_features; props->coherency_info.num_core_groups = hweight64(props->raw_props.l2_present); - if (props->coherency_info.coherency & GROUPS_L2_COHERENT) { + if (props->coherency_info.coherency & MEM_FEATURES_COHERENT_CORE_GROUP_MASK) { /* Group is l2 coherent */ group_present = props->raw_props.l2_present; } else { diff --git a/mali_kbase/mali_kbase_gwt.c b/mali_kbase/mali_kbase_gwt.c index 0eba889..32c9241 100644 --- a/mali_kbase/mali_kbase_gwt.c +++ b/mali_kbase/mali_kbase_gwt.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -53,9 +53,9 @@ static void kbase_gpu_gwt_setup_pages(struct kbase_context *kctx, unsigned long flag) { kbase_gpu_gwt_setup_page_permission(kctx, flag, - rb_first(&(kctx->reg_rbtree_same))); + rb_first(&kctx->reg_zone[SAME_VA_ZONE].reg_rbtree)); kbase_gpu_gwt_setup_page_permission(kctx, flag, - rb_first(&(kctx->reg_rbtree_custom))); + rb_first(&kctx->reg_zone[CUSTOM_VA_ZONE].reg_rbtree)); } diff --git a/mali_kbase/mali_kbase_hwaccess_time.h b/mali_kbase/mali_kbase_hwaccess_time.h index ac2a26d..f16348f 100644 --- a/mali_kbase/mali_kbase_hwaccess_time.h +++ b/mali_kbase/mali_kbase_hwaccess_time.h @@ -22,13 +22,16 @@ #ifndef _KBASE_BACKEND_TIME_H_ #define _KBASE_BACKEND_TIME_H_ -#if MALI_USE_CSF /** * struct kbase_backend_time - System timestamp attributes. * * @multiplier: Numerator of the converter's fraction. * @divisor: Denominator of the converter's fraction. * @offset: Converter's offset term. + * @device_scaled_timeouts: Timeouts in milliseconds that were scaled to be + * consistent with the minimum MCU frequency. This + * array caches the results of all of the conversions + * for ease of use later on. * * According to Generic timer spec, system timer: * - Increments at a fixed frequency @@ -49,11 +52,15 @@ * */ struct kbase_backend_time { +#if MALI_USE_CSF u64 multiplier; u64 divisor; s64 offset; +#endif + unsigned int device_scaled_timeouts[KBASE_TIMEOUT_SELECTOR_COUNT]; }; +#if MALI_USE_CSF /** * kbase_backend_time_convert_gpu_to_cpu() - Convert GPU timestamp to CPU timestamp. * @@ -89,6 +96,40 @@ void kbase_backend_get_gpu_time_norequest(struct kbase_device *kbdev, u64 *cycle_counter, u64 *system_time, struct timespec64 *ts); + +/** + * kbase_device_set_timeout_ms - Set an unscaled device timeout in milliseconds, + * subject to the maximum timeout constraint. + * + * @kbdev: KBase device pointer. + * @selector: The specific timeout that should be scaled. + * @timeout_ms: The timeout in milliseconds which should be scaled. + * + * This function writes the absolute timeout in milliseconds to the table of + * precomputed device timeouts, while establishing an upper bound on the individual + * timeout of UINT_MAX milliseconds. + */ +void kbase_device_set_timeout_ms(struct kbase_device *kbdev, enum kbase_timeout_selector selector, + unsigned int timeout_ms); + +/** + * kbase_device_set_timeout - Calculate the given timeout using the provided + * timeout cycles and multiplier. + * + * @kbdev: KBase device pointer. + * @selector: The specific timeout that should be scaled. + * @timeout_cycles: The timeout in cycles which should be scaled. + * @cycle_multiplier: A multiplier applied to the number of cycles, allowing + * the callsite to scale the minimum timeout based on the + * host device. + * + * This function writes the scaled timeout to the per-device table to avoid + having to recompute the timeouts every single time that the related methods + are called.
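
To make the scaling relationship concrete: dividing cycles by a frequency in kHz yields milliseconds. A plausible reduction of kbase_device_set_timeout(), assumed from the parameter names (the real implementation would read a frequency floor such as lowest_gpu_freq_khz from the device and clamp via kbase_device_set_timeout_ms()):

/* e.g. 100000 cycles with multiplier 5 at a 100000 kHz floor -> 5 ms */
static unsigned int sketch_scaled_timeout_ms(u64 timeout_cycles, u32 cycle_multiplier,
					     u64 freq_khz)
{
	const u64 ms = div64_u64(timeout_cycles * cycle_multiplier, freq_khz);

	return (ms > UINT_MAX) ? UINT_MAX : (unsigned int)ms;
}
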
+ */ +void kbase_device_set_timeout(struct kbase_device *kbdev, enum kbase_timeout_selector selector, + u64 timeout_cycles, u32 cycle_multiplier); + /** * kbase_get_timeout_ms - Choose a timeout value to get a timeout scaled * GPU frequency, using a choice from diff --git a/mali_kbase/mali_kbase_js.c b/mali_kbase/mali_kbase_js.c index 5dd7813..d7facb9 100644 --- a/mali_kbase/mali_kbase_js.c +++ b/mali_kbase/mali_kbase_js.c @@ -36,6 +36,20 @@ #include "mali_kbase_hwaccess_jm.h" #include <mali_kbase_hwaccess_time.h> #include <linux/priority_control_manager.h> +#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) +#include <mali_kbase_gpu_metrics.h> + +static unsigned long gpu_metrics_tp_emit_interval_ns = DEFAULT_GPU_METRICS_TP_EMIT_INTERVAL_NS; + +module_param(gpu_metrics_tp_emit_interval_ns, ulong, 0444); +MODULE_PARM_DESC(gpu_metrics_tp_emit_interval_ns, + "Time interval in nano seconds at which GPU metrics tracepoints are emitted"); + +unsigned long kbase_gpu_metrics_get_emit_interval(void) +{ + return gpu_metrics_tp_emit_interval_ns; +} +#endif /* * Private types @@ -101,6 +115,118 @@ static int kbase_ktrace_get_ctx_refcnt(struct kbase_context *kctx) * Private functions */ +#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) +/** + * gpu_metrics_timer_callback() - Callback function for the GPU metrics hrtimer + * + * @timer: Pointer to the GPU metrics hrtimer + * + * This function will emit power/gpu_work_period tracepoint for all the active + * GPU metrics contexts. The timer will be restarted if needed. + * + * Return: enum value to indicate that timer should not be restarted. + */ +static enum hrtimer_restart gpu_metrics_timer_callback(struct hrtimer *timer) +{ + struct kbasep_js_device_data *js_devdata = + container_of(timer, struct kbasep_js_device_data, gpu_metrics_timer); + struct kbase_device *kbdev = + container_of(js_devdata, struct kbase_device, js_data); + unsigned long flags; + + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kbase_gpu_metrics_emit_tracepoint(kbdev, ktime_get_raw_ns()); + WARN_ON_ONCE(!js_devdata->gpu_metrics_timer_running); + if (js_devdata->gpu_metrics_timer_needed) { + hrtimer_start(&js_devdata->gpu_metrics_timer, + HR_TIMER_DELAY_NSEC(gpu_metrics_tp_emit_interval_ns), + HRTIMER_MODE_REL); + } else + js_devdata->gpu_metrics_timer_running = false; + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + return HRTIMER_NORESTART; +} + +/** + * gpu_metrics_ctx_init() - Take a reference on GPU metrics context if it exists, + * otherwise allocate and initialise one. + * + * @kctx: Pointer to the Kbase context. + * + * The GPU metrics context represents an "Application" for the purposes of GPU metrics + * reporting. There may be multiple kbase_contexts contributing data to a single GPU + * metrics context. + * This function takes a reference on GPU metrics context if it already exists + * corresponding to the Application that is creating the Kbase context, otherwise + * memory is allocated for it and initialised. + * + * Return: 0 on success, or negative on failure. 
+ */ +static inline int gpu_metrics_ctx_init(struct kbase_context *kctx) +{ + struct kbase_gpu_metrics_ctx *gpu_metrics_ctx; + struct kbase_device *kbdev = kctx->kbdev; + unsigned long flags; + int ret = 0; + + const struct cred *cred = get_current_cred(); + const unsigned int aid = cred->euid.val; + + put_cred(cred); + + /* Return early if this is not a Userspace created context */ + if (unlikely(!kctx->kfile)) + return 0; + + /* Serialize against the other threads trying to create/destroy Kbase contexts. */ + mutex_lock(&kbdev->kctx_list_lock); + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + gpu_metrics_ctx = kbase_gpu_metrics_ctx_get(kbdev, aid); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + + if (!gpu_metrics_ctx) { + gpu_metrics_ctx = kmalloc(sizeof(*gpu_metrics_ctx), GFP_KERNEL); + + if (gpu_metrics_ctx) { + spin_lock_irqsave(&kbdev->hwaccess_lock, flags); + kbase_gpu_metrics_ctx_init(kbdev, gpu_metrics_ctx, aid); + spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + } else { + dev_err(kbdev->dev, "Allocation for gpu_metrics_ctx failed"); + ret = -ENOMEM; + } + } + + kctx->gpu_metrics_ctx = gpu_metrics_ctx; + mutex_unlock(&kbdev->kctx_list_lock); + + return ret; +} + +/** + * gpu_metrics_ctx_term() - Drop a reference on a GPU metrics context and free it + * if the refcount becomes 0. + * + * @kctx: Pointer to the Kbase context. + */ +static inline void gpu_metrics_ctx_term(struct kbase_context *kctx) +{ + unsigned long flags; + + /* Return early if this is not a Userspace created context */ + if (unlikely(!kctx->kfile)) + return; + + /* Serialize against the other threads trying to create/destroy Kbase contexts. */ + mutex_lock(&kctx->kbdev->kctx_list_lock); + spin_lock_irqsave(&kctx->kbdev->hwaccess_lock, flags); + kbase_gpu_metrics_ctx_put(kctx->kbdev, kctx->gpu_metrics_ctx); + spin_unlock_irqrestore(&kctx->kbdev->hwaccess_lock, flags); + mutex_unlock(&kctx->kbdev->kctx_list_lock); +} +#endif + /** * core_reqs_from_jsn_features - Convert JSn_FEATURES to core requirements * @features: JSn_FEATURE register value @@ -602,6 +728,21 @@ int kbasep_js_devdata_init(struct kbase_device * const kbdev) } } +#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) + if (!gpu_metrics_tp_emit_interval_ns || (gpu_metrics_tp_emit_interval_ns > NSEC_PER_SEC)) { + dev_warn( + kbdev->dev, + "Invalid value (%lu ns) for module param gpu_metrics_tp_emit_interval_ns. 
Using default value: %u ns", + gpu_metrics_tp_emit_interval_ns, DEFAULT_GPU_METRICS_TP_EMIT_INTERVAL_NS); + gpu_metrics_tp_emit_interval_ns = DEFAULT_GPU_METRICS_TP_EMIT_INTERVAL_NS; + } + + hrtimer_init(&jsdd->gpu_metrics_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + jsdd->gpu_metrics_timer.function = gpu_metrics_timer_callback; + jsdd->gpu_metrics_timer_needed = false; + jsdd->gpu_metrics_timer_running = false; +#endif + return 0; } @@ -626,16 +767,29 @@ void kbasep_js_devdata_term(struct kbase_device *kbdev) zero_ctx_attr_ref_count, sizeof(zero_ctx_attr_ref_count)) == 0); CSTD_UNUSED(zero_ctx_attr_ref_count); + +#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) + js_devdata->gpu_metrics_timer_needed = false; + hrtimer_cancel(&js_devdata->gpu_metrics_timer); +#endif } int kbasep_js_kctx_init(struct kbase_context *const kctx) { struct kbasep_js_kctx_info *js_kctx_info; int i, j; + int ret; CSTD_UNUSED(js_kctx_info); KBASE_DEBUG_ASSERT(kctx != NULL); + CSTD_UNUSED(ret); +#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) + ret = gpu_metrics_ctx_init(kctx); + if (ret) + return ret; +#endif + kbase_ctx_sched_init_ctx(kctx); for (i = 0; i < BASE_JM_MAX_NR_SLOTS; ++i) @@ -715,6 +869,9 @@ void kbasep_js_kctx_term(struct kbase_context *kctx) } kbase_ctx_sched_remove_ctx(kctx); +#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) + gpu_metrics_ctx_term(kctx); +#endif } /* diff --git a/mali_kbase/mali_kbase_kinstr_prfcnt.c b/mali_kbase/mali_kbase_kinstr_prfcnt.c index cfafd11..f0c4da7 100644 --- a/mali_kbase/mali_kbase_kinstr_prfcnt.c +++ b/mali_kbase/mali_kbase_kinstr_prfcnt.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2021-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -36,7 +36,6 @@ #include <linux/mutex.h> #include <linux/poll.h> #include <linux/slab.h> -#include <linux/overflow.h> #include <linux/version_compat_defs.h> #include <linux/workqueue.h> @@ -1267,8 +1266,10 @@ void kbase_kinstr_prfcnt_term(struct kbase_kinstr_prfcnt_context *kinstr_ctx) void kbase_kinstr_prfcnt_suspend(struct kbase_kinstr_prfcnt_context *kinstr_ctx) { - if (WARN_ON(!kinstr_ctx)) + if (!kinstr_ctx) { + pr_warn("%s: kinstr_ctx is NULL\n", __func__); return; + } mutex_lock(&kinstr_ctx->lock); @@ -1297,8 +1298,10 @@ void kbase_kinstr_prfcnt_suspend(struct kbase_kinstr_prfcnt_context *kinstr_ctx) void kbase_kinstr_prfcnt_resume(struct kbase_kinstr_prfcnt_context *kinstr_ctx) { - if (WARN_ON(!kinstr_ctx)) + if (!kinstr_ctx) { + pr_warn("%s: kinstr_ctx is NULL\n", __func__); return; + } mutex_lock(&kinstr_ctx->lock); diff --git a/mali_kbase/mali_kbase_mem.c b/mali_kbase/mali_kbase_mem.c index 8912783..c07d520 100644 --- a/mali_kbase/mali_kbase_mem.c +++ b/mali_kbase/mali_kbase_mem.c @@ -43,7 +43,7 @@ #include <mmu/mali_kbase_mmu.h> #include <mali_kbase_config_defaults.h> #include <mali_kbase_trace_gpu_mem.h> - +#include <linux/version_compat_defs.h> #define VA_REGION_SLAB_NAME_PREFIX "va-region-slab-" #define VA_REGION_SLAB_NAME_SIZE (DEVNAME_SIZE + sizeof(VA_REGION_SLAB_NAME_PREFIX) + 1) @@ -101,56 +101,66 @@ static size_t kbase_get_num_cpu_va_bits(struct kbase_context *kctx) return cpu_va_bits; } -/* This function finds out which RB tree the given pfn from the GPU VA belongs - * to based on the memory zone the pfn refers to - */ -static struct rb_root *kbase_gpu_va_to_rbtree(struct kbase_context *kctx, - u64 gpu_pfn) +unsigned long kbase_zone_to_bits(enum kbase_memory_zone zone) { - struct rb_root *rbtree = NULL; + return ((((unsigned long)zone) & ((1 << KBASE_REG_ZONE_BITS) - 1ul)) + << KBASE_REG_ZONE_SHIFT); +} - struct kbase_reg_zone *exec_va_zone = kbase_ctx_reg_zone_get(kctx, KBASE_REG_ZONE_EXEC_VA); +enum kbase_memory_zone kbase_bits_to_zone(unsigned long zone_bits) +{ + return (enum kbase_memory_zone)(((zone_bits) & KBASE_REG_ZONE_MASK) + >> KBASE_REG_ZONE_SHIFT); +} +char *kbase_reg_zone_get_name(enum kbase_memory_zone zone) +{ + switch (zone) { + case SAME_VA_ZONE: + return "SAME_VA"; + case CUSTOM_VA_ZONE: + return "CUSTOM_VA"; + case EXEC_VA_ZONE: + return "EXEC_VA"; #if MALI_USE_CSF - struct kbase_reg_zone *fixed_va_zone = - kbase_ctx_reg_zone_get(kctx, KBASE_REG_ZONE_FIXED_VA); - - struct kbase_reg_zone *exec_fixed_va_zone = - kbase_ctx_reg_zone_get(kctx, KBASE_REG_ZONE_EXEC_FIXED_VA); - - if (gpu_pfn >= fixed_va_zone->base_pfn) { - rbtree = &kctx->reg_rbtree_fixed; - return rbtree; - } else if (gpu_pfn >= exec_fixed_va_zone->base_pfn) { - rbtree = &kctx->reg_rbtree_exec_fixed; - return rbtree; - } + case MCU_SHARED_ZONE: + return "MCU_SHARED"; + case EXEC_FIXED_VA_ZONE: + return "EXEC_FIXED_VA"; + case FIXED_VA_ZONE: + return "FIXED_VA"; #endif - if (gpu_pfn >= exec_va_zone->base_pfn) - rbtree = &kctx->reg_rbtree_exec; - else { - u64 same_va_end; + default: + return NULL; + } +} - if (kbase_ctx_compat_mode(kctx)) { - same_va_end = KBASE_REG_ZONE_CUSTOM_VA_BASE; - } else { - struct kbase_reg_zone *same_va_zone = - kbase_ctx_reg_zone_get(kctx, - KBASE_REG_ZONE_SAME_VA); - same_va_end = kbase_reg_zone_end_pfn(same_va_zone); - } +/** + * kbase_gpu_pfn_to_rbtree - find the rb-tree tracking the region with the 
indicated GPU + * page frame number + * @kctx: kbase context + * @gpu_pfn: GPU PFN address + * + * Context: any context. + * + * Return: reference to the rb-tree root, NULL if not found + */ +static struct rb_root *kbase_gpu_pfn_to_rbtree(struct kbase_context *kctx, u64 gpu_pfn) +{ + enum kbase_memory_zone zone_idx; + struct kbase_reg_zone *zone; - if (gpu_pfn >= same_va_end) - rbtree = &kctx->reg_rbtree_custom; - else - rbtree = &kctx->reg_rbtree_same; + for (zone_idx = 0; zone_idx < CONTEXT_ZONE_MAX; zone_idx++) { + zone = &kctx->reg_zone[zone_idx]; + if ((gpu_pfn >= zone->base_pfn) && (gpu_pfn < kbase_reg_zone_end_pfn(zone))) + return &zone->reg_rbtree; } - return rbtree; + return NULL; } /* This function inserts a region into the tree. */ -static void kbase_region_tracker_insert(struct kbase_va_region *new_reg) +void kbase_region_tracker_insert(struct kbase_va_region *new_reg) { u64 start_pfn = new_reg->start_pfn; struct rb_node **link = NULL; @@ -251,7 +261,9 @@ struct kbase_va_region *kbase_region_tracker_find_region_enclosing_address( lockdep_assert_held(&kctx->reg_lock); - rbtree = kbase_gpu_va_to_rbtree(kctx, gpu_pfn); + rbtree = kbase_gpu_pfn_to_rbtree(kctx, gpu_pfn); + if (unlikely(!rbtree)) + return NULL; return kbase_find_region_enclosing_address(rbtree, gpu_addr); } @@ -289,7 +301,9 @@ struct kbase_va_region *kbase_region_tracker_find_region_base_address( lockdep_assert_held(&kctx->reg_lock); - rbtree = kbase_gpu_va_to_rbtree(kctx, gpu_pfn); + rbtree = kbase_gpu_pfn_to_rbtree(kctx, gpu_pfn); + if (unlikely(!rbtree)) + return NULL; return kbase_find_region_base_address(rbtree, gpu_addr); } @@ -376,6 +390,7 @@ void kbase_remove_va_region(struct kbase_device *kbdev, struct kbase_va_region *reg) { struct rb_node *rbprev; + struct kbase_reg_zone *zone = container_of(reg->rbtree, struct kbase_reg_zone, reg_rbtree); struct kbase_va_region *prev = NULL; struct rb_node *rbnext; struct kbase_va_region *next = NULL; @@ -400,8 +415,8 @@ void kbase_remove_va_region(struct kbase_device *kbdev, */ u64 prev_end_pfn = prev->start_pfn + prev->nr_pages; - WARN_ON((prev->flags & KBASE_REG_ZONE_MASK) != - (reg->flags & KBASE_REG_ZONE_MASK)); + WARN_ON((kbase_bits_to_zone(prev->flags)) != + (kbase_bits_to_zone(reg->flags))); if (!WARN_ON(reg->start_pfn < prev_end_pfn)) prev->nr_pages += reg->start_pfn - prev_end_pfn; prev->nr_pages += reg->nr_pages; @@ -422,8 +437,8 @@ void kbase_remove_va_region(struct kbase_device *kbdev, */ u64 reg_end_pfn = reg->start_pfn + reg->nr_pages; - WARN_ON((next->flags & KBASE_REG_ZONE_MASK) != - (reg->flags & KBASE_REG_ZONE_MASK)); + WARN_ON((kbase_bits_to_zone(next->flags)) != + (kbase_bits_to_zone(reg->flags))); if (!WARN_ON(next->start_pfn < reg_end_pfn)) next->nr_pages += next->start_pfn - reg_end_pfn; next->start_pfn = reg->start_pfn; @@ -445,8 +460,7 @@ void kbase_remove_va_region(struct kbase_device *kbdev, */ struct kbase_va_region *free_reg; - free_reg = kbase_alloc_free_region(kbdev, reg_rbtree, reg->start_pfn, reg->nr_pages, - reg->flags & KBASE_REG_ZONE_MASK); + free_reg = kbase_alloc_free_region(zone, reg->start_pfn, reg->nr_pages); if (!free_reg) { /* In case of failure, we cannot allocate a replacement * free region, so we will be left with a 'gap' in the @@ -507,6 +521,8 @@ static int kbase_insert_va_region_nolock(struct kbase_device *kbdev, size_t nr_pages) { struct rb_root *reg_rbtree = NULL; + struct kbase_reg_zone *zone = + container_of(at_reg->rbtree, struct kbase_reg_zone, reg_rbtree); int err = 0; reg_rbtree = at_reg->rbtree; @@ -548,9 +564,8 
@@ static int kbase_insert_va_region_nolock(struct kbase_device *kbdev, else { struct kbase_va_region *new_front_reg; - new_front_reg = kbase_alloc_free_region(kbdev, reg_rbtree, at_reg->start_pfn, - start_pfn - at_reg->start_pfn, - at_reg->flags & KBASE_REG_ZONE_MASK); + new_front_reg = kbase_alloc_free_region(zone, at_reg->start_pfn, + start_pfn - at_reg->start_pfn); if (new_front_reg) { at_reg->nr_pages -= nr_pages + new_front_reg->nr_pages; @@ -603,9 +618,9 @@ int kbase_add_va_region(struct kbase_context *kctx, #endif if (!(reg->flags & KBASE_REG_GPU_NX) && !addr && #if MALI_USE_CSF - ((reg->flags & KBASE_REG_ZONE_MASK) != KBASE_REG_ZONE_EXEC_FIXED_VA) && + ((kbase_bits_to_zone(reg->flags)) != EXEC_FIXED_VA_ZONE) && #endif - ((reg->flags & KBASE_REG_ZONE_MASK) != KBASE_REG_ZONE_EXEC_VA)) { + ((kbase_bits_to_zone(reg->flags)) != EXEC_VA_ZONE)) { if (cpu_va_bits > gpu_pc_bits) { align = max(align, (size_t)((1ULL << gpu_pc_bits) >> PAGE_SHIFT)); @@ -623,8 +638,7 @@ int kbase_add_va_region(struct kbase_context *kctx, * then don't retry, we're out of VA and there is * nothing which can be done about it. */ - if ((reg->flags & KBASE_REG_ZONE_MASK) != - KBASE_REG_ZONE_CUSTOM_VA) + if ((kbase_bits_to_zone(reg->flags)) != CUSTOM_VA_ZONE) break; } while (kbase_jit_evict(kctx)); @@ -728,119 +742,27 @@ exit: return err; } -/* - * @brief Initialize the internal region tracker data structure. +/** + * kbase_reg_to_kctx - Obtain the kbase context tracking a VA region. + * @reg: VA region + * + * Return: + * * pointer to kbase context of the memory allocation + * * NULL if the region does not belong to a kbase context (for instance, + * if the allocation corresponds to a shared MCU region on CSF). */ -#if MALI_USE_CSF -static void kbase_region_tracker_ds_init(struct kbase_context *kctx, - struct kbase_va_region *same_va_reg, - struct kbase_va_region *custom_va_reg, - struct kbase_va_region *exec_va_reg, - struct kbase_va_region *exec_fixed_va_reg, - struct kbase_va_region *fixed_va_reg) -{ - u64 last_zone_end_pfn; - - kctx->reg_rbtree_same = RB_ROOT; - kbase_region_tracker_insert(same_va_reg); - - last_zone_end_pfn = same_va_reg->start_pfn + same_va_reg->nr_pages; - - /* Although custom_va_reg doesn't always exist, initialize - * unconditionally because of the mem_view debugfs - * implementation which relies on it being empty. - */ - kctx->reg_rbtree_custom = RB_ROOT; - kctx->reg_rbtree_exec = RB_ROOT; - - if (custom_va_reg) { - WARN_ON(custom_va_reg->start_pfn < last_zone_end_pfn); - kbase_region_tracker_insert(custom_va_reg); - last_zone_end_pfn = custom_va_reg->start_pfn + custom_va_reg->nr_pages; - } - - /* Initialize exec, fixed and exec_fixed. These are always - * initialized at this stage, if they will exist at all. 
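
The kbase_zone_to_bits()/kbase_bits_to_zone() helpers used in the hunks above replace the old KBASE_REG_ZONE_* flag constants with an enum packed into reg->flags. A round-trip sketch (illustrative; KBASE_REG_ZONE_SHIFT/BITS/MASK come from elsewhere in the tree):

static void sketch_zone_bits_round_trip(void)
{
	/* Pack the zone identifier into the region-flags bitfield... */
	const unsigned long bits = kbase_zone_to_bits(CUSTOM_VA_ZONE);

	/* ...and recover it with the inverse mapping. */
	WARN_ON(kbase_bits_to_zone(bits) != CUSTOM_VA_ZONE);
}
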
- */ - kctx->reg_rbtree_fixed = RB_ROOT; - kctx->reg_rbtree_exec_fixed = RB_ROOT; - - if (exec_va_reg) { - WARN_ON(exec_va_reg->start_pfn < last_zone_end_pfn); - kbase_region_tracker_insert(exec_va_reg); - last_zone_end_pfn = exec_va_reg->start_pfn + exec_va_reg->nr_pages; - } - - if (exec_fixed_va_reg) { - WARN_ON(exec_fixed_va_reg->start_pfn < last_zone_end_pfn); - kbase_region_tracker_insert(exec_fixed_va_reg); - last_zone_end_pfn = exec_fixed_va_reg->start_pfn + exec_fixed_va_reg->nr_pages; - } - - if (fixed_va_reg) { - WARN_ON(fixed_va_reg->start_pfn < last_zone_end_pfn); - kbase_region_tracker_insert(fixed_va_reg); - last_zone_end_pfn = fixed_va_reg->start_pfn + fixed_va_reg->nr_pages; - } -} -#else -static void kbase_region_tracker_ds_init(struct kbase_context *kctx, - struct kbase_va_region *same_va_reg, - struct kbase_va_region *custom_va_reg) -{ - kctx->reg_rbtree_same = RB_ROOT; - kbase_region_tracker_insert(same_va_reg); - - /* Although custom_va_reg and exec_va_reg don't always exist, - * initialize unconditionally because of the mem_view debugfs - * implementation which relies on them being empty. - * - * The difference between the two is that the EXEC_VA region - * is never initialized at this stage. - */ - kctx->reg_rbtree_custom = RB_ROOT; - kctx->reg_rbtree_exec = RB_ROOT; - - if (custom_va_reg) - kbase_region_tracker_insert(custom_va_reg); -} -#endif /* MALI_USE_CSF */ - -static struct kbase_context *kbase_reg_flags_to_kctx(struct kbase_va_region *reg) +static struct kbase_context *kbase_reg_to_kctx(struct kbase_va_region *reg) { - struct kbase_context *kctx = NULL; struct rb_root *rbtree = reg->rbtree; + struct kbase_reg_zone *zone = container_of(rbtree, struct kbase_reg_zone, reg_rbtree); - switch (reg->flags & KBASE_REG_ZONE_MASK) { - case KBASE_REG_ZONE_CUSTOM_VA: - kctx = container_of(rbtree, struct kbase_context, reg_rbtree_custom); - break; - case KBASE_REG_ZONE_SAME_VA: - kctx = container_of(rbtree, struct kbase_context, reg_rbtree_same); - break; - case KBASE_REG_ZONE_EXEC_VA: - kctx = container_of(rbtree, struct kbase_context, reg_rbtree_exec); - break; -#if MALI_USE_CSF - case KBASE_REG_ZONE_EXEC_FIXED_VA: - kctx = container_of(rbtree, struct kbase_context, reg_rbtree_exec_fixed); - break; - case KBASE_REG_ZONE_FIXED_VA: - kctx = container_of(rbtree, struct kbase_context, reg_rbtree_fixed); - break; - case KBASE_REG_ZONE_MCU_SHARED: - /* This is only expected to be called on driver unload. */ - break; -#endif - default: - WARN(1, "Unknown zone in region: flags=0x%lx\n", reg->flags); - break; - } + if (!kbase_is_ctx_reg_zone(zone->id)) + return NULL; - return kctx; + return container_of(zone - zone->id, struct kbase_context, reg_zone[0]); } -static void kbase_region_tracker_erase_rbtree(struct rb_root *rbtree) +void kbase_region_tracker_erase_rbtree(struct rb_root *rbtree) { struct rb_node *rbnode; struct kbase_va_region *reg; @@ -851,8 +773,12 @@ static void kbase_region_tracker_erase_rbtree(struct rb_root *rbtree) rb_erase(rbnode, rbtree); reg = rb_entry(rbnode, struct kbase_va_region, rblink); WARN_ON(kbase_refcount_read(®->va_refcnt) != 1); - if (kbase_page_migration_enabled) - kbase_gpu_munmap(kbase_reg_flags_to_kctx(reg), reg); + if (kbase_is_page_migration_enabled()) { + struct kbase_context *kctx = kbase_reg_to_kctx(reg); + + if (kctx) + kbase_gpu_munmap(kctx, reg); + } /* Reset the start_pfn - as the rbtree is being * destroyed and we've already erased this region, there * is no further need to attempt to remove it. 
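The return statement in kbase_reg_to_kctx() above leans on a subtle piece of pointer arithmetic: because a context keeps its zones in the reg_zone[] array indexed by zone ID, zone - zone->id always points back at reg_zone[0], from which container_of() recovers the enclosing context. A minimal standalone sketch of the same idiom, with toy types rather than the driver's:

#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct toy_zone { int id; };
struct toy_ctx { int tgid; struct toy_zone reg_zone[3]; };

int main(void)
{
	struct toy_ctx c = { .tgid = 42 };
	int i;

	for (i = 0; i < 3; i++)
		c.reg_zone[i].id = i;

	/* Pick any zone: subtracting its id lands on reg_zone[0], and
	 * container_of() then recovers the enclosing context.
	 */
	struct toy_zone *z = &c.reg_zone[2];
	struct toy_ctx *back = container_of(z - z->id, struct toy_ctx, reg_zone[0]);

	printf("%d\n", back->tgid); /* prints 42 */
	return 0;
}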
@@ -867,209 +793,261 @@ static void kbase_region_tracker_erase_rbtree(struct rb_root *rbtree) } while (rbnode); } -void kbase_region_tracker_term(struct kbase_context *kctx) -{ - WARN(kctx->as_nr != KBASEP_AS_NR_INVALID, - "kctx-%d_%d must first be scheduled out to flush GPU caches+tlbs before erasing remaining regions", - kctx->tgid, kctx->id); - - kbase_gpu_vm_lock(kctx); - kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_same); - kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_custom); - kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_exec); -#if MALI_USE_CSF - WARN_ON(!list_empty(&kctx->csf.event_pages_head)); - kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_exec_fixed); - kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_fixed); - -#endif - kbase_gpu_vm_unlock(kctx); -} - -void kbase_region_tracker_term_rbtree(struct rb_root *rbtree) -{ - kbase_region_tracker_erase_rbtree(rbtree); -} - static size_t kbase_get_same_va_bits(struct kbase_context *kctx) { return min_t(size_t, kbase_get_num_cpu_va_bits(kctx), kctx->kbdev->gpu_props.mmu.va_bits); } -int kbase_region_tracker_init(struct kbase_context *kctx) +static int kbase_reg_zone_same_va_init(struct kbase_context *kctx, u64 gpu_va_limit) { - struct kbase_va_region *same_va_reg; - struct kbase_va_region *custom_va_reg = NULL; - size_t same_va_bits = kbase_get_same_va_bits(kctx); - u64 custom_va_size = KBASE_REG_ZONE_CUSTOM_VA_SIZE; - u64 gpu_va_bits = kctx->kbdev->gpu_props.mmu.va_bits; - u64 gpu_va_limit = (1ULL << gpu_va_bits) >> PAGE_SHIFT; - u64 same_va_pages; - u64 same_va_base = 1u; int err; -#if MALI_USE_CSF - struct kbase_va_region *exec_va_reg; - struct kbase_va_region *exec_fixed_va_reg; - struct kbase_va_region *fixed_va_reg; - - u64 exec_va_base; - u64 fixed_va_end; - u64 exec_fixed_va_base; - u64 fixed_va_base; - u64 fixed_va_pages; -#endif - - /* Take the lock as kbase_free_alloced_region requires it */ - kbase_gpu_vm_lock(kctx); + struct kbase_reg_zone *zone = kbase_ctx_reg_zone_get(kctx, SAME_VA_ZONE); + const size_t same_va_bits = kbase_get_same_va_bits(kctx); + const u64 base_pfn = 1u; + u64 nr_pages = (1ULL << (same_va_bits - PAGE_SHIFT)) - base_pfn; - same_va_pages = (1ULL << (same_va_bits - PAGE_SHIFT)) - same_va_base; + lockdep_assert_held(&kctx->reg_lock); #if MALI_USE_CSF - if ((same_va_base + same_va_pages) > KBASE_REG_ZONE_EXEC_VA_BASE_64) { + if ((base_pfn + nr_pages) > KBASE_REG_ZONE_EXEC_VA_BASE_64) { /* Depending on how the kernel is configured, it's possible (eg on aarch64) for * same_va_bits to reach 48 bits. Cap same_va_pages so that the same_va zone * doesn't cross into the exec_va zone. 
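For concreteness, here is the SAME_VA sizing above worked through with assumed numbers (4 KiB pages, 48 VA bits, CSF build); the EXEC_VA base below mirrors the KBASE_REG_ZONE_EXEC_VA_BASE_64 definition appearing later in this patch:

#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT 12 /* assumed 4 KiB pages */
#define EXEC_VA_BASE_64 ((UINT64_C(1) << 47) >> PAGE_SHIFT)

int main(void)
{
	unsigned int same_va_bits = 48; /* e.g. aarch64 with 48-bit VA */
	uint64_t base_pfn = 1;
	uint64_t nr_pages = (UINT64_C(1) << (same_va_bits - PAGE_SHIFT)) - base_pfn;

	/* CSF: cap SAME_VA so it cannot cross into the EXEC_VA zone. */
	if (base_pfn + nr_pages > EXEC_VA_BASE_64)
		nr_pages = EXEC_VA_BASE_64 - base_pfn;

	printf("SAME_VA: pfn [%llu, %llu)\n", (unsigned long long)base_pfn,
	       (unsigned long long)(base_pfn + nr_pages));
	return 0;
}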
*/ - same_va_pages = KBASE_REG_ZONE_EXEC_VA_BASE_64 - same_va_base; + nr_pages = KBASE_REG_ZONE_EXEC_VA_BASE_64 - base_pfn; } #endif + err = kbase_reg_zone_init(kctx->kbdev, zone, SAME_VA_ZONE, base_pfn, nr_pages); + if (err) + return -ENOMEM; - /* all have SAME_VA */ - same_va_reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_same, same_va_base, - same_va_pages, KBASE_REG_ZONE_SAME_VA); + kctx->gpu_va_end = base_pfn + nr_pages; - if (!same_va_reg) { - err = -ENOMEM; - goto fail_unlock; - } - kbase_ctx_reg_zone_init(kctx, KBASE_REG_ZONE_SAME_VA, same_va_base, - same_va_pages); + return 0; +} - if (kbase_ctx_compat_mode(kctx)) { - if (gpu_va_limit <= KBASE_REG_ZONE_CUSTOM_VA_BASE) { - err = -EINVAL; - goto fail_free_same_va; - } - /* If the current size of TMEM is out of range of the - * virtual address space addressable by the MMU then - * we should shrink it to fit - */ - if ((KBASE_REG_ZONE_CUSTOM_VA_BASE + KBASE_REG_ZONE_CUSTOM_VA_SIZE) >= gpu_va_limit) - custom_va_size = gpu_va_limit - KBASE_REG_ZONE_CUSTOM_VA_BASE; +static void kbase_reg_zone_same_va_term(struct kbase_context *kctx) +{ + struct kbase_reg_zone *zone = kbase_ctx_reg_zone_get(kctx, SAME_VA_ZONE); - custom_va_reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_custom, - KBASE_REG_ZONE_CUSTOM_VA_BASE, - custom_va_size, KBASE_REG_ZONE_CUSTOM_VA); + kbase_reg_zone_term(zone); +} - if (!custom_va_reg) { - err = -ENOMEM; - goto fail_free_same_va; - } - kbase_ctx_reg_zone_init(kctx, KBASE_REG_ZONE_CUSTOM_VA, - KBASE_REG_ZONE_CUSTOM_VA_BASE, - custom_va_size); - } else { - custom_va_size = 0; - } +static int kbase_reg_zone_custom_va_init(struct kbase_context *kctx, u64 gpu_va_limit) +{ + struct kbase_reg_zone *zone = kbase_ctx_reg_zone_get(kctx, CUSTOM_VA_ZONE); + u64 nr_pages = KBASE_REG_ZONE_CUSTOM_VA_SIZE; -#if MALI_USE_CSF - /* The position of EXEC_VA depends on whether the client is 32-bit or 64-bit. */ - exec_va_base = KBASE_REG_ZONE_EXEC_VA_BASE_64; + /* If the context does not support CUSTOM_VA zones, then we don't need to + * proceed past this point, and can pretend that it was initialized properly. + * In practice, this will mean that the zone metadata structure will be zero + * initialized and not contain a valid zone ID. + */ + if (!kbase_ctx_compat_mode(kctx)) + return 0; + + if (gpu_va_limit <= KBASE_REG_ZONE_CUSTOM_VA_BASE) + return -EINVAL; - /* Similarly the end of the FIXED_VA zone also depends on whether the client - * is 32 or 64-bits. + /* If the current size of TMEM is out of range of the + * virtual address space addressable by the MMU then + * we should shrink it to fit */ - fixed_va_end = KBASE_REG_ZONE_FIXED_VA_END_64; + if ((KBASE_REG_ZONE_CUSTOM_VA_BASE + KBASE_REG_ZONE_CUSTOM_VA_SIZE) >= gpu_va_limit) + nr_pages = gpu_va_limit - KBASE_REG_ZONE_CUSTOM_VA_BASE; - if (kbase_ctx_compat_mode(kctx)) { - exec_va_base = KBASE_REG_ZONE_EXEC_VA_BASE_32; - fixed_va_end = KBASE_REG_ZONE_FIXED_VA_END_32; - } + if (kbase_reg_zone_init(kctx->kbdev, zone, CUSTOM_VA_ZONE, KBASE_REG_ZONE_CUSTOM_VA_BASE, + nr_pages)) + return -ENOMEM; - kbase_ctx_reg_zone_init(kctx, KBASE_REG_ZONE_EXEC_VA, exec_va_base, - KBASE_REG_ZONE_EXEC_VA_SIZE); + /* On JM systems, this is the last memory zone that gets initialized, + * so the GPU VA ends right after the end of the CUSTOM_VA zone. 
On CSF, + * setting here is harmless, as the FIXED_VA initializer will overwrite + * it + */ + kctx->gpu_va_end += nr_pages; - exec_va_reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_exec, exec_va_base, - KBASE_REG_ZONE_EXEC_VA_SIZE, KBASE_REG_ZONE_EXEC_VA); + return 0; +} - if (!exec_va_reg) { - err = -ENOMEM; - goto fail_free_custom_va; - } +static void kbase_reg_zone_custom_va_term(struct kbase_context *kctx) +{ + struct kbase_reg_zone *zone = kbase_ctx_reg_zone_get(kctx, CUSTOM_VA_ZONE); - exec_fixed_va_base = exec_va_base + KBASE_REG_ZONE_EXEC_VA_SIZE; + kbase_reg_zone_term(zone); +} - kbase_ctx_reg_zone_init(kctx, KBASE_REG_ZONE_EXEC_FIXED_VA, exec_fixed_va_base, - KBASE_REG_ZONE_EXEC_FIXED_VA_SIZE); +static inline u64 kbase_get_exec_va_zone_base(struct kbase_context *kctx) +{ + u64 base_pfn; - exec_fixed_va_reg = - kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_exec_fixed, - exec_fixed_va_base, KBASE_REG_ZONE_EXEC_FIXED_VA_SIZE, - KBASE_REG_ZONE_EXEC_FIXED_VA); +#if MALI_USE_CSF + base_pfn = KBASE_REG_ZONE_EXEC_VA_BASE_64; + if (kbase_ctx_compat_mode(kctx)) + base_pfn = KBASE_REG_ZONE_EXEC_VA_BASE_32; +#else + /* EXEC_VA zone's codepaths are slightly easier when its base_pfn is + * initially U64_MAX + */ + base_pfn = U64_MAX; +#endif - if (!exec_fixed_va_reg) { - err = -ENOMEM; - goto fail_free_exec_va; - } + return base_pfn; +} + +static inline int kbase_reg_zone_exec_va_init(struct kbase_context *kctx, u64 gpu_va_limit) +{ + struct kbase_reg_zone *zone = kbase_ctx_reg_zone_get(kctx, EXEC_VA_ZONE); + const u64 base_pfn = kbase_get_exec_va_zone_base(kctx); + u64 nr_pages = KBASE_REG_ZONE_EXEC_VA_SIZE; + +#if !MALI_USE_CSF + nr_pages = 0; +#endif - fixed_va_base = exec_fixed_va_base + KBASE_REG_ZONE_EXEC_FIXED_VA_SIZE; - fixed_va_pages = fixed_va_end - fixed_va_base; + return kbase_reg_zone_init(kctx->kbdev, zone, EXEC_VA_ZONE, base_pfn, nr_pages); +} - kbase_ctx_reg_zone_init(kctx, KBASE_REG_ZONE_FIXED_VA, fixed_va_base, fixed_va_pages); +static void kbase_reg_zone_exec_va_term(struct kbase_context *kctx) +{ + struct kbase_reg_zone *zone = kbase_ctx_reg_zone_get(kctx, EXEC_VA_ZONE); - fixed_va_reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_fixed, fixed_va_base, - fixed_va_pages, KBASE_REG_ZONE_FIXED_VA); + kbase_reg_zone_term(zone); +} + +#if MALI_USE_CSF +static inline u64 kbase_get_exec_fixed_va_zone_base(struct kbase_context *kctx) +{ + return kbase_get_exec_va_zone_base(kctx) + KBASE_REG_ZONE_EXEC_VA_SIZE; +} + +static int kbase_reg_zone_exec_fixed_va_init(struct kbase_context *kctx, u64 gpu_va_limit) +{ + struct kbase_reg_zone *zone = kbase_ctx_reg_zone_get(kctx, EXEC_FIXED_VA_ZONE); + const u64 base_pfn = kbase_get_exec_fixed_va_zone_base(kctx); + + return kbase_reg_zone_init(kctx->kbdev, zone, EXEC_FIXED_VA_ZONE, base_pfn, + KBASE_REG_ZONE_EXEC_FIXED_VA_SIZE); +} + +static void kbase_reg_zone_exec_fixed_va_term(struct kbase_context *kctx) +{ + struct kbase_reg_zone *zone = kbase_ctx_reg_zone_get(kctx, EXEC_FIXED_VA_ZONE); + + WARN_ON(!list_empty(&kctx->csf.event_pages_head)); + kbase_reg_zone_term(zone); +} + +static int kbase_reg_zone_fixed_va_init(struct kbase_context *kctx, u64 gpu_va_limit) +{ + struct kbase_reg_zone *zone = kbase_ctx_reg_zone_get(kctx, FIXED_VA_ZONE); + const u64 base_pfn = + kbase_get_exec_fixed_va_zone_base(kctx) + KBASE_REG_ZONE_EXEC_FIXED_VA_SIZE; + u64 fixed_va_end = KBASE_REG_ZONE_FIXED_VA_END_64; + u64 nr_pages; + + if (kbase_ctx_compat_mode(kctx)) + fixed_va_end = KBASE_REG_ZONE_FIXED_VA_END_32; + + nr_pages = 
fixed_va_end - base_pfn; + + if (kbase_reg_zone_init(kctx->kbdev, zone, FIXED_VA_ZONE, base_pfn, nr_pages)) + return -ENOMEM; kctx->gpu_va_end = fixed_va_end; - if (!fixed_va_reg) { - err = -ENOMEM; - goto fail_free_exec_fixed_va; - } + return 0; +} - kbase_region_tracker_ds_init(kctx, same_va_reg, custom_va_reg, exec_va_reg, - exec_fixed_va_reg, fixed_va_reg); +static void kbase_reg_zone_fixed_va_term(struct kbase_context *kctx) +{ + struct kbase_reg_zone *zone = kbase_ctx_reg_zone_get(kctx, FIXED_VA_ZONE); - INIT_LIST_HEAD(&kctx->csf.event_pages_head); -#else - /* EXEC_VA zone's codepaths are slightly easier when its base_pfn is - * initially U64_MAX - */ - kbase_ctx_reg_zone_init(kctx, KBASE_REG_ZONE_EXEC_VA, U64_MAX, 0u); - /* Other zones are 0: kbase_create_context() uses vzalloc */ + kbase_reg_zone_term(zone); +} +#endif + +typedef int kbase_memory_zone_init(struct kbase_context *kctx, u64 gpu_va_limit); +typedef void kbase_memory_zone_term(struct kbase_context *kctx); - kbase_region_tracker_ds_init(kctx, same_va_reg, custom_va_reg); - kctx->gpu_va_end = same_va_base + same_va_pages + custom_va_size; +struct kbase_memory_zone_init_meta { + kbase_memory_zone_init *init; + kbase_memory_zone_term *term; + char *error_msg; +}; + +static const struct kbase_memory_zone_init_meta zones_init[] = { + [SAME_VA_ZONE] = { kbase_reg_zone_same_va_init, kbase_reg_zone_same_va_term, + "Could not initialize SAME_VA zone" }, + [CUSTOM_VA_ZONE] = { kbase_reg_zone_custom_va_init, kbase_reg_zone_custom_va_term, + "Could not initialize CUSTOM_VA zone" }, + [EXEC_VA_ZONE] = { kbase_reg_zone_exec_va_init, kbase_reg_zone_exec_va_term, + "Could not initialize EXEC_VA zone" }, +#if MALI_USE_CSF + [EXEC_FIXED_VA_ZONE] = { kbase_reg_zone_exec_fixed_va_init, + kbase_reg_zone_exec_fixed_va_term, + "Could not initialize EXEC_FIXED_VA zone" }, + [FIXED_VA_ZONE] = { kbase_reg_zone_fixed_va_init, kbase_reg_zone_fixed_va_term, + "Could not initialize FIXED_VA zone" }, #endif - kctx->jit_va = false; +}; - kbase_gpu_vm_unlock(kctx); - return 0; +int kbase_region_tracker_init(struct kbase_context *kctx) +{ + const u64 gpu_va_bits = kctx->kbdev->gpu_props.mmu.va_bits; + const u64 gpu_va_limit = (1ULL << gpu_va_bits) >> PAGE_SHIFT; + int err; + unsigned int i; + /* Take the lock as kbase_free_alloced_region requires it */ + kbase_gpu_vm_lock(kctx); + + for (i = 0; i < ARRAY_SIZE(zones_init); i++) { + err = zones_init[i].init(kctx, gpu_va_limit); + if (unlikely(err)) { + dev_err(kctx->kbdev->dev, "%s, err = %d\n", zones_init[i].error_msg, err); + goto term; + } + } #if MALI_USE_CSF -fail_free_exec_fixed_va: - kbase_free_alloced_region(exec_fixed_va_reg); -fail_free_exec_va: - kbase_free_alloced_region(exec_va_reg); -fail_free_custom_va: - if (custom_va_reg) - kbase_free_alloced_region(custom_va_reg); + INIT_LIST_HEAD(&kctx->csf.event_pages_head); #endif + kctx->jit_va = false; + + kbase_gpu_vm_unlock(kctx); + + return 0; +term: + while (i-- > 0) + zones_init[i].term(kctx); -fail_free_same_va: - kbase_free_alloced_region(same_va_reg); -fail_unlock: kbase_gpu_vm_unlock(kctx); return err; } +void kbase_region_tracker_term(struct kbase_context *kctx) +{ + unsigned int i; + + WARN(kctx->as_nr != KBASEP_AS_NR_INVALID, + "kctx-%d_%d must first be scheduled out to flush GPU caches+tlbs before erasing remaining regions", + kctx->tgid, kctx->id); + + kbase_gpu_vm_lock(kctx); + + for (i = 0; i < ARRAY_SIZE(zones_init); i++) + zones_init[i].term(kctx); + + kbase_gpu_vm_unlock(kctx); +} + static bool kbase_has_exec_va_zone_locked(struct 
kbase_context *kctx) { struct kbase_reg_zone *exec_va_zone; lockdep_assert_held(&kctx->reg_lock); - exec_va_zone = kbase_ctx_reg_zone_get(kctx, KBASE_REG_ZONE_EXEC_VA); + exec_va_zone = kbase_ctx_reg_zone_get(kctx, EXEC_VA_ZONE); return (exec_va_zone->base_pfn != U64_MAX); } @@ -1109,16 +1087,16 @@ static bool kbase_region_tracker_has_allocs(struct kbase_context *kctx) lockdep_assert_held(&kctx->reg_lock); - for (zone_idx = 0; zone_idx < KBASE_REG_ZONE_MAX; ++zone_idx) { + for (zone_idx = 0; zone_idx < MEMORY_ZONE_MAX; zone_idx++) { struct kbase_reg_zone *zone; struct kbase_va_region *reg; u64 zone_base_addr; - unsigned long zone_bits = KBASE_REG_ZONE(zone_idx); - unsigned long reg_zone; + enum kbase_memory_zone reg_zone; - if (!kbase_is_ctx_reg_zone(zone_bits)) + if (!kbase_is_ctx_reg_zone(zone_idx)) continue; - zone = kbase_ctx_reg_zone_get(kctx, zone_bits); + + zone = kbase_ctx_reg_zone_get(kctx, zone_idx); zone_base_addr = zone->base_pfn << PAGE_SHIFT; reg = kbase_region_tracker_find_region_base_address( @@ -1126,21 +1104,21 @@ static bool kbase_region_tracker_has_allocs(struct kbase_context *kctx) if (!zone->va_size_pages) { WARN(reg, - "Should not have found a region that starts at 0x%.16llx for zone 0x%lx", - (unsigned long long)zone_base_addr, zone_bits); + "Should not have found a region that starts at 0x%.16llx for zone %s", + (unsigned long long)zone_base_addr, kbase_reg_zone_get_name(zone_idx)); continue; } if (WARN(!reg, - "There should always be a region that starts at 0x%.16llx for zone 0x%lx, couldn't find it", - (unsigned long long)zone_base_addr, zone_bits)) + "There should always be a region that starts at 0x%.16llx for zone %s, couldn't find it", + (unsigned long long)zone_base_addr, kbase_reg_zone_get_name(zone_idx))) return true; /* Safest return value */ - reg_zone = reg->flags & KBASE_REG_ZONE_MASK; - if (WARN(reg_zone != zone_bits, - "The region that starts at 0x%.16llx should be in zone 0x%lx but was found in the wrong zone 0x%lx", - (unsigned long long)zone_base_addr, zone_bits, - reg_zone)) + reg_zone = kbase_bits_to_zone(reg->flags); + if (WARN(reg_zone != zone_idx, + "The region that starts at 0x%.16llx should be in zone %s but was found in the wrong zone %s", + (unsigned long long)zone_base_addr, kbase_reg_zone_get_name(zone_idx), + kbase_reg_zone_get_name(reg_zone))) return true; /* Safest return value */ /* Unless the region is completely free, of the same size as @@ -1161,10 +1139,8 @@ static int kbase_region_tracker_init_jit_64(struct kbase_context *kctx, u64 jit_va_pages) { struct kbase_va_region *same_va_reg; - struct kbase_reg_zone *same_va_zone; + struct kbase_reg_zone *same_va_zone, *custom_va_zone; u64 same_va_zone_base_addr; - const unsigned long same_va_zone_bits = KBASE_REG_ZONE_SAME_VA; - struct kbase_va_region *custom_va_reg; u64 jit_va_start; lockdep_assert_held(&kctx->reg_lock); @@ -1175,14 +1151,14 @@ static int kbase_region_tracker_init_jit_64(struct kbase_context *kctx, * cause an overlap to happen with existing same VA allocations and the * custom VA zone. 
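The zones_init[] table above drives both kbase_region_tracker_init() and its failure unwind; the shape of that pattern, reduced to toy step functions:

#include <stdio.h>

typedef int zone_init_fn(void);
typedef void zone_term_fn(void);

static int ok_init(void) { return 0; }
static int failing_init(void) { return -12; /* -ENOMEM */ }
static void zone_term(void) { printf("unwound one zone\n"); }

static const struct { zone_init_fn *init; zone_term_fn *term; } zones_init[] = {
	{ ok_init, zone_term },
	{ ok_init, zone_term },
	{ failing_init, zone_term }, /* fails; the two successes unwind */
};

int main(void)
{
	unsigned int i;
	int err = 0;

	for (i = 0; i < sizeof(zones_init) / sizeof(zones_init[0]); i++) {
		err = zones_init[i].init();
		if (err)
			goto term;
	}
	return 0;
term:
	/* Tear down only what succeeded, in reverse order. */
	while (i-- > 0)
		zones_init[i].term();
	return 1;
}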
*/ - same_va_zone = kbase_ctx_reg_zone_get(kctx, same_va_zone_bits); + same_va_zone = kbase_ctx_reg_zone_get(kctx, SAME_VA_ZONE); same_va_zone_base_addr = same_va_zone->base_pfn << PAGE_SHIFT; same_va_reg = kbase_region_tracker_find_region_base_address( kctx, same_va_zone_base_addr); if (WARN(!same_va_reg, - "Already found a free region at the start of every zone, but now cannot find any region for zone base 0x%.16llx zone 0x%lx", - (unsigned long long)same_va_zone_base_addr, same_va_zone_bits)) + "Already found a free region at the start of every zone, but now cannot find any region for zone SAME_VA base 0x%.16llx", + (unsigned long long)same_va_zone_base_addr)) return -ENOMEM; /* kbase_region_tracker_has_allocs() in the caller has already ensured @@ -1203,24 +1179,15 @@ static int kbase_region_tracker_init_jit_64(struct kbase_context *kctx, /* * Create a custom VA zone at the end of the VA for allocations which - * JIT can use so it doesn't have to allocate VA from the kernel. + * JIT can use so it doesn't have to allocate VA from the kernel. Note + * that while the zone has already been zero-initialized during the + * region tracker initialization, we can just overwrite it. */ - custom_va_reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_custom, jit_va_start, - jit_va_pages, KBASE_REG_ZONE_CUSTOM_VA); - - /* - * The context will be destroyed if we fail here so no point - * reverting the change we made to same_va. - */ - if (!custom_va_reg) + custom_va_zone = kbase_ctx_reg_zone_get(kctx, CUSTOM_VA_ZONE); + if (kbase_reg_zone_init(kctx->kbdev, custom_va_zone, CUSTOM_VA_ZONE, jit_va_start, + jit_va_pages)) return -ENOMEM; - /* Since this is 64-bit, the custom zone will not have been - * initialized, so initialize it now - */ - kbase_ctx_reg_zone_init(kctx, KBASE_REG_ZONE_CUSTOM_VA, jit_va_start, - jit_va_pages); - kbase_region_tracker_insert(custom_va_reg); return 0; } @@ -1291,12 +1258,11 @@ exit_unlock: int kbase_region_tracker_init_exec(struct kbase_context *kctx, u64 exec_va_pages) { #if !MALI_USE_CSF - struct kbase_va_region *exec_va_reg; struct kbase_reg_zone *exec_va_zone; struct kbase_reg_zone *target_zone; struct kbase_va_region *target_reg; u64 target_zone_base_addr; - unsigned long target_zone_bits; + enum kbase_memory_zone target_zone_id; u64 exec_va_start; int err; #endif @@ -1342,20 +1308,21 @@ int kbase_region_tracker_init_exec(struct kbase_context *kctx, u64 exec_va_pages if (kbase_ctx_compat_mode(kctx)) { /* 32-bit client: take from CUSTOM_VA zone */ - target_zone_bits = KBASE_REG_ZONE_CUSTOM_VA; + target_zone_id = CUSTOM_VA_ZONE; } else { /* 64-bit client: take from SAME_VA zone */ - target_zone_bits = KBASE_REG_ZONE_SAME_VA; + target_zone_id = SAME_VA_ZONE; } - target_zone = kbase_ctx_reg_zone_get(kctx, target_zone_bits); + target_zone = kbase_ctx_reg_zone_get(kctx, target_zone_id); target_zone_base_addr = target_zone->base_pfn << PAGE_SHIFT; target_reg = kbase_region_tracker_find_region_base_address( kctx, target_zone_base_addr); if (WARN(!target_reg, - "Already found a free region at the start of every zone, but now cannot find any region for zone base 0x%.16llx zone 0x%lx", - (unsigned long long)target_zone_base_addr, target_zone_bits)) { + "Already found a free region at the start of every zone, but now cannot find any region for zone base 0x%.16llx zone %s", + (unsigned long long)target_zone_base_addr, + kbase_reg_zone_get_name(target_zone_id))) { err = -ENOMEM; goto exit_unlock; } @@ -1374,26 +1341,14 @@ int kbase_region_tracker_init_exec(struct 
kbase_context *kctx, u64 exec_va_pages /* Taken from the end of the target zone */ exec_va_start = kbase_reg_zone_end_pfn(target_zone) - exec_va_pages; - - exec_va_reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_exec, exec_va_start, - exec_va_pages, KBASE_REG_ZONE_EXEC_VA); - if (!exec_va_reg) { - err = -ENOMEM; - goto exit_unlock; - } - /* Update EXEC_VA zone - * - * not using kbase_ctx_reg_zone_init() - it was already initialized - */ - exec_va_zone = kbase_ctx_reg_zone_get(kctx, KBASE_REG_ZONE_EXEC_VA); - exec_va_zone->base_pfn = exec_va_start; - exec_va_zone->va_size_pages = exec_va_pages; + exec_va_zone = kbase_ctx_reg_zone_get(kctx, EXEC_VA_ZONE); + if (kbase_reg_zone_init(kctx->kbdev, exec_va_zone, EXEC_VA_ZONE, exec_va_start, + exec_va_pages)) { + err = -ENOMEM; + goto exit_unlock; + } /* Update target zone and corresponding region */ target_reg->nr_pages -= exec_va_pages; target_zone->va_size_pages -= exec_va_pages; - - kbase_region_tracker_insert(exec_va_reg); err = 0; exit_unlock: @@ -1405,28 +1360,13 @@ exit_unlock: #if MALI_USE_CSF void kbase_mcu_shared_interface_region_tracker_term(struct kbase_device *kbdev) { - kbase_region_tracker_term_rbtree(&kbdev->csf.shared_reg_rbtree); + kbase_reg_zone_term(&kbdev->csf.mcu_shared_zone); } int kbase_mcu_shared_interface_region_tracker_init(struct kbase_device *kbdev) { - struct kbase_va_region *shared_reg; - u64 shared_reg_start_pfn; - u64 shared_reg_size; - - shared_reg_start_pfn = KBASE_REG_ZONE_MCU_SHARED_BASE; - shared_reg_size = KBASE_REG_ZONE_MCU_SHARED_SIZE; - - kbdev->csf.shared_reg_rbtree = RB_ROOT; - - shared_reg = - kbase_alloc_free_region(kbdev, &kbdev->csf.shared_reg_rbtree, shared_reg_start_pfn, - shared_reg_size, KBASE_REG_ZONE_MCU_SHARED); - if (!shared_reg) - return -ENOMEM; - - kbase_region_tracker_insert(shared_reg); - return 0; + return kbase_reg_zone_init(kbdev, &kbdev->csf.mcu_shared_zone, MCU_SHARED_ZONE, + KBASE_REG_ZONE_MCU_SHARED_BASE, MCU_SHARED_ZONE_SIZE); } #endif @@ -1583,33 +1523,31 @@ KBASE_EXPORT_TEST_API(kbase_mem_term); /** * kbase_alloc_free_region - Allocate a free region object. * - * @kbdev: kbase device - * @rbtree: Backlink to the red-black tree of memory regions. + * @zone: CUSTOM_VA_ZONE or SAME_VA_ZONE * @start_pfn: The Page Frame Number in GPU virtual address space. * @nr_pages: The size of the region in pages. - * @zone: KBASE_REG_ZONE_CUSTOM_VA or KBASE_REG_ZONE_SAME_VA * * The allocated object is not part of any list yet, and is flagged as * KBASE_REG_FREE. No mapping is allocated yet. * - * zone is KBASE_REG_ZONE_CUSTOM_VA or KBASE_REG_ZONE_SAME_VA. - * * Return: pointer to the allocated region object on success, NULL otherwise. 
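The EXEC_VA carve-out above is pure interval arithmetic: the new zone is taken from the tail of the target zone, which shrinks by the same amount. A toy model with assumed numbers:

#include <stdint.h>
#include <stdio.h>

struct toy_zone { uint64_t base_pfn, va_size_pages; };

static uint64_t zone_end_pfn(const struct toy_zone *z)
{
	return z->base_pfn + z->va_size_pages;
}

int main(void)
{
	struct toy_zone target = { .base_pfn = 1, .va_size_pages = 1ull << 20 };
	uint64_t exec_va_pages = 4096;

	/* EXEC_VA is taken from the end of the target zone... */
	uint64_t exec_va_start = zone_end_pfn(&target) - exec_va_pages;

	/* ...and the target zone (and its free region) shrink to match. */
	target.va_size_pages -= exec_va_pages;

	printf("EXEC_VA starts at pfn %llu; target now ends at pfn %llu\n",
	       (unsigned long long)exec_va_start,
	       (unsigned long long)zone_end_pfn(&target));
	return 0;
}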
*/ -struct kbase_va_region *kbase_alloc_free_region(struct kbase_device *kbdev, struct rb_root *rbtree, - u64 start_pfn, size_t nr_pages, int zone) +struct kbase_va_region *kbase_alloc_free_region(struct kbase_reg_zone *zone, u64 start_pfn, + size_t nr_pages) { struct kbase_va_region *new_reg; - KBASE_DEBUG_ASSERT(rbtree != NULL); - - /* zone argument should only contain zone related region flags */ - KBASE_DEBUG_ASSERT((zone & ~KBASE_REG_ZONE_MASK) == 0); KBASE_DEBUG_ASSERT(nr_pages > 0); /* 64-bit address range is the max */ KBASE_DEBUG_ASSERT(start_pfn + nr_pages <= (U64_MAX / PAGE_SIZE)); - new_reg = kmem_cache_zalloc(kbdev->va_region_slab, GFP_KERNEL); + if (WARN_ON(!zone)) + return NULL; + + if (unlikely(!zone->base_pfn || !zone->va_size_pages)) + return NULL; + + new_reg = kmem_cache_zalloc(zone->cache, GFP_KERNEL); if (!new_reg) return NULL; @@ -1618,8 +1556,8 @@ struct kbase_va_region *kbase_alloc_free_region(struct kbase_device *kbdev, stru atomic_set(&new_reg->no_user_free_count, 0); new_reg->cpu_alloc = NULL; /* no alloc bound yet */ new_reg->gpu_alloc = NULL; /* no alloc bound yet */ - new_reg->rbtree = rbtree; - new_reg->flags = zone | KBASE_REG_FREE; + new_reg->rbtree = &zone->reg_rbtree; + new_reg->flags = kbase_zone_to_bits(zone->id) | KBASE_REG_FREE; new_reg->flags |= KBASE_REG_GROWABLE; @@ -1631,9 +1569,17 @@ struct kbase_va_region *kbase_alloc_free_region(struct kbase_device *kbdev, stru return new_reg; } - KBASE_EXPORT_TEST_API(kbase_alloc_free_region); +struct kbase_va_region *kbase_ctx_alloc_free_region(struct kbase_context *kctx, + enum kbase_memory_zone id, u64 start_pfn, + size_t nr_pages) +{ + struct kbase_reg_zone *zone = kbase_ctx_reg_zone_get_nolock(kctx, id); + + return kbase_alloc_free_region(zone, start_pfn, nr_pages); +} + /** * kbase_free_alloced_region - Free a region object. * @@ -1645,19 +1591,18 @@ KBASE_EXPORT_TEST_API(kbase_alloc_free_region); * alloc object will be released. * It is a bug if no alloc object exists for non-free regions. 
* - * If region is KBASE_REG_ZONE_MCU_SHARED it is freed + * If region is MCU_SHARED_ZONE it is freed */ void kbase_free_alloced_region(struct kbase_va_region *reg) { #if MALI_USE_CSF - if ((reg->flags & KBASE_REG_ZONE_MASK) == - KBASE_REG_ZONE_MCU_SHARED) { + if (kbase_bits_to_zone(reg->flags) == MCU_SHARED_ZONE) { kfree(reg); return; } #endif if (!(reg->flags & KBASE_REG_FREE)) { - struct kbase_context *kctx = kbase_reg_flags_to_kctx(reg); + struct kbase_context *kctx = kbase_reg_to_kctx(reg); if (WARN_ON(!kctx)) return; @@ -1665,8 +1610,8 @@ void kbase_free_alloced_region(struct kbase_va_region *reg) if (WARN_ON(kbase_is_region_invalid(reg))) return; - dev_dbg(kctx->kbdev->dev, "Freeing memory region %pK\n", - (void *)reg); + dev_dbg(kctx->kbdev->dev, "Freeing memory region %pK of zone %s\n", (void *)reg, + kbase_reg_zone_get_name(kbase_bits_to_zone(reg->flags))); #if MALI_USE_CSF if (reg->flags & KBASE_REG_CSF_EVENT) /* @@ -1802,8 +1747,7 @@ int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg, } else { if (reg->gpu_alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM || reg->gpu_alloc->type == KBASE_MEM_TYPE_IMPORTED_USER_BUF) { - - err = kbase_mmu_insert_imported_pages( + err = kbase_mmu_insert_pages_skip_status_update( kctx->kbdev, &kctx->mmu, reg->start_pfn, kbase_get_gpu_phy_pages(reg), kbase_reg_current_backed_size(reg), reg->flags & gwt_mask, kctx->as_nr, group_id, mmu_sync_info, reg); @@ -1812,7 +1756,7 @@ int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg, kbase_get_gpu_phy_pages(reg), kbase_reg_current_backed_size(reg), reg->flags & gwt_mask, kctx->as_nr, group_id, - mmu_sync_info, reg, true); + mmu_sync_info, reg); } if (err) @@ -1856,8 +1800,7 @@ bad_aliased_insert: kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn + (i * stride), phys_alloc, alloc->imported.alias.aliased[i].length, - alloc->imported.alias.aliased[i].length, kctx->as_nr, - false); + alloc->imported.alias.aliased[i].length, kctx->as_nr); } bad_insert: kbase_remove_va_region(kctx->kbdev, reg); @@ -1868,7 +1811,7 @@ bad_insert: KBASE_EXPORT_TEST_API(kbase_gpu_mmap); static void kbase_jd_user_buf_unmap(struct kbase_context *kctx, struct kbase_mem_phy_alloc *alloc, - struct kbase_va_region *reg, bool writeable); + struct kbase_va_region *reg); int kbase_gpu_munmap(struct kbase_context *kctx, struct kbase_va_region *reg) { @@ -1889,9 +1832,8 @@ int kbase_gpu_munmap(struct kbase_context *kctx, struct kbase_va_region *reg) size_t i = 0; /* Due to the way the number of valid PTEs and ATEs are tracked * currently, only the GPU virtual range that is backed & mapped - * should be passed to the kbase_mmu_teardown_pages() function, - * hence individual aliased regions needs to be unmapped - * separately. + * should be passed to the page teardown function, hence individual + * aliased regions need to be unmapped separately. 
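A toy illustration of the per-alias teardown walk this comment describes: each backed sub-range at start_pfn + i * stride is torn down on its own, and unbacked entries are skipped (stand-in types, no real MMU calls):

#include <stdint.h>
#include <stdio.h>

struct toy_aliased { uint64_t length; int backed; };

int main(void)
{
	struct toy_aliased aliased[3] = { { 16, 1 }, { 16, 0 }, { 8, 1 } };
	uint64_t start_pfn = 0x1000, stride = 16;
	unsigned int i;

	for (i = 0; i < 3; i++) {
		if (!aliased[i].backed) /* stand-in for a NULL phys alloc */
			continue;
		printf("teardown pfn 0x%llx..0x%llx\n",
		       (unsigned long long)(start_pfn + i * stride),
		       (unsigned long long)(start_pfn + i * stride + aliased[i].length));
	}
	return 0;
}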
*/ for (i = 0; i < alloc->imported.alias.nents; i++) { struct tagged_addr *phys_alloc = NULL; @@ -1905,8 +1847,7 @@ int kbase_gpu_munmap(struct kbase_context *kctx, struct kbase_va_region *reg) kctx->kbdev, &kctx->mmu, reg->start_pfn + (i * alloc->imported.alias.stride), phys_alloc, alloc->imported.alias.aliased[i].length, - alloc->imported.alias.aliased[i].length, kctx->as_nr, - false); + alloc->imported.alias.aliased[i].length, kctx->as_nr); if (WARN_ON_ONCE(err_loop)) err = err_loop; @@ -1928,17 +1869,19 @@ int kbase_gpu_munmap(struct kbase_context *kctx, struct kbase_va_region *reg) if (reg->flags & KBASE_REG_IMPORT_PAD) nr_phys_pages = alloc->nents + 1; - err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, - alloc->pages, nr_phys_pages, nr_virt_pages, - kctx->as_nr, true); + err = kbase_mmu_teardown_imported_pages(kctx->kbdev, &kctx->mmu, + reg->start_pfn, alloc->pages, + nr_phys_pages, nr_virt_pages, + kctx->as_nr); } break; case KBASE_MEM_TYPE_IMPORTED_USER_BUF: { size_t nr_reg_pages = kbase_reg_current_backed_size(reg); - err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, - alloc->pages, nr_reg_pages, nr_reg_pages, - kctx->as_nr, true); + err = kbase_mmu_teardown_imported_pages(kctx->kbdev, &kctx->mmu, + reg->start_pfn, alloc->pages, + nr_reg_pages, nr_reg_pages, + kctx->as_nr); } break; default: { @@ -1946,7 +1889,7 @@ int kbase_gpu_munmap(struct kbase_context *kctx, struct kbase_va_region *reg) err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, alloc->pages, nr_reg_pages, nr_reg_pages, - kctx->as_nr, false); + kctx->as_nr); } break; } @@ -1966,9 +1909,7 @@ int kbase_gpu_munmap(struct kbase_context *kctx, struct kbase_va_region *reg) /* The allocation could still have active mappings. 
*/ if (user_buf->current_mapping_usage_count == 0) { - kbase_jd_user_buf_unmap(kctx, alloc, reg, - (reg->flags & - (KBASE_REG_CPU_WR | KBASE_REG_GPU_WR))); + kbase_jd_user_buf_unmap(kctx, alloc, reg); } } } @@ -2112,18 +2053,18 @@ void kbase_sync_single(struct kbase_context *kctx, dma_addr = kbase_dma_addr_from_tagged(t_gpu_pa) + offset; if (sync_fn == KBASE_SYNC_TO_DEVICE) { - src = ((unsigned char *)kmap(cpu_page)) + offset; - dst = ((unsigned char *)kmap(gpu_page)) + offset; + src = ((unsigned char *)kbase_kmap(cpu_page)) + offset; + dst = ((unsigned char *)kbase_kmap(gpu_page)) + offset; } else if (sync_fn == KBASE_SYNC_TO_CPU) { dma_sync_single_for_cpu(kctx->kbdev->dev, dma_addr, size, DMA_BIDIRECTIONAL); - src = ((unsigned char *)kmap(gpu_page)) + offset; - dst = ((unsigned char *)kmap(cpu_page)) + offset; + src = ((unsigned char *)kbase_kmap(gpu_page)) + offset; + dst = ((unsigned char *)kbase_kmap(cpu_page)) + offset; } memcpy(dst, src, size); - kunmap(gpu_page); - kunmap(cpu_page); + kbase_kunmap(gpu_page, src); + kbase_kunmap(cpu_page, dst); if (sync_fn == KBASE_SYNC_TO_DEVICE) dma_sync_single_for_device(kctx->kbdev->dev, dma_addr, size, DMA_BIDIRECTIONAL); @@ -2303,8 +2244,8 @@ int kbase_mem_free_region(struct kbase_context *kctx, struct kbase_va_region *re } #if MALI_USE_CSF - if (((reg->flags & KBASE_REG_ZONE_MASK) == KBASE_REG_ZONE_FIXED_VA) || - ((reg->flags & KBASE_REG_ZONE_MASK) == KBASE_REG_ZONE_EXEC_FIXED_VA)) { + if (((kbase_bits_to_zone(reg->flags)) == FIXED_VA_ZONE) || + ((kbase_bits_to_zone(reg->flags)) == EXEC_FIXED_VA_ZONE)) { if (reg->flags & KBASE_REG_FIXED_ADDRESS) atomic64_dec(&kctx->num_fixed_allocs); else @@ -2381,7 +2322,7 @@ int kbase_mem_free(struct kbase_context *kctx, u64 gpu_addr) goto out_unlock; } - if ((reg->flags & KBASE_REG_ZONE_MASK) == KBASE_REG_ZONE_SAME_VA) { + if ((kbase_bits_to_zone(reg->flags)) == SAME_VA_ZONE) { /* SAME_VA must be freed through munmap */ dev_warn(kctx->kbdev->dev, "%s called on SAME_VA memory 0x%llX", __func__, gpu_addr); @@ -2544,6 +2485,7 @@ int kbase_alloc_phy_pages_helper(struct kbase_mem_phy_alloc *alloc, * allocation is visible to the OOM killer */ kbase_process_page_usage_inc(kctx, nr_pages_requested); + kbase_trace_gpu_mem_usage_inc(kctx->kbdev, kctx, nr_pages_requested); tp = alloc->pages + alloc->nents; @@ -2665,8 +2607,6 @@ no_new_partial: alloc->nents += nr_pages_requested; - kbase_trace_gpu_mem_usage_inc(kctx->kbdev, kctx, nr_pages_requested); - done: return 0; @@ -2676,19 +2616,13 @@ alloc_failed: size_t nr_pages_to_free = nr_pages_requested - nr_left; alloc->nents += nr_pages_to_free; - - kbase_process_page_usage_inc(kctx, nr_pages_to_free); - atomic_add(nr_pages_to_free, &kctx->used_pages); - atomic_add(nr_pages_to_free, - &kctx->kbdev->memdev.used_pages); - kbase_free_phy_pages_helper(alloc, nr_pages_to_free); } - kbase_process_page_usage_dec(kctx, nr_pages_requested); - atomic_sub(nr_pages_requested, &kctx->used_pages); - atomic_sub(nr_pages_requested, - &kctx->kbdev->memdev.used_pages); + kbase_trace_gpu_mem_usage_dec(kctx->kbdev, kctx, nr_left); + kbase_process_page_usage_dec(kctx, nr_left); + atomic_sub(nr_left, &kctx->used_pages); + atomic_sub(nr_left, &kctx->kbdev->memdev.used_pages); invalid_request: return -ENOMEM; @@ -2737,6 +2671,7 @@ struct tagged_addr *kbase_alloc_phy_pages_helper_locked( * allocation is visible to the OOM killer */ kbase_process_page_usage_inc(kctx, nr_pages_requested); + kbase_trace_gpu_mem_usage_inc(kctx->kbdev, kctx, nr_pages_requested); tp = alloc->pages + alloc->nents; 
new_pages = tp; @@ -2839,8 +2774,6 @@ struct tagged_addr *kbase_alloc_phy_pages_helper_locked( alloc->nents += nr_pages_requested; - kbase_trace_gpu_mem_usage_inc(kctx->kbdev, kctx, nr_pages_requested); - done: return new_pages; @@ -2877,6 +2810,7 @@ alloc_failed: } } + kbase_trace_gpu_mem_usage_dec(kctx->kbdev, kctx, nr_pages_requested); kbase_process_page_usage_dec(kctx, nr_pages_requested); atomic_sub(nr_pages_requested, &kctx->used_pages); atomic_sub(nr_pages_requested, &kctx->kbdev->memdev.used_pages); @@ -4539,7 +4473,7 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx, /* A suitable JIT allocation existed on the evict list, so we need * to make sure that the NOT_MOVABLE property is cleared. */ - if (kbase_page_migration_enabled) { + if (kbase_is_page_migration_enabled()) { kbase_gpu_vm_lock(kctx); mutex_lock(&kctx->jit_evict_lock); kbase_set_phy_alloc_page_status(reg->gpu_alloc, ALLOCATED_MAPPED); @@ -4717,14 +4651,14 @@ void kbase_jit_free(struct kbase_context *kctx, struct kbase_va_region *reg) * by page migration. Once freed, they will enter into the page migration * state machine via the mempools. */ - if (kbase_page_migration_enabled) + if (kbase_is_page_migration_enabled()) kbase_set_phy_alloc_page_status(reg->gpu_alloc, NOT_MOVABLE); mutex_unlock(&kctx->jit_evict_lock); } void kbase_jit_backing_lost(struct kbase_va_region *reg) { - struct kbase_context *kctx = kbase_reg_flags_to_kctx(reg); + struct kbase_context *kctx = kbase_reg_to_kctx(reg); if (WARN_ON(!kctx)) return; @@ -5035,6 +4969,15 @@ static int kbase_jd_user_buf_map(struct kbase_context *kctx, * MMU operations. */ const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC; + bool write; + enum dma_data_direction dma_dir; + + /* If neither the CPU nor the GPU needs write access, use DMA_TO_DEVICE + * to avoid potentially-destructive CPU cache invalidates that could + * cause corruption of user data. + */ + write = reg->flags & (KBASE_REG_CPU_WR | KBASE_REG_GPU_WR); + dma_dir = write ? 
DMA_BIDIRECTIONAL : DMA_TO_DEVICE; lockdep_assert_held(&kctx->reg_lock); @@ -5068,9 +5011,9 @@ static int kbase_jd_user_buf_map(struct kbase_context *kctx, for (i = 0; i < pinned_pages; i++) { dma_addr_t dma_addr; #if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) - dma_addr = dma_map_page(dev, pages[i], 0, PAGE_SIZE, DMA_BIDIRECTIONAL); + dma_addr = dma_map_page(dev, pages[i], 0, PAGE_SIZE, dma_dir); #else - dma_addr = dma_map_page_attrs(dev, pages[i], 0, PAGE_SIZE, DMA_BIDIRECTIONAL, + dma_addr = dma_map_page_attrs(dev, pages[i], 0, PAGE_SIZE, dma_dir, DMA_ATTR_SKIP_CPU_SYNC); #endif err = dma_mapping_error(dev, dma_addr); @@ -5080,7 +5023,7 @@ static int kbase_jd_user_buf_map(struct kbase_context *kctx, alloc->imported.user_buf.dma_addrs[i] = dma_addr; pa[i] = as_tagged(page_to_phys(pages[i])); - dma_sync_single_for_device(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); + dma_sync_single_for_device(dev, dma_addr, PAGE_SIZE, dma_dir); } #ifdef CONFIG_MALI_CINSTR_GWT @@ -5088,10 +5031,10 @@ static int kbase_jd_user_buf_map(struct kbase_context *kctx, gwt_mask = ~KBASE_REG_GPU_WR; #endif - err = kbase_mmu_insert_imported_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, pa, - kbase_reg_current_backed_size(reg), - reg->flags & gwt_mask, kctx->as_nr, alloc->group_id, - mmu_sync_info, NULL); + err = kbase_mmu_insert_pages_skip_status_update(kctx->kbdev, &kctx->mmu, reg->start_pfn, pa, + kbase_reg_current_backed_size(reg), - reg->flags & gwt_mask, kctx->as_nr, + reg->flags & gwt_mask, kctx->as_nr, + alloc->group_id, mmu_sync_info, NULL); if (err == 0) return 0; @@ -5111,12 +5054,11 @@ unwind: for (i = 0; i < dma_mapped_pages; i++) { dma_addr_t dma_addr = alloc->imported.user_buf.dma_addrs[i]; - dma_sync_single_for_device(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); + dma_sync_single_for_device(dev, dma_addr, PAGE_SIZE, dma_dir); #if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) - dma_unmap_page(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); + dma_unmap_page(dev, dma_addr, PAGE_SIZE, dma_dir); #else - dma_unmap_page_attrs(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL, - DMA_ATTR_SKIP_CPU_SYNC); + dma_unmap_page_attrs(dev, dma_addr, PAGE_SIZE, dma_dir, DMA_ATTR_SKIP_CPU_SYNC); #endif } @@ -5134,17 +5076,113 @@ unwind: return err; } +/* user_buf_sync_read_only_page - This function handles syncing a single page that has read-only + * access on both the CPU and GPU, so it is ready to be unmapped. + * @kctx: kbase context + * @imported_size: the number of bytes to sync + * @dma_addr: DMA address of the bytes to be sync'd + * @offset_within_page: (unused) offset of the bytes within the page. Passed so that the calling + * signature is identical to user_buf_sync_writable_page(). + */ +static void user_buf_sync_read_only_page(struct kbase_context *kctx, unsigned long imported_size, + dma_addr_t dma_addr, unsigned long offset_within_page) +{ + /* Manual cache synchronization. + * + * Writes from neither the CPU nor GPU are possible via this mapping, + * so we just sync the entire page to the device. + */ + dma_sync_single_for_device(kctx->kbdev->dev, dma_addr, imported_size, DMA_TO_DEVICE); +} + +/* user_buf_sync_writable_page - This function handles syncing a single page that has read + * and write access, from either (or both of) the CPU and GPU, + * so it is ready to be unmapped. + * @kctx: kbase context + * @imported_size: the number of bytes to unmap + * @dma_addr: DMA address of the bytes to be unmapped + * @offset_within_page: offset of the bytes within the page. 
This is the offset to the subrange of + * the memory that is "imported" and so is intended for GPU access. Areas of + * the page outside of this - whilst still GPU accessible - are not intended + * for use by GPU work, and should also not be modified as the userspace CPU + * threads may be modifying them. + */ +static void user_buf_sync_writable_page(struct kbase_context *kctx, unsigned long imported_size, + dma_addr_t dma_addr, unsigned long offset_within_page) +{ + /* Manual CPU cache synchronization. + * + * When the GPU returns ownership of the buffer to the CPU, the driver + * needs to treat imported and non-imported memory differently. + * + * The first case to consider is non-imported sub-regions at the + * beginning of the first page and at the end of the last page. For these + * sub-regions: CPU cache shall be committed with a clean+invalidate, + * in order to keep the last CPU write. + * + * Imported region prefers the opposite treatment: this memory has been + * legitimately mapped and used by the GPU, hence GPU writes shall be + * committed to memory, while CPU cache shall be invalidated to make + * sure that CPU reads the correct memory content. + * + * The following diagram shows the expected value of the variables + * used in this loop in the corner case of an imported region enclosed + * by a single memory page: + * + * page boundary ->|---------- | <- dma_addr (initial value) + * | | + * | - - - - - | <- offset_within_page + * |XXXXXXXXXXX|\ + * |XXXXXXXXXXX| \ + * |XXXXXXXXXXX| }- imported_size + * |XXXXXXXXXXX| / + * |XXXXXXXXXXX|/ + * | - - - - - | <- offset_within_page + imported_size + * | |\ + * | | }- PAGE_SIZE - imported_size - + * | |/ offset_within_page + * | | + * page boundary ->|-----------| + * + * If the imported region is enclosed by more than one page, then + * offset_within_page = 0 for any page after the first. + */ + + /* Only for first page: handle non-imported range at the beginning. */ + if (offset_within_page > 0) { + dma_sync_single_for_device(kctx->kbdev->dev, dma_addr, offset_within_page, + DMA_BIDIRECTIONAL); + dma_addr += offset_within_page; + } + + /* For every page: handle imported range. */ + if (imported_size > 0) + dma_sync_single_for_cpu(kctx->kbdev->dev, dma_addr, imported_size, + DMA_BIDIRECTIONAL); + + /* Only for last page (that may coincide with first page): + * handle non-imported range at the end. + */ + if ((imported_size + offset_within_page) < PAGE_SIZE) { + dma_addr += imported_size; + dma_sync_single_for_device(kctx->kbdev->dev, dma_addr, + PAGE_SIZE - imported_size - offset_within_page, + DMA_BIDIRECTIONAL); + } +} + /* This function would also perform the work of unpinning pages on Job Manager * GPUs, which implies that a call to kbase_jd_user_buf_pin_pages() will NOT * have a corresponding call to kbase_jd_user_buf_unpin_pages(). 
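Worked numbers for the three sync ranges computed by user_buf_sync_writable_page() above, assuming a 4 KiB page that encloses the whole imported range:

#include <stdio.h>

#define PAGE_SIZE 4096ul /* assumed */

int main(void)
{
	unsigned long offset_within_page = 512, imported_size = 1024;

	/* Head (first page only): clean+invalidate the non-imported prefix. */
	printf("sync_for_device [0, %lu)\n", offset_within_page);

	/* Body (every page): invalidate so the CPU sees the GPU's writes. */
	printf("sync_for_cpu    [%lu, %lu)\n", offset_within_page,
	       offset_within_page + imported_size);

	/* Tail (last page only): clean+invalidate the non-imported suffix. */
	printf("sync_for_device [%lu, %lu)\n",
	       offset_within_page + imported_size, PAGE_SIZE);
	return 0;
}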
*/ static void kbase_jd_user_buf_unmap(struct kbase_context *kctx, struct kbase_mem_phy_alloc *alloc, - struct kbase_va_region *reg, bool writeable) + struct kbase_va_region *reg) { long i; struct page **pages; unsigned long offset_within_page = alloc->imported.user_buf.address & ~PAGE_MASK; unsigned long remaining_size = alloc->imported.user_buf.size; + bool writable = (reg->flags & (KBASE_REG_CPU_WR | KBASE_REG_GPU_WR)); lockdep_assert_held(&kctx->reg_lock); @@ -5153,8 +5191,6 @@ static void kbase_jd_user_buf_unmap(struct kbase_context *kctx, struct kbase_mem #if !MALI_USE_CSF kbase_mem_shrink_cpu_mapping(kctx, reg, 0, alloc->nents); -#else - CSTD_UNUSED(reg); #endif for (i = 0; i < alloc->imported.user_buf.nr_pages; i++) { @@ -5173,75 +5209,24 @@ static void kbase_jd_user_buf_unmap(struct kbase_context *kctx, struct kbase_mem * whole memory page. */ dma_addr_t dma_addr = alloc->imported.user_buf.dma_addrs[i]; + enum dma_data_direction dma_dir = writable ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE; - /* Manual CPU cache synchronization. - * - * When the GPU returns ownership of the buffer to the CPU, the driver - * needs to treat imported and non-imported memory differently. - * - * The first case to consider is non-imported sub-regions at the - * beginning of the first page and at the end of last page. For these - * sub-regions: CPU cache shall be committed with a clean+invalidate, - * in order to keep the last CPU write. - * - * Imported region prefers the opposite treatment: this memory has been - * legitimately mapped and used by the GPU, hence GPU writes shall be - * committed to memory, while CPU cache shall be invalidated to make - * sure that CPU reads the correct memory content. - * - * The following diagram shows the expect value of the variables - * used in this loop in the corner case of an imported region encloed - * by a single memory page: - * - * page boundary ->|---------- | <- dma_addr (initial value) - * | | - * | - - - - - | <- offset_within_page - * |XXXXXXXXXXX|\ - * |XXXXXXXXXXX| \ - * |XXXXXXXXXXX| }- imported_size - * |XXXXXXXXXXX| / - * |XXXXXXXXXXX|/ - * | - - - - - | <- offset_within_page + imported_size - * | |\ - * | | }- PAGE_SIZE - imported_size - offset_within_page - * | |/ - * page boundary ->|-----------| - * - * If the imported region is enclosed by more than one page, then - * offset_within_page = 0 for any page after the first. - */ - - /* Only for first page: handle non-imported range at the beginning. */ - if (offset_within_page > 0) { - dma_sync_single_for_device(kctx->kbdev->dev, dma_addr, offset_within_page, - DMA_BIDIRECTIONAL); - dma_addr += offset_within_page; - } - - /* For every page: handle imported range. */ - if (imported_size > 0) - dma_sync_single_for_cpu(kctx->kbdev->dev, dma_addr, imported_size, - DMA_BIDIRECTIONAL); - - /* Only for last page (that may coincide with first page): - * handle non-imported range at the end. - */ - if ((imported_size + offset_within_page) < PAGE_SIZE) { - dma_addr += imported_size; - dma_sync_single_for_device(kctx->kbdev->dev, dma_addr, - PAGE_SIZE - imported_size - offset_within_page, - DMA_BIDIRECTIONAL); - } + if (writable) + user_buf_sync_writable_page(kctx, imported_size, dma_addr, + offset_within_page); + else + user_buf_sync_read_only_page(kctx, imported_size, dma_addr, + offset_within_page); - /* Notice: use the original DMA address to unmap the whole memory page. */ + /* Notice: use the original DMA address to unmap the whole memory page. 
*/ #if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) dma_unmap_page(kctx->kbdev->dev, alloc->imported.user_buf.dma_addrs[i], PAGE_SIZE, - DMA_BIDIRECTIONAL); + dma_dir); #else dma_unmap_page_attrs(kctx->kbdev->dev, alloc->imported.user_buf.dma_addrs[i], - PAGE_SIZE, DMA_BIDIRECTIONAL, DMA_ATTR_SKIP_CPU_SYNC); + PAGE_SIZE, dma_dir, DMA_ATTR_SKIP_CPU_SYNC); #endif - if (writeable) + if (writable) set_page_dirty_lock(pages[i]); #if !MALI_USE_CSF kbase_unpin_user_buf_page(pages[i]); @@ -5260,7 +5245,8 @@ int kbase_mem_copy_to_pinned_user_pages(struct page **dest_pages, void *src_page, size_t *to_copy, unsigned int nr_pages, unsigned int *target_page_nr, size_t offset) { - void *target_page = kmap(dest_pages[*target_page_nr]); + void *target_page = kbase_kmap(dest_pages[*target_page_nr]); + size_t chunk = PAGE_SIZE-offset; if (!target_page) { @@ -5273,13 +5259,13 @@ int kbase_mem_copy_to_pinned_user_pages(struct page **dest_pages, memcpy(target_page + offset, src_page, chunk); *to_copy -= chunk; - kunmap(dest_pages[*target_page_nr]); + kbase_kunmap(dest_pages[*target_page_nr], target_page); *target_page_nr += 1; if (*target_page_nr >= nr_pages || *to_copy == 0) return 0; - target_page = kmap(dest_pages[*target_page_nr]); + target_page = kbase_kmap(dest_pages[*target_page_nr]); if (!target_page) { pr_err("%s: kmap failure", __func__); return -ENOMEM; @@ -5291,7 +5277,7 @@ int kbase_mem_copy_to_pinned_user_pages(struct page **dest_pages, memcpy(target_page, src_page + PAGE_SIZE-offset, chunk); *to_copy -= chunk; - kunmap(dest_pages[*target_page_nr]); + kbase_kunmap(dest_pages[*target_page_nr], target_page); return 0; } @@ -5358,20 +5344,14 @@ void kbase_unmap_external_resource(struct kbase_context *kctx, struct kbase_va_r alloc->imported.user_buf.current_mapping_usage_count--; if (alloc->imported.user_buf.current_mapping_usage_count == 0) { - bool writeable = true; - if (!kbase_is_region_invalid_or_free(reg)) { - kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, - alloc->pages, - kbase_reg_current_backed_size(reg), - kbase_reg_current_backed_size(reg), - kctx->as_nr, true); + kbase_mmu_teardown_imported_pages( + kctx->kbdev, &kctx->mmu, reg->start_pfn, alloc->pages, + kbase_reg_current_backed_size(reg), + kbase_reg_current_backed_size(reg), kctx->as_nr); } - if ((reg->flags & (KBASE_REG_CPU_WR | KBASE_REG_GPU_WR)) == 0) - writeable = false; - - kbase_jd_user_buf_unmap(kctx, alloc, reg, writeable); + kbase_jd_user_buf_unmap(kctx, alloc, reg); } } break; diff --git a/mali_kbase/mali_kbase_mem.h b/mali_kbase/mali_kbase_mem.h index 02e5509..aa67717 100644 --- a/mali_kbase/mali_kbase_mem.h +++ b/mali_kbase/mali_kbase_mem.h @@ -62,6 +62,186 @@ static inline void kbase_process_page_usage_inc(struct kbase_context *kctx, #define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_HW_ISSUE_8316 (1u << KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2_HW_ISSUE_8316) #define KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_HW_ISSUE_9630 (1u << KBASEP_TMEM_GROWABLE_BLOCKSIZE_PAGES_LOG2_HW_ISSUE_9630) +/* Free region */ +#define KBASE_REG_FREE (1ul << 0) +/* CPU write access */ +#define KBASE_REG_CPU_WR (1ul << 1) +/* GPU write access */ +#define KBASE_REG_GPU_WR (1ul << 2) +/* No eXecute flag */ +#define KBASE_REG_GPU_NX (1ul << 3) +/* Is CPU cached? */ +#define KBASE_REG_CPU_CACHED (1ul << 4) +/* Is GPU cached? + * Some components within the GPU might only be able to access memory that is + * GPU cacheable. Refer to the specific GPU implementation for more details. 
+ */ +#define KBASE_REG_GPU_CACHED (1ul << 5) + +#define KBASE_REG_GROWABLE (1ul << 6) +/* Can grow on pf? */ +#define KBASE_REG_PF_GROW (1ul << 7) + +/* Allocation doesn't straddle the 4GB boundary in GPU virtual space */ +#define KBASE_REG_GPU_VA_SAME_4GB_PAGE (1ul << 8) + +/* inner shareable coherency */ +#define KBASE_REG_SHARE_IN (1ul << 9) +/* inner & outer shareable coherency */ +#define KBASE_REG_SHARE_BOTH (1ul << 10) + +#if MALI_USE_CSF +/* Space for 8 different zones */ +#define KBASE_REG_ZONE_BITS 3 +#else +/* Space for 4 different zones */ +#define KBASE_REG_ZONE_BITS 2 +#endif + +/* The bits 11-13 (inclusive) of the kbase_va_region flags are reserved + * for information about the zone in which it was allocated. + */ +#define KBASE_REG_ZONE_SHIFT (11ul) +#define KBASE_REG_ZONE_MASK (((1 << KBASE_REG_ZONE_BITS) - 1ul) << KBASE_REG_ZONE_SHIFT) + +#if KBASE_REG_ZONE_MAX > (1 << KBASE_REG_ZONE_BITS) +#error "Too many zones for the number of zone bits defined" +#endif + +/* GPU read access */ +#define KBASE_REG_GPU_RD (1ul << 14) +/* CPU read access */ +#define KBASE_REG_CPU_RD (1ul << 15) + +/* Index of chosen MEMATTR for this region (0..7) */ +#define KBASE_REG_MEMATTR_MASK (7ul << 16) +#define KBASE_REG_MEMATTR_INDEX(x) (((x)&7) << 16) +#define KBASE_REG_MEMATTR_VALUE(x) (((x)&KBASE_REG_MEMATTR_MASK) >> 16) + +#define KBASE_REG_PROTECTED (1ul << 19) + +/* Region belongs to a shrinker. + * + * This can either mean that it is part of the JIT/Ephemeral or tiler heap + * shrinker paths. Should be removed only after making sure that there are + * no references remaining to it in these paths, as it may cause the physical + * backing of the region to disappear during use. + */ +#define KBASE_REG_DONT_NEED (1ul << 20) + +/* Imported buffer is padded? */ +#define KBASE_REG_IMPORT_PAD (1ul << 21) + +#if MALI_USE_CSF +/* CSF event memory */ +#define KBASE_REG_CSF_EVENT (1ul << 22) +/* Bit 23 is reserved. + * + * Do not remove, use the next unreserved bit for new flags + */ +#define KBASE_REG_RESERVED_BIT_23 (1ul << 23) +#else +/* Bit 22 is reserved. + * + * Do not remove, use the next unreserved bit for new flags + */ +#define KBASE_REG_RESERVED_BIT_22 (1ul << 22) +/* The top of the initial commit is aligned to extension pages. + * Extent must be a power of 2 + */ +#define KBASE_REG_TILER_ALIGN_TOP (1ul << 23) +#endif /* MALI_USE_CSF */ + +/* Bit 24 is currently unused and is available for use for a new flag */ + +/* Memory has permanent kernel side mapping */ +#define KBASE_REG_PERMANENT_KERNEL_MAPPING (1ul << 25) + +/* GPU VA region has been freed by the userspace, but still remains allocated + * due to the reference held by CPU mappings created on the GPU VA region. + * + * A region with this flag set has had kbase_gpu_munmap() called on it, but can + * still be looked-up in the region tracker as a non-free region. Hence must + * not create or update any more GPU mappings on such regions because they will + * not be unmapped when the region is finally destroyed. + * + * Since such regions are still present in the region tracker, new allocations + * attempted with BASE_MEM_SAME_VA might fail if their address intersects with + * a region with this flag set. + * + * In addition, this flag indicates the gpu_alloc member might no longer be valid + * e.g. in infinite cache simulation. + */ +#define KBASE_REG_VA_FREED (1ul << 26) + +/* If set, the heap info address points to a u32 holding the used size in bytes; + * otherwise it points to a u64 holding the lowest address of unused memory. 
+ */ +#define KBASE_REG_HEAP_INFO_IS_SIZE (1ul << 27) + +/* Allocation is actively used for JIT memory */ +#define KBASE_REG_ACTIVE_JIT_ALLOC (1ul << 28) + +#if MALI_USE_CSF +/* This flag only applies to allocations in the EXEC_FIXED_VA and FIXED_VA + * memory zones, and it determines whether they were created with a fixed + * GPU VA address requested by the user. + */ +#define KBASE_REG_FIXED_ADDRESS (1ul << 29) +#else +#define KBASE_REG_RESERVED_BIT_29 (1ul << 29) +#endif + +#define KBASE_REG_ZONE_CUSTOM_VA_BASE (0x100000000ULL >> PAGE_SHIFT) + +#if MALI_USE_CSF +/* only used with 32-bit clients */ +/* On a 32bit platform, custom VA should be wired from 4GB to 2^(43). + */ +#define KBASE_REG_ZONE_CUSTOM_VA_SIZE (((1ULL << 43) >> PAGE_SHIFT) - KBASE_REG_ZONE_CUSTOM_VA_BASE) +#else +/* only used with 32-bit clients */ +/* On a 32bit platform, custom VA should be wired from 4GB to the VA limit of the + * GPU. Unfortunately, the Linux mmap() interface limits us to 2^32 pages (2^44 + * bytes, see mmap64 man page for reference). So we put the default limit to the + * maximum possible on Linux and shrink it down, if required by the GPU, during + * initialization. + */ +#define KBASE_REG_ZONE_CUSTOM_VA_SIZE (((1ULL << 44) >> PAGE_SHIFT) - KBASE_REG_ZONE_CUSTOM_VA_BASE) +/* end 32-bit clients only */ +#endif + +/* The starting address and size of the GPU-executable zone are dynamic + * and depend on the platform and the number of pages requested by the + * user process, with an upper limit of 4 GB. + */ +#define KBASE_REG_ZONE_EXEC_VA_MAX_PAGES ((1ULL << 32) >> PAGE_SHIFT) /* 4 GB */ +#define KBASE_REG_ZONE_EXEC_VA_SIZE KBASE_REG_ZONE_EXEC_VA_MAX_PAGES + +#if MALI_USE_CSF +#define KBASE_REG_ZONE_MCU_SHARED_BASE (0x04000000ULL >> PAGE_SHIFT) +#define MCU_SHARED_ZONE_SIZE (((0x08000000ULL) >> PAGE_SHIFT) - KBASE_REG_ZONE_MCU_SHARED_BASE) + +/* For CSF GPUs, the EXEC_VA zone is always 4GB in size, and starts at 2^47 for 64-bit + * clients, and 2^43 for 32-bit clients. + */ +#define KBASE_REG_ZONE_EXEC_VA_BASE_64 ((1ULL << 47) >> PAGE_SHIFT) +#define KBASE_REG_ZONE_EXEC_VA_BASE_32 ((1ULL << 43) >> PAGE_SHIFT) +/* Executable zone supporting FIXED/FIXABLE allocations. + * It is always 4GB in size. + */ +#define KBASE_REG_ZONE_EXEC_FIXED_VA_SIZE KBASE_REG_ZONE_EXEC_VA_MAX_PAGES + +/* Non-executable zone supporting FIXED/FIXABLE allocations. + * It extends from (2^47) up to (2^48)-1, for 64-bit userspace clients, and from + * (2^43) up to (2^44)-1 for 32-bit userspace clients. For the same reason, + * the end of the FIXED_VA zone for 64-bit clients is (2^48)-1. + */ +#define KBASE_REG_ZONE_FIXED_VA_END_64 ((1ULL << 48) >> PAGE_SHIFT) +#define KBASE_REG_ZONE_FIXED_VA_END_32 ((1ULL << 44) >> PAGE_SHIFT) + +#endif + /* * A CPU mapping */ @@ -307,6 +487,32 @@ enum kbase_jit_report_flags { }; /** + * kbase_zone_to_bits - Convert a memory zone @zone to the corresponding + * bitpattern, for ORing together with other flags. + * @zone: Memory zone + * + * Return: Bitpattern with the appropriate bits set. 
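Plausible implementations of the two converters documented here, inferred from the KBASE_REG_ZONE_SHIFT and KBASE_REG_ZONE_MASK definitions above; the driver's own definitions live in the .c file and may differ in detail:

#include <stdio.h>

#define KBASE_REG_ZONE_BITS 3 /* CSF build assumed */
#define KBASE_REG_ZONE_SHIFT 11
#define KBASE_REG_ZONE_MASK (((1ul << KBASE_REG_ZONE_BITS) - 1ul) << KBASE_REG_ZONE_SHIFT)

enum kbase_memory_zone { SAME_VA_ZONE, CUSTOM_VA_ZONE, EXEC_VA_ZONE };

static unsigned long kbase_zone_to_bits(enum kbase_memory_zone zone)
{
	return ((unsigned long)zone << KBASE_REG_ZONE_SHIFT) & KBASE_REG_ZONE_MASK;
}

static enum kbase_memory_zone kbase_bits_to_zone(unsigned long zone_bits)
{
	return (enum kbase_memory_zone)((zone_bits & KBASE_REG_ZONE_MASK) >>
					KBASE_REG_ZONE_SHIFT);
}

int main(void)
{
	unsigned long flags = kbase_zone_to_bits(CUSTOM_VA_ZONE) | 1ul /* KBASE_REG_FREE */;

	printf("zone id %d\n", (int)kbase_bits_to_zone(flags)); /* prints 1 */
	return 0;
}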
+ */
+unsigned long kbase_zone_to_bits(enum kbase_memory_zone zone);
+
+/**
+ * kbase_bits_to_zone - Convert the bitpattern @zone_bits to the corresponding
+ * zone identifier
+ * @zone_bits: Memory allocation flag containing a zone pattern
+ *
+ * Return: Zone identifier for valid zone bitpatterns.
+ */
+enum kbase_memory_zone kbase_bits_to_zone(unsigned long zone_bits);
+
+/**
+ * kbase_reg_zone_get_name - Get the string name for a given memory zone
+ * @zone: Memory zone identifier
+ *
+ * Return: string for valid memory zone, NULL otherwise
+ */
+char *kbase_reg_zone_get_name(enum kbase_memory_zone zone);
+
+/**
 * kbase_set_phy_alloc_page_status - Set the page migration status of the underlying
 * physical allocation.
 * @alloc: the physical allocation containing the pages whose metadata is going
@@ -449,204 +655,6 @@ struct kbase_va_region {
 size_t nr_pages;
 size_t initial_commit;
 size_t threshold_pages;
-
-/* Free region */
-#define KBASE_REG_FREE (1ul << 0)
-/* CPU write access */
-#define KBASE_REG_CPU_WR (1ul << 1)
-/* GPU write access */
-#define KBASE_REG_GPU_WR (1ul << 2)
-/* No eXecute flag */
-#define KBASE_REG_GPU_NX (1ul << 3)
-/* Is CPU cached? */
-#define KBASE_REG_CPU_CACHED (1ul << 4)
-/* Is GPU cached?
- * Some components within the GPU might only be able to access memory that is
- * GPU cacheable. Refer to the specific GPU implementation for more details.
- */
-#define KBASE_REG_GPU_CACHED (1ul << 5)
-
-#define KBASE_REG_GROWABLE (1ul << 6)
-/* Can grow on pf? */
-#define KBASE_REG_PF_GROW (1ul << 7)
-
-/* Allocation doesn't straddle the 4GB boundary in GPU virtual space */
-#define KBASE_REG_GPU_VA_SAME_4GB_PAGE (1ul << 8)
-
-/* inner shareable coherency */
-#define KBASE_REG_SHARE_IN (1ul << 9)
-/* inner & outer shareable coherency */
-#define KBASE_REG_SHARE_BOTH (1ul << 10)
-
-#if MALI_USE_CSF
-/* Space for 8 different zones */
-#define KBASE_REG_ZONE_BITS 3
-#else
-/* Space for 4 different zones */
-#define KBASE_REG_ZONE_BITS 2
-#endif
-
-#define KBASE_REG_ZONE_MASK (((1 << KBASE_REG_ZONE_BITS) - 1ul) << 11)
-#define KBASE_REG_ZONE(x) (((x) & ((1 << KBASE_REG_ZONE_BITS) - 1ul)) << 11)
-#define KBASE_REG_ZONE_IDX(x) (((x) & KBASE_REG_ZONE_MASK) >> 11)
-
-#if KBASE_REG_ZONE_MAX > (1 << KBASE_REG_ZONE_BITS)
-#error "Too many zones for the number of zone bits defined"
-#endif
-
-/* GPU read access */
-#define KBASE_REG_GPU_RD (1ul << 14)
-/* CPU read access */
-#define KBASE_REG_CPU_RD (1ul << 15)
-
-/* Index of chosen MEMATTR for this region (0..7) */
-#define KBASE_REG_MEMATTR_MASK (7ul << 16)
-#define KBASE_REG_MEMATTR_INDEX(x) (((x) & 7) << 16)
-#define KBASE_REG_MEMATTR_VALUE(x) (((x) & KBASE_REG_MEMATTR_MASK) >> 16)
-
-#define KBASE_REG_PROTECTED (1ul << 19)
-
-/* Region belongs to a shrinker.
- *
- * This can either mean that it is part of the JIT/Ephemeral or tiler heap
- * shrinker paths. Should be removed only after making sure that there are
- * no references remaining to it in these paths, as it may cause the physical
- * backing of the region to disappear during use.
- */
-#define KBASE_REG_DONT_NEED (1ul << 20)
-
-/* Imported buffer is padded? */
-#define KBASE_REG_IMPORT_PAD (1ul << 21)
-
-#if MALI_USE_CSF
-/* CSF event memory */
-#define KBASE_REG_CSF_EVENT (1ul << 22)
-#else
-/* Bit 22 is reserved.
- *
- * Do not remove, use the next unreserved bit for new flags
- */
-#define KBASE_REG_RESERVED_BIT_22 (1ul << 22)
-#endif
-
-#if !MALI_USE_CSF
-/* The top of the initial commit is aligned to extension pages.
- * Extent must be a power of 2 - */ -#define KBASE_REG_TILER_ALIGN_TOP (1ul << 23) -#else -/* Bit 23 is reserved. - * - * Do not remove, use the next unreserved bit for new flags - */ -#define KBASE_REG_RESERVED_BIT_23 (1ul << 23) -#endif /* !MALI_USE_CSF */ - -/* Bit 24 is currently unused and is available for use for a new flag */ - -/* Memory has permanent kernel side mapping */ -#define KBASE_REG_PERMANENT_KERNEL_MAPPING (1ul << 25) - -/* GPU VA region has been freed by the userspace, but still remains allocated - * due to the reference held by CPU mappings created on the GPU VA region. - * - * A region with this flag set has had kbase_gpu_munmap() called on it, but can - * still be looked-up in the region tracker as a non-free region. Hence must - * not create or update any more GPU mappings on such regions because they will - * not be unmapped when the region is finally destroyed. - * - * Since such regions are still present in the region tracker, new allocations - * attempted with BASE_MEM_SAME_VA might fail if their address intersects with - * a region with this flag set. - * - * In addition, this flag indicates the gpu_alloc member might no longer valid - * e.g. in infinite cache simulation. - */ -#define KBASE_REG_VA_FREED (1ul << 26) - -/* If set, the heap info address points to a u32 holding the used size in bytes; - * otherwise it points to a u64 holding the lowest address of unused memory. - */ -#define KBASE_REG_HEAP_INFO_IS_SIZE (1ul << 27) - -/* Allocation is actively used for JIT memory */ -#define KBASE_REG_ACTIVE_JIT_ALLOC (1ul << 28) - -#if MALI_USE_CSF -/* This flag only applies to allocations in the EXEC_FIXED_VA and FIXED_VA - * memory zones, and it determines whether they were created with a fixed - * GPU VA address requested by the user. - */ -#define KBASE_REG_FIXED_ADDRESS (1ul << 29) -#else -#define KBASE_REG_RESERVED_BIT_29 (1ul << 29) -#endif - -#define KBASE_REG_ZONE_SAME_VA KBASE_REG_ZONE(0) - -#define KBASE_REG_ZONE_CUSTOM_VA KBASE_REG_ZONE(1) -#define KBASE_REG_ZONE_CUSTOM_VA_BASE (0x100000000ULL >> PAGE_SHIFT) - -#if MALI_USE_CSF -/* only used with 32-bit clients */ -/* On a 32bit platform, custom VA should be wired from 4GB to 2^(43). - */ -#define KBASE_REG_ZONE_CUSTOM_VA_SIZE \ - (((1ULL << 43) >> PAGE_SHIFT) - KBASE_REG_ZONE_CUSTOM_VA_BASE) -#else -/* only used with 32-bit clients */ -/* On a 32bit platform, custom VA should be wired from 4GB to the VA limit of the - * GPU. Unfortunately, the Linux mmap() interface limits us to 2^32 pages (2^44 - * bytes, see mmap64 man page for reference). So we put the default limit to the - * maximum possible on Linux and shrink it down, if required by the GPU, during - * initialization. - */ -#define KBASE_REG_ZONE_CUSTOM_VA_SIZE \ - (((1ULL << 44) >> PAGE_SHIFT) - KBASE_REG_ZONE_CUSTOM_VA_BASE) -/* end 32-bit clients only */ -#endif - -/* The starting address and size of the GPU-executable zone are dynamic - * and depend on the platform and the number of pages requested by the - * user process, with an upper limit of 4 GB. 
- */ -#define KBASE_REG_ZONE_EXEC_VA KBASE_REG_ZONE(2) -#define KBASE_REG_ZONE_EXEC_VA_MAX_PAGES ((1ULL << 32) >> PAGE_SHIFT) /* 4 GB */ - -#if MALI_USE_CSF -#define KBASE_REG_ZONE_MCU_SHARED KBASE_REG_ZONE(3) -#define KBASE_REG_ZONE_MCU_SHARED_BASE (0x04000000ULL >> PAGE_SHIFT) -#define KBASE_REG_ZONE_MCU_SHARED_SIZE (((0x08000000ULL) >> PAGE_SHIFT) - \ - KBASE_REG_ZONE_MCU_SHARED_BASE) - -/* For CSF GPUs, the EXEC_VA zone is always 4GB in size, and starts at 2^47 for 64-bit - * clients, and 2^43 for 32-bit clients. - */ -#define KBASE_REG_ZONE_EXEC_VA_BASE_64 ((1ULL << 47) >> PAGE_SHIFT) -#define KBASE_REG_ZONE_EXEC_VA_BASE_32 ((1ULL << 43) >> PAGE_SHIFT) -#define KBASE_REG_ZONE_EXEC_VA_SIZE KBASE_REG_ZONE_EXEC_VA_MAX_PAGES - -/* Executable zone supporting FIXED/FIXABLE allocations. - * It is always 4GB in size. - */ - -#define KBASE_REG_ZONE_EXEC_FIXED_VA KBASE_REG_ZONE(4) -#define KBASE_REG_ZONE_EXEC_FIXED_VA_SIZE KBASE_REG_ZONE_EXEC_VA_MAX_PAGES - -/* Non-executable zone supporting FIXED/FIXABLE allocations. - * It extends from (2^47) up to (2^48)-1, for 64-bit userspace clients, and from - * (2^43) up to (2^44)-1 for 32-bit userspace clients. - */ -#define KBASE_REG_ZONE_FIXED_VA KBASE_REG_ZONE(5) - -/* Again - 32-bit userspace cannot map addresses beyond 2^44, but 64-bit can - and so - * the end of the FIXED_VA zone for 64-bit clients is (2^48)-1. - */ -#define KBASE_REG_ZONE_FIXED_VA_END_64 ((1ULL << 48) >> PAGE_SHIFT) -#define KBASE_REG_ZONE_FIXED_VA_END_32 ((1ULL << 44) >> PAGE_SHIFT) - -#endif - unsigned long flags; size_t extension; struct kbase_mem_phy_alloc *cpu_alloc; @@ -687,20 +695,19 @@ struct kbase_va_region { }; /** - * kbase_is_ctx_reg_zone - determine whether a KBASE_REG_ZONE_<...> is for a - * context or for a device - * @zone_bits: A KBASE_REG_ZONE_<...> to query + * kbase_is_ctx_reg_zone - Determine whether a zone is associated with a + * context or with the device + * @zone: Zone identifier * - * Return: True if the zone for @zone_bits is a context zone, False otherwise + * Return: True if @zone is a context zone, False otherwise */ -static inline bool kbase_is_ctx_reg_zone(unsigned long zone_bits) +static inline bool kbase_is_ctx_reg_zone(enum kbase_memory_zone zone) { - WARN_ON((zone_bits & KBASE_REG_ZONE_MASK) != zone_bits); - return (zone_bits == KBASE_REG_ZONE_SAME_VA || #if MALI_USE_CSF - zone_bits == KBASE_REG_ZONE_EXEC_FIXED_VA || zone_bits == KBASE_REG_ZONE_FIXED_VA || + return !(zone == MCU_SHARED_ZONE); +#else + return true; #endif - zone_bits == KBASE_REG_ZONE_CUSTOM_VA || zone_bits == KBASE_REG_ZONE_EXEC_VA); } /* Special marker for failed JIT allocations that still must be marked as @@ -1359,18 +1366,19 @@ int kbase_region_tracker_init_exec(struct kbase_context *kctx, u64 exec_va_pages void kbase_region_tracker_term(struct kbase_context *kctx); /** - * kbase_region_tracker_term_rbtree - Free memory for a region tracker + * kbase_region_tracker_erase_rbtree - Free memory for a region tracker * * @rbtree: Region tracker tree root * * This will free all the regions within the region tracker */ -void kbase_region_tracker_term_rbtree(struct rb_root *rbtree); +void kbase_region_tracker_erase_rbtree(struct rb_root *rbtree); struct kbase_va_region *kbase_region_tracker_find_region_enclosing_address( struct kbase_context *kctx, u64 gpu_addr); struct kbase_va_region *kbase_find_region_enclosing_address( struct rb_root *rbtree, u64 gpu_addr); +void kbase_region_tracker_insert(struct kbase_va_region *new_reg); /** * kbase_region_tracker_find_region_base_address 
- Check that a pointer is @@ -1387,8 +1395,11 @@ struct kbase_va_region *kbase_region_tracker_find_region_base_address( struct kbase_va_region *kbase_find_region_base_address(struct rb_root *rbtree, u64 gpu_addr); -struct kbase_va_region *kbase_alloc_free_region(struct kbase_device *kbdev, struct rb_root *rbtree, - u64 start_pfn, size_t nr_pages, int zone); +struct kbase_va_region *kbase_alloc_free_region(struct kbase_reg_zone *zone, u64 start_pfn, + size_t nr_pages); +struct kbase_va_region *kbase_ctx_alloc_free_region(struct kbase_context *kctx, + enum kbase_memory_zone id, u64 start_pfn, + size_t nr_pages); void kbase_free_alloced_region(struct kbase_va_region *reg); int kbase_add_va_region(struct kbase_context *kctx, struct kbase_va_region *reg, u64 addr, size_t nr_pages, size_t align); @@ -1866,7 +1877,7 @@ static inline struct kbase_page_metadata *kbase_page_private(struct page *p) static inline dma_addr_t kbase_dma_addr(struct page *p) { - if (kbase_page_migration_enabled) + if (kbase_is_page_migration_enabled()) return kbase_page_private(p)->dma_addr; return kbase_dma_addr_as_priv(p); @@ -1876,8 +1887,9 @@ static inline dma_addr_t kbase_dma_addr_from_tagged(struct tagged_addr tagged_pa { phys_addr_t pa = as_phys_addr_t(tagged_pa); struct page *page = pfn_to_page(PFN_DOWN(pa)); - dma_addr_t dma_addr = - is_huge(tagged_pa) ? kbase_dma_addr_as_priv(page) : kbase_dma_addr(page); + dma_addr_t dma_addr = (is_huge(tagged_pa) || is_partial(tagged_pa)) ? + kbase_dma_addr_as_priv(page) : + kbase_dma_addr(page); return dma_addr; } @@ -2434,75 +2446,95 @@ int kbase_mem_copy_to_pinned_user_pages(struct page **dest_pages, unsigned int *target_page_nr, size_t offset); /** - * kbase_reg_zone_end_pfn - return the end Page Frame Number of @zone - * @zone: zone to query + * kbase_ctx_reg_zone_get_nolock - Get a zone from @kctx where the caller does + * not have @kctx 's region lock + * @kctx: Pointer to kbase context + * @zone: Zone identifier * - * Return: The end of the zone corresponding to @zone + * This should only be used in performance-critical paths where the code is + * resilient to a race with the zone changing, and only when the zone is tracked + * by the @kctx. + * + * Return: The zone corresponding to @zone */ -static inline u64 kbase_reg_zone_end_pfn(struct kbase_reg_zone *zone) +static inline struct kbase_reg_zone *kbase_ctx_reg_zone_get_nolock(struct kbase_context *kctx, + enum kbase_memory_zone zone) { - return zone->base_pfn + zone->va_size_pages; + WARN_ON(!kbase_is_ctx_reg_zone(zone)); + return &kctx->reg_zone[zone]; } /** - * kbase_ctx_reg_zone_init - initialize a zone in @kctx + * kbase_ctx_reg_zone_get - Get a memory zone from @kctx * @kctx: Pointer to kbase context - * @zone_bits: A KBASE_REG_ZONE_<...> to initialize + * @zone: Zone identifier + * + * Note that the zone is not refcounted, so there is no corresponding operation to + * put the zone back. 
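+ *
+ * Illustrative use (hypothetical caller holding @kctx 's region lock):
+ *   struct kbase_reg_zone *same_va = kbase_ctx_reg_zone_get(kctx, SAME_VA_ZONE);
+ *   u64 end_pfn = kbase_reg_zone_end_pfn(same_va);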
+ *
+ * Return: The zone corresponding to @zone
+ */
+static inline struct kbase_reg_zone *kbase_ctx_reg_zone_get(struct kbase_context *kctx,
+ enum kbase_memory_zone zone)
+{
+ lockdep_assert_held(&kctx->reg_lock);
+ return kbase_ctx_reg_zone_get_nolock(kctx, zone);
+}
+
+/**
+ * kbase_reg_zone_init - Initialize a memory zone
+ * @kbdev: Pointer to kbase device in order to initialize the VA region cache
+ * @zone: Memory zone
+ * @id: Memory zone identifier to facilitate lookups
+ * @base_pfn: Page Frame Number in GPU virtual address space for the start of
+ * the Zone
+ * @va_size_pages: Size of the Zone in pages
+ *
+ * Return:
+ * * 0 on success
+ * * -ENOMEM on error
+ */
-static inline void kbase_ctx_reg_zone_init(struct kbase_context *kctx,
- unsigned long zone_bits,
- u64 base_pfn, u64 va_size_pages)
+static inline int kbase_reg_zone_init(struct kbase_device *kbdev, struct kbase_reg_zone *zone,
+ enum kbase_memory_zone id, u64 base_pfn, u64 va_size_pages)
 {
- struct kbase_reg_zone *zone;
+ struct kbase_va_region *reg;
 
- lockdep_assert_held(&kctx->reg_lock);
- WARN_ON(!kbase_is_ctx_reg_zone(zone_bits));
+ *zone = (struct kbase_reg_zone){ .reg_rbtree = RB_ROOT,
+ .base_pfn = base_pfn,
+ .va_size_pages = va_size_pages,
+ .id = id,
+ .cache = kbdev->va_region_slab };
+
+ if (unlikely(!va_size_pages))
+ return 0;
+
+ reg = kbase_alloc_free_region(zone, base_pfn, va_size_pages);
+ if (unlikely(!reg))
+ return -ENOMEM;
+
+ kbase_region_tracker_insert(reg);
 
- zone = &kctx->reg_zone[KBASE_REG_ZONE_IDX(zone_bits)];
- *zone = (struct kbase_reg_zone){
- .base_pfn = base_pfn, .va_size_pages = va_size_pages,
- };
+ return 0;
 }
 
 /**
- * kbase_ctx_reg_zone_get_nolock - get a zone from @kctx where the caller does
- * not have @kctx 's region lock
- * @kctx: Pointer to kbase context
- * @zone_bits: A KBASE_REG_ZONE_<...> to retrieve
- *
- * This should only be used in performance-critical paths where the code is
- * resilient to a race with the zone changing.
+ * kbase_reg_zone_end_pfn - return the end Page Frame Number of @zone + * @zone: zone to query * - * Return: The zone corresponding to @zone_bits + * Return: The end of the zone corresponding to @zone */ -static inline struct kbase_reg_zone * -kbase_ctx_reg_zone_get_nolock(struct kbase_context *kctx, - unsigned long zone_bits) +static inline u64 kbase_reg_zone_end_pfn(struct kbase_reg_zone *zone) { - WARN_ON(!kbase_is_ctx_reg_zone(zone_bits)); - - return &kctx->reg_zone[KBASE_REG_ZONE_IDX(zone_bits)]; + return zone->base_pfn + zone->va_size_pages; } /** - * kbase_ctx_reg_zone_get - get a zone from @kctx - * @kctx: Pointer to kbase context - * @zone_bits: A KBASE_REG_ZONE_<...> to retrieve - * - * The get is not refcounted - there is no corresponding 'put' operation - * - * Return: The zone corresponding to @zone_bits + * kbase_reg_zone_term - Terminate the memory zone tracker + * @zone: Memory zone */ -static inline struct kbase_reg_zone * -kbase_ctx_reg_zone_get(struct kbase_context *kctx, unsigned long zone_bits) +static inline void kbase_reg_zone_term(struct kbase_reg_zone *zone) { - lockdep_assert_held(&kctx->reg_lock); - WARN_ON(!kbase_is_ctx_reg_zone(zone_bits)); - - return &kctx->reg_zone[KBASE_REG_ZONE_IDX(zone_bits)]; + kbase_region_tracker_erase_rbtree(&zone->reg_rbtree); } /** diff --git a/mali_kbase/mali_kbase_mem_linux.c b/mali_kbase/mali_kbase_mem_linux.c index e8df130..1af833d 100644 --- a/mali_kbase/mali_kbase_mem_linux.c +++ b/mali_kbase/mali_kbase_mem_linux.c @@ -287,9 +287,8 @@ struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, u64 va_pages u64 extension, u64 *flags, u64 *gpu_va, enum kbase_caller_mmu_sync_info mmu_sync_info) { - int zone; struct kbase_va_region *reg; - struct rb_root *rbtree; + enum kbase_memory_zone zone; struct device *dev; KBASE_DEBUG_ASSERT(kctx); @@ -359,31 +358,25 @@ struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, u64 va_pages #endif /* find out which VA zone to use */ - if (*flags & BASE_MEM_SAME_VA) { - rbtree = &kctx->reg_rbtree_same; - zone = KBASE_REG_ZONE_SAME_VA; - } + if (*flags & BASE_MEM_SAME_VA) + zone = SAME_VA_ZONE; #if MALI_USE_CSF /* fixed va_zone always exists */ else if (*flags & (BASE_MEM_FIXED | BASE_MEM_FIXABLE)) { if (*flags & BASE_MEM_PROT_GPU_EX) { - rbtree = &kctx->reg_rbtree_exec_fixed; - zone = KBASE_REG_ZONE_EXEC_FIXED_VA; + zone = EXEC_FIXED_VA_ZONE; } else { - rbtree = &kctx->reg_rbtree_fixed; - zone = KBASE_REG_ZONE_FIXED_VA; + zone = FIXED_VA_ZONE; } } #endif else if ((*flags & BASE_MEM_PROT_GPU_EX) && kbase_has_exec_va_zone(kctx)) { - rbtree = &kctx->reg_rbtree_exec; - zone = KBASE_REG_ZONE_EXEC_VA; + zone = EXEC_VA_ZONE; } else { - rbtree = &kctx->reg_rbtree_custom; - zone = KBASE_REG_ZONE_CUSTOM_VA; + zone = CUSTOM_VA_ZONE; } - reg = kbase_alloc_free_region(kctx->kbdev, rbtree, PFN_DOWN(*gpu_va), va_pages, zone); + reg = kbase_ctx_alloc_free_region(kctx, zone, PFN_DOWN(*gpu_va), va_pages); if (!reg) { dev_err(dev, "Failed to allocate free region"); @@ -634,8 +627,8 @@ int kbase_mem_query(struct kbase_context *kctx, #if MALI_USE_CSF if (KBASE_REG_CSF_EVENT & reg->flags) *out |= BASE_MEM_CSF_EVENT; - if (((KBASE_REG_ZONE_MASK & reg->flags) == KBASE_REG_ZONE_FIXED_VA) || - ((KBASE_REG_ZONE_MASK & reg->flags) == KBASE_REG_ZONE_EXEC_FIXED_VA)) { + if ((kbase_bits_to_zone(reg->flags) == FIXED_VA_ZONE) || + (kbase_bits_to_zone(reg->flags) == EXEC_FIXED_VA_ZONE)) { if (KBASE_REG_FIXED_ADDRESS & reg->flags) *out |= BASE_MEM_FIXED; else @@ -680,9 +673,6 @@ unsigned long 
kbase_mem_evictable_reclaim_count_objects(struct shrinker *s, int evict_nents = atomic_read(&kctx->evict_nents); unsigned long nr_freeable_items; - WARN((sc->gfp_mask & __GFP_ATOMIC), - "Shrinkers cannot be called for GFP_ATOMIC allocations. Check kernel mm for problems. gfp_mask==%x\n", - sc->gfp_mask); WARN(in_atomic(), "Shrinker called in atomic context. The caller must use GFP_ATOMIC or similar, then Shrinkers must not be called. gfp_mask==%x\n", sc->gfp_mask); @@ -875,7 +865,7 @@ int kbase_mem_evictable_make(struct kbase_mem_phy_alloc *gpu_alloc) /* Indicate to page migration that the memory can be reclaimed by the shrinker. */ - if (kbase_page_migration_enabled) + if (kbase_is_page_migration_enabled()) kbase_set_phy_alloc_page_status(gpu_alloc, NOT_MOVABLE); mutex_unlock(&kctx->jit_evict_lock); @@ -936,7 +926,7 @@ bool kbase_mem_evictable_unmake(struct kbase_mem_phy_alloc *gpu_alloc) * in which a physical allocation could transition to NOT_MOVABLE * from. */ - if (kbase_page_migration_enabled) + if (kbase_is_page_migration_enabled()) kbase_set_phy_alloc_page_status(gpu_alloc, ALLOCATED_MAPPED); } } @@ -1316,11 +1306,11 @@ int kbase_mem_umm_map(struct kbase_context *kctx, gwt_mask = ~KBASE_REG_GPU_WR; #endif - err = kbase_mmu_insert_imported_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, - kbase_get_gpu_phy_pages(reg), - kbase_reg_current_backed_size(reg), - reg->flags & gwt_mask, kctx->as_nr, alloc->group_id, - mmu_sync_info, NULL); + err = kbase_mmu_insert_pages_skip_status_update(kctx->kbdev, &kctx->mmu, reg->start_pfn, + kbase_get_gpu_phy_pages(reg), + kbase_reg_current_backed_size(reg), + reg->flags & gwt_mask, kctx->as_nr, + alloc->group_id, mmu_sync_info, NULL); if (err) goto bad_insert; @@ -1345,8 +1335,8 @@ int kbase_mem_umm_map(struct kbase_context *kctx, return 0; bad_pad_insert: - kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, alloc->pages, - alloc->nents, alloc->nents, kctx->as_nr, true); + kbase_mmu_teardown_imported_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, alloc->pages, + alloc->nents, alloc->nents, kctx->as_nr); bad_insert: kbase_mem_umm_unmap_attachment(kctx, alloc); bad_map_attachment: @@ -1374,9 +1364,9 @@ void kbase_mem_umm_unmap(struct kbase_context *kctx, if (!kbase_is_region_invalid_or_free(reg) && reg->gpu_alloc == alloc) { int err; - err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, - alloc->pages, reg->nr_pages, reg->nr_pages, - kctx->as_nr, true); + err = kbase_mmu_teardown_imported_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, + alloc->pages, reg->nr_pages, reg->nr_pages, + kctx->as_nr); WARN_ON(err); } @@ -1423,6 +1413,7 @@ static struct kbase_va_region *kbase_mem_from_umm(struct kbase_context *kctx, struct kbase_va_region *reg; struct dma_buf *dma_buf; struct dma_buf_attachment *dma_attachment; + enum kbase_memory_zone zone; bool shared_zone = false; bool need_sync = false; int group_id; @@ -1481,12 +1472,11 @@ static struct kbase_va_region *kbase_mem_from_umm(struct kbase_context *kctx, if (shared_zone) { *flags |= BASE_MEM_NEED_MMAP; - reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_same, 0, *va_pages, - KBASE_REG_ZONE_SAME_VA); - } else { - reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_custom, 0, *va_pages, - KBASE_REG_ZONE_CUSTOM_VA); - } + zone = SAME_VA_ZONE; + } else + zone = CUSTOM_VA_ZONE; + + reg = kbase_ctx_alloc_free_region(kctx, zone, 0, *va_pages); if (!reg) { dma_buf_detach(dma_buf, dma_attachment); @@ -1572,9 +1562,8 @@ static struct kbase_va_region 
*kbase_mem_from_user_buffer( { long i, dma_mapped_pages; struct kbase_va_region *reg; - struct rb_root *rbtree; long faulted_pages; - int zone = KBASE_REG_ZONE_CUSTOM_VA; + enum kbase_memory_zone zone = CUSTOM_VA_ZONE; bool shared_zone = false; u32 cache_line_alignment = kbase_get_cache_line_alignment(kctx->kbdev); struct kbase_alloc_import_user_buf *user_buf; @@ -1582,6 +1571,7 @@ static struct kbase_va_region *kbase_mem_from_user_buffer( struct tagged_addr *pa; struct device *dev; int write; + enum dma_data_direction dma_dir; /* Flag supported only for dma-buf imported memory */ if (*flags & BASE_MEM_IMPORT_SYNC_ON_MAP_UNMAP) @@ -1637,13 +1627,10 @@ static struct kbase_va_region *kbase_mem_from_user_buffer( if (shared_zone) { *flags |= BASE_MEM_NEED_MMAP; - zone = KBASE_REG_ZONE_SAME_VA; - rbtree = &kctx->reg_rbtree_same; - } else - rbtree = &kctx->reg_rbtree_custom; - - reg = kbase_alloc_free_region(kctx->kbdev, rbtree, 0, *va_pages, zone); + zone = SAME_VA_ZONE; + } + reg = kbase_ctx_alloc_free_region(kctx, zone, 0, *va_pages); if (!reg) goto no_region; @@ -1693,6 +1680,7 @@ static struct kbase_va_region *kbase_mem_from_user_buffer( down_read(kbase_mem_get_process_mmap_lock()); write = reg->flags & (KBASE_REG_CPU_WR | KBASE_REG_GPU_WR); + dma_dir = write ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE; #if KERNEL_VERSION(5, 9, 0) > LINUX_VERSION_CODE faulted_pages = get_user_pages(address, *va_pages, @@ -1751,10 +1739,10 @@ static struct kbase_va_region *kbase_mem_from_user_buffer( for (i = 0; i < faulted_pages; i++) { dma_addr_t dma_addr; #if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) - dma_addr = dma_map_page(dev, pages[i], 0, PAGE_SIZE, DMA_BIDIRECTIONAL); + dma_addr = dma_map_page(dev, pages[i], 0, PAGE_SIZE, dma_dir); #else - dma_addr = dma_map_page_attrs(dev, pages[i], 0, PAGE_SIZE, - DMA_BIDIRECTIONAL, DMA_ATTR_SKIP_CPU_SYNC); + dma_addr = dma_map_page_attrs(dev, pages[i], 0, PAGE_SIZE, dma_dir, + DMA_ATTR_SKIP_CPU_SYNC); #endif if (dma_mapping_error(dev, dma_addr)) goto unwind_dma_map; @@ -1762,7 +1750,7 @@ static struct kbase_va_region *kbase_mem_from_user_buffer( user_buf->dma_addrs[i] = dma_addr; pa[i] = as_tagged(page_to_phys(pages[i])); - dma_sync_single_for_device(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); + dma_sync_single_for_device(dev, dma_addr, PAGE_SIZE, dma_dir); } reg->gpu_alloc->nents = faulted_pages; @@ -1781,12 +1769,11 @@ unwind_dma_map: for (i = 0; i < dma_mapped_pages; i++) { dma_addr_t dma_addr = user_buf->dma_addrs[i]; - dma_sync_single_for_device(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); + dma_sync_single_for_device(dev, dma_addr, PAGE_SIZE, dma_dir); #if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE) - dma_unmap_page(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); + dma_unmap_page(dev, dma_addr, PAGE_SIZE, dma_dir); #else - dma_unmap_page_attrs(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL, - DMA_ATTR_SKIP_CPU_SYNC); + dma_unmap_page_attrs(dev, dma_addr, PAGE_SIZE, dma_dir, DMA_ATTR_SKIP_CPU_SYNC); #endif } fault_mismatch: @@ -1819,6 +1806,7 @@ u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride, size_t i; bool coherent; uint64_t max_stride; + enum kbase_memory_zone zone; /* Calls to this function are inherently asynchronous, with respect to * MMU operations. 
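
The user-buffer import hunks above stop mapping imported pages DMA_BIDIRECTIONAL unconditionally; the direction now follows the region's access flags. A minimal sketch of that selection, mirroring the added code (illustrative only; reg stands for the region being imported):

	int write = reg->flags & (KBASE_REG_CPU_WR | KBASE_REG_GPU_WR);
	enum dma_data_direction dma_dir = write ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE;

Read-only imports are then mapped DMA_TO_DEVICE, which avoids needless CPU cache invalidation when the pages are unmapped.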
@@ -1870,13 +1858,12 @@ u64 kbase_mem_alias(struct kbase_context *kctx, u64 *flags, u64 stride, /* 64-bit tasks must MMAP anyway, but not expose this address to * clients */ + zone = SAME_VA_ZONE; *flags |= BASE_MEM_NEED_MMAP; - reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_same, 0, *num_pages, - KBASE_REG_ZONE_SAME_VA); - } else { - reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_custom, 0, *num_pages, - KBASE_REG_ZONE_CUSTOM_VA); - } + } else + zone = CUSTOM_VA_ZONE; + + reg = kbase_ctx_alloc_free_region(kctx, zone, 0, *num_pages); if (!reg) goto no_reg; @@ -2200,7 +2187,7 @@ int kbase_mem_grow_gpu_mapping(struct kbase_context *kctx, phy_pages = kbase_get_gpu_phy_pages(reg); ret = kbase_mmu_insert_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn + old_pages, phy_pages + old_pages, delta, reg->flags, kctx->as_nr, - reg->gpu_alloc->group_id, mmu_sync_info, reg, false); + reg->gpu_alloc->group_id, mmu_sync_info, reg); return ret; } @@ -2215,7 +2202,7 @@ void kbase_mem_shrink_cpu_mapping(struct kbase_context *kctx, /* Nothing to do */ return; - unmap_mapping_range(kctx->filp->f_inode->i_mapping, + unmap_mapping_range(kctx->kfile->filp->f_inode->i_mapping, (gpu_va_start + new_pages)<<PAGE_SHIFT, (old_pages - new_pages)<<PAGE_SHIFT, 1); } @@ -2229,7 +2216,7 @@ int kbase_mem_shrink_gpu_mapping(struct kbase_context *const kctx, int ret = 0; ret = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn + new_pages, - alloc->pages + new_pages, delta, delta, kctx->as_nr, false); + alloc->pages + new_pages, delta, delta, kctx->as_nr); return ret; } @@ -2388,6 +2375,21 @@ int kbase_mem_shrink(struct kbase_context *const kctx, return -EINVAL; delta = old_pages - new_pages; + if (kctx->kbdev->pagesize_2mb) { + struct tagged_addr *start_free = reg->gpu_alloc->pages + new_pages; + + /* Move the end of new committed range to a valid location. + * This mirrors the adjustment done inside kbase_free_phy_pages_helper(). + */ + while (delta && is_huge(*start_free) && !is_huge_head(*start_free)) { + start_free++; + new_pages++; + delta--; + } + + if (!delta) + return 0; + } /* Update the GPU mapping */ err = kbase_mem_shrink_gpu_mapping(kctx, reg, @@ -2400,19 +2402,6 @@ int kbase_mem_shrink(struct kbase_context *const kctx, kbase_free_phy_pages_helper(reg->cpu_alloc, delta); if (reg->cpu_alloc != reg->gpu_alloc) kbase_free_phy_pages_helper(reg->gpu_alloc, delta); - - if (kctx->kbdev->pagesize_2mb) { - if (kbase_reg_current_backed_size(reg) > new_pages) { - old_pages = new_pages; - new_pages = kbase_reg_current_backed_size(reg); - - /* Update GPU mapping. */ - err = kbase_mem_grow_gpu_mapping(kctx, reg, new_pages, old_pages, - CALLER_MMU_ASYNC); - } - } else { - WARN_ON(kbase_reg_current_backed_size(reg) != new_pages); - } } return err; @@ -2446,8 +2435,7 @@ static void kbase_cpu_vm_close(struct vm_area_struct *vma) kbase_gpu_vm_lock(map->kctx); if (map->free_on_close) { - KBASE_DEBUG_ASSERT((map->region->flags & KBASE_REG_ZONE_MASK) == - KBASE_REG_ZONE_SAME_VA); + KBASE_DEBUG_ASSERT(kbase_bits_to_zone(map->region->flags) == SAME_VA_ZONE); /* Avoid freeing memory on the process death which results in * GPU Page Fault. 
Memory will be freed in kbase_destroy_context */ @@ -2461,6 +2449,7 @@ static void kbase_cpu_vm_close(struct vm_area_struct *vma) kbase_gpu_vm_unlock(map->kctx); kbase_mem_phy_alloc_put(map->alloc); + kbase_file_dec_cpu_mapping_count(map->kctx->kfile); kfree(map); } @@ -2519,9 +2508,17 @@ static vm_fault_t kbase_cpu_vm_fault(struct vm_fault *vmf) KBASE_DEBUG_ASSERT(map->kctx); KBASE_DEBUG_ASSERT(map->alloc); + kbase_gpu_vm_lock(map->kctx); + + /* Reject faults for SAME_VA mapping of UMM allocations */ + if ((map->alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM) && map->free_on_close) { + dev_warn(map->kctx->kbdev->dev, "Invalid CPU access to UMM memory for ctx %d_%d", + map->kctx->tgid, map->kctx->id); + goto exit; + } + map_start_pgoff = vma->vm_pgoff - map->region->start_pfn; - kbase_gpu_vm_lock(map->kctx); if (unlikely(map->region->cpu_alloc->type == KBASE_MEM_TYPE_ALIAS)) { struct kbase_aliased *aliased = get_aliased_alloc(vma, map->region, &map_start_pgoff, 1); @@ -2608,7 +2605,7 @@ static int kbase_cpu_mmap(struct kbase_context *kctx, * See MIDBASE-1057 */ - vma->vm_flags |= VM_DONTCOPY | VM_DONTDUMP | VM_DONTEXPAND | VM_IO; + vm_flags_set(vma, VM_DONTCOPY | VM_DONTDUMP | VM_DONTEXPAND | VM_IO); vma->vm_ops = &kbase_vm_ops; vma->vm_private_data = map; @@ -2636,12 +2633,12 @@ static int kbase_cpu_mmap(struct kbase_context *kctx, vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); } - if (!kaddr) { - vma->vm_flags |= VM_PFNMAP; - } else { + if (!kaddr) + vm_flags_set(vma, VM_PFNMAP); + else { WARN_ON(aligned_offset); /* MIXEDMAP so we can vfree the kaddr early and not track it after map time */ - vma->vm_flags |= VM_MIXEDMAP; + vm_flags_set(vma, VM_MIXEDMAP); /* vmalloc remaping is easy... */ err = remap_vmalloc_range(vma, kaddr, 0); WARN_ON(err); @@ -2662,6 +2659,7 @@ static int kbase_cpu_mmap(struct kbase_context *kctx, map->alloc->properties |= KBASE_MEM_PHY_ALLOC_ACCESSED_CACHED; list_add(&map->mappings_list, &map->alloc->mappings); + kbase_file_inc_cpu_mapping_count(kctx->kfile); out: return err; @@ -2710,8 +2708,7 @@ static int kbase_mmu_dump_mmap(struct kbase_context *kctx, goto out; } - new_reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_same, 0, nr_pages, - KBASE_REG_ZONE_SAME_VA); + new_reg = kbase_ctx_alloc_free_region(kctx, SAME_VA_ZONE, 0, nr_pages); if (!new_reg) { err = -ENOMEM; WARN_ON(1); @@ -2855,9 +2852,9 @@ int kbase_context_mmap(struct kbase_context *const kctx, dev_dbg(dev, "kbase_mmap\n"); if (!(vma->vm_flags & VM_READ)) - vma->vm_flags &= ~VM_MAYREAD; + vm_flags_clear(vma, VM_MAYREAD); if (!(vma->vm_flags & VM_WRITE)) - vma->vm_flags &= ~VM_MAYWRITE; + vm_flags_clear(vma, VM_MAYWRITE); if (nr_pages == 0) { err = -EINVAL; @@ -3070,6 +3067,9 @@ static void kbase_vmap_phy_pages_migrate_count_increment(struct tagged_addr *pag { size_t i; + if (!IS_ENABLED(CONFIG_PAGE_MIGRATION_SUPPORT)) + return; + for (i = 0; i < page_count; i++) { struct page *p = as_page(pages[i]); struct kbase_page_metadata *page_md = kbase_page_private(p); @@ -3119,6 +3119,9 @@ static void kbase_vunmap_phy_pages_migrate_count_decrement(struct tagged_addr *p { size_t i; + if (!IS_ENABLED(CONFIG_PAGE_MIGRATION_SUPPORT)) + return; + for (i = 0; i < page_count; i++) { struct page *p = as_page(pages[i]); struct kbase_page_metadata *page_md = kbase_page_private(p); @@ -3219,7 +3222,7 @@ static int kbase_vmap_phy_pages(struct kbase_context *kctx, struct kbase_va_regi * of all physical pages. In case of errors, e.g. too many mappings, * make the page not movable to prevent trouble. 
*/ - if (kbase_page_migration_enabled && !kbase_mem_is_imported(reg->gpu_alloc->type)) + if (kbase_is_page_migration_enabled() && !kbase_mem_is_imported(reg->gpu_alloc->type)) kbase_vmap_phy_pages_migrate_count_increment(page_array, page_count, reg->flags); kfree(pages); @@ -3331,7 +3334,7 @@ static void kbase_vunmap_phy_pages(struct kbase_context *kctx, * for all physical pages. Now is a good time to do it because references * haven't been released yet. */ - if (kbase_page_migration_enabled && !kbase_mem_is_imported(map->gpu_alloc->type)) { + if (kbase_is_page_migration_enabled() && !kbase_mem_is_imported(map->gpu_alloc->type)) { const size_t page_count = PFN_UP(map->offset_in_page + map->size); struct tagged_addr *pages_array = map->cpu_pages; @@ -3367,11 +3370,14 @@ KBASE_EXPORT_TEST_API(kbase_vunmap); static void kbasep_add_mm_counter(struct mm_struct *mm, int member, long value) { -#if (KERNEL_VERSION(4, 19, 0) <= LINUX_VERSION_CODE) - /* To avoid the build breakage due to an unexported kernel symbol - * 'mm_trace_rss_stat' from later kernels, i.e. from V4.19.0 onwards, - * we inline here the equivalent of 'add_mm_counter()' from linux - * kernel V5.4.0~8. +#if (KERNEL_VERSION(6, 2, 0) <= LINUX_VERSION_CODE) + /* To avoid the build breakage due to the type change in rss_stat, + * we inline here the equivalent of 'add_mm_counter()' from linux kernel V6.2. + */ + percpu_counter_add(&mm->rss_stat[member], value); +#elif (KERNEL_VERSION(5, 5, 0) <= LINUX_VERSION_CODE) + /* To avoid the build breakage due to an unexported kernel symbol 'mm_trace_rss_stat', + * we inline here the equivalent of 'add_mm_counter()' from linux kernel V5.5. */ atomic_long_add(value, &mm->rss_stat.count[member]); #else @@ -3396,15 +3402,37 @@ void kbasep_os_process_page_usage_update(struct kbase_context *kctx, int pages) #endif } +static void kbase_special_vm_open(struct vm_area_struct *vma) +{ + struct kbase_context *kctx = vma->vm_private_data; + + kbase_file_inc_cpu_mapping_count(kctx->kfile); +} + +static void kbase_special_vm_close(struct vm_area_struct *vma) +{ + struct kbase_context *kctx = vma->vm_private_data; + + kbase_file_dec_cpu_mapping_count(kctx->kfile); +} + +static const struct vm_operations_struct kbase_vm_special_ops = { + .open = kbase_special_vm_open, + .close = kbase_special_vm_close, +}; + static int kbase_tracking_page_setup(struct kbase_context *kctx, struct vm_area_struct *vma) { if (vma_pages(vma) != 1) return -EINVAL; /* no real access */ - vma->vm_flags &= ~(VM_READ | VM_MAYREAD | VM_WRITE | VM_MAYWRITE | VM_EXEC | VM_MAYEXEC); - vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_DONTDUMP | VM_IO; + vm_flags_clear(vma, VM_READ | VM_MAYREAD | VM_WRITE | VM_MAYWRITE | VM_EXEC | VM_MAYEXEC); + vm_flags_set(vma, VM_DONTCOPY | VM_DONTEXPAND | VM_DONTDUMP | VM_IO); + vma->vm_ops = &kbase_vm_special_ops; + vma->vm_private_data = kctx; + kbase_file_inc_cpu_mapping_count(kctx->kfile); return 0; } @@ -3459,6 +3487,7 @@ static void kbase_csf_user_io_pages_vm_close(struct vm_area_struct *vma) struct kbase_device *kbdev; int err; bool reset_prevented = false; + struct kbase_file *kfile; if (!queue) { pr_debug("Close method called for the new User IO pages mapping vma\n"); @@ -3467,6 +3496,7 @@ static void kbase_csf_user_io_pages_vm_close(struct vm_area_struct *vma) kctx = queue->kctx; kbdev = kctx->kbdev; + kfile = kctx->kfile; err = kbase_reset_gpu_prevent_and_wait(kbdev); if (err) @@ -3484,8 +3514,9 @@ static void kbase_csf_user_io_pages_vm_close(struct vm_area_struct *vma) if (reset_prevented) 
kbase_reset_gpu_allow(kbdev); + kbase_file_dec_cpu_mapping_count(kfile); /* Now as the vma is closed, drop the reference on mali device file */ - fput(kctx->filp); + fput(kfile->filp); } #if (KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE) @@ -3618,13 +3649,13 @@ static int kbase_csf_cpu_mmap_user_io_pages(struct kbase_context *kctx, if (err) goto map_failed; - vma->vm_flags |= VM_DONTCOPY | VM_DONTDUMP | VM_DONTEXPAND | VM_IO; + vm_flags_set(vma, VM_DONTCOPY | VM_DONTDUMP | VM_DONTEXPAND | VM_IO); /* TODO use VM_MIXEDMAP, since it is more appropriate as both types of * memory with and without "struct page" backing are being inserted here. * Hw Doorbell pages comes from the device register area so kernel does * not use "struct page" for them. */ - vma->vm_flags |= VM_PFNMAP; + vm_flags_set(vma, VM_PFNMAP); vma->vm_ops = &kbase_csf_user_io_pages_vm_ops; vma->vm_private_data = queue; @@ -3638,6 +3669,7 @@ static int kbase_csf_cpu_mmap_user_io_pages(struct kbase_context *kctx, /* Also adjust the vm_pgoff */ vma->vm_pgoff = queue->db_file_offset; + kbase_file_inc_cpu_mapping_count(kctx->kfile); return 0; map_failed: @@ -3677,6 +3709,7 @@ static void kbase_csf_user_reg_vm_close(struct vm_area_struct *vma) { struct kbase_context *kctx = vma->vm_private_data; struct kbase_device *kbdev; + struct kbase_file *kfile; if (unlikely(!kctx)) { pr_debug("Close function called for the unexpected mapping"); @@ -3684,6 +3717,7 @@ static void kbase_csf_user_reg_vm_close(struct vm_area_struct *vma) } kbdev = kctx->kbdev; + kfile = kctx->kfile; if (unlikely(!kctx->csf.user_reg.vma)) dev_warn(kbdev->dev, "user_reg VMA pointer unexpectedly NULL for ctx %d_%d", @@ -3695,8 +3729,9 @@ static void kbase_csf_user_reg_vm_close(struct vm_area_struct *vma) kctx->csf.user_reg.vma = NULL; + kbase_file_dec_cpu_mapping_count(kfile); /* Now as the VMA is closed, drop the reference on mali device file */ - fput(kctx->filp); + fput(kfile->filp); } /** @@ -3801,12 +3836,12 @@ static int kbase_csf_cpu_mmap_user_reg_page(struct kbase_context *kctx, /* Map uncached */ vma->vm_page_prot = pgprot_device(vma->vm_page_prot); - vma->vm_flags |= VM_DONTCOPY | VM_DONTDUMP | VM_DONTEXPAND | VM_IO; + vm_flags_set(vma, VM_DONTCOPY | VM_DONTDUMP | VM_DONTEXPAND | VM_IO); /* User register page comes from the device register area so * "struct page" isn't available for it. */ - vma->vm_flags |= VM_PFNMAP; + vm_flags_set(vma, VM_PFNMAP); kctx->csf.user_reg.vma = vma; @@ -3826,6 +3861,7 @@ static int kbase_csf_cpu_mmap_user_reg_page(struct kbase_context *kctx, vma->vm_ops = &kbase_csf_user_reg_vm_ops; vma->vm_private_data = kctx; + kbase_file_inc_cpu_mapping_count(kctx->kfile); return 0; } diff --git a/mali_kbase/mali_kbase_mem_migrate.c b/mali_kbase/mali_kbase_mem_migrate.c index 1dc76d0..f2014f6 100644 --- a/mali_kbase/mali_kbase_mem_migrate.c +++ b/mali_kbase/mali_kbase_mem_migrate.c @@ -32,10 +32,33 @@ * provided and if page migration feature is enabled. * Feature is disabled on all platforms by default. */ -int kbase_page_migration_enabled; +#if !IS_ENABLED(CONFIG_PAGE_MIGRATION_SUPPORT) +/* If page migration support is explicitly compiled out, there should be no way to change + * this int. Its value is automatically 0 as a global. + */ +const int kbase_page_migration_enabled; +/* module_param is not called so this value cannot be changed at insmod when compiled + * without support for page migration. 
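+ * When support is compiled in (see below), the parameter can instead be set
+ * at insmod time, e.g. "insmod mali_kbase.ko kbase_page_migration_enabled=0"
+ * (module name illustrative).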
+ */ +#else +/* -1 as default, 0 when manually set as off and 1 when manually set as on */ +int kbase_page_migration_enabled = -1; module_param(kbase_page_migration_enabled, int, 0444); +MODULE_PARM_DESC(kbase_page_migration_enabled, + "Explicitly enable or disable page migration with 1 or 0 respectively."); +#endif /* !IS_ENABLED(CONFIG_PAGE_MIGRATION_SUPPORT) */ + KBASE_EXPORT_TEST_API(kbase_page_migration_enabled); +bool kbase_is_page_migration_enabled(void) +{ + /* Handle uninitialised int case */ + if (kbase_page_migration_enabled < 0) + return false; + return IS_ENABLED(CONFIG_PAGE_MIGRATION_SUPPORT) && kbase_page_migration_enabled; +} +KBASE_EXPORT_SYMBOL(kbase_is_page_migration_enabled); + #if (KERNEL_VERSION(6, 0, 0) <= LINUX_VERSION_CODE) static const struct movable_operations movable_ops; #endif @@ -43,9 +66,15 @@ static const struct movable_operations movable_ops; bool kbase_alloc_page_metadata(struct kbase_device *kbdev, struct page *p, dma_addr_t dma_addr, u8 group_id) { - struct kbase_page_metadata *page_md = - kzalloc(sizeof(struct kbase_page_metadata), GFP_KERNEL); + struct kbase_page_metadata *page_md; + + /* A check for kbase_page_migration_enabled would help here too but it's already being + * checked in the only caller of this function. + */ + if (!IS_ENABLED(CONFIG_PAGE_MIGRATION_SUPPORT)) + return false; + page_md = kzalloc(sizeof(struct kbase_page_metadata), GFP_KERNEL); if (!page_md) return false; @@ -95,6 +124,8 @@ static void kbase_free_page_metadata(struct kbase_device *kbdev, struct page *p, struct kbase_page_metadata *page_md; dma_addr_t dma_addr; + if (!IS_ENABLED(CONFIG_PAGE_MIGRATION_SUPPORT)) + return; page_md = kbase_page_private(p); if (!page_md) return; @@ -109,6 +140,10 @@ static void kbase_free_page_metadata(struct kbase_device *kbdev, struct page *p, ClearPagePrivate(p); } +#if IS_ENABLED(CONFIG_PAGE_MIGRATION_SUPPORT) +/* This function is only called when page migration + * support is not explicitly compiled out. + */ static void kbase_free_pages_worker(struct work_struct *work) { struct kbase_mem_migrate *mem_migrate = @@ -121,14 +156,13 @@ static void kbase_free_pages_worker(struct work_struct *work) spin_lock(&mem_migrate->free_pages_lock); list_splice_init(&mem_migrate->free_pages_list, &free_list); spin_unlock(&mem_migrate->free_pages_lock); - list_for_each_entry_safe(p, tmp, &free_list, lru) { u8 group_id = 0; list_del_init(&p->lru); lock_page(p); page_md = kbase_page_private(p); - if (IS_PAGE_MOVABLE(page_md->status)) { + if (page_md && IS_PAGE_MOVABLE(page_md->status)) { __ClearPageMovable(p); page_md->status = PAGE_MOVABLE_CLEAR(page_md->status); } @@ -138,11 +172,14 @@ static void kbase_free_pages_worker(struct work_struct *work) kbdev->mgm_dev->ops.mgm_free_page(kbdev->mgm_dev, group_id, p, 0); } } +#endif void kbase_free_page_later(struct kbase_device *kbdev, struct page *p) { struct kbase_mem_migrate *mem_migrate = &kbdev->mem_migrate; + if (!IS_ENABLED(CONFIG_PAGE_MIGRATION_SUPPORT)) + return; spin_lock(&mem_migrate->free_pages_lock); list_add(&p->lru, &mem_migrate->free_pages_list); spin_unlock(&mem_migrate->free_pages_lock); @@ -161,6 +198,9 @@ void kbase_free_page_later(struct kbase_device *kbdev, struct page *p) * the movable property. The meta data attached to the PGD page is transferred to the * new (replacement) page. * + * This function returns early with an error if called when not compiled with + * CONFIG_PAGE_MIGRATION_SUPPORT. + * * Return: 0 on migration success, or -EAGAIN for a later retry. 
Otherwise it's a failure * and the migration is aborted. */ @@ -173,6 +213,9 @@ static int kbasep_migrate_page_pt_mapped(struct page *old_page, struct page *new dma_addr_t new_dma_addr; int ret; + if (!IS_ENABLED(CONFIG_PAGE_MIGRATION_SUPPORT)) + return -EINVAL; + /* Create a new dma map for the new page */ new_dma_addr = dma_map_page(kbdev->dev, new_page, 0, PAGE_SIZE, DMA_BIDIRECTIONAL); if (dma_mapping_error(kbdev->dev, new_dma_addr)) @@ -227,6 +270,9 @@ static int kbasep_migrate_page_pt_mapped(struct page *old_page, struct page *new * allocation, which is used to create CPU mappings. Before returning, the new * page shall be set as movable and not isolated, while the old page shall lose * the movable property. + * + * This function returns early with an error if called when not compiled with + * CONFIG_PAGE_MIGRATION_SUPPORT. */ static int kbasep_migrate_page_allocated_mapped(struct page *old_page, struct page *new_page) { @@ -235,6 +281,8 @@ static int kbasep_migrate_page_allocated_mapped(struct page *old_page, struct pa dma_addr_t old_dma_addr, new_dma_addr; int ret; + if (!IS_ENABLED(CONFIG_PAGE_MIGRATION_SUPPORT)) + return -EINVAL; old_dma_addr = page_md->dma_addr; new_dma_addr = dma_map_page(kctx->kbdev->dev, new_page, 0, PAGE_SIZE, DMA_BIDIRECTIONAL); if (dma_mapping_error(kctx->kbdev->dev, new_dma_addr)) @@ -246,7 +294,8 @@ static int kbasep_migrate_page_allocated_mapped(struct page *old_page, struct pa kbase_gpu_vm_lock(kctx); /* Unmap the old physical range. */ - unmap_mapping_range(kctx->filp->f_inode->i_mapping, page_md->data.mapped.vpfn << PAGE_SHIFT, + unmap_mapping_range(kctx->kfile->filp->f_inode->i_mapping, + page_md->data.mapped.vpfn << PAGE_SHIFT, PAGE_SIZE, 1); ret = kbase_mmu_migrate_page(as_tagged(page_to_phys(old_page)), @@ -290,6 +339,7 @@ static int kbasep_migrate_page_allocated_mapped(struct page *old_page, struct pa * @mode: LRU Isolation modes. * * Callback function for Linux to isolate a page and prepare it for migration. + * This callback is not registered if compiled without CONFIG_PAGE_MIGRATION_SUPPORT. * * Return: true on success, false otherwise. */ @@ -299,6 +349,8 @@ static bool kbase_page_isolate(struct page *p, isolate_mode_t mode) struct kbase_mem_pool *mem_pool = NULL; struct kbase_page_metadata *page_md = kbase_page_private(p); + if (!IS_ENABLED(CONFIG_PAGE_MIGRATION_SUPPORT)) + return false; CSTD_UNUSED(mode); if (!page_md || !IS_PAGE_MOVABLE(page_md->status)) @@ -390,6 +442,7 @@ static bool kbase_page_isolate(struct page *p, isolate_mode_t mode) * * Callback function for Linux to migrate the content of the old page to the * new page provided. + * This callback is not registered if compiled without CONFIG_PAGE_MIGRATION_SUPPORT. * * Return: 0 on success, error code otherwise. */ @@ -415,7 +468,7 @@ static int kbase_page_migrate(struct page *new_page, struct page *old_page, enum #endif CSTD_UNUSED(mode); - if (!page_md || !IS_PAGE_MOVABLE(page_md->status)) + if (!kbase_is_page_migration_enabled() || !page_md || !IS_PAGE_MOVABLE(page_md->status)) return -EINVAL; if (!spin_trylock(&page_md->migrate_lock)) @@ -500,6 +553,7 @@ static int kbase_page_migrate(struct page *new_page, struct page *old_page, enum * will only be called for a page that has been isolated but failed to * migrate. This function will put back the given page to the state it was * in before it was isolated. + * This callback is not registered if compiled without CONFIG_PAGE_MIGRATION_SUPPORT. 
*/ static void kbase_page_putback(struct page *p) { @@ -509,6 +563,8 @@ static void kbase_page_putback(struct page *p) struct kbase_page_metadata *page_md = kbase_page_private(p); struct kbase_device *kbdev = NULL; + if (!IS_ENABLED(CONFIG_PAGE_MIGRATION_SUPPORT)) + return; /* If we don't have page metadata, the page may not belong to the * driver or may already have been freed, and there's nothing we can do */ @@ -585,6 +641,9 @@ static const struct address_space_operations kbase_address_space_ops = { #if (KERNEL_VERSION(6, 0, 0) > LINUX_VERSION_CODE) void kbase_mem_migrate_set_address_space_ops(struct kbase_device *kbdev, struct file *const filp) { + if (!kbase_is_page_migration_enabled()) + return; + mutex_lock(&kbdev->fw_load_lock); if (filp) { @@ -607,10 +666,23 @@ void kbase_mem_migrate_set_address_space_ops(struct kbase_device *kbdev, struct void kbase_mem_migrate_init(struct kbase_device *kbdev) { +#if !IS_ENABLED(CONFIG_PAGE_MIGRATION_SUPPORT) + /* Page migration explicitly disabled at compile time - do nothing */ + return; +#else struct kbase_mem_migrate *mem_migrate = &kbdev->mem_migrate; + /* Page migration support compiled in, either explicitly or + * by default, so the default behaviour is to follow the choice + * of large pages if not selected at insmod. Check insmod parameter + * integer for a negative value to see if insmod parameter was + * passed in at all (it will override the default negative value). + */ if (kbase_page_migration_enabled < 0) - kbase_page_migration_enabled = 0; + kbase_page_migration_enabled = kbdev->pagesize_2mb ? 1 : 0; + else + dev_info(kbdev->dev, "Page migration support explicitly %s at insmod.", + kbase_page_migration_enabled ? "enabled" : "disabled"); spin_lock_init(&mem_migrate->free_pages_lock); INIT_LIST_HEAD(&mem_migrate->free_pages_list); @@ -621,12 +693,17 @@ void kbase_mem_migrate_init(struct kbase_device *kbdev) mem_migrate->free_pages_workq = alloc_workqueue("free_pages_workq", WQ_UNBOUND | WQ_MEM_RECLAIM, 1); INIT_WORK(&mem_migrate->free_pages_work, kbase_free_pages_worker); +#endif } void kbase_mem_migrate_term(struct kbase_device *kbdev) { struct kbase_mem_migrate *mem_migrate = &kbdev->mem_migrate; +#if !IS_ENABLED(CONFIG_PAGE_MIGRATION_SUPPORT) + /* Page migration explicitly disabled at compile time - do nothing */ + return; +#endif if (mem_migrate->free_pages_workq) destroy_workqueue(mem_migrate->free_pages_workq); #if (KERNEL_VERSION(6, 0, 0) > LINUX_VERSION_CODE) diff --git a/mali_kbase/mali_kbase_mem_migrate.h b/mali_kbase/mali_kbase_mem_migrate.h index 76bbc99..e9f3fc4 100644 --- a/mali_kbase/mali_kbase_mem_migrate.h +++ b/mali_kbase/mali_kbase_mem_migrate.h @@ -18,6 +18,8 @@ * http://www.gnu.org/licenses/gpl-2.0.html. * */ +#ifndef _KBASE_MEM_MIGRATE_H +#define _KBASE_MEM_MIGRATE_H /** * DOC: Base kernel page migration implementation. @@ -43,7 +45,11 @@ /* Global integer used to determine if module parameter value has been * provided and if page migration feature is enabled. */ +#if !IS_ENABLED(CONFIG_PAGE_MIGRATION_SUPPORT) +extern const int kbase_page_migration_enabled; +#else extern int kbase_page_migration_enabled; +#endif /** * kbase_alloc_page_metadata - Allocate and initialize page metadata @@ -63,6 +69,8 @@ extern int kbase_page_migration_enabled; bool kbase_alloc_page_metadata(struct kbase_device *kbdev, struct page *p, dma_addr_t dma_addr, u8 group_id); +bool kbase_is_page_migration_enabled(void); + /** * kbase_free_page_later - Defer freeing of given page. 
 * @kbdev: Pointer to kbase device
@@ -106,3 +114,5 @@ void kbase_mem_migrate_init(struct kbase_device *kbdev);
 * and destroy workqueue associated.
 */
 void kbase_mem_migrate_term(struct kbase_device *kbdev);
+
+#endif /* _KBASE_MEM_MIGRATE_H */
diff --git a/mali_kbase/mali_kbase_mem_pool.c b/mali_kbase/mali_kbase_mem_pool.c
index 58716be..d942ff5 100644
--- a/mali_kbase/mali_kbase_mem_pool.c
+++ b/mali_kbase/mali_kbase_mem_pool.c
@@ -141,17 +141,21 @@ static bool set_pool_new_page_metadata(struct kbase_mem_pool *pool, struct page
 * Only update page status and add the page to the memory pool if
 * it is not isolated.
 */
- spin_lock(&page_md->migrate_lock);
- if (PAGE_STATUS_GET(page_md->status) == (u8)NOT_MOVABLE) {
+ if (!IS_ENABLED(CONFIG_PAGE_MIGRATION_SUPPORT))
 not_movable = true;
- } else if (!WARN_ON_ONCE(IS_PAGE_ISOLATED(page_md->status))) {
- page_md->status = PAGE_STATUS_SET(page_md->status, (u8)MEM_POOL);
- page_md->data.mem_pool.pool = pool;
- page_md->data.mem_pool.kbdev = pool->kbdev;
- list_add(&p->lru, page_list);
- (*list_size)++;
+ else {
+ spin_lock(&page_md->migrate_lock);
+ if (PAGE_STATUS_GET(page_md->status) == (u8)NOT_MOVABLE) {
+ not_movable = true;
+ } else if (!WARN_ON_ONCE(IS_PAGE_ISOLATED(page_md->status))) {
+ page_md->status = PAGE_STATUS_SET(page_md->status, (u8)MEM_POOL);
+ page_md->data.mem_pool.pool = pool;
+ page_md->data.mem_pool.kbdev = pool->kbdev;
+ list_add(&p->lru, page_list);
+ (*list_size)++;
+ }
+ spin_unlock(&page_md->migrate_lock);
 }
- spin_unlock(&page_md->migrate_lock);
 
 if (not_movable) {
 kbase_free_page_later(pool->kbdev, p);
@@ -173,7 +177,7 @@ static void kbase_mem_pool_add_locked(struct kbase_mem_pool *pool,
 
 lockdep_assert_held(&pool->pool_lock);
 
- if (!pool->order && kbase_page_migration_enabled) {
+ if (!pool->order && kbase_is_page_migration_enabled()) {
 if (set_pool_new_page_metadata(pool, p, &pool->page_list, &pool->cur_size))
 queue_work_to_free = true;
 } else {
@@ -204,7 +208,7 @@ static void kbase_mem_pool_add_list_locked(struct kbase_mem_pool *pool,
 
 lockdep_assert_held(&pool->pool_lock);
 
- if (!pool->order && kbase_page_migration_enabled) {
+ if (!pool->order && kbase_is_page_migration_enabled()) {
 struct page *p, *tmp;
 
 list_for_each_entry_safe(p, tmp, page_list, lru) {
@@ -246,7 +250,7 @@ static struct page *kbase_mem_pool_remove_locked(struct kbase_mem_pool *pool,
 
 p = list_first_entry(&pool->page_list, struct page, lru);
 
- if (!pool->order && kbase_page_migration_enabled) {
+ if (!pool->order && kbase_is_page_migration_enabled()) {
 struct kbase_page_metadata *page_md = kbase_page_private(p);
 
 spin_lock(&page_md->migrate_lock);
@@ -322,7 +326,7 @@ struct page *kbase_mem_alloc_page(struct kbase_mem_pool *pool)
 if (pool->order)
 gfp |= GFP_HIGHUSER | __GFP_NOWARN;
 else
- gfp |= kbase_page_migration_enabled ? GFP_HIGHUSER_MOVABLE : GFP_HIGHUSER;
+ gfp |= kbase_is_page_migration_enabled() ?
GFP_HIGHUSER_MOVABLE : GFP_HIGHUSER; p = kbdev->mgm_dev->ops.mgm_alloc_page(kbdev->mgm_dev, pool->group_id, gfp, pool->order); @@ -339,7 +343,7 @@ struct page *kbase_mem_alloc_page(struct kbase_mem_pool *pool) } /* Setup page metadata for 4KB pages when page migration is enabled */ - if (!pool->order && kbase_page_migration_enabled) { + if (!pool->order && kbase_is_page_migration_enabled()) { INIT_LIST_HEAD(&p->lru); if (!kbase_alloc_page_metadata(kbdev, p, dma_addr, pool->group_id)) { dma_unmap_page(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL); @@ -360,7 +364,7 @@ static void enqueue_free_pool_pages_work(struct kbase_mem_pool *pool) { struct kbase_mem_migrate *mem_migrate = &pool->kbdev->mem_migrate; - if (!pool->order && kbase_page_migration_enabled) + if (!pool->order && kbase_is_page_migration_enabled()) queue_work(mem_migrate->free_pages_workq, &mem_migrate->free_pages_work); } @@ -375,7 +379,7 @@ void kbase_mem_pool_free_page(struct kbase_mem_pool *pool, struct page *p) kbdev = pool->kbdev; - if (!pool->order && kbase_page_migration_enabled) { + if (!pool->order && kbase_is_page_migration_enabled()) { kbase_free_page_later(kbdev, p); pool_dbg(pool, "page to be freed to kernel later\n"); } else { @@ -677,9 +681,10 @@ void kbase_mem_pool_term(struct kbase_mem_pool *pool) /* Before returning wait to make sure there are no pages undergoing page isolation * which will require reference to this pool. */ - while (atomic_read(&pool->isolation_in_progress_cnt)) - cpu_relax(); - + if (kbase_is_page_migration_enabled()) { + while (atomic_read(&pool->isolation_in_progress_cnt)) + cpu_relax(); + } pool_dbg(pool, "terminated\n"); } KBASE_EXPORT_TEST_API(kbase_mem_pool_term); diff --git a/mali_kbase/mali_kbase_pbha.c b/mali_kbase/mali_kbase_pbha.c index b65f9e7..b446bd5 100644 --- a/mali_kbase/mali_kbase_pbha.c +++ b/mali_kbase/mali_kbase_pbha.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2021-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -23,7 +23,10 @@ #include <device/mali_kbase_device.h> #include <mali_kbase.h> + +#if MALI_USE_CSF #define DTB_SET_SIZE 2 +#endif static bool read_setting_valid(unsigned int id, unsigned int read_setting) { @@ -209,6 +212,7 @@ void kbase_pbha_write_settings(struct kbase_device *kbdev) } } +#if MALI_USE_CSF static int kbase_pbha_read_int_id_override_property(struct kbase_device *kbdev, const struct device_node *pbha_node) { @@ -216,17 +220,28 @@ static int kbase_pbha_read_int_id_override_property(struct kbase_device *kbdev, int sz, i; bool valid = true; - sz = of_property_count_elems_of_size(pbha_node, "int_id_override", - sizeof(u32)); + sz = of_property_count_elems_of_size(pbha_node, "int-id-override", sizeof(u32)); + + if (sz == -EINVAL) { + /* There is no int-id-override field. Fallback to int_id_override instead */ + sz = of_property_count_elems_of_size(pbha_node, "int_id_override", sizeof(u32)); + } + if (sz == -EINVAL) { + /* There is no int_id_override field. This is valid - but there's nothing further + * to do here. 
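+ *
+ * For reference, a DT node that does carry the property might look like
+ * (values illustrative): pbha { int-id-override = <2 0x32>, <9 0x05>; };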
+ */ + return 0; + } if (sz <= 0 || (sz % DTB_SET_SIZE != 0)) { dev_err(kbdev->dev, "Bad DTB format: pbha.int_id_override\n"); return -EINVAL; } - if (of_property_read_u32_array(pbha_node, "int_id_override", dtb_data, - sz) != 0) { - dev_err(kbdev->dev, - "Failed to read DTB pbha.int_id_override\n"); - return -EINVAL; + if (of_property_read_u32_array(pbha_node, "int-id-override", dtb_data, sz) != 0) { + /* There may be no int-id-override field. Fallback to int_id_override instead */ + if (of_property_read_u32_array(pbha_node, "int_id_override", dtb_data, sz) != 0) { + dev_err(kbdev->dev, "Failed to read DTB pbha.int_id_override\n"); + return -EINVAL; + } } for (i = 0; valid && i < sz; i = i + DTB_SET_SIZE) { @@ -250,17 +265,20 @@ static int kbase_pbha_read_int_id_override_property(struct kbase_device *kbdev, return 0; } -#if MALI_USE_CSF static int kbase_pbha_read_propagate_bits_property(struct kbase_device *kbdev, const struct device_node *pbha_node) { - u32 bits; + u32 bits = 0; int err; if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PBHA_HWU)) return 0; - err = of_property_read_u32(pbha_node, "propagate_bits", &bits); + err = of_property_read_u32(pbha_node, "propagate-bits", &bits); + + if (err == -EINVAL) { + err = of_property_read_u32(pbha_node, "propagate_bits", &bits); + } if (err < 0) { if (err != -EINVAL) { @@ -268,6 +286,10 @@ static int kbase_pbha_read_propagate_bits_property(struct kbase_device *kbdev, "DTB value for propagate_bits is improperly formed (err=%d)\n", err); return err; + } else { + /* Property does not exist */ + kbdev->pbha_propagate_bits = 0; + return 0; } } @@ -279,10 +301,11 @@ static int kbase_pbha_read_propagate_bits_property(struct kbase_device *kbdev, kbdev->pbha_propagate_bits = bits; return 0; } -#endif +#endif /* MALI_USE_CSF */ int kbase_pbha_read_dtb(struct kbase_device *kbdev) { +#if MALI_USE_CSF const struct device_node *pbha_node; int err; @@ -295,12 +318,12 @@ int kbase_pbha_read_dtb(struct kbase_device *kbdev) err = kbase_pbha_read_int_id_override_property(kbdev, pbha_node); -#if MALI_USE_CSF if (err < 0) return err; err = kbase_pbha_read_propagate_bits_property(kbdev, pbha_node); -#endif - return err; +#else + return 0; +#endif } diff --git a/mali_kbase/mali_kbase_pm.c b/mali_kbase/mali_kbase_pm.c index bfd5b7e..40278a8 100644 --- a/mali_kbase/mali_kbase_pm.c +++ b/mali_kbase/mali_kbase_pm.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -211,10 +211,28 @@ int kbase_pm_driver_suspend(struct kbase_device *kbdev) kbdev->pm.active_count == 0); dev_dbg(kbdev->dev, ">wait_event - waiting done\n"); +#if MALI_USE_CSF + /* At this point, any kbase context termination should either have run to + * completion and any further context termination can only begin after + * the system resumes. Therefore, it is now safe to skip taking the context + * list lock when traversing the context list. 
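+ *
+ * The halt is paired with kbase_csf_kcpu_queue_resume_timers() on resume,
+ * and also on suspend failure below.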
+ */ + if (kbase_csf_kcpu_queue_halt_timers(kbdev)) { + rt_mutex_lock(&kbdev->pm.lock); + kbdev->pm.suspending = false; + rt_mutex_unlock(&kbdev->pm.lock); + return -1; + } +#endif + /* NOTE: We synchronize with anything that was just finishing a * kbase_pm_context_idle() call by locking the pm.lock below */ if (kbase_hwaccess_pm_suspend(kbdev)) { +#if MALI_USE_CSF + /* Resume the timers in case of suspend failure. */ + kbase_csf_kcpu_queue_resume_timers(kbdev); +#endif rt_mutex_lock(&kbdev->pm.lock); kbdev->pm.suspending = false; rt_mutex_unlock(&kbdev->pm.lock); @@ -262,6 +280,8 @@ void kbase_pm_driver_resume(struct kbase_device *kbdev, bool arb_gpu_start) kbasep_js_resume(kbdev); #else kbase_csf_scheduler_pm_resume(kbdev); + + kbase_csf_kcpu_queue_resume_timers(kbdev); #endif /* Matching idle call, to power off the GPU/cores if we didn't actually @@ -283,6 +303,10 @@ void kbase_pm_driver_resume(struct kbase_device *kbdev, bool arb_gpu_start) /* Resume HW counters intermediaries. */ kbase_vinstr_resume(kbdev->vinstr_ctx); kbase_kinstr_prfcnt_resume(kbdev->kinstr_prfcnt_ctx); + /* System resume callback is complete */ + kbdev->pm.resuming = false; + /* Unblock the threads waiting for the completion of System suspend/resume */ + wake_up_all(&kbdev->pm.resume_wait); } int kbase_pm_suspend(struct kbase_device *kbdev) diff --git a/mali_kbase/mali_kbase_pm.h b/mali_kbase/mali_kbase_pm.h index 0639762..4ff3699 100644 --- a/mali_kbase/mali_kbase_pm.h +++ b/mali_kbase/mali_kbase_pm.h @@ -292,13 +292,14 @@ void kbase_pm_apc_term(struct kbase_device *kbdev); */ void kbase_pm_apc_request(struct kbase_device *kbdev, u32 dur_usec); -/* - * Print debug message indicating power state of GPU. +/** + * Print debug message indicating power state of GPU * @kbdev: The kbase device structure for the device (must be a valid pointer) + * @timeout_msg: A message to print. * * Prerequisite: GPU is powered. - * Takes and releases kbdev->hwaccess_lock + * Takes and releases kbdev->hwaccess_lock on CSF GPUs. */ -void kbase_gpu_timeout_debug_message(struct kbase_device *kbdev); +void kbase_gpu_timeout_debug_message(struct kbase_device *kbdev, const char *timeout_msg); #endif /* _KBASE_PM_H_ */ diff --git a/mali_kbase/mali_kbase_softjobs.c b/mali_kbase/mali_kbase_softjobs.c index d65ff2d..0ad2bf8 100644 --- a/mali_kbase/mali_kbase_softjobs.c +++ b/mali_kbase/mali_kbase_softjobs.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2011-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2023 ARM Limited. All rights reserved. 
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -41,6 +41,7 @@ #include <linux/kernel.h> #include <linux/cache.h> #include <linux/file.h> +#include <linux/version_compat_defs.h> #if !MALI_USE_CSF /** @@ -751,7 +752,7 @@ static void *dma_buf_kmap_page(struct kbase_mem_phy_alloc *gpu_alloc, if (page_index == page_num) { *page = sg_page_iter_page(&sg_iter); - return kmap(*page); + return kbase_kmap(*page); } page_index++; } @@ -797,14 +798,13 @@ static int kbase_mem_copy_from_extres(struct kbase_context *kctx, for (i = 0; i < buf_data->nr_extres_pages && target_page_nr < buf_data->nr_pages; i++) { struct page *pg = buf_data->extres_pages[i]; - void *extres_page = kmap(pg); - + void *extres_page = kbase_kmap(pg); if (extres_page) { ret = kbase_mem_copy_to_pinned_user_pages( pages, extres_page, &to_copy, buf_data->nr_pages, &target_page_nr, offset); - kunmap(pg); + kbase_kunmap(pg, extres_page); if (ret) goto out_unlock; } @@ -839,7 +839,7 @@ static int kbase_mem_copy_from_extres(struct kbase_context *kctx, &target_page_nr, offset); #if KERNEL_VERSION(5, 6, 0) <= LINUX_VERSION_CODE - kunmap(pg); + kbase_kunmap(pg, extres_page); #else dma_buf_kunmap(dma_buf, i, extres_page); #endif diff --git a/mali_kbase/mali_kbase_strings.h b/mali_kbase/mali_kbase_strings.h deleted file mode 100644 index c3f94f9..0000000 --- a/mali_kbase/mali_kbase_strings.h +++ /dev/null @@ -1,23 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -/* - * - * (C) COPYRIGHT 2010-2016, 2020-2021 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU license. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * - */ - -extern const char kbase_drv_name[]; -extern const char kbase_timeline_name[]; diff --git a/mali_kbase/mali_kbase_utility.h b/mali_kbase/mali_kbase_utility.h deleted file mode 100644 index 2dad49b..0000000 --- a/mali_kbase/mali_kbase_utility.h +++ /dev/null @@ -1,52 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -/* - * - * (C) COPYRIGHT 2012-2013, 2015, 2018, 2020-2021 ARM Limited. All rights reserved. - * - * This program is free software and is provided to you under the terms of the - * GNU General Public License version 2 as published by the Free Software - * Foundation, and any use by you of this program is subject to the terms - * of such GNU license. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. 
- * - */ - -#ifndef _KBASE_UTILITY_H -#define _KBASE_UTILITY_H - -#ifndef _KBASE_H_ -#error "Don't include this file directly, use mali_kbase.h instead" -#endif - -static inline void kbase_timer_setup(struct timer_list *timer, - void (*callback)(struct timer_list *timer)) -{ -#if KERNEL_VERSION(4, 14, 0) > LINUX_VERSION_CODE - setup_timer(timer, (void (*)(unsigned long)) callback, - (unsigned long) timer); -#else - timer_setup(timer, callback, 0); -#endif -} - -#ifndef WRITE_ONCE - #ifdef ASSIGN_ONCE - #define WRITE_ONCE(x, val) ASSIGN_ONCE(val, x) - #else - #define WRITE_ONCE(x, val) (ACCESS_ONCE(x) = (val)) - #endif -#endif - -#ifndef READ_ONCE - #define READ_ONCE(x) ACCESS_ONCE(x) -#endif - -#endif /* _KBASE_UTILITY_H */ diff --git a/mali_kbase/mali_kbase_vinstr.c b/mali_kbase/mali_kbase_vinstr.c index 5f3dabd..3fce09c 100644 --- a/mali_kbase/mali_kbase_vinstr.c +++ b/mali_kbase/mali_kbase_vinstr.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2011-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -541,8 +541,10 @@ void kbase_vinstr_term(struct kbase_vinstr_context *vctx) void kbase_vinstr_suspend(struct kbase_vinstr_context *vctx) { - if (WARN_ON(!vctx)) + if (!vctx) { + pr_warn("%s: vctx is NULL\n", __func__); return; + } mutex_lock(&vctx->lock); @@ -571,8 +573,10 @@ void kbase_vinstr_suspend(struct kbase_vinstr_context *vctx) void kbase_vinstr_resume(struct kbase_vinstr_context *vctx) { - if (WARN_ON(!vctx)) + if (!vctx) { + pr_warn("%s: vctx is NULL\n", __func__); return; + } mutex_lock(&vctx->lock); diff --git a/mali_kbase/mali_linux_trace.h b/mali_kbase/mali_linux_trace.h index 49058d3..1293a0b 100644 --- a/mali_kbase/mali_linux_trace.h +++ b/mali_kbase/mali_linux_trace.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2011-2016, 2018-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2011-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -173,7 +173,7 @@ TRACE_EVENT(mali_total_alloc_pages_change, ((status) & AS_FAULTSTATUS_ACCESS_TYPE_MASK) #define KBASE_MMU_FAULT_ACCESS_SYMBOLIC_STRINGS _ENSURE_PARENTHESIS(\ {AS_FAULTSTATUS_ACCESS_TYPE_ATOMIC, "ATOMIC" }, \ - {AS_FAULTSTATUS_ACCESS_TYPE_EX, "EXECUTE"}, \ + {AS_FAULTSTATUS_ACCESS_TYPE_EXECUTE, "EXECUTE"}, \ {AS_FAULTSTATUS_ACCESS_TYPE_READ, "READ" }, \ {AS_FAULTSTATUS_ACCESS_TYPE_WRITE, "WRITE" }) #define KBASE_MMU_FAULT_STATUS_ACCESS_PRINT(status) \ diff --git a/mali_kbase/mali_kbase_strings.c b/mali_kbase/mali_power_gpu_work_period_trace.c index 84784be..8e7bf6f 100644 --- a/mali_kbase/mali_kbase_strings.c +++ b/mali_kbase/mali_power_gpu_work_period_trace.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2010-2016, 2020-2021 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2023 ARM Limited. All rights reserved.
* * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -19,10 +19,10 @@ * */ -#include "mali_kbase_strings.h" - -#define KBASE_DRV_NAME "mali" -#define KBASE_TIMELINE_NAME KBASE_DRV_NAME ".timeline" - -const char kbase_drv_name[] = KBASE_DRV_NAME; -const char kbase_timeline_name[] = KBASE_TIMELINE_NAME; +/* Create the trace point if not configured in kernel */ +#ifndef CONFIG_TRACE_POWER_GPU_WORK_PERIOD +#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) +#define CREATE_TRACE_POINTS +#include "mali_power_gpu_work_period_trace.h" +#endif /* CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD */ +#endif diff --git a/mali_kbase/mali_power_gpu_work_period_trace.h b/mali_kbase/mali_power_gpu_work_period_trace.h new file mode 100644 index 0000000..46e86ad --- /dev/null +++ b/mali_kbase/mali_power_gpu_work_period_trace.h @@ -0,0 +1,88 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * + * (C) COPYRIGHT 2023 ARM Limited. All rights reserved. + * + * This program is free software and is provided to you under the terms of the + * GNU General Public License version 2 as published by the Free Software + * Foundation, and any use by you of this program is subject to the terms + * of such GNU license. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + */ + +#ifndef _TRACE_POWER_GPU_WORK_PERIOD_MALI +#define _TRACE_POWER_GPU_WORK_PERIOD_MALI +#endif + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM power +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_FILE mali_power_gpu_work_period_trace +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH . + +#if !defined(_TRACE_POWER_GPU_WORK_PERIOD_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_POWER_GPU_WORK_PERIOD_H + +#include <linux/tracepoint.h> + +/** + * gpu_work_period - Reports GPU work period metrics + * + * @gpu_id: Unique GPU Identifier + * @uid: UID of an application + * @start_time_ns: Start time of a GPU work period in nanoseconds + * @end_time_ns: End time of a GPU work period in nanoseconds + * @total_active_duration_ns: Total amount of time the GPU was running GPU work for given + * UID during the GPU work period, in nanoseconds. This duration does + * not double-account parallel GPU work for the same UID. 
+ */ +TRACE_EVENT(gpu_work_period, + + TP_PROTO( + u32 gpu_id, + u32 uid, + u64 start_time_ns, + u64 end_time_ns, + u64 total_active_duration_ns + ), + + TP_ARGS(gpu_id, uid, start_time_ns, end_time_ns, total_active_duration_ns), + + TP_STRUCT__entry( + __field(u32, gpu_id) + __field(u32, uid) + __field(u64, start_time_ns) + __field(u64, end_time_ns) + __field(u64, total_active_duration_ns) + ), + + TP_fast_assign( + __entry->gpu_id = gpu_id; + __entry->uid = uid; + __entry->start_time_ns = start_time_ns; + __entry->end_time_ns = end_time_ns; + __entry->total_active_duration_ns = total_active_duration_ns; + ), + + TP_printk("gpu_id=%u uid=%u start_time_ns=%llu end_time_ns=%llu total_active_duration_ns=%llu", + __entry->gpu_id, + __entry->uid, + __entry->start_time_ns, + __entry->end_time_ns, + __entry->total_active_duration_ns) +); + +#endif /* _TRACE_POWER_GPU_WORK_PERIOD_H */ + +/* This part must be outside protection */ +#include <trace/define_trace.h> diff --git a/mali_kbase/mmu/backend/mali_kbase_mmu_csf.c b/mali_kbase/mmu/backend/mali_kbase_mmu_csf.c index 4cac787..a057d3c 100644 --- a/mali_kbase/mmu/backend/mali_kbase_mmu_csf.c +++ b/mali_kbase/mmu/backend/mali_kbase_mmu_csf.c @@ -146,8 +146,7 @@ void kbase_gpu_report_bus_fault_and_kill(struct kbase_context *kctx, GPU_FAULTSTATUS_ACCESS_TYPE_SHIFT; int source_id = (status & GPU_FAULTSTATUS_SOURCE_ID_MASK) >> GPU_FAULTSTATUS_SOURCE_ID_SHIFT; - const char *addr_valid = (status & GPU_FAULTSTATUS_ADDR_VALID_FLAG) ? - "true" : "false"; + const char *addr_valid = (status & GPU_FAULTSTATUS_ADDRESS_VALID_MASK) ? "true" : "false"; int as_no = as->number; unsigned long flags; const uintptr_t fault_addr = fault->addr; @@ -247,12 +246,13 @@ void kbase_mmu_report_fault_and_kill(struct kbase_context *kctx, spin_lock_irqsave(&kbdev->hwaccess_lock, flags); kbase_mmu_disable(kctx); kbase_ctx_flag_set(kctx, KCTX_AS_DISABLED_ON_FAULT); + kbase_debug_csf_fault_notify(kbdev, kctx, DF_GPU_PAGE_FAULT); + kbase_csf_ctx_report_page_fault_for_active_groups(kctx, fault); spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); mutex_unlock(&kbdev->mmu_hw_mutex); /* AS transaction end */ - kbase_debug_csf_fault_notify(kbdev, kctx, DF_GPU_PAGE_FAULT); /* Switching to UNMAPPED mode above would have enabled the firmware to * recover from the fault (if the memory access was made by firmware) * and it can then respond to CSG termination requests to be sent now. 
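The gpu_work_period event declared in the new mali_power_gpu_work_period_trace.h above uses the kernel's standard TRACE_EVENT() machinery: exactly one translation unit (here, conditionally, the renamed mali_power_gpu_work_period_trace.c) defines CREATE_TRACE_POINTS before including the header, and callers then emit the event through the generated trace_gpu_work_period() helper. A minimal sketch of a call site, where report_work_period() is a hypothetical wrapper rather than a function from this patch:

#include "mali_power_gpu_work_period_trace.h"

static void report_work_period(u32 gpu_id, u32 uid, u64 start_time_ns, u64 end_time_ns,
			       u64 total_active_duration_ns)
{
	/* Emits the power:gpu_work_period event whenever the tracepoint is enabled */
	trace_gpu_work_period(gpu_id, uid, start_time_ns, end_time_ns,
			      total_active_duration_ns);
}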
@@ -368,9 +368,9 @@ void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat) /* remember current mask */ spin_lock_irqsave(&kbdev->mmu_mask_change, flags); - new_mask = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK)); + new_mask = kbase_reg_read(kbdev, MMU_CONTROL_REG(MMU_IRQ_MASK)); /* mask interrupts for now */ - kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), 0); + kbase_reg_write(kbdev, MMU_CONTROL_REG(MMU_IRQ_MASK), 0); spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags); while (pf_bits) { @@ -380,11 +380,11 @@ void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat) struct kbase_fault *fault = &as->pf_data; /* find faulting address */ - fault->addr = kbase_reg_read(kbdev, MMU_AS_REG(as_no, - AS_FAULTADDRESS_HI)); + fault->addr = kbase_reg_read(kbdev, + MMU_STAGE1_REG(MMU_AS_REG(as_no, AS_FAULTADDRESS_HI))); fault->addr <<= 32; - fault->addr |= kbase_reg_read(kbdev, MMU_AS_REG(as_no, - AS_FAULTADDRESS_LO)); + fault->addr |= kbase_reg_read( + kbdev, MMU_STAGE1_REG(MMU_AS_REG(as_no, AS_FAULTADDRESS_LO))); /* Mark the fault protected or not */ fault->protected_mode = false; @@ -393,14 +393,14 @@ void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat) kbase_as_fault_debugfs_new(kbdev, as_no); /* record the fault status */ - fault->status = kbase_reg_read(kbdev, MMU_AS_REG(as_no, - AS_FAULTSTATUS)); + fault->status = + kbase_reg_read(kbdev, MMU_STAGE1_REG(MMU_AS_REG(as_no, AS_FAULTSTATUS))); - fault->extra_addr = kbase_reg_read(kbdev, - MMU_AS_REG(as_no, AS_FAULTEXTRA_HI)); + fault->extra_addr = + kbase_reg_read(kbdev, MMU_STAGE1_REG(MMU_AS_REG(as_no, AS_FAULTEXTRA_HI))); fault->extra_addr <<= 32; - fault->extra_addr |= kbase_reg_read(kbdev, - MMU_AS_REG(as_no, AS_FAULTEXTRA_LO)); + fault->extra_addr |= + kbase_reg_read(kbdev, MMU_STAGE1_REG(MMU_AS_REG(as_no, AS_FAULTEXTRA_LO))); /* Mark page fault as handled */ pf_bits &= ~(1UL << as_no); @@ -432,9 +432,9 @@ void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat) /* reenable interrupts */ spin_lock_irqsave(&kbdev->mmu_mask_change, flags); - tmp = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK)); + tmp = kbase_reg_read(kbdev, MMU_CONTROL_REG(MMU_IRQ_MASK)); new_mask |= tmp; - kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), new_mask); + kbase_reg_write(kbdev, MMU_CONTROL_REG(MMU_IRQ_MASK), new_mask); spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags); } @@ -470,19 +470,16 @@ static void kbase_mmu_gpu_fault_worker(struct work_struct *data) spin_lock_irqsave(&kbdev->hwaccess_lock, flags); fault = &faulting_as->gf_data; status = fault->status; - as_valid = status & GPU_FAULTSTATUS_JASID_VALID_FLAG; + as_valid = status & GPU_FAULTSTATUS_JASID_VALID_MASK; address = fault->addr; spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); dev_warn(kbdev->dev, "GPU Fault 0x%08x (%s) in AS%u at 0x%016llx\n" "ASID_VALID: %s, ADDRESS_VALID: %s\n", - status, - kbase_gpu_exception_name( - GPU_FAULTSTATUS_EXCEPTION_TYPE_GET(status)), - as_nr, address, - as_valid ? "true" : "false", - status & GPU_FAULTSTATUS_ADDR_VALID_FLAG ? "true" : "false"); + status, kbase_gpu_exception_name(GPU_FAULTSTATUS_EXCEPTION_TYPE_GET(status)), + as_nr, address, as_valid ? "true" : "false", + status & GPU_FAULTSTATUS_ADDRESS_VALID_MASK ? 
"true" : "false"); kctx = kbase_ctx_sched_as_to_ctx(kbdev, as_nr); kbase_csf_ctx_handle_fault(kctx, fault); @@ -558,9 +555,8 @@ int kbase_mmu_as_init(struct kbase_device *kbdev, unsigned int i) kbdev->as[i].bf_data.addr = 0ULL; kbdev->as[i].pf_data.addr = 0ULL; kbdev->as[i].gf_data.addr = 0ULL; - kbdev->as[i].is_unresponsive = false; - kbdev->as[i].pf_wq = alloc_workqueue("mali_mmu%d", WQ_UNBOUND, 1, i); + kbdev->as[i].pf_wq = alloc_workqueue("mali_mmu%d", WQ_UNBOUND, 0, i); if (!kbdev->as[i].pf_wq) return -ENOMEM; diff --git a/mali_kbase/mmu/backend/mali_kbase_mmu_jm.c b/mali_kbase/mmu/backend/mali_kbase_mmu_jm.c index d716ce0..5c774c2 100644 --- a/mali_kbase/mmu/backend/mali_kbase_mmu_jm.c +++ b/mali_kbase/mmu/backend/mali_kbase_mmu_jm.c @@ -322,9 +322,9 @@ void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat) /* remember current mask */ spin_lock_irqsave(&kbdev->mmu_mask_change, flags); - new_mask = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK)); + new_mask = kbase_reg_read(kbdev, MMU_CONTROL_REG(MMU_IRQ_MASK)); /* mask interrupts for now */ - kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), 0); + kbase_reg_write(kbdev, MMU_CONTROL_REG(MMU_IRQ_MASK), 0); spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags); while (bf_bits | pf_bits) { @@ -355,11 +355,11 @@ void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat) kctx = kbase_ctx_sched_as_to_ctx_refcount(kbdev, as_no); /* find faulting address */ - fault->addr = kbase_reg_read(kbdev, MMU_AS_REG(as_no, - AS_FAULTADDRESS_HI)); + fault->addr = kbase_reg_read(kbdev, + MMU_STAGE1_REG(MMU_AS_REG(as_no, AS_FAULTADDRESS_HI))); fault->addr <<= 32; - fault->addr |= kbase_reg_read(kbdev, MMU_AS_REG(as_no, - AS_FAULTADDRESS_LO)); + fault->addr |= kbase_reg_read( + kbdev, MMU_STAGE1_REG(MMU_AS_REG(as_no, AS_FAULTADDRESS_LO))); /* Mark the fault protected or not */ fault->protected_mode = kbdev->protected_mode; @@ -372,13 +372,13 @@ void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat) kbase_as_fault_debugfs_new(kbdev, as_no); /* record the fault status */ - fault->status = kbase_reg_read(kbdev, MMU_AS_REG(as_no, - AS_FAULTSTATUS)); - fault->extra_addr = kbase_reg_read(kbdev, - MMU_AS_REG(as_no, AS_FAULTEXTRA_HI)); + fault->status = + kbase_reg_read(kbdev, MMU_STAGE1_REG(MMU_AS_REG(as_no, AS_FAULTSTATUS))); + fault->extra_addr = + kbase_reg_read(kbdev, MMU_STAGE1_REG(MMU_AS_REG(as_no, AS_FAULTEXTRA_HI))); fault->extra_addr <<= 32; - fault->extra_addr |= kbase_reg_read(kbdev, - MMU_AS_REG(as_no, AS_FAULTEXTRA_LO)); + fault->extra_addr |= + kbase_reg_read(kbdev, MMU_STAGE1_REG(MMU_AS_REG(as_no, AS_FAULTEXTRA_LO))); if (kbase_as_has_bus_fault(as, fault)) { /* Mark bus fault as handled. 
@@ -406,9 +406,9 @@ void kbase_mmu_interrupt(struct kbase_device *kbdev, u32 irq_stat) /* reenable interrupts */ spin_lock_irqsave(&kbdev->mmu_mask_change, flags); - tmp = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK)); + tmp = kbase_reg_read(kbdev, MMU_CONTROL_REG(MMU_IRQ_MASK)); new_mask |= tmp; - kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), new_mask); + kbase_reg_write(kbdev, MMU_CONTROL_REG(MMU_IRQ_MASK), new_mask); spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags); dev_dbg(kbdev->dev, "Leaving %s irq_stat %u\n", @@ -429,9 +429,8 @@ int kbase_mmu_as_init(struct kbase_device *kbdev, unsigned int i) kbdev->as[i].number = i; kbdev->as[i].bf_data.addr = 0ULL; kbdev->as[i].pf_data.addr = 0ULL; - kbdev->as[i].is_unresponsive = false; - kbdev->as[i].pf_wq = alloc_workqueue("mali_mmu%u", 0, 1, i); + kbdev->as[i].pf_wq = alloc_workqueue("mali_mmu%u", 0, 0, i); if (!kbdev->as[i].pf_wq) return -ENOMEM; diff --git a/mali_kbase/mmu/mali_kbase_mmu.c b/mali_kbase/mmu/mali_kbase_mmu.c index ccbd9c3..f8641a6 100644 --- a/mali_kbase/mmu/mali_kbase_mmu.c +++ b/mali_kbase/mmu/mali_kbase_mmu.c @@ -46,6 +46,7 @@ #if !MALI_USE_CSF #include <mali_kbase_hwaccess_jm.h> #endif +#include <linux/version_compat_defs.h> #include <mali_kbase_trace_gpu_mem.h> #include <backend/gpu/mali_kbase_pm_internal.h> @@ -57,6 +58,11 @@ /* Macro to convert updated PDGs to flags indicating levels skip in flush */ #define pgd_level_to_skip_flush(dirty_pgds) (~(dirty_pgds) & 0xF) +static int mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, + const u64 start_vpfn, struct tagged_addr *phys, size_t nr, + unsigned long flags, int const group_id, u64 *dirty_pgds, + struct kbase_va_region *reg, bool ignore_page_migration); + /* Small wrapper function to factor out GPU-dependent context releasing */ static void release_ctx(struct kbase_device *kbdev, struct kbase_context *kctx) @@ -201,7 +207,7 @@ static void mmu_flush_invalidate_as(struct kbase_device *kbdev, struct kbase_as mutex_lock(&kbdev->mmu_hw_mutex); spin_lock_irqsave(&kbdev->hwaccess_lock, flags); - if (kbdev->pm.backend.gpu_powered && (kbase_mmu_hw_do_flush_locked(kbdev, as, op_param))) + if (kbdev->pm.backend.gpu_ready && (kbase_mmu_hw_do_flush_locked(kbdev, as, op_param))) dev_err(kbdev->dev, "Flush for GPU page table update did not complete"); spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); @@ -389,7 +395,7 @@ static bool kbase_mmu_handle_isolated_pgd_page(struct kbase_device *kbdev, lockdep_assert_held(&mmut->mmu_lock); - if (!kbase_page_migration_enabled) + if (!kbase_is_page_migration_enabled()) return false; spin_lock(&page_md->migrate_lock); @@ -404,8 +410,10 @@ static bool kbase_mmu_handle_isolated_pgd_page(struct kbase_device *kbdev, page_md->status = PAGE_STATUS_SET(page_md->status, FREE_IN_PROGRESS); } + } else if ((PAGE_STATUS_GET(page_md->status) == FREE_IN_PROGRESS) || + (PAGE_STATUS_GET(page_md->status) == ALLOCATE_IN_PROGRESS)) { + /* Nothing to do - fall through */ } else { - WARN_ON_ONCE(mmut->kctx); WARN_ON_ONCE(PAGE_STATUS_GET(page_md->status) != NOT_MOVABLE); } spin_unlock(&page_md->migrate_lock); @@ -431,7 +439,7 @@ static bool kbase_mmu_handle_isolated_pgd_page(struct kbase_device *kbdev, * @pgd: Physical address of page directory to be freed. * * This function is supposed to be called with mmu_lock held and after - * ensuring that GPU won't be able to access the page. + * ensuring that the GPU won't be able to access the page. 
*/ static void kbase_mmu_free_pgd(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, phys_addr_t pgd) @@ -727,7 +735,7 @@ static void kbase_gpu_mmu_handle_permission_fault(struct kbase_context *kctx, case AS_FAULTSTATUS_ACCESS_TYPE_WRITE: kbase_gpu_mmu_handle_write_fault(kctx, faulting_as); break; - case AS_FAULTSTATUS_ACCESS_TYPE_EX: + case AS_FAULTSTATUS_ACCESS_TYPE_EXECUTE: kbase_mmu_report_fault_and_kill(kctx, faulting_as, "Execute Permission fault", fault); break; @@ -1293,10 +1301,11 @@ page_fault_retry: * so the no_flush version of insert_pages is used which allows * us to unlock the MMU as we see fit. */ - err = kbase_mmu_insert_pages_no_flush( - kbdev, &kctx->mmu, region->start_pfn + pfn_offset, - &kbase_get_gpu_phy_pages(region)[pfn_offset], new_pages, region->flags, - region->gpu_alloc->group_id, &dirty_pgds, region, false); + err = mmu_insert_pages_no_flush(kbdev, &kctx->mmu, region->start_pfn + pfn_offset, + &kbase_get_gpu_phy_pages(region)[pfn_offset], + new_pages, region->flags, + region->gpu_alloc->group_id, &dirty_pgds, region, + false); if (err) { kbase_free_phy_pages_helper(region->gpu_alloc, new_pages); @@ -1480,7 +1489,8 @@ static phys_addr_t kbase_mmu_alloc_pgd(struct kbase_device *kbdev, if (!p) return KBASE_MMU_INVALID_PGD_ADDRESS; - page = kmap(p); + page = kbase_kmap(p); + if (page == NULL) goto alloc_free; @@ -1513,7 +1523,7 @@ static phys_addr_t kbase_mmu_alloc_pgd(struct kbase_device *kbdev, */ kbase_mmu_sync_pgd_cpu(kbdev, kbase_dma_addr(p), PAGE_SIZE); - kunmap(p); + kbase_kunmap(p, page); return pgd; alloc_free: @@ -1553,7 +1563,7 @@ static int mmu_get_next_pgd(struct kbase_device *kbdev, struct kbase_mmu_table * vpfn &= 0x1FF; p = pfn_to_page(PFN_DOWN(*pgd)); - page = kmap(p); + page = kbase_kmap(p); if (page == NULL) { dev_err(kbdev->dev, "%s: kmap failure", __func__); return -EINVAL; @@ -1562,7 +1572,7 @@ static int mmu_get_next_pgd(struct kbase_device *kbdev, struct kbase_mmu_table * if (!kbdev->mmu_mode->pte_is_valid(page[vpfn], level)) { dev_dbg(kbdev->dev, "%s: invalid PTE at level %d vpfn 0x%llx", __func__, level, vpfn); - kunmap(p); + kbase_kunmap(p, page); return -EFAULT; } else { target_pgd = kbdev->mmu_mode->pte_to_phy_addr( @@ -1570,7 +1580,7 @@ static int mmu_get_next_pgd(struct kbase_device *kbdev, struct kbase_mmu_table * kbdev->mgm_dev, MGM_DEFAULT_PTE_GROUP, level, page[vpfn])); } - kunmap(p); + kbase_kunmap(p, page); *pgd = target_pgd; return 0; @@ -1700,10 +1710,10 @@ static void mmu_insert_pages_failure_recovery(struct kbase_device *kbdev, level <= MIDGARD_MMU_BOTTOMLEVEL; level++) { idx = (vpfn >> ((3 - level) * 9)) & 0x1FF; pgds[level] = pgd; - page = kmap(p); + page = kbase_kmap(p); if (mmu_mode->ate_is_valid(page[idx], level)) break; /* keep the mapping */ - kunmap(p); + kbase_kunmap(p, page); pgd = mmu_mode->pte_to_phy_addr(kbdev->mgm_dev->ops.mgm_pte_to_original_pte( kbdev->mgm_dev, MGM_DEFAULT_PTE_GROUP, level, page[idx])); p = phys_to_page(pgd); @@ -1736,7 +1746,7 @@ static void mmu_insert_pages_failure_recovery(struct kbase_device *kbdev, mmu_mode->entries_invalidate(&page[idx], pcount); if (!num_of_valid_entries) { - kunmap(p); + kbase_kunmap(p, page); kbase_mmu_add_to_free_pgds_list(mmut, p); @@ -1754,7 +1764,7 @@ static void mmu_insert_pages_failure_recovery(struct kbase_device *kbdev, kbase_mmu_sync_pgd(kbdev, mmut->kctx, pgd + (idx * sizeof(u64)), kbase_dma_addr(p) + sizeof(u64) * idx, sizeof(u64) * pcount, KBASE_MMU_OP_NONE); - kunmap(p); + kbase_kunmap(p, page); next: vpfn += count; } @@ -1764,7 +1774,7 @@ next: * 
going to happen to these pages at this stage. They might return * movable once they are returned to a memory pool. */ - if (kbase_page_migration_enabled && !ignore_page_migration && phys) { + if (kbase_is_page_migration_enabled() && !ignore_page_migration && phys) { const u64 num_pages = to_vpfn - from_vpfn + 1; u64 i; @@ -1831,7 +1841,6 @@ static void mmu_flush_invalidate_insert_pages(struct kbase_device *kbdev, * The bottom PGD level. * @insert_level: The level of MMU page table where the chain of newly allocated * PGDs needs to be linked-in/inserted. - * The top-most PDG level to be updated. * @insert_vpfn: The virtual page frame number for the ATE. * @pgds_to_insert: Ptr to an array (size MIDGARD_MMU_BOTTOMLEVEL+1) that contains * the physical addresses of newly allocated PGDs from index @@ -1839,7 +1848,7 @@ static void mmu_flush_invalidate_insert_pages(struct kbase_device *kbdev, * insert_level. * * The newly allocated PGDs are linked from the bottom level up and inserted into the PGD - * at insert_level which already exists in the MMU Page Tables.Migration status is also + * at insert_level which already exists in the MMU Page Tables. Migration status is also * updated for all the newly allocated PGD pages. * * Return: @@ -1873,7 +1882,8 @@ static int update_parent_pgds(struct kbase_device *kbdev, struct kbase_mmu_table goto failure_recovery; } - parent_page_va = kmap(parent_page); + parent_page_va = kbase_kmap(parent_page); + if (unlikely(parent_page_va == NULL)) { dev_err(kbdev->dev, "%s: kmap failure", __func__); err = -EINVAL; @@ -1886,7 +1896,7 @@ static int update_parent_pgds(struct kbase_device *kbdev, struct kbase_mmu_table parent_page_va[parent_vpfn] = kbdev->mgm_dev->ops.mgm_update_gpu_pte( kbdev->mgm_dev, MGM_DEFAULT_PTE_GROUP, parent_index, pte); kbdev->mmu_mode->set_num_valid_entries(parent_page_va, current_valid_entries + 1); - kunmap(parent_page); + kbase_kunmap(parent_page, parent_page_va); if (parent_index != insert_level) { /* Newly allocated PGDs */ @@ -1905,7 +1915,7 @@ static int update_parent_pgds(struct kbase_device *kbdev, struct kbase_mmu_table } /* Update the new target_pgd page to its stable state */ - if (kbase_page_migration_enabled) { + if (kbase_is_page_migration_enabled()) { struct kbase_page_metadata *page_md = kbase_page_private(phys_to_page(target_pgd)); @@ -1934,11 +1944,11 @@ failure_recovery: for (; pgd_index < cur_level; pgd_index++) { phys_addr_t pgd = pgds_to_insert[pgd_index]; struct page *pgd_page = pfn_to_page(PFN_DOWN(pgd)); - u64 *pgd_page_va = kmap(pgd_page); + u64 *pgd_page_va = kbase_kmap(pgd_page); u64 vpfn = (insert_vpfn >> ((3 - pgd_index) * 9)) & 0x1FF; kbdev->mmu_mode->entries_invalidate(&pgd_page_va[vpfn], 1); - kunmap(pgd_page); + kbase_kunmap(pgd_page, pgd_page_va); } return err; @@ -2001,10 +2011,11 @@ static int mmu_insert_alloc_pgds(struct kbase_device *kbdev, struct kbase_mmu_ta return 0; } -int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 start_vpfn, - struct tagged_addr phys, size_t nr, unsigned long flags, - int const group_id, enum kbase_caller_mmu_sync_info mmu_sync_info, - bool ignore_page_migration) +static int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 start_vpfn, + struct tagged_addr phys, size_t nr, unsigned long flags, + int const group_id, + enum kbase_caller_mmu_sync_info mmu_sync_info, + bool ignore_page_migration) { phys_addr_t pgd; u64 *pgd_page; @@ -2034,7 +2045,7 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 start_vpfn, /* If page migration is 
enabled, pages involved in multiple GPU mappings * are always treated as not movable. */ - if (kbase_page_migration_enabled && !ignore_page_migration) { + if (kbase_is_page_migration_enabled() && !ignore_page_migration) { struct page *phys_page = as_page(phys); struct kbase_page_metadata *page_md = kbase_page_private(phys_page); @@ -2099,7 +2110,8 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 start_vpfn, } p = pfn_to_page(PFN_DOWN(pgd)); - pgd_page = kmap(p); + + pgd_page = kbase_kmap(p); if (!pgd_page) { dev_err(kbdev->dev, "%s: kmap failure", __func__); err = -ENOMEM; @@ -2147,14 +2159,14 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 start_vpfn, kbdev->mmu_mode->entries_invalidate(&pgd_page[vindex], count); - kunmap(p); + kbase_kunmap(p, pgd_page); goto fail_unlock_free_pgds; } } insert_vpfn += count; remain -= count; - kunmap(p); + kbase_kunmap(p, pgd_page); } rt_mutex_unlock(&mmut->mmu_lock); @@ -2211,6 +2223,9 @@ static void kbase_mmu_progress_migration_on_insert(struct tagged_addr phys, struct page *phys_page = as_page(phys); struct kbase_page_metadata *page_md = kbase_page_private(phys_page); + if (!IS_ENABLED(CONFIG_PAGE_MIGRATION_SUPPORT)) + return; + spin_lock(&page_md->migrate_lock); /* If no GPU va region is given: the metadata provided are @@ -2245,6 +2260,9 @@ static void kbase_mmu_progress_migration_on_teardown(struct kbase_device *kbdev, { size_t i; + if (!IS_ENABLED(CONFIG_PAGE_MIGRATION_SUPPORT)) + return; + for (i = 0; i < requested_nr; i++) { struct page *phys_page = as_page(phys[i]); struct kbase_page_metadata *page_md = kbase_page_private(phys_page); @@ -2294,10 +2312,10 @@ u64 kbase_mmu_create_ate(struct kbase_device *const kbdev, group_id, level, entry); } -int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, - const u64 start_vpfn, struct tagged_addr *phys, size_t nr, - unsigned long flags, int const group_id, u64 *dirty_pgds, - struct kbase_va_region *reg, bool ignore_page_migration) +static int mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, + const u64 start_vpfn, struct tagged_addr *phys, size_t nr, + unsigned long flags, int const group_id, u64 *dirty_pgds, + struct kbase_va_region *reg, bool ignore_page_migration) { phys_addr_t pgd; u64 *pgd_page; @@ -2378,7 +2396,8 @@ int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu } p = pfn_to_page(PFN_DOWN(pgd)); - pgd_page = kmap(p); + pgd_page = kbase_kmap(p); + if (!pgd_page) { dev_err(kbdev->dev, "%s: kmap failure", __func__); err = -ENOMEM; @@ -2415,7 +2434,7 @@ int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu /* If page migration is enabled, this is the right time * to update the status of the page. 
*/ - if (kbase_page_migration_enabled && !ignore_page_migration && + if (kbase_is_page_migration_enabled() && !ignore_page_migration && !is_huge(phys[i]) && !is_partial(phys[i])) kbase_mmu_progress_migration_on_insert(phys[i], reg, mmut, insert_vpfn + i); @@ -2450,7 +2469,7 @@ int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu kbdev->mmu_mode->entries_invalidate(&pgd_page[vindex], count); - kunmap(p); + kbase_kunmap(p, pgd_page); goto fail_unlock_free_pgds; } } @@ -2458,7 +2477,7 @@ int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu phys += count; insert_vpfn += count; remain -= count; - kunmap(p); + kbase_kunmap(p, pgd_page); } rt_mutex_unlock(&mmut->mmu_lock); @@ -2485,6 +2504,23 @@ fail_unlock: return err; } +int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, + const u64 start_vpfn, struct tagged_addr *phys, size_t nr, + unsigned long flags, int const group_id, u64 *dirty_pgds, + struct kbase_va_region *reg) +{ + int err; + + /* Early out if there is nothing to do */ + if (nr == 0) + return 0; + + err = mmu_insert_pages_no_flush(kbdev, mmut, start_vpfn, phys, nr, flags, group_id, + dirty_pgds, reg, false); + + return err; +} + /* * Map 'nr' pages pointed to by 'phys' at GPU PFN 'vpfn' for GPU address space * number 'as_nr'. @@ -2492,7 +2528,7 @@ fail_unlock: int kbase_mmu_insert_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, u64 vpfn, struct tagged_addr *phys, size_t nr, unsigned long flags, int as_nr, int const group_id, enum kbase_caller_mmu_sync_info mmu_sync_info, - struct kbase_va_region *reg, bool ignore_page_migration) + struct kbase_va_region *reg) { int err; u64 dirty_pgds = 0; @@ -2501,8 +2537,8 @@ int kbase_mmu_insert_pages(struct kbase_device *kbdev, struct kbase_mmu_table *m if (nr == 0) return 0; - err = kbase_mmu_insert_pages_no_flush(kbdev, mmut, vpfn, phys, nr, flags, group_id, - &dirty_pgds, reg, ignore_page_migration); + err = mmu_insert_pages_no_flush(kbdev, mmut, vpfn, phys, nr, flags, group_id, &dirty_pgds, + reg, false); if (err) return err; @@ -2513,11 +2549,12 @@ int kbase_mmu_insert_pages(struct kbase_device *kbdev, struct kbase_mmu_table *m KBASE_EXPORT_TEST_API(kbase_mmu_insert_pages); -int kbase_mmu_insert_imported_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, - u64 vpfn, struct tagged_addr *phys, size_t nr, - unsigned long flags, int as_nr, int const group_id, - enum kbase_caller_mmu_sync_info mmu_sync_info, - struct kbase_va_region *reg) +int kbase_mmu_insert_pages_skip_status_update(struct kbase_device *kbdev, + struct kbase_mmu_table *mmut, u64 vpfn, + struct tagged_addr *phys, size_t nr, + unsigned long flags, int as_nr, int const group_id, + enum kbase_caller_mmu_sync_info mmu_sync_info, + struct kbase_va_region *reg) { int err; u64 dirty_pgds = 0; @@ -2529,8 +2566,8 @@ int kbase_mmu_insert_imported_pages(struct kbase_device *kbdev, struct kbase_mmu /* Imported allocations don't have metadata and therefore always ignore the * page migration logic. */ - err = kbase_mmu_insert_pages_no_flush(kbdev, mmut, vpfn, phys, nr, flags, group_id, - &dirty_pgds, reg, true); + err = mmu_insert_pages_no_flush(kbdev, mmut, vpfn, phys, nr, flags, group_id, &dirty_pgds, + reg, true); if (err) return err; @@ -2555,8 +2592,8 @@ int kbase_mmu_insert_aliased_pages(struct kbase_device *kbdev, struct kbase_mmu_ /* Memory aliases are always built on top of existing allocations, * therefore the state of physical pages shall be updated. 
*/ - err = kbase_mmu_insert_pages_no_flush(kbdev, mmut, vpfn, phys, nr, flags, group_id, - &dirty_pgds, reg, false); + err = mmu_insert_pages_no_flush(kbdev, mmut, vpfn, phys, nr, flags, group_id, &dirty_pgds, + reg, false); if (err) return err; @@ -2771,7 +2808,8 @@ static void kbase_mmu_update_and_free_parent_pgds(struct kbase_device *kbdev, current_level--) { phys_addr_t current_pgd = pgds[current_level]; struct page *p = phys_to_page(current_pgd); - u64 *current_page = kmap(p); + + u64 *current_page = kbase_kmap(p); unsigned int current_valid_entries = kbdev->mmu_mode->get_num_valid_entries(current_page); int index = (vpfn >> ((3 - current_level) * 9)) & 0x1FF; @@ -2783,7 +2821,7 @@ static void kbase_mmu_update_and_free_parent_pgds(struct kbase_device *kbdev, kbdev->mmu_mode->entries_invalidate(¤t_page[index], 1); if (current_valid_entries == 1 && current_level != MIDGARD_MMU_LEVEL(0)) { - kunmap(p); + kbase_kunmap(p, current_page); /* Ensure the cacheline containing the last valid entry * of PGD is invalidated from the GPU cache, before the @@ -2800,7 +2838,7 @@ static void kbase_mmu_update_and_free_parent_pgds(struct kbase_device *kbdev, kbdev->mmu_mode->set_num_valid_entries( current_page, current_valid_entries); - kunmap(p); + kbase_kunmap(p, current_page); kbase_mmu_sync_pgd(kbdev, mmut->kctx, current_pgd + (index * sizeof(u64)), kbase_dma_addr(p) + (index * sizeof(u64)), sizeof(u64), @@ -2856,7 +2894,7 @@ static void mmu_flush_invalidate_teardown_pages(struct kbase_device *kbdev, for (i = 0; !flush_done && i < phys_page_nr; i++) { spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags); - if (kbdev->pm.backend.gpu_powered && (!kctx || kctx->as_nr >= 0)) + if (kbdev->pm.backend.gpu_ready && (!kctx || kctx->as_nr >= 0)) mmu_flush_pa_range(kbdev, as_phys_addr_t(phys[i]), PAGE_SIZE, KBASE_MMU_OP_FLUSH_MEM); else @@ -2897,7 +2935,7 @@ static int kbase_mmu_teardown_pgd_pages(struct kbase_device *kbdev, struct kbase phys_addr_t next_pgd; index = (vpfn >> ((3 - level) * 9)) & 0x1FF; - page = kmap(p); + page = kbase_kmap(p); if (mmu_mode->ate_is_valid(page[index], level)) break; /* keep the mapping */ else if (!mmu_mode->pte_is_valid(page[index], level)) { @@ -2923,7 +2961,7 @@ static int kbase_mmu_teardown_pgd_pages(struct kbase_device *kbdev, struct kbase next_pgd = mmu_mode->pte_to_phy_addr( kbdev->mgm_dev->ops.mgm_pte_to_original_pte( kbdev->mgm_dev, MGM_DEFAULT_PTE_GROUP, level, page[index])); - kunmap(p); + kbase_kunmap(p, page); pgds[level] = pgd; pgd = next_pgd; p = phys_to_page(pgd); @@ -2934,7 +2972,7 @@ static int kbase_mmu_teardown_pgd_pages(struct kbase_device *kbdev, struct kbase case MIDGARD_MMU_LEVEL(1): dev_warn(kbdev->dev, "%s: No support for ATEs at level %d", __func__, level); - kunmap(p); + kbase_kunmap(p, page); goto out; case MIDGARD_MMU_LEVEL(2): /* can only teardown if count >= 512 */ @@ -2972,7 +3010,7 @@ static int kbase_mmu_teardown_pgd_pages(struct kbase_device *kbdev, struct kbase mmu_mode->entries_invalidate(&page[index], pcount); if (!num_of_valid_entries) { - kunmap(p); + kbase_kunmap(p, page); /* Ensure the cacheline(s) containing the last valid entries * of PGD is invalidated from the GPU cache, before the @@ -2998,17 +3036,48 @@ static int kbase_mmu_teardown_pgd_pages(struct kbase_device *kbdev, struct kbase kbase_dma_addr(p) + (index * sizeof(u64)), pcount * sizeof(u64), flush_op); next: - kunmap(p); - vpfn += count; - nr -= count; + kbase_kunmap(p, page); + vpfn += count; + nr -= count; } out: return 0; } -int kbase_mmu_teardown_pages(struct kbase_device 
*kbdev, struct kbase_mmu_table *mmut, u64 vpfn, - struct tagged_addr *phys, size_t nr_phys_pages, size_t nr_virt_pages, - int as_nr, bool ignore_page_migration) +/** + * mmu_teardown_pages - Remove GPU virtual addresses from the MMU page table + * + * @kbdev: Pointer to kbase device. + * @mmut: Pointer to GPU MMU page table. + * @vpfn: Start page frame number of the GPU virtual pages to unmap. + * @phys: Array of physical pages currently mapped to the virtual + * pages to unmap, or NULL. This is used for GPU cache maintenance + * and page migration support. + * @nr_phys_pages: Number of physical pages to flush. + * @nr_virt_pages: Number of virtual pages whose PTEs should be destroyed. + * @as_nr: Address space number, for GPU cache maintenance operations + * that happen outside a specific kbase context. + * @ignore_page_migration: Whether page migration metadata should be ignored. + * + * We actually discard the ATE and free the page table pages if no valid entries + * exist in the PGD. + * + * IMPORTANT: This uses kbasep_js_runpool_release_ctx() when the context is + * currently scheduled into the runpool, and so potentially uses a lot of locks. + * These locks must be taken in the correct order with respect to others + * already held by the caller. Refer to kbasep_js_runpool_release_ctx() for more + * information. + * + * The @p phys pointer to physical pages is not necessary for unmapping virtual memory, + * but it is used for fine-grained GPU cache maintenance. If @p phys is NULL, + * GPU cache maintenance will be done as usual; that is, invalidating the whole GPU caches + * instead of specific physical address ranges. + * + * Return: 0 on success, otherwise an error code. + */ +static int mmu_teardown_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, u64 vpfn, + struct tagged_addr *phys, size_t nr_phys_pages, size_t nr_virt_pages, + int as_nr, bool ignore_page_migration) { u64 start_vpfn = vpfn; enum kbase_mmu_op_type flush_op = KBASE_MMU_OP_NONE; @@ -3089,7 +3158,7 @@ int kbase_mmu_teardown_pages(struct kbase_device *kbdev, struct kbase_mmu_table * updated before releasing the lock to protect against concurrent * requests to migrate the pages, if they have been isolated. 
*/ - if (kbase_page_migration_enabled && phys && !ignore_page_migration) + if (kbase_is_page_migration_enabled() && phys && !ignore_page_migration) kbase_mmu_progress_migration_on_teardown(kbdev, phys, nr_phys_pages); kbase_mmu_free_pgds_list(kbdev, mmut); @@ -3098,7 +3167,22 @@ int kbase_mmu_teardown_pages(struct kbase_device *kbdev, struct kbase_mmu_table return err; } -KBASE_EXPORT_TEST_API(kbase_mmu_teardown_pages); + +int kbase_mmu_teardown_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, u64 vpfn, + struct tagged_addr *phys, size_t nr_phys_pages, size_t nr_virt_pages, + int as_nr) +{ + return mmu_teardown_pages(kbdev, mmut, vpfn, phys, nr_phys_pages, nr_virt_pages, as_nr, + false); +} + +int kbase_mmu_teardown_imported_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, + u64 vpfn, struct tagged_addr *phys, size_t nr_phys_pages, + size_t nr_virt_pages, int as_nr) +{ + return mmu_teardown_pages(kbdev, mmut, vpfn, phys, nr_phys_pages, nr_virt_pages, as_nr, + true); +} /** * kbase_mmu_update_pages_no_flush() - Update phy pages and attributes data in GPU @@ -3162,7 +3246,7 @@ int kbase_mmu_update_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu goto fail_unlock; p = pfn_to_page(PFN_DOWN(pgd)); - pgd_page = kmap(p); + pgd_page = kbase_kmap(p); if (!pgd_page) { dev_warn(kbdev->dev, "kmap failure on update_pages"); err = -ENOMEM; @@ -3217,7 +3301,7 @@ int kbase_mmu_update_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu vpfn += count; nr -= count; - kunmap(p); + kbase_kunmap(p, pgd_page); } rt_mutex_unlock(&mmut->mmu_lock); @@ -3339,6 +3423,9 @@ int kbase_mmu_migrate_page(struct tagged_addr old_phys, struct tagged_addr new_p unsigned int num_of_valid_entries; u8 vmap_count = 0; + /* If page migration support is not compiled in, return with fault */ + if (!IS_ENABLED(CONFIG_PAGE_MIGRATION_SUPPORT)) + return -EINVAL; /* Due to the hard binding of mmu_command_instr with kctx_id via kbase_mmu_hw_op_param, * here we skip the no kctx case, which is only used with MCU's mmut. */ @@ -3356,21 +3443,21 @@ int kbase_mmu_migrate_page(struct tagged_addr old_phys, struct tagged_addr new_p index = (vpfn >> ((3 - level) * 9)) & 0x1FF; /* Create all mappings before copying content. - * This is done as early as possible because is the only operation that may + * This is done as early as possible because it is the only operation that may * fail. It is possible to do this before taking any locks because the * pages to migrate are not going to change and even the parent PGD is not * going to be affected by any other concurrent operation, since the page * has been isolated before migration and therefore it cannot disappear in * the middle of this function. 
*/ - old_page = kmap(as_page(old_phys)); + old_page = kbase_kmap(as_page(old_phys)); if (!old_page) { dev_warn(kbdev->dev, "%s: kmap failure for old page.", __func__); ret = -EINVAL; goto old_page_map_error; } - new_page = kmap(as_page(new_phys)); + new_page = kbase_kmap(as_page(new_phys)); if (!new_page) { dev_warn(kbdev->dev, "%s: kmap failure for new page.", __func__); ret = -EINVAL; @@ -3457,14 +3544,13 @@ int kbase_mmu_migrate_page(struct tagged_addr old_phys, struct tagged_addr new_p goto get_pgd_at_level_error; } - pgd_page = kmap(phys_to_page(pgd)); + pgd_page = kbase_kmap(phys_to_page(pgd)); if (!pgd_page) { dev_warn(kbdev->dev, "%s: kmap failure for PGD page.", __func__); ret = -EINVAL; goto pgd_page_map_error; } - rt_mutex_lock(&kbdev->pm.lock); mutex_lock(&kbdev->mmu_hw_mutex); /* Lock MMU region and flush GPU cache by using GPU control, @@ -3475,14 +3561,13 @@ /* Defer the migration as L2 is in a transitional phase */ spin_unlock_irqrestore(&kbdev->hwaccess_lock, hwaccess_flags); mutex_unlock(&kbdev->mmu_hw_mutex); - rt_mutex_unlock(&kbdev->pm.lock); dev_dbg(kbdev->dev, "%s: L2 in transition, abort PGD page migration", __func__); ret = -EAGAIN; goto l2_state_defer_out; } /* Prevent transitional phases in L2 by starting the transaction */ mmu_page_migration_transaction_begin(kbdev); - if (kbdev->pm.backend.gpu_powered && mmut->kctx->as_nr >= 0) { + if (kbdev->pm.backend.gpu_ready && mmut->kctx->as_nr >= 0) { int as_nr = mmut->kctx->as_nr; struct kbase_as *as = &kbdev->as[as_nr]; @@ -3498,7 +3583,6 @@ if (ret < 0) { mutex_unlock(&kbdev->mmu_hw_mutex); - rt_mutex_unlock(&kbdev->pm.lock); dev_err(kbdev->dev, "%s: failed to lock MMU region or flush GPU cache", __func__); goto undo_mappings; } @@ -3574,7 +3658,7 @@ int kbase_mmu_migrate_page(struct tagged_addr old_phys, struct tagged_addr new_p * won't have any effect on them. */ spin_lock_irqsave(&kbdev->hwaccess_lock, hwaccess_flags); - if (kbdev->pm.backend.gpu_powered && mmut->kctx->as_nr >= 0) { + if (kbdev->pm.backend.gpu_ready && mmut->kctx->as_nr >= 0) { int as_nr = mmut->kctx->as_nr; struct kbase_as *as = &kbdev->as[as_nr]; @@ -3590,7 +3674,6 @@ spin_unlock_irqrestore(&kbdev->hwaccess_lock, hwaccess_flags); /* Releasing locks before checking the migration transaction error state */ mutex_unlock(&kbdev->mmu_hw_mutex); - rt_mutex_unlock(&kbdev->pm.lock); spin_lock_irqsave(&kbdev->hwaccess_lock, hwaccess_flags); /* Release the transition prevention in L2 by ending the transaction */ @@ -3623,24 +3706,24 @@ set_page_private(as_page(old_phys), 0); l2_state_defer_out: - kunmap(phys_to_page(pgd)); + kbase_kunmap(phys_to_page(pgd), pgd_page); pgd_page_map_error: get_pgd_at_level_error: page_state_change_out: rt_mutex_unlock(&mmut->mmu_lock); - kunmap(as_page(new_phys)); + kbase_kunmap(as_page(new_phys), new_page); new_page_map_error: - kunmap(as_page(old_phys)); + kbase_kunmap(as_page(old_phys), old_page); old_page_map_error: return ret; undo_mappings: /* Unlock the MMU table and undo mappings.
*/ rt_mutex_unlock(&mmut->mmu_lock); - kunmap(phys_to_page(pgd)); - kunmap(as_page(new_phys)); - kunmap(as_page(old_phys)); + kbase_kunmap(phys_to_page(pgd), pgd_page); + kbase_kunmap(as_page(new_phys), new_page); + kbase_kunmap(as_page(old_phys), old_page); return ret; } @@ -3657,7 +3740,7 @@ static void mmu_teardown_level(struct kbase_device *kbdev, struct kbase_mmu_tabl lockdep_assert_held(&mmut->mmu_lock); - pgd_page = kmap_atomic(p); + pgd_page = kbase_kmap_atomic(p); /* kmap_atomic should NEVER fail. */ if (WARN_ON_ONCE(pgd_page == NULL)) return; @@ -3673,11 +3756,11 @@ static void mmu_teardown_level(struct kbase_device *kbdev, struct kbase_mmu_tabl * there are no pages left mapped on the GPU for a context. Hence the count * of valid entries is expected to be zero here. */ - if (kbase_page_migration_enabled && mmut->kctx) + if (kbase_is_page_migration_enabled() && mmut->kctx) WARN_ON_ONCE(kbdev->mmu_mode->get_num_valid_entries(pgd_page)); /* Invalidate page after copying */ mmu_mode->entries_invalidate(pgd_page, KBASE_MMU_PAGE_ENTRIES); - kunmap_atomic(pgd_page); + kbase_kunmap_atomic(pgd_page); pgd_page = pgd_page_buffer; if (level < MIDGARD_MMU_BOTTOMLEVEL) { @@ -3696,6 +3779,24 @@ static void mmu_teardown_level(struct kbase_device *kbdev, struct kbase_mmu_tabl kbase_mmu_free_pgd(kbdev, mmut, pgd); } +static void kbase_mmu_mark_non_movable(struct page *page) +{ + struct kbase_page_metadata *page_md; + + if (!kbase_is_page_migration_enabled()) + return; + + page_md = kbase_page_private(page); + + spin_lock(&page_md->migrate_lock); + page_md->status = PAGE_STATUS_SET(page_md->status, NOT_MOVABLE); + + if (IS_PAGE_MOVABLE(page_md->status)) + page_md->status = PAGE_MOVABLE_CLEAR(page_md->status); + + spin_unlock(&page_md->migrate_lock); +} + int kbase_mmu_init(struct kbase_device *const kbdev, struct kbase_mmu_table *const mmut, struct kbase_context *const kctx, int const group_id) @@ -3729,11 +3830,10 @@ int kbase_mmu_init(struct kbase_device *const kbdev, return -ENOMEM; } - rt_mutex_lock(&mmut->mmu_lock); mmut->pgd = kbase_mmu_alloc_pgd(kbdev, mmut); - rt_mutex_unlock(&mmut->mmu_lock); } + kbase_mmu_mark_non_movable(pfn_to_page(PFN_DOWN(mmut->pgd))); return 0; } @@ -3769,7 +3869,7 @@ void kbase_mmu_flush_pa_range(struct kbase_device *kbdev, struct kbase_context * spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags); if (mmu_flush_cache_on_gpu_ctrl(kbdev) && (flush_op != KBASE_MMU_OP_NONE) && - kbdev->pm.backend.gpu_powered && (!kctx || kctx->as_nr >= 0)) + kbdev->pm.backend.gpu_ready && (!kctx || kctx->as_nr >= 0)) mmu_flush_pa_range(kbdev, phys, size, KBASE_MMU_OP_FLUSH_PT); spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags); #endif @@ -3794,7 +3894,7 @@ static size_t kbasep_mmu_dump_level(struct kbase_context *kctx, phys_addr_t pgd, kbdev = kctx->kbdev; mmu_mode = kbdev->mmu_mode; - pgd_page = kmap(pfn_to_page(PFN_DOWN(pgd))); + pgd_page = kbase_kmap(pfn_to_page(PFN_DOWN(pgd))); if (!pgd_page) { dev_warn(kbdev->dev, "%s: kmap failure", __func__); return 0; @@ -3829,7 +3929,7 @@ static size_t kbasep_mmu_dump_level(struct kbase_context *kctx, phys_addr_t pgd, target_pgd, level + 1, buffer, size_left); if (!dump_size) { - kunmap(pfn_to_page(PFN_DOWN(pgd))); + kbase_kunmap(pfn_to_page(PFN_DOWN(pgd)), pgd_page); return 0; } size += dump_size; @@ -3837,7 +3937,7 @@ static size_t kbasep_mmu_dump_level(struct kbase_context *kctx, phys_addr_t pgd, } } - kunmap(pfn_to_page(PFN_DOWN(pgd))); + kbase_kunmap(pfn_to_page(PFN_DOWN(pgd)), pgd_page); return size; } diff --git 
a/mali_kbase/mmu/mali_kbase_mmu.h b/mali_kbase/mmu/mali_kbase_mmu.h index 861a5f4..e13e9b9 100644 --- a/mali_kbase/mmu/mali_kbase_mmu.h +++ b/mali_kbase/mmu/mali_kbase_mmu.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* * - * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software @@ -36,8 +36,8 @@ struct kbase_va_region; * A pointer to this type is passed down from the outer-most callers in the kbase * module - where the information resides as to the synchronous / asynchronous * nature of the call flow, with respect to MMU operations. ie - does the call flow relate to - * existing GPU work does it come from requests (like ioctl) from user-space, power management, - * etc. + * existing GPU work or does it come from requests (like ioctl) from user-space, power + * management, etc. * * @CALLER_MMU_UNSET_SYNCHRONICITY: default value must be invalid to avoid accidental choice * of a 'valid' value @@ -154,25 +154,43 @@ u64 kbase_mmu_create_ate(struct kbase_device *kbdev, int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, u64 vpfn, struct tagged_addr *phys, size_t nr, unsigned long flags, int group_id, u64 *dirty_pgds, - struct kbase_va_region *reg, bool ignore_page_migration); + struct kbase_va_region *reg); int kbase_mmu_insert_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, u64 vpfn, struct tagged_addr *phys, size_t nr, unsigned long flags, int as_nr, int group_id, enum kbase_caller_mmu_sync_info mmu_sync_info, - struct kbase_va_region *reg, bool ignore_page_migration); -int kbase_mmu_insert_imported_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, - u64 vpfn, struct tagged_addr *phys, size_t nr, - unsigned long flags, int as_nr, int group_id, - enum kbase_caller_mmu_sync_info mmu_sync_info, - struct kbase_va_region *reg); + struct kbase_va_region *reg); + +/** + * kbase_mmu_insert_pages_skip_status_update - Map 'nr' pages pointed to by 'phys' + * at GPU PFN 'vpfn' for GPU address space number 'as_nr'. + * + * @kbdev: Instance of GPU platform device, allocated from the probe method. + * @mmut: GPU page tables. + * @vpfn: Start page frame number of the GPU virtual pages to map. + * @phys: Physical address of the page to be mapped. + * @nr: The number of pages to map. + * @flags: Bitmask of attributes of the GPU memory region being mapped. + * @as_nr: The GPU address space number. + * @group_id: The physical memory group in which the page was allocated. + * @mmu_sync_info: MMU-synchronous caller info. + * @reg: The region whose physical allocation is to be mapped. + * + * Similar to kbase_mmu_insert_pages() but skips updating each page's metadata + * for page migration. + * + * Return: 0 if successful, otherwise a negative error code.
+ */ +int kbase_mmu_insert_pages_skip_status_update(struct kbase_device *kbdev, + struct kbase_mmu_table *mmut, u64 vpfn, + struct tagged_addr *phys, size_t nr, + unsigned long flags, int as_nr, int group_id, + enum kbase_caller_mmu_sync_info mmu_sync_info, + struct kbase_va_region *reg); int kbase_mmu_insert_aliased_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, u64 vpfn, struct tagged_addr *phys, size_t nr, unsigned long flags, int as_nr, int group_id, enum kbase_caller_mmu_sync_info mmu_sync_info, struct kbase_va_region *reg); -int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn, struct tagged_addr phys, - size_t nr, unsigned long flags, int group_id, - enum kbase_caller_mmu_sync_info mmu_sync_info, - bool ignore_page_migration); int kbase_mmu_insert_single_imported_page(struct kbase_context *kctx, u64 vpfn, struct tagged_addr phys, size_t nr, unsigned long flags, int group_id, @@ -182,40 +200,16 @@ int kbase_mmu_insert_single_aliased_page(struct kbase_context *kctx, u64 vpfn, int group_id, enum kbase_caller_mmu_sync_info mmu_sync_info); -/** - * kbase_mmu_teardown_pages - Remove GPU virtual addresses from the MMU page table - * - * @kbdev: Pointer to kbase device. - * @mmut: Pointer to GPU MMU page table. - * @vpfn: Start page frame number of the GPU virtual pages to unmap. - * @phys: Array of physical pages currently mapped to the virtual - * pages to unmap, or NULL. This is used for GPU cache maintenance - * and page migration support. - * @nr_phys_pages: Number of physical pages to flush. - * @nr_virt_pages: Number of virtual pages whose PTEs should be destroyed. - * @as_nr: Address space number, for GPU cache maintenance operations - * that happen outside a specific kbase context. - * @ignore_page_migration: Whether page migration metadata should be ignored. - * - * We actually discard the ATE and free the page table pages if no valid entries - * exist in PGD. - * - * IMPORTANT: This uses kbasep_js_runpool_release_ctx() when the context is - * currently scheduled into the runpool, and so potentially uses a lot of locks. - * These locks must be taken in the correct order with respect to others - * already held by the caller. Refer to kbasep_js_runpool_release_ctx() for more - * information. - * - * The @p phys pointer to physical pages is not necessary for unmapping virtual memory, - * but it is used for fine-grained GPU cache maintenance. If @p phys is NULL, - * GPU cache maintenance will be done as usual, that is invalidating the whole GPU caches - * instead of specific physical address ranges. - * - * Return: 0 on success, otherwise an error code. 
diff --git a/mali_kbase/mmu/mali_kbase_mmu_hw.h b/mali_kbase/mmu/mali_kbase_mmu_hw.h
index d53f928..49e050e 100644
--- a/mali_kbase/mmu/mali_kbase_mmu_hw.h
+++ b/mali_kbase/mmu/mali_kbase_mmu_hw.h
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 /*
  *
- * (C) COPYRIGHT 2014-2015, 2018-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -58,7 +58,7 @@ enum kbase_mmu_fault_type {
  * struct kbase_mmu_hw_op_param - parameters for kbase_mmu_hw_do_* functions
  * @vpfn: MMU Virtual Page Frame Number to start the operation on.
  * @nr: Number of pages to work on.
- * @op: Operation type (written to ASn_COMMAND).
+ * @op: Operation type (written to AS_COMMAND).
  * @kctx_id: Kernel context ID for MMU command tracepoint.
  * @mmu_sync_info: Indicates whether this call is synchronous wrt MMU ops.
  * @flush_skip_levels: Page table levels to skip flushing. (Only
diff --git a/mali_kbase/mmu/mali_kbase_mmu_hw_direct.c b/mali_kbase/mmu/mali_kbase_mmu_hw_direct.c
index ecfa23d..ca9f060 100644
--- a/mali_kbase/mmu/mali_kbase_mmu_hw_direct.c
+++ b/mali_kbase/mmu/mali_kbase_mmu_hw_direct.c
@@ -170,10 +170,10 @@ static int lock_region(struct kbase_gpu_props const *gpu_props, u64 *lockaddr,
 static int wait_ready(struct kbase_device *kbdev, unsigned int as_nr)
 {
 	const ktime_t wait_loop_start = ktime_get_raw();
-	const u32 mmu_as_inactive_wait_time_ms = kbdev->mmu_as_inactive_wait_time_ms;
+	const u32 mmu_as_inactive_wait_time_ms = kbdev->mmu_or_gpu_cache_op_wait_time_ms;
 	s64 diff;
 
-	if (unlikely(kbdev->as[as_nr].is_unresponsive))
+	if (unlikely(kbdev->mmu_unresponsive))
 		return -EBUSY;
 
 	do {
@@ -181,7 +181,7 @@ static int wait_ready(struct kbase_device *kbdev, unsigned int as_nr)
 
 		for (i = 0; i < 1000; i++) {
 			/* Wait for the MMU status to indicate there is no active command */
-			if (!(kbase_reg_read(kbdev, MMU_AS_REG(as_nr, AS_STATUS)) &
+			if (!(kbase_reg_read(kbdev, MMU_STAGE1_REG(MMU_AS_REG(as_nr, AS_STATUS))) &
 			      AS_STATUS_AS_ACTIVE))
 				return 0;
 		}
@@ -192,7 +192,7 @@ static int wait_ready(struct kbase_device *kbdev, unsigned int as_nr)
 
 	dev_err(kbdev->dev,
 		"AS_ACTIVE bit stuck for as %u. Might be caused by unstable GPU clk/pwr or faulty system",
 		as_nr);
-	kbdev->as[as_nr].is_unresponsive = true;
+	kbdev->mmu_unresponsive = true;
 	if (kbase_prepare_to_reset_gpu_locked(kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR))
 		kbase_reset_gpu_locked(kbdev);
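wait_ready() above polls AS_STATUS in bursts of 1000 reads before re-checking a millisecond deadline, and gives up by marking the MMU unresponsive. A standalone sketch of that bounded-poll pattern (simulated register and hypothetical names, not kbase code):

/* Standalone sketch of the burst-poll-with-deadline pattern used by
 * wait_ready(). All names are hypothetical; not kbase code. */
#include <stdint.h>
#include <time.h>

#define SKETCH_ACTIVE_BIT (1u << 0)
#define SKETCH_TIMEOUT_MS 100

static uint32_t sketch_read_status(void)
{
	/* Stand-in for a hardware register read: busy for a while, then idle. */
	static int countdown = 5000;
	return (--countdown > 0) ? SKETCH_ACTIVE_BIT : 0;
}

static int64_t sketch_ms_since(const struct timespec *start)
{
	struct timespec now;

	clock_gettime(CLOCK_MONOTONIC, &now);
	return (int64_t)(now.tv_sec - start->tv_sec) * 1000 +
	       (now.tv_nsec - start->tv_nsec) / 1000000;
}

static int sketch_wait_ready(void)
{
	struct timespec start;

	clock_gettime(CLOCK_MONOTONIC, &start);
	do {
		/* Burst of polls before re-checking the deadline, mirroring
		 * the driver's inner "for (i = 0; i < 1000; i++)" loop. */
		for (int i = 0; i < 1000; i++) {
			if (!(sketch_read_status() & SKETCH_ACTIVE_BIT))
				return 0;
		}
	} while (sketch_ms_since(&start) < SKETCH_TIMEOUT_MS);

	return -1; /* stuck: the driver would mark the MMU unresponsive */
}

int main(void)
{
	return sketch_wait_ready();
}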
@@ -205,7 +205,7 @@ static int write_cmd(struct kbase_device *kbdev, int as_nr, u32 cmd)
 	const int status = wait_ready(kbdev, as_nr);
 
 	if (likely(status == 0))
-		kbase_reg_write(kbdev, MMU_AS_REG(as_nr, AS_COMMAND), cmd);
+		kbase_reg_write(kbdev, MMU_STAGE1_REG(MMU_AS_REG(as_nr, AS_COMMAND)), cmd);
 	else if (status == -EBUSY) {
 		dev_dbg(kbdev->dev,
 			"Skipped the wait for AS_ACTIVE bit for as %u, before sending MMU command %u",
@@ -277,9 +277,8 @@ static int apply_hw_issue_GPU2019_3901_wa(struct kbase_device *kbdev, u32 *mmu_c
 	 * the workaround can be safely skipped.
 	 */
 	if (kbdev->pm.backend.l2_state != KBASE_L2_OFF) {
-		if (*mmu_cmd != AS_COMMAND_FLUSH_MEM) {
-			dev_warn(kbdev->dev,
-				 "Unexpected mmu command received");
+		if (unlikely(*mmu_cmd != AS_COMMAND_FLUSH_MEM)) {
+			dev_warn(kbdev->dev, "Unexpected MMU command(%u) received", *mmu_cmd);
 			return -EINVAL;
 		}
 
@@ -341,19 +340,18 @@ void kbase_mmu_hw_configure(struct kbase_device *kbdev, struct kbase_as *as)
 			transcfg = (transcfg | AS_TRANSCFG_PTW_SH_OS);
 	}
 
-	kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSCFG_LO),
-			transcfg);
-	kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSCFG_HI),
+	kbase_reg_write(kbdev, MMU_STAGE1_REG(MMU_AS_REG(as->number, AS_TRANSCFG_LO)), transcfg);
+	kbase_reg_write(kbdev, MMU_STAGE1_REG(MMU_AS_REG(as->number, AS_TRANSCFG_HI)),
 			(transcfg >> 32) & 0xFFFFFFFFUL);
-	kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSTAB_LO),
+	kbase_reg_write(kbdev, MMU_STAGE1_REG(MMU_AS_REG(as->number, AS_TRANSTAB_LO)),
 			current_setup->transtab & 0xFFFFFFFFUL);
-	kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_TRANSTAB_HI),
+	kbase_reg_write(kbdev, MMU_STAGE1_REG(MMU_AS_REG(as->number, AS_TRANSTAB_HI)),
 			(current_setup->transtab >> 32) & 0xFFFFFFFFUL);
-	kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_MEMATTR_LO),
+	kbase_reg_write(kbdev, MMU_STAGE1_REG(MMU_AS_REG(as->number, AS_MEMATTR_LO)),
 			current_setup->memattr & 0xFFFFFFFFUL);
-	kbase_reg_write(kbdev, MMU_AS_REG(as->number, AS_MEMATTR_HI),
+	kbase_reg_write(kbdev, MMU_STAGE1_REG(MMU_AS_REG(as->number, AS_MEMATTR_HI)),
 			(current_setup->memattr >> 32) & 0xFFFFFFFFUL);
 
 	KBASE_TLSTREAM_TL_ATTRIB_AS_CONFIG(kbdev, as,
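kbase_mmu_hw_configure() above programs 64-bit TRANSTAB/MEMATTR/TRANSCFG values through pairs of 32-bit _LO/_HI registers. A standalone sketch of that split (printf stands in for kbase_reg_write(); not kbase code):

/* Standalone sketch of writing a 64-bit value through two 32-bit registers,
 * as done for the AS_* setup registers above. Not kbase code. */
#include <stdint.h>
#include <stdio.h>

static void sketch_write_reg(const char *name, uint32_t val)
{
	printf("%s <- 0x%08x\n", name, val); /* stand-in for a register write */
}

static void sketch_write64(const char *lo, const char *hi, uint64_t val)
{
	sketch_write_reg(lo, (uint32_t)(val & 0xFFFFFFFFUL));
	sketch_write_reg(hi, (uint32_t)((val >> 32) & 0xFFFFFFFFUL));
}

int main(void)
{
	uint64_t transtab = 0x0000000123456000ULL; /* hypothetical value */

	sketch_write64("AS_TRANSTAB_LO", "AS_TRANSTAB_HI", transtab);
	return 0;
}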
@@ -401,9 +399,9 @@ static int mmu_hw_set_lock_addr(struct kbase_device *kbdev, int as_nr, u64 *lock
 
 	if (!ret) {
 		/* Set the region that needs to be updated */
-		kbase_reg_write(kbdev, MMU_AS_REG(as_nr, AS_LOCKADDR_LO),
+		kbase_reg_write(kbdev, MMU_STAGE1_REG(MMU_AS_REG(as_nr, AS_LOCKADDR_LO)),
 				*lock_addr & 0xFFFFFFFFUL);
-		kbase_reg_write(kbdev, MMU_AS_REG(as_nr, AS_LOCKADDR_HI),
+		kbase_reg_write(kbdev, MMU_STAGE1_REG(MMU_AS_REG(as_nr, AS_LOCKADDR_HI)),
 				(*lock_addr >> 32) & 0xFFFFFFFFUL);
 	}
 	return ret;
@@ -490,9 +488,11 @@ int kbase_mmu_hw_do_unlock_no_addr(struct kbase_device *kbdev, struct kbase_as *
 	if (likely(!ret)) {
 		u64 lock_addr = 0x0;
 		/* read MMU_AS_CONTROL.LOCKADDR register */
-		lock_addr |= (u64)kbase_reg_read(kbdev, MMU_AS_REG(as->number, AS_LOCKADDR_HI))
+		lock_addr |= (u64)kbase_reg_read(
+				     kbdev, MMU_STAGE1_REG(MMU_AS_REG(as->number, AS_LOCKADDR_HI)))
 			     << 32;
-		lock_addr |= (u64)kbase_reg_read(kbdev, MMU_AS_REG(as->number, AS_LOCKADDR_LO));
+		lock_addr |= (u64)kbase_reg_read(
+			kbdev, MMU_STAGE1_REG(MMU_AS_REG(as->number, AS_LOCKADDR_LO)));
 
 		mmu_command_instr(kbdev, op_param->kctx_id, AS_COMMAND_UNLOCK, lock_addr,
 				  op_param->mmu_sync_info);
@@ -572,8 +572,14 @@ static int mmu_hw_do_flush(struct kbase_device *kbdev, struct kbase_as *as,
 			ret = apply_hw_issue_GPU2019_3901_wa(kbdev, &mmu_cmd, as->number);
 		}
 
-		if (ret)
-			return ret;
+		if (ret) {
+			dev_warn(
+				kbdev->dev,
+				"Failed to apply WA for HW issue when doing MMU flush op on VA range %llx-%llx for AS %u",
+				op_param->vpfn << PAGE_SHIFT,
+				((op_param->vpfn + op_param->nr) << PAGE_SHIFT) - 1, as->number);
+			/* Continue with the MMU flush operation */
+		}
 	}
 #endif
@@ -664,7 +670,7 @@ void kbase_mmu_hw_clear_fault(struct kbase_device *kbdev, struct kbase_as *as,
 		    type == KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED)
 			pf_bf_mask |= MMU_BUS_ERROR(as->number);
 #endif
-		kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_CLEAR), pf_bf_mask);
+		kbase_reg_write(kbdev, MMU_CONTROL_REG(MMU_IRQ_CLEAR), pf_bf_mask);
 
 unlock:
 	spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags);
@@ -688,15 +694,15 @@ void kbase_mmu_hw_enable_fault(struct kbase_device *kbdev, struct kbase_as *as,
 	if (kbdev->irq_reset_flush)
 		goto unlock;
 
-	irq_mask = kbase_reg_read(kbdev, MMU_REG(MMU_IRQ_MASK)) |
-		   MMU_PAGE_FAULT(as->number);
+	irq_mask =
+		kbase_reg_read(kbdev, MMU_CONTROL_REG(MMU_IRQ_MASK)) | MMU_PAGE_FAULT(as->number);
 
 #if !MALI_USE_CSF
 	if (type == KBASE_MMU_FAULT_TYPE_BUS ||
 	    type == KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED)
 		irq_mask |= MMU_BUS_ERROR(as->number);
 #endif
-	kbase_reg_write(kbdev, MMU_REG(MMU_IRQ_MASK), irq_mask);
+	kbase_reg_write(kbdev, MMU_CONTROL_REG(MMU_IRQ_MASK), irq_mask);
 
 unlock:
 	spin_unlock_irqrestore(&kbdev->mmu_mask_change, flags);
diff --git a/mali_kbase/platform/Kconfig b/mali_kbase/platform/Kconfig
index de4203c..b190e26 100644
--- a/mali_kbase/platform/Kconfig
+++ b/mali_kbase/platform/Kconfig
@@ -1,6 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 #
-# (C) COPYRIGHT 2012-2013, 2017, 2021 ARM Limited. All rights reserved.
+# (C) COPYRIGHT 2012-2023 ARM Limited. All rights reserved.
 #
 # This program is free software and is provided to you under the terms of the
 # GNU General Public License version 2 as published by the Free Software
@@ -20,7 +20,7 @@
 # Add your platform specific Kconfig file here
 #
-# "drivers/gpu/arm/midgard/platform/xxx/Kconfig"
+# "$(MALI_KCONFIG_EXT_PREFIX)drivers/gpu/arm/midgard/platform/xxx/Kconfig"
 #
 # Where xxx is the platform name set in MALI_PLATFORM_NAME
 #
diff --git a/mali_kbase/platform/meson/mali_kbase_config_platform.h b/mali_kbase/platform/meson/mali_kbase_config_platform.h
index 06279e2..866a7de 100644
--- a/mali_kbase/platform/meson/mali_kbase_config_platform.h
+++ b/mali_kbase/platform/meson/mali_kbase_config_platform.h
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 /*
  *
- * (C) COPYRIGHT 2014-2017, 2019-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2017, 2019-2023 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -20,7 +20,7 @@
  */
 
 /**
- * Power management configuration
+ * POWER_MANAGEMENT_CALLBACKS - Power management configuration
  *
  * Attached value: pointer to @ref kbase_pm_callback_conf
  * Default value: See @ref kbase_pm_callback_conf
@@ -28,7 +28,7 @@
 #define POWER_MANAGEMENT_CALLBACKS (&pm_callbacks)
 
 /**
- * Platform specific configuration functions
+ * PLATFORM_FUNCS - Platform specific configuration functions
  *
  * Attached value: pointer to @ref kbase_platform_funcs_conf
 * Default value: See @ref kbase_platform_funcs_conf
@@ -38,7 +38,7 @@ extern struct kbase_pm_callback_conf pm_callbacks;
 
 /**
- * Autosuspend delay
+ * AUTO_SUSPEND_DELAY - Autosuspend delay
  *
  * The delay time (in milliseconds) to be used for autosuspend
  */
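POWER_MANAGEMENT_CALLBACKS above attaches a pointer to a struct kbase_pm_callback_conf named pm_callbacks, which the platform code defines elsewhere. A standalone sketch of that callback-table pattern (the struct below is a stand-in; the real struct kbase_pm_callback_conf has more fields and its field names here are assumptions):

/* Standalone sketch of a power-management callback table. Not kbase code;
 * the real struct kbase_pm_callback_conf may differ. */
#include <stdio.h>

struct sketch_device {
	const char *name;
};

struct sketch_pm_callback_conf {
	int (*power_on_callback)(struct sketch_device *dev);
	void (*power_off_callback)(struct sketch_device *dev);
};

static int sketch_power_on(struct sketch_device *dev)
{
	printf("%s: power on\n", dev->name); /* placeholder body */
	return 0;
}

static void sketch_power_off(struct sketch_device *dev)
{
	printf("%s: power off\n", dev->name); /* placeholder body */
}

static const struct sketch_pm_callback_conf pm_callbacks_sketch = {
	.power_on_callback = sketch_power_on,
	.power_off_callback = sketch_power_off,
};

int main(void)
{
	struct sketch_device dev = { .name = "mali" };

	pm_callbacks_sketch.power_on_callback(&dev);
	pm_callbacks_sketch.power_off_callback(&dev);
	return 0;
}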
diff --git a/mali_kbase/platform/pixel/pixel_gpu_sscd.c b/mali_kbase/platform/pixel/pixel_gpu_sscd.c
index c65e6ce..75f3c2a 100644
--- a/mali_kbase/platform/pixel/pixel_gpu_sscd.c
+++ b/mali_kbase/platform/pixel/pixel_gpu_sscd.c
@@ -119,7 +119,7 @@ static void get_fw_trace(struct kbase_device *kbdev, struct sscd_segment *seg)
 		.version = 1,
 	};
 
-	tb = kbase_csf_firmware_get_trace_buffer(kbdev, FIRMWARE_LOG_BUF_NAME);
+	tb = kbase_csf_firmware_get_trace_buffer(kbdev, KBASE_CSFFW_LOG_BUF_NAME);
 
 	if (tb == NULL) {
 		dev_err(kbdev->dev, "pixel: failed to open firmware trace buffer");
diff --git a/mali_kbase/tests/Kbuild b/mali_kbase/tests/Kbuild
index 38e4dd4..72ca70a 100644
--- a/mali_kbase/tests/Kbuild
+++ b/mali_kbase/tests/Kbuild
@@ -1,6 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 #
-# (C) COPYRIGHT 2017, 2020-2022 ARM Limited. All rights reserved.
+# (C) COPYRIGHT 2017-2023 ARM Limited. All rights reserved.
 #
 # This program is free software and is provided to you under the terms of the
 # GNU General Public License version 2 as published by the Free Software
@@ -17,6 +17,7 @@
 # http://www.gnu.org/licenses/gpl-2.0.html.
 #
 #
+src:=$(if $(patsubst /%,,$(src)),$(srctree)/$(src),$(src))
 
 ccflags-y += -I$(src)/include \
 	     -I$(src)
@@ -29,3 +30,4 @@
 obj-$(CONFIG_MALI_KUTF_IRQ_TEST) += mali_kutf_irq_test/
 obj-$(CONFIG_MALI_KUTF_CLK_RATE_TRACE) += mali_kutf_clk_rate_trace/kernel/
 obj-$(CONFIG_MALI_KUTF_MGM_INTEGRATION) += mali_kutf_mgm_integration_test/
+
diff --git a/mali_kbase/tests/Kconfig b/mali_kbase/tests/Kconfig
index e6f0376..f100901 100644
--- a/mali_kbase/tests/Kconfig
+++ b/mali_kbase/tests/Kconfig
@@ -1,6 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 #
-# (C) COPYRIGHT 2017, 2020-2022 ARM Limited. All rights reserved.
+# (C) COPYRIGHT 2017, 2020-2023 ARM Limited. All rights reserved.
 #
 # This program is free software and is provided to you under the terms of the
 # GNU General Public License version 2 as published by the Free Software
@@ -65,5 +65,6 @@ config MALI_KUTF_MGM_INTEGRATION_TEST
 	  - mali_kutf_mgm_integration_test.ko
 
+
 comment "Enable MALI_DEBUG for KUTF modules support"
 	depends on MALI_MIDGARD && !MALI_DEBUG && MALI_KUTF
diff --git a/mali_kbase/tests/Mconfig b/mali_kbase/tests/Mconfig
index d81c639..aa09274 100644
--- a/mali_kbase/tests/Mconfig
+++ b/mali_kbase/tests/Mconfig
@@ -1,6 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 #
-# (C) COPYRIGHT 2018-2022 ARM Limited. All rights reserved.
+# (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved.
 #
 # This program is free software and is provided to you under the terms of the
 # GNU General Public License version 2 as published by the Free Software
@@ -65,6 +65,7 @@ config MALI_KUTF_MGM_INTEGRATION_TEST
 	  - mali_kutf_mgm_integration_test.ko
 
+
 # Enable MALI_DEBUG for KUTF modules support
 config UNIT_TEST_KERNEL_MODULES
diff --git a/mali_kbase/tests/mali_kutf_clk_rate_trace/kernel/mali_kutf_clk_rate_trace_test.c b/mali_kbase/tests/mali_kutf_clk_rate_trace/kernel/mali_kutf_clk_rate_trace_test.c
index a6f54b6..8b86fb0 100644
--- a/mali_kbase/tests/mali_kutf_clk_rate_trace/kernel/mali_kutf_clk_rate_trace_test.c
+++ b/mali_kbase/tests/mali_kutf_clk_rate_trace/kernel/mali_kutf_clk_rate_trace_test.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 /*
  *
- * (C) COPYRIGHT 2020-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2020-2023 ARM Limited. All rights reserved.
 *
 * This program is free software and is provided to you under the terms of the
 * GNU General Public License version 2 as published by the Free Software
@@ -442,8 +442,9 @@ static const char *kutf_clk_trace_do_get_platform(
 #if defined(CONFIG_MALI_ARBITER_SUPPORT) && defined(CONFIG_OF)
 	struct kutf_clk_rate_trace_fixture_data *data = context->fixture;
 
-	arbiter_if_node =
-		of_get_property(data->kbdev->dev->of_node, "arbiter_if", NULL);
+	arbiter_if_node = of_get_property(data->kbdev->dev->of_node, "arbiter-if", NULL);
+	if (!arbiter_if_node)
+		arbiter_if_node = of_get_property(data->kbdev->dev->of_node, "arbiter_if", NULL);
 #endif
 	if (arbiter_if_node) {
 		power_node = of_find_compatible_node(NULL, NULL,
diff --git a/mali_kbase/thirdparty/mali_kbase_mmap.c b/mali_kbase/thirdparty/mali_kbase_mmap.c
index 1e636b9..20f7496 100644
--- a/mali_kbase/thirdparty/mali_kbase_mmap.c
+++ b/mali_kbase/thirdparty/mali_kbase_mmap.c
@@ -303,8 +303,7 @@ unsigned long kbase_context_get_unmapped_area(struct kbase_context *const kctx,
 	 * is no free region at the address found originally by too large a
 	 * same_va_end_addr here, and will fail the allocation gracefully.
 	 */
-	struct kbase_reg_zone *zone =
-		kbase_ctx_reg_zone_get_nolock(kctx, KBASE_REG_ZONE_SAME_VA);
+	struct kbase_reg_zone *zone = kbase_ctx_reg_zone_get_nolock(kctx, SAME_VA_ZONE);
 	u64 same_va_end_addr = kbase_reg_zone_end_pfn(zone) << PAGE_SHIFT;
 #if (KERNEL_VERSION(6, 1, 0) <= LINUX_VERSION_CODE)
 	const unsigned long mmap_end = arch_get_mmap_end(addr, len, flags);
@@ -386,7 +385,7 @@ unsigned long kbase_context_get_unmapped_area(struct kbase_context *const kctx,
 #ifndef CONFIG_64BIT
 	} else {
 		return current->mm->get_unmapped_area(
-			kctx->filp, addr, len, pgoff, flags);
+			kctx->kfile->filp, addr, len, pgoff, flags);
 #endif
 	}
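The clock-rate-trace change above tries the preferred "arbiter-if" device-tree property first and only falls back to the legacy "arbiter_if" spelling when it is absent. A standalone sketch of that preferred-then-legacy lookup (the lookup function is a stand-in, not the kernel's of_get_property()):

/* Standalone sketch of a preferred-then-legacy property lookup. Names and
 * the lookup function are hypothetical; not kernel code. */
#include <stdio.h>
#include <string.h>

static const char *sketch_get_property(const char *name)
{
	/* Stand-in: simulate a device tree where only the legacy spelling exists. */
	return strcmp(name, "arbiter_if") == 0 ? "phandle" : NULL;
}

int main(void)
{
	const char *prop = sketch_get_property("arbiter-if"); /* preferred */

	if (!prop)
		prop = sketch_get_property("arbiter_if"); /* legacy fallback */

	printf("found: %s\n", prop ? prop : "(none)");
	return 0;
}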