Diffstat (limited to 'mali_kbase/mali_kbase_defs.h')
-rw-r--r-- | mali_kbase/mali_kbase_defs.h | 512
1 file changed, 377 insertions, 135 deletions
diff --git a/mali_kbase/mali_kbase_defs.h b/mali_kbase/mali_kbase_defs.h
index 25e4f32..bdc3f6d 100644
--- a/mali_kbase/mali_kbase_defs.h
+++ b/mali_kbase/mali_kbase_defs.h
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 /*
  *
- * (C) COPYRIGHT 2011-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2011-2023 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -35,13 +35,13 @@
 #include <backend/gpu/mali_kbase_instr_defs.h>
 #include <mali_kbase_pm.h>
 #include <mali_kbase_gpuprops_types.h>
-#include <mali_kbase_hwcnt_watchdog_if.h>
+#include <hwcnt/mali_kbase_hwcnt_watchdog_if.h>
 #if MALI_USE_CSF
-#include <mali_kbase_hwcnt_backend_csf.h>
+#include <hwcnt/backend/mali_kbase_hwcnt_backend_csf.h>
 #else
-#include <mali_kbase_hwcnt_backend_jm.h>
-#include <mali_kbase_hwcnt_backend_jm_watchdog.h>
+#include <hwcnt/backend/mali_kbase_hwcnt_backend_jm.h>
+#include <hwcnt/backend/mali_kbase_hwcnt_backend_jm_watchdog.h>
 #endif
 
 #include <protected_mode_switcher.h>
@@ -53,11 +53,7 @@
 #include <linux/sizes.h>
 #include <linux/rtmutex.h>
 
-#if defined(CONFIG_SYNC)
-#include <sync.h>
-#else
 #include "mali_kbase_fence_defs.h"
-#endif
 
 #if IS_ENABLED(CONFIG_DEBUG_FS)
 #include <linux/debugfs.h>
@@ -154,8 +150,7 @@
 /* Maximum number of pages of memory that require a permanent mapping, per
  * kbase_context
  */
-#define KBASE_PERMANENTLY_MAPPED_MEM_LIMIT_PAGES ((32 * 1024ul * 1024ul) >> \
-        PAGE_SHIFT)
+#define KBASE_PERMANENTLY_MAPPED_MEM_LIMIT_PAGES ((64 * 1024ul * 1024ul) >> PAGE_SHIFT)
 /* Minimum threshold period for hwcnt dumps between different hwcnt virtualizer
  * clients, to reduce undesired system load.
  * If a virtualizer client requests a dump within this threshold period after
@@ -188,6 +183,60 @@
 struct kbase_as;
 struct kbase_mmu_setup;
 struct kbase_kinstr_jm;
 
+#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD)
+/**
+ * struct kbase_gpu_metrics - Object containing members that are used to emit
+ *                            GPU metrics tracepoints for all applications that
+ *                            created Kbase context(s) for a GPU.
+ *
+ * @active_list:   List of applications that did some GPU activity in the recent work period.
+ * @inactive_list: List of applications that didn't do any GPU activity in the recent work period.
+ */
+struct kbase_gpu_metrics {
+        struct list_head active_list;
+        struct list_head inactive_list;
+};
+
+/**
+ * struct kbase_gpu_metrics_ctx - Object created for every application, that created
+ *                                Kbase context(s), containing members that are used
+ *                                to emit GPU metrics tracepoints for the application.
+ *
+ * @link:                    Links the object in kbase_device::gpu_metrics::active_list
+ *                           or kbase_device::gpu_metrics::inactive_list.
+ * @first_active_start_time: Records the time at which the application first became
+ *                           active in the current work period.
+ * @last_active_start_time:  Records the time at which the application last became
+ *                           active in the current work period.
+ * @last_active_end_time:    Records the time at which the application last became
+ *                           inactive in the current work period.
+ * @total_active:            Tracks the time for which the application has been active
+ *                           in the current work period.
+ * @prev_wp_active_end_time: Records the time at which the application last became
+ *                           inactive in the previous work period.
+ * @aid:                     Unique identifier for an application.
+ * @kctx_count:              Counter to keep track of the number of Kbase contexts
+ *                           created for an application. There may be multiple Kbase
+ *                           contexts contributing GPU activity data to a single GPU
+ *                           metrics context.
+ * @active_cnt:              Counter that is updated every time the GPU activity starts
+ *                           and ends in the current work period for an application.
+ * @flags:                   Flags to track the state of GPU metrics context.
+ */
+struct kbase_gpu_metrics_ctx {
+        struct list_head link;
+        u64 first_active_start_time;
+        u64 last_active_start_time;
+        u64 last_active_end_time;
+        u64 total_active;
+        u64 prev_wp_active_end_time;
+        unsigned int aid;
+        unsigned int kctx_count;
+        u8 active_cnt;
+        u8 flags;
+};
+#endif
+
 /**
  * struct kbase_io_access - holds information about 1 register access
  *
@@ -269,12 +318,25 @@ struct kbase_fault {
         bool protected_mode;
 };
 
+/** Maximum number of memory pages that should be allocated for the array
+ * of pointers to free PGDs.
+ *
+ * This number has been pre-calculated to deal with the maximum allocation
+ * size expressed by the default value of KBASE_MEM_ALLOC_MAX_SIZE.
+ * This is supposed to be enough for almost the entirety of MMU operations.
+ * Any size greater than KBASE_MEM_ALLOC_MAX_SIZE requires being broken down
+ * into multiple iterations, each dealing with at most KBASE_MEM_ALLOC_MAX_SIZE
+ * bytes.
+ *
+ * Please update this value if KBASE_MEM_ALLOC_MAX_SIZE changes.
+ */
+#define MAX_PAGES_FOR_FREE_PGDS ((size_t)9)
+
+/* Maximum number of pointers to free PGDs */
+#define MAX_FREE_PGDS ((PAGE_SIZE / sizeof(struct page *)) * MAX_PAGES_FOR_FREE_PGDS)
+
 /**
  * struct kbase_mmu_table - object representing a set of GPU page tables
- * @mmu_teardown_pages: Array containing pointers to 3 separate pages, used
- *                      to cache the entries of top (L0) & intermediate level
- *                      page tables (L1 & L2) to avoid repeated calls to
- *                      kmap_atomic() during the MMU teardown.
  * @mmu_lock:           Lock to serialize the accesses made to multi level GPU
  *                      page tables
  * @pgd:                Physical address of the page allocated for the top
@@ -286,29 +348,106 @@ struct kbase_fault {
  *                      Valid range is 0..(MEMORY_GROUP_MANAGER_NR_GROUPS-1).
  * @kctx:               If this set of MMU tables belongs to a context then
  *                      this is a back-reference to the context, otherwise
- *                      it is NULL
+ *                      it is NULL.
+ * @scratch_mem:        Scratch memory used for MMU operations, which are
+ *                      serialized by the @mmu_lock.
  */
 struct kbase_mmu_table {
-        u64 *mmu_teardown_pages[MIDGARD_MMU_BOTTOMLEVEL];
         struct rt_mutex mmu_lock;
         phys_addr_t pgd;
         u8 group_id;
         struct kbase_context *kctx;
+        union {
+                /**
+                 * @teardown_pages: Scratch memory used for backup copies of whole
+                 *                  PGD pages when tearing down levels upon
+                 *                  termination of the MMU table.
+                 */
+                struct {
+                        /**
+                         * @levels: Array of PGD pages, large enough to copy one PGD
+                         *          for each level of the MMU table.
+                         */
+                        u64 levels[MIDGARD_MMU_BOTTOMLEVEL][PAGE_SIZE / sizeof(u64)];
+                } teardown_pages;
+                /**
+                 * @free_pgds: Scratch memory used for insertion, update and teardown
+                 *             operations to store a temporary list of PGDs to be freed
+                 *             at the end of the operation.
+                 */
+                struct {
+                        /** @pgds: Array of pointers to PGDs to free. */
+                        struct page *pgds[MAX_FREE_PGDS];
+                        /** @head_index: Index of first free element in the PGDs array. */
+                        size_t head_index;
+                } free_pgds;
+        } scratch_mem;
+};
+
+/**
+ * enum kbase_memory_zone - Kbase memory zone identifier
+ * @SAME_VA_ZONE:       Memory zone for allocations where the GPU and CPU VA coincide.
+ * @CUSTOM_VA_ZONE:     When operating in compatibility mode, this zone is used to
+ *                      allow 32-bit userspace (either on a 32-bit device or a
+ *                      32-bit application on a 64-bit device) to address the entirety
+ *                      of the GPU address space. The @CUSTOM_VA_ZONE is also used
+ *                      for JIT allocations: on 64-bit systems, the zone is created
+ *                      by reducing the size of the SAME_VA zone by a user-controlled
+ *                      amount, whereas on 32-bit systems, it is created as part of
+ *                      the existing CUSTOM_VA_ZONE.
+ * @EXEC_VA_ZONE:       Memory zone used to track GPU-executable memory. The start
+ *                      and end of this zone depend on the individual platform,
+ *                      and it is initialized upon user process request.
+ * @EXEC_FIXED_VA_ZONE: Memory zone used to contain GPU-executable memory
+ *                      that also permits FIXED/FIXABLE allocations.
+ * @FIXED_VA_ZONE:      Memory zone used to allocate memory at userspace-supplied
+ *                      addresses.
+ * @MCU_SHARED_ZONE:    Memory zone created for mappings shared between the MCU
+ *                      and Kbase. Currently this is the only zone type that is
+ *                      created on a per-device, rather than a per-context,
+ *                      basis.
+ * @MEMORY_ZONE_MAX:    Sentinel value used for iterating over all the memory zone
+ *                      identifiers.
+ * @CONTEXT_ZONE_MAX:   Sentinel value used to keep track of the last per-context
+ *                      zone for iteration.
+ */
+enum kbase_memory_zone {
+        SAME_VA_ZONE,
+        CUSTOM_VA_ZONE,
+        EXEC_VA_ZONE,
+#if IS_ENABLED(MALI_USE_CSF)
+        EXEC_FIXED_VA_ZONE,
+        FIXED_VA_ZONE,
+        MCU_SHARED_ZONE,
+#endif
+        MEMORY_ZONE_MAX,
+#if IS_ENABLED(MALI_USE_CSF)
+        CONTEXT_ZONE_MAX = FIXED_VA_ZONE + 1
+#else
+        CONTEXT_ZONE_MAX = EXEC_VA_ZONE + 1
+#endif
 };
 
 /**
- * struct kbase_reg_zone - Information about GPU memory region zones
+ * struct kbase_reg_zone - GPU memory zone information and region tracking
+ * @reg_rbtree:    RB tree used to track kbase memory regions.
  * @base_pfn:      Page Frame Number in GPU virtual address space for the start of
  *                 the Zone
  * @va_size_pages: Size of the Zone in pages
+ * @id:            Memory zone identifier
+ * @cache:         Pointer to a per-device slab allocator to allow for quickly allocating
+ *                 new regions
  *
  * Track information about a zone KBASE_REG_ZONE() and related macros.
  * In future, this could also store the &rb_root that are currently in
  * &kbase_context and &kbase_csf_device.
  */
 struct kbase_reg_zone {
+        struct rb_root reg_rbtree;
         u64 base_pfn;
         u64 va_size_pages;
+        enum kbase_memory_zone id;
+        struct kmem_cache *cache;
 };
 
 #if MALI_USE_CSF
@@ -317,6 +456,8 @@ struct kbase_reg_zone {
 #include "jm/mali_kbase_jm_defs.h"
 #endif
 
+#include "mali_kbase_hwaccess_time.h"
+
 static inline int kbase_as_has_bus_fault(struct kbase_as *as,
                                          struct kbase_fault *fault)
 {
@@ -403,7 +544,15 @@ struct kbase_clk_rate_trace_manager {
  *                      Note that some code paths keep shaders/the tiler
  *                      powered whilst this is 0.
  *                      Use kbase_pm_is_active() instead to check for such cases.
- * @suspending:         Flag indicating suspending/suspended
+ * @suspending:         Flag set to true when System suspend of the GPU device begins
+ *                      and set to false only when System resume of the GPU device
+ *                      starts. The GPU device could therefore be in the suspended
+ *                      state while the flag is set. The flag is updated with @lock
+ *                      held.
+ * @resuming:           Flag set to true when System resume of the GPU device starts
+ *                      and set to false when resume ends. The flag is set to true at
+ *                      the same time that @suspending is set to false, with @lock
+ *                      held. The flag is currently used only to prevent Kbase context
+ *                      termination during System resume of the GPU device.
  * @runtime_active:     Flag to track if the GPU is in runtime suspended or active
  *                      state. This ensures that runtime_put and runtime_get
  *                      functions are called in pairs. For example if runtime_get
@@ -414,7 +563,7 @@ struct kbase_clk_rate_trace_manager {
  * This structure contains data for the power management framework.
  * There is one instance of this structure per device in the system.
  * @zero_active_count_wait: Wait queue set when active_count == 0
- * @resume_wait:        system resume of GPU device.
+ * @resume_wait:        Wait queue to wait for the System suspend/resume of GPU device.
  * @debug_core_mask:    Bit masks identifying the available shader cores that are
  *                      specified via sysfs. One mask per job slot.
  * @debug_core_mask_all: Bit masks identifying the available shader cores that
@@ -432,9 +581,10 @@ struct kbase_clk_rate_trace_manager {
  * @clk_rtm:            The state of the GPU clock rate trace manager
  */
 struct kbase_pm_device_data {
-        struct mutex lock;
+        struct rt_mutex lock;
         int active_count;
         bool suspending;
+        bool resuming;
 #if MALI_USE_CSF
         bool runtime_active;
 #endif
@@ -465,36 +615,40 @@ struct kbase_pm_device_data {
 /**
  * struct kbase_mem_pool - Page based memory pool for kctx/kbdev
- * @kbdev:        Kbase device where memory is used
- * @cur_size:     Number of free pages currently in the pool (may exceed
- *                @max_size in some corner cases)
- * @max_size:     Maximum number of free pages in the pool
- * @order:        order = 0 refers to a pool of 4 KB pages
- *                order = 9 refers to a pool of 2 MB pages (2^9 * 4KB = 2 MB)
- * @group_id:     A memory group ID to be passed to a platform-specific
- *                memory group manager, if present. Immutable.
- *                Valid range is 0..(MEMORY_GROUP_MANAGER_NR_GROUPS-1).
- * @pool_lock:    Lock protecting the pool - must be held when modifying
- *                @cur_size and @page_list
- * @page_list:    List of free pages in the pool
- * @reclaim:      Shrinker for kernel reclaim of free pages
- * @next_pool:    Pointer to next pool where pages can be allocated when this
- *                pool is empty. Pages will spill over to the next pool when
- *                this pool is full. Can be NULL if there is no next pool.
- * @dying:        true if the pool is being terminated, and any ongoing
- *                operations should be abandoned
- * @dont_reclaim: true if the shrinker is forbidden from reclaiming memory from
- *                this pool, eg during a grow operation
+ * @kbdev:        Kbase device where memory is used
+ * @cur_size:     Number of free pages currently in the pool (may exceed
+ *                @max_size in some corner cases)
+ * @max_size:     Maximum number of free pages in the pool
+ * @order:        order = 0 refers to a pool of 4 KB pages
+ *                order = 9 refers to a pool of 2 MB pages (2^9 * 4KB = 2 MB)
+ * @group_id:     A memory group ID to be passed to a platform-specific
+ *                memory group manager, if present. Immutable.
+ *                Valid range is 0..(MEMORY_GROUP_MANAGER_NR_GROUPS-1).
+ * @pool_lock:    Lock protecting the pool - must be held when modifying
+ *                @cur_size and @page_list
+ * @page_list:    List of free pages in the pool
+ * @reclaim:      Shrinker for kernel reclaim of free pages
+ * @isolation_in_progress_cnt: Number of pages in pool undergoing page isolation.
+ *                This is used to avoid a race condition between pool termination
+ *                and page isolation for page migration.
+ * @next_pool:    Pointer to next pool where pages can be allocated when this
+ *                pool is empty. Pages will spill over to the next pool when
+ *                this pool is full. Can be NULL if there is no next pool.
+ * @dying:        true if the pool is being terminated, and any ongoing
+ *                operations should be abandoned
+ * @dont_reclaim: true if the shrinker is forbidden from reclaiming memory from
+ *                this pool, e.g. during a grow operation
  */
 struct kbase_mem_pool {
         struct kbase_device *kbdev;
-        size_t           cur_size;
-        size_t           max_size;
-        u8               order;
-        u8               group_id;
-        spinlock_t       pool_lock;
-        struct list_head page_list;
-        struct shrinker  reclaim;
+        size_t cur_size;
+        size_t max_size;
+        u8 order;
+        u8 group_id;
+        spinlock_t pool_lock;
+        struct list_head page_list;
+        struct shrinker reclaim;
+        atomic_t isolation_in_progress_cnt;
 
         struct kbase_mem_pool *next_pool;
 
@@ -581,7 +735,7 @@ struct kbase_devfreq_opp {
  * @entry_set_pte:     program the pte to be a valid entry to encode the physical
  *                     address of the next lower level page table and also update
  *                     the number of valid entries.
- * @entry_invalidate:  clear out or invalidate the pte.
+ * @entries_invalidate: clear out or invalidate a range of ptes.
  * @get_num_valid_entries: returns the number of valid entries for a specific pgd.
  * @set_num_valid_entries: sets the number of valid entries for a specific pgd
  * @flags:             bitmask of MMU mode flags. Refer to KBASE_MMU_MODE_ constants.
@@ -598,8 +752,8 @@ struct kbase_mmu_mode {
         int (*pte_is_valid)(u64 pte, int level);
         void (*entry_set_ate)(u64 *entry, struct tagged_addr phy,
                               unsigned long flags, int level);
-        void (*entry_set_pte)(u64 *pgd, u64 vpfn, phys_addr_t phy);
-        void (*entry_invalidate)(u64 *entry);
+        void (*entry_set_pte)(u64 *entry, phys_addr_t phy);
+        void (*entries_invalidate)(u64 *entry, u32 count);
         unsigned int (*get_num_valid_entries)(u64 *pgd);
         void (*set_num_valid_entries)(u64 *pgd,
                                       unsigned int num_of_valid_entries);
@@ -675,6 +829,33 @@ struct kbase_process {
 };
 
 /**
+ * struct kbase_mem_migrate - Object representing an instance for managing
+ *                            page migration.
+ *
+ * @free_pages_list:  List of deferred pages to free. Mostly used when page migration
+ *                    is enabled. Pages in the memory pool that require migrating
+ *                    will be freed instead. However, a page cannot be freed
+ *                    right away as Linux will need to release the page lock.
+ *                    Therefore the page will be added to this list and freed later.
+ * @free_pages_lock:  This lock should be held when adding or removing pages
+ *                    from @free_pages_list.
+ * @free_pages_workq: Work queue to process the work items queued to free
+ *                    pages in @free_pages_list.
+ * @free_pages_work:  Work item to free pages in @free_pages_list.
+ * @inode:            Pointer to inode whose address space operations are used
+ *                    for page migration purposes.
+ */
+struct kbase_mem_migrate {
+        struct list_head free_pages_list;
+        spinlock_t free_pages_lock;
+        struct workqueue_struct *free_pages_workq;
+        struct work_struct free_pages_work;
+#if (KERNEL_VERSION(6, 0, 0) > LINUX_VERSION_CODE)
+        struct inode *inode;
+#endif
+};
+
+/**
  * struct kbase_device - Object representing an instance of GPU platform device,
  *                       allocated from the probe method of mali driver.
  * @hw_quirks_sc:  Configuration to be used for the shader cores as per
@@ -712,6 +893,10 @@ struct kbase_process {
  * @opp_token:     Token linked to the device OPP structure maintaining the
  *                 link to OPPs attached to a device. This is obtained
  *                 after setting regulator names for the device.
+ * @token:         Integer replacement for opp_table in kernel versions
+ *                 6 and greater. Value is a token id number when 0 or greater,
+ *                 and a Linux errno when negative. Must be initialised
+ *                 to a non-zero value, as 0 is a valid token id.
  * @devname:       string containing the name used for GPU device instance,
  *                 miscellaneous device is registered using the same name.
  * @id:            Unique identifier for the device, indicates the number of
@@ -752,12 +937,18 @@ struct kbase_process {
  *                 to the GPU device. This points to an internal memory
  *                 group manager if no platform-specific memory group
  *                 manager was retrieved through device tree.
+ * @mmu_unresponsive: Flag to indicate MMU is not responding.
+ *                 Set if an MMU command isn't completed within
+ *                 &kbase_device:mmu_or_gpu_cache_op_wait_time_ms.
+ *                 Cleared by kbase_ctx_sched_restore_all_as() after GPU reset completes.
  * @as:            Array of objects representing address spaces of GPU.
- * @as_free:       Bitpattern of free/available GPU address spaces.
  * @as_to_kctx:    Array of pointers to struct kbase_context, having
  *                 GPU adrress spaces assigned to them.
+ * @as_free:       Bitpattern of free/available GPU address spaces.
  * @mmu_mask_change: Lock to serialize the access to MMU interrupt mask
  *                 register used in the handling of Bus & Page faults.
+ * @pagesize_2mb:  Boolean to determine whether 2MiB page sizes are
+ *                 supported and used where possible.
  * @gpu_props:     Object containing complete information about the
  *                 configuration/properties of GPU HW device in use.
  * @hw_issues_mask: List of SW workarounds for HW issues
@@ -803,6 +994,7 @@ struct kbase_process {
  *                 GPU reset.
  * @lowest_gpu_freq_khz: Lowest frequency in KHz that the GPU can run at. Used
  *                 to calculate suitable timeouts for wait operations.
+ * @backend_time:  Kbase backend time related attributes.
  * @cache_clean_in_progress: Set when a cache clean has been started, and
  *                 cleared when it has finished. This prevents multiple
  *                 cache cleans being done simultaneously.
@@ -909,6 +1101,10 @@ struct kbase_process {
  *                 GPU2019-3878. PM state machine is invoked after
  *                 clearing this flag and @hwaccess_lock is used to
  *                 serialize the access.
+ * @mmu_page_migrate_in_progress: Set before starting an MMU page migration transaction
+ *                 and cleared after the transaction completes. PM L2 state is
+ *                 prevented from entering powering up/down transitions when the
+ *                 flag is set; @hwaccess_lock is used to serialize the access.
  * @poweroff_pending: Set when power off operation for GPU is started, reset when
  *                 power on for GPU is started.
  * @infinite_cache_active_default: Set to enable using infinite cache for all the
@@ -978,11 +1174,8 @@ struct kbase_process {
  *                 @total_gpu_pages for both native and dma-buf imported
  *                 allocations.
  * @job_done_worker: Worker for job_done work.
- * @job_done_worker_thread: Thread for job_done work.
  * @event_worker:  Worker for event work.
- * @event_worker_thread: Thread for event work.
  * @apc.worker:    Worker for async power control work.
- * @apc.thread:    Thread for async power control work.
  * @apc.power_on_work: Work struct for powering on the GPU.
  * @apc.power_off_work: Work struct for powering off the GPU.
  * @apc.end_ts:    The latest end timestamp to power off the GPU.
@@ -1002,6 +1195,16 @@ struct kbase_process {
  * @oom_notifier_block: notifier_block containing kernel-registered out-of-
  *                 memory handler.
  * @proc_sysfs_node: Sysfs directory node to store per-process stats.
+ * @mem_migrate:   Per-device object for managing page migration.
+ * @live_fence_metadata: Count of live fence metadata structures created by
+ *                 KCPU queue. These structures may outlive the kbase module
+ *                 itself. Therefore, in such a case, a warning should
+ *                 be produced.
+ * @mmu_or_gpu_cache_op_wait_time_ms: Maximum waiting time in ms for the completion of
+ *                 a cache operation via MMU_AS_CONTROL or GPU_CONTROL.
+ * @va_region_slab: kmem_cache (slab) for allocated kbase_va_region structures.
+ * @fence_signal_timeout_enabled: Global flag for whether fence signal timeout tracking
+ *                 is enabled.
  */
 struct kbase_device {
         u32 hw_quirks_sc;
@@ -1026,12 +1229,16 @@ struct kbase_device {
 #if IS_ENABLED(CONFIG_REGULATOR)
         struct regulator *regulators[BASE_MAX_NR_CLOCKS_REGULATORS];
         unsigned int nr_regulators;
-        int opp_token;
+#if (KERNEL_VERSION(6, 0, 0) <= LINUX_VERSION_CODE)
+        int token;
+#elif (KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE)
+        struct opp_table *opp_table;
+#endif /* (KERNEL_VERSION(6, 0, 0) <= LINUX_VERSION_CODE) */
 #endif /* CONFIG_REGULATOR */
         char devname[DEVNAME_SIZE];
         u32 id;
 
-#if IS_ENABLED(CONFIG_MALI_NO_MALI)
+#if !IS_ENABLED(CONFIG_MALI_REAL_HW)
         void *model;
         struct kmem_cache *irq_slab;
         struct workqueue_struct *irq_workq;
@@ -1039,7 +1246,7 @@ struct kbase_device {
         atomic_t serving_gpu_irq;
         atomic_t serving_mmu_irq;
         spinlock_t reg_op_lock;
-#endif /* CONFIG_MALI_NO_MALI */
+#endif /* !IS_ENABLED(CONFIG_MALI_REAL_HW) */
 
         struct kbase_pm_device_data pm;
         struct kbase_mem_pool_group mem_pools;
@@ -1048,12 +1255,15 @@ struct kbase_device {
         struct memory_group_manager_device *mgm_dev;
 
+        bool mmu_unresponsive;
         struct kbase_as as[BASE_MAX_NR_AS];
-        u16 as_free;
         struct kbase_context *as_to_kctx[BASE_MAX_NR_AS];
+        u16 as_free;
         spinlock_t mmu_mask_change;
 
+        bool pagesize_2mb;
+
         struct kbase_gpu_props gpu_props;
         unsigned long hw_issues_mask[(BASE_HW_ISSUE_END + BITS_PER_LONG - 1) / BITS_PER_LONG];
@@ -1067,6 +1277,12 @@ struct kbase_device {
         s8 nr_hw_address_spaces;
         s8 nr_user_address_spaces;
 
+        /**
+         * @pbha_propagate_bits: Record of Page-Based Hardware Attribute Propagate bits to
+         *                       restore to L2_CONFIG upon GPU reset.
+         */
+        u8 pbha_propagate_bits;
+
 #if MALI_USE_CSF
         struct kbase_hwcnt_backend_csf_if hwcnt_backend_csf_if_fw;
 #else
@@ -1101,6 +1317,8 @@ struct kbase_device {
         u64 lowest_gpu_freq_khz;
 
+        struct kbase_backend_time backend_time;
+
         bool cache_clean_in_progress;
         u32 cache_clean_queued;
         wait_queue_head_t cache_clean_wait;
@@ -1148,7 +1366,9 @@ struct kbase_device {
 #endif /* CONFIG_MALI_DEVFREQ */
         unsigned long previous_frequency;
 
+#if !MALI_USE_CSF
         atomic_t job_fault_debug;
+#endif /* !MALI_USE_CSF */
 
 #if IS_ENABLED(CONFIG_DEBUG_FS)
         struct dentry *mali_debugfs_directory;
@@ -1159,11 +1379,13 @@ struct kbase_device {
         u64 debugfs_as_read_bitmap;
 #endif /* CONFIG_MALI_DEBUG */
 
+#if !MALI_USE_CSF
         wait_queue_head_t job_fault_wq;
         wait_queue_head_t job_fault_resume_wq;
         struct workqueue_struct *job_fault_resume_workq;
         struct list_head job_fault_event_list;
         spinlock_t job_fault_event_lock;
+#endif /* !MALI_USE_CSF */
 
 #if !MALI_CUSTOMER_RELEASE
         struct {
@@ -1185,13 +1407,11 @@ struct kbase_device {
 #if MALI_USE_CSF
         bool mmu_hw_operation_in_progress;
 #endif
+        bool mmu_page_migrate_in_progress;
         bool poweroff_pending;
 
-#if (KERNEL_VERSION(4, 4, 0) <= LINUX_VERSION_CODE)
         bool infinite_cache_active_default;
-#else
-        u32 infinite_cache_active_default;
-#endif
+
         struct kbase_mem_pool_group_config mem_pool_defaults;
 
         u32 current_gpu_coherency_mode;
@@ -1240,9 +1460,7 @@ struct kbase_device {
         struct kbasep_js_device_data js_data;
 
         struct kthread_worker job_done_worker;
-        struct task_struct *job_done_worker_thread;
         struct kthread_worker event_worker;
-        struct task_struct *event_worker_thread;
 
         /* See KBASE_JS_*_PRIORITY_MODE for details. */
         u32 js_ctx_scheduling_mode;
@@ -1258,7 +1476,6 @@ struct kbase_device {
         struct {
                 struct kthread_worker worker;
-                struct task_struct *thread;
                 struct kthread_work power_on_work;
                 struct kthread_work power_off_work;
                 ktime_t end_ts;
@@ -1292,6 +1509,24 @@ struct kbase_device {
         struct notifier_block oom_notifier_block;
 
         struct kobject *proc_sysfs_node;
+
+        struct kbase_mem_migrate mem_migrate;
+
+#if MALI_USE_CSF && IS_ENABLED(CONFIG_SYNC_FILE)
+        atomic_t live_fence_metadata;
+#endif
+        u32 mmu_or_gpu_cache_op_wait_time_ms;
+        struct kmem_cache *va_region_slab;
+
+#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD)
+        /**
+         * @gpu_metrics: GPU device wide structure used for emitting GPU metrics tracepoints.
+         */
+        struct kbase_gpu_metrics gpu_metrics;
+#endif
+#if MALI_USE_CSF
+        atomic_t fence_signal_timeout_enabled;
+#endif
 };
 
 /**
@@ -1308,6 +1543,9 @@ struct kbase_device {
  * @KBASE_FILE_COMPLETE:    Indicates if the setup for context has
  *                          completed, i.e. flags have been set for the
  *                          context.
+ * @KBASE_FILE_DESTROY_CTX: Indicates that destroying of context has begun or
+ *                          is complete. This state can only be reached after
+ *                          @KBASE_FILE_COMPLETE.
  *
  * The driver allows only limited interaction with user-space until setup
  * is complete.
@@ -1317,7 +1555,8 @@ enum kbase_file_state {
         KBASE_FILE_VSN_IN_PROGRESS,
         KBASE_FILE_NEED_CTX,
         KBASE_FILE_CTX_IN_PROGRESS,
-        KBASE_FILE_COMPLETE
+        KBASE_FILE_COMPLETE,
+        KBASE_FILE_DESTROY_CTX
 };
 
 /**
@@ -1327,6 +1566,12 @@ enum kbase_file_state {
  *                     allocated from the probe method of the Mali driver.
  * @filp:              Pointer to the struct file corresponding to device file
  *                     /dev/malixx instance, passed to the file's open method.
+ * @owner:             Pointer to the file table structure of a process that
+ *                     created the instance of /dev/malixx device file. Set to
+ *                     NULL when that process closes the file instance. No more
+ *                     file operations are allowed once it is set to NULL.
+ *                     It is updated only in Userspace context, i.e. when
+ *                     @kbase_open or @kbase_flush is called.
  * @kctx:              Object representing an entity, among which GPU is
  *                     scheduled and which gets its own GPU address space.
  *                     Invalid until @setup_state is KBASE_FILE_COMPLETE.
  * @api_version:       Contains the version number for User/kernel interface.
  *                     Invalid until @setup_state is KBASE_FILE_NEED_CTX.
  * @setup_state:       Initialization state of the file. Values come from
  *                     the kbase_file_state enumeration.
+ * @destroy_kctx_work: Work item for destroying the @kctx, enqueued only when
+ *                     @fops_count and @map_count become zero after the
+ *                     /dev/malixx file was previously closed by the @owner.
+ * @lock:              Lock to serialize the access to members like @owner,
+ *                     @fops_count, @map_count.
+ * @fops_count:        Counter that is incremented at the beginning of a method
+ *                     defined for @kbase_fops and is decremented at the end.
+ *                     The counter thus keeps track of the file operations in
+ *                     progress for the /dev/malixx file that are being handled
+ *                     by Kbase. The counter is needed to defer the context
+ *                     termination, as Userspace can close the /dev/malixx file
+ *                     and the flush() method can get called when some other
+ *                     file operation is in progress.
+ * @map_count:         Counter to keep track of the memory mappings present on
+ *                     the /dev/malixx file instance. The counter is needed to
+ *                     defer the context termination, as Userspace can close the
+ *                     /dev/malixx file and the flush() method can get called
+ *                     when mappings are still present.
+ * @zero_fops_count_wait: Waitqueue used to wait for the @fops_count to become 0.
+ *                     Currently needed only for the "mem_view" debugfs file.
+ * @event_queue:       Wait queue used for blocking the thread, which consumes
+ *                     the base_jd_event corresponding to an atom, when there
+ *                     are no more posted events.
  */
 struct kbase_file {
         struct kbase_device *kbdev;
         struct file *filp;
+        fl_owner_t owner;
         struct kbase_context *kctx;
         unsigned long api_version;
         atomic_t setup_state;
+        struct work_struct destroy_kctx_work;
+        spinlock_t lock;
+        int fops_count;
+        int map_count;
+#if IS_ENABLED(CONFIG_DEBUG_FS)
+        wait_queue_head_t zero_fops_count_wait;
+#endif
+        wait_queue_head_t event_queue;
 };
 
 #if MALI_JIT_PRESSURE_LIMIT_BASE
 /**
@@ -1374,10 +1650,6 @@ struct kbase_file {
  *
  * @KCTX_DYING: Set when the context process is in the process of being evicted.
  *
- * @KCTX_NO_IMPLICIT_SYNC: Set when explicit Android fences are in use on this
- * context, to disable use of implicit dma-buf fences. This is used to avoid
- * potential synchronization deadlocks.
- *
 * @KCTX_FORCE_SAME_VA: Set when BASE_MEM_SAME_VA should be forced on memory
 * allocations. For 64-bit clients it is enabled by default, and disabled by
 * default on 32-bit clients. Being able to clear this flag is only used for
@@ -1420,7 +1692,6 @@ enum kbase_context_flags {
         KCTX_PRIVILEGED = 1U << 7,
         KCTX_SCHEDULED = 1U << 8,
         KCTX_DYING = 1U << 9,
-        KCTX_NO_IMPLICIT_SYNC = 1U << 10,
         KCTX_FORCE_SAME_VA = 1U << 11,
         KCTX_PULLED_SINCE_ACTIVE_JS0 = 1U << 12,
         KCTX_PULLED_SINCE_ACTIVE_JS1 = 1U << 13,
@@ -1459,9 +1730,6 @@ enum kbase_context_flags {
  *
  * @KCTX_DYING: Set when the context process is in the process of being evicted.
  *
- * @KCTX_NO_IMPLICIT_SYNC: Set when explicit Android fences are in use on this
- * context, to disable use of implicit dma-buf fences. This is used to avoid
- * potential synchronization deadlocks.
  *
  * @KCTX_FORCE_SAME_VA: Set when BASE_MEM_SAME_VA should be forced on memory
  * allocations. For 64-bit clients it is enabled by default, and disabled by
@@ -1502,7 +1770,6 @@ enum kbase_context_flags {
         KCTX_PRIVILEGED = 1U << 7,
         KCTX_SCHEDULED = 1U << 8,
         KCTX_DYING = 1U << 9,
-        KCTX_NO_IMPLICIT_SYNC = 1U << 10,
         KCTX_FORCE_SAME_VA = 1U << 11,
         KCTX_PULLED_SINCE_ACTIVE_JS0 = 1U << 12,
         KCTX_PULLED_SINCE_ACTIVE_JS1 = 1U << 13,
@@ -1520,8 +1787,8 @@ struct kbase_sub_alloc {
 /**
  * struct kbase_context - Kernel base context
  *
- * @filp:                Pointer to the struct file corresponding to device file
- *                       /dev/malixx instance, passed to the file's open method.
+ * @kfile:               Pointer to the object representing the /dev/malixx device
+ *                       file instance.
  * @kbdev:               Pointer to the Kbase device for which the context is created.
  * @kctx_list_link:      Node into Kbase device list of contexts.
  * @mmu:                 Structure holding details of the MMU tables for this
  *                       context
@@ -1556,22 +1823,6 @@ struct kbase_sub_alloc {
  *                       for the allocations >= 2 MB in size.
  * @reg_lock:            Lock used for GPU virtual address space management operations,
  *                       like adding/freeing a memory region in the address space.
- *                       Can be converted to a rwlock ?.
- * @reg_rbtree_same:     RB tree of the memory regions allocated from the SAME_VA
- *                       zone of the GPU virtual address space. Used for allocations
- *                       having the same value for GPU & CPU virtual address.
- * @reg_rbtree_custom:   RB tree of the memory regions allocated from the CUSTOM_VA
- *                       zone of the GPU virtual address space.
- * @reg_rbtree_exec:     RB tree of the memory regions allocated from the EXEC_VA
- *                       zone of the GPU virtual address space. Used for GPU-executable
- *                       allocations which don't need the SAME_VA property.
- * @reg_rbtree_exec_fixed: RB tree of the memory regions allocated from the
- *                       EXEC_FIXED_VA zone of the GPU virtual address space. Used for
- *                       GPU-executable allocations with FIXED/FIXABLE GPU virtual
- *                       addresses.
- * @reg_rbtree_fixed:    RB tree of the memory regions allocated from the FIXED_VA zone
- *                       of the GPU virtual address space. Used for allocations with
- *                       FIXED/FIXABLE GPU virtual addresses.
  * @num_fixable_allocs:  A count for the number of memory allocations with the
  *                       BASE_MEM_FIXABLE property.
  * @num_fixed_allocs:    A count for the number of memory allocations with the
@@ -1588,9 +1839,6 @@ struct kbase_sub_alloc {
  *                       used in conjunction with @cookies bitmask mainly for
  *                       providing a mechansim to have the same value for CPU &
  *                       GPU virtual address.
- * @event_queue:         Wait queue used for blocking the thread, which consumes
- *                       the base_jd_event corresponding to an atom, when there
- *                       are no more posted events.
  * @tgid:                Thread group ID of the process whose thread created
  *                       the context (by calling KBASE_IOCTL_VERSION_CHECK or
  *                       KBASE_IOCTL_SET_FLAGS, depending on the @api_version).
@@ -1652,11 +1900,13 @@ struct kbase_sub_alloc {
  *                       is scheduled in and an atom is pulled from the context's per
  *                       slot runnable tree in JM GPU or GPU command queue
  *                       group is programmed on CSG slot in CSF GPU.
- * @mm_update_lock:      lock used for handling of special tracking page.
  * @process_mm:          Pointer to the memory descriptor of the process which
  *                       created the context. Used for accounting the physical
  *                       pages used for GPU allocations, done for the context,
- *                       to the memory consumed by the process.
+ *                       to the memory consumed by the process. A reference is taken
+ *                       on this descriptor for Userspace-created contexts so that
+ *                       Kbase can safely access it to update the memory usage counters.
+ *                       The reference is dropped on context termination.
  * @gpu_va_end:          End address of the GPU va space (in 4KB page units)
  * @running_total_tiler_heap_nr_chunks: Running total of number of chunks in all
  *                       tiler heaps of the kbase context.
@@ -1707,12 +1957,6 @@ struct kbase_sub_alloc {
  *                       memory allocations.
  * @jit_current_allocations_per_bin: Current number of in-flight just-in-time
  *                       memory allocations per bin.
- * @jit_version:         Version number indicating whether userspace is using
- *                       old or new version of interface for just-in-time
- *                       memory allocations.
- *                       1 -> client used KBASE_IOCTL_MEM_JIT_INIT_10_2
- *                       2 -> client used KBASE_IOCTL_MEM_JIT_INIT_11_5
- *                       3 -> client used KBASE_IOCTL_MEM_JIT_INIT
  * @jit_group_id:        A memory group ID to be passed to a platform-specific
  *                       memory group manager.
  *                       Valid range is 0..(MEMORY_GROUP_MANAGER_NR_GROUPS-1).
@@ -1784,6 +2028,11 @@ struct kbase_sub_alloc {
  * @limited_core_mask:   The mask that is applied to the affinity in case of atoms
  *                       marked with BASE_JD_REQ_LIMITED_CORE_MASK.
  * @platform_data:       Pointer to platform specific per-context data.
+ * @task:                Pointer to the task structure of the main thread of the process
+ *                       that created the Kbase context. It is set only for contexts
+ *                       created by Userspace, and not for contexts created internally
+ *                       by Kbase.
+ * @comm:                Records the process name.
  *
  * A kernel base context is an entity among which the GPU is scheduled.
  * Each context has its own GPU address space.
@@ -1792,7 +2041,7 @@ struct kbase_sub_alloc {
  * is made on the device file.
  */
 struct kbase_context {
-        struct file *filp;
+        struct kbase_file *kfile;
         struct kbase_device *kbdev;
         struct list_head kctx_list_link;
         struct kbase_mmu_table mmu;
@@ -1817,17 +2066,11 @@ struct kbase_context {
         struct list_head mem_partials;
 
         struct mutex reg_lock;
-
-        struct rb_root reg_rbtree_same;
-        struct rb_root reg_rbtree_custom;
-        struct rb_root reg_rbtree_exec;
 #if MALI_USE_CSF
-        struct rb_root reg_rbtree_exec_fixed;
-        struct rb_root reg_rbtree_fixed;
         atomic64_t num_fixable_allocs;
         atomic64_t num_fixed_allocs;
 #endif
-        struct kbase_reg_zone reg_zone[KBASE_REG_ZONE_MAX];
+        struct kbase_reg_zone reg_zone[CONTEXT_ZONE_MAX];
 
 #if MALI_USE_CSF
         struct kbase_csf_context csf;
@@ -1851,7 +2094,6 @@ struct kbase_context {
 
         DECLARE_BITMAP(cookies, BITS_PER_LONG);
         struct kbase_va_region *pending_regions[BITS_PER_LONG];
-        wait_queue_head_t event_queue;
 
         pid_t tgid;
         pid_t pid;
         atomic_t used_pages;
@@ -1866,19 +2108,12 @@ struct kbase_context {
         struct list_head waiting_soft_jobs;
         spinlock_t waiting_soft_jobs_lock;
-#ifdef CONFIG_MALI_DMA_FENCE
-        struct {
-                struct list_head waiting_resource;
-                struct workqueue_struct *wq;
-        } dma_fence;
-#endif /* CONFIG_MALI_DMA_FENCE */
 
         int as_nr;
         atomic_t refcount;
 
-        spinlock_t mm_update_lock;
-        struct mm_struct __rcu *process_mm;
+        struct mm_struct *process_mm;
         u64 gpu_va_end;
 #if MALI_USE_CSF
         u32 running_total_tiler_heap_nr_chunks;
@@ -1903,7 +2138,6 @@ struct kbase_context {
         u8 jit_max_allocations;
         u8 jit_current_allocations;
         u8 jit_current_allocations_per_bin[256];
-        u8 jit_version;
         u8 jit_group_id;
#if MALI_JIT_PRESSURE_LIMIT_BASE
         u64 jit_phys_pages_limit;
@@ -1939,9 +2173,19 @@ struct kbase_context {
 
         u64 limited_core_mask;
 
-#if !MALI_USE_CSF
         void *platform_data;
+
+        struct task_struct *task;
+
+#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD)
+        /**
+         * @gpu_metrics_ctx: Pointer to the GPU metrics context corresponding to the
+         *                   application that created the Kbase context.
+         */
+        struct kbase_gpu_metrics_ctx *gpu_metrics_ctx;
 #endif
+
+        char comm[TASK_COMM_LEN];
 };
 
 #ifdef CONFIG_MALI_CINSTR_GWT
@@ -1970,17 +2214,15 @@ struct kbasep_gwt_list_element {
  *                 to a @kbase_context.
  * @ext_res_node:  List head for adding the metadata to a
  *                 @kbase_context.
- * @alloc:         The physical memory allocation structure
- *                 which is mapped.
- * @gpu_addr:      The GPU virtual address the resource is
- *                 mapped to.
+ * @reg:           External resource information, containing
+ *                 the corresponding VA region
  * @ref:           Reference count.
  *
  * External resources can be mapped into multiple contexts as well as the same
 * context multiple times.
- * As kbase_va_region itself isn't refcounted we can't attach our extra
- * information to it as it could be removed under our feet leaving external
- * resources pinned.
+ * As kbase_va_region is refcounted, we guarantee that it will be available
+ * for the duration of the external resource, meaning it is sufficient to use
+ * it to rederive any additional data, like the GPU address.
  * This metadata structure binds a single external resource to a single
  * context, ensuring that per context mapping is tracked separately so it can
  * be overridden when needed and abuses by the application (freeing the resource
@@ -1988,8 +2230,7 @@ struct kbasep_gwt_list_element {
  */
 struct kbase_ctx_ext_res_meta {
         struct list_head ext_res_node;
-        struct kbase_mem_phy_alloc *alloc;
-        u64 gpu_addr;
+        struct kbase_va_region *reg;
         u32 ref;
 };
 
@@ -2044,6 +2285,7 @@ static inline u64 kbase_get_lock_region_min_size_log2(struct kbase_gpu_props con
 /* Maximum number of loops polling the GPU for a cache flush before we assume it must have completed */
 #define KBASE_CLEAN_CACHE_MAX_LOOPS 100000
 /* Maximum number of loops polling the GPU for an AS command to complete before we assume the GPU has hung */
-#define KBASE_AS_INACTIVE_MAX_LOOPS 100000000
-
+#define KBASE_AS_INACTIVE_MAX_LOOPS 100000
+/* Maximum number of loops polling the GPU PRFCNT_ACTIVE bit before we assume the GPU has hung */
+#define KBASE_PRFCNT_ACTIVE_MAX_LOOPS 100000000
 
 #endif /* _KBASE_DEFS_H_ */
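
A few small illustrations may help make the new structures concrete. First, the work-period bookkeeping described for struct kbase_gpu_metrics_ctx: the sketch below is a minimal userspace model, not driver code. The field names come from the kernel-doc above, while the update functions (gpu_active()/gpu_idle()) and their logic are assumptions made purely for illustration.

#include <stdint.h>
#include <stdio.h>

/* Simplified stand-ins for the timing fields of struct kbase_gpu_metrics_ctx. */
struct metrics_ctx {
        uint64_t first_active_start_time;
        uint64_t last_active_start_time;
        uint64_t last_active_end_time;
        uint64_t total_active;
        uint8_t active_cnt;
};

/* Hypothetical hook: a unit of GPU work for the application starts. */
static void gpu_active(struct metrics_ctx *c, uint64_t now)
{
        if (c->active_cnt++ == 0) {
                c->last_active_start_time = now;
                /* 0 doubles as "unset" in this toy model only. */
                if (c->first_active_start_time == 0)
                        c->first_active_start_time = now;
        }
}

/* Hypothetical hook: a unit of GPU work ends; accumulate on the last one. */
static void gpu_idle(struct metrics_ctx *c, uint64_t now)
{
        if (--c->active_cnt == 0) {
                c->last_active_end_time = now;
                c->total_active += now - c->last_active_start_time;
        }
}

int main(void)
{
        struct metrics_ctx c = { 0 };

        gpu_active(&c, 100); /* two overlapping bursts of GPU work */
        gpu_active(&c, 120);
        gpu_idle(&c, 150);
        gpu_idle(&c, 200);   /* the context only becomes idle here */
        printf("total_active = %llu\n",
               (unsigned long long)c.total_active); /* prints 100 */
        return 0;
}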
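
The comment on MAX_PAGES_FOR_FREE_PGDS asks for the value to be kept in sync with KBASE_MEM_ALLOC_MAX_SIZE, so the resulting scratch_mem footprint is worth working out. The standalone program below does the arithmetic; the 4 KiB page size and the MIDGARD_MMU_BOTTOMLEVEL value of 3 are assumptions for the example, not values taken from this diff.

#include <stdio.h>

#define PAGE_SIZE_BYTES 4096UL      /* assumes 4 KiB pages */
#define MIDGARD_MMU_BOTTOMLEVEL 3UL /* assumed level count, for illustration */
#define MAX_PAGES_FOR_FREE_PGDS ((size_t)9)
#define MAX_FREE_PGDS ((PAGE_SIZE_BYTES / sizeof(void *)) * MAX_PAGES_FOR_FREE_PGDS)

int main(void)
{
        /* free_pgds.pgds: nine pages of 8-byte pointers -> 512 * 9 entries */
        printf("MAX_FREE_PGDS  = %zu entries (%zu KiB)\n",
               (size_t)MAX_FREE_PGDS,
               (size_t)(MAX_FREE_PGDS * sizeof(void *) / 1024));
        /* teardown_pages.levels: one full PGD page copied per MMU level */
        printf("teardown_pages = %zu KiB\n",
               (size_t)(MIDGARD_MMU_BOTTOMLEVEL * PAGE_SIZE_BYTES / 1024));
        return 0;
}

On a 64-bit build this prints 4608 entries (36 KiB) for the free-PGD list, which dominates the size of the union over the 12 KiB teardown copies.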
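
How the free_pgds half of the scratch_mem union is plausibly used follows from its kernel-doc: PGDs detached during an insertion, update or teardown are recorded, then freed in one pass once the operation completes. The model below encodes that reading in userspace; record_free_pgd(), flush_free_pgds() and the overflow behaviour are hypothetical helpers, not the driver's actual ones.

#include <stdbool.h>
#include <stddef.h>

#define MAX_FREE_PGDS 4608 /* value derived above for 4 KiB pages */

struct page; /* opaque stand-in for the kernel's struct page */

struct free_pgds {
        struct page *pgds[MAX_FREE_PGDS];
        size_t head_index; /* index of the first free slot */
};

/* Record a detached PGD for deferred freeing; false means the scratch
 * area is full and the operation must be split into smaller chunks.
 */
static bool record_free_pgd(struct free_pgds *fp, struct page *pgd)
{
        if (fp->head_index >= MAX_FREE_PGDS)
                return false;
        fp->pgds[fp->head_index++] = pgd;
        return true;
}

/* At the end of the MMU operation, free everything recorded while the
 * page tables were being modified, then reset the scratch list.
 */
static void flush_free_pgds(struct free_pgds *fp, void (*free_pgd)(struct page *))
{
        size_t i;

        for (i = 0; i < fp->head_index; i++)
                free_pgd(fp->pgds[i]);
        fp->head_index = 0;
}

static void discard(struct page *p) { (void)p; }

int main(void)
{
        static struct free_pgds fp; /* static: the array is ~36 KiB */

        record_free_pgd(&fp, NULL); /* NULL stands in for a real PGD page */
        flush_free_pgds(&fp, discard);
        return 0;
}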
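
The split between MEMORY_ZONE_MAX and CONTEXT_ZONE_MAX in enum kbase_memory_zone lets per-context code iterate only the zones a context owns, which matches the new reg_zone[CONTEXT_ZONE_MAX] array in struct kbase_context. A minimal sketch of that usage follows (non-CSF variant of the enum; reg_zone_model is an invented stand-in):

#include <stdio.h>

/* Non-CSF variant of the enum, reproduced from the diff for the example. */
enum kbase_memory_zone {
        SAME_VA_ZONE,
        CUSTOM_VA_ZONE,
        EXEC_VA_ZONE,
        MEMORY_ZONE_MAX,
        CONTEXT_ZONE_MAX = EXEC_VA_ZONE + 1
};

/* Cut-down model of struct kbase_reg_zone. */
struct reg_zone_model {
        unsigned long long base_pfn;
        unsigned long long va_size_pages;
};

int main(void)
{
        /* Per-context storage is sized by CONTEXT_ZONE_MAX rather than
         * MEMORY_ZONE_MAX, so device-only zones (e.g. MCU_SHARED_ZONE on
         * CSF builds) are excluded from the per-context array.
         */
        struct reg_zone_model reg_zone[CONTEXT_ZONE_MAX] = { { 0, 0 } };
        int z;

        for (z = SAME_VA_ZONE; z < CONTEXT_ZONE_MAX; z++)
                printf("zone %d: %llu pages\n", z, reg_zone[z].va_size_pages);
        return 0;
}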
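
The @next_pool kernel-doc for struct kbase_mem_pool describes a fallback chain: allocate from this pool, spill to the next pool when it is empty, and finally fall back to the kernel. A toy model of just that chain (pool_alloc_page() is hypothetical; the real allocator also deals with locking, orders and the shrinker):

#include <stdio.h>

/* Cut-down model of struct kbase_mem_pool: only the spill-over chain. */
struct pool {
        unsigned long cur_size; /* free pages currently cached */
        struct pool *next_pool; /* where to look next; may be NULL */
};

/* Take one page, preferring this pool, then any chained pool, then the
 * (simulated) kernel allocator.
 */
static const char *pool_alloc_page(struct pool *p)
{
        for (; p; p = p->next_pool) {
                if (p->cur_size > 0) {
                        p->cur_size--;
                        return "served from a pool";
                }
        }
        return "fell back to the kernel allocator";
}

int main(void)
{
        struct pool device_pool = { 1, NULL };
        struct pool kctx_pool = { 0, &device_pool };

        puts(pool_alloc_page(&kctx_pool)); /* empty kctx pool spills over */
        puts(pool_alloc_page(&kctx_pool)); /* both pools are empty now */
        return 0;
}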
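
Finally, the deferred context teardown described across @owner, @fops_count, @map_count and @destroy_kctx_work in struct kbase_file reduces to one predicate: destruction may be queued only once the owner has closed the file and no file operation or mapping remains live. A sketch of that rule, with the helper name and types invented for the example:

#include <stdbool.h>
#include <stddef.h>

/* Simplified stand-in for the new fields of struct kbase_file. */
struct file_model {
        void *owner;    /* non-NULL while the opening process holds the file */
        int fops_count; /* file operations currently in flight */
        int map_count;  /* live memory mappings on the file */
};

/* Hypothetical helper encoding the kernel-doc rule: the context may be
 * destroyed only once the owner has closed the file AND no file operation
 * or mapping is still live.
 */
static bool should_queue_destroy_kctx_work(const struct file_model *f)
{
        return f->owner == NULL && f->fops_count == 0 && f->map_count == 0;
}

int main(void)
{
        /* flush() already ran (owner cleared) but an ioctl is in flight. */
        struct file_model f = { NULL, 1, 0 };

        if (!should_queue_destroy_kctx_work(&f)) {
                /* destruction deferred until the last operation returns */
        }
        f.fops_count = 0; /* the last in-flight operation returns */
        return should_queue_destroy_kctx_work(&f) ? 0 : 1;
}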