-rw-r--r--  common/include/uapi/gpu/arm/midgard/platform/pixel/pixel_memory_group_manager.h |  45
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_js_backend.c |   3
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_scheduler.c |   4
-rw-r--r--  mali_kbase/mali_kbase_config.c |   9
-rw-r--r--  mali_kbase/mali_kbase_config.h |  18
-rw-r--r--  mali_kbase/platform/pixel/Kbuild |  10
-rw-r--r--  mali_kbase/platform/pixel/mali_kbase_config_platform.h |  35
-rw-r--r--  mali_kbase/platform/pixel/pixel_gpu.c |   1
-rw-r--r--  mali_kbase/platform/pixel/pixel_gpu_slc.c | 403
-rw-r--r--  mali_kbase/platform/pixel/pixel_gpu_slc.h |  25
-rw-r--r--  mali_pixel/Documentation/ABI/testing/sysfs-kernel-pixel_stat-gpu |   7
-rw-r--r--  mali_pixel/Kbuild |   6
-rw-r--r--  mali_pixel/memory_group_manager.c | 539
-rw-r--r--  mali_pixel/pixel_slc.c | 405
-rw-r--r--  mali_pixel/pixel_slc.h |  98
15 files changed, 753 insertions(+), 855 deletions(-)
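The behavioural core of this change: per-context SLC bookkeeping moves from byte-accurate demand/usage accounting (with explicit partition resizing) to a vote-and-refcount scheme, with the enable decision centralised in mali_pixel. As a reading aid before the diff body, here is a minimal standalone C sketch of that voting flow; transition() mirrors the helper added in pixel_gpu_slc.c, while the harness, the plain int refcount, and the kctx_* wrapper names are illustrative assumptions, not code from this patch.

#include <assert.h>
#include <stdbool.h>

enum slc_vote_state { IDLE = 0, VOTING = 1 };

static int refcount; /* stands in for the count behind pixel_mgm_slc_inc/dec_refcount() */

/* Same shape as the patch's transition(): a guarded state change,
 * serialised by the caller (kbase holds hwaccess_lock at the call sites). */
static bool transition(int *v, int old, int new)
{
	bool const cond = *v == old;

	if (cond)
		*v = new;

	return cond;
}

static void kctx_active(int *slc_vote) /* cf. gpu_slc_kctx_active() */
{
	if (transition(slc_vote, IDLE, VOTING))
		refcount++;
}

static void kctx_idle_or_term(int *slc_vote) /* cf. gpu_slc_kctx_idle()/_term() */
{
	if (transition(slc_vote, VOTING, IDLE))
		refcount--;
}

int main(void)
{
	int vote = IDLE;

	kctx_active(&vote);
	kctx_active(&vote);        /* repeated 'active' events do not double-count */
	assert(refcount == 1);

	kctx_idle_or_term(&vote);  /* idle... */
	kctx_idle_or_term(&vote);  /* ...then terminate: no underflow */
	assert(refcount == 0);

	return 0;
}

The point of the guard is visible in main(): a context that is marked active twice, or idled and then terminated, moves the refcount exactly once in each direction.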
diff --git a/common/include/uapi/gpu/arm/midgard/platform/pixel/pixel_memory_group_manager.h b/common/include/uapi/gpu/arm/midgard/platform/pixel/pixel_memory_group_manager.h index b575c79..2a27f4f 100644 --- a/common/include/uapi/gpu/arm/midgard/platform/pixel/pixel_memory_group_manager.h +++ b/common/include/uapi/gpu/arm/midgard/platform/pixel/pixel_memory_group_manager.h @@ -7,49 +7,10 @@ #ifndef _UAPI_PIXEL_MEMORY_GROUP_MANAGER_H_ #define _UAPI_PIXEL_MEMORY_GROUP_MANAGER_H_ -/** - * enum pixel_mgm_group_id - Symbolic names for used memory groups - */ -enum pixel_mgm_group_id -{ - /* The Mali driver requires that allocations made on one of the groups - * are not treated specially. - */ - MGM_RESERVED_GROUP_ID = 0, - - /* Group for memory that should be cached in the system level cache. */ - MGM_SLC_GROUP_ID = 1, - - /* Group for memory explicitly allocated in SLC. */ - MGM_SLC_EXPLICIT_GROUP_ID = 2, +void pixel_mgm_slc_update_signal(struct memory_group_manager_device* mgm_dev, u64 signal); - /* Imported memory is handled by the allocator of the memory, and the Mali - * DDK will request a group_id for such memory via mgm_get_import_memory_id(). - * We specify which group we want to use for this here. - */ - MGM_IMPORTED_MEMORY_GROUP_ID = (MEMORY_GROUP_MANAGER_NR_GROUPS - 1), -}; +void pixel_mgm_slc_inc_refcount(struct memory_group_manager_device* mgm_dev); -/** - * pixel_mgm_query_group_size - Query the current size of a memory group - * - * @mgm_dev: The memory group manager through which the request is being made. - * @group_id: Memory group to query. - * - * Returns the actual size of the memory group's active partition - */ -extern u64 pixel_mgm_query_group_size(struct memory_group_manager_device* mgm_dev, - enum pixel_mgm_group_id group_id); - -/** - * pixel_mgm_resize_group_to_fit - Resize a memory group to meet @demand, if possible - * - * @mgm_dev: The memory group manager through which the request is being made. - * @group_id: Memory group for which we will change the backing partition. - * @demand: The demanded space from the memory group. 
- */ -extern void pixel_mgm_resize_group_to_fit(struct memory_group_manager_device* mgm_dev, - enum pixel_mgm_group_id group_id, - u64 demand); +void pixel_mgm_slc_dec_refcount(struct memory_group_manager_device* mgm_dev); #endif /* _UAPI_PIXEL_MEMORY_GROUP_MANAGER_H_ */ diff --git a/mali_kbase/backend/gpu/mali_kbase_js_backend.c b/mali_kbase/backend/gpu/mali_kbase_js_backend.c index be72c4a..304737f 100644 --- a/mali_kbase/backend/gpu/mali_kbase_js_backend.c +++ b/mali_kbase/backend/gpu/mali_kbase_js_backend.c @@ -244,6 +244,9 @@ static enum hrtimer_restart timer_callback(struct hrtimer *timer) spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags); + /* Inform platform of scheduling event */ + kbasep_platform_event_tick_tock(kbdev); + return HRTIMER_NORESTART; } diff --git a/mali_kbase/csf/mali_kbase_csf_scheduler.c b/mali_kbase/csf/mali_kbase_csf_scheduler.c index 8ead416..9e47a1e 100644 --- a/mali_kbase/csf/mali_kbase_csf_scheduler.c +++ b/mali_kbase/csf/mali_kbase_csf_scheduler.c @@ -43,6 +43,7 @@ #include <mali_kbase_gpu_metrics.h> #include <csf/mali_kbase_csf_trace_buffer.h> #endif /* CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD */ +#include <uapi/gpu/arm/midgard/platform/pixel/pixel_memory_group_manager.h> /* Value to indicate that a queue group is not in the groups_to_schedule list */ #define KBASEP_GROUP_PREPARED_SEQ_NUM_INVALID (U32_MAX) @@ -6685,6 +6686,9 @@ static int kbase_csf_scheduler_kthread(void *data) dev_dbg(kbdev->dev, "Waking up for event after a scheduling iteration."); wake_up_all(&kbdev->csf.event_wait); + + /* Inform platform of scheduling event */ + kbasep_platform_event_tick_tock(kbdev); } /* Wait for the other thread, that signaled the exit, to call kthread_stop() */ diff --git a/mali_kbase/mali_kbase_config.c b/mali_kbase/mali_kbase_config.c index 72080a7..669e1c3 100644 --- a/mali_kbase/mali_kbase_config.c +++ b/mali_kbase/mali_kbase_config.c @@ -119,6 +119,15 @@ void kbasep_platform_event_work_end(void *param) platform_funcs_p->platform_handler_work_end_func(param); } +void kbasep_platform_event_tick_tock(struct kbase_device *kbdev) +{ + struct kbase_platform_funcs_conf *platform_funcs_p; + + platform_funcs_p = (struct kbase_platform_funcs_conf*)PLATFORM_FUNCS; + if (platform_funcs_p && platform_funcs_p->platform_handler_tick_tock) + platform_funcs_p->platform_handler_tick_tock(kbdev); +} + int kbasep_platform_fw_config_init(struct kbase_device *kbdev) { struct kbase_platform_funcs_conf *platform_funcs_p; diff --git a/mali_kbase/mali_kbase_config.h b/mali_kbase/mali_kbase_config.h index 7f6d3ed..549c170 100644 --- a/mali_kbase/mali_kbase_config.h +++ b/mali_kbase/mali_kbase_config.h @@ -138,6 +138,14 @@ struct kbase_platform_funcs_conf { */ void (*platform_handler_context_idle)(struct kbase_context *kctx); /** + * platform_handler_tick_tock - Platform specific callback when a scheduler tick/tock occurs. + * + * @kbdev: kbase_device pointer + * + * Context: Process context + */ + void (*platform_handler_tick_tock)(struct kbase_device *kbdev); + /** * platform_handler_work_begin_func - Platform specific handler whose * function changes depending on the * backend used. @@ -634,6 +642,16 @@ void kbasep_platform_event_work_begin(void *param); void kbasep_platform_event_work_end(void *param); /** + * kbasep_platform_event_tick_tock - Platform specific callback when a scheduler tick/tock occurs. + * + * @kbdev: kbase_device pointer + * + * Calls a platform-defined routine if one is specified in the configuration attributes.
+ * + */ +void kbasep_platform_event_tick_tock(struct kbase_device *kbdev); + +/** * kbasep_platform_fw_config_init - Platform specific callback to configure FW * * @kbdev - kbase_device pointer diff --git a/mali_kbase/platform/pixel/Kbuild b/mali_kbase/platform/pixel/Kbuild index 6d6b0a8..c35c0be 100644 --- a/mali_kbase/platform/pixel/Kbuild +++ b/mali_kbase/platform/pixel/Kbuild @@ -45,8 +45,14 @@ mali_kbase-y += \ platform/$(MALI_PLATFORM_DIR)/pixel_gpu_sscd.o endif -mali_kbase-$(CONFIG_MALI_PIXEL_GPU_SLC) += \ - platform/$(MALI_PLATFORM_DIR)/pixel_gpu_slc.o +ifeq ($(CONFIG_MALI_PIXEL_GPU_SLC),y) + mali_kbase-y += \ + platform/$(MALI_PLATFORM_DIR)/pixel_gpu_slc.o + + ifeq ($(CONFIG_SOC_ZUMA),y) + ccflags-y += -DPIXEL_GPU_SLC_ACPM_SIGNAL + endif +endif mali_kbase-$(CONFIG_MALI_CSF_SUPPORT) += \ platform/$(MALI_PLATFORM_DIR)/pixel_gpu_debug.o diff --git a/mali_kbase/platform/pixel/mali_kbase_config_platform.h b/mali_kbase/platform/pixel/mali_kbase_config_platform.h index 991e5d4..4ad3318 100644 --- a/mali_kbase/platform/pixel/mali_kbase_config_platform.h +++ b/mali_kbase/platform/pixel/mali_kbase_config_platform.h @@ -320,15 +320,12 @@ struct gpu_dvfs_metrics_uid_stats; * @dvfs.qos.bts.threshold: The G3D shader stack clock at which BTS will be enabled. Set via DT. * @dvfs.qos.bts.scenario: The index of the BTS scenario to be used. Set via DT. * - * @slc.lock: Synchronize updates to the SLC partition accounting variables. - * @slc.demand: The total demand for SLC space, an aggregation of each kctx's demand. - * @slc.usage: The total amount of SLC space used, an aggregation of each kctx's usage. - * * @itmon.wq: A workqueue for ITMON page table search. * @itmon.work: The work item for the above. * @itmon.nb: The ITMON notifier block. * @itmon.pa: The faulting physical address. * @itmon.active: Active count, non-zero while a search is active. 
+ * @slc_demand: Tracks demand for SLC space */ struct pixel_context { struct kbase_device *kbdev; @@ -461,12 +458,6 @@ struct pixel_context { } dvfs; #endif /* CONFIG_MALI_MIDGARD_DVFS */ - struct { - struct mutex lock; - u64 demand; - u64 usage; - } slc; - #if IS_ENABLED(CONFIG_EXYNOS_ITMON) struct { struct workqueue_struct *wq; @@ -476,28 +467,26 @@ struct pixel_context { atomic_t active; } itmon; #endif +#ifndef PIXEL_GPU_SLC_ACPM_SIGNAL + atomic_t slc_demand; +#endif /* PIXEL_GPU_SLC_ACPM_SIGNAL */ }; /** * struct pixel_platform_data - Per kbase_context Pixel specific platform data * - * @kctx: Handle to the parent kctx - * @stats: Tracks the dvfs metrics for the UID associated with this context - * - * @slc.peak_demand: The parent context's maximum demand for SLC space - * @slc.peak_usage: The parent context's maximum use of SLC space - * @slc.idle_work: Work item used to queue SLC partition shrink upon context idle - * @slc.idle_work_cancelled: Flag for async cancellation of idle_work + * @kctx: Handle to the parent kctx + * @stats: Tracks the dvfs metrics for the UID associated with this context + * @slc_vote: Tracks whether this context is voting for slc + * @slc_demand: Tracks demand for SLC space */ struct pixel_platform_data { struct kbase_context *kctx; struct gpu_dvfs_metrics_uid_stats* stats; - struct { - u64 peak_demand; - u64 peak_usage; - struct work_struct idle_work; - atomic_t idle_work_cancelled; - } slc; + int slc_vote; +#ifndef PIXEL_GPU_SLC_ACPM_SIGNAL + atomic_t slc_demand; +#endif /* PIXEL_GPU_SLC_ACPM_SIGNAL */ }; #endif /* _KBASE_CONFIG_PLATFORM_H_ */ diff --git a/mali_kbase/platform/pixel/pixel_gpu.c b/mali_kbase/platform/pixel/pixel_gpu.c index 5ac8807..1ae6db6 100644 --- a/mali_kbase/platform/pixel/pixel_gpu.c +++ b/mali_kbase/platform/pixel/pixel_gpu.c @@ -328,6 +328,7 @@ struct kbase_platform_funcs_conf platform_funcs = { #endif /* CONFIG_MALI_MIDGARD_DVFS */ .platform_handler_context_active = &gpu_slc_kctx_active, .platform_handler_context_idle = &gpu_slc_kctx_idle, + .platform_handler_tick_tock = &gpu_slc_tick_tock, .platform_fw_cfg_init_func = &gpu_fw_cfg_init, .platform_handler_core_dump_func = &gpu_sscd_dump, }; diff --git a/mali_kbase/platform/pixel/pixel_gpu_slc.c b/mali_kbase/platform/pixel/pixel_gpu_slc.c index 62a4e9e..1aac4d8 100644 --- a/mali_kbase/platform/pixel/pixel_gpu_slc.c +++ b/mali_kbase/platform/pixel/pixel_gpu_slc.c @@ -17,189 +17,53 @@ #include "mali_kbase_config_platform.h" #include "pixel_gpu_slc.h" -struct dirty_region { - u64 first_vpfn; - u64 last_vpfn; - u64 dirty_pgds; -}; +#include <uapi/gpu/arm/midgard/platform/pixel/pixel_memory_group_manager.h> /** - * struct gpu_slc_liveness_update_info - Buffer info, and live ranges - * - * @buffer_va: Array of buffer base virtual addresses - * @buffer_sizes: Array of buffer sizes - * @buffer_count: Number of elements in the va and sizes buffers - * @live_ranges: Array of &struct kbase_pixel_gpu_slc_liveness_mark denoting live ranges for - * each buffer - * @live_ranges_count: Number of elements in the live ranges buffer + * enum slc_vote_state - Whether a context is voting for SLC */ -struct gpu_slc_liveness_update_info { - u64* buffer_va; - u64* buffer_sizes; - u64 buffer_count; - struct kbase_pixel_gpu_slc_liveness_mark* live_ranges; - u64 live_ranges_count; +enum slc_vote_state { + /** @IDLE: Idle, not voting for SLC */ + IDLE = 0, + /** @VOTING: Active, voting for SLC */ + VOTING = 1, }; /** - * gpu_slc_lock_as - Lock the current process address space + * transition() - Try to 
transition from one value to another * - * @kctx: The &struct kbase_context - */ -static void gpu_slc_lock_as(struct kbase_context *kctx) -{ - down_write(kbase_mem_get_process_mmap_lock()); - kbase_gpu_vm_lock_with_pmode_sync(kctx); -} - -/** - * gpu_slc_unlock_as - Unlock the current process address space - * - * @kctx: The &struct kbase_context - */ -static void gpu_slc_unlock_as(struct kbase_context *kctx) -{ - kbase_gpu_vm_unlock_with_pmode_sync(kctx); - up_write(kbase_mem_get_process_mmap_lock()); -} - -/** - * gpu_slc_in_group - Check whether the region is SLC cacheable + * @v: Value to transition + * @old: Starting state to transition from + * @new: Destination state to transition to * - * @reg: The gpu memory region to check for an SLC cacheable memory group. + * Return: Whether the transition was successful */ -static bool gpu_slc_in_group(struct kbase_va_region* reg) +static bool transition(int *v, int old, int new) { - return reg->gpu_alloc->group_id == MGM_SLC_GROUP_ID; -} - -/** - * gpu_slc_get_region - Find the gpu memory region from a virtual address - * - * @kctx: The &struct kbase_context - * @va: The base gpu virtual address of the region - * - * Return: On success, returns a valid memory region. On failure NULL is returned. - */ -static struct kbase_va_region* gpu_slc_get_region(struct kbase_context *kctx, u64 va) -{ - struct kbase_va_region *reg; - - if (!va) - goto invalid; + bool const cond = *v == old; - if ((va & ~PAGE_MASK) && (va >= PAGE_SIZE)) - goto invalid; + if (cond) + *v = new; - /* Find the region that the virtual address belongs to */ - reg = kbase_region_tracker_find_region_base_address(kctx, va); - - /* Validate the region */ - if (kbase_is_region_invalid_or_free(reg)) - goto invalid; - /* Might be shrunk */ - if (kbase_is_region_shrinkable(reg)) - goto invalid; - /* Driver internal alloc */ - if (kbase_va_region_is_no_user_free(reg)) - goto invalid; - - return reg; - -invalid: - dev_dbg(kctx->kbdev->dev, "pixel: failed to find valid region for gpu_va: %llu", va); - return NULL; -} - -/** - * gpu_slc_migrate_region - Add PBHA that will make the pages SLC cacheable - * - * @kctx: The &struct kbase_context - * @reg: The gpu memory region migrate to an SLC cacheable memory group - * @dirty_reg: The &struct dirty_region containing the extent of the dirty page table entries - */ -static void gpu_slc_migrate_region(struct kbase_context *kctx, struct kbase_va_region *reg, struct dirty_region *dirty_reg) -{ - int err; - u64 vpfn; - size_t page_nr; - - KBASE_DEBUG_ASSERT(kctx); - KBASE_DEBUG_ASSERT(reg); - - if (gpu_slc_in_group(reg)) { - return; - } - - vpfn = reg->start_pfn; - page_nr = kbase_reg_current_backed_size(reg); - - err = kbase_mmu_update_pages_no_flush(kctx->kbdev, &kctx->mmu, vpfn, - kbase_get_gpu_phy_pages(reg), - page_nr, - reg->flags, - MGM_SLC_GROUP_ID, - &dirty_reg->dirty_pgds); - - /* Track the dirty region */ - dirty_reg->first_vpfn = min(dirty_reg->first_vpfn, vpfn); - dirty_reg->last_vpfn = max(dirty_reg->last_vpfn, vpfn + page_nr); - - if (err) - dev_warn(kctx->kbdev->dev, "pixel: failed to move region to SLC: %d", err); - else - /* If everything is good, then set the new group on the region. 
*/ - reg->gpu_alloc->group_id = MGM_SLC_GROUP_ID; -} - -/** - * gpu_slc_flush_dirty_region - Perform an MMU flush for a dirty page region - * - * @kctx: The &struct kbase_context - * @dirty_reg: The &struct dirty_region containing the extent of the dirty page table entries - */ -static void gpu_slc_flush_dirty_region(struct kbase_context *kctx, struct dirty_region *dirty_reg) -{ - size_t const dirty_page_nr = - (dirty_reg->last_vpfn - min(dirty_reg->first_vpfn, dirty_reg->last_vpfn)); - - if (!dirty_page_nr) - return; - - kbase_mmu_flush_invalidate_update_pages( - kctx->kbdev, kctx, dirty_reg->first_vpfn, dirty_page_nr, dirty_reg->dirty_pgds); -} - -/** - * gpu_slc_resize_partition - Attempt to resize the GPU's SLC partition to meet demand. - * - * @kbdev: The &struct kbase_device for the GPU. - */ -static void gpu_slc_resize_partition(struct kbase_device* kbdev) -{ - struct pixel_context *pc = kbdev->platform_context; - - /* Request that the mgm select an SLC partition that fits our demand */ - pixel_mgm_resize_group_to_fit(kbdev->mgm_dev, MGM_SLC_GROUP_ID, pc->slc.demand); - - dev_dbg(kbdev->dev, "pixel: resized GPU SLC partition to meet demand: %llu", pc->slc.demand); + return cond; } +#ifndef PIXEL_GPU_SLC_ACPM_SIGNAL /** - * gpu_slc_get_partition_size - Query the current size of the GPU's SLC partition. - * - * @kbdev: The &struct kbase_device for the GPU. + * struct gpu_slc_liveness_update_info - Buffer info, and live ranges * - * Returns the size of the GPU's SLC partition. + * @buffer_sizes: Array of buffer sizes + * @buffer_count: Number of elements in the va and sizes buffers + * @live_ranges: Array of &struct kbase_pixel_gpu_slc_liveness_mark denoting live ranges for + * each buffer + * @live_ranges_count: Number of elements in the live ranges buffer */ -static u64 gpu_slc_get_partition_size(struct kbase_device* kbdev) -{ - u64 const partition_size = pixel_mgm_query_group_size(kbdev->mgm_dev, MGM_SLC_GROUP_ID); - - dev_dbg(kbdev->dev, "pixel: GPU SLC partition partition size: %llu", partition_size); - - return partition_size; -} +struct gpu_slc_liveness_update_info { + u64* buffer_sizes; + u64 buffer_count; + struct kbase_pixel_gpu_slc_liveness_mark* live_ranges; + u64 live_ranges_count; +}; /** * gpu_slc_liveness_update - Respond to a liveness update by trying to put the new buffers into free @@ -215,127 +79,40 @@ static void gpu_slc_liveness_update(struct kbase_context* kctx, struct kbase_device* kbdev = kctx->kbdev; struct pixel_context *pc = kbdev->platform_context; struct pixel_platform_data *kctx_pd = kctx->platform_data; - struct dirty_region dirty_reg = { - .first_vpfn = U64_MAX, - .last_vpfn = 0, - .dirty_pgds = 0, - }; - u64 current_usage = 0; - u64 current_demand = 0; - u64 free_space; + s64 current_demand = 0, peak_demand = 0, old_demand; int i; - /* Lock the process address space before modifying ATE's */ - gpu_slc_lock_as(kctx); - - /* Synchronize updates to the partition size and usage */ - mutex_lock(&pc->slc.lock); - dev_dbg(kbdev->dev, "pixel: buffer liveness update received"); - /* Remove the usage and demand from the previous liveness update */ - pc->slc.demand -= kctx_pd->slc.peak_demand; - pc->slc.usage -= kctx_pd->slc.peak_usage; - kctx_pd->slc.peak_demand = 0; - kctx_pd->slc.peak_usage = 0; - - /* Calculate the remaining free space in the SLC partition (floored at 0) */ - free_space = gpu_slc_get_partition_size(kbdev); - free_space -= min(free_space, pc->slc.usage); - for (i = 0; i < info->live_ranges_count; ++i) { - struct kbase_va_region *reg; u64 size; 
- u64 va; u32 index = info->live_ranges[i].index; if (unlikely(index >= info->buffer_count)) continue; size = info->buffer_sizes[index]; - va = info->buffer_va[index]; - - reg = gpu_slc_get_region(kctx, va); - if(!reg) - continue; switch (info->live_ranges[i].type) { case KBASE_PIXEL_GPU_LIVE_RANGE_BEGIN: /* Update demand as though there's no size limit */ current_demand += size; - kctx_pd->slc.peak_demand = max(kctx_pd->slc.peak_demand, current_demand); - - /* Check whether there's free space in the partition to store the buffer */ - if (free_space >= current_usage + size) - gpu_slc_migrate_region(kctx, reg, &dirty_reg); - - /* This may be true, even if the space calculation above returned false, - * as a previous call to this function may have migrated the region. - * In such a scenario, the current_usage may exceed the available free_space - * and we will be oversubscribed to the SLC partition. - * We could migrate the region back to the non-SLC group, but this would - * require an SLC flush, so for now we do nothing. - */ - if (gpu_slc_in_group(reg)) { - current_usage += size; - kctx_pd->slc.peak_usage = max(kctx_pd->slc.peak_usage, current_usage); - } + peak_demand = max(peak_demand, current_demand); break; case KBASE_PIXEL_GPU_LIVE_RANGE_END: current_demand -= size; - if (gpu_slc_in_group(reg)) - current_usage -= size; break; } } - /* Perform single page table flush */ - gpu_slc_flush_dirty_region(kctx, &dirty_reg); /* Indicates a missing live range end marker */ - WARN_ON_ONCE(current_demand != 0 || current_usage != 0); - - /* Update the total usage and demand */ - pc->slc.demand += kctx_pd->slc.peak_demand; - pc->slc.usage += kctx_pd->slc.peak_usage; + WARN_ON_ONCE(current_demand != 0); - dev_dbg(kbdev->dev, - "pixel: kctx_%d, peak_demand: %llu, peak_usage: %llu", - kctx->id, - kctx_pd->slc.peak_demand, - kctx_pd->slc.peak_usage); - dev_dbg(kbdev->dev, "pixel: kbdev, demand: %llu, usage: %llu", pc->slc.demand, pc->slc.usage); - - /* Trigger partition resize based on the new demand */ - gpu_slc_resize_partition(kctx->kbdev); - - mutex_unlock(&pc->slc.lock); - gpu_slc_unlock_as(kctx); -} - -static void gpu_slc_kctx_idle_worker(struct work_struct *work) -{ - struct pixel_platform_data *pd = - container_of(work, struct pixel_platform_data, slc.idle_work); - struct kbase_context *kctx = pd->kctx; - struct kbase_device *kbdev = kctx->kbdev; - struct pixel_context *pc = kbdev->platform_context; - - if (atomic_read(&pd->slc.idle_work_cancelled)) - return; - - mutex_lock(&pc->slc.lock); - - pc->slc.demand -= pd->slc.peak_demand; - pc->slc.usage -= pd->slc.peak_usage; - - pd->slc.peak_demand = 0; - pd->slc.peak_usage = 0; - - gpu_slc_resize_partition(kctx->kbdev); - - mutex_unlock(&pc->slc.lock); + /* Update the demand */ + old_demand = atomic_xchg(&kctx_pd->slc_demand, peak_demand); + atomic_add(peak_demand - old_demand, &pc->slc_demand); } /** @@ -369,13 +146,13 @@ int gpu_pixel_handle_buffer_liveness_update_ioctl(struct kbase_context* kctx, if (U64_MAX / sizeof(struct kbase_pixel_gpu_slc_liveness_mark) < update->live_ranges_count) goto done; /* Guard against nullptr */ - if (!update->live_ranges_address || !update->buffer_va_address || !update->buffer_sizes_address) + if (!update->live_ranges_address || !update->buffer_sizes_address) goto done; /* Calculate the total buffer size required and detect overflows */ - if ((U64_MAX - live_ranges_size) / 2 < buffer_info_size) + if ((U64_MAX - live_ranges_size) < buffer_info_size) goto done; - total_buff_size = buffer_info_size * 2 + 
live_ranges_size; + total_buff_size = buffer_info_size + live_ranges_size; /* Allocate the memory we require to copy from user space */ buff = kmalloc(total_buff_size, GFP_KERNEL); @@ -387,16 +164,15 @@ int gpu_pixel_handle_buffer_liveness_update_ioctl(struct kbase_context* kctx, /* Set up the info struct by pointing into the allocation. All 8 byte aligned */ info = (struct gpu_slc_liveness_update_info){ - .buffer_va = buff, - .buffer_sizes = buff + update->buffer_count, + .buffer_sizes = buff, .buffer_count = update->buffer_count, - .live_ranges = (struct kbase_pixel_gpu_slc_liveness_mark*)(buff + update->buffer_count * 2), + .live_ranges = (struct kbase_pixel_gpu_slc_liveness_mark*)(buff + update->buffer_count), .live_ranges_count = update->live_ranges_count, }; /* Copy the data from user space */ - err = - copy_from_user(info.live_ranges, u64_to_user_ptr(update->live_ranges_address), live_ranges_size); + err = copy_from_user( + info.live_ranges, u64_to_user_ptr(update->live_ranges_address), live_ranges_size); if (err) { dev_err(kctx->kbdev->dev, "pixel: failed to copy live ranges"); err = -EFAULT; @@ -411,13 +187,6 @@ int gpu_pixel_handle_buffer_liveness_update_ioctl(struct kbase_context* kctx, goto done; } - err = copy_from_user(info.buffer_va, u64_to_user_ptr(update->buffer_va_address), buffer_info_size); - if (err) { - dev_err(kctx->kbdev->dev, "pixel: failed to copy buffer addresses"); - err = -EFAULT; - goto done; - } - /* Execute an slc update */ gpu_slc_liveness_update(kctx, &info); @@ -426,6 +195,7 @@ done: return err; } +#endif /* PIXEL_GPU_SLC_ACPM_SIGNAL */ /** * gpu_slc_kctx_init() - Called when a kernel context is created @@ -439,10 +209,7 @@ done: */ int gpu_slc_kctx_init(struct kbase_context *kctx) { - struct pixel_platform_data *pd = kctx->platform_data; - - INIT_WORK(&pd->slc.idle_work, gpu_slc_kctx_idle_worker); - + (void)kctx; return 0; } @@ -450,28 +217,23 @@ int gpu_slc_kctx_init(struct kbase_context *kctx) * gpu_slc_kctx_term() - Called when a kernel context is terminated * * @kctx: The &struct kbase_context that is being terminated - * - * Free up SLC space used by the buffers that this context owns. 
*/ void gpu_slc_kctx_term(struct kbase_context *kctx) { - struct kbase_device *kbdev = kctx->kbdev; - struct pixel_context *pc = kbdev->platform_context; - struct pixel_platform_data *kctx_pd = kctx->platform_data; - - atomic_set(&kctx_pd->slc.idle_work_cancelled, 1); - cancel_work_sync(&kctx_pd->slc.idle_work); - - mutex_lock(&pc->slc.lock); - - /* Deduct the usage and demand, freeing that SLC space for the next update */ - pc->slc.demand -= kctx_pd->slc.peak_demand; - pc->slc.usage -= kctx_pd->slc.peak_usage; + struct pixel_platform_data *pd = kctx->platform_data; - /* Trigger partition resize based on the new demand */ - gpu_slc_resize_partition(kctx->kbdev); + /* Contexts can be terminated without being idled first */ + if (transition(&pd->slc_vote, VOTING, IDLE)) + pixel_mgm_slc_dec_refcount(kctx->kbdev->mgm_dev); - mutex_unlock(&pc->slc.lock); +#ifndef PIXEL_GPU_SLC_ACPM_SIGNAL + { + struct pixel_context* pc = kctx->kbdev->platform_context; + /* Deduct the usage and demand, freeing that SLC space for the next update */ + u64 kctx_demand = atomic_xchg(&pd->slc_demand, 0); + atomic_sub(kctx_demand, &pc->slc_demand); + } +#endif /* PIXEL_GPU_SLC_ACPM_SIGNAL */ } /** @@ -481,19 +243,12 @@ void gpu_slc_kctx_term(struct kbase_context *kctx) */ void gpu_slc_kctx_active(struct kbase_context *kctx) { - struct kbase_device *kbdev = kctx->kbdev; struct pixel_platform_data *pd = kctx->platform_data; - lockdep_assert_held(&kbdev->hwaccess_lock); + lockdep_assert_held(&kctx->kbdev->hwaccess_lock); - /* Asynchronously cancel the idle work, since we're in atomic context. - * The goal here is not to ensure that the idle_work doesn't run. Instead we need to ensure - * that any queued idle_work does not run *after* a liveness update for the now active kctx. - * Either the idle_work is executing now, and beats the cancellation check, or it runs later - * and early-exits at the cancellation check. - * In neither scenario will a 'cancelled' idle_work interfere with a later liveness update. - */ - atomic_set(&pd->slc.idle_work_cancelled, 1); + if (transition(&pd->slc_vote, IDLE, VOTING)) + pixel_mgm_slc_inc_refcount(kctx->kbdev->mgm_dev); } /** @@ -503,22 +258,34 @@ void gpu_slc_kctx_active(struct kbase_context *kctx) */ void gpu_slc_kctx_idle(struct kbase_context *kctx) { - struct kbase_device *kbdev = kctx->kbdev; struct pixel_platform_data *pd = kctx->platform_data; - lockdep_assert_held(&kbdev->hwaccess_lock); + lockdep_assert_held(&kctx->kbdev->hwaccess_lock); + + if (transition(&pd->slc_vote, VOTING, IDLE)) + pixel_mgm_slc_dec_refcount(kctx->kbdev->mgm_dev); +} - /* In the event that this line 'un-cancels' the idle_work, and that idle_work is executing, - * we will re-queue on the following line anyway, resulting in a unnecessary additional - * execution of the worker. - * While not optimal, it won't result in a correctness problem. - */ - atomic_set(&pd->slc.idle_work_cancelled, 0); - queue_work(system_highpri_wq, &pd->slc.idle_work); +/** + * gpu_slc_tick_tock() - Called when a GPU scheduling kick occurs + * + * @kbdev: The &struct kbase_device for the GPU. 
+ */ +void gpu_slc_tick_tock(struct kbase_device *kbdev) +{ +#ifndef PIXEL_GPU_SLC_ACPM_SIGNAL + struct pixel_context* pc = kbdev->platform_context; + /* Threshold of 4MB */ + u64 signal = atomic_read(&pc->slc_demand) / (4 << 20); + + pixel_mgm_slc_update_signal(kbdev->mgm_dev, signal); +#else + pixel_mgm_slc_update_signal(kbdev->mgm_dev, 0); +#endif /* PIXEL_GPU_SLC_ACPM_SIGNAL */ } /** - * gpu_slc_init - Initialize the SLC partition for the GPU + * gpu_slc_init - Initialize the SLC context for the GPU * * @kbdev: The &struct kbase_device for the GPU. * @@ -526,15 +293,11 @@ void gpu_slc_kctx_idle(struct kbase_context *kctx) */ int gpu_slc_init(struct kbase_device *kbdev) { - struct pixel_context *pc = kbdev->platform_context; - - mutex_init(&pc->slc.lock); - return 0; } /** - * gpu_slc_term() - Terminates the Pixel GPU SLC partition. + * gpu_slc_term() - Terminates the Pixel GPU SLC context. * * @kbdev: The &struct kbase_device for the GPU. */ diff --git a/mali_kbase/platform/pixel/pixel_gpu_slc.h b/mali_kbase/platform/pixel/pixel_gpu_slc.h index 82d0779..8a59df0 100644 --- a/mali_kbase/platform/pixel/pixel_gpu_slc.h +++ b/mali_kbase/platform/pixel/pixel_gpu_slc.h @@ -8,9 +8,6 @@ #define _PIXEL_GPU_SLC_H_ #ifdef CONFIG_MALI_PIXEL_GPU_SLC -int gpu_pixel_handle_buffer_liveness_update_ioctl(struct kbase_context* kctx, - struct kbase_ioctl_buffer_liveness_update* update); - int gpu_slc_init(struct kbase_device *kbdev); void gpu_slc_term(struct kbase_device *kbdev); @@ -22,13 +19,9 @@ void gpu_slc_kctx_term(struct kbase_context *kctx); void gpu_slc_kctx_active(struct kbase_context *kctx); void gpu_slc_kctx_idle(struct kbase_context *kctx); -#else -static int __maybe_unused gpu_pixel_handle_buffer_liveness_update_ioctl(struct kbase_context* kctx, - struct kbase_ioctl_buffer_liveness_update* update) -{ - return (void)kctx, (void)update, 0; -} +void gpu_slc_tick_tock(struct kbase_device *kbdev); +#else static int __maybe_unused gpu_slc_init(struct kbase_device *kbdev) { return (void)kbdev, 0; } static void __maybe_unused gpu_slc_term(struct kbase_device *kbdev) { (void)kbdev; } @@ -40,6 +33,20 @@ static void __maybe_unused gpu_slc_kctx_term(struct kbase_context* kctx) { (void static void __maybe_unused gpu_slc_kctx_active(struct kbase_context *kctx) { (void)kctx; } static void __maybe_unused gpu_slc_kctx_idle(struct kbase_context *kctx) { (void)kctx; } + +static void __maybe_unused gpu_slc_tick_tock(struct kbase_device *kbdev) { (void)kbdev; } #endif /* CONFIG_MALI_PIXEL_GPU_SLC */ +#if defined(CONFIG_MALI_PIXEL_GPU_SLC) && !defined(PIXEL_GPU_SLC_ACPM_SIGNAL) +int +gpu_pixel_handle_buffer_liveness_update_ioctl(struct kbase_context* kctx, + struct kbase_ioctl_buffer_liveness_update* update); +#else +static int __maybe_unused gpu_pixel_handle_buffer_liveness_update_ioctl(struct kbase_context* kctx, + struct kbase_ioctl_buffer_liveness_update* update) +{ + return (void)kctx, (void)update, 0; +} +#endif + #endif /* _PIXEL_GPU_SLC_H_ */ diff --git a/mali_pixel/Documentation/ABI/testing/sysfs-kernel-pixel_stat-gpu b/mali_pixel/Documentation/ABI/testing/sysfs-kernel-pixel_stat-gpu new file mode 100644 index 0000000..1d3bc11 --- /dev/null +++ b/mali_pixel/Documentation/ABI/testing/sysfs-kernel-pixel_stat-gpu @@ -0,0 +1,7 @@ +What: /sys/kernel/pixel_stat/gpu/mem/slc_pin_partition +Date: Feb 2024 +Contact: "Jack Diver" <diverj@google.com> +Description: + Write-only node to manually pin the SLC partition in the enabled + state. This is useful when profiling SLC performance.
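On the non-ACPM path above, gpu_slc_tick_tock() quantises the aggregate demand into a signal expressed in 4 MB units, and pixel_slc.c (later in this patch) only treats the partition as required once that signal reaches PARTITION_ENABLE_THRESHOLD (7) — i.e. at 28 MB of demand with at least one voting context. A standalone sketch of that arithmetic follows, with the two constants copied from the patch; the harness is an illustrative assumption and the 'pinned' override in partition_required() is deliberately omitted:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define PARTITION_ENABLE_THRESHOLD 7 /* from pixel_slc.c */

static bool partition_wanted(uint64_t demand_bytes, int refcount)
{
	/* gpu_slc_tick_tock(): demand is expressed in 4MB units */
	uint64_t signal = demand_bytes / (4 << 20);

	/* partition_required(), minus the 'pinned' override */
	return refcount > 0 && signal >= PARTITION_ENABLE_THRESHOLD;
}

int main(void)
{
	printf("%d\n", partition_wanted(27ull << 20, 1)); /* 0: signal 6, below threshold */
	printf("%d\n", partition_wanted(28ull << 20, 1)); /* 1: signal 7, at threshold */
	printf("%d\n", partition_wanted(28ull << 20, 0)); /* 0: no context voting */
	return 0;
}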
+ diff --git a/mali_pixel/Kbuild b/mali_pixel/Kbuild index d20ce03..4f65a95 100644 --- a/mali_pixel/Kbuild +++ b/mali_pixel/Kbuild @@ -25,7 +25,7 @@ CONFIG_MALI_MEMORY_GROUP_MANAGER ?= m CONFIG_MALI_PRIORITY_CONTROL_MANAGER ?= m CONFIG_MALI_PROTECTED_MEMORY_ALLOCATOR ?= m CONFIG_MALI_PIXEL_STATS ?= m -CONFIG_MALI_PIXEL_GPU_SLC=y +CONFIG_MALI_PIXEL_GPU_SLC ?= y mali_pixel-objs := @@ -39,6 +39,7 @@ endif ifeq ($(CONFIG_MALI_MEMORY_GROUP_MANAGER),m) DEFINES += -DCONFIG_MALI_MEMORY_GROUP_MANAGER mali_pixel-objs += memory_group_manager.o + mali_pixel-objs += pixel_slc.o endif ifeq ($(CONFIG_MALI_PRIORITY_CONTROL_MANAGER),m) DEFINES += -DCONFIG_MALI_PRIORITY_CONTROL_MANAGER @@ -50,6 +51,9 @@ ifeq ($(CONFIG_MALI_PROTECTED_MEMORY_ALLOCATOR),m) endif ifeq ($(CONFIG_MALI_PIXEL_GPU_SLC),y) DEFINES += -DCONFIG_MALI_PIXEL_GPU_SLC + ifeq ($(CONFIG_SOC_ZUMA),y) + DEFINES += -DPIXEL_GPU_SLC_ACPM_SIGNAL + endif endif # Use our defines when compiling, and include mali platform module headers diff --git a/mali_pixel/memory_group_manager.c b/mali_pixel/memory_group_manager.c index 0c9a241..9076a65 100644 --- a/mali_pixel/memory_group_manager.c +++ b/mali_pixel/memory_group_manager.c @@ -23,7 +23,7 @@ #include <linux/memory_group_manager.h> -#include <soc/google/pt.h> +#include "pixel_slc.h" #include <uapi/gpu/arm/midgard/platform/pixel/pixel_memory_group_manager.h> @@ -31,29 +31,30 @@ #define ORDER_SMALL_PAGE 0 #define ORDER_LARGE_PAGE const_ilog2(NUM_PAGES_IN_2MB_LARGE_PAGE) -/* Borr does not have "real" PBHA support. However, since we only use a 36-bit PA on the bus, - * AxADDR[39:36] is wired up to the GPU AxUSER[PBHA] field seen by the rest of the system. - * Those AxADDR bits come from [39:36] in the page descriptor. - * - * Odin and Turse have "real" PBHA support using a dedicated output signal and page descriptor field. - * The AxUSER[PBHA] field is driven by the GPU's PBHA signal, and AxADDR[39:36] is dropped. - * The page descriptor PBHA field is [62:59]. - * - * We could write to both of these locations, as each SoC only reads from its respective PBHA - * location with the other being ignored or dropped. - * - * b/148988078 contains confirmation of the above description. +/** + * enum mgm_group_id - Symbolic names for used memory groups */ -#if IS_ENABLED(CONFIG_SOC_GS101) -#define PBHA_BIT_POS (36) -#else -#define PBHA_BIT_POS (59) -#endif -#define PBHA_BIT_MASK (0xf) +enum mgm_group_id +{ + /** + * @MGM_RESERVED_GROUP_ID: The Mali driver requires that allocations made on one of the + * groups are not treated specially. + */ + MGM_RESERVED_GROUP_ID = 0, -#define MGM_PBHA_DEFAULT 0 + /** + * @MGM_SLC_GROUP_ID: Group for memory that should be cached in the system level cache. + */ + MGM_SLC_GROUP_ID = 1, -#define MGM_SENTINEL_PT_SIZE U64_MAX + /** + * @MGM_IMPORTED_MEMORY_GROUP_ID: Imported memory is handled by the allocator of the memory, + * and the Mali DDK will request a group_id for such memory + * via mgm_get_import_memory_id(). We specify which group we + * want to use for this here. + */ + MGM_IMPORTED_MEMORY_GROUP_ID = (MEMORY_GROUP_MANAGER_NR_GROUPS - 1), +}; #define INVALID_GROUP_ID(group_id) \ WARN_ON((group_id) >= MEMORY_GROUP_MANAGER_NR_GROUPS) @@ -81,13 +82,6 @@ static inline vm_fault_t vmf_insert_pfn_prot(struct vm_area_struct *vma, * @lp_size: The number of allocated large(2MB) pages * @insert_pfn: The number of calls to map pages for CPU access. * @update_gpu_pte: The number of calls to update GPU page table entries. 
- * @ptid: The active partition ID for this group - * @pbha: The PBHA bits assigned to this group, - * @base_pt: The base partition ID available to this group. - * @pt_num: The number of partitions available to this group. - * @active_pt_idx: The relative index for the partition backing the group. - * Different from the absolute ptid. - * @state: The lifecycle state of the partition associated with this group * This structure allows page allocation information to be displayed via * debugfs. Display is organized per group with small and large sized pages. */ @@ -98,30 +92,6 @@ struct mgm_group { atomic_t insert_pfn; atomic_t update_gpu_pte; #endif - - ptid_t ptid; - ptpbha_t pbha; - - u32 base_pt; - u32 pt_num; - u32 active_pt_idx; - enum { - MGM_GROUP_STATE_NEW = 0, - MGM_GROUP_STATE_ENABLED = 10, - MGM_GROUP_STATE_DISABLED_NOT_FREED = 20, - MGM_GROUP_STATE_DISABLED = 30, - } state; -}; - -/** - * struct partition_stats - Structure for tracking sizing of a partition - * - * @capacity: The total capacity of each partition - * @size: The current size of each partition - */ -struct partition_stats { - u64 capacity; - atomic64_t size; }; /** @@ -130,26 +100,22 @@ struct partition_stats { * @groups: To keep track of the number of allocated pages of all groups * @ngroups: Number of groups actually used * @npartitions: Number of partitions used by all groups combined - * @pt_stats: The sizing info for each partition * @dev: device attached - * @pt_handle: Link to SLC partition data * @kobj: &sruct kobject used for linking to pixel_stats_sysfs node * @mgm_debugfs_root: debugfs root directory of memory group manager + * @slc_data: To track GPU SLC partitions. * * This structure allows page allocation information to be displayed via * debugfs. Display is organized per group with small and large sized pages. 
*/ struct mgm_groups { struct mgm_group groups[MEMORY_GROUP_MANAGER_NR_GROUPS]; - size_t ngroups; - size_t npartitions; - struct partition_stats *pt_stats; struct device *dev; - struct pt_handle *pt_handle; struct kobject kobj; #ifdef CONFIG_MALI_MEMORY_GROUP_MANAGER_DEBUG_FS struct dentry *mgm_debugfs_root; #endif + struct slc_data slc_data; }; /* @@ -158,13 +124,6 @@ struct mgm_groups { #ifdef CONFIG_MALI_MEMORY_GROUP_MANAGER_DEBUG_FS -static int mgm_debugfs_state_get(void *data, u64 *val) -{ - struct mgm_group *group = data; - *val = (u64)group->state; - return 0; -} - static int mgm_debugfs_size_get(void *data, u64 *val) { struct mgm_group *group = data; @@ -193,8 +152,6 @@ static int mgm_debugfs_update_gpu_pte_get(void *data, u64 *val) return 0; } -DEFINE_SIMPLE_ATTRIBUTE(fops_mgm_state, mgm_debugfs_state_get, - NULL, "%llu\n"); DEFINE_SIMPLE_ATTRIBUTE(fops_mgm_size, mgm_debugfs_size_get, NULL, "%llu\n"); DEFINE_SIMPLE_ATTRIBUTE(fops_mgm_lp_size, mgm_debugfs_lp_size_get, @@ -218,7 +175,6 @@ static struct { const char *name; const struct file_operations *fops; } attribs[] = { - { "state", &fops_mgm_state}, { "size", &fops_mgm_size}, { "lp_size", &fops_mgm_lp_size}, { "insert_pfn", &fops_mgm_insert_pfn}, @@ -298,6 +254,8 @@ extern struct kobject *pixel_stat_gpu_kobj; #define MGM_ATTR_RO(_name) \ static struct kobj_attribute _name##_attr = __ATTR_RO(_name) +#define MGM_ATTR_WO(_name) \ + static struct kobj_attribute _name##_attr = __ATTR_WO(_name) static ssize_t total_page_count_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) @@ -340,10 +298,31 @@ static ssize_t large_page_count_show(struct kobject *kobj, } MGM_ATTR_RO(large_page_count); +static ssize_t slc_pin_partition_store(struct kobject* kobj, + struct kobj_attribute* attr, + const char* buf, + size_t count) +{ + struct mgm_groups *data = container_of(kobj, struct mgm_groups, kobj); + bool pin; + + if (!data) + return -ENODEV; + + if (kstrtobool(buf, &pin)) + return -EINVAL; + + slc_pin(&data->slc_data, pin); + + return count; +} +MGM_ATTR_WO(slc_pin_partition); + static struct attribute *mgm_attrs[] = { &total_page_count_attr.attr, &small_page_count_attr.attr, &large_page_count_attr.attr, + &slc_pin_partition_attr.attr, NULL, }; ATTRIBUTE_GROUPS(mgm); @@ -392,20 +371,6 @@ static void mgm_sysfs_term(struct mgm_groups *data) #endif /* CONFIG_MALI_PIXEL_STATS */ -static int group_pt_id(struct mgm_groups *data, enum pixel_mgm_group_id group_id, int pt_index) -{ - struct mgm_group *group = &data->groups[group_id]; - if (WARN_ON_ONCE(pt_index >= group->pt_num)) - return 0; - - return group->base_pt + pt_index; -} - -static int group_active_pt_id(struct mgm_groups *data, enum pixel_mgm_group_id group_id) -{ - return group_pt_id(data, group_id, data->groups[group_id].active_pt_idx); -} - static atomic64_t total_gpu_pages = ATOMIC64_INIT(0); static atomic_t* get_size_counter(struct memory_group_manager_device* mgm_dev, unsigned int group_id, unsigned int order) @@ -434,26 +399,7 @@ static void update_size(struct memory_group_manager_device *mgm_dev, unsigned in atomic_inc(size); atomic64_add(1 << order, &total_gpu_pages); } else { - if (atomic_dec_return(size) < 0) { - /* b/289501175 - * Pages are often 'migrated' to the SLC group, which needs special - * accounting. - * - * TODO: Remove after SLC MGM decoupling b/290354607 - */ - if (!WARN_ON(group_id != MGM_SLC_GROUP_ID)) { - /* Undo the dec, and instead decrement the reserved group counter. 
- * This is still making the assumption that the migration came from - * the reserved group. Currently this is always true, however it - * might not be in future. It would be invasive and costly to track - * where every page came from, so instead this will be fixed as part - * of the b/290354607 effort. - */ - atomic_inc(size); - update_size(mgm_dev, MGM_RESERVED_GROUP_ID, order, alloc); - return; - } - } + WARN_ON(atomic_dec_return(size) < 0); atomic64_sub(1 << order, &total_gpu_pages); } @@ -462,185 +408,6 @@ static void update_size(struct memory_group_manager_device *mgm_dev, unsigned in pr_warn("total_gpu_pages %lld\n", atomic64_read(&total_gpu_pages)); } -static void pt_size_invalidate(struct mgm_groups* data, int pt_idx) -{ - /* Set the size to a known sentinel value so that we can later detect an update */ - atomic64_set(&data->pt_stats[pt_idx].size, MGM_SENTINEL_PT_SIZE); -} - -static void pt_size_init(struct mgm_groups* data, int pt_idx, size_t size) -{ - /* The resize callback may have already been executed, which would have set - * the correct size. Only update the size if this has not happened. - * We can tell that no resize took place if the size is still a sentinel. - */ - atomic64_cmpxchg(&data->pt_stats[pt_idx].size, MGM_SENTINEL_PT_SIZE, size); -} - -static void validate_ptid(struct mgm_groups* data, enum pixel_mgm_group_id group_id, int ptid) -{ - if (ptid == -EINVAL) - dev_err(data->dev, "Failed to get partition for group: %d\n", group_id); - else - dev_info(data->dev, "pt_client_mutate returned ptid=%d for group=%d", ptid, group_id); -} - -static void update_group(struct mgm_groups* data, - enum pixel_mgm_group_id group_id, - int ptid, - int relative_pt_idx) -{ - int const abs_pt_idx = group_pt_id(data, group_id, relative_pt_idx); - int const pbha = pt_pbha(data->dev->of_node, abs_pt_idx); - - if (pbha == PT_PBHA_INVALID) - dev_err(data->dev, "Failed to get PBHA for group: %d\n", group_id); - else - dev_info(data->dev, "pt_pbha returned PBHA=%d for group=%d", pbha, group_id); - - data->groups[group_id].ptid = ptid; - data->groups[group_id].pbha = pbha; - data->groups[group_id].state = MGM_GROUP_STATE_ENABLED; - data->groups[group_id].active_pt_idx = relative_pt_idx; -} - -static void disable_partition(struct mgm_groups* data, enum pixel_mgm_group_id group_id) -{ - int const active_idx = group_active_pt_id(data, group_id); - - /* Skip if not already enabled */ - if (data->groups[group_id].state != MGM_GROUP_STATE_ENABLED) - return; - - pt_client_disable_no_free(data->pt_handle, active_idx); - data->groups[group_id].state = MGM_GROUP_STATE_DISABLED_NOT_FREED; - - pt_size_invalidate(data, active_idx); - pt_size_init(data, active_idx, 0); -} - -static void enable_partition(struct mgm_groups* data, enum pixel_mgm_group_id group_id) -{ - int ptid; - size_t size = 0; - int const active_idx = group_active_pt_id(data, group_id); - - /* Skip if already enabled */ - if (data->groups[group_id].state == MGM_GROUP_STATE_ENABLED) - return; - - pt_size_invalidate(data, active_idx); - - ptid = pt_client_enable_size(data->pt_handle, active_idx, &size); - - validate_ptid(data, group_id, ptid); - - update_group(data, group_id, ptid, data->groups[group_id].active_pt_idx); - - pt_size_init(data, active_idx, size); -} - -static void set_group_partition(struct mgm_groups* data, - enum pixel_mgm_group_id group_id, - int new_pt_index) -{ - int ptid; - size_t size = 0; - int const active_idx = group_active_pt_id(data, group_id); - int const new_idx = group_pt_id(data, group_id, new_pt_index); - - 
/* Early out if no changes are needed */ - if (new_idx == active_idx) - return; - - pt_size_invalidate(data, new_idx); - - ptid = pt_client_mutate_size(data->pt_handle, active_idx, new_idx, &size); - - validate_ptid(data, group_id, ptid); - - update_group(data, group_id, ptid, new_pt_index); - - pt_size_init(data, new_idx, size); - /* Reset old partition size */ - atomic64_set(&data->pt_stats[active_idx].size, data->pt_stats[active_idx].capacity); -} - -u64 pixel_mgm_query_group_size(struct memory_group_manager_device* mgm_dev, - enum pixel_mgm_group_id group_id) -{ - struct mgm_groups *data; - struct mgm_group *group; - u64 size = 0; - - /* Early out if the group doesn't exist */ - if (INVALID_GROUP_ID(group_id)) - goto done; - - data = mgm_dev->data; - group = &data->groups[group_id]; - - /* Early out if the group has no partitions */ - if (group->pt_num == 0) - goto done; - - size = atomic64_read(&data->pt_stats[group_active_pt_id(data, group_id)].size); - -done: - return size; -} -EXPORT_SYMBOL(pixel_mgm_query_group_size); - -void pixel_mgm_resize_group_to_fit(struct memory_group_manager_device* mgm_dev, - enum pixel_mgm_group_id group_id, - u64 demand) -{ - struct mgm_groups *data; - struct mgm_group *group; - s64 diff, cur_size, min_diff = S64_MAX; - int pt_idx; - - /* Early out if the group doesn't exist */ - if (INVALID_GROUP_ID(group_id)) - goto done; - - data = mgm_dev->data; - group = &data->groups[group_id]; - - /* Early out if the group has no partitions */ - if (group->pt_num == 0) - goto done; - - /* We can disable the partition if there's no demand */ - if (demand == 0) - { - disable_partition(data, group_id); - goto done; - } - - /* Calculate best partition to use, by finding the nearest capacity */ - for (pt_idx = 0; pt_idx < group->pt_num; ++pt_idx) - { - cur_size = data->pt_stats[group_pt_id(data, group_id, pt_idx)].capacity; - diff = abs(demand - cur_size); - - if (diff > min_diff) - break; - - min_diff = diff; - } - - /* Ensure the partition is enabled before trying to mutate it */ - enable_partition(data, group_id); - set_group_partition(data, group_id, pt_idx - 1); - -done: - dev_dbg(data->dev, "%s: resized memory_group_%d for demand: %lldB", __func__, group_id, demand); - - return; -} -EXPORT_SYMBOL(pixel_mgm_resize_group_to_fit); - static struct page *mgm_alloc_page( struct memory_group_manager_device *mgm_dev, unsigned int group_id, gfp_t gfp_mask, unsigned int order) @@ -655,35 +422,12 @@ static struct page *mgm_alloc_page( if (INVALID_GROUP_ID(group_id)) return NULL; - if (WARN_ON_ONCE((group_id != MGM_RESERVED_GROUP_ID) && - (group_active_pt_id(data, group_id) >= data->npartitions))) - return NULL; - /* We don't expect to be allocting pages into the group used for * external or imported memory */ if (WARN_ON(group_id == MGM_IMPORTED_MEMORY_GROUP_ID)) return NULL; - /* If we are allocating a page in this group for the first time then - * ensure that we have enabled the relevant partitions for it. 
- */ - if (group_id != MGM_RESERVED_GROUP_ID) { - switch (data->groups[group_id].state) { - case MGM_GROUP_STATE_NEW: - enable_partition(data, group_id); - break; - case MGM_GROUP_STATE_ENABLED: - case MGM_GROUP_STATE_DISABLED_NOT_FREED: - case MGM_GROUP_STATE_DISABLED: - /* Everything should already be set up*/ - break; - default: - dev_err(data->dev, "Group %u in invalid state %d\n", - group_id, data->groups[group_id].state); - } - } - p = alloc_pages(gfp_mask, order); if (p) { @@ -742,7 +486,7 @@ static u64 mgm_update_gpu_pte( int const mmu_level, u64 pte) { struct mgm_groups *const data = mgm_dev->data; - unsigned int pbha; + u64 const old_pte = pte; dev_dbg(data->dev, "%s(mgm_dev=%p, group_id=%u, mmu_level=%d, pte=0x%llx)\n", @@ -751,40 +495,22 @@ static u64 mgm_update_gpu_pte( if (INVALID_GROUP_ID(group_id)) return pte; - /* Clear any bits set in the PBHA range */ - if (pte & ((u64)PBHA_BIT_MASK << PBHA_BIT_POS)) { - dev_warn(data->dev, - "%s: updating pte with bits already set in PBHA range", - __func__); - pte &= ~((u64)PBHA_BIT_MASK << PBHA_BIT_POS); - } - switch (group_id) { case MGM_RESERVED_GROUP_ID: case MGM_IMPORTED_MEMORY_GROUP_ID: /* The reserved group doesn't set PBHA bits */ - /* TODO: Determine what to do with imported memory */ + pte = slc_wipe_pbha(pte); break; + case MGM_SLC_GROUP_ID: + /* Map requests for SLC memory groups to SLC */ + pte = slc_set_pbha(&data->slc_data, pte); default: - /* All other groups will have PBHA bits */ - if (data->groups[group_id].state > MGM_GROUP_STATE_NEW) { - u64 old_pte = pte; - pbha = data->groups[group_id].pbha; - - pte |= ((u64)pbha & PBHA_BIT_MASK) << PBHA_BIT_POS; - - dev_dbg(data->dev, - "%s: group_id=%u pbha=%d " - "pte=0x%llx -> 0x%llx\n", - __func__, group_id, pbha, old_pte, pte); - - } else { - dev_err(data->dev, - "Tried to get PBHA of uninitialized group=%d", - group_id); - } + break; } + dev_dbg(data->dev, "%s: group_id=%u pte=0x%llx -> 0x%llx\n", + __func__, group_id, old_pte, pte); + #ifdef CONFIG_MALI_MEMORY_GROUP_MANAGER_DEBUG_FS atomic_inc(&data->groups[group_id].update_gpu_pte); #endif @@ -795,27 +521,10 @@ static u64 mgm_update_gpu_pte( static u64 mgm_pte_to_original_pte(struct memory_group_manager_device *mgm_dev, unsigned int group_id, int mmu_level, u64 pte) { - struct mgm_groups *const data = mgm_dev->data; - u64 old_pte; - if (INVALID_GROUP_ID(group_id)) return pte; - switch (group_id) { - case MGM_RESERVED_GROUP_ID: - case MGM_IMPORTED_MEMORY_GROUP_ID: - /* The reserved group doesn't set PBHA bits */ - /* TODO: Determine what to do with imported memory */ - break; - default: - /* All other groups will have PBHA bits, so clear them */ - old_pte = pte; - pte &= ~((u64)PBHA_BIT_MASK << PBHA_BIT_POS); - dev_dbg(data->dev, "%s: group_id=%u pte=0x%llx -> 0x%llx\n", __func__, group_id, - old_pte, pte); - } - - return pte; + return slc_wipe_pbha(pte); } static vm_fault_t mgm_vmf_insert_pfn_prot( @@ -847,49 +556,36 @@ static vm_fault_t mgm_vmf_insert_pfn_prot( return fault; } -static void mgm_resize_callback(void *data, int id, size_t size_allocated) +void pixel_mgm_slc_update_signal(struct memory_group_manager_device* mgm_dev, u64 signal) { - struct mgm_groups *const mgm_data = (struct mgm_groups *)data; - dev_dbg(mgm_data->dev, "Resize callback called, size_allocated: %zu\n", size_allocated); - /* Update the partition size for the group */ - atomic64_set(&mgm_data->pt_stats[id].size, size_allocated); + struct mgm_groups *const data = mgm_dev->data; + + slc_update_signal(&data->slc_data, signal); } 
+EXPORT_SYMBOL_GPL(pixel_mgm_slc_update_signal); -static int mgm_initialize_data(struct mgm_groups *mgm_data) +void pixel_mgm_slc_inc_refcount(struct memory_group_manager_device* mgm_dev) { - int i, ret; + struct mgm_groups *const data = mgm_dev->data; - /* +1 to include the required default group */ - const int ngroups = of_property_count_strings(mgm_data->dev->of_node, "groups") + 1; - if (WARN_ON(ngroups < 0) || - WARN_ON(ngroups > MEMORY_GROUP_MANAGER_NR_GROUPS)) { - mgm_data->ngroups = 0; - } else { - mgm_data->ngroups = ngroups; - } - mgm_data->npartitions = of_property_count_strings(mgm_data->dev->of_node, "pt_id"); + slc_inc_refcount(&data->slc_data); +} +EXPORT_SYMBOL_GPL(pixel_mgm_slc_inc_refcount); - mgm_data->pt_stats = kzalloc(mgm_data->npartitions * sizeof(struct partition_stats), GFP_KERNEL); - if (mgm_data->pt_stats == NULL) { - dev_err(mgm_data->dev, "failed to allocate space for pt_stats"); - ret = -ENOMEM; - goto out_err; - } +void pixel_mgm_slc_dec_refcount(struct memory_group_manager_device* mgm_dev) +{ + struct mgm_groups *const data = mgm_dev->data; - for (i = 0; i < mgm_data->npartitions; i++) { - struct partition_stats* stats; - u32 capacity_kb; - ret = of_property_read_u32_index(mgm_data->dev->of_node, "pt_size", i, &capacity_kb); - if (ret) { - dev_err(mgm_data->dev, "failed to read pt_size[%d]", i); - continue; - } + slc_dec_refcount(&data->slc_data); +} +EXPORT_SYMBOL_GPL(pixel_mgm_slc_dec_refcount); - stats = &mgm_data->pt_stats[i]; - // Convert from KB to bytes - stats->capacity = (u64)capacity_kb << 10; - atomic64_set(&stats->size, stats->capacity); - } +static int mgm_initialize_data(struct mgm_groups *mgm_data) +{ + int i, ret; + + if ((ret = slc_init_data(&mgm_data->slc_data, mgm_data->dev))) + goto out_err; for (i = 0; i < MEMORY_GROUP_MANAGER_NR_GROUPS; i++) { atomic_set(&mgm_data->groups[i].size, 0); @@ -898,50 +594,8 @@ static int mgm_initialize_data(struct mgm_groups *mgm_data) atomic_set(&mgm_data->groups[i].insert_pfn, 0); atomic_set(&mgm_data->groups[i].update_gpu_pte, 0); #endif - - mgm_data->groups[i].pbha = MGM_PBHA_DEFAULT; - mgm_data->groups[i].base_pt = 0; - mgm_data->groups[i].pt_num = 0; - mgm_data->groups[i].active_pt_idx = 0; - mgm_data->groups[i].state = MGM_GROUP_STATE_NEW; - } - - /* Discover the partitions belonging to each memory group, skipping the reserved group */ - for (i = 1; i < mgm_data->ngroups; i++) { - /* Device tree has no description for the reserved group */ - int const dt_idx = i - 1; - - int err = of_property_read_u32_index( - mgm_data->dev->of_node, "group_base_pt", dt_idx, &mgm_data->groups[i].base_pt); - if (err) { - dev_warn(mgm_data->dev, "failed to read base pt index for group %d", i); - continue; - } - - err = of_property_read_u32_index( - mgm_data->dev->of_node, "group_pt_num", dt_idx, &mgm_data->groups[i].pt_num); - if (err) - dev_warn(mgm_data->dev, "failed to read pt number for group %d", i); } - /* - * Initialize SLC partitions. We don't enable partitions until - * we actually allocate memory to the corresponding memory - * group - */ - mgm_data->pt_handle = - pt_client_register(mgm_data->dev->of_node, (void*)mgm_data, &mgm_resize_callback); - - if (IS_ERR(mgm_data->pt_handle)) { - ret = PTR_ERR(mgm_data->pt_handle); - dev_err(mgm_data->dev, "pt_client_register returned %d\n", ret); - goto out_err; - } - - /* We don't use PBHA bits for the reserved memory group, and so - * it is effectively already initialized. 
- */ - mgm_data->groups[MGM_RESERVED_GROUP_ID].state = MGM_GROUP_STATE_ENABLED; if ((ret = mgm_debugfs_init(mgm_data))) goto out_err; @@ -949,20 +603,9 @@ static int mgm_initialize_data(struct mgm_groups *mgm_data) if ((ret = mgm_sysfs_init(mgm_data))) goto out_err; -#ifdef CONFIG_MALI_PIXEL_GPU_SLC - /* We enable the SLC partition by default to support dynamic SLC caching. - * Enabling will initialize the partition, by querying the pbha and assigning a ptid. - * We then immediately disable the partition, effectively resizing the group to zero, - * whilst still retaining other properties such as pbha. - */ - enable_partition(mgm_data, MGM_SLC_GROUP_ID); - disable_partition(mgm_data, MGM_SLC_GROUP_ID); -#endif - return ret; out_err: - kfree(mgm_data->pt_stats); return ret; } @@ -983,29 +626,9 @@ static void mgm_term_data(struct mgm_groups *data) dev_warn(data->dev, "%zu 9 order pages in group(%d) leaked\n", (size_t)atomic_read(&group->lp_size), i); - - /* Disable partition indices and free the partition */ - switch (group->state) { - - case MGM_GROUP_STATE_NEW: - case MGM_GROUP_STATE_DISABLED: - /* Nothing to do */ - break; - - case MGM_GROUP_STATE_ENABLED: - pt_client_disable(data->pt_handle, group_active_pt_id(data, i)); - break; - case MGM_GROUP_STATE_DISABLED_NOT_FREED: - pt_client_free(data->pt_handle, group_active_pt_id(data, i)); - break; - - default: - dev_err(data->dev, "Group %d in invalid state %d\n", - i, group->state); - } } - pt_client_unregister(data->pt_handle); + slc_term_data(&data->slc_data); mgm_debugfs_term(data); mgm_sysfs_term(data); diff --git a/mali_pixel/pixel_slc.c b/mali_pixel/pixel_slc.c new file mode 100644 index 0000000..45506ab --- /dev/null +++ b/mali_pixel/pixel_slc.c @@ -0,0 +1,405 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright 2024 Google LLC. + * + * Author: Jack Diver <diverj@google.com> + */ + +#include <linux/atomic.h> +#include <linux/io.h> +#include <linux/of.h> +#include <linux/platform_device.h> +#include <linux/slab.h> +#include <linux/dev_printk.h> +/* Pixel integration includes */ +#include <soc/google/acpm_ipc_ctrl.h> +#include "pixel_slc.h" + + +/** + * DOC: PBHA + * + * Borr does not have "real" PBHA support. However, since we only use a 36-bit PA on the bus, + * AxADDR[39:36] is wired up to the GPU AxUSER[PBHA] field seen by the rest of the system. + * Those AxADDR bits come from [39:36] in the page descriptor. + * + * Odin and Turse have "real" PBHA support using a dedicated output signal and page descriptor field. + * The AxUSER[PBHA] field is driven by the GPU's PBHA signal, and AxADDR[39:36] is dropped. + * The page descriptor PBHA field is [62:59]. + * + * We could write to both of these locations, as each SoC only reads from its respective PBHA + * location with the other being ignored or dropped. + * + * b/148988078 contains confirmation of the above description. + */ +#if IS_ENABLED(CONFIG_SOC_GS101) +#define PBHA_BIT_POS (36) +#else +#define PBHA_BIT_POS (59) +#endif +#define PBHA_BIT_MASK (0xf) + +#define PARTITION_DISABLE_HYSTERESIS (msecs_to_jiffies(100)) +#define PARTITION_ENABLE_THRESHOLD (7) + + +/** + * partition_required() - Determine whether we require a partition to be enabled + * + * @pt: The partition to check. + * + * Check whether a partition meets the requirements for being enabled. + * + * Return: True, if the partition is required to be enabled, otherwise false. 
+ */ +static bool partition_required(struct slc_partition *pt) +{ + lockdep_assert_held(&pt->lock); + + return (atomic_read(&pt->refcount) && (pt->signal >= PARTITION_ENABLE_THRESHOLD)) || + pt->pinned; +} + +/** + * pixel_atomic_dec_and_lock_irqsave - lock on reaching reference count zero + * + * @val: The atomic counter + * @lock: The spinlock in question + * @flags: Storage for the current interrupt enable state + * + * Decrements @val by 1, if the result is 0, locks @lock. + * + * Return: True if the lock was taken, false for all other cases. + */ +static int pixel_atomic_dec_and_lock_irqsave(atomic_t* val, spinlock_t* lock, unsigned long* flags) +{ + /* Subtract 1 from counter unless that drops it to 0 (ie. it was 1) */ + if (atomic_add_unless(val, -1, 1)) + return 0; + + /* Otherwise do it the slow way */ + spin_lock_irqsave(lock, *flags); + if (atomic_dec_and_test(val)) + return 1; + spin_unlock_irqrestore(lock, *flags); + + return 0; +} + +/** + * slc_wipe_pbha - Clear any set PBHA bits from the pte. + * + * @pte: The pte to strip of PBHA. + * + * Return: The PTE with all PBHA stripped. + */ +u64 slc_wipe_pbha(u64 pte) +{ + return pte & ~((u64)PBHA_BIT_MASK << PBHA_BIT_POS); +} + +/** + * slc_set_pbha - Apply the PBHA to @pte. + * + * @data: The &struct slc_data tracking partition information. + * @pte: The pte to modify. + * + * Return: On success, returns a modified PTE. On failure the original PTE is returned. + */ +u64 slc_set_pbha(struct slc_data const *data, u64 pte) +{ + /* Clear any bits set in the PBHA range */ + pte = slc_wipe_pbha(pte); + + /* Apply the PBHA for the given virtual partition */ + return pte | (((u64)data->partition.pbha) & PBHA_BIT_MASK) << PBHA_BIT_POS; +} + +/** + * enable_partition - Enable @pt + * + * @data: The &struct slc_data tracking partition information. + * @pt: The &struct slc_partition representing the partition to enable. + */ +static void enable_partition(struct slc_data *data, struct slc_partition *pt) +{ + /* Skip if already enabled */ + if (pt->enabled) + return; + + (void)pt_client_enable(data->pt_handle, pt->index); + pt->enabled = true; + + dev_dbg(data->dev, "enabled partition %d", pt->index); +} + +/** + * disable_partition - Disable @pt + * + * @data: The &struct slc_data tracking partition information. + * @pt: The &struct slc_partition representing the partition to disable. + */ +static void disable_partition(struct slc_data *data, struct slc_partition *pt) +{ + /* Skip if not enabled */ + if (!pt->enabled) + return; + + pt_client_disable_no_free(data->pt_handle, pt->index); + pt->enabled = false; + + dev_dbg(data->dev, "disabled partition %d", pt->index); +} + +/** + * queue_disable_worker - Queue a delayed partition disable op + * + * @data: The &struct slc_data tracking partition information. 
+/**
+ * slc_wipe_pbha - Clear any set PBHA bits from the pte.
+ *
+ * @pte: The pte to strip of PBHA.
+ *
+ * Return: The PTE with all PBHA stripped.
+ */
+u64 slc_wipe_pbha(u64 pte)
+{
+        return pte & ~((u64)PBHA_BIT_MASK << PBHA_BIT_POS);
+}
+
+/**
+ * slc_set_pbha - Apply the PBHA to @pte.
+ *
+ * @data: The &struct slc_data tracking partition information.
+ * @pte: The pte to modify.
+ *
+ * Return: On success, returns a modified PTE. On failure the original PTE is returned.
+ */
+u64 slc_set_pbha(struct slc_data const *data, u64 pte)
+{
+        /* Clear any bits set in the PBHA range */
+        pte = slc_wipe_pbha(pte);
+
+        /* Apply the PBHA for the given virtual partition */
+        return pte | (((u64)data->partition.pbha) & PBHA_BIT_MASK) << PBHA_BIT_POS;
+}
+
+/**
+ * enable_partition - Enable @pt
+ *
+ * @data: The &struct slc_data tracking partition information.
+ * @pt: The &struct slc_partition representing the partition to enable.
+ */
+static void enable_partition(struct slc_data *data, struct slc_partition *pt)
+{
+        /* Skip if already enabled */
+        if (pt->enabled)
+                return;
+
+        (void)pt_client_enable(data->pt_handle, pt->index);
+        pt->enabled = true;
+
+        dev_dbg(data->dev, "enabled partition %d", pt->index);
+}
+
+/**
+ * disable_partition - Disable @pt
+ *
+ * @data: The &struct slc_data tracking partition information.
+ * @pt: The &struct slc_partition representing the partition to disable.
+ */
+static void disable_partition(struct slc_data *data, struct slc_partition *pt)
+{
+        /* Skip if not enabled */
+        if (!pt->enabled)
+                return;
+
+        pt_client_disable_no_free(data->pt_handle, pt->index);
+        pt->enabled = false;
+
+        dev_dbg(data->dev, "disabled partition %d", pt->index);
+}
+
+/**
+ * queue_disable_worker - Queue a delayed partition disable op
+ *
+ * @data: The &struct slc_data tracking partition information.
+ */
+static void queue_disable_worker(struct slc_data *data)
+{
+        queue_delayed_work(system_highpri_wq, &data->disable_work, PARTITION_DISABLE_HYSTERESIS);
+}
+
+/**
+ * partition_disable_worker - Callback to lazily disable a partition
+ *
+ * @work: The &struct work_struct dequeued
+ */
+static void partition_disable_worker(struct work_struct *work)
+{
+        struct slc_data* data = container_of(work, struct slc_data, disable_work.work);
+        struct slc_partition *pt = &data->partition;
+        unsigned long flags;
+
+        /* Complete any pending disable ops */
+        spin_lock_irqsave(&pt->lock, flags);
+
+        if (!partition_required(pt))
+                disable_partition(data, pt);
+
+        spin_unlock_irqrestore(&pt->lock, flags);
+}
+
+/**
+ * slc_inc_refcount - Increase the partition reference count.
+ *
+ * @data: The &struct slc_data tracking partition information.
+ *
+ * If this is the first reference being taken, the partition will be enabled.
+ */
+void slc_inc_refcount(struct slc_data *data)
+{
+        struct slc_partition *pt = &data->partition;
+
+        /* Try to re-enable the partition if this is the first reference */
+        if (atomic_inc_return(&pt->refcount) == 1) {
+                unsigned long flags;
+
+                spin_lock_irqsave(&pt->lock, flags);
+
+                /* Enable the partition immediately if it's required */
+                if (partition_required(pt))
+                        enable_partition(data, pt);
+
+                spin_unlock_irqrestore(&pt->lock, flags);
+        }
+}
+
+/**
+ * slc_dec_refcount - Decrease the partition reference count.
+ *
+ * @data: The &struct slc_data tracking partition information.
+ *
+ * If this is the last reference being released, the partition will be disabled.
+ */
+void slc_dec_refcount(struct slc_data *data)
+{
+        struct slc_partition *pt = &data->partition;
+        unsigned long flags;
+
+        /* Disable the partition if this was the last reference */
+        if (pixel_atomic_dec_and_lock_irqsave(&pt->refcount, &pt->lock, &flags)) {
+
+                /* Lazily disable the partition if it's no longer required */
+                if (!partition_required(pt))
+                        queue_disable_worker(data);
+
+                spin_unlock_irqrestore(&pt->lock, flags);
+        }
+}
+
+/**
+ * slc_update_signal - Read the latest SLC governor signal and react to it.
+ *
+ * @data: The &struct slc_data tracking partition information.
+ * @signal: Fallback signal value, used when no ACPM signal buffer is mapped.
+ *
+ * Enables the partition immediately if the new signal makes it required,
+ * otherwise queues a lazy disable.
+ */
+void slc_update_signal(struct slc_data *data, u64 signal)
+{
+        struct slc_partition *pt = &data->partition;
+        unsigned long flags;
+
+        spin_lock_irqsave(&pt->lock, flags);
+
+        /* Use ACPM signal when available */
+        if (data->signal)
+                pt->signal = ioread64((u64 __iomem*)data->signal);
+        else
+                pt->signal = signal;
+
+        if (partition_required(pt))
+                /* Enable the partition immediately if it's required */
+                enable_partition(data, pt);
+        else
+                /* Lazily disable the partition if it's no longer required */
+                queue_disable_worker(data);
+
+        spin_unlock_irqrestore(&pt->lock, flags);
+}
+
+/**
+ * slc_pin - Pin or unpin the partition to/from the enabled state.
+ *
+ * @data: The &struct slc_data tracking partition information.
+ * @pin: True to pin the partition enabled, false to unpin it.
+ */
+void slc_pin(struct slc_data *data, bool pin)
+{
+        struct slc_partition *pt = &data->partition;
+        unsigned long flags;
+
+        spin_lock_irqsave(&pt->lock, flags);
+
+        pt->pinned = pin;
+        if (pin)
+                enable_partition(data, pt);
+        else if (!partition_required(pt))
+                queue_disable_worker(data);
+
+        spin_unlock_irqrestore(&pt->lock, flags);
+}
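Taken together, slc_inc_refcount(), slc_dec_refcount(), slc_update_signal() and slc_pin()
all funnel into the single partition_required() predicate. A small table-driven userspace
sketch of that decision is below; the threshold matches PARTITION_ENABLE_THRESHOLD defined
earlier in this file, and the scenarios are invented:

    #include <stdbool.h>
    #include <stddef.h>
    #include <stdint.h>
    #include <stdio.h>

    #define PARTITION_ENABLE_THRESHOLD 7

    /* Mirrors partition_required(): enabled iff pinned, or referenced
     * with a governor signal at or above the threshold.
     */
    static bool partition_required(int refcount, uint64_t signal, bool pinned)
    {
            return (refcount && signal >= PARTITION_ENABLE_THRESHOLD) || pinned;
    }

    int main(void)
    {
            struct { int ref; uint64_t sig; bool pin; } cases[] = {
                    { 1, 9, false },  /* active + strong signal -> enable       */
                    { 1, 3, false },  /* active, weak signal    -> lazy disable */
                    { 0, 9, false },  /* no users               -> lazy disable */
                    { 0, 0, true  },  /* pinned overrides all   -> enable       */
            };

            for (size_t i = 0; i < sizeof(cases) / sizeof(cases[0]); i++)
                    printf("ref=%d sig=%llu pin=%d -> %s\n",
                           cases[i].ref, (unsigned long long)cases[i].sig,
                           (int)cases[i].pin,
                           partition_required(cases[i].ref, cases[i].sig,
                                              cases[i].pin) ?
                                   "enable" : "lazy disable");
            return 0;
    }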
+/**
+ * init_partition - Register and initialize a partition with the SLC driver.
+ *
+ * @data: The &struct slc_data tracking partition information.
+ * @pt: The &struct slc_partition to store the configured partition information.
+ * @index: The index of the partition, relative to the DT node.
+ *
+ * Return: -EINVAL on error, otherwise 0.
+ */
+static int init_partition(struct slc_data *data, struct slc_partition *pt, u32 index)
+{
+        ptid_t ptid;
+        ptpbha_t pbha;
+        int err = -EINVAL;
+
+        ptid = pt_client_enable(data->pt_handle, index);
+        if (ptid == PT_PTID_INVALID) {
+                dev_err(data->dev, "failed to enable pt: %d\n", index);
+                goto err_exit;
+        }
+
+        pbha = pt_pbha(data->dev->of_node, index);
+        if (pbha == PT_PBHA_INVALID) {
+                dev_err(data->dev, "failed to get PBHA for pt: %d\n", index);
+                goto err_exit;
+        }
+
+        /* This retains the allocated ptid */
+        pt_client_disable_no_free(data->pt_handle, index);
+
+        /* Success */
+        err = 0;
+
+        *pt = (struct slc_partition) {
+                .index = index,
+                .ptid = ptid,
+                .pbha = pbha,
+                .enabled = false,
+                .refcount = ATOMIC_INIT(0),
+                .signal = 0,
+                .pinned = false,
+        };
+        spin_lock_init(&pt->lock);
+
+err_exit:
+        return err;
+}
+
+
+/**
+ * term_partition - Disable and free a partition.
+ *
+ * @data: The &struct slc_data tracking partition information.
+ * @pt: The &struct slc_partition to terminate.
+ */
+static void term_partition(struct slc_data *data, struct slc_partition *pt)
+{
+        disable_partition(data, pt);
+        pt_client_free(data->pt_handle, pt->index);
+}
+
+/**
+ * slc_init_data - Read all SLC partition information, init the partitions, and track within @data.
+ *
+ * @data: The &struct slc_data tracking partition information.
+ * @dev: The platform device associated with the parent node.
+ *
+ * Return: On success, returns 0. On failure an error code is returned.
+ */
+int slc_init_data(struct slc_data *data, struct device* dev)
+{
+        int ret = -EINVAL;
+
+        if (data == NULL || dev == NULL)
+                goto err_exit;
+
+        /* Inherit the platform device */
+        data->dev = dev;
+
+        INIT_DELAYED_WORK(&data->disable_work, partition_disable_worker);
+
+        /* Register our node with the SLC driver.
+         * This detects our partitions defined within the DT.
+         */
+        data->pt_handle = pt_client_register(data->dev->of_node, NULL, NULL);
+        if (IS_ERR(data->pt_handle)) {
+                ret = PTR_ERR(data->pt_handle);
+                dev_err(data->dev, "pt_client_register failed with: %d\n", ret);
+                goto err_exit;
+        }
+
+        if (IS_ENABLED(PIXEL_GPU_SLC_ACPM_SIGNAL)) {
+                u32 size;
+
+                /* Obtain a handle to the ACPM provided GPU partition signal */
+                if ((ret = acpm_ipc_get_buffer("GPU_SIGNAL", &data->signal, &size))) {
+                        dev_err(data->dev, "failed to retrieve SLC GPU signal: %d", ret);
+                        goto pt_init_err_exit;
+                }
+
+                /* Validate the signal buffer size */
+                if (size != sizeof(u64)) {
+                        dev_err(data->dev, "SLC GPU signal size incorrect: %d", size);
+                        goto pt_init_err_exit;
+                }
+        }
+
+        if ((ret = init_partition(data, &data->partition, 0)))
+                goto pt_init_err_exit;
+
+        return 0;
+
+pt_init_err_exit:
+        pt_client_unregister(data->pt_handle);
+
+err_exit:
+        return ret;
+}
+
+/**
+ * slc_term_data - Tear down SLC partitions and free tracking data.
+ *
+ * @data: The &struct slc_data tracking partition information.
+ */
+void slc_term_data(struct slc_data *data)
+{
+        /* Ensure all pending disable ops are complete */
+        cancel_delayed_work_sync(&data->disable_work);
+
+        term_partition(data, &data->partition);
+
+        pt_client_unregister(data->pt_handle);
+}
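End to end, a client of this file is expected to call slc_init_data() once, bracket GPU
activity with the refcount helpers, forward governor signals as they arrive, and call
slc_term_data() at teardown. A hedged sketch of such a caller follows; the probe/remove
wiring is invented for illustration (the real consumer is the memory group manager), and
only the slc_* calls come from this patch:

    #include <linux/platform_device.h>
    #include "pixel_slc.h"

    static struct slc_data slc;

    static int example_probe(struct platform_device *pdev)
    {
            int ret = slc_init_data(&slc, &pdev->dev);
            if (ret)
                    return ret;

            /* First user arrives: partition may enable if the signal allows */
            slc_inc_refcount(&slc);

            /* Governor reports high SLC benefit: enables while referenced */
            slc_update_signal(&slc, 9);

            /* Last user leaves: disable is queued after the hysteresis delay */
            slc_dec_refcount(&slc);
            return 0;
    }

    static void example_remove(struct platform_device *pdev)
    {
            slc_term_data(&slc); /* flushes any pending lazy-disable work */
    }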
diff --git a/mali_pixel/pixel_slc.h b/mali_pixel/pixel_slc.h
new file mode 100644
index 0000000..cb8e90d
--- /dev/null
+++ b/mali_pixel/pixel_slc.h
@@ -0,0 +1,98 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright 2024 Google LLC.
+ *
+ * Author: Jack Diver <diverj@google.com>
+ */
+#ifndef _PIXEL_SLC_H_
+#define _PIXEL_SLC_H_
+
+#include <soc/google/pt.h>
+
+/**
+ * DOC: SLC partition management
+ *
+ * Key definitions:
+ * + Partition index - The unique index of a partition, relative to the DT node that owns it.
+ *                     This index is used when communicating with the underlying SLC driver.
+ * + ptid - This is the HW level ID associated with an enabled partition. These IDs are allocated
+ *          at partition enable time. The GPU driver will never directly use the ptid, but will
+ *          track it.
+ *          External analysis of caching behavior (e.g. hit and eviction counters) is associated
+ *          with a ptid, not a physical partition index.
+ *          This driver attempts to hold on to any allocated ptids until driver termination to
+ *          make profiling of caching performance easier.
+ * + PBHA - Acronym: Page Based Hardware Attributes. Every physical partition has a PBHA value
+ *          associated with it. We insert these attributes into PTEs so that transactions with a
+ *          page carry the PBHA within their high bits.
+ *          Transactions with PBHA bits set are intercepted by the SLC, where the corresponding
+ *          partition and its caching behavior (read/write allocation policy, etc.) are looked up
+ *          and applied to the transaction.
+ */

+/**
+ * struct slc_partition - Structure for tracking partition state.
+ */
+struct slc_partition {
+	/** @index: The partition index, relative to the owning DT node */
+	u32 index;
+
+	/** @ptid: The HW partition ID backing this virtual partition */
+	ptid_t ptid;
+
+	/** @pbha: The page based HW attributes for this partition */
+	ptpbha_t pbha;
+
+	/** @enabled: Is the partition currently enabled */
+	bool enabled;
+
+	/** @refcount: Reference count for this partition */
+	atomic_t refcount;
+
+	/** @lock: Lock protecting enable/disable ops on this partition */
+	spinlock_t lock;
+
+	/** @signal: Partition enable/disable signal from SLC governor */
+	u64 signal;
+
+	/** @pinned: Is the partition pinned to the enabled state */
+	bool pinned;
+};
+
+/**
+ * struct slc_data - Structure for tracking SLC context.
+ */
+struct slc_data {
+	/** @pt_handle: Link to ACPM SLC partition data */
+	struct pt_handle *pt_handle;
+
+	/** @partition: Information specific to an individual SLC partition */
+	struct slc_partition partition;
+
+	/** @dev: Inherited pointer to device attached */
+	struct device *dev;
+
+	/** @disable_work: Work item used to queue lazy SLC partition disable ops. */
+	struct delayed_work disable_work;
+
+	/** @signal: Partition enable/disable signal from SLC governor. */
+	char __iomem *signal;
+};
+
+int slc_init_data(struct slc_data *data, struct device* dev);
+
+void slc_term_data(struct slc_data *data);
+
+u64 slc_set_pbha(struct slc_data const *data, u64 pte);
+
+u64 slc_wipe_pbha(u64 pte);
+
+void slc_inc_refcount(struct slc_data *data);
+
+void slc_dec_refcount(struct slc_data *data);
+
+void slc_pin(struct slc_data *data, bool pin);
+
+void slc_update_signal(struct slc_data *data, u64 signal);
+
+#endif /* _PIXEL_SLC_H_ */
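As a final illustration of the header's PBHA helpers: when the memory group manager builds
GPU PTEs, SLC-eligible mappings get the partition's PBHA stamped in, and everything else is
scrubbed so stale attribute bits never reach the bus. The hook below is a hypothetical,
simplified sketch of that path, not the actual mali memory group manager callback signature:

    #include "pixel_slc.h"

    /* Hypothetical PTE-construction hook, illustrative only. */
    static u64 example_update_gpu_pte(struct slc_data *data, bool cache_in_slc, u64 pte)
    {
            if (cache_in_slc)
                    return slc_set_pbha(data, pte); /* route through the SLC partition */

            return slc_wipe_pbha(pte); /* guarantee no stray PBHA bits */
    }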