author    Jack Diver <diverj@google.com>  2024-03-13 16:33:34 +0000
committer Jack Diver <diverj@google.com>  2024-03-14 10:13:37 +0000
commit    ad771ce991d6e0855a90d15fa5993332edbbe183 (patch)
tree      e5713a696500e6d4892f1f9a06d3709badb23326
parent    9052ae5d77f9ad5c8552ec915936a531cd899e2a (diff)
download  gpu-ad771ce991d6e0855a90d15fa5993332edbbe183.tar.gz
[DO NOT MERGE ANYWHERE] Revert "mali_kbase: platform: Remove liveness based SLC"
Revert submission 2753879-gpu-slcv2-gs201

Reason for revert: Prebuild did not land before cutoff
Reverted changes: /q/submissionid:2753879-gpu-slcv2-gs201

Bug: 329447972
Change-Id: I60d8cbb170affb4edc126aeee390b16935244b21
-rw-r--r--  mali_kbase/platform/pixel/mali_kbase_config_platform.h   21
-rw-r--r--  mali_kbase/platform/pixel/pixel_gpu_slc.c                451
2 files changed, 464 insertions(+), 8 deletions(-)
diff --git a/mali_kbase/platform/pixel/mali_kbase_config_platform.h b/mali_kbase/platform/pixel/mali_kbase_config_platform.h
index 06b76ea..a0bf623 100644
--- a/mali_kbase/platform/pixel/mali_kbase_config_platform.h
+++ b/mali_kbase/platform/pixel/mali_kbase_config_platform.h
@@ -302,6 +302,10 @@ struct gpu_dvfs_metrics_uid_stats;
* @dvfs.qos.bts.threshold: The G3D shader stack clock at which BTS will be enabled. Set via DT.
* @dvfs.qos.bts.scenario: The index of the BTS scenario to be used. Set via DT.
*
+ * @slc.lock: Synchronize updates to the SLC partition accounting variables.
+ * @slc.demand: The total demand for SLC space, an aggregation of each kctx's demand.
+ * @slc.usage: The total amount of SLC space used, an aggregation of each kctx's usage.
+ *
* @itmon.wq: A workqueue for ITMON page table search.
* @itmon.work: The work item for the above.
* @itmon.nb: The ITMON notifier block.
@@ -414,6 +418,12 @@ struct pixel_context {
} dvfs;
#endif /* CONFIG_MALI_MIDGARD_DVFS */
+ struct {
+ struct mutex lock;
+ u64 demand;
+ u64 usage;
+ } slc;
+
#if IS_ENABLED(CONFIG_EXYNOS_ITMON)
struct {
struct workqueue_struct *wq;
@@ -430,10 +440,21 @@ struct pixel_context {
*
* @kctx: Handle to the parent kctx
* @stats: Tracks the dvfs metrics for the UID associated with this context
+ *
+ * @slc.peak_demand: The parent context's maximum demand for SLC space
+ * @slc.peak_usage: The parent context's maximum use of SLC space
+ * @slc.idle_work: Work item used to queue SLC partition shrink upon context idle
+ * @slc.idle_work_cancelled: Flag for async cancellation of idle_work
*/
struct pixel_platform_data {
struct kbase_context *kctx;
struct gpu_dvfs_metrics_uid_stats* stats;
+ struct {
+ u64 peak_demand;
+ u64 peak_usage;
+ struct work_struct idle_work;
+ atomic_t idle_work_cancelled;
+ } slc;
};
#endif /* _KBASE_CONFIG_PLATFORM_H_ */
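
The two structs added above split SLC accounting across two levels: device-wide totals (slc.demand and slc.usage, serialized by slc.lock) and per-context peaks (slc.peak_demand and slc.peak_usage). The intended invariant is that the device totals equal the sum of every live context's peaks. A minimal userspace model of that roll-up, with hypothetical slc_dev/slc_ctx types standing in for the kbase structures:

#include <stdint.h>
#include <stdio.h>

/* Hypothetical stand-ins for the kbase structures above. */
struct slc_ctx { uint64_t peak_demand, peak_usage; };
struct slc_dev { uint64_t demand, usage; }; /* guarded by slc.lock in the driver */

/* Fold a context's freshly computed peaks into the device totals, the way
 * the liveness update path does: retire the stale contribution first,
 * then publish the new one. */
static void slc_dev_update(struct slc_dev *dev, struct slc_ctx *ctx,
                           uint64_t new_peak_demand, uint64_t new_peak_usage)
{
	dev->demand -= ctx->peak_demand;
	dev->usage -= ctx->peak_usage;
	ctx->peak_demand = new_peak_demand;
	ctx->peak_usage = new_peak_usage;
	dev->demand += ctx->peak_demand;
	dev->usage += ctx->peak_usage;
}

int main(void)
{
	struct slc_dev dev = { 0, 0 };
	struct slc_ctx a = { 0, 0 }, b = { 0, 0 };

	slc_dev_update(&dev, &a, 64, 32);
	slc_dev_update(&dev, &b, 128, 128);
	slc_dev_update(&dev, &a, 16, 16); /* context A shrinks: totals follow */
	printf("demand=%llu usage=%llu\n",
	       (unsigned long long)dev.demand, (unsigned long long)dev.usage);
	return 0;
}

Retiring the stale contribution before adding the new one keeps the totals correct no matter how many times a context re-reports.
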
diff --git a/mali_kbase/platform/pixel/pixel_gpu_slc.c b/mali_kbase/platform/pixel/pixel_gpu_slc.c
index 8e46be1..d6cb131 100644
--- a/mali_kbase/platform/pixel/pixel_gpu_slc.c
+++ b/mali_kbase/platform/pixel/pixel_gpu_slc.c
@@ -17,6 +17,321 @@
#include "mali_kbase_config_platform.h"
#include "pixel_gpu_slc.h"
+struct dirty_region {
+ u64 first_vpfn;
+ u64 last_vpfn;
+ u64 dirty_pgds;
+};
+
+/**
+ * struct gpu_slc_liveness_update_info - Buffer info, and live ranges
+ *
+ * @buffer_va: Array of buffer base virtual addresses
+ * @buffer_sizes: Array of buffer sizes
+ * @buffer_count: Number of elements in the va and sizes buffers
+ * @live_ranges: Array of &struct kbase_pixel_gpu_slc_liveness_mark denoting live ranges for
+ * each buffer
+ * @live_ranges_count: Number of elements in the live ranges buffer
+ */
+struct gpu_slc_liveness_update_info {
+ u64* buffer_va;
+ u64* buffer_sizes;
+ u64 buffer_count;
+ struct kbase_pixel_gpu_slc_liveness_mark* live_ranges;
+ u64 live_ranges_count;
+};
+
+/**
+ * gpu_slc_lock_as - Lock the current process address space
+ *
+ * @kctx: The &struct kbase_context
+ */
+static void gpu_slc_lock_as(struct kbase_context *kctx)
+{
+ down_write(kbase_mem_get_process_mmap_lock());
+ kbase_gpu_vm_lock(kctx);
+}
+
+/**
+ * gpu_slc_unlock_as - Unlock the current process address space
+ *
+ * @kctx: The &struct kbase_context
+ */
+static void gpu_slc_unlock_as(struct kbase_context *kctx)
+{
+ kbase_gpu_vm_unlock(kctx);
+ up_write(kbase_mem_get_process_mmap_lock());
+}
+
+/**
+ * gpu_slc_in_group - Check whether the region is SLC cacheable
+ *
+ * @reg: The gpu memory region to check for an SLC cacheable memory group.
+ */
+static bool gpu_slc_in_group(struct kbase_va_region* reg)
+{
+ return reg->gpu_alloc->group_id == MGM_SLC_GROUP_ID;
+}
+
+/**
+ * gpu_slc_get_region - Find the gpu memory region from a virtual address
+ *
+ * @kctx: The &struct kbase_context
+ * @va: The base gpu virtual address of the region
+ *
+ * Return: On success, returns a valid memory region. On failure NULL is returned.
+ */
+static struct kbase_va_region* gpu_slc_get_region(struct kbase_context *kctx, u64 va)
+{
+ struct kbase_va_region *reg;
+
+ if (!va)
+ goto invalid;
+
+ if ((va & ~PAGE_MASK) && (va >= PAGE_SIZE))
+ goto invalid;
+
+ /* Find the region that the virtual address belongs to */
+ reg = kbase_region_tracker_find_region_base_address(kctx, va);
+
+ /* Validate the region */
+ if (kbase_is_region_invalid_or_free(reg))
+ goto invalid;
+
+ return reg;
+
+invalid:
+ dev_dbg(kctx->kbdev->dev, "pixel: failed to find valid region for gpu_va: %llu", va);
+ return NULL;
+}
+
+/**
+ * gpu_slc_migrate_region - Add PBHA that will make the pages SLC cacheable
+ *
+ * @kctx: The &struct kbase_context
+ * @reg: The gpu memory region to migrate to an SLC cacheable memory group
+ * @dirty_reg: The &struct dirty_region containing the extent of the dirty page table entries
+ */
+static void gpu_slc_migrate_region(struct kbase_context *kctx, struct kbase_va_region *reg, struct dirty_region *dirty_reg)
+{
+ int err;
+ u64 vpfn;
+ size_t page_nr;
+
+ KBASE_DEBUG_ASSERT(kctx);
+ KBASE_DEBUG_ASSERT(reg);
+
+ if (gpu_slc_in_group(reg)) {
+ return;
+ }
+
+ vpfn = reg->start_pfn;
+ page_nr = kbase_reg_current_backed_size(reg);
+
+ err = kbase_mmu_update_pages_no_flush(kctx->kbdev, &kctx->mmu, vpfn,
+ kbase_get_gpu_phy_pages(reg),
+ page_nr,
+ reg->flags,
+ MGM_SLC_GROUP_ID,
+ &dirty_reg->dirty_pgds);
+
+ /* Track the dirty region */
+ dirty_reg->first_vpfn = min(dirty_reg->first_vpfn, vpfn);
+ dirty_reg->last_vpfn = max(dirty_reg->last_vpfn, vpfn + page_nr);
+
+ if (err)
+ dev_warn(kctx->kbdev->dev, "pixel: failed to move region to SLC: %d", err);
+ else
+ /* If everything is good, then set the new group on the region. */
+ reg->gpu_alloc->group_id = MGM_SLC_GROUP_ID;
+}
+
+/**
+ * gpu_slc_flush_dirty_region - Perform an MMU flush for a dirty page region
+ *
+ * @kctx: The &struct kbase_context
+ * @dirty_reg: The &struct dirty_region containing the extent of the dirty page table entries
+ */
+static void gpu_slc_flush_dirty_region(struct kbase_context *kctx, struct dirty_region *dirty_reg)
+{
+ size_t const dirty_page_nr =
+ (dirty_reg->last_vpfn - min(dirty_reg->first_vpfn, dirty_reg->last_vpfn));
+
+ if (!dirty_page_nr)
+ return;
+
+ kbase_mmu_flush_invalidate_update_pages(
+ kctx->kbdev, kctx, dirty_reg->first_vpfn, dirty_page_nr, dirty_reg->dirty_pgds);
+}
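
Note that gpu_slc_migrate_region() only marks page table entries dirty; the MMU flush is deferred and batched over the union of all touched ranges, so a whole liveness update costs a single flush. A standalone sketch of just the min/max accumulation, using hypothetical userspace types (the UINT64_MAX/0 initial values make the empty region flush zero pages, as above):

#include <stdint.h>
#include <stdio.h>

struct dirty_region { uint64_t first_vpfn, last_vpfn; };

/* Empty region: first > last, so the flushed page count below is zero. */
#define DIRTY_REGION_INIT { UINT64_MAX, 0 }

static void dirty_region_extend(struct dirty_region *d, uint64_t vpfn, uint64_t pages)
{
	if (vpfn < d->first_vpfn)
		d->first_vpfn = vpfn;
	if (vpfn + pages > d->last_vpfn)
		d->last_vpfn = vpfn + pages;
}

static uint64_t dirty_region_pages(const struct dirty_region *d)
{
	/* The floored subtraction mirrors gpu_slc_flush_dirty_region(). */
	uint64_t first = d->first_vpfn < d->last_vpfn ? d->first_vpfn : d->last_vpfn;

	return d->last_vpfn - first;
}

int main(void)
{
	struct dirty_region d = DIRTY_REGION_INIT;

	printf("empty: %llu pages\n", (unsigned long long)dirty_region_pages(&d));
	dirty_region_extend(&d, 0x100, 16); /* first migrated region */
	dirty_region_extend(&d, 0x80, 8);   /* second one, lower in the address space */
	printf("merged: [0x%llx, 0x%llx) = %llu pages\n",
	       (unsigned long long)d.first_vpfn, (unsigned long long)d.last_vpfn,
	       (unsigned long long)dirty_region_pages(&d));
	return 0;
}
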
+
+/**
+ * gpu_slc_resize_partition - Attempt to resize the GPU's SLC partition to meet demand.
+ *
+ * @kbdev: The &struct kbase_device for the GPU.
+ */
+static void gpu_slc_resize_partition(struct kbase_device* kbdev)
+{
+ struct pixel_context *pc = kbdev->platform_context;
+
+ /* Request that the mgm select an SLC partition that fits our demand */
+ pixel_mgm_resize_group_to_fit(kbdev->mgm_dev, MGM_SLC_GROUP_ID, pc->slc.demand);
+
+ dev_dbg(kbdev->dev, "pixel: resized GPU SLC partition to meet demand: %llu", pc->slc.demand);
+}
+
+/**
+ * gpu_slc_get_partition_size - Query the current size of the GPU's SLC partition.
+ *
+ * @kbdev: The &struct kbase_device for the GPU.
+ *
+ * Return: The size of the GPU's SLC partition.
+ */
+static u64 gpu_slc_get_partition_size(struct kbase_device* kbdev)
+{
+ u64 const partition_size = pixel_mgm_query_group_size(kbdev->mgm_dev, MGM_SLC_GROUP_ID);
+
+ dev_dbg(kbdev->dev, "pixel: GPU SLC partition size: %llu", partition_size);
+
+ return partition_size;
+}
+
+/**
+ * gpu_slc_liveness_update - Respond to a liveness update by trying to put the new buffers into free
+ * SLC space, and resizing the partition to meet demand.
+ *
+ * @kctx: The &struct kbase_context corresponding to a user space context which sent the liveness
+ * update
+ * @info: See struct gpu_slc_liveness_update_info
+ */
+static void gpu_slc_liveness_update(struct kbase_context* kctx,
+ struct gpu_slc_liveness_update_info* info)
+{
+ struct kbase_device* kbdev = kctx->kbdev;
+ struct pixel_context *pc = kbdev->platform_context;
+ struct pixel_platform_data *kctx_pd = kctx->platform_data;
+ struct dirty_region dirty_reg = {
+ .first_vpfn = U64_MAX,
+ .last_vpfn = 0,
+ .dirty_pgds = 0,
+ };
+ u64 current_usage = 0;
+ u64 current_demand = 0;
+ u64 free_space;
+ int i;
+
+ /* Lock the process address space before modifying ATEs */
+ gpu_slc_lock_as(kctx);
+
+ /* Synchronize updates to the partition size and usage */
+ mutex_lock(&pc->slc.lock);
+
+ dev_dbg(kbdev->dev, "pixel: buffer liveness update received");
+
+ /* Remove the usage and demand from the previous liveness update */
+ pc->slc.demand -= kctx_pd->slc.peak_demand;
+ pc->slc.usage -= kctx_pd->slc.peak_usage;
+ kctx_pd->slc.peak_demand = 0;
+ kctx_pd->slc.peak_usage = 0;
+
+ /* Calculate the remaining free space in the SLC partition (floored at 0) */
+ free_space = gpu_slc_get_partition_size(kbdev);
+ free_space -= min(free_space, pc->slc.usage);
+
+ for (i = 0; i < info->live_ranges_count; ++i)
+ {
+ struct kbase_va_region *reg;
+ u64 size;
+ u64 va;
+ u32 index = info->live_ranges[i].index;
+
+ if (unlikely(index >= info->buffer_count))
+ continue;
+
+ size = info->buffer_sizes[index];
+ va = info->buffer_va[index];
+
+ reg = gpu_slc_get_region(kctx, va);
+ if (!reg)
+ continue;
+
+ switch (info->live_ranges[i].type)
+ {
+ case KBASE_PIXEL_GPU_LIVE_RANGE_BEGIN:
+ /* Update demand as though there's no size limit */
+ current_demand += size;
+ kctx_pd->slc.peak_demand = max(kctx_pd->slc.peak_demand, current_demand);
+
+ /* Check whether there's free space in the partition to store the buffer */
+ if (free_space >= current_usage + size)
+ gpu_slc_migrate_region(kctx, reg, &dirty_reg);
+
+ /* This may be true even if the free-space check above failed,
+ * as a previous call to this function may have migrated the region.
+ * In such a scenario, the current_usage may exceed the available free_space
+ * and we will be oversubscribed to the SLC partition.
+ * We could migrate the region back to the non-SLC group, but this would
+ * require an SLC flush, so for now we do nothing.
+ */
+ if (gpu_slc_in_group(reg)) {
+ current_usage += size;
+ kctx_pd->slc.peak_usage = max(kctx_pd->slc.peak_usage, current_usage);
+ }
+ break;
+ case KBASE_PIXEL_GPU_LIVE_RANGE_END:
+ current_demand -= size;
+ if (gpu_slc_in_group(reg))
+ current_usage -= size;
+ break;
+ }
+ }
+ /* Perform single page table flush */
+ gpu_slc_flush_dirty_region(kctx, &dirty_reg);
+
+ /* Indicates a missing live range end marker */
+ WARN_ON_ONCE(current_demand != 0 || current_usage != 0);
+
+ /* Update the total usage and demand */
+ pc->slc.demand += kctx_pd->slc.peak_demand;
+ pc->slc.usage += kctx_pd->slc.peak_usage;
+
+ dev_dbg(kbdev->dev,
+ "pixel: kctx_%d, peak_demand: %llu, peak_usage: %llu",
+ kctx->id,
+ kctx_pd->slc.peak_demand,
+ kctx_pd->slc.peak_usage);
+ dev_dbg(kbdev->dev, "pixel: kbdev, demand: %llu, usage: %llu", pc->slc.demand, pc->slc.usage);
+
+ /* Trigger partition resize based on the new demand */
+ gpu_slc_resize_partition(kctx->kbdev);
+
+ mutex_unlock(&pc->slc.lock);
+ gpu_slc_unlock_as(kctx);
+}
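
The peak-demand computation above is a maximum over a running prefix sum of the begin/end marks. A self-contained sketch of just that reduction; the mark type here is a hypothetical mirror of kbase_pixel_gpu_slc_liveness_mark:

#include <stdint.h>
#include <stdio.h>

enum mark_type { RANGE_BEGIN, RANGE_END };
struct mark { enum mark_type type; uint32_t index; }; /* hypothetical uapi mirror */

/* Peak demand = maximum of the running sum of live buffer sizes. */
static uint64_t peak_demand(const struct mark *marks, uint64_t n_marks,
                            const uint64_t *sizes, uint64_t n_buffers)
{
	uint64_t current = 0, peak = 0;
	uint64_t i;

	for (i = 0; i < n_marks; ++i) {
		if (marks[i].index >= n_buffers)
			continue; /* skip out-of-range indices, as the driver does */
		if (marks[i].type == RANGE_BEGIN) {
			current += sizes[marks[i].index];
			if (current > peak)
				peak = current;
		} else {
			current -= sizes[marks[i].index];
		}
	}
	/* current != 0 here would indicate a missing end marker */
	return peak;
}

int main(void)
{
	uint64_t sizes[] = { 100, 50, 25 };
	struct mark marks[] = {
		{ RANGE_BEGIN, 0 }, { RANGE_BEGIN, 1 }, /* buffers 0 and 1 overlap */
		{ RANGE_END, 0 },
		{ RANGE_BEGIN, 2 }, { RANGE_END, 2 }, { RANGE_END, 1 },
	};

	/* Prints 150: buffers 0 and 1 are live together at the peak. */
	printf("peak demand = %llu\n",
	       (unsigned long long)peak_demand(marks, 6, sizes, 3));
	return 0;
}
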
+
+static void gpu_slc_kctx_idle_worker(struct work_struct *work)
+{
+ struct pixel_platform_data *pd =
+ container_of(work, struct pixel_platform_data, slc.idle_work);
+ struct kbase_context *kctx = pd->kctx;
+ struct kbase_device *kbdev = kctx->kbdev;
+ struct pixel_context *pc = kbdev->platform_context;
+
+ if (atomic_read(&pd->slc.idle_work_cancelled))
+ return;
+
+ mutex_lock(&pc->slc.lock);
+
+ pc->slc.demand -= pd->slc.peak_demand;
+ pc->slc.usage -= pd->slc.peak_usage;
+
+ pd->slc.peak_demand = 0;
+ pd->slc.peak_usage = 0;
+
+ gpu_slc_resize_partition(kctx->kbdev);
+
+ mutex_unlock(&pc->slc.lock);
+}
+
/**
* gpu_pixel_handle_buffer_liveness_update_ioctl() - See gpu_slc_liveness_update
*
@@ -30,8 +345,80 @@
int gpu_pixel_handle_buffer_liveness_update_ioctl(struct kbase_context* kctx,
struct kbase_ioctl_buffer_liveness_update* update)
{
- (void)kctx, (void)update;
- return 0;
+ int err = -EINVAL;
+ struct gpu_slc_liveness_update_info info;
+ u64* buff = NULL;
+ u64 total_buff_size;
+
+ /* Compute the sizes of the user space arrays that we need to copy */
+ u64 const buffer_info_size = sizeof(u64) * update->buffer_count;
+ u64 const live_ranges_size =
+ sizeof(struct kbase_pixel_gpu_slc_liveness_mark) * update->live_ranges_count;
+
+ /* Guard against overflows and empty sizes */
+ if (!buffer_info_size || !live_ranges_size)
+ goto done;
+ if (U64_MAX / sizeof(u64) < update->buffer_count)
+ goto done;
+ if (U64_MAX / sizeof(struct kbase_pixel_gpu_slc_liveness_mark) < update->live_ranges_count)
+ goto done;
+ /* Guard against nullptr */
+ if (!update->live_ranges_address || !update->buffer_va_address || !update->buffer_sizes_address)
+ goto done;
+ /* Calculate the total buffer size required and detect overflows */
+ if ((U64_MAX - live_ranges_size) / 2 < buffer_info_size)
+ goto done;
+
+ total_buff_size = buffer_info_size * 2 + live_ranges_size;
+
+ /* Allocate the memory we require to copy from user space */
+ buff = kmalloc(total_buff_size, GFP_KERNEL);
+ if (buff == NULL) {
+ dev_err(kctx->kbdev->dev, "pixel: failed to allocate buffer for liveness update");
+ err = -ENOMEM;
+ goto done;
+ }
+
+ /* Set up the info struct by pointing into the allocation. All fields are 8-byte aligned */
+ info = (struct gpu_slc_liveness_update_info){
+ .buffer_va = buff,
+ .buffer_sizes = buff + update->buffer_count,
+ .buffer_count = update->buffer_count,
+ .live_ranges = (struct kbase_pixel_gpu_slc_liveness_mark*)(buff + update->buffer_count * 2),
+ .live_ranges_count = update->live_ranges_count,
+ };
+
+ /* Copy the data from user space */
+ err = copy_from_user(
+ info.live_ranges, u64_to_user_ptr(update->live_ranges_address), live_ranges_size);
+ if (err) {
+ dev_err(kctx->kbdev->dev, "pixel: failed to copy live ranges");
+ err = -EFAULT;
+ goto done;
+ }
+
+ err = copy_from_user(
+ info.buffer_sizes, u64_to_user_ptr(update->buffer_sizes_address), buffer_info_size);
+ if (err) {
+ dev_err(kctx->kbdev->dev, "pixel: failed to copy buffer sizes");
+ err = -EFAULT;
+ goto done;
+ }
+
+ err = copy_from_user(info.buffer_va, u64_to_user_ptr(update->buffer_va_address), buffer_info_size);
+ if (err) {
+ dev_err(kctx->kbdev->dev, "pixel: failed to copy buffer addresses");
+ err = -EFAULT;
+ goto done;
+ }
+
+ /* Execute an SLC update */
+ gpu_slc_liveness_update(kctx, &info);
+
+done:
+ kfree(buff);
+
+ return err;
}
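
Because both array lengths in the handler above derive from user-controlled counts, every multiplication (and the final addition) is checked before the single kmalloc(). The same division-based guard in isolation, as a userspace sketch with illustrative names:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Compute count * elem_size into *out, refusing on overflow -- the same
 * division-based test used before the kmalloc() above. */
static bool checked_mul(uint64_t count, uint64_t elem_size, uint64_t *out)
{
	if (elem_size && count > UINT64_MAX / elem_size)
		return false;
	*out = count * elem_size;
	return true;
}

int main(void)
{
	uint64_t bytes;

	if (!checked_mul(UINT64_MAX / 4, 16, &bytes))
		printf("rejected: allocation size would overflow\n");
	if (checked_mul(1000, sizeof(uint64_t), &bytes))
		printf("ok: %llu bytes\n", (unsigned long long)bytes);
	return 0;
}
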
/**
@@ -46,7 +433,10 @@ int gpu_pixel_handle_buffer_liveness_update_ioctl(struct kbase_context* kctx,
*/
int gpu_slc_kctx_init(struct kbase_context *kctx)
{
- (void)kctx;
+ struct pixel_platform_data *pd = kctx->platform_data;
+
+ INIT_WORK(&pd->slc.idle_work, gpu_slc_kctx_idle_worker);
+
return 0;
}
@@ -54,10 +444,28 @@ int gpu_slc_kctx_init(struct kbase_context *kctx)
* gpu_slc_kctx_term() - Called when a kernel context is terminated
*
* @kctx: The &struct kbase_context that is being terminated
+ *
+ * Free up SLC space used by the buffers that this context owns.
*/
void gpu_slc_kctx_term(struct kbase_context *kctx)
{
- (void)kctx;
+ struct kbase_device *kbdev = kctx->kbdev;
+ struct pixel_context *pc = kbdev->platform_context;
+ struct pixel_platform_data *kctx_pd = kctx->platform_data;
+
+ atomic_set(&kctx_pd->slc.idle_work_cancelled, 1);
+ cancel_work_sync(&kctx_pd->slc.idle_work);
+
+ mutex_lock(&pc->slc.lock);
+
+ /* Deduct the usage and demand, freeing that SLC space for the next update */
+ pc->slc.demand -= kctx_pd->slc.peak_demand;
+ pc->slc.usage -= kctx_pd->slc.peak_usage;
+
+ /* Trigger partition resize based on the new demand */
+ gpu_slc_resize_partition(kctx->kbdev);
+
+ mutex_unlock(&pc->slc.lock);
}
/**
@@ -67,7 +475,19 @@ void gpu_slc_kctx_term(struct kbase_context *kctx)
*/
void gpu_slc_kctx_active(struct kbase_context *kctx)
{
- (void)kctx;
+ struct kbase_device *kbdev = kctx->kbdev;
+ struct pixel_platform_data *pd = kctx->platform_data;
+
+ lockdep_assert_held(&kbdev->hwaccess_lock);
+
+ /* Asynchronously cancel the idle work, since we're in atomic context.
+ * The goal here is not to ensure that the idle_work doesn't run. Instead we need to ensure
+ * that any queued idle_work does not run *after* a liveness update for the now active kctx.
+ * Either the idle_work is executing now, and beats the cancellation check, or it runs later
+ * and early-exits at the cancellation check.
+ * In neither scenario will a 'cancelled' idle_work interfere with a later liveness update.
+ */
+ atomic_set(&pd->slc.idle_work_cancelled, 1);
}
/**
@@ -77,11 +497,22 @@ void gpu_slc_kctx_active(struct kbase_context *kctx)
*/
void gpu_slc_kctx_idle(struct kbase_context *kctx)
{
- (void)kctx;
+ struct kbase_device *kbdev = kctx->kbdev;
+ struct pixel_platform_data *pd = kctx->platform_data;
+
+ lockdep_assert_held(&kbdev->hwaccess_lock);
+
+ /* In the event that this line 'un-cancels' the idle_work, and that idle_work is executing,
+ * we will re-queue on the following line anyway, resulting in an unnecessary additional
+ * execution of the worker.
+ * While not optimal, it won't result in a correctness problem.
+ */
+ atomic_set(&pd->slc.idle_work_cancelled, 0);
+ queue_work(system_highpri_wq, &pd->slc.idle_work);
}
/**
- * gpu_slc_init - Initialize the SLC context for the GPU
+ * gpu_slc_init - Initialize the SLC partition for the GPU
*
* @kbdev: The &struct kbase_device for the GPU.
*
@@ -89,11 +520,15 @@ void gpu_slc_kctx_idle(struct kbase_context *kctx)
*/
int gpu_slc_init(struct kbase_device *kbdev)
{
+ struct pixel_context *pc = kbdev->platform_context;
+
+ mutex_init(&pc->slc.lock);
+
return 0;
}
/**
- * gpu_slc_term() - Terminates the Pixel GPU SLC context.
+ * gpu_slc_term() - Terminates the Pixel GPU SLC partition.
*
* @kbdev: The &struct kbase_device for the GPU.
*/