author    Jack Diver <diverj@google.com>  2024-03-13 16:33:34 +0000
committer Jack Diver <diverj@google.com>  2024-03-14 10:13:37 +0000
commit    ad771ce991d6e0855a90d15fa5993332edbbe183 (patch)
tree      e5713a696500e6d4892f1f9a06d3709badb23326
parent    9052ae5d77f9ad5c8552ec915936a531cd899e2a (diff)
download  gpu-ad771ce991d6e0855a90d15fa5993332edbbe183.tar.gz
[DO NOT MERGE ANYWHERE] Revert "mali_kbase: platform: Remove liveness based SLC"
Revert submission 2753879-gpu-slcv2-gs201

Reason for revert: Prebuild did not land before cutoff
Reverted changes: /q/submissionid:2753879-gpu-slcv2-gs201

Bug: 329447972
Change-Id: I60d8cbb170affb4edc126aeee390b16935244b21
-rw-r--r--  mali_kbase/platform/pixel/mali_kbase_config_platform.h   21
-rw-r--r--  mali_kbase/platform/pixel/pixel_gpu_slc.c                451
2 files changed, 464 insertions(+), 8 deletions(-)
diff --git a/mali_kbase/platform/pixel/mali_kbase_config_platform.h b/mali_kbase/platform/pixel/mali_kbase_config_platform.h
index 06b76ea..a0bf623 100644
--- a/mali_kbase/platform/pixel/mali_kbase_config_platform.h
+++ b/mali_kbase/platform/pixel/mali_kbase_config_platform.h
@@ -302,6 +302,10 @@ struct gpu_dvfs_metrics_uid_stats;
* @dvfs.qos.bts.threshold: The G3D shader stack clock at which BTS will be enabled. Set via DT.
* @dvfs.qos.bts.scenario: The index of the BTS scenario to be used. Set via DT.
*
+ * @slc.lock: Synchronize updates to the SLC partition accounting variables.
+ * @slc.demand: The total demand for SLC space, an aggregation of each kctx's demand.
+ * @slc.usage: The total amount of SLC space used, an aggregation of each kctx's usage.
+ *
* @itmon.wq: A workqueue for ITMON page table search.
* @itmon.work: The work item for the above.
* @itmon.nb: The ITMON notifier block.
@@ -414,6 +418,12 @@ struct pixel_context {
} dvfs;
#endif /* CONFIG_MALI_MIDGARD_DVFS */
+ struct {
+ struct mutex lock;
+ u64 demand;
+ u64 usage;
+ } slc;
+
#if IS_ENABLED(CONFIG_EXYNOS_ITMON)
struct {
struct workqueue_struct *wq;
@@ -430,10 +440,21 @@ struct pixel_context {
*
* @kctx: Handle to the parent kctx
* @stats: Tracks the dvfs metrics for the UID associated with this context
+ *
+ * @slc.peak_demand: The parent context's maximum demand for SLC space
+ * @slc.peak_usage: The parent context's maximum use of SLC space
+ * @slc.idle_work: Work item used to queue SLC partition shrink upon context idle
+ * @slc.idle_work_cancelled: Flag for async cancellation of idle_work
*/
struct pixel_platform_data {
struct kbase_context *kctx;
struct gpu_dvfs_metrics_uid_stats* stats;
+ struct {
+ u64 peak_demand;
+ u64 peak_usage;
+ struct work_struct idle_work;
+ atomic_t idle_work_cancelled;
+ } slc;
};
#endif /* _KBASE_CONFIG_PLATFORM_H_ */
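
The two structs added above split SLC accounting across two levels: device-wide totals (slc.demand and slc.usage, serialized by slc.lock) and per-context peaks (slc.peak_demand and slc.peak_usage). The intended invariant is that the device totals equal the sum of every live context's peaks. A minimal userspace model of that roll-up, with hypothetical slc_dev/slc_ctx types standing in for the kbase structures:

#include <stdint.h>
#include <stdio.h>

/* Hypothetical stand-ins for the kbase structures above. */
struct slc_ctx { uint64_t peak_demand, peak_usage; };
struct slc_dev { uint64_t demand, usage; }; /* guarded by slc.lock in the driver */

/* Fold a context's freshly computed peaks into the device totals, the way
 * the liveness update path does: retire the stale contribution first,
 * then publish the new one. */
static void slc_dev_update(struct slc_dev *dev, struct slc_ctx *ctx,
                           uint64_t new_peak_demand, uint64_t new_peak_usage)
{
	dev->demand -= ctx->peak_demand;
	dev->usage -= ctx->peak_usage;
	ctx->peak_demand = new_peak_demand;
	ctx->peak_usage = new_peak_usage;
	dev->demand += ctx->peak_demand;
	dev->usage += ctx->peak_usage;
}

int main(void)
{
	struct slc_dev dev = { 0, 0 };
	struct slc_ctx a = { 0, 0 }, b = { 0, 0 };

	slc_dev_update(&dev, &a, 64, 32);
	slc_dev_update(&dev, &b, 128, 128);
	slc_dev_update(&dev, &a, 16, 16); /* context A shrinks: totals follow */
	printf("demand=%llu usage=%llu\n",
	       (unsigned long long)dev.demand, (unsigned long long)dev.usage);
	return 0;
}

Retiring the stale contribution before adding the new one keeps the totals correct no matter how many times a context re-reports.
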
diff --git a/mali_kbase/platform/pixel/pixel_gpu_slc.c b/mali_kbase/platform/pixel/pixel_gpu_slc.c
index 8e46be1..d6cb131 100644
--- a/mali_kbase/platform/pixel/pixel_gpu_slc.c
+++ b/mali_kbase/platform/pixel/pixel_gpu_slc.c
@@ -17,6 +17,321 @@
#include "mali_kbase_config_platform.h"
#include "pixel_gpu_slc.h"
+struct dirty_region {
+ u64 first_vpfn;
+ u64 last_vpfn;
+ u64 dirty_pgds;
+};
+
+/**
+ * struct gpu_slc_liveness_update_info - Buffer info, and live ranges
+ *
+ * @buffer_va: Array of buffer base virtual addresses
+ * @buffer_sizes: Array of buffer sizes
+ * @buffer_count: Number of elements in the va and sizes buffers
+ * @live_ranges: Array of &struct kbase_pixel_gpu_slc_liveness_mark denoting live ranges for
+ * each buffer
+ * @live_ranges_count: Number of elements in the live ranges buffer
+ */
+struct gpu_slc_liveness_update_info {
+ u64* buffer_va;
+ u64* buffer_sizes;
+ u64 buffer_count;
+ struct kbase_pixel_gpu_slc_liveness_mark* live_ranges;
+ u64 live_ranges_count;
+};
+
+/**
+ * gpu_slc_lock_as - Lock the current process address space
+ *
+ * @kctx: The &struct kbase_context
+ */
+static void gpu_slc_lock_as(struct kbase_context *kctx)
+{
+ down_write(kbase_mem_get_process_mmap_lock());
+ kbase_gpu_vm_lock(kctx);
+}
+
+/**
+ * gpu_slc_unlock_as - Unlock the current process address space
+ *
+ * @kctx: The &struct kbase_context
+ */
+static void gpu_slc_unlock_as(struct kbase_context *kctx)
+{
+ kbase_gpu_vm_unlock(kctx);
+ up_write(kbase_mem_get_process_mmap_lock());
+}
+
+/**
+ * gpu_slc_in_group - Check whether the region is SLC cacheable
+ *
+ * @reg: The gpu memory region to check for an SLC cacheable memory group.
+ */
+static bool gpu_slc_in_group(struct kbase_va_region* reg)
+{
+ return reg->gpu_alloc->group_id == MGM_SLC_GROUP_ID;
+}
+
+/**
+ * gpu_slc_get_region - Find the gpu memory region from a virtual address
+ *
+ * @kctx: The &struct kbase_context
+ * @va: The base gpu virtual address of the region
+ *
+ * Return: On success, returns a valid memory region. On failure NULL is returned.
+ */
+static struct kbase_va_region* gpu_slc_get_region(struct kbase_context *kctx, u64 va)
+{
+ struct kbase_va_region *reg;
+
+ if (!va)
+ goto invalid;
+
+ if ((va & ~PAGE_MASK) && (va >= PAGE_SIZE))
+ goto invalid;
+
+ /* Find the region that the virtual address belongs to */
+ reg = kbase_region_tracker_find_region_base_address(kctx, va);
+
+ /* Validate the region */
+ if (kbase_is_region_invalid_or_free(reg))
+ goto invalid;
+
+ return reg;
+
+invalid:
+ dev_dbg(kctx->kbdev->dev, "pixel: failed to find valid region for gpu_va: %llu", va);
+ return NULL;
+}
+
+/**
+ * gpu_slc_migrate_region - Add PBHA that will make the pages SLC cacheable
+ *
+ * @kctx: The &struct kbase_context
+ * @reg: The gpu memory region to migrate to an SLC cacheable memory group
+ * @dirty_reg: The &struct dirty_region containing the extent of the dirty page table entries
+ */
+static void gpu_slc_migrate_region(struct kbase_context *kctx, struct kbase_va_region *reg, struct dirty_region *dirty_reg)
+{
+ int err;
+ u64 vpfn;
+ size_t page_nr;
+
+ KBASE_DEBUG_ASSERT(kctx);
+ KBASE_DEBUG_ASSERT(reg);
+
+ if (gpu_slc_in_group(reg)) {
+ return;
+ }
+
+ vpfn = reg->start_pfn;
+ page_nr = kbase_reg_current_backed_size(reg);
+
+ err = kbase_mmu_update_pages_no_flush(kctx->kbdev, &kctx->mmu, vpfn,
+ kbase_get_gpu_phy_pages(reg),
+ page_nr,
+ reg->flags,
+ MGM_SLC_GROUP_ID,
+ &dirty_reg->dirty_pgds);
+
+ /* Track the dirty region */
+ dirty_reg->first_vpfn = min(dirty_reg->first_vpfn, vpfn);
+ dirty_reg->last_vpfn = max(dirty_reg->last_vpfn, vpfn + page_nr);
+
+ if (err)
+ dev_warn(kctx->kbdev->dev, "pixel: failed to move region to SLC: %d", err);
+ else
+ /* If everything is good, then set the new group on the region. */
+ reg->gpu_alloc->group_id = MGM_SLC_GROUP_ID;
+}
+
+/**
+ * gpu_slc_flush_dirty_region - Perform an MMU flush for a dirty page region
+ *
+ * @kctx: The &struct kbase_context
+ * @dirty_reg: The &struct dirty_region containing the extent of the dirty page table entries
+ */
+static void gpu_slc_flush_dirty_region(struct kbase_context *kctx, struct dirty_region *dirty_reg)
+{
+ size_t const dirty_page_nr =
+ (dirty_reg->last_vpfn - min(dirty_reg->first_vpfn, dirty_reg->last_vpfn));
+
+ if (!dirty_page_nr)
+ return;
+
+ kbase_mmu_flush_invalidate_update_pages(
+ kctx->kbdev, kctx, dirty_reg->first_vpfn, dirty_page_nr, dirty_reg->dirty_pgds);
+}
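
Note that gpu_slc_migrate_region() only marks page table entries dirty; the MMU flush is deferred and batched over the union of all touched ranges, so a whole liveness update costs a single flush. A standalone sketch of just the min/max accumulation, using hypothetical userspace types (the UINT64_MAX/0 initial values make the empty region flush zero pages, as above):

#include <stdint.h>
#include <stdio.h>

struct dirty_region { uint64_t first_vpfn, last_vpfn; };

/* Empty region: first > last, so the flushed page count below is zero. */
#define DIRTY_REGION_INIT { UINT64_MAX, 0 }

static void dirty_region_extend(struct dirty_region *d, uint64_t vpfn, uint64_t pages)
{
	if (vpfn < d->first_vpfn)
		d->first_vpfn = vpfn;
	if (vpfn + pages > d->last_vpfn)
		d->last_vpfn = vpfn + pages;
}

static uint64_t dirty_region_pages(const struct dirty_region *d)
{
	/* The floored subtraction mirrors gpu_slc_flush_dirty_region(). */
	uint64_t first = d->first_vpfn < d->last_vpfn ? d->first_vpfn : d->last_vpfn;

	return d->last_vpfn - first;
}

int main(void)
{
	struct dirty_region d = DIRTY_REGION_INIT;

	printf("empty: %llu pages\n", (unsigned long long)dirty_region_pages(&d));
	dirty_region_extend(&d, 0x100, 16); /* first migrated region */
	dirty_region_extend(&d, 0x80, 8);   /* second one, lower in the address space */
	printf("merged: [0x%llx, 0x%llx) = %llu pages\n",
	       (unsigned long long)d.first_vpfn, (unsigned long long)d.last_vpfn,
	       (unsigned long long)dirty_region_pages(&d));
	return 0;
}
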
+
+/**
+ * gpu_slc_resize_partition - Attempt to resize the GPU's SLC partition to meet demand.
+ *
+ * @kbdev: The &struct kbase_device for the GPU.
+ */
+static void gpu_slc_resize_partition(struct kbase_device* kbdev)
+{
+ struct pixel_context *pc = kbdev->platform_context;
+
+ /* Request that the mgm select an SLC partition that fits our demand */
+ pixel_mgm_resize_group_to_fit(kbdev->mgm_dev, MGM_SLC_GROUP_ID, pc->slc.demand);
+
+ dev_dbg(kbdev->dev, "pixel: resized GPU SLC partition to meet demand: %llu", pc->slc.demand);
+}
+
+/**
+ * gpu_slc_get_partition_size - Query the current size of the GPU's SLC partition.
+ *
+ * @kbdev: The &struct kbase_device for the GPU.
+ *
+ * Return: The size of the GPU's SLC partition.
+ */
+static u64 gpu_slc_get_partition_size(struct kbase_device* kbdev)
+{
+ u64 const partition_size = pixel_mgm_query_group_size(kbdev->mgm_dev, MGM_SLC_GROUP_ID);
+
+ dev_dbg(kbdev->dev, "pixel: GPU SLC partition size: %llu", partition_size);
+
+ return partition_size;
+}
+
+/**
+ * gpu_slc_liveness_update - Respond to a liveness update by trying to put the new buffers into free
+ * SLC space, and resizing the partition to meet demand.
+ *
+ * @kctx: The &struct kbase_context corresponding to a user space context which sent the liveness
+ * update
+ * @info: See struct gpu_slc_liveness_update_info
+ */
+static void gpu_slc_liveness_update(struct kbase_context* kctx,
+ struct gpu_slc_liveness_update_info* info)
+{
+ struct kbase_device* kbdev = kctx->kbdev;
+ struct pixel_context *pc = kbdev->platform_context;
+ struct pixel_platform_data *kctx_pd = kctx->platform_data;
+ struct dirty_region dirty_reg = {
+ .first_vpfn = U64_MAX,
+ .last_vpfn = 0,
+ .dirty_pgds = 0,
+ };
+ u64 current_usage = 0;
+ u64 current_demand = 0;
+ u64 free_space;
+ int i;
+
+ /* Lock the process address space before modifying ATEs */
+ gpu_slc_lock_as(kctx);
+
+ /* Synchronize updates to the partition size and usage */
+ mutex_lock(&pc->slc.lock);
+
+ dev_dbg(kbdev->dev, "pixel: buffer liveness update received");
+
+ /* Remove the usage and demand from the previous liveness update */
+ pc->slc.demand -= kctx_pd->slc.peak_demand;
+ pc->slc.usage -= kctx_pd->slc.peak_usage;
+ kctx_pd->slc.peak_demand = 0;
+ kctx_pd->slc.peak_usage = 0;
+
+ /* Calculate the remaining free space in the SLC partition (floored at 0) */
+ free_space = gpu_slc_get_partition_size(kbdev);
+ free_space -= min(free_space, pc->slc.usage);
+
+ for (i = 0; i < info->live_ranges_count; ++i)
+ {
+ struct kbase_va_region *reg;
+ u64 size;
+ u64 va;
+ u32 index = info->live_ranges[i].index;
+
+ if (unlikely(index >= info->buffer_count))
+ continue;
+
+ size = info->buffer_sizes[index];
+ va = info->buffer_va[index];
+
+ reg = gpu_slc_get_region(kctx, va);
+ if (!reg)
+ continue;
+
+ switch (info->live_ranges[i].type)
+ {
+ case KBASE_PIXEL_GPU_LIVE_RANGE_BEGIN:
+ /* Update demand as though there's no size limit */
+ current_demand += size;
+ kctx_pd->slc.peak_demand = max(kctx_pd->slc.peak_demand, current_demand);
+
+ /* Check whether there's free space in the partition to store the buffer */
+ if (free_space >= current_usage + size)
+ gpu_slc_migrate_region(kctx, reg, &dirty_reg);
+
+ /* This may be true even if the free-space check above failed,
+ * as a previous call to this function may have migrated the region.
+ * In such a scenario, the current_usage may exceed the available free_space
+ * and we will be oversubscribed to the SLC partition.
+ * We could migrate the region back to the non-SLC group, but this would
+ * require an SLC flush, so for now we do nothing.
+ */
+ if (gpu_slc_in_group(reg)) {
+ current_usage += size;
+ kctx_pd->slc.peak_usage = max(kctx_pd->slc.peak_usage, current_usage);
+ }
+ break;
+ case KBASE_PIXEL_GPU_LIVE_RANGE_END:
+ current_demand -= size;
+ if (gpu_slc_in_group(reg))
+ current_usage -= size;
+ break;
+ }
+ }
+ /* Perform single page table flush */
+ gpu_slc_flush_dirty_region(kctx, &dirty_reg);
+
+ /* Indicates a missing live range end marker */
+ WARN_ON_ONCE(current_demand != 0 || current_usage != 0);
+
+ /* Update the total usage and demand */
+ pc->slc.demand += kctx_pd->slc.peak_demand;
+ pc->slc.usage += kctx_pd->slc.peak_usage;
+
+ dev_dbg(kbdev->dev,
+ "pixel: kctx_%d, peak_demand: %llu, peak_usage: %llu",
+ kctx->id,
+ kctx_pd->slc.peak_demand,
+ kctx_pd->slc.peak_usage);
+ dev_dbg(kbdev->dev, "pixel: kbdev, demand: %llu, usage: %llu", pc->slc.demand, pc->slc.usage);
+
+ /* Trigger partition resize based on the new demand */
+ gpu_slc_resize_partition(kctx->kbdev);
+
+ mutex_unlock(&pc->slc.lock);
+ gpu_slc_unlock_as(kctx);
+}
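
The peak-demand computation above is a maximum over a running prefix sum of the begin/end marks. A self-contained sketch of just that reduction; the mark type here is a hypothetical mirror of kbase_pixel_gpu_slc_liveness_mark:

#include <stdint.h>
#include <stdio.h>

enum mark_type { RANGE_BEGIN, RANGE_END };
struct mark { enum mark_type type; uint32_t index; }; /* hypothetical uapi mirror */

/* Peak demand = maximum of the running sum of live buffer sizes. */
static uint64_t peak_demand(const struct mark *marks, uint64_t n_marks,
                            const uint64_t *sizes, uint64_t n_buffers)
{
	uint64_t current = 0, peak = 0;
	uint64_t i;

	for (i = 0; i < n_marks; ++i) {
		if (marks[i].index >= n_buffers)
			continue; /* skip out-of-range indices, as the driver does */
		if (marks[i].type == RANGE_BEGIN) {
			current += sizes[marks[i].index];
			if (current > peak)
				peak = current;
		} else {
			current -= sizes[marks[i].index];
		}
	}
	/* current != 0 here would indicate a missing end marker */
	return peak;
}

int main(void)
{
	uint64_t sizes[] = { 100, 50, 25 };
	struct mark marks[] = {
		{ RANGE_BEGIN, 0 }, { RANGE_BEGIN, 1 }, /* buffers 0 and 1 overlap */
		{ RANGE_END, 0 },
		{ RANGE_BEGIN, 2 }, { RANGE_END, 2 }, { RANGE_END, 1 },
	};

	/* Prints 150: buffers 0 and 1 are live together at the peak. */
	printf("peak demand = %llu\n",
	       (unsigned long long)peak_demand(marks, 6, sizes, 3));
	return 0;
}
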
+
+static void gpu_slc_kctx_idle_worker(struct work_struct *work)
+{
+ struct pixel_platform_data *pd =
+ container_of(work, struct pixel_platform_data, slc.idle_work);
+ struct kbase_context *kctx = pd->kctx;
+ struct kbase_device *kbdev = kctx->kbdev;
+ struct pixel_context *pc = kbdev->platform_context;
+
+ if (atomic_read(&pd->slc.idle_work_cancelled))
+ return;
+
+ mutex_lock(&pc->slc.lock);
+
+ pc->slc.demand -= pd->slc.peak_demand;
+ pc->slc.usage -= pd->slc.peak_usage;
+
+ pd->slc.peak_demand = 0;
+ pd->slc.peak_usage = 0;
+
+ gpu_slc_resize_partition(kctx->kbdev);
+
+ mutex_unlock(&pc->slc.lock);
+}
+
/**
* gpu_pixel_handle_buffer_liveness_update_ioctl() - See gpu_slc_liveness_update
*
@@ -30,8 +345,80 @@
int gpu_pixel_handle_buffer_liveness_update_ioctl(struct kbase_context* kctx,
struct kbase_ioctl_buffer_liveness_update* update)
{
- (void)kctx, (void)update;
- return 0;
+ int err = -EINVAL;
+ struct gpu_slc_liveness_update_info info;
+ u64* buff = NULL;
+ u64 total_buff_size;
+
+ /* Compute the sizes of the user space arrays that we need to copy */
+ u64 const buffer_info_size = sizeof(u64) * update->buffer_count;
+ u64 const live_ranges_size =
+ sizeof(struct kbase_pixel_gpu_slc_liveness_mark) * update->live_ranges_count;
+
+ /* Guard against overflows and empty sizes */
+ if (!buffer_info_size || !live_ranges_size)
+ goto done;
+ if (U64_MAX / sizeof(u64) < update->buffer_count)
+ goto done;
+ if (U64_MAX / sizeof(struct kbase_pixel_gpu_slc_liveness_mark) < update->live_ranges_count)
+ goto done;
+ /* Guard against nullptr */
+ if (!update->live_ranges_address || !update->buffer_va_address || !update->buffer_sizes_address)
+ goto done;
+ /* Calculate the total buffer size required and detect overflows */
+ if ((U64_MAX - live_ranges_size) / 2 < buffer_info_size)
+ goto done;
+
+ total_buff_size = buffer_info_size * 2 + live_ranges_size;
+
+ /* Allocate the memory we require to copy from user space */
+ buff = kmalloc(total_buff_size, GFP_KERNEL);
+ if (buff == NULL) {
+ dev_err(kctx->kbdev->dev, "pixel: failed to allocate buffer for liveness update");
+ err = -ENOMEM;
+ goto done;
+ }
+
+ /* Set up the info struct by pointing into the allocation. All fields are 8-byte aligned */
+ info = (struct gpu_slc_liveness_update_info){
+ .buffer_va = buff,
+ .buffer_sizes = buff + update->buffer_count,
+ .buffer_count = update->buffer_count,
+ .live_ranges = (struct kbase_pixel_gpu_slc_liveness_mark*)(buff + update->buffer_count * 2),
+ .live_ranges_count = update->live_ranges_count,
+ };
+
+ /* Copy the data from user space */
+ err = copy_from_user(
+ info.live_ranges, u64_to_user_ptr(update->live_ranges_address), live_ranges_size);
+ if (err) {
+ dev_err(kctx->kbdev->dev, "pixel: failed to copy live ranges");
+ err = -EFAULT;
+ goto done;
+ }
+
+ err = copy_from_user(
+ info.buffer_sizes, u64_to_user_ptr(update->buffer_sizes_address), buffer_info_size);
+ if (err) {
+ dev_err(kctx->kbdev->dev, "pixel: failed to copy buffer sizes");
+ err = -EFAULT;
+ goto done;
+ }
+
+ err = copy_from_user(info.buffer_va, u64_to_user_ptr(update->buffer_va_address), buffer_info_size);
+ if (err) {
+ dev_err(kctx->kbdev->dev, "pixel: failed to copy buffer addresses");
+ err = -EFAULT;
+ goto done;
+ }
+
+ /* Execute an SLC update */
+ gpu_slc_liveness_update(kctx, &info);
+
+done:
+ kfree(buff);
+
+ return err;
}
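
Because both array lengths in the handler above derive from user-controlled counts, every multiplication (and the final addition) is checked before the single kmalloc(). The same division-based guard in isolation, as a userspace sketch with illustrative names:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Compute count * elem_size into *out, refusing on overflow -- the same
 * division-based test used before the kmalloc() above. */
static bool checked_mul(uint64_t count, uint64_t elem_size, uint64_t *out)
{
	if (elem_size && count > UINT64_MAX / elem_size)
		return false;
	*out = count * elem_size;
	return true;
}

int main(void)
{
	uint64_t bytes;

	if (!checked_mul(UINT64_MAX / 4, 16, &bytes))
		printf("rejected: allocation size would overflow\n");
	if (checked_mul(1000, sizeof(uint64_t), &bytes))
		printf("ok: %llu bytes\n", (unsigned long long)bytes);
	return 0;
}
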
/**
@@ -46,7 +433,10 @@ int gpu_pixel_handle_buffer_liveness_update_ioctl(struct kbase_context* kctx,
*/
int gpu_slc_kctx_init(struct kbase_context *kctx)
{
- (void)kctx;
+ struct pixel_platform_data *pd = kctx->platform_data;
+
+ INIT_WORK(&pd->slc.idle_work, gpu_slc_kctx_idle_worker);
+
return 0;
}
@@ -54,10 +444,28 @@ int gpu_slc_kctx_init(struct kbase_context *kctx)
* gpu_slc_kctx_term() - Called when a kernel context is terminated
*
* @kctx: The &struct kbase_context that is being terminated
+ *
+ * Free up SLC space used by the buffers that this context owns.
*/
void gpu_slc_kctx_term(struct kbase_context *kctx)
{
- (void)kctx;
+ struct kbase_device *kbdev = kctx->kbdev;
+ struct pixel_context *pc = kbdev->platform_context;
+ struct pixel_platform_data *kctx_pd = kctx->platform_data;
+
+ atomic_set(&kctx_pd->slc.idle_work_cancelled, 1);
+ cancel_work_sync(&kctx_pd->slc.idle_work);
+
+ mutex_lock(&pc->slc.lock);
+
+ /* Deduct the usage and demand, freeing that SLC space for the next update */
+ pc->slc.demand -= kctx_pd->slc.peak_demand;
+ pc->slc.usage -= kctx_pd->slc.peak_usage;
+
+ /* Trigger partition resize based on the new demand */
+ gpu_slc_resize_partition(kctx->kbdev);
+
+ mutex_unlock(&pc->slc.lock);
}
/**
@@ -67,7 +475,19 @@ void gpu_slc_kctx_term(struct kbase_context *kctx)
*/
void gpu_slc_kctx_active(struct kbase_context *kctx)
{
- (void)kctx;
+ struct kbase_device *kbdev = kctx->kbdev;
+ struct pixel_platform_data *pd = kctx->platform_data;
+
+ lockdep_assert_held(&kbdev->hwaccess_lock);
+
+ /* Asynchronously cancel the idle work, since we're in atomic context.
+ * The goal here is not to ensure that the idle_work doesn't run. Instead we need to ensure
+ * that any queued idle_work does not run *after* a liveness update for the now active kctx.
+ * Either the idle_work is executing now, and beats the cancellation check, or it runs later
+ * and early-exits at the cancellation check.
+ * In neither scenario will a 'cancelled' idle_work interfere with a later liveness update.
+ */
+ atomic_set(&pd->slc.idle_work_cancelled, 1);
}
/**
@@ -77,11 +497,22 @@ void gpu_slc_kctx_active(struct kbase_context *kctx)
*/
void gpu_slc_kctx_idle(struct kbase_context *kctx)
{
- (void)kctx;
+ struct kbase_device *kbdev = kctx->kbdev;
+ struct pixel_platform_data *pd = kctx->platform_data;
+
+ lockdep_assert_held(&kbdev->hwaccess_lock);
+
+ /* In the event that this line 'un-cancels' the idle_work, and that idle_work is executing,
+ * we will re-queue on the following line anyway, resulting in an unnecessary additional
+ * execution of the worker.
+ * While not optimal, it won't result in a correctness problem.
+ */
+ atomic_set(&pd->slc.idle_work_cancelled, 0);
+ queue_work(system_highpri_wq, &pd->slc.idle_work);
}
/**
- * gpu_slc_init - Initialize the SLC context for the GPU
+ * gpu_slc_init - Initialize the SLC partition for the GPU
*
* @kbdev: The &struct kbase_device for the GPU.
*
@@ -89,11 +520,15 @@ void gpu_slc_kctx_idle(struct kbase_context *kctx)
*/
int gpu_slc_init(struct kbase_device *kbdev)
{
+ struct pixel_context *pc = kbdev->platform_context;
+
+ mutex_init(&pc->slc.lock);
+
return 0;
}
/**
- * gpu_slc_term() - Terminates the Pixel GPU SLC context.
+ * gpu_slc_term() - Terminates the Pixel GPU SLC partition.
*
* @kbdev: The &struct kbase_device for the GPU.
*/