-rw-r--r--  common/include/uapi/gpu/arm/midgard/platform/pixel/pixel_memory_group_manager.h  |  45
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_js_backend.c                                    |   3
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_scheduler.c                                         |   4
-rw-r--r--  mali_kbase/mali_kbase_config.c                                                    |   9
-rw-r--r--  mali_kbase/mali_kbase_config.h                                                    |  18
-rw-r--r--  mali_kbase/platform/pixel/Kbuild                                                  |  10
-rw-r--r--  mali_kbase/platform/pixel/mali_kbase_config_platform.h                            |  35
-rw-r--r--  mali_kbase/platform/pixel/pixel_gpu.c                                             |   1
-rw-r--r--  mali_kbase/platform/pixel/pixel_gpu_slc.c                                         | 403
-rw-r--r--  mali_kbase/platform/pixel/pixel_gpu_slc.h                                         |  25
-rw-r--r--  mali_pixel/Documentation/ABI/testing/sysfs-kernel-pixel_stat-gpu                  |   7
-rw-r--r--  mali_pixel/Kbuild                                                                 |   6
-rw-r--r--  mali_pixel/memory_group_manager.c                                                 | 539
-rw-r--r--  mali_pixel/pixel_slc.c                                                            | 405
-rw-r--r--  mali_pixel/pixel_slc.h                                                            |  98
15 files changed, 753 insertions(+), 855 deletions(-)
diff --git a/common/include/uapi/gpu/arm/midgard/platform/pixel/pixel_memory_group_manager.h b/common/include/uapi/gpu/arm/midgard/platform/pixel/pixel_memory_group_manager.h
index b575c79..2a27f4f 100644
--- a/common/include/uapi/gpu/arm/midgard/platform/pixel/pixel_memory_group_manager.h
+++ b/common/include/uapi/gpu/arm/midgard/platform/pixel/pixel_memory_group_manager.h
@@ -7,49 +7,10 @@
#ifndef _UAPI_PIXEL_MEMORY_GROUP_MANAGER_H_
#define _UAPI_PIXEL_MEMORY_GROUP_MANAGER_H_
-/**
- * enum pixel_mgm_group_id - Symbolic names for used memory groups
- */
-enum pixel_mgm_group_id
-{
- /* The Mali driver requires that allocations made on one of the groups
- * are not treated specially.
- */
- MGM_RESERVED_GROUP_ID = 0,
-
- /* Group for memory that should be cached in the system level cache. */
- MGM_SLC_GROUP_ID = 1,
-
- /* Group for memory explicitly allocated in SLC. */
- MGM_SLC_EXPLICIT_GROUP_ID = 2,
+void pixel_mgm_slc_update_signal(struct memory_group_manager_device* mgm_dev, u64 signal);
- /* Imported memory is handled by the allocator of the memory, and the Mali
- * DDK will request a group_id for such memory via mgm_get_import_memory_id().
- * We specify which group we want to use for this here.
- */
- MGM_IMPORTED_MEMORY_GROUP_ID = (MEMORY_GROUP_MANAGER_NR_GROUPS - 1),
-};
+void pixel_mgm_slc_inc_refcount(struct memory_group_manager_device* mgm_dev);
-/**
- * pixel_mgm_query_group_size - Query the current size of a memory group
- *
- * @mgm_dev: The memory group manager through which the request is being made.
- * @group_id: Memory group to query.
- *
- * Returns the actual size of the memory group's active partition
- */
-extern u64 pixel_mgm_query_group_size(struct memory_group_manager_device* mgm_dev,
- enum pixel_mgm_group_id group_id);
-
-/**
- * pixel_mgm_resize_group_to_fit - Resize a memory group to meet @demand, if possible
- *
- * @mgm_dev: The memory group manager through which the request is being made.
- * @group_id: Memory group for which we will change the backing partition.
- * @demand: The demanded space from the memory group.
- */
-extern void pixel_mgm_resize_group_to_fit(struct memory_group_manager_device* mgm_dev,
- enum pixel_mgm_group_id group_id,
- u64 demand);
+void pixel_mgm_slc_dec_refcount(struct memory_group_manager_device* mgm_dev);
#endif /* _UAPI_PIXEL_MEMORY_GROUP_MANAGER_H_ */
diff --git a/mali_kbase/backend/gpu/mali_kbase_js_backend.c b/mali_kbase/backend/gpu/mali_kbase_js_backend.c
index be72c4a..304737f 100644
--- a/mali_kbase/backend/gpu/mali_kbase_js_backend.c
+++ b/mali_kbase/backend/gpu/mali_kbase_js_backend.c
@@ -244,6 +244,9 @@ static enum hrtimer_restart timer_callback(struct hrtimer *timer)
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+ /* Inform platform of scheduling event */
+ kbasep_platform_event_tick_tock(kbdev);
+
return HRTIMER_NORESTART;
}
diff --git a/mali_kbase/csf/mali_kbase_csf_scheduler.c b/mali_kbase/csf/mali_kbase_csf_scheduler.c
index 8ead416..9e47a1e 100644
--- a/mali_kbase/csf/mali_kbase_csf_scheduler.c
+++ b/mali_kbase/csf/mali_kbase_csf_scheduler.c
@@ -43,6 +43,7 @@
#include <mali_kbase_gpu_metrics.h>
#include <csf/mali_kbase_csf_trace_buffer.h>
#endif /* CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD */
+#include <uapi/gpu/arm/midgard/platform/pixel/pixel_memory_group_manager.h>
/* Value to indicate that a queue group is not in the groups_to_schedule list */
#define KBASEP_GROUP_PREPARED_SEQ_NUM_INVALID (U32_MAX)
@@ -6685,6 +6686,9 @@ static int kbase_csf_scheduler_kthread(void *data)
dev_dbg(kbdev->dev, "Waking up for event after a scheduling iteration.");
wake_up_all(&kbdev->csf.event_wait);
+
+ /* Inform platform of scheduling event */
+ kbasep_platform_event_tick_tock(kbdev);
}
/* Wait for the other thread, that signaled the exit, to call kthread_stop() */
diff --git a/mali_kbase/mali_kbase_config.c b/mali_kbase/mali_kbase_config.c
index 72080a7..669e1c3 100644
--- a/mali_kbase/mali_kbase_config.c
+++ b/mali_kbase/mali_kbase_config.c
@@ -119,6 +119,15 @@ void kbasep_platform_event_work_end(void *param)
platform_funcs_p->platform_handler_work_end_func(param);
}
+void kbasep_platform_event_tick_tock(struct kbase_device *kbdev)
+{
+ struct kbase_platform_funcs_conf *platform_funcs_p;
+
+ platform_funcs_p = (struct kbase_platform_funcs_conf*)PLATFORM_FUNCS;
+ if (platform_funcs_p && platform_funcs_p->platform_handler_tick_tock)
+ platform_funcs_p->platform_handler_tick_tock(kbdev);
+}
+
int kbasep_platform_fw_config_init(struct kbase_device *kbdev)
{
struct kbase_platform_funcs_conf *platform_funcs_p;
diff --git a/mali_kbase/mali_kbase_config.h b/mali_kbase/mali_kbase_config.h
index 7f6d3ed..549c170 100644
--- a/mali_kbase/mali_kbase_config.h
+++ b/mali_kbase/mali_kbase_config.h
@@ -138,6 +138,14 @@ struct kbase_platform_funcs_conf {
*/
void (*platform_handler_context_idle)(struct kbase_context *kctx);
/**
+ * platform_handler_tick_tock - Platform specific callback when a scheduler tick/tock occurs.
+ *
+ * @kbdev: kbase_device pointer
+ *
+ * Context: Process context
+ */
+ void (*platform_handler_tick_tock)(struct kbase_device *kbdev);
+ /**
* platform_handler_work_begin_func - Platform specific handler whose
* function changes depending on the
* backend used.
@@ -634,6 +642,16 @@ void kbasep_platform_event_work_begin(void *param);
void kbasep_platform_event_work_end(void *param);
/**
+ * kbasep_platform_event_tick_tock - Platform specific callback when a scheduler tick/tock occurs.
+ *
+ * @kbdev: kbase_device pointer
+ *
+ * Function calls a platform defined routine if specified in the configuration attributes.
+ *
+ */
+void kbasep_platform_event_tick_tock(struct kbase_device *kbdev);
+
+/**
* kbasep_platform_fw_config_init - Platform specific callback to configure FW
*
* @kbdev - kbase_device pointer
diff --git a/mali_kbase/platform/pixel/Kbuild b/mali_kbase/platform/pixel/Kbuild
index 6d6b0a8..c35c0be 100644
--- a/mali_kbase/platform/pixel/Kbuild
+++ b/mali_kbase/platform/pixel/Kbuild
@@ -45,8 +45,14 @@ mali_kbase-y += \
platform/$(MALI_PLATFORM_DIR)/pixel_gpu_sscd.o
endif
-mali_kbase-$(CONFIG_MALI_PIXEL_GPU_SLC) += \
- platform/$(MALI_PLATFORM_DIR)/pixel_gpu_slc.o
+ifeq ($(CONFIG_MALI_PIXEL_GPU_SLC),y)
+ mali_kbase-y += \
+ platform/$(MALI_PLATFORM_DIR)/pixel_gpu_slc.o
+
+ ifeq ($(CONFIG_SOC_ZUMA),y)
+ ccflags-y += -DPIXEL_GPU_SLC_ACPM_SIGNAL
+ endif
+endif
mali_kbase-$(CONFIG_MALI_CSF_SUPPORT) += \
platform/$(MALI_PLATFORM_DIR)/pixel_gpu_debug.o
diff --git a/mali_kbase/platform/pixel/mali_kbase_config_platform.h b/mali_kbase/platform/pixel/mali_kbase_config_platform.h
index 991e5d4..4ad3318 100644
--- a/mali_kbase/platform/pixel/mali_kbase_config_platform.h
+++ b/mali_kbase/platform/pixel/mali_kbase_config_platform.h
@@ -320,15 +320,12 @@ struct gpu_dvfs_metrics_uid_stats;
* @dvfs.qos.bts.threshold: The G3D shader stack clock at which BTS will be enabled. Set via DT.
* @dvfs.qos.bts.scenario: The index of the BTS scenario to be used. Set via DT.
*
- * @slc.lock: Synchronize updates to the SLC partition accounting variables.
- * @slc.demand: The total demand for SLC space, an aggregation of each kctx's demand.
- * @slc.usage: The total amount of SLC space used, an aggregation of each kctx's usage.
- *
* @itmon.wq: A workqueue for ITMON page table search.
* @itmon.work: The work item for the above.
* @itmon.nb: The ITMON notifier block.
* @itmon.pa: The faulting physical address.
* @itmon.active: Active count, non-zero while a search is active.
+ * @slc_demand: Tracks demand for SLC space
*/
struct pixel_context {
struct kbase_device *kbdev;
@@ -461,12 +458,6 @@ struct pixel_context {
} dvfs;
#endif /* CONFIG_MALI_MIDGARD_DVFS */
- struct {
- struct mutex lock;
- u64 demand;
- u64 usage;
- } slc;
-
#if IS_ENABLED(CONFIG_EXYNOS_ITMON)
struct {
struct workqueue_struct *wq;
@@ -476,28 +467,26 @@ struct pixel_context {
atomic_t active;
} itmon;
#endif
+#ifndef PIXEL_GPU_SLC_ACPM_SIGNAL
+ atomic_t slc_demand;
+#endif /* PIXEL_GPU_SLC_ACPM_SIGNAL */
};
/**
* struct pixel_platform_data - Per kbase_context Pixel specific platform data
*
- * @kctx: Handle to the parent kctx
- * @stats: Tracks the dvfs metrics for the UID associated with this context
- *
- * @slc.peak_demand: The parent context's maximum demand for SLC space
- * @slc.peak_usage: The parent context's maximum use of SLC space
- * @slc.idle_work: Work item used to queue SLC partition shrink upon context idle
- * @slc.idle_work_cancelled: Flag for async cancellation of idle_work
+ * @kctx: Handle to the parent kctx
+ * @stats: Tracks the dvfs metrics for the UID associated with this context
+ * @slc_vote: Tracks whether this context is voting for SLC
+ * @slc_demand: Tracks demand for SLC space
*/
struct pixel_platform_data {
struct kbase_context *kctx;
struct gpu_dvfs_metrics_uid_stats* stats;
- struct {
- u64 peak_demand;
- u64 peak_usage;
- struct work_struct idle_work;
- atomic_t idle_work_cancelled;
- } slc;
+ int slc_vote;
+#ifndef PIXEL_GPU_SLC_ACPM_SIGNAL
+ atomic_t slc_demand;
+#endif /* PIXEL_GPU_SLC_ACPM_SIGNAL */
};
#endif /* _KBASE_CONFIG_PLATFORM_H_ */
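For illustration, a minimal sketch of how the per-context @slc_demand above rolls up into the device-wide counter (the helper name and values are hypothetical; the real path is gpu_slc_liveness_update() in pixel_gpu_slc.c, and both fields exist only when PIXEL_GPU_SLC_ACPM_SIGNAL is not defined):

/* Hypothetical sketch: only the delta between the old and new per-context
 * peak demand is applied to the device-wide aggregate, so a context never
 * double-counts its own contribution.
 */
static void example_update_slc_demand(struct pixel_platform_data *pd,
				      struct pixel_context *pc,
				      s64 new_peak)
{
	s64 old_peak = atomic_xchg(&pd->slc_demand, new_peak);

	atomic_add(new_peak - old_peak, &pc->slc_demand);
}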
diff --git a/mali_kbase/platform/pixel/pixel_gpu.c b/mali_kbase/platform/pixel/pixel_gpu.c
index 5ac8807..1ae6db6 100644
--- a/mali_kbase/platform/pixel/pixel_gpu.c
+++ b/mali_kbase/platform/pixel/pixel_gpu.c
@@ -328,6 +328,7 @@ struct kbase_platform_funcs_conf platform_funcs = {
#endif /* CONFIG_MALI_MIDGARD_DVFS */
.platform_handler_context_active = &gpu_slc_kctx_active,
.platform_handler_context_idle = &gpu_slc_kctx_idle,
+ .platform_handler_tick_tock = &gpu_slc_tick_tock,
.platform_fw_cfg_init_func = &gpu_fw_cfg_init,
.platform_handler_core_dump_func = &gpu_sscd_dump,
};
diff --git a/mali_kbase/platform/pixel/pixel_gpu_slc.c b/mali_kbase/platform/pixel/pixel_gpu_slc.c
index 62a4e9e..1aac4d8 100644
--- a/mali_kbase/platform/pixel/pixel_gpu_slc.c
+++ b/mali_kbase/platform/pixel/pixel_gpu_slc.c
@@ -17,189 +17,53 @@
#include "mali_kbase_config_platform.h"
#include "pixel_gpu_slc.h"
-struct dirty_region {
- u64 first_vpfn;
- u64 last_vpfn;
- u64 dirty_pgds;
-};
+#include <uapi/gpu/arm/midgard/platform/pixel/pixel_memory_group_manager.h>
/**
- * struct gpu_slc_liveness_update_info - Buffer info, and live ranges
- *
- * @buffer_va: Array of buffer base virtual addresses
- * @buffer_sizes: Array of buffer sizes
- * @buffer_count: Number of elements in the va and sizes buffers
- * @live_ranges: Array of &struct kbase_pixel_gpu_slc_liveness_mark denoting live ranges for
- * each buffer
- * @live_ranges_count: Number of elements in the live ranges buffer
+ * enum slc_vote_state - Whether a context is voting for SLC
*/
-struct gpu_slc_liveness_update_info {
- u64* buffer_va;
- u64* buffer_sizes;
- u64 buffer_count;
- struct kbase_pixel_gpu_slc_liveness_mark* live_ranges;
- u64 live_ranges_count;
+enum slc_vote_state {
+ /** @IDLE: Idle, not voting for SLC */
+ IDLE = 0,
+ /** @VOTING: Active, voting for SLC */
+ VOTING = 1,
};
/**
- * gpu_slc_lock_as - Lock the current process address space
+ * transition() - Try to transition from one value to another
*
- * @kctx: The &struct kbase_context
- */
-static void gpu_slc_lock_as(struct kbase_context *kctx)
-{
- down_write(kbase_mem_get_process_mmap_lock());
- kbase_gpu_vm_lock_with_pmode_sync(kctx);
-}
-
-/**
- * gpu_slc_unlock_as - Unlock the current process address space
- *
- * @kctx: The &struct kbase_context
- */
-static void gpu_slc_unlock_as(struct kbase_context *kctx)
-{
- kbase_gpu_vm_unlock_with_pmode_sync(kctx);
- up_write(kbase_mem_get_process_mmap_lock());
-}
-
-/**
- * gpu_slc_in_group - Check whether the region is SLC cacheable
+ * @v: Value to transition
+ * @old: Starting state to transition from
+ * @new: Destination state to transition to
*
- * @reg: The gpu memory region to check for an SLC cacheable memory group.
+ * Return: Whether the transition was successful
*/
-static bool gpu_slc_in_group(struct kbase_va_region* reg)
+static bool transition(int *v, int old, int new)
{
- return reg->gpu_alloc->group_id == MGM_SLC_GROUP_ID;
-}
-
-/**
- * gpu_slc_get_region - Find the gpu memory region from a virtual address
- *
- * @kctx: The &struct kbase_context
- * @va: The base gpu virtual address of the region
- *
- * Return: On success, returns a valid memory region. On failure NULL is returned.
- */
-static struct kbase_va_region* gpu_slc_get_region(struct kbase_context *kctx, u64 va)
-{
- struct kbase_va_region *reg;
-
- if (!va)
- goto invalid;
+ bool const cond = *v == old;
- if ((va & ~PAGE_MASK) && (va >= PAGE_SIZE))
- goto invalid;
+ if (cond)
+ *v = new;
- /* Find the region that the virtual address belongs to */
- reg = kbase_region_tracker_find_region_base_address(kctx, va);
-
- /* Validate the region */
- if (kbase_is_region_invalid_or_free(reg))
- goto invalid;
- /* Might be shrunk */
- if (kbase_is_region_shrinkable(reg))
- goto invalid;
- /* Driver internal alloc */
- if (kbase_va_region_is_no_user_free(reg))
- goto invalid;
-
- return reg;
-
-invalid:
- dev_dbg(kctx->kbdev->dev, "pixel: failed to find valid region for gpu_va: %llu", va);
- return NULL;
-}
-
-/**
- * gpu_slc_migrate_region - Add PBHA that will make the pages SLC cacheable
- *
- * @kctx: The &struct kbase_context
- * @reg: The gpu memory region migrate to an SLC cacheable memory group
- * @dirty_reg: The &struct dirty_region containing the extent of the dirty page table entries
- */
-static void gpu_slc_migrate_region(struct kbase_context *kctx, struct kbase_va_region *reg, struct dirty_region *dirty_reg)
-{
- int err;
- u64 vpfn;
- size_t page_nr;
-
- KBASE_DEBUG_ASSERT(kctx);
- KBASE_DEBUG_ASSERT(reg);
-
- if (gpu_slc_in_group(reg)) {
- return;
- }
-
- vpfn = reg->start_pfn;
- page_nr = kbase_reg_current_backed_size(reg);
-
- err = kbase_mmu_update_pages_no_flush(kctx->kbdev, &kctx->mmu, vpfn,
- kbase_get_gpu_phy_pages(reg),
- page_nr,
- reg->flags,
- MGM_SLC_GROUP_ID,
- &dirty_reg->dirty_pgds);
-
- /* Track the dirty region */
- dirty_reg->first_vpfn = min(dirty_reg->first_vpfn, vpfn);
- dirty_reg->last_vpfn = max(dirty_reg->last_vpfn, vpfn + page_nr);
-
- if (err)
- dev_warn(kctx->kbdev->dev, "pixel: failed to move region to SLC: %d", err);
- else
- /* If everything is good, then set the new group on the region. */
- reg->gpu_alloc->group_id = MGM_SLC_GROUP_ID;
-}
-
-/**
- * gpu_slc_flush_dirty_region - Perform an MMU flush for a dirty page region
- *
- * @kctx: The &struct kbase_context
- * @dirty_reg: The &struct dirty_region containing the extent of the dirty page table entries
- */
-static void gpu_slc_flush_dirty_region(struct kbase_context *kctx, struct dirty_region *dirty_reg)
-{
- size_t const dirty_page_nr =
- (dirty_reg->last_vpfn - min(dirty_reg->first_vpfn, dirty_reg->last_vpfn));
-
- if (!dirty_page_nr)
- return;
-
- kbase_mmu_flush_invalidate_update_pages(
- kctx->kbdev, kctx, dirty_reg->first_vpfn, dirty_page_nr, dirty_reg->dirty_pgds);
-}
-
-/**
- * gpu_slc_resize_partition - Attempt to resize the GPU's SLC partition to meet demand.
- *
- * @kbdev: The &struct kbase_device for the GPU.
- */
-static void gpu_slc_resize_partition(struct kbase_device* kbdev)
-{
- struct pixel_context *pc = kbdev->platform_context;
-
- /* Request that the mgm select an SLC partition that fits our demand */
- pixel_mgm_resize_group_to_fit(kbdev->mgm_dev, MGM_SLC_GROUP_ID, pc->slc.demand);
-
- dev_dbg(kbdev->dev, "pixel: resized GPU SLC partition to meet demand: %llu", pc->slc.demand);
+ return cond;
}
+#ifndef PIXEL_GPU_SLC_ACPM_SIGNAL
/**
- * gpu_slc_get_partition_size - Query the current size of the GPU's SLC partition.
- *
- * @kbdev: The &struct kbase_device for the GPU.
+ * struct gpu_slc_liveness_update_info - Buffer info, and live ranges
*
- * Returns the size of the GPU's SLC partition.
+ * @buffer_sizes: Array of buffer sizes
+ * @buffer_count: Number of elements in the buffer_sizes array
+ * @live_ranges: Array of &struct kbase_pixel_gpu_slc_liveness_mark denoting live ranges for
+ * each buffer
+ * @live_ranges_count: Number of elements in the live ranges buffer
*/
-static u64 gpu_slc_get_partition_size(struct kbase_device* kbdev)
-{
- u64 const partition_size = pixel_mgm_query_group_size(kbdev->mgm_dev, MGM_SLC_GROUP_ID);
-
- dev_dbg(kbdev->dev, "pixel: GPU SLC partition partition size: %llu", partition_size);
-
- return partition_size;
-}
+struct gpu_slc_liveness_update_info {
+ u64* buffer_sizes;
+ u64 buffer_count;
+ struct kbase_pixel_gpu_slc_liveness_mark* live_ranges;
+ u64 live_ranges_count;
+};
/**
* gpu_slc_liveness_update - Respond to a liveness update by trying to put the new buffers into free
@@ -215,127 +79,40 @@ static void gpu_slc_liveness_update(struct kbase_context* kctx,
struct kbase_device* kbdev = kctx->kbdev;
struct pixel_context *pc = kbdev->platform_context;
struct pixel_platform_data *kctx_pd = kctx->platform_data;
- struct dirty_region dirty_reg = {
- .first_vpfn = U64_MAX,
- .last_vpfn = 0,
- .dirty_pgds = 0,
- };
- u64 current_usage = 0;
- u64 current_demand = 0;
- u64 free_space;
+ s64 current_demand = 0, peak_demand = 0, old_demand;
int i;
- /* Lock the process address space before modifying ATE's */
- gpu_slc_lock_as(kctx);
-
- /* Synchronize updates to the partition size and usage */
- mutex_lock(&pc->slc.lock);
-
dev_dbg(kbdev->dev, "pixel: buffer liveness update received");
- /* Remove the usage and demand from the previous liveness update */
- pc->slc.demand -= kctx_pd->slc.peak_demand;
- pc->slc.usage -= kctx_pd->slc.peak_usage;
- kctx_pd->slc.peak_demand = 0;
- kctx_pd->slc.peak_usage = 0;
-
- /* Calculate the remaining free space in the SLC partition (floored at 0) */
- free_space = gpu_slc_get_partition_size(kbdev);
- free_space -= min(free_space, pc->slc.usage);
-
for (i = 0; i < info->live_ranges_count; ++i)
{
- struct kbase_va_region *reg;
u64 size;
- u64 va;
u32 index = info->live_ranges[i].index;
if (unlikely(index >= info->buffer_count))
continue;
size = info->buffer_sizes[index];
- va = info->buffer_va[index];
-
- reg = gpu_slc_get_region(kctx, va);
- if(!reg)
- continue;
switch (info->live_ranges[i].type)
{
case KBASE_PIXEL_GPU_LIVE_RANGE_BEGIN:
/* Update demand as though there's no size limit */
current_demand += size;
- kctx_pd->slc.peak_demand = max(kctx_pd->slc.peak_demand, current_demand);
-
- /* Check whether there's free space in the partition to store the buffer */
- if (free_space >= current_usage + size)
- gpu_slc_migrate_region(kctx, reg, &dirty_reg);
-
- /* This may be true, even if the space calculation above returned false,
- * as a previous call to this function may have migrated the region.
- * In such a scenario, the current_usage may exceed the available free_space
- * and we will be oversubscribed to the SLC partition.
- * We could migrate the region back to the non-SLC group, but this would
- * require an SLC flush, so for now we do nothing.
- */
- if (gpu_slc_in_group(reg)) {
- current_usage += size;
- kctx_pd->slc.peak_usage = max(kctx_pd->slc.peak_usage, current_usage);
- }
+ peak_demand = max(peak_demand, current_demand);
break;
case KBASE_PIXEL_GPU_LIVE_RANGE_END:
current_demand -= size;
- if (gpu_slc_in_group(reg))
- current_usage -= size;
break;
}
}
- /* Perform single page table flush */
- gpu_slc_flush_dirty_region(kctx, &dirty_reg);
/* Indicates a missing live range end marker */
- WARN_ON_ONCE(current_demand != 0 || current_usage != 0);
-
- /* Update the total usage and demand */
- pc->slc.demand += kctx_pd->slc.peak_demand;
- pc->slc.usage += kctx_pd->slc.peak_usage;
+ WARN_ON_ONCE(current_demand != 0);
- dev_dbg(kbdev->dev,
- "pixel: kctx_%d, peak_demand: %llu, peak_usage: %llu",
- kctx->id,
- kctx_pd->slc.peak_demand,
- kctx_pd->slc.peak_usage);
- dev_dbg(kbdev->dev, "pixel: kbdev, demand: %llu, usage: %llu", pc->slc.demand, pc->slc.usage);
-
- /* Trigger partition resize based on the new demand */
- gpu_slc_resize_partition(kctx->kbdev);
-
- mutex_unlock(&pc->slc.lock);
- gpu_slc_unlock_as(kctx);
-}
-
-static void gpu_slc_kctx_idle_worker(struct work_struct *work)
-{
- struct pixel_platform_data *pd =
- container_of(work, struct pixel_platform_data, slc.idle_work);
- struct kbase_context *kctx = pd->kctx;
- struct kbase_device *kbdev = kctx->kbdev;
- struct pixel_context *pc = kbdev->platform_context;
-
- if (atomic_read(&pd->slc.idle_work_cancelled))
- return;
-
- mutex_lock(&pc->slc.lock);
-
- pc->slc.demand -= pd->slc.peak_demand;
- pc->slc.usage -= pd->slc.peak_usage;
-
- pd->slc.peak_demand = 0;
- pd->slc.peak_usage = 0;
-
- gpu_slc_resize_partition(kctx->kbdev);
-
- mutex_unlock(&pc->slc.lock);
+ /* Update the demand */
+ old_demand = atomic_xchg(&kctx_pd->slc_demand, peak_demand);
+ atomic_add(peak_demand - old_demand, &pc->slc_demand);
}
/**
@@ -369,13 +146,13 @@ int gpu_pixel_handle_buffer_liveness_update_ioctl(struct kbase_context* kctx,
if (U64_MAX / sizeof(struct kbase_pixel_gpu_slc_liveness_mark) < update->live_ranges_count)
goto done;
/* Guard against nullptr */
- if (!update->live_ranges_address || !update->buffer_va_address || !update->buffer_sizes_address)
+ if (!update->live_ranges_address || !update->buffer_sizes_address)
goto done;
/* Calculate the total buffer size required and detect overflows */
- if ((U64_MAX - live_ranges_size) / 2 < buffer_info_size)
+ if ((U64_MAX - live_ranges_size) < buffer_info_size)
goto done;
- total_buff_size = buffer_info_size * 2 + live_ranges_size;
+ total_buff_size = buffer_info_size + live_ranges_size;
/* Allocate the memory we require to copy from user space */
buff = kmalloc(total_buff_size, GFP_KERNEL);
@@ -387,16 +164,15 @@ int gpu_pixel_handle_buffer_liveness_update_ioctl(struct kbase_context* kctx,
/* Set up the info struct by pointing into the allocation. All 8 byte aligned */
info = (struct gpu_slc_liveness_update_info){
- .buffer_va = buff,
- .buffer_sizes = buff + update->buffer_count,
+ .buffer_sizes = buff,
.buffer_count = update->buffer_count,
- .live_ranges = (struct kbase_pixel_gpu_slc_liveness_mark*)(buff + update->buffer_count * 2),
+ .live_ranges = (struct kbase_pixel_gpu_slc_liveness_mark*)(buff + update->buffer_count),
.live_ranges_count = update->live_ranges_count,
};
/* Copy the data from user space */
- err =
- copy_from_user(info.live_ranges, u64_to_user_ptr(update->live_ranges_address), live_ranges_size);
+ err = copy_from_user(
+ info.live_ranges, u64_to_user_ptr(update->live_ranges_address), live_ranges_size);
if (err) {
dev_err(kctx->kbdev->dev, "pixel: failed to copy live ranges");
err = -EFAULT;
@@ -411,13 +187,6 @@ int gpu_pixel_handle_buffer_liveness_update_ioctl(struct kbase_context* kctx,
goto done;
}
- err = copy_from_user(info.buffer_va, u64_to_user_ptr(update->buffer_va_address), buffer_info_size);
- if (err) {
- dev_err(kctx->kbdev->dev, "pixel: failed to copy buffer addresses");
- err = -EFAULT;
- goto done;
- }
-
/* Execute an slc update */
gpu_slc_liveness_update(kctx, &info);
@@ -426,6 +195,7 @@ done:
return err;
}
+#endif /* PIXEL_GPU_SLC_ACPM_SIGNAL */
/**
* gpu_slc_kctx_init() - Called when a kernel context is created
@@ -439,10 +209,7 @@ done:
*/
int gpu_slc_kctx_init(struct kbase_context *kctx)
{
- struct pixel_platform_data *pd = kctx->platform_data;
-
- INIT_WORK(&pd->slc.idle_work, gpu_slc_kctx_idle_worker);
-
+ (void)kctx;
return 0;
}
@@ -450,28 +217,23 @@ int gpu_slc_kctx_init(struct kbase_context *kctx)
* gpu_slc_kctx_term() - Called when a kernel context is terminated
*
* @kctx: The &struct kbase_context that is being terminated
- *
- * Free up SLC space used by the buffers that this context owns.
*/
void gpu_slc_kctx_term(struct kbase_context *kctx)
{
- struct kbase_device *kbdev = kctx->kbdev;
- struct pixel_context *pc = kbdev->platform_context;
- struct pixel_platform_data *kctx_pd = kctx->platform_data;
-
- atomic_set(&kctx_pd->slc.idle_work_cancelled, 1);
- cancel_work_sync(&kctx_pd->slc.idle_work);
-
- mutex_lock(&pc->slc.lock);
-
- /* Deduct the usage and demand, freeing that SLC space for the next update */
- pc->slc.demand -= kctx_pd->slc.peak_demand;
- pc->slc.usage -= kctx_pd->slc.peak_usage;
+ struct pixel_platform_data *pd = kctx->platform_data;
- /* Trigger partition resize based on the new demand */
- gpu_slc_resize_partition(kctx->kbdev);
+ /* Contexts can be terminated without being idled first */
+ if (transition(&pd->slc_vote, VOTING, IDLE))
+ pixel_mgm_slc_dec_refcount(kctx->kbdev->mgm_dev);
- mutex_unlock(&pc->slc.lock);
+#ifndef PIXEL_GPU_SLC_ACPM_SIGNAL
+ {
+ struct pixel_context* pc = kctx->kbdev->platform_context;
+ /* Deduct the usage and demand, freeing that SLC space for the next update */
+ u64 kctx_demand = atomic_xchg(&pd->slc_demand, 0);
+ atomic_sub(kctx_demand, &pc->slc_demand);
+ }
+#endif /* PIXEL_GPU_SLC_ACPM_SIGNAL */
}
/**
@@ -481,19 +243,12 @@ void gpu_slc_kctx_term(struct kbase_context *kctx)
*/
void gpu_slc_kctx_active(struct kbase_context *kctx)
{
- struct kbase_device *kbdev = kctx->kbdev;
struct pixel_platform_data *pd = kctx->platform_data;
- lockdep_assert_held(&kbdev->hwaccess_lock);
+ lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
- /* Asynchronously cancel the idle work, since we're in atomic context.
- * The goal here is not to ensure that the idle_work doesn't run. Instead we need to ensure
- * that any queued idle_work does not run *after* a liveness update for the now active kctx.
- * Either the idle_work is executing now, and beats the cancellation check, or it runs later
- * and early-exits at the cancellation check.
- * In neither scenario will a 'cancelled' idle_work interfere with a later liveness update.
- */
- atomic_set(&pd->slc.idle_work_cancelled, 1);
+ if (transition(&pd->slc_vote, IDLE, VOTING))
+ pixel_mgm_slc_inc_refcount(kctx->kbdev->mgm_dev);
}
/**
@@ -503,22 +258,34 @@ void gpu_slc_kctx_active(struct kbase_context *kctx)
*/
void gpu_slc_kctx_idle(struct kbase_context *kctx)
{
- struct kbase_device *kbdev = kctx->kbdev;
struct pixel_platform_data *pd = kctx->platform_data;
- lockdep_assert_held(&kbdev->hwaccess_lock);
+ lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
+
+ if (transition(&pd->slc_vote, VOTING, IDLE))
+ pixel_mgm_slc_dec_refcount(kctx->kbdev->mgm_dev);
+}
- /* In the event that this line 'un-cancels' the idle_work, and that idle_work is executing,
- * we will re-queue on the following line anyway, resulting in a unnecessary additional
- * execution of the worker.
- * While not optimal, it won't result in a correctness problem.
- */
- atomic_set(&pd->slc.idle_work_cancelled, 0);
- queue_work(system_highpri_wq, &pd->slc.idle_work);
+/**
+ * gpu_slc_tick_tock() - Called when a GPU scheduling kick occurs
+ *
+ * @kbdev: The &struct kbase_device for the GPU.
+ */
+void gpu_slc_tick_tock(struct kbase_device *kbdev)
+{
+#ifndef PIXEL_GPU_SLC_ACPM_SIGNAL
+ struct pixel_context* pc = kbdev->platform_context;
+ /* Scale demand to the signal's 4MB granularity */
+ u64 signal = atomic_read(&pc->slc_demand) / (4 << 20);
+
+ pixel_mgm_slc_update_signal(kbdev->mgm_dev, signal);
+#else
+ pixel_mgm_slc_update_signal(kbdev->mgm_dev, 0);
+#endif /* PIXEL_GPU_SLC_ACPM_SIGNAL */
}
/**
- * gpu_slc_init - Initialize the SLC partition for the GPU
+ * gpu_slc_init - Initialize the SLC context for the GPU
*
* @kbdev: The &struct kbase_device for the GPU.
*
@@ -526,15 +293,11 @@ void gpu_slc_kctx_idle(struct kbase_context *kctx)
*/
int gpu_slc_init(struct kbase_device *kbdev)
{
- struct pixel_context *pc = kbdev->platform_context;
-
- mutex_init(&pc->slc.lock);
-
return 0;
}
/**
- * gpu_slc_term() - Terminates the Pixel GPU SLC partition.
+ * gpu_slc_term() - Terminates the Pixel GPU SLC context.
*
* @kbdev: The &struct kbase_device for the GPU.
*/
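As a rough worked example of the demand-to-signal scaling above (the demand value is made up), roughly 28 MiB of aggregate demand is needed before the signal reaches the enable threshold used later in pixel_slc.c:

/* Illustrative only: the demand value is made up. */
static u64 example_signal_from_demand(void)
{
	u64 demand = 30ull << 20;        /* ~30 MiB of aggregated SLC demand */
	u64 signal = demand / (4 << 20); /* scaled to 4 MiB units: 7         */

	/* pixel_slc.c only enables the partition once the signal reaches
	 * PARTITION_ENABLE_THRESHOLD (7) and at least one context votes.
	 */
	return signal;
}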
diff --git a/mali_kbase/platform/pixel/pixel_gpu_slc.h b/mali_kbase/platform/pixel/pixel_gpu_slc.h
index 82d0779..8a59df0 100644
--- a/mali_kbase/platform/pixel/pixel_gpu_slc.h
+++ b/mali_kbase/platform/pixel/pixel_gpu_slc.h
@@ -8,9 +8,6 @@
#define _PIXEL_GPU_SLC_H_
#ifdef CONFIG_MALI_PIXEL_GPU_SLC
-int gpu_pixel_handle_buffer_liveness_update_ioctl(struct kbase_context* kctx,
- struct kbase_ioctl_buffer_liveness_update* update);
-
int gpu_slc_init(struct kbase_device *kbdev);
void gpu_slc_term(struct kbase_device *kbdev);
@@ -22,13 +19,9 @@ void gpu_slc_kctx_term(struct kbase_context *kctx);
void gpu_slc_kctx_active(struct kbase_context *kctx);
void gpu_slc_kctx_idle(struct kbase_context *kctx);
-#else
-static int __maybe_unused gpu_pixel_handle_buffer_liveness_update_ioctl(struct kbase_context* kctx,
- struct kbase_ioctl_buffer_liveness_update* update)
-{
- return (void)kctx, (void)update, 0;
-}
+void gpu_slc_tick_tock(struct kbase_device *kbdev);
+#else
static int __maybe_unused gpu_slc_init(struct kbase_device *kbdev) { return (void)kbdev, 0; }
static void __maybe_unused gpu_slc_term(struct kbase_device *kbdev) { (void)kbdev; }
@@ -40,6 +33,20 @@ static void __maybe_unused gpu_slc_kctx_term(struct kbase_context* kctx) { (void
static void __maybe_unused gpu_slc_kctx_active(struct kbase_context *kctx) { (void)kctx; }
static void __maybe_unused gpu_slc_kctx_idle(struct kbase_context *kctx) { (void)kctx; }
+
+static void __maybe_unused gpu_slc_tick_tock(struct kbase_device *kbdev) { (void)kbdev; }
#endif /* CONFIG_MALI_PIXEL_GPU_SLC */
+#if defined(CONFIG_MALI_PIXEL_GPU_SLC) && !defined(PIXEL_GPU_SLC_ACPM_SIGNAL)
+int
+gpu_pixel_handle_buffer_liveness_update_ioctl(struct kbase_context* kctx,
+ struct kbase_ioctl_buffer_liveness_update* update);
+#else
+static int __maybe_unused gpu_pixel_handle_buffer_liveness_update_ioctl(struct kbase_context* kctx,
+ struct kbase_ioctl_buffer_liveness_update* update)
+{
+ return (void)kctx, (void)update, 0;
+}
+#endif
+
#endif /* _PIXEL_GPU_SLC_H_ */
diff --git a/mali_pixel/Documentation/ABI/testing/sysfs-kernel-pixel_stat-gpu b/mali_pixel/Documentation/ABI/testing/sysfs-kernel-pixel_stat-gpu
new file mode 100644
index 0000000..1d3bc11
--- /dev/null
+++ b/mali_pixel/Documentation/ABI/testing/sysfs-kernel-pixel_stat-gpu
@@ -0,0 +1,7 @@
+What: /sys/kernel/pixel_stat/gpu/mem/slc_pin_partition
+Date: Feb 2024
+Contact: "Jack Diver" <diverj@google.com>
+Description:
+ Write-only node to manually pin the SLC partition in the enabled
+ state. This is useful when profiling SLC performance.
+
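A minimal userspace sketch of driving the node while profiling (the path comes from the ABI entry above; the helper name and error handling are illustrative only):

#include <fcntl.h>
#include <unistd.h>

/* Illustrative helper, not part of this patch. Writing "1" pins the SLC
 * partition enabled; writing "0" releases the pin (kstrtobool semantics).
 */
static int pin_gpu_slc_partition(int pin)
{
	int fd = open("/sys/kernel/pixel_stat/gpu/mem/slc_pin_partition", O_WRONLY);

	if (fd < 0)
		return -1;

	if (write(fd, pin ? "1" : "0", 1) != 1) {
		close(fd);
		return -1;
	}

	return close(fd);
}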
diff --git a/mali_pixel/Kbuild b/mali_pixel/Kbuild
index d20ce03..4f65a95 100644
--- a/mali_pixel/Kbuild
+++ b/mali_pixel/Kbuild
@@ -25,7 +25,7 @@ CONFIG_MALI_MEMORY_GROUP_MANAGER ?= m
CONFIG_MALI_PRIORITY_CONTROL_MANAGER ?= m
CONFIG_MALI_PROTECTED_MEMORY_ALLOCATOR ?= m
CONFIG_MALI_PIXEL_STATS ?= m
-CONFIG_MALI_PIXEL_GPU_SLC=y
+CONFIG_MALI_PIXEL_GPU_SLC ?= y
mali_pixel-objs :=
@@ -39,6 +39,7 @@ endif
ifeq ($(CONFIG_MALI_MEMORY_GROUP_MANAGER),m)
DEFINES += -DCONFIG_MALI_MEMORY_GROUP_MANAGER
mali_pixel-objs += memory_group_manager.o
+ mali_pixel-objs += pixel_slc.o
endif
ifeq ($(CONFIG_MALI_PRIORITY_CONTROL_MANAGER),m)
DEFINES += -DCONFIG_MALI_PRIORITY_CONTROL_MANAGER
@@ -50,6 +51,9 @@ ifeq ($(CONFIG_MALI_PROTECTED_MEMORY_ALLOCATOR),m)
endif
ifeq ($(CONFIG_MALI_PIXEL_GPU_SLC),y)
DEFINES += -DCONFIG_MALI_PIXEL_GPU_SLC
+ ifeq ($(CONFIG_SOC_ZUMA),y)
+ DEFINES += -DPIXEL_GPU_SLC_ACPM_SIGNAL
+ endif
endif
# Use our defines when compiling, and include mali platform module headers
diff --git a/mali_pixel/memory_group_manager.c b/mali_pixel/memory_group_manager.c
index 0c9a241..9076a65 100644
--- a/mali_pixel/memory_group_manager.c
+++ b/mali_pixel/memory_group_manager.c
@@ -23,7 +23,7 @@
#include <linux/memory_group_manager.h>
-#include <soc/google/pt.h>
+#include "pixel_slc.h"
#include <uapi/gpu/arm/midgard/platform/pixel/pixel_memory_group_manager.h>
@@ -31,29 +31,30 @@
#define ORDER_SMALL_PAGE 0
#define ORDER_LARGE_PAGE const_ilog2(NUM_PAGES_IN_2MB_LARGE_PAGE)
-/* Borr does not have "real" PBHA support. However, since we only use a 36-bit PA on the bus,
- * AxADDR[39:36] is wired up to the GPU AxUSER[PBHA] field seen by the rest of the system.
- * Those AxADDR bits come from [39:36] in the page descriptor.
- *
- * Odin and Turse have "real" PBHA support using a dedicated output signal and page descriptor field.
- * The AxUSER[PBHA] field is driven by the GPU's PBHA signal, and AxADDR[39:36] is dropped.
- * The page descriptor PBHA field is [62:59].
- *
- * We could write to both of these locations, as each SoC only reads from its respective PBHA
- * location with the other being ignored or dropped.
- *
- * b/148988078 contains confirmation of the above description.
+/**
+ * enum mgm_group_id - Symbolic names for used memory groups
*/
-#if IS_ENABLED(CONFIG_SOC_GS101)
-#define PBHA_BIT_POS (36)
-#else
-#define PBHA_BIT_POS (59)
-#endif
-#define PBHA_BIT_MASK (0xf)
+enum mgm_group_id
+{
+ /**
+ * @MGM_RESERVED_GROUP_ID: The Mali driver requires that allocations made on one of the
+ * groups are not treated specially.
+ */
+ MGM_RESERVED_GROUP_ID = 0,
-#define MGM_PBHA_DEFAULT 0
+ /**
+ * @MGM_SLC_GROUP_ID: Group for memory that should be cached in the system level cache.
+ */
+ MGM_SLC_GROUP_ID = 1,
-#define MGM_SENTINEL_PT_SIZE U64_MAX
+ /**
+ * @MGM_IMPORTED_MEMORY_GROUP_ID: Imported memory is handled by the allocator of the memory,
+ * and the Mali DDK will request a group_id for such memory
+ * via mgm_get_import_memory_id(). We specify which group we
+ * want to use for this here.
+ */
+ MGM_IMPORTED_MEMORY_GROUP_ID = (MEMORY_GROUP_MANAGER_NR_GROUPS - 1),
+};
#define INVALID_GROUP_ID(group_id) \
WARN_ON((group_id) >= MEMORY_GROUP_MANAGER_NR_GROUPS)
@@ -81,13 +82,6 @@ static inline vm_fault_t vmf_insert_pfn_prot(struct vm_area_struct *vma,
* @lp_size: The number of allocated large(2MB) pages
* @insert_pfn: The number of calls to map pages for CPU access.
* @update_gpu_pte: The number of calls to update GPU page table entries.
- * @ptid: The active partition ID for this group
- * @pbha: The PBHA bits assigned to this group,
- * @base_pt: The base partition ID available to this group.
- * @pt_num: The number of partitions available to this group.
- * @active_pt_idx: The relative index for the partition backing the group.
- * Different from the absolute ptid.
- * @state: The lifecycle state of the partition associated with this group
* This structure allows page allocation information to be displayed via
* debugfs. Display is organized per group with small and large sized pages.
*/
@@ -98,30 +92,6 @@ struct mgm_group {
atomic_t insert_pfn;
atomic_t update_gpu_pte;
#endif
-
- ptid_t ptid;
- ptpbha_t pbha;
-
- u32 base_pt;
- u32 pt_num;
- u32 active_pt_idx;
- enum {
- MGM_GROUP_STATE_NEW = 0,
- MGM_GROUP_STATE_ENABLED = 10,
- MGM_GROUP_STATE_DISABLED_NOT_FREED = 20,
- MGM_GROUP_STATE_DISABLED = 30,
- } state;
-};
-
-/**
- * struct partition_stats - Structure for tracking sizing of a partition
- *
- * @capacity: The total capacity of each partition
- * @size: The current size of each partition
- */
-struct partition_stats {
- u64 capacity;
- atomic64_t size;
};
/**
@@ -130,26 +100,22 @@ struct partition_stats {
* @groups: To keep track of the number of allocated pages of all groups
* @ngroups: Number of groups actually used
* @npartitions: Number of partitions used by all groups combined
- * @pt_stats: The sizing info for each partition
* @dev: device attached
- * @pt_handle: Link to SLC partition data
* @kobj: &sruct kobject used for linking to pixel_stats_sysfs node
* @mgm_debugfs_root: debugfs root directory of memory group manager
+ * @slc_data: To track GPU SLC partitions.
*
* This structure allows page allocation information to be displayed via
* debugfs. Display is organized per group with small and large sized pages.
*/
struct mgm_groups {
struct mgm_group groups[MEMORY_GROUP_MANAGER_NR_GROUPS];
- size_t ngroups;
- size_t npartitions;
- struct partition_stats *pt_stats;
struct device *dev;
- struct pt_handle *pt_handle;
struct kobject kobj;
#ifdef CONFIG_MALI_MEMORY_GROUP_MANAGER_DEBUG_FS
struct dentry *mgm_debugfs_root;
#endif
+ struct slc_data slc_data;
};
/*
@@ -158,13 +124,6 @@ struct mgm_groups {
#ifdef CONFIG_MALI_MEMORY_GROUP_MANAGER_DEBUG_FS
-static int mgm_debugfs_state_get(void *data, u64 *val)
-{
- struct mgm_group *group = data;
- *val = (u64)group->state;
- return 0;
-}
-
static int mgm_debugfs_size_get(void *data, u64 *val)
{
struct mgm_group *group = data;
@@ -193,8 +152,6 @@ static int mgm_debugfs_update_gpu_pte_get(void *data, u64 *val)
return 0;
}
-DEFINE_SIMPLE_ATTRIBUTE(fops_mgm_state, mgm_debugfs_state_get,
- NULL, "%llu\n");
DEFINE_SIMPLE_ATTRIBUTE(fops_mgm_size, mgm_debugfs_size_get,
NULL, "%llu\n");
DEFINE_SIMPLE_ATTRIBUTE(fops_mgm_lp_size, mgm_debugfs_lp_size_get,
@@ -218,7 +175,6 @@ static struct {
const char *name;
const struct file_operations *fops;
} attribs[] = {
- { "state", &fops_mgm_state},
{ "size", &fops_mgm_size},
{ "lp_size", &fops_mgm_lp_size},
{ "insert_pfn", &fops_mgm_insert_pfn},
@@ -298,6 +254,8 @@ extern struct kobject *pixel_stat_gpu_kobj;
#define MGM_ATTR_RO(_name) \
static struct kobj_attribute _name##_attr = __ATTR_RO(_name)
+#define MGM_ATTR_WO(_name) \
+ static struct kobj_attribute _name##_attr = __ATTR_WO(_name)
static ssize_t total_page_count_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buf)
@@ -340,10 +298,31 @@ static ssize_t large_page_count_show(struct kobject *kobj,
}
MGM_ATTR_RO(large_page_count);
+static ssize_t slc_pin_partition_store(struct kobject* kobj,
+ struct kobj_attribute* attr,
+ const char* buf,
+ size_t count)
+{
+ struct mgm_groups *data = container_of(kobj, struct mgm_groups, kobj);
+ bool pin;
+
+ if (!data)
+ return -ENODEV;
+
+ if (kstrtobool(buf, &pin))
+ return -EINVAL;
+
+ slc_pin(&data->slc_data, pin);
+
+ return count;
+}
+MGM_ATTR_WO(slc_pin_partition);
+
static struct attribute *mgm_attrs[] = {
&total_page_count_attr.attr,
&small_page_count_attr.attr,
&large_page_count_attr.attr,
+ &slc_pin_partition_attr.attr,
NULL,
};
ATTRIBUTE_GROUPS(mgm);
@@ -392,20 +371,6 @@ static void mgm_sysfs_term(struct mgm_groups *data)
#endif /* CONFIG_MALI_PIXEL_STATS */
-static int group_pt_id(struct mgm_groups *data, enum pixel_mgm_group_id group_id, int pt_index)
-{
- struct mgm_group *group = &data->groups[group_id];
- if (WARN_ON_ONCE(pt_index >= group->pt_num))
- return 0;
-
- return group->base_pt + pt_index;
-}
-
-static int group_active_pt_id(struct mgm_groups *data, enum pixel_mgm_group_id group_id)
-{
- return group_pt_id(data, group_id, data->groups[group_id].active_pt_idx);
-}
-
static atomic64_t total_gpu_pages = ATOMIC64_INIT(0);
static atomic_t* get_size_counter(struct memory_group_manager_device* mgm_dev, unsigned int group_id, unsigned int order)
@@ -434,26 +399,7 @@ static void update_size(struct memory_group_manager_device *mgm_dev, unsigned in
atomic_inc(size);
atomic64_add(1 << order, &total_gpu_pages);
} else {
- if (atomic_dec_return(size) < 0) {
- /* b/289501175
- * Pages are often 'migrated' to the SLC group, which needs special
- * accounting.
- *
- * TODO: Remove after SLC MGM decoupling b/290354607
- */
- if (!WARN_ON(group_id != MGM_SLC_GROUP_ID)) {
- /* Undo the dec, and instead decrement the reserved group counter.
- * This is still making the assumption that the migration came from
- * the reserved group. Currently this is always true, however it
- * might not be in future. It would be invasive and costly to track
- * where every page came from, so instead this will be fixed as part
- * of the b/290354607 effort.
- */
- atomic_inc(size);
- update_size(mgm_dev, MGM_RESERVED_GROUP_ID, order, alloc);
- return;
- }
- }
+ WARN_ON(atomic_dec_return(size) < 0);
atomic64_sub(1 << order, &total_gpu_pages);
}
@@ -462,185 +408,6 @@ static void update_size(struct memory_group_manager_device *mgm_dev, unsigned in
pr_warn("total_gpu_pages %lld\n", atomic64_read(&total_gpu_pages));
}
-static void pt_size_invalidate(struct mgm_groups* data, int pt_idx)
-{
- /* Set the size to a known sentinel value so that we can later detect an update */
- atomic64_set(&data->pt_stats[pt_idx].size, MGM_SENTINEL_PT_SIZE);
-}
-
-static void pt_size_init(struct mgm_groups* data, int pt_idx, size_t size)
-{
- /* The resize callback may have already been executed, which would have set
- * the correct size. Only update the size if this has not happened.
- * We can tell that no resize took place if the size is still a sentinel.
- */
- atomic64_cmpxchg(&data->pt_stats[pt_idx].size, MGM_SENTINEL_PT_SIZE, size);
-}
-
-static void validate_ptid(struct mgm_groups* data, enum pixel_mgm_group_id group_id, int ptid)
-{
- if (ptid == -EINVAL)
- dev_err(data->dev, "Failed to get partition for group: %d\n", group_id);
- else
- dev_info(data->dev, "pt_client_mutate returned ptid=%d for group=%d", ptid, group_id);
-}
-
-static void update_group(struct mgm_groups* data,
- enum pixel_mgm_group_id group_id,
- int ptid,
- int relative_pt_idx)
-{
- int const abs_pt_idx = group_pt_id(data, group_id, relative_pt_idx);
- int const pbha = pt_pbha(data->dev->of_node, abs_pt_idx);
-
- if (pbha == PT_PBHA_INVALID)
- dev_err(data->dev, "Failed to get PBHA for group: %d\n", group_id);
- else
- dev_info(data->dev, "pt_pbha returned PBHA=%d for group=%d", pbha, group_id);
-
- data->groups[group_id].ptid = ptid;
- data->groups[group_id].pbha = pbha;
- data->groups[group_id].state = MGM_GROUP_STATE_ENABLED;
- data->groups[group_id].active_pt_idx = relative_pt_idx;
-}
-
-static void disable_partition(struct mgm_groups* data, enum pixel_mgm_group_id group_id)
-{
- int const active_idx = group_active_pt_id(data, group_id);
-
- /* Skip if not already enabled */
- if (data->groups[group_id].state != MGM_GROUP_STATE_ENABLED)
- return;
-
- pt_client_disable_no_free(data->pt_handle, active_idx);
- data->groups[group_id].state = MGM_GROUP_STATE_DISABLED_NOT_FREED;
-
- pt_size_invalidate(data, active_idx);
- pt_size_init(data, active_idx, 0);
-}
-
-static void enable_partition(struct mgm_groups* data, enum pixel_mgm_group_id group_id)
-{
- int ptid;
- size_t size = 0;
- int const active_idx = group_active_pt_id(data, group_id);
-
- /* Skip if already enabled */
- if (data->groups[group_id].state == MGM_GROUP_STATE_ENABLED)
- return;
-
- pt_size_invalidate(data, active_idx);
-
- ptid = pt_client_enable_size(data->pt_handle, active_idx, &size);
-
- validate_ptid(data, group_id, ptid);
-
- update_group(data, group_id, ptid, data->groups[group_id].active_pt_idx);
-
- pt_size_init(data, active_idx, size);
-}
-
-static void set_group_partition(struct mgm_groups* data,
- enum pixel_mgm_group_id group_id,
- int new_pt_index)
-{
- int ptid;
- size_t size = 0;
- int const active_idx = group_active_pt_id(data, group_id);
- int const new_idx = group_pt_id(data, group_id, new_pt_index);
-
- /* Early out if no changes are needed */
- if (new_idx == active_idx)
- return;
-
- pt_size_invalidate(data, new_idx);
-
- ptid = pt_client_mutate_size(data->pt_handle, active_idx, new_idx, &size);
-
- validate_ptid(data, group_id, ptid);
-
- update_group(data, group_id, ptid, new_pt_index);
-
- pt_size_init(data, new_idx, size);
- /* Reset old partition size */
- atomic64_set(&data->pt_stats[active_idx].size, data->pt_stats[active_idx].capacity);
-}
-
-u64 pixel_mgm_query_group_size(struct memory_group_manager_device* mgm_dev,
- enum pixel_mgm_group_id group_id)
-{
- struct mgm_groups *data;
- struct mgm_group *group;
- u64 size = 0;
-
- /* Early out if the group doesn't exist */
- if (INVALID_GROUP_ID(group_id))
- goto done;
-
- data = mgm_dev->data;
- group = &data->groups[group_id];
-
- /* Early out if the group has no partitions */
- if (group->pt_num == 0)
- goto done;
-
- size = atomic64_read(&data->pt_stats[group_active_pt_id(data, group_id)].size);
-
-done:
- return size;
-}
-EXPORT_SYMBOL(pixel_mgm_query_group_size);
-
-void pixel_mgm_resize_group_to_fit(struct memory_group_manager_device* mgm_dev,
- enum pixel_mgm_group_id group_id,
- u64 demand)
-{
- struct mgm_groups *data;
- struct mgm_group *group;
- s64 diff, cur_size, min_diff = S64_MAX;
- int pt_idx;
-
- /* Early out if the group doesn't exist */
- if (INVALID_GROUP_ID(group_id))
- goto done;
-
- data = mgm_dev->data;
- group = &data->groups[group_id];
-
- /* Early out if the group has no partitions */
- if (group->pt_num == 0)
- goto done;
-
- /* We can disable the partition if there's no demand */
- if (demand == 0)
- {
- disable_partition(data, group_id);
- goto done;
- }
-
- /* Calculate best partition to use, by finding the nearest capacity */
- for (pt_idx = 0; pt_idx < group->pt_num; ++pt_idx)
- {
- cur_size = data->pt_stats[group_pt_id(data, group_id, pt_idx)].capacity;
- diff = abs(demand - cur_size);
-
- if (diff > min_diff)
- break;
-
- min_diff = diff;
- }
-
- /* Ensure the partition is enabled before trying to mutate it */
- enable_partition(data, group_id);
- set_group_partition(data, group_id, pt_idx - 1);
-
-done:
- dev_dbg(data->dev, "%s: resized memory_group_%d for demand: %lldB", __func__, group_id, demand);
-
- return;
-}
-EXPORT_SYMBOL(pixel_mgm_resize_group_to_fit);
-
static struct page *mgm_alloc_page(
struct memory_group_manager_device *mgm_dev, unsigned int group_id,
gfp_t gfp_mask, unsigned int order)
@@ -655,35 +422,12 @@ static struct page *mgm_alloc_page(
if (INVALID_GROUP_ID(group_id))
return NULL;
- if (WARN_ON_ONCE((group_id != MGM_RESERVED_GROUP_ID) &&
- (group_active_pt_id(data, group_id) >= data->npartitions)))
- return NULL;
-
/* We don't expect to be allocating pages into the group used for
* external or imported memory
*/
if (WARN_ON(group_id == MGM_IMPORTED_MEMORY_GROUP_ID))
return NULL;
- /* If we are allocating a page in this group for the first time then
- * ensure that we have enabled the relevant partitions for it.
- */
- if (group_id != MGM_RESERVED_GROUP_ID) {
- switch (data->groups[group_id].state) {
- case MGM_GROUP_STATE_NEW:
- enable_partition(data, group_id);
- break;
- case MGM_GROUP_STATE_ENABLED:
- case MGM_GROUP_STATE_DISABLED_NOT_FREED:
- case MGM_GROUP_STATE_DISABLED:
- /* Everything should already be set up*/
- break;
- default:
- dev_err(data->dev, "Group %u in invalid state %d\n",
- group_id, data->groups[group_id].state);
- }
- }
-
p = alloc_pages(gfp_mask, order);
if (p) {
@@ -742,7 +486,7 @@ static u64 mgm_update_gpu_pte(
int const mmu_level, u64 pte)
{
struct mgm_groups *const data = mgm_dev->data;
- unsigned int pbha;
+ u64 const old_pte = pte;
dev_dbg(data->dev,
"%s(mgm_dev=%p, group_id=%u, mmu_level=%d, pte=0x%llx)\n",
@@ -751,40 +495,22 @@ static u64 mgm_update_gpu_pte(
if (INVALID_GROUP_ID(group_id))
return pte;
- /* Clear any bits set in the PBHA range */
- if (pte & ((u64)PBHA_BIT_MASK << PBHA_BIT_POS)) {
- dev_warn(data->dev,
- "%s: updating pte with bits already set in PBHA range",
- __func__);
- pte &= ~((u64)PBHA_BIT_MASK << PBHA_BIT_POS);
- }
-
switch (group_id) {
case MGM_RESERVED_GROUP_ID:
case MGM_IMPORTED_MEMORY_GROUP_ID:
/* The reserved group doesn't set PBHA bits */
- /* TODO: Determine what to do with imported memory */
+ pte = slc_wipe_pbha(pte);
break;
+ case MGM_SLC_GROUP_ID:
+ /* Map requests for SLC memory groups to SLC */
+ pte = slc_set_pbha(&data->slc_data, pte);
default:
- /* All other groups will have PBHA bits */
- if (data->groups[group_id].state > MGM_GROUP_STATE_NEW) {
- u64 old_pte = pte;
- pbha = data->groups[group_id].pbha;
-
- pte |= ((u64)pbha & PBHA_BIT_MASK) << PBHA_BIT_POS;
-
- dev_dbg(data->dev,
- "%s: group_id=%u pbha=%d "
- "pte=0x%llx -> 0x%llx\n",
- __func__, group_id, pbha, old_pte, pte);
-
- } else {
- dev_err(data->dev,
- "Tried to get PBHA of uninitialized group=%d",
- group_id);
- }
+ break;
}
+ dev_dbg(data->dev, "%s: group_id=%u pte=0x%llx -> 0x%llx\n",
+ __func__, group_id, old_pte, pte);
+
#ifdef CONFIG_MALI_MEMORY_GROUP_MANAGER_DEBUG_FS
atomic_inc(&data->groups[group_id].update_gpu_pte);
#endif
@@ -795,27 +521,10 @@ static u64 mgm_update_gpu_pte(
static u64 mgm_pte_to_original_pte(struct memory_group_manager_device *mgm_dev, unsigned int group_id,
int mmu_level, u64 pte)
{
- struct mgm_groups *const data = mgm_dev->data;
- u64 old_pte;
-
if (INVALID_GROUP_ID(group_id))
return pte;
- switch (group_id) {
- case MGM_RESERVED_GROUP_ID:
- case MGM_IMPORTED_MEMORY_GROUP_ID:
- /* The reserved group doesn't set PBHA bits */
- /* TODO: Determine what to do with imported memory */
- break;
- default:
- /* All other groups will have PBHA bits, so clear them */
- old_pte = pte;
- pte &= ~((u64)PBHA_BIT_MASK << PBHA_BIT_POS);
- dev_dbg(data->dev, "%s: group_id=%u pte=0x%llx -> 0x%llx\n", __func__, group_id,
- old_pte, pte);
- }
-
- return pte;
+ return slc_wipe_pbha(pte);
}
static vm_fault_t mgm_vmf_insert_pfn_prot(
@@ -847,49 +556,36 @@ static vm_fault_t mgm_vmf_insert_pfn_prot(
return fault;
}
-static void mgm_resize_callback(void *data, int id, size_t size_allocated)
+void pixel_mgm_slc_update_signal(struct memory_group_manager_device* mgm_dev, u64 signal)
{
- struct mgm_groups *const mgm_data = (struct mgm_groups *)data;
- dev_dbg(mgm_data->dev, "Resize callback called, size_allocated: %zu\n", size_allocated);
- /* Update the partition size for the group */
- atomic64_set(&mgm_data->pt_stats[id].size, size_allocated);
+ struct mgm_groups *const data = mgm_dev->data;
+
+ slc_update_signal(&data->slc_data, signal);
}
+EXPORT_SYMBOL_GPL(pixel_mgm_slc_update_signal);
-static int mgm_initialize_data(struct mgm_groups *mgm_data)
+void pixel_mgm_slc_inc_refcount(struct memory_group_manager_device* mgm_dev)
{
- int i, ret;
+ struct mgm_groups *const data = mgm_dev->data;
- /* +1 to include the required default group */
- const int ngroups = of_property_count_strings(mgm_data->dev->of_node, "groups") + 1;
- if (WARN_ON(ngroups < 0) ||
- WARN_ON(ngroups > MEMORY_GROUP_MANAGER_NR_GROUPS)) {
- mgm_data->ngroups = 0;
- } else {
- mgm_data->ngroups = ngroups;
- }
- mgm_data->npartitions = of_property_count_strings(mgm_data->dev->of_node, "pt_id");
+ slc_inc_refcount(&data->slc_data);
+}
+EXPORT_SYMBOL_GPL(pixel_mgm_slc_inc_refcount);
- mgm_data->pt_stats = kzalloc(mgm_data->npartitions * sizeof(struct partition_stats), GFP_KERNEL);
- if (mgm_data->pt_stats == NULL) {
- dev_err(mgm_data->dev, "failed to allocate space for pt_stats");
- ret = -ENOMEM;
- goto out_err;
- }
+void pixel_mgm_slc_dec_refcount(struct memory_group_manager_device* mgm_dev)
+{
+ struct mgm_groups *const data = mgm_dev->data;
- for (i = 0; i < mgm_data->npartitions; i++) {
- struct partition_stats* stats;
- u32 capacity_kb;
- ret = of_property_read_u32_index(mgm_data->dev->of_node, "pt_size", i, &capacity_kb);
- if (ret) {
- dev_err(mgm_data->dev, "failed to read pt_size[%d]", i);
- continue;
- }
+ slc_dec_refcount(&data->slc_data);
+}
+EXPORT_SYMBOL_GPL(pixel_mgm_slc_dec_refcount);
- stats = &mgm_data->pt_stats[i];
- // Convert from KB to bytes
- stats->capacity = (u64)capacity_kb << 10;
- atomic64_set(&stats->size, stats->capacity);
- }
+static int mgm_initialize_data(struct mgm_groups *mgm_data)
+{
+ int i, ret;
+
+ if ((ret = slc_init_data(&mgm_data->slc_data, mgm_data->dev)))
+ goto out_err;
for (i = 0; i < MEMORY_GROUP_MANAGER_NR_GROUPS; i++) {
atomic_set(&mgm_data->groups[i].size, 0);
@@ -898,50 +594,8 @@ static int mgm_initialize_data(struct mgm_groups *mgm_data)
atomic_set(&mgm_data->groups[i].insert_pfn, 0);
atomic_set(&mgm_data->groups[i].update_gpu_pte, 0);
#endif
-
- mgm_data->groups[i].pbha = MGM_PBHA_DEFAULT;
- mgm_data->groups[i].base_pt = 0;
- mgm_data->groups[i].pt_num = 0;
- mgm_data->groups[i].active_pt_idx = 0;
- mgm_data->groups[i].state = MGM_GROUP_STATE_NEW;
- }
-
- /* Discover the partitions belonging to each memory group, skipping the reserved group */
- for (i = 1; i < mgm_data->ngroups; i++) {
- /* Device tree has no description for the reserved group */
- int const dt_idx = i - 1;
-
- int err = of_property_read_u32_index(
- mgm_data->dev->of_node, "group_base_pt", dt_idx, &mgm_data->groups[i].base_pt);
- if (err) {
- dev_warn(mgm_data->dev, "failed to read base pt index for group %d", i);
- continue;
- }
-
- err = of_property_read_u32_index(
- mgm_data->dev->of_node, "group_pt_num", dt_idx, &mgm_data->groups[i].pt_num);
- if (err)
- dev_warn(mgm_data->dev, "failed to read pt number for group %d", i);
}
- /*
- * Initialize SLC partitions. We don't enable partitions until
- * we actually allocate memory to the corresponding memory
- * group
- */
- mgm_data->pt_handle =
- pt_client_register(mgm_data->dev->of_node, (void*)mgm_data, &mgm_resize_callback);
-
- if (IS_ERR(mgm_data->pt_handle)) {
- ret = PTR_ERR(mgm_data->pt_handle);
- dev_err(mgm_data->dev, "pt_client_register returned %d\n", ret);
- goto out_err;
- }
-
- /* We don't use PBHA bits for the reserved memory group, and so
- * it is effectively already initialized.
- */
- mgm_data->groups[MGM_RESERVED_GROUP_ID].state = MGM_GROUP_STATE_ENABLED;
if ((ret = mgm_debugfs_init(mgm_data)))
goto out_err;
@@ -949,20 +603,9 @@ static int mgm_initialize_data(struct mgm_groups *mgm_data)
if ((ret = mgm_sysfs_init(mgm_data)))
goto out_err;
-#ifdef CONFIG_MALI_PIXEL_GPU_SLC
- /* We enable the SLC partition by default to support dynamic SLC caching.
- * Enabling will initialize the partition, by querying the pbha and assigning a ptid.
- * We then immediately disable the partition, effectively resizing the group to zero,
- * whilst still retaining other properties such as pbha.
- */
- enable_partition(mgm_data, MGM_SLC_GROUP_ID);
- disable_partition(mgm_data, MGM_SLC_GROUP_ID);
-#endif
-
return ret;
out_err:
- kfree(mgm_data->pt_stats);
return ret;
}
@@ -983,29 +626,9 @@ static void mgm_term_data(struct mgm_groups *data)
dev_warn(data->dev,
"%zu 9 order pages in group(%d) leaked\n",
(size_t)atomic_read(&group->lp_size), i);
-
- /* Disable partition indices and free the partition */
- switch (group->state) {
-
- case MGM_GROUP_STATE_NEW:
- case MGM_GROUP_STATE_DISABLED:
- /* Nothing to do */
- break;
-
- case MGM_GROUP_STATE_ENABLED:
- pt_client_disable(data->pt_handle, group_active_pt_id(data, i));
- break;
- case MGM_GROUP_STATE_DISABLED_NOT_FREED:
- pt_client_free(data->pt_handle, group_active_pt_id(data, i));
- break;
-
- default:
- dev_err(data->dev, "Group %d in invalid state %d\n",
- i, group->state);
- }
}
- pt_client_unregister(data->pt_handle);
+ slc_term_data(&data->slc_data);
mgm_debugfs_term(data);
mgm_sysfs_term(data);
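For reference, a worked example of the PBHA handling that replaces the per-group partition state machine (the pte and pbha values are made up; on non-GS101 SoCs the PBHA field sits in page-descriptor bits [62:59], per the constants in pixel_slc.c below):

/* Illustrative only: values are made up. */
static u64 example_pbha_round_trip(void)
{
	u64 pte  = 0x0000000012345743ull; /* descriptor with PBHA [62:59] clear */
	u64 pbha = 0x2;                   /* PBHA assigned to the SLC partition */

	pte |=  (pbha & 0xfull) << 59;    /* slc_set_pbha(): 0x1000000012345743 */
	pte &= ~(0xfull << 59);           /* slc_wipe_pbha(): back to original  */

	return pte;
}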
diff --git a/mali_pixel/pixel_slc.c b/mali_pixel/pixel_slc.c
new file mode 100644
index 0000000..45506ab
--- /dev/null
+++ b/mali_pixel/pixel_slc.c
@@ -0,0 +1,405 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright 2024 Google LLC.
+ *
+ * Author: Jack Diver <diverj@google.com>
+ */
+
+#include <linux/atomic.h>
+#include <linux/io.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/dev_printk.h>
+/* Pixel integration includes */
+#include <soc/google/acpm_ipc_ctrl.h>
+#include "pixel_slc.h"
+
+
+/**
+ * DOC: PBHA
+ *
+ * Borr does not have "real" PBHA support. However, since we only use a 36-bit PA on the bus,
+ * AxADDR[39:36] is wired up to the GPU AxUSER[PBHA] field seen by the rest of the system.
+ * Those AxADDR bits come from [39:36] in the page descriptor.
+ *
+ * Odin and Turse have "real" PBHA support using a dedicated output signal and page descriptor field.
+ * The AxUSER[PBHA] field is driven by the GPU's PBHA signal, and AxADDR[39:36] is dropped.
+ * The page descriptor PBHA field is [62:59].
+ *
+ * We could write to both of these locations, as each SoC only reads from its respective PBHA
+ * location with the other being ignored or dropped.
+ *
+ * b/148988078 contains confirmation of the above description.
+ */
+#if IS_ENABLED(CONFIG_SOC_GS101)
+#define PBHA_BIT_POS (36)
+#else
+#define PBHA_BIT_POS (59)
+#endif
+#define PBHA_BIT_MASK (0xf)
+
+#define PARTITION_DISABLE_HYSTERESIS (msecs_to_jiffies(100))
+#define PARTITION_ENABLE_THRESHOLD (7)
+
+
+/**
+ * partition_required() - Determine whether we require a partition to be enabled
+ *
+ * @pt: The partition to check.
+ *
+ * Check whether a partition meets the requirements for being enabled.
+ *
+ * Return: True, if the partition is required to be enabled, otherwise false.
+ */
+static bool partition_required(struct slc_partition *pt)
+{
+ lockdep_assert_held(&pt->lock);
+
+ return (atomic_read(&pt->refcount) && (pt->signal >= PARTITION_ENABLE_THRESHOLD)) ||
+ pt->pinned;
+}
+
+/**
+ * pixel_atomic_dec_and_lock_irqsave - lock on reaching reference count zero
+ *
+ * @val: The atomic counter
+ * @lock: The spinlock in question
+ * @flags: Storage for the current interrupt enable state
+ *
+ * Decrements @val by 1; if the result drops to 0, @lock is taken.
+ *
+ * Return: True if the lock was taken, false for all other cases.
+ */
+static int pixel_atomic_dec_and_lock_irqsave(atomic_t *val, spinlock_t *lock, unsigned long *flags)
+{
+ /* Subtract 1 from the counter unless that would drop it to 0 (i.e. it was 1) */
+ if (atomic_add_unless(val, -1, 1))
+ return 0;
+
+ /* Otherwise do it the slow way */
+ spin_lock_irqsave(lock, *flags);
+ if (atomic_dec_and_test(val))
+ return 1;
+ spin_unlock_irqrestore(lock, *flags);
+
+ return 0;
+}
+
+/**
+ * slc_wipe_pbha - Clear any set PBHA bits from the pte.
+ *
+ * @pte: The pte to strip of PBHA.
+ *
+ * Return: The PTE with all PBHA stripped.
+ */
+u64 slc_wipe_pbha(u64 pte)
+{
+ return pte & ~((u64)PBHA_BIT_MASK << PBHA_BIT_POS);
+}
+
+/**
+ * slc_set_pbha - Apply the PBHA to @pte.
+ *
+ * @data: The &struct slc_data tracking partition information.
+ * @pte: The pte to modify.
+ *
+ * Return: On success, returns a modified PTE. On failure the original PTE is returned.
+ */
+u64 slc_set_pbha(struct slc_data const *data, u64 pte)
+{
+ /* Clear any bits set in the PBHA range */
+ pte = slc_wipe_pbha(pte);
+
+ /* Apply the PBHA for the given virtual partition */
+ return pte | (((u64)data->partition.pbha) & PBHA_BIT_MASK) << PBHA_BIT_POS;
+}
+
+/**
+ * enable_partition - Enable @pt
+ *
+ * @data: The &struct slc_data tracking partition information.
+ * @pt: The &struct slc_partition representing the partition to enable.
+ */
+static void enable_partition(struct slc_data *data, struct slc_partition *pt)
+{
+ /* Skip if already enabled */
+ if (pt->enabled)
+ return;
+
+ (void)pt_client_enable(data->pt_handle, pt->index);
+ pt->enabled = true;
+
+ dev_dbg(data->dev, "enabled partition %d", pt->index);
+}
+
+/**
+ * disable_partition - Disable @pt
+ *
+ * @data: The &struct slc_data tracking partition information.
+ * @pt: The &struct slc_partition representing the partition to disable.
+ */
+static void disable_partition(struct slc_data *data, struct slc_partition *pt)
+{
+ /* Skip if not enabled */
+ if (!pt->enabled)
+ return;
+
+ pt_client_disable_no_free(data->pt_handle, pt->index);
+ pt->enabled = false;
+
+ dev_dbg(data->dev, "disabled partition %d", pt->index);
+}
+
+/**
+ * queue_disable_worker - Queue a delayed partition disable op
+ *
+ * @data: The &struct slc_data tracking partition information.
+ */
+static void queue_disable_worker(struct slc_data *data)
+{
+ queue_delayed_work(system_highpri_wq, &data->disable_work, PARTITION_DISABLE_HYSTERESIS);
+}
+
+/**
+ * partition_disable_worker - Callback to lazily disable a partition
+ *
+ * @work: The &struct work_struct dequeued
+ */
+static void partition_disable_worker(struct work_struct *work)
+{
+ struct slc_data *data = container_of(work, struct slc_data, disable_work.work);
+ struct slc_partition *pt = &data->partition;
+ unsigned long flags;
+
+ /* Complete any pending disable ops */
+ spin_lock_irqsave(&pt->lock, flags);
+
+ if (!partition_required(pt))
+ disable_partition(data, pt);
+
+ spin_unlock_irqrestore(&pt->lock, flags);
+}
+
+/**
+ * slc_inc_refcount - Increase the partition reference count.
+ *
+ * @data: The &struct slc_data tracking partition information.
+ *
+ * If this is the first reference being taken, the partition will be enabled.
+ */
+void slc_inc_refcount(struct slc_data *data)
+{
+ struct slc_partition *pt = &data->partition;
+
+ /* Try to re-enable the partition if this is the first reference */
+ if (atomic_inc_return(&pt->refcount) == 1) {
+ unsigned long flags;
+
+ spin_lock_irqsave(&pt->lock, flags);
+
+ /* Enable the partition immediately if it's required */
+ if (partition_required(pt))
+ enable_partition(data, pt);
+
+ spin_unlock_irqrestore(&pt->lock, flags);
+ }
+}
+
+/**
+ * slc_dec_refcount - Decrease the partition reference count.
+ *
+ * @data: The &struct slc_data tracking partition information.
+ *
+ * If this is the last reference being released, the partition will be disabled.
+ */
+void slc_dec_refcount(struct slc_data *data)
+{
+ struct slc_partition *pt = &data->partition;
+ unsigned long flags;
+
+ /* Disable the partition if this was the last reference */
+ if (pixel_atomic_dec_and_lock_irqsave(&pt->refcount, &pt->lock, &flags)) {
+
+ /* Lazily disable the partition if it's no longer required */
+ if (!partition_required(pt))
+ queue_disable_worker(data);
+
+ spin_unlock_irqrestore(&pt->lock, flags);
+ }
+}
+
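+/**
+ * slc_update_signal - Update the governor signal and re-evaluate the partition state.
+ *
+ * @data: The &struct slc_data tracking partition information.
+ * @signal: New signal value, used only when no ACPM signal buffer is mapped.
+ *
+ * Enables the partition immediately if it is now required, otherwise queues a lazy disable.
+ */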
+void slc_update_signal(struct slc_data *data, u64 signal)
+{
+ struct slc_partition *pt = &data->partition;
+ unsigned long flags;
+
+ spin_lock_irqsave(&pt->lock, flags);
+
+ /* Use ACPM signal when available */
+ if (data->signal)
+ pt->signal = ioread64((u64 __iomem *)data->signal);
+ else
+ pt->signal = signal;
+
+ if (partition_required(pt))
+ /* Enable the partition immediately if it's required */
+ enable_partition(data, pt);
+ else
+ /* Lazily disable the partition if it's no longer required */
+ queue_disable_worker(data);
+
+ spin_unlock_irqrestore(&pt->lock, flags);
+}
+
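+/**
+ * slc_pin - Pin or unpin the partition.
+ *
+ * @data: The &struct slc_data tracking partition information.
+ * @pin: True to pin the partition in the enabled state, false to unpin it.
+ *
+ * Pinning enables the partition immediately. Unpinning queues a lazy disable if the
+ * partition is no longer required.
+ */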
+void slc_pin(struct slc_data *data, bool pin)
+{
+ struct slc_partition *pt = &data->partition;
+ unsigned long flags;
+
+ spin_lock_irqsave(&pt->lock, flags);
+
+ pt->pinned = pin;
+ if (pin)
+ enable_partition(data, pt);
+ else if (!partition_required(pt))
+ queue_disable_worker(data);
+
+ spin_unlock_irqrestore(&pt->lock, flags);
+}
+
+/**
+ * init_partition - Register and initialize a partition with the SLC driver.
+ *
+ * @data: The &struct slc_data tracking partition information.
+ * @pt: The &struct slc_partition to store the configured partition information.
+ * @index: The index of the partition, relative to the DT node.
+ *
+ * Return: 0 on success, -EINVAL on failure.
+ */
+static int init_partition(struct slc_data *data, struct slc_partition *pt, u32 index)
+{
+ ptid_t ptid;
+ ptpbha_t pbha;
+ int err = -EINVAL;
+
+ ptid = pt_client_enable(data->pt_handle, index);
+ if (ptid == PT_PTID_INVALID) {
+ dev_err(data->dev, "failed to enable pt: %d\n", index);
+ goto err_exit;
+ }
+
+ pbha = pt_pbha(data->dev->of_node, index);
+ if (pbha == PT_PBHA_INVALID) {
+ dev_err(data->dev, "failed to get PBHA for pt: %d\n", index);
+ goto err_exit;
+ }
+
+ /* This retains the allocated ptid */
+ pt_client_disable_no_free(data->pt_handle, index);
+
+ /* Success */
+ err = 0;
+
+ *pt = (struct slc_partition) {
+ .index = index,
+ .ptid = ptid,
+ .pbha = pbha,
+ .enabled = false,
+ .refcount = ATOMIC_INIT(0),
+ .signal = 0,
+ .pinned = false,
+ };
+ spin_lock_init(&pt->lock);
+
+err_exit:
+ return err;
+}
+
+
+/**
+ * term_partition - Disable and free a partition, unregistering it.
+ *
+ * @data: The &struct slc_data tracking partition information.
+ * @pt: The &struct slc_partition to terminate.
+ */
+static void term_partition(struct slc_data *data, struct slc_partition *pt)
+{
+ disable_partition(data, pt);
+ pt_client_free(data->pt_handle, pt->index);
+}
+
+/**
+ * slc_init_data - Read all SLC partition information, init the partitions, and track within @data.
+ *
+ * @data: The &struct slc_data tracking partition information.
+ * @dev: The platform device associated with the parent node.
+ *
+ * Return: On success, returns 0. On failure an error code is returned.
+ */
+int slc_init_data(struct slc_data *data, struct device *dev)
+{
+ int ret = -EINVAL;
+
+ if (data == NULL || dev == NULL)
+ goto err_exit;
+
+ /* Inherit the platform device */
+ data->dev = dev;
+
+ INIT_DELAYED_WORK(&data->disable_work, partition_disable_worker);
+
+ /* Register our node with the SLC driver.
+ * This detects our partitions defined within the DT.
+ */
+ data->pt_handle = pt_client_register(data->dev->of_node, NULL, NULL);
+ if (IS_ERR(data->pt_handle)) {
+ ret = PTR_ERR(data->pt_handle);
+ dev_err(data->dev, "pt_client_register failed with: %d\n", ret);
+ goto err_exit;
+ }
+
+ if (IS_ENABLED(PIXEL_GPU_SLC_ACPM_SIGNAL)) {
+ u32 size;
+
+ /* Obtain a handle to the ACPM provided GPU partition signal */
+ if ((ret = acpm_ipc_get_buffer("GPU_SIGNAL", &data->signal, &size))) {
+ dev_err(data->dev, "failed to retrieve SLC GPU signal: %d", ret);
+ goto err_exit;
+ }
+
+ /* Validate the signal buffer size */
+ if (size != sizeof(u64)) {
+ dev_err(data->dev, "SLC GPU signal size incorrect: %d", size);
+ goto err_exit;
+ }
+ }
+
+ if ((ret = init_partition(data, &data->partition, 0)))
+ goto pt_init_err_exit;
+
+ return 0;
+
+pt_init_err_exit:
+ pt_client_unregister(data->pt_handle);
+
+err_exit:
+ return ret;
+}
+
+/**
+ * slc_term_data - Tear down SLC partitions and free tracking data.
+ *
+ * @data: The &struct slc_data tracking partition information.
+ */
+void slc_term_data(struct slc_data *data)
+{
+ /* Ensure all pending disable ops are complete */
+ cancel_delayed_work_sync(&data->disable_work);
+
+ term_partition(data, &data->partition);
+
+ pt_client_unregister(data->pt_handle);
+}
diff --git a/mali_pixel/pixel_slc.h b/mali_pixel/pixel_slc.h
new file mode 100644
index 0000000..cb8e90d
--- /dev/null
+++ b/mali_pixel/pixel_slc.h
@@ -0,0 +1,98 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright 2024 Google LLC.
+ *
+ * Author: Jack Diver <diverj@google.com>
+ */
+#ifndef _PIXEL_SLC_H_
+#define _PIXEL_SLC_H_
+
+#include <soc/google/pt.h>
+
+/**
+ * DOC: SLC partition management
+ *
+ * Key definitions:
+ * + Partition index - The unique index of a partition, relative to the DT node that owns it.
+ * This index is used when communicating with the underlying SLC driver.
+ * + ptid - This is the HW-level ID associated with an enabled partition. These IDs are allocated
+ * at partition enable time. The GPU driver will never directly use the ptid, but will
+ * track it.
+ * External analysis of the caching behavior (e.g. hit and eviction counters) is
+ * associated with a ptid, not a physical partition index.
+ * This driver attempts to hold on to any allocated ptids until driver termination to make
+ * profiling of caching performance easier.
+ * + PBHA - Acronym: Page Based Hardware Attributes. Every physical partition has a PBHA value
+ * associated with it. We insert these attributes into PTEs so that transactions with a
+ * page carry the PBHA within their high bits.
+ * Transactions with PBHA bits set are intercepted by the SLC, where the corresponding
+ * partition and its caching behavior (read/write allocation, etc.) are looked up and applied to
+ * the transaction. A minimal usage sketch of this interface follows the declarations below.
+ */
+
+/**
+ * struct slc_partition - Structure for tracking partition state.
+ */
+struct slc_partition {
+ /** @index: The index of this partition, relative to the owning DT node */
+ u32 index;
+
+ /** @ptid: The active partition ID for this virtual partition */
+ ptid_t ptid;
+
+ /** @pbha: The page based HW attributes for this partition */
+ ptpbha_t pbha;
+
+ /** @enabled: Is the partition currently enabled */
+ bool enabled;
+
+ /** @refcount: Reference count for this partition */
+ atomic_t refcount;
+
+ /** @lock: Lock protecting enable/disable ops on this partition */
+ spinlock_t lock;
+
+ /** @signal: Partition enable/disable signal from SLC governor */
+ u64 signal;
+
+ /** @pinned: Is the partition pinned to the enabled state */
+ bool pinned;
+};
+
+/**
+ * struct slc_data - Structure for tracking SLC context.
+ */
+struct slc_data {
+ /** @pt_handle: Link to ACPM SLC partition data */
+ struct pt_handle *pt_handle;
+
+ /** @partition: Information specific to an individual SLC partition */
+ struct slc_partition partition;
+
+ /** @dev: Inherited pointer to device attached */
+ struct device *dev;
+
+ /** @disable_work: Work item used to queue lazy SLC partition disable ops. */
+ struct delayed_work disable_work;
+
+ /** @signal: Mapped ACPM buffer carrying the partition enable/disable signal from the SLC governor. */
+ char __iomem *signal;
+};
+
+int slc_init_data(struct slc_data *data, struct device *dev);
+
+void slc_term_data(struct slc_data *data);
+
+u64 slc_set_pbha(struct slc_data const *data, u64 pte);
+
+u64 slc_wipe_pbha(u64 pte);
+
+void slc_inc_refcount(struct slc_data *data);
+
+void slc_dec_refcount(struct slc_data *data);
+
+void slc_pin(struct slc_data *data, bool pin);
+
+void slc_update_signal(struct slc_data *data, u64 signal);
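+
+/*
+ * Minimal usage sketch (illustrative only; "pdev", "pte" and "signal" are assumed to come
+ * from the integrating driver, e.g. the memory group manager):
+ *
+ *   struct slc_data data;
+ *   int ret = slc_init_data(&data, &pdev->dev);
+ *   if (ret)
+ *       return ret;
+ *
+ *   slc_inc_refcount(&data);           // first reference may enable the partition
+ *   pte = slc_set_pbha(&data, pte);    // tag a PTE with the partition's PBHA
+ *   slc_update_signal(&data, signal);  // feed the governor signal
+ *   slc_dec_refcount(&data);           // last reference queues a lazy disable
+ *
+ *   slc_term_data(&data);              // tear down on driver removal
+ */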
+
+#endif /* _PIXEL_SLC_H_ */