path: root/mali_kbase/mali_kbase_mem.c
Diffstat (limited to 'mali_kbase/mali_kbase_mem.c')
-rw-r--r--  mali_kbase/mali_kbase_mem.c | 1803
1 file changed, 1054 insertions(+), 749 deletions(-)
diff --git a/mali_kbase/mali_kbase_mem.c b/mali_kbase/mali_kbase_mem.c
index 6562f01..5547bef 100644
--- a/mali_kbase/mali_kbase_mem.c
+++ b/mali_kbase/mali_kbase_mem.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -43,6 +43,11 @@
#include <mmu/mali_kbase_mmu.h>
#include <mali_kbase_config_defaults.h>
#include <mali_kbase_trace_gpu_mem.h>
+#include <linux/version_compat_defs.h>
+#define VA_REGION_SLAB_NAME_PREFIX "va-region-slab-"
+#define VA_REGION_SLAB_NAME_SIZE (DEVNAME_SIZE + sizeof(VA_REGION_SLAB_NAME_PREFIX) + 1)
+
+#if MALI_JIT_PRESSURE_LIMIT_BASE
/*
* Alignment of objects allocated by the GPU inside a just-in-time memory
@@ -66,6 +71,7 @@
*/
#define KBASE_GPU_ALLOCATED_OBJECT_MAX_BYTES (512u)
+#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */
/* Forward declarations */
static void free_partial_locked(struct kbase_context *kctx,
@@ -89,68 +95,72 @@ static size_t kbase_get_num_cpu_va_bits(struct kbase_context *kctx)
#error "Unknown CPU VA width for this architecture"
#endif
-#if IS_ENABLED(CONFIG_64BIT)
- if (kbase_ctx_flag(kctx, KCTX_COMPAT))
+ if (kbase_ctx_compat_mode(kctx))
cpu_va_bits = 32;
-#endif
return cpu_va_bits;
}
-/* This function finds out which RB tree the given pfn from the GPU VA belongs
- * to based on the memory zone the pfn refers to
- */
-static struct rb_root *kbase_gpu_va_to_rbtree(struct kbase_context *kctx,
- u64 gpu_pfn)
+unsigned long kbase_zone_to_bits(enum kbase_memory_zone zone)
{
- struct rb_root *rbtree = NULL;
+ return ((((unsigned long)zone) & ((1 << KBASE_REG_ZONE_BITS) - 1ul))
+ << KBASE_REG_ZONE_SHIFT);
+}
- struct kbase_reg_zone *exec_va_zone = kbase_ctx_reg_zone_get(kctx, KBASE_REG_ZONE_EXEC_VA);
+enum kbase_memory_zone kbase_bits_to_zone(unsigned long zone_bits)
+{
+ return (enum kbase_memory_zone)(((zone_bits) & KBASE_REG_ZONE_MASK)
+ >> KBASE_REG_ZONE_SHIFT);
+}
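
These two helpers are inverses: the zone id is packed into the KBASE_REG_ZONE_MASK bit-field of the region flags and recovered from it. A minimal sketch of the round-trip property, assuming only the shift/mask relationship visible above (a hypothetical self-check, not part of the patch):

    /* Hypothetical self-check: kbase_bits_to_zone() must invert
     * kbase_zone_to_bits() for every id that fits in KBASE_REG_ZONE_BITS.
     */
    static void zone_bits_roundtrip_check(void)
    {
    	enum kbase_memory_zone zone;

    	for (zone = 0; zone < (1 << KBASE_REG_ZONE_BITS); zone++) {
    		unsigned long bits = kbase_zone_to_bits(zone);

    		/* The packed value sits entirely inside the zone mask... */
    		WARN_ON(bits & ~KBASE_REG_ZONE_MASK);
    		/* ...and unpacking restores the original id. */
    		WARN_ON(kbase_bits_to_zone(bits) != zone);
    	}
    }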
+char *kbase_reg_zone_get_name(enum kbase_memory_zone zone)
+{
+ switch (zone) {
+ case SAME_VA_ZONE:
+ return "SAME_VA";
+ case CUSTOM_VA_ZONE:
+ return "CUSTOM_VA";
+ case EXEC_VA_ZONE:
+ return "EXEC_VA";
#if MALI_USE_CSF
- struct kbase_reg_zone *fixed_va_zone =
- kbase_ctx_reg_zone_get(kctx, KBASE_REG_ZONE_FIXED_VA);
-
- struct kbase_reg_zone *exec_fixed_va_zone =
- kbase_ctx_reg_zone_get(kctx, KBASE_REG_ZONE_EXEC_FIXED_VA);
-
- if (gpu_pfn >= fixed_va_zone->base_pfn) {
- rbtree = &kctx->reg_rbtree_fixed;
- return rbtree;
- } else if (gpu_pfn >= exec_fixed_va_zone->base_pfn) {
- rbtree = &kctx->reg_rbtree_exec_fixed;
- return rbtree;
- }
+ case MCU_SHARED_ZONE:
+ return "MCU_SHARED";
+ case EXEC_FIXED_VA_ZONE:
+ return "EXEC_FIXED_VA";
+ case FIXED_VA_ZONE:
+ return "FIXED_VA";
#endif
- if (gpu_pfn >= exec_va_zone->base_pfn)
- rbtree = &kctx->reg_rbtree_exec;
- else {
- u64 same_va_end;
+ default:
+ return NULL;
+ }
+}
-#if IS_ENABLED(CONFIG_64BIT)
- if (kbase_ctx_flag(kctx, KCTX_COMPAT)) {
-#endif /* CONFIG_64BIT */
- same_va_end = KBASE_REG_ZONE_CUSTOM_VA_BASE;
-#if IS_ENABLED(CONFIG_64BIT)
- } else {
- struct kbase_reg_zone *same_va_zone =
- kbase_ctx_reg_zone_get(kctx,
- KBASE_REG_ZONE_SAME_VA);
- same_va_end = kbase_reg_zone_end_pfn(same_va_zone);
- }
-#endif /* CONFIG_64BIT */
+/**
+ * kbase_gpu_pfn_to_rbtree - find the rb-tree tracking the region with the indicated GPU
+ * page frame number
+ * @kctx: kbase context
+ * @gpu_pfn: GPU PFN address
+ *
+ * Context: any context.
+ *
+ * Return: reference to the rb-tree root, NULL if not found
+ */
+static struct rb_root *kbase_gpu_pfn_to_rbtree(struct kbase_context *kctx, u64 gpu_pfn)
+{
+ enum kbase_memory_zone zone_idx;
+ struct kbase_reg_zone *zone;
- if (gpu_pfn >= same_va_end)
- rbtree = &kctx->reg_rbtree_custom;
- else
- rbtree = &kctx->reg_rbtree_same;
+ for (zone_idx = 0; zone_idx < CONTEXT_ZONE_MAX; zone_idx++) {
+ zone = &kctx->reg_zone[zone_idx];
+ if ((gpu_pfn >= zone->base_pfn) && (gpu_pfn < kbase_reg_zone_end_pfn(zone)))
+ return &zone->reg_rbtree;
}
- return rbtree;
+ return NULL;
}
/* This function inserts a region into the tree. */
-static void kbase_region_tracker_insert(struct kbase_va_region *new_reg)
+void kbase_region_tracker_insert(struct kbase_va_region *new_reg)
{
u64 start_pfn = new_reg->start_pfn;
struct rb_node **link = NULL;
@@ -251,7 +261,9 @@ struct kbase_va_region *kbase_region_tracker_find_region_enclosing_address(
lockdep_assert_held(&kctx->reg_lock);
- rbtree = kbase_gpu_va_to_rbtree(kctx, gpu_pfn);
+ rbtree = kbase_gpu_pfn_to_rbtree(kctx, gpu_pfn);
+ if (unlikely(!rbtree))
+ return NULL;
return kbase_find_region_enclosing_address(rbtree, gpu_addr);
}
@@ -289,7 +301,9 @@ struct kbase_va_region *kbase_region_tracker_find_region_base_address(
lockdep_assert_held(&kctx->reg_lock);
- rbtree = kbase_gpu_va_to_rbtree(kctx, gpu_pfn);
+ rbtree = kbase_gpu_pfn_to_rbtree(kctx, gpu_pfn);
+ if (unlikely(!rbtree))
+ return NULL;
return kbase_find_region_base_address(rbtree, gpu_addr);
}
@@ -376,10 +390,12 @@ void kbase_remove_va_region(struct kbase_device *kbdev,
struct kbase_va_region *reg)
{
struct rb_node *rbprev;
+ struct kbase_reg_zone *zone = container_of(reg->rbtree, struct kbase_reg_zone, reg_rbtree);
struct kbase_va_region *prev = NULL;
struct rb_node *rbnext;
struct kbase_va_region *next = NULL;
struct rb_root *reg_rbtree = NULL;
+ struct kbase_va_region *orig_reg = reg;
int merged_front = 0;
int merged_back = 0;
@@ -399,8 +415,8 @@ void kbase_remove_va_region(struct kbase_device *kbdev,
*/
u64 prev_end_pfn = prev->start_pfn + prev->nr_pages;
- WARN_ON((prev->flags & KBASE_REG_ZONE_MASK) !=
- (reg->flags & KBASE_REG_ZONE_MASK));
+ WARN_ON((kbase_bits_to_zone(prev->flags)) !=
+ (kbase_bits_to_zone(reg->flags)));
if (!WARN_ON(reg->start_pfn < prev_end_pfn))
prev->nr_pages += reg->start_pfn - prev_end_pfn;
prev->nr_pages += reg->nr_pages;
@@ -421,32 +437,30 @@ void kbase_remove_va_region(struct kbase_device *kbdev,
*/
u64 reg_end_pfn = reg->start_pfn + reg->nr_pages;
- WARN_ON((next->flags & KBASE_REG_ZONE_MASK) !=
- (reg->flags & KBASE_REG_ZONE_MASK));
+ WARN_ON((kbase_bits_to_zone(next->flags)) !=
+ (kbase_bits_to_zone(reg->flags)));
if (!WARN_ON(next->start_pfn < reg_end_pfn))
next->nr_pages += next->start_pfn - reg_end_pfn;
next->start_pfn = reg->start_pfn;
next->nr_pages += reg->nr_pages;
rb_erase(&(reg->rblink), reg_rbtree);
merged_back = 1;
- if (merged_front) {
- /* We already merged with prev, free it */
- kfree(reg);
- }
}
}
- /* If we failed to merge then we need to add a new block */
- if (!(merged_front || merged_back)) {
+ if (merged_front && merged_back) {
+ /* We already merged with prev, free it */
+ kfree(reg);
+ } else if (!(merged_front || merged_back)) {
+ /* If we failed to merge then we need to add a new block */
+
/*
* We didn't merge anything. Try to add a new free
* placeholder, and in any case, remove the original one.
*/
struct kbase_va_region *free_reg;
- free_reg = kbase_alloc_free_region(reg_rbtree,
- reg->start_pfn, reg->nr_pages,
- reg->flags & KBASE_REG_ZONE_MASK);
+ free_reg = kbase_alloc_free_region(zone, reg->start_pfn, reg->nr_pages);
if (!free_reg) {
/* In case of failure, we cannot allocate a replacement
* free region, so we will be left with a 'gap' in the
@@ -477,6 +491,12 @@ void kbase_remove_va_region(struct kbase_device *kbdev,
rb_replace_node(&(reg->rblink), &(free_reg->rblink), reg_rbtree);
}
+ /* This operation is always safe because the function never frees
+ * the region passed in. If the region has been merged at both front and
+ * back, it is the previous placeholder region that gets freed instead.
+ */
+ orig_reg->start_pfn = 0;
+
out:
return;
}
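
The merge bookkeeping above is easier to follow with concrete numbers. A worked walk-through with hypothetical PFNs; note that after a front merge the driver re-points reg at prev in context lines not shown in this hunk, which is why the merged-both branch can free reg:

    /* Removing mapped region R = [0x100, 0x180) (hypothetical PFNs):
     *
     * - only prev [0x080, 0x100) is free: prev grows to [0x080, 0x180)
     *   and R is erased from the tree; freeing R is left to the caller.
     * - only next [0x180, 0x200) is free: next grows to [0x100, 0x200)
     *   and R is erased; again the caller frees R.
     * - both are free: prev has already absorbed R, so next absorbs the
     *   grown prev and the prev placeholder is kfree'd here.
     * - neither is free: a fresh placeholder is allocated and
     *   rb_replace_node() swaps it in for R.
     *
     * In all four cases the caller's region object survives this
     * function, which is what makes clearing orig_reg->start_pfn safe.
     */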
@@ -487,6 +507,7 @@ KBASE_EXPORT_TEST_API(kbase_remove_va_region);
* kbase_insert_va_region_nolock - Insert a VA region to the list,
* replacing the existing one.
*
+ * @kbdev: The kbase device
* @new_reg: The new region to insert
* @at_reg: The region to replace
* @start_pfn: The Page Frame Number to insert at
@@ -494,10 +515,14 @@ KBASE_EXPORT_TEST_API(kbase_remove_va_region);
*
* Return: 0 on success, error code otherwise.
*/
-static int kbase_insert_va_region_nolock(struct kbase_va_region *new_reg,
- struct kbase_va_region *at_reg, u64 start_pfn, size_t nr_pages)
+static int kbase_insert_va_region_nolock(struct kbase_device *kbdev,
+ struct kbase_va_region *new_reg,
+ struct kbase_va_region *at_reg, u64 start_pfn,
+ size_t nr_pages)
{
struct rb_root *reg_rbtree = NULL;
+ struct kbase_reg_zone *zone =
+ container_of(at_reg->rbtree, struct kbase_reg_zone, reg_rbtree);
int err = 0;
reg_rbtree = at_reg->rbtree;
@@ -539,10 +564,8 @@ static int kbase_insert_va_region_nolock(struct kbase_va_region *new_reg,
else {
struct kbase_va_region *new_front_reg;
- new_front_reg = kbase_alloc_free_region(reg_rbtree,
- at_reg->start_pfn,
- start_pfn - at_reg->start_pfn,
- at_reg->flags & KBASE_REG_ZONE_MASK);
+ new_front_reg = kbase_alloc_free_region(zone, at_reg->start_pfn,
+ start_pfn - at_reg->start_pfn);
if (new_front_reg) {
at_reg->nr_pages -= nr_pages + new_front_reg->nr_pages;
@@ -595,9 +618,9 @@ int kbase_add_va_region(struct kbase_context *kctx,
#endif
if (!(reg->flags & KBASE_REG_GPU_NX) && !addr &&
#if MALI_USE_CSF
- ((reg->flags & KBASE_REG_ZONE_MASK) != KBASE_REG_ZONE_EXEC_FIXED_VA) &&
+ ((kbase_bits_to_zone(reg->flags)) != EXEC_FIXED_VA_ZONE) &&
#endif
- ((reg->flags & KBASE_REG_ZONE_MASK) != KBASE_REG_ZONE_EXEC_VA)) {
+ ((kbase_bits_to_zone(reg->flags)) != EXEC_VA_ZONE)) {
if (cpu_va_bits > gpu_pc_bits) {
align = max(align, (size_t)((1ULL << gpu_pc_bits)
>> PAGE_SHIFT));
@@ -615,8 +638,7 @@ int kbase_add_va_region(struct kbase_context *kctx,
* then don't retry, we're out of VA and there is
* nothing which can be done about it.
*/
- if ((reg->flags & KBASE_REG_ZONE_MASK) !=
- KBASE_REG_ZONE_CUSTOM_VA)
+ if ((kbase_bits_to_zone(reg->flags)) != CUSTOM_VA_ZONE)
break;
} while (kbase_jit_evict(kctx));
@@ -679,8 +701,7 @@ int kbase_add_va_region_rbtree(struct kbase_device *kbdev,
goto exit;
}
- err = kbase_insert_va_region_nolock(reg, tmp, gpu_pfn,
- nr_pages);
+ err = kbase_insert_va_region_nolock(kbdev, reg, tmp, gpu_pfn, nr_pages);
if (err) {
dev_warn(dev, "Failed to insert va region");
err = -ENOMEM;
@@ -705,8 +726,7 @@ int kbase_add_va_region_rbtree(struct kbase_device *kbdev,
nr_pages, align_offset, align_mask,
&start_pfn);
if (tmp) {
- err = kbase_insert_va_region_nolock(reg, tmp,
- start_pfn, nr_pages);
+ err = kbase_insert_va_region_nolock(kbdev, reg, tmp, start_pfn, nr_pages);
if (unlikely(err)) {
dev_warn(dev, "Failed to insert region: 0x%08llx start_pfn, %zu nr_pages",
start_pfn, nr_pages);
@@ -722,85 +742,27 @@ exit:
return err;
}
-/*
- * @brief Initialize the internal region tracker data structure.
+/**
+ * kbase_reg_to_kctx - Obtain the kbase context tracking a VA region.
+ * @reg: VA region
+ *
+ * Return:
+ * * pointer to kbase context of the memory allocation
+ * * NULL if the region does not belong to a kbase context (for instance,
+ * if the allocation corresponds to a shared MCU region on CSF).
*/
-#if MALI_USE_CSF
-static void kbase_region_tracker_ds_init(struct kbase_context *kctx,
- struct kbase_va_region *same_va_reg,
- struct kbase_va_region *custom_va_reg,
- struct kbase_va_region *exec_va_reg,
- struct kbase_va_region *exec_fixed_va_reg,
- struct kbase_va_region *fixed_va_reg)
-{
- u64 last_zone_end_pfn;
-
- kctx->reg_rbtree_same = RB_ROOT;
- kbase_region_tracker_insert(same_va_reg);
-
- last_zone_end_pfn = same_va_reg->start_pfn + same_va_reg->nr_pages;
-
- /* Although custom_va_reg doesn't always exist, initialize
- * unconditionally because of the mem_view debugfs
- * implementation which relies on it being empty.
- */
- kctx->reg_rbtree_custom = RB_ROOT;
- kctx->reg_rbtree_exec = RB_ROOT;
-
- if (custom_va_reg) {
- WARN_ON(custom_va_reg->start_pfn < last_zone_end_pfn);
- kbase_region_tracker_insert(custom_va_reg);
- last_zone_end_pfn = custom_va_reg->start_pfn + custom_va_reg->nr_pages;
- }
-
- /* Initialize exec, fixed and exec_fixed. These are always
- * initialized at this stage, if they will exist at all.
- */
- kctx->reg_rbtree_fixed = RB_ROOT;
- kctx->reg_rbtree_exec_fixed = RB_ROOT;
-
- if (exec_va_reg) {
- WARN_ON(exec_va_reg->start_pfn < last_zone_end_pfn);
- kbase_region_tracker_insert(exec_va_reg);
- last_zone_end_pfn = exec_va_reg->start_pfn + exec_va_reg->nr_pages;
- }
-
- if (exec_fixed_va_reg) {
- WARN_ON(exec_fixed_va_reg->start_pfn < last_zone_end_pfn);
- kbase_region_tracker_insert(exec_fixed_va_reg);
- last_zone_end_pfn = exec_fixed_va_reg->start_pfn + exec_fixed_va_reg->nr_pages;
- }
-
- if (fixed_va_reg) {
- WARN_ON(fixed_va_reg->start_pfn < last_zone_end_pfn);
- kbase_region_tracker_insert(fixed_va_reg);
- last_zone_end_pfn = fixed_va_reg->start_pfn + fixed_va_reg->nr_pages;
- }
-}
-#else
-static void kbase_region_tracker_ds_init(struct kbase_context *kctx,
- struct kbase_va_region *same_va_reg,
- struct kbase_va_region *custom_va_reg)
+static struct kbase_context *kbase_reg_to_kctx(struct kbase_va_region *reg)
{
- kctx->reg_rbtree_same = RB_ROOT;
- kbase_region_tracker_insert(same_va_reg);
+ struct rb_root *rbtree = reg->rbtree;
+ struct kbase_reg_zone *zone = container_of(rbtree, struct kbase_reg_zone, reg_rbtree);
- /* Although custom_va_reg and exec_va_reg don't always exist,
- * initialize unconditionally because of the mem_view debugfs
- * implementation which relies on them being empty.
- *
- * The difference between the two is that the EXEC_VA region
- * is never initialized at this stage.
- */
- kctx->reg_rbtree_custom = RB_ROOT;
- kctx->reg_rbtree_exec = RB_ROOT;
+ if (!kbase_is_ctx_reg_zone(zone->id))
+ return NULL;
- if (custom_va_reg)
- kbase_region_tracker_insert(custom_va_reg);
+ return container_of(zone - zone->id, struct kbase_context, reg_zone[0]);
}
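
The container_of() arithmetic relies on two assumptions: the context stores its zones in the reg_zone[] array indexed by zone id, and each zone's id field matches its index (presumably set by kbase_reg_zone_init). A hypothetical invariant check, not driver code, spelling that out:

    /* If zone == &kctx->reg_zone[zone->id], stepping back zone->id
     * elements lands on &kctx->reg_zone[0] and container_of() recovers
     * the owning context.
     */
    static void reg_to_kctx_invariant_check(struct kbase_context *kctx)
    {
    	enum kbase_memory_zone idx;

    	for (idx = 0; idx < CONTEXT_ZONE_MAX; idx++) {
    		struct kbase_reg_zone *zone = &kctx->reg_zone[idx];

    		WARN_ON(zone->id != idx);
    		WARN_ON(container_of(zone - zone->id, struct kbase_context,
    				     reg_zone[0]) != kctx);
    	}
    }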
-#endif /* MALI_USE_CSF */
-static void kbase_region_tracker_erase_rbtree(struct rb_root *rbtree)
+void kbase_region_tracker_erase_rbtree(struct rb_root *rbtree)
{
struct rb_node *rbnode;
struct kbase_va_region *reg;
@@ -810,7 +772,13 @@ static void kbase_region_tracker_erase_rbtree(struct rb_root *rbtree)
if (rbnode) {
rb_erase(rbnode, rbtree);
reg = rb_entry(rbnode, struct kbase_va_region, rblink);
- WARN_ON(reg->va_refcnt != 1);
+ WARN_ON(kbase_refcount_read(&reg->va_refcnt) != 1);
+ if (kbase_is_page_migration_enabled()) {
+ struct kbase_context *kctx = kbase_reg_to_kctx(reg);
+
+ if (kctx)
+ kbase_gpu_munmap(kctx, reg);
+ }
/* Reset the start_pfn - as the rbtree is being
* destroyed and we've already erased this region, there
* is no further need to attempt to remove it.
@@ -825,214 +793,261 @@ static void kbase_region_tracker_erase_rbtree(struct rb_root *rbtree)
} while (rbnode);
}
-void kbase_region_tracker_term(struct kbase_context *kctx)
-{
- kbase_gpu_vm_lock(kctx);
- kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_same);
- kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_custom);
- kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_exec);
-#if MALI_USE_CSF
- WARN_ON(!list_empty(&kctx->csf.event_pages_head));
- kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_exec_fixed);
- kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_fixed);
-
-#endif
- kbase_gpu_vm_unlock(kctx);
-}
-
-void kbase_region_tracker_term_rbtree(struct rb_root *rbtree)
-{
- kbase_region_tracker_erase_rbtree(rbtree);
-}
-
static size_t kbase_get_same_va_bits(struct kbase_context *kctx)
{
return min_t(size_t, kbase_get_num_cpu_va_bits(kctx),
kctx->kbdev->gpu_props.mmu.va_bits);
}
-int kbase_region_tracker_init(struct kbase_context *kctx)
+static int kbase_reg_zone_same_va_init(struct kbase_context *kctx, u64 gpu_va_limit)
{
- struct kbase_va_region *same_va_reg;
- struct kbase_va_region *custom_va_reg = NULL;
- size_t same_va_bits = kbase_get_same_va_bits(kctx);
- u64 custom_va_size = KBASE_REG_ZONE_CUSTOM_VA_SIZE;
- u64 gpu_va_bits = kctx->kbdev->gpu_props.mmu.va_bits;
- u64 gpu_va_limit = (1ULL << gpu_va_bits) >> PAGE_SHIFT;
- u64 same_va_pages;
- u64 same_va_base = 1u;
int err;
-#if MALI_USE_CSF
- struct kbase_va_region *exec_va_reg;
- struct kbase_va_region *exec_fixed_va_reg;
- struct kbase_va_region *fixed_va_reg;
-
- u64 exec_va_base;
- u64 fixed_va_end;
- u64 exec_fixed_va_base;
- u64 fixed_va_base;
- u64 fixed_va_pages;
-#endif
-
- /* Take the lock as kbase_free_alloced_region requires it */
- kbase_gpu_vm_lock(kctx);
+ struct kbase_reg_zone *zone = kbase_ctx_reg_zone_get(kctx, SAME_VA_ZONE);
+ const size_t same_va_bits = kbase_get_same_va_bits(kctx);
+ const u64 base_pfn = 1u;
+ u64 nr_pages = (1ULL << (same_va_bits - PAGE_SHIFT)) - base_pfn;
- same_va_pages = (1ULL << (same_va_bits - PAGE_SHIFT)) - same_va_base;
+ lockdep_assert_held(&kctx->reg_lock);
#if MALI_USE_CSF
- if ((same_va_base + same_va_pages) > KBASE_REG_ZONE_EXEC_VA_BASE_64) {
+ if ((base_pfn + nr_pages) > KBASE_REG_ZONE_EXEC_VA_BASE_64) {
/* Depending on how the kernel is configured, it's possible (e.g. on aarch64) for
* same_va_bits to reach 48 bits. Cap same_va_pages so that the same_va zone
* doesn't cross into the exec_va zone.
*/
- same_va_pages = KBASE_REG_ZONE_EXEC_VA_BASE_64 - same_va_base;
+ nr_pages = KBASE_REG_ZONE_EXEC_VA_BASE_64 - base_pfn;
}
#endif
+ err = kbase_reg_zone_init(kctx->kbdev, zone, SAME_VA_ZONE, base_pfn, nr_pages);
+ if (err)
+ return -ENOMEM;
- /* all have SAME_VA */
- same_va_reg =
- kbase_alloc_free_region(&kctx->reg_rbtree_same, same_va_base,
- same_va_pages, KBASE_REG_ZONE_SAME_VA);
+ kctx->gpu_va_end = base_pfn + nr_pages;
- if (!same_va_reg) {
- err = -ENOMEM;
- goto fail_unlock;
- }
- kbase_ctx_reg_zone_init(kctx, KBASE_REG_ZONE_SAME_VA, same_va_base,
- same_va_pages);
+ return 0;
+}
-#if IS_ENABLED(CONFIG_64BIT)
- /* 32-bit clients have custom VA zones */
- if (kbase_ctx_flag(kctx, KCTX_COMPAT)) {
-#endif
- if (gpu_va_limit <= KBASE_REG_ZONE_CUSTOM_VA_BASE) {
- err = -EINVAL;
- goto fail_free_same_va;
- }
- /* If the current size of TMEM is out of range of the
- * virtual address space addressable by the MMU then
- * we should shrink it to fit
- */
- if ((KBASE_REG_ZONE_CUSTOM_VA_BASE + KBASE_REG_ZONE_CUSTOM_VA_SIZE) >= gpu_va_limit)
- custom_va_size = gpu_va_limit - KBASE_REG_ZONE_CUSTOM_VA_BASE;
+static void kbase_reg_zone_same_va_term(struct kbase_context *kctx)
+{
+ struct kbase_reg_zone *zone = kbase_ctx_reg_zone_get(kctx, SAME_VA_ZONE);
- custom_va_reg = kbase_alloc_free_region(
- &kctx->reg_rbtree_custom,
- KBASE_REG_ZONE_CUSTOM_VA_BASE,
- custom_va_size, KBASE_REG_ZONE_CUSTOM_VA);
+ kbase_reg_zone_term(zone);
+}
- if (!custom_va_reg) {
- err = -ENOMEM;
- goto fail_free_same_va;
- }
- kbase_ctx_reg_zone_init(kctx, KBASE_REG_ZONE_CUSTOM_VA,
- KBASE_REG_ZONE_CUSTOM_VA_BASE,
- custom_va_size);
-#if IS_ENABLED(CONFIG_64BIT)
- } else {
- custom_va_size = 0;
- }
-#endif
+static int kbase_reg_zone_custom_va_init(struct kbase_context *kctx, u64 gpu_va_limit)
+{
+ struct kbase_reg_zone *zone = kbase_ctx_reg_zone_get(kctx, CUSTOM_VA_ZONE);
+ u64 nr_pages = KBASE_REG_ZONE_CUSTOM_VA_SIZE;
-#if MALI_USE_CSF
- /* The position of EXEC_VA depends on whether the client is 32-bit or 64-bit. */
- exec_va_base = KBASE_REG_ZONE_EXEC_VA_BASE_64;
+ /* If the context does not support CUSTOM_VA zones, then we don't need to
+ * proceed past this point, and can pretend that it was initialized properly.
+ * In practice, this will mean that the zone metadata structure will be zero
+ * initialized and not contain a valid zone ID.
+ */
+ if (!kbase_ctx_compat_mode(kctx))
+ return 0;
+
+ if (gpu_va_limit <= KBASE_REG_ZONE_CUSTOM_VA_BASE)
+ return -EINVAL;
- /* Similarly the end of the FIXED_VA zone also depends on whether the client
- * is 32 or 64-bits.
+ /* If the current size of TMEM is out of range of the
+ * virtual address space addressable by the MMU then
+ * we should shrink it to fit
*/
- fixed_va_end = KBASE_REG_ZONE_FIXED_VA_END_64;
+ if ((KBASE_REG_ZONE_CUSTOM_VA_BASE + KBASE_REG_ZONE_CUSTOM_VA_SIZE) >= gpu_va_limit)
+ nr_pages = gpu_va_limit - KBASE_REG_ZONE_CUSTOM_VA_BASE;
-#if IS_ENABLED(CONFIG_64BIT)
- if (kbase_ctx_flag(kctx, KCTX_COMPAT)) {
- exec_va_base = KBASE_REG_ZONE_EXEC_VA_BASE_32;
- fixed_va_end = KBASE_REG_ZONE_FIXED_VA_END_32;
- }
+ if (kbase_reg_zone_init(kctx->kbdev, zone, CUSTOM_VA_ZONE, KBASE_REG_ZONE_CUSTOM_VA_BASE,
+ nr_pages))
+ return -ENOMEM;
+
+ /* On JM systems, this is the last memory zone that gets initialized,
+ * so the GPU VA ends right after the end of the CUSTOM_VA zone. On CSF,
+ * setting it here is harmless, as the FIXED_VA initializer will
+ * overwrite it.
+ */
+ kctx->gpu_va_end += nr_pages;
+
+ return 0;
+}
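
A worked example of the clamp above, with hypothetical numbers (4 KiB pages assumed):

    /* With a 33-bit GPU VA space:
     *   gpu_va_limit = (1ULL << 33) >> PAGE_SHIFT = 0x200000 pages
     * If KBASE_REG_ZONE_CUSTOM_VA_BASE + KBASE_REG_ZONE_CUSTOM_VA_SIZE
     * reaches past that limit, nr_pages is reduced to
     *   gpu_va_limit - KBASE_REG_ZONE_CUSTOM_VA_BASE
     * so the zone ends exactly at the last page the MMU can address.
     */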
+
+static void kbase_reg_zone_custom_va_term(struct kbase_context *kctx)
+{
+ struct kbase_reg_zone *zone = kbase_ctx_reg_zone_get(kctx, CUSTOM_VA_ZONE);
+
+ kbase_reg_zone_term(zone);
+}
+
+static inline u64 kbase_get_exec_va_zone_base(struct kbase_context *kctx)
+{
+ u64 base_pfn;
+
+#if MALI_USE_CSF
+ base_pfn = KBASE_REG_ZONE_EXEC_VA_BASE_64;
+ if (kbase_ctx_compat_mode(kctx))
+ base_pfn = KBASE_REG_ZONE_EXEC_VA_BASE_32;
+#else
+ /* EXEC_VA zone's codepaths are slightly easier when its base_pfn is
+ * initially U64_MAX
+ */
+ base_pfn = U64_MAX;
#endif
- kbase_ctx_reg_zone_init(kctx, KBASE_REG_ZONE_EXEC_VA, exec_va_base,
- KBASE_REG_ZONE_EXEC_VA_SIZE);
+ return base_pfn;
+}
- exec_va_reg = kbase_alloc_free_region(&kctx->reg_rbtree_exec, exec_va_base,
- KBASE_REG_ZONE_EXEC_VA_SIZE, KBASE_REG_ZONE_EXEC_VA);
+static inline int kbase_reg_zone_exec_va_init(struct kbase_context *kctx, u64 gpu_va_limit)
+{
+ struct kbase_reg_zone *zone = kbase_ctx_reg_zone_get(kctx, EXEC_VA_ZONE);
+ const u64 base_pfn = kbase_get_exec_va_zone_base(kctx);
+ u64 nr_pages = KBASE_REG_ZONE_EXEC_VA_SIZE;
- if (!exec_va_reg) {
- err = -ENOMEM;
- goto fail_free_custom_va;
- }
+#if !MALI_USE_CSF
+ nr_pages = 0;
+#endif
- exec_fixed_va_base = exec_va_base + KBASE_REG_ZONE_EXEC_VA_SIZE;
+ return kbase_reg_zone_init(kctx->kbdev, zone, EXEC_VA_ZONE, base_pfn, nr_pages);
+}
- kbase_ctx_reg_zone_init(kctx, KBASE_REG_ZONE_EXEC_FIXED_VA, exec_fixed_va_base,
- KBASE_REG_ZONE_EXEC_FIXED_VA_SIZE);
+static void kbase_reg_zone_exec_va_term(struct kbase_context *kctx)
+{
+ struct kbase_reg_zone *zone = kbase_ctx_reg_zone_get(kctx, EXEC_VA_ZONE);
- exec_fixed_va_reg =
- kbase_alloc_free_region(&kctx->reg_rbtree_exec_fixed, exec_fixed_va_base,
- KBASE_REG_ZONE_EXEC_FIXED_VA_SIZE,
- KBASE_REG_ZONE_EXEC_FIXED_VA);
+ kbase_reg_zone_term(zone);
+}
- if (!exec_fixed_va_reg) {
- err = -ENOMEM;
- goto fail_free_exec_va;
- }
+#if MALI_USE_CSF
+static inline u64 kbase_get_exec_fixed_va_zone_base(struct kbase_context *kctx)
+{
+ return kbase_get_exec_va_zone_base(kctx) + KBASE_REG_ZONE_EXEC_VA_SIZE;
+}
+
+static int kbase_reg_zone_exec_fixed_va_init(struct kbase_context *kctx, u64 gpu_va_limit)
+{
+ struct kbase_reg_zone *zone = kbase_ctx_reg_zone_get(kctx, EXEC_FIXED_VA_ZONE);
+ const u64 base_pfn = kbase_get_exec_fixed_va_zone_base(kctx);
+
+ return kbase_reg_zone_init(kctx->kbdev, zone, EXEC_FIXED_VA_ZONE, base_pfn,
+ KBASE_REG_ZONE_EXEC_FIXED_VA_SIZE);
+}
- fixed_va_base = exec_fixed_va_base + KBASE_REG_ZONE_EXEC_FIXED_VA_SIZE;
- fixed_va_pages = fixed_va_end - fixed_va_base;
+static void kbase_reg_zone_exec_fixed_va_term(struct kbase_context *kctx)
+{
+ struct kbase_reg_zone *zone = kbase_ctx_reg_zone_get(kctx, EXEC_FIXED_VA_ZONE);
+
+ WARN_ON(!list_empty(&kctx->csf.event_pages_head));
+ kbase_reg_zone_term(zone);
+}
+
+static int kbase_reg_zone_fixed_va_init(struct kbase_context *kctx, u64 gpu_va_limit)
+{
+ struct kbase_reg_zone *zone = kbase_ctx_reg_zone_get(kctx, FIXED_VA_ZONE);
+ const u64 base_pfn =
+ kbase_get_exec_fixed_va_zone_base(kctx) + KBASE_REG_ZONE_EXEC_FIXED_VA_SIZE;
+ u64 fixed_va_end = KBASE_REG_ZONE_FIXED_VA_END_64;
+ u64 nr_pages;
+
+ if (kbase_ctx_compat_mode(kctx))
+ fixed_va_end = KBASE_REG_ZONE_FIXED_VA_END_32;
- kbase_ctx_reg_zone_init(kctx, KBASE_REG_ZONE_FIXED_VA, fixed_va_base, fixed_va_pages);
+ nr_pages = fixed_va_end - base_pfn;
- fixed_va_reg = kbase_alloc_free_region(&kctx->reg_rbtree_fixed, fixed_va_base,
- fixed_va_pages, KBASE_REG_ZONE_FIXED_VA);
+ if (kbase_reg_zone_init(kctx->kbdev, zone, FIXED_VA_ZONE, base_pfn, nr_pages))
+ return -ENOMEM;
kctx->gpu_va_end = fixed_va_end;
- if (!fixed_va_reg) {
- err = -ENOMEM;
- goto fail_free_exec_fixed_va;
- }
+ return 0;
+}
- kbase_region_tracker_ds_init(kctx, same_va_reg, custom_va_reg, exec_va_reg,
- exec_fixed_va_reg, fixed_va_reg);
+static void kbase_reg_zone_fixed_va_term(struct kbase_context *kctx)
+{
+ struct kbase_reg_zone *zone = kbase_ctx_reg_zone_get(kctx, FIXED_VA_ZONE);
- INIT_LIST_HEAD(&kctx->csf.event_pages_head);
-#else
- /* EXEC_VA zone's codepaths are slightly easier when its base_pfn is
- * initially U64_MAX
- */
- kbase_ctx_reg_zone_init(kctx, KBASE_REG_ZONE_EXEC_VA, U64_MAX, 0u);
- /* Other zones are 0: kbase_create_context() uses vzalloc */
+ kbase_reg_zone_term(zone);
+}
+#endif
+
+typedef int kbase_memory_zone_init(struct kbase_context *kctx, u64 gpu_va_limit);
+typedef void kbase_memory_zone_term(struct kbase_context *kctx);
+
+struct kbase_memory_zone_init_meta {
+ kbase_memory_zone_init *init;
+ kbase_memory_zone_term *term;
+ char *error_msg;
+};
+
+static const struct kbase_memory_zone_init_meta zones_init[] = {
+ [SAME_VA_ZONE] = { kbase_reg_zone_same_va_init, kbase_reg_zone_same_va_term,
+ "Could not initialize SAME_VA zone" },
+ [CUSTOM_VA_ZONE] = { kbase_reg_zone_custom_va_init, kbase_reg_zone_custom_va_term,
+ "Could not initialize CUSTOM_VA zone" },
+ [EXEC_VA_ZONE] = { kbase_reg_zone_exec_va_init, kbase_reg_zone_exec_va_term,
+ "Could not initialize EXEC_VA zone" },
+#if MALI_USE_CSF
+ [EXEC_FIXED_VA_ZONE] = { kbase_reg_zone_exec_fixed_va_init,
+ kbase_reg_zone_exec_fixed_va_term,
+ "Could not initialize EXEC_FIXED_VA zone" },
+ [FIXED_VA_ZONE] = { kbase_reg_zone_fixed_va_init, kbase_reg_zone_fixed_va_term,
+ "Could not initialize FIXED_VA zone" },
+#endif
+};
- kbase_region_tracker_ds_init(kctx, same_va_reg, custom_va_reg);
- kctx->gpu_va_end = same_va_base + same_va_pages + custom_va_size;
+int kbase_region_tracker_init(struct kbase_context *kctx)
+{
+ const u64 gpu_va_bits = kctx->kbdev->gpu_props.mmu.va_bits;
+ const u64 gpu_va_limit = (1ULL << gpu_va_bits) >> PAGE_SHIFT;
+ int err;
+ unsigned int i;
+
+ /* Take the lock as kbase_free_alloced_region requires it */
+ kbase_gpu_vm_lock(kctx);
+
+ for (i = 0; i < ARRAY_SIZE(zones_init); i++) {
+ err = zones_init[i].init(kctx, gpu_va_limit);
+ if (unlikely(err)) {
+ dev_err(kctx->kbdev->dev, "%s, err = %d\n", zones_init[i].error_msg, err);
+ goto term;
+ }
+ }
+#if MALI_USE_CSF
+ INIT_LIST_HEAD(&kctx->csf.event_pages_head);
#endif
kctx->jit_va = false;
kbase_gpu_vm_unlock(kctx);
- return 0;
-#if MALI_USE_CSF
-fail_free_exec_fixed_va:
- kbase_free_alloced_region(exec_fixed_va_reg);
-fail_free_exec_va:
- kbase_free_alloced_region(exec_va_reg);
-fail_free_custom_va:
- if (custom_va_reg)
- kbase_free_alloced_region(custom_va_reg);
-#endif
+ return 0;
+term:
+ while (i-- > 0)
+ zones_init[i].term(kctx);
-fail_free_same_va:
- kbase_free_alloced_region(same_va_reg);
-fail_unlock:
kbase_gpu_vm_unlock(kctx);
return err;
}
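
The zones_init[] table gives every zone a symmetric init/term pair, so the error path can unwind exactly what succeeded. A minimal standalone sketch of the same idiom (hypothetical names, not driver code):

    struct step {
    	int (*init)(void);
    	void (*term)(void);
    };

    /* Run each init in order; on failure, tear down in reverse.
     * Step i failed and was never initialized, so the unwind starts at
     * i - 1, the same `while (i-- > 0)` shape used above.
     */
    static int init_all(const struct step *steps, unsigned int n)
    {
    	unsigned int i;

    	for (i = 0; i < n; i++) {
    		int err = steps[i].init();

    		if (err) {
    			while (i-- > 0)
    				steps[i].term();
    			return err;
    		}
    	}
    	return 0;
    }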
+void kbase_region_tracker_term(struct kbase_context *kctx)
+{
+ unsigned int i;
+
+ WARN(kctx->as_nr != KBASEP_AS_NR_INVALID,
+ "kctx-%d_%d must first be scheduled out to flush GPU caches+tlbs before erasing remaining regions",
+ kctx->tgid, kctx->id);
+
+ kbase_gpu_vm_lock(kctx);
+
+ for (i = 0; i < ARRAY_SIZE(zones_init); i++)
+ zones_init[i].term(kctx);
+
+ kbase_gpu_vm_unlock(kctx);
+}
+
static bool kbase_has_exec_va_zone_locked(struct kbase_context *kctx)
{
struct kbase_reg_zone *exec_va_zone;
lockdep_assert_held(&kctx->reg_lock);
- exec_va_zone = kbase_ctx_reg_zone_get(kctx, KBASE_REG_ZONE_EXEC_VA);
+ exec_va_zone = kbase_ctx_reg_zone_get(kctx, EXEC_VA_ZONE);
return (exec_va_zone->base_pfn != U64_MAX);
}
@@ -1072,16 +1087,16 @@ static bool kbase_region_tracker_has_allocs(struct kbase_context *kctx)
lockdep_assert_held(&kctx->reg_lock);
- for (zone_idx = 0; zone_idx < KBASE_REG_ZONE_MAX; ++zone_idx) {
+ for (zone_idx = 0; zone_idx < MEMORY_ZONE_MAX; zone_idx++) {
struct kbase_reg_zone *zone;
struct kbase_va_region *reg;
u64 zone_base_addr;
- unsigned long zone_bits = KBASE_REG_ZONE(zone_idx);
- unsigned long reg_zone;
+ enum kbase_memory_zone reg_zone;
- if (!kbase_is_ctx_reg_zone(zone_bits))
+ if (!kbase_is_ctx_reg_zone(zone_idx))
continue;
- zone = kbase_ctx_reg_zone_get(kctx, zone_bits);
+
+ zone = kbase_ctx_reg_zone_get(kctx, zone_idx);
zone_base_addr = zone->base_pfn << PAGE_SHIFT;
reg = kbase_region_tracker_find_region_base_address(
@@ -1089,21 +1104,21 @@ static bool kbase_region_tracker_has_allocs(struct kbase_context *kctx)
if (!zone->va_size_pages) {
WARN(reg,
- "Should not have found a region that starts at 0x%.16llx for zone 0x%lx",
- (unsigned long long)zone_base_addr, zone_bits);
+ "Should not have found a region that starts at 0x%.16llx for zone %s",
+ (unsigned long long)zone_base_addr, kbase_reg_zone_get_name(zone_idx));
continue;
}
if (WARN(!reg,
- "There should always be a region that starts at 0x%.16llx for zone 0x%lx, couldn't find it",
- (unsigned long long)zone_base_addr, zone_bits))
+ "There should always be a region that starts at 0x%.16llx for zone %s, couldn't find it",
+ (unsigned long long)zone_base_addr, kbase_reg_zone_get_name(zone_idx)))
return true; /* Safest return value */
- reg_zone = reg->flags & KBASE_REG_ZONE_MASK;
- if (WARN(reg_zone != zone_bits,
- "The region that starts at 0x%.16llx should be in zone 0x%lx but was found in the wrong zone 0x%lx",
- (unsigned long long)zone_base_addr, zone_bits,
- reg_zone))
+ reg_zone = kbase_bits_to_zone(reg->flags);
+ if (WARN(reg_zone != zone_idx,
+ "The region that starts at 0x%.16llx should be in zone %s but was found in the wrong zone %s",
+ (unsigned long long)zone_base_addr, kbase_reg_zone_get_name(zone_idx),
+ kbase_reg_zone_get_name(reg_zone)))
return true; /* Safest return value */
/* Unless the region is completely free, of the same size as
@@ -1120,15 +1135,12 @@ static bool kbase_region_tracker_has_allocs(struct kbase_context *kctx)
return false;
}
-#if IS_ENABLED(CONFIG_64BIT)
static int kbase_region_tracker_init_jit_64(struct kbase_context *kctx,
u64 jit_va_pages)
{
struct kbase_va_region *same_va_reg;
- struct kbase_reg_zone *same_va_zone;
+ struct kbase_reg_zone *same_va_zone, *custom_va_zone;
u64 same_va_zone_base_addr;
- const unsigned long same_va_zone_bits = KBASE_REG_ZONE_SAME_VA;
- struct kbase_va_region *custom_va_reg;
u64 jit_va_start;
lockdep_assert_held(&kctx->reg_lock);
@@ -1139,14 +1151,14 @@ static int kbase_region_tracker_init_jit_64(struct kbase_context *kctx,
* cause an overlap to happen with existing same VA allocations and the
* custom VA zone.
*/
- same_va_zone = kbase_ctx_reg_zone_get(kctx, same_va_zone_bits);
+ same_va_zone = kbase_ctx_reg_zone_get(kctx, SAME_VA_ZONE);
same_va_zone_base_addr = same_va_zone->base_pfn << PAGE_SHIFT;
same_va_reg = kbase_region_tracker_find_region_base_address(
kctx, same_va_zone_base_addr);
if (WARN(!same_va_reg,
- "Already found a free region at the start of every zone, but now cannot find any region for zone base 0x%.16llx zone 0x%lx",
- (unsigned long long)same_va_zone_base_addr, same_va_zone_bits))
+ "Already found a free region at the start of every zone, but now cannot find any region for zone SAME_VA base 0x%.16llx",
+ (unsigned long long)same_va_zone_base_addr))
return -ENOMEM;
/* kbase_region_tracker_has_allocs() in the caller has already ensured
@@ -1167,28 +1179,17 @@ static int kbase_region_tracker_init_jit_64(struct kbase_context *kctx,
/*
* Create a custom VA zone at the end of the VA for allocations which
- * JIT can use so it doesn't have to allocate VA from the kernel.
- */
- custom_va_reg =
- kbase_alloc_free_region(&kctx->reg_rbtree_custom, jit_va_start,
- jit_va_pages, KBASE_REG_ZONE_CUSTOM_VA);
-
- /*
- * The context will be destroyed if we fail here so no point
- * reverting the change we made to same_va.
+ * JIT can use so it doesn't have to allocate VA from the kernel. Note
+ * that, while the zone has already been zero-initialized during the
+ * region tracker initialization, we can just overwrite it.
*/
- if (!custom_va_reg)
+ custom_va_zone = kbase_ctx_reg_zone_get(kctx, CUSTOM_VA_ZONE);
+ if (kbase_reg_zone_init(kctx->kbdev, custom_va_zone, CUSTOM_VA_ZONE, jit_va_start,
+ jit_va_pages))
return -ENOMEM;
- /* Since this is 64-bit, the custom zone will not have been
- * initialized, so initialize it now
- */
- kbase_ctx_reg_zone_init(kctx, KBASE_REG_ZONE_CUSTOM_VA, jit_va_start,
- jit_va_pages);
- kbase_region_tracker_insert(custom_va_reg);
return 0;
}
-#endif
int kbase_region_tracker_init_jit(struct kbase_context *kctx, u64 jit_va_pages,
int max_allocations, int trim_level, int group_id,
@@ -1229,10 +1230,8 @@ int kbase_region_tracker_init_jit(struct kbase_context *kctx, u64 jit_va_pages,
goto exit_unlock;
}
-#if IS_ENABLED(CONFIG_64BIT)
- if (!kbase_ctx_flag(kctx, KCTX_COMPAT))
+ if (!kbase_ctx_compat_mode(kctx))
err = kbase_region_tracker_init_jit_64(kctx, jit_va_pages);
-#endif
/*
* Nothing to do for 32-bit clients, JIT uses the existing
* custom VA zone.
@@ -1259,12 +1258,11 @@ exit_unlock:
int kbase_region_tracker_init_exec(struct kbase_context *kctx, u64 exec_va_pages)
{
#if !MALI_USE_CSF
- struct kbase_va_region *exec_va_reg;
struct kbase_reg_zone *exec_va_zone;
struct kbase_reg_zone *target_zone;
struct kbase_va_region *target_reg;
u64 target_zone_base_addr;
- unsigned long target_zone_bits;
+ enum kbase_memory_zone target_zone_id;
u64 exec_va_start;
int err;
#endif
@@ -1308,25 +1306,23 @@ int kbase_region_tracker_init_exec(struct kbase_context *kctx, u64 exec_va_pages
goto exit_unlock;
}
-#if IS_ENABLED(CONFIG_64BIT)
- if (kbase_ctx_flag(kctx, KCTX_COMPAT)) {
-#endif
+ if (kbase_ctx_compat_mode(kctx)) {
/* 32-bit client: take from CUSTOM_VA zone */
- target_zone_bits = KBASE_REG_ZONE_CUSTOM_VA;
-#if IS_ENABLED(CONFIG_64BIT)
+ target_zone_id = CUSTOM_VA_ZONE;
} else {
/* 64-bit client: take from SAME_VA zone */
- target_zone_bits = KBASE_REG_ZONE_SAME_VA;
+ target_zone_id = SAME_VA_ZONE;
}
-#endif
- target_zone = kbase_ctx_reg_zone_get(kctx, target_zone_bits);
+
+ target_zone = kbase_ctx_reg_zone_get(kctx, target_zone_id);
target_zone_base_addr = target_zone->base_pfn << PAGE_SHIFT;
target_reg = kbase_region_tracker_find_region_base_address(
kctx, target_zone_base_addr);
if (WARN(!target_reg,
- "Already found a free region at the start of every zone, but now cannot find any region for zone base 0x%.16llx zone 0x%lx",
- (unsigned long long)target_zone_base_addr, target_zone_bits)) {
+ "Already found a free region at the start of every zone, but now cannot find any region for zone base 0x%.16llx zone %s",
+ (unsigned long long)target_zone_base_addr,
+ kbase_reg_zone_get_name(target_zone_id))) {
err = -ENOMEM;
goto exit_unlock;
}
@@ -1345,28 +1341,14 @@ int kbase_region_tracker_init_exec(struct kbase_context *kctx, u64 exec_va_pages
/* Taken from the end of the target zone */
exec_va_start = kbase_reg_zone_end_pfn(target_zone) - exec_va_pages;
-
- exec_va_reg = kbase_alloc_free_region(&kctx->reg_rbtree_exec,
- exec_va_start,
- exec_va_pages,
- KBASE_REG_ZONE_EXEC_VA);
- if (!exec_va_reg) {
- err = -ENOMEM;
- goto exit_unlock;
- }
- /* Update EXEC_VA zone
- *
- * not using kbase_ctx_reg_zone_init() - it was already initialized
- */
- exec_va_zone = kbase_ctx_reg_zone_get(kctx, KBASE_REG_ZONE_EXEC_VA);
- exec_va_zone->base_pfn = exec_va_start;
- exec_va_zone->va_size_pages = exec_va_pages;
+ exec_va_zone = kbase_ctx_reg_zone_get(kctx, EXEC_VA_ZONE);
+ if (kbase_reg_zone_init(kctx->kbdev, exec_va_zone, EXEC_VA_ZONE, exec_va_start,
+ exec_va_pages))
+ return -ENOMEM;
/* Update target zone and corresponding region */
target_reg->nr_pages -= exec_va_pages;
target_zone->va_size_pages -= exec_va_pages;
-
- kbase_region_tracker_insert(exec_va_reg);
err = 0;
exit_unlock:
@@ -1378,36 +1360,40 @@ exit_unlock:
#if MALI_USE_CSF
void kbase_mcu_shared_interface_region_tracker_term(struct kbase_device *kbdev)
{
- kbase_region_tracker_term_rbtree(&kbdev->csf.shared_reg_rbtree);
+ kbase_reg_zone_term(&kbdev->csf.mcu_shared_zone);
}
int kbase_mcu_shared_interface_region_tracker_init(struct kbase_device *kbdev)
{
- struct kbase_va_region *shared_reg;
- u64 shared_reg_start_pfn;
- u64 shared_reg_size;
-
- shared_reg_start_pfn = KBASE_REG_ZONE_MCU_SHARED_BASE;
- shared_reg_size = KBASE_REG_ZONE_MCU_SHARED_SIZE;
-
- kbdev->csf.shared_reg_rbtree = RB_ROOT;
-
- shared_reg = kbase_alloc_free_region(&kbdev->csf.shared_reg_rbtree,
- shared_reg_start_pfn,
- shared_reg_size,
- KBASE_REG_ZONE_MCU_SHARED);
- if (!shared_reg)
- return -ENOMEM;
-
- kbase_region_tracker_insert(shared_reg);
- return 0;
+ return kbase_reg_zone_init(kbdev, &kbdev->csf.mcu_shared_zone, MCU_SHARED_ZONE,
+ KBASE_REG_ZONE_MCU_SHARED_BASE, MCU_SHARED_ZONE_SIZE);
}
#endif
+static void kbasep_mem_page_size_init(struct kbase_device *kbdev)
+{
+#if IS_ENABLED(CONFIG_LARGE_PAGE_ALLOC_OVERRIDE)
+#if IS_ENABLED(CONFIG_LARGE_PAGE_ALLOC)
+ kbdev->pagesize_2mb = true;
+ if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_LARGE_PAGE_ALLOC) != 1) {
+ dev_warn(
+ kbdev->dev,
+ "2MB page is enabled by force while current GPU-HW doesn't meet the requirement to do so.\n");
+ }
+#else /* IS_ENABLED(CONFIG_LARGE_PAGE_ALLOC) */
+ kbdev->pagesize_2mb = false;
+#endif /* IS_ENABLED(CONFIG_LARGE_PAGE_ALLOC) */
+#else /* IS_ENABLED(CONFIG_LARGE_PAGE_ALLOC_OVERRIDE) */
+ /* Set it to the default based on which GPU is present */
+ kbdev->pagesize_2mb = kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_LARGE_PAGE_ALLOC);
+#endif /* IS_ENABLED(CONFIG_LARGE_PAGE_ALLOC_OVERRIDE) */
+}
+
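
The nested #if blocks reduce to a small decision table, derived directly from the function above:

    CONFIG_LARGE_PAGE_ALLOC_OVERRIDE=y, CONFIG_LARGE_PAGE_ALLOC=y:
        pagesize_2mb = true, with a warning if the GPU lacks
        BASE_HW_FEATURE_LARGE_PAGE_ALLOC
    CONFIG_LARGE_PAGE_ALLOC_OVERRIDE=y, CONFIG_LARGE_PAGE_ALLOC=n:
        pagesize_2mb = false
    CONFIG_LARGE_PAGE_ALLOC_OVERRIDE=n:
        pagesize_2mb follows the GPU's BASE_HW_FEATURE_LARGE_PAGE_ALLOC bit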
int kbase_mem_init(struct kbase_device *kbdev)
{
int err = 0;
struct kbasep_mem_device *memdev;
+ char va_region_slab_name[VA_REGION_SLAB_NAME_SIZE];
#if IS_ENABLED(CONFIG_OF)
struct device_node *mgm_node = NULL;
#endif
@@ -1416,6 +1402,20 @@ int kbase_mem_init(struct kbase_device *kbdev)
memdev = &kbdev->memdev;
+ kbasep_mem_page_size_init(kbdev);
+
+ scnprintf(va_region_slab_name, VA_REGION_SLAB_NAME_SIZE, VA_REGION_SLAB_NAME_PREFIX "%s",
+ kbdev->devname);
+
+ /* Initialize slab cache for kbase_va_regions */
+ kbdev->va_region_slab =
+ kmem_cache_create(va_region_slab_name, sizeof(struct kbase_va_region), 0, 0, NULL);
+ if (kbdev->va_region_slab == NULL) {
+ dev_err(kbdev->dev, "Failed to create va_region_slab\n");
+ return -ENOMEM;
+ }
+
+ kbase_mem_migrate_init(kbdev);
kbase_mem_pool_group_config_set_max_size(&kbdev->mem_pool_defaults,
KBASE_MEM_POOL_MAX_SIZE_KCTX);
@@ -1479,8 +1479,7 @@ int kbase_mem_init(struct kbase_device *kbdev)
kbase_mem_pool_group_config_set_max_size(&mem_pool_defaults,
KBASE_MEM_POOL_MAX_SIZE_KBDEV);
- err = kbase_mem_pool_group_init(&kbdev->mem_pools, kbdev,
- &mem_pool_defaults, NULL);
+ err = kbase_mem_pool_group_init(&kbdev->mem_pools, kbdev, &mem_pool_defaults, NULL);
}
return err;
@@ -1506,6 +1505,11 @@ void kbase_mem_term(struct kbase_device *kbdev)
kbase_mem_pool_group_term(&kbdev->mem_pools);
+ kbase_mem_migrate_term(kbdev);
+
+ kmem_cache_destroy(kbdev->va_region_slab);
+ kbdev->va_region_slab = NULL;
+
WARN_ON(kbdev->total_gpu_pages);
WARN_ON(!RB_EMPTY_ROOT(&kbdev->process_root));
WARN_ON(!RB_EMPTY_ROOT(&kbdev->dma_buf_root));
@@ -1519,41 +1523,41 @@ KBASE_EXPORT_TEST_API(kbase_mem_term);
/**
* kbase_alloc_free_region - Allocate a free region object.
*
- * @rbtree: Backlink to the red-black tree of memory regions.
+ * @zone: CUSTOM_VA_ZONE or SAME_VA_ZONE
* @start_pfn: The Page Frame Number in GPU virtual address space.
* @nr_pages: The size of the region in pages.
- * @zone: KBASE_REG_ZONE_CUSTOM_VA or KBASE_REG_ZONE_SAME_VA
*
* The allocated object is not part of any list yet, and is flagged as
* KBASE_REG_FREE. No mapping is allocated yet.
*
- * zone is KBASE_REG_ZONE_CUSTOM_VA or KBASE_REG_ZONE_SAME_VA.
- *
* Return: pointer to the allocated region object on success, NULL otherwise.
*/
-struct kbase_va_region *kbase_alloc_free_region(struct rb_root *rbtree,
- u64 start_pfn, size_t nr_pages, int zone)
+struct kbase_va_region *kbase_alloc_free_region(struct kbase_reg_zone *zone, u64 start_pfn,
+ size_t nr_pages)
{
struct kbase_va_region *new_reg;
- KBASE_DEBUG_ASSERT(rbtree != NULL);
-
- /* zone argument should only contain zone related region flags */
- KBASE_DEBUG_ASSERT((zone & ~KBASE_REG_ZONE_MASK) == 0);
KBASE_DEBUG_ASSERT(nr_pages > 0);
/* 64-bit address range is the max */
KBASE_DEBUG_ASSERT(start_pfn + nr_pages <= (U64_MAX / PAGE_SIZE));
- new_reg = kzalloc(sizeof(*new_reg), GFP_KERNEL);
+ if (WARN_ON(!zone))
+ return NULL;
+
+ if (unlikely(!zone->base_pfn || !zone->va_size_pages))
+ return NULL;
+
+ new_reg = kmem_cache_zalloc(zone->cache, GFP_KERNEL);
if (!new_reg)
return NULL;
- new_reg->va_refcnt = 1;
+ kbase_refcount_set(&new_reg->va_refcnt, 1);
+ atomic_set(&new_reg->no_user_free_count, 0);
new_reg->cpu_alloc = NULL; /* no alloc bound yet */
new_reg->gpu_alloc = NULL; /* no alloc bound yet */
- new_reg->rbtree = rbtree;
- new_reg->flags = zone | KBASE_REG_FREE;
+ new_reg->rbtree = &zone->reg_rbtree;
+ new_reg->flags = kbase_zone_to_bits(zone->id) | KBASE_REG_FREE;
new_reg->flags |= KBASE_REG_GROWABLE;
@@ -1565,42 +1569,15 @@ struct kbase_va_region *kbase_alloc_free_region(struct rb_root *rbtree,
return new_reg;
}
-
KBASE_EXPORT_TEST_API(kbase_alloc_free_region);
-static struct kbase_context *kbase_reg_flags_to_kctx(
- struct kbase_va_region *reg)
+struct kbase_va_region *kbase_ctx_alloc_free_region(struct kbase_context *kctx,
+ enum kbase_memory_zone id, u64 start_pfn,
+ size_t nr_pages)
{
- struct kbase_context *kctx = NULL;
- struct rb_root *rbtree = reg->rbtree;
+ struct kbase_reg_zone *zone = kbase_ctx_reg_zone_get_nolock(kctx, id);
- switch (reg->flags & KBASE_REG_ZONE_MASK) {
- case KBASE_REG_ZONE_CUSTOM_VA:
- kctx = container_of(rbtree, struct kbase_context,
- reg_rbtree_custom);
- break;
- case KBASE_REG_ZONE_SAME_VA:
- kctx = container_of(rbtree, struct kbase_context,
- reg_rbtree_same);
- break;
- case KBASE_REG_ZONE_EXEC_VA:
- kctx = container_of(rbtree, struct kbase_context,
- reg_rbtree_exec);
- break;
-#if MALI_USE_CSF
- case KBASE_REG_ZONE_EXEC_FIXED_VA:
- kctx = container_of(rbtree, struct kbase_context, reg_rbtree_exec_fixed);
- break;
- case KBASE_REG_ZONE_FIXED_VA:
- kctx = container_of(rbtree, struct kbase_context, reg_rbtree_fixed);
- break;
-#endif
- default:
- WARN(1, "Unknown zone in region: flags=0x%lx\n", reg->flags);
- break;
- }
-
- return kctx;
+ return kbase_alloc_free_region(zone, start_pfn, nr_pages);
}
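
Since the zone now travels with the region, callers pass a zone id instead of an rbtree pointer plus zone flags. A hedged usage sketch (hypothetical caller; kctx->reg_lock must be held, as implied by the _nolock zone getter):

    /* Hypothetical caller: carve a 16-page free region out of CUSTOM_VA. */
    static struct kbase_va_region *carve_custom_va(struct kbase_context *kctx,
    					       u64 start_pfn)
    {
    	struct kbase_va_region *reg =
    		kbase_ctx_alloc_free_region(kctx, CUSTOM_VA_ZONE, start_pfn, 16);

    	if (!reg)
    		return NULL; /* zone not initialized, or slab allocation failed */

    	/* The flags now carry the packed zone id plus KBASE_REG_FREE. */
    	WARN_ON(kbase_bits_to_zone(reg->flags) != CUSTOM_VA_ZONE);
    	return reg;
    }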
/**
@@ -1614,18 +1591,18 @@ static struct kbase_context *kbase_reg_flags_to_kctx(
* alloc object will be released.
* It is a bug if no alloc object exists for non-free regions.
*
+ * If the region is in the MCU_SHARED_ZONE, it is freed immediately.
*/
void kbase_free_alloced_region(struct kbase_va_region *reg)
{
#if MALI_USE_CSF
- if ((reg->flags & KBASE_REG_ZONE_MASK) ==
- KBASE_REG_ZONE_MCU_SHARED) {
+ if (kbase_bits_to_zone(reg->flags) == MCU_SHARED_ZONE) {
kfree(reg);
return;
}
#endif
if (!(reg->flags & KBASE_REG_FREE)) {
- struct kbase_context *kctx = kbase_reg_flags_to_kctx(reg);
+ struct kbase_context *kctx = kbase_reg_to_kctx(reg);
if (WARN_ON(!kctx))
return;
@@ -1633,10 +1610,17 @@ void kbase_free_alloced_region(struct kbase_va_region *reg)
if (WARN_ON(kbase_is_region_invalid(reg)))
return;
- dev_dbg(kctx->kbdev->dev, "Freeing memory region %pK\n",
- (void *)reg);
+ dev_dbg(kctx->kbdev->dev, "Freeing memory region %pK of zone %s\n", (void *)reg,
+ kbase_reg_zone_get_name(kbase_bits_to_zone(reg->flags)));
#if MALI_USE_CSF
if (reg->flags & KBASE_REG_CSF_EVENT)
+ /*
+ * This should not be reachable if called from 'mcu_shared' functions
+ * such as:
+ * kbase_csf_firmware_mcu_shared_mapping_init
+ * kbase_csf_firmware_mcu_shared_mapping_term
+ */
+
kbase_unlink_event_mem_page(kctx, reg);
#endif
@@ -1650,8 +1634,6 @@ void kbase_free_alloced_region(struct kbase_va_region *reg)
* on the list at termination time of the region tracker.
*/
if (!list_empty(&reg->gpu_alloc->evict_node)) {
- mutex_unlock(&kctx->jit_evict_lock);
-
/*
* Unlink the physical allocation before unmaking it
* evictable so that the allocation isn't grown back to
@@ -1662,6 +1644,8 @@ void kbase_free_alloced_region(struct kbase_va_region *reg)
if (reg->cpu_alloc != reg->gpu_alloc)
reg->gpu_alloc->reg = NULL;
+ mutex_unlock(&kctx->jit_evict_lock);
+
/*
* If a region has been made evictable then we must
* unmake it before trying to free it.
@@ -1736,41 +1720,45 @@ int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg,
KBASE_DEBUG_ASSERT(alloc->imported.alias.aliased);
for (i = 0; i < alloc->imported.alias.nents; i++) {
if (alloc->imported.alias.aliased[i].alloc) {
- err = kbase_mmu_insert_pages(
- kctx->kbdev, &kctx->mmu,
- reg->start_pfn + (i * stride),
- alloc->imported.alias.aliased[i]
- .alloc->pages +
- alloc->imported.alias.aliased[i]
- .offset,
+ err = kbase_mmu_insert_aliased_pages(
+ kctx->kbdev, &kctx->mmu, reg->start_pfn + (i * stride),
+ alloc->imported.alias.aliased[i].alloc->pages +
+ alloc->imported.alias.aliased[i].offset,
alloc->imported.alias.aliased[i].length,
- reg->flags & gwt_mask, kctx->as_nr,
- group_id, mmu_sync_info);
+ reg->flags & gwt_mask, kctx->as_nr, group_id, mmu_sync_info,
+ NULL);
if (err)
- goto bad_insert;
+ goto bad_aliased_insert;
/* Note: mapping count is tracked at alias
* creation time
*/
} else {
- err = kbase_mmu_insert_single_page(
- kctx, reg->start_pfn + i * stride,
- kctx->aliasing_sink_page,
+ err = kbase_mmu_insert_single_aliased_page(
+ kctx, reg->start_pfn + i * stride, kctx->aliasing_sink_page,
alloc->imported.alias.aliased[i].length,
- (reg->flags & mask & gwt_mask) | attr,
- group_id, mmu_sync_info);
+ (reg->flags & mask & gwt_mask) | attr, group_id,
+ mmu_sync_info);
if (err)
- goto bad_insert;
+ goto bad_aliased_insert;
}
}
} else {
- err = kbase_mmu_insert_pages(kctx->kbdev, &kctx->mmu,
- reg->start_pfn,
- kbase_get_gpu_phy_pages(reg),
- kbase_reg_current_backed_size(reg),
- reg->flags & gwt_mask, kctx->as_nr,
- group_id, mmu_sync_info);
+ if (reg->gpu_alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM ||
+ reg->gpu_alloc->type == KBASE_MEM_TYPE_IMPORTED_USER_BUF) {
+ err = kbase_mmu_insert_pages_skip_status_update(
+ kctx->kbdev, &kctx->mmu, reg->start_pfn,
+ kbase_get_gpu_phy_pages(reg), kbase_reg_current_backed_size(reg),
+ reg->flags & gwt_mask, kctx->as_nr, group_id, mmu_sync_info, reg);
+ } else {
+ err = kbase_mmu_insert_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn,
+ kbase_get_gpu_phy_pages(reg),
+ kbase_reg_current_backed_size(reg),
+ reg->flags & gwt_mask, kctx->as_nr, group_id,
+ mmu_sync_info, reg);
+ }
+
if (err)
goto bad_insert;
kbase_mem_phy_alloc_gpu_mapped(alloc);
@@ -1780,9 +1768,9 @@ int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg,
!WARN_ON(reg->nr_pages < reg->gpu_alloc->nents) &&
reg->gpu_alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM &&
reg->gpu_alloc->imported.umm.current_mapping_usage_count) {
- /* For padded imported dma-buf memory, map the dummy aliasing
- * page from the end of the dma-buf pages, to the end of the
- * region using a read only mapping.
+ /* For padded imported dma-buf or user-buf memory, map the dummy
+ * aliasing page from the end of the imported pages, to the end of
+ * the region using a read only mapping.
*
* Only map when it's imported dma-buf memory that is currently
* mapped.
@@ -1790,23 +1778,31 @@ int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg,
* Assume reg->gpu_alloc->nents is the number of actual pages
* in the dma-buf memory.
*/
- err = kbase_mmu_insert_single_page(
- kctx, reg->start_pfn + reg->gpu_alloc->nents,
- kctx->aliasing_sink_page,
+ err = kbase_mmu_insert_single_imported_page(
+ kctx, reg->start_pfn + reg->gpu_alloc->nents, kctx->aliasing_sink_page,
reg->nr_pages - reg->gpu_alloc->nents,
- (reg->flags | KBASE_REG_GPU_RD) & ~KBASE_REG_GPU_WR,
- KBASE_MEM_GROUP_SINK, mmu_sync_info);
+ (reg->flags | KBASE_REG_GPU_RD) & ~KBASE_REG_GPU_WR, KBASE_MEM_GROUP_SINK,
+ mmu_sync_info);
if (err)
goto bad_insert;
}
return err;
-bad_insert:
- kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu,
- reg->start_pfn, reg->nr_pages,
- kctx->as_nr);
+bad_aliased_insert:
+ while (i-- > 0) {
+ struct tagged_addr *phys_alloc = NULL;
+ u64 const stride = alloc->imported.alias.stride;
+ if (alloc->imported.alias.aliased[i].alloc != NULL)
+ phys_alloc = alloc->imported.alias.aliased[i].alloc->pages +
+ alloc->imported.alias.aliased[i].offset;
+
+ kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn + (i * stride),
+ phys_alloc, alloc->imported.alias.aliased[i].length,
+ alloc->imported.alias.aliased[i].length, kctx->as_nr);
+ }
+bad_insert:
kbase_remove_va_region(kctx->kbdev, reg);
return err;
@@ -1814,12 +1810,13 @@ bad_insert:
KBASE_EXPORT_TEST_API(kbase_gpu_mmap);
-static void kbase_jd_user_buf_unmap(struct kbase_context *kctx,
- struct kbase_mem_phy_alloc *alloc, bool writeable);
+static void kbase_jd_user_buf_unmap(struct kbase_context *kctx, struct kbase_mem_phy_alloc *alloc,
+ struct kbase_va_region *reg);
int kbase_gpu_munmap(struct kbase_context *kctx, struct kbase_va_region *reg)
{
int err = 0;
+ struct kbase_mem_phy_alloc *alloc;
if (reg->start_pfn == 0)
return 0;
@@ -1827,67 +1824,95 @@ int kbase_gpu_munmap(struct kbase_context *kctx, struct kbase_va_region *reg)
if (!reg->gpu_alloc)
return -EINVAL;
+ alloc = reg->gpu_alloc;
+
/* Tear down GPU page tables, depending on memory type. */
- switch (reg->gpu_alloc->type) {
+ switch (alloc->type) {
case KBASE_MEM_TYPE_ALIAS: {
size_t i = 0;
- struct kbase_mem_phy_alloc *alloc = reg->gpu_alloc;
-
/* Due to the way the number of valid PTEs and ATEs are tracked
* currently, only the GPU virtual range that is backed & mapped
- * should be passed to the kbase_mmu_teardown_pages() function,
- * hence individual aliased regions needs to be unmapped
- * separately.
+ * should be passed to the page teardown function, hence individual
+ * aliased regions need to be unmapped separately.
*/
for (i = 0; i < alloc->imported.alias.nents; i++) {
- if (alloc->imported.alias.aliased[i].alloc) {
- int err_loop = kbase_mmu_teardown_pages(
- kctx->kbdev, &kctx->mmu,
- reg->start_pfn +
- (i *
- alloc->imported.alias.stride),
- alloc->imported.alias.aliased[i].length,
- kctx->as_nr);
- if (WARN_ON_ONCE(err_loop))
- err = err_loop;
- }
+ struct tagged_addr *phys_alloc = NULL;
+ int err_loop;
+
+ if (alloc->imported.alias.aliased[i].alloc != NULL)
+ phys_alloc = alloc->imported.alias.aliased[i].alloc->pages +
+ alloc->imported.alias.aliased[i].offset;
+
+ err_loop = kbase_mmu_teardown_pages(
+ kctx->kbdev, &kctx->mmu,
+ reg->start_pfn + (i * alloc->imported.alias.stride),
+ phys_alloc, alloc->imported.alias.aliased[i].length,
+ alloc->imported.alias.aliased[i].length, kctx->as_nr);
+
+ if (WARN_ON_ONCE(err_loop))
+ err = err_loop;
}
}
break;
- case KBASE_MEM_TYPE_IMPORTED_UMM:
- err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu,
- reg->start_pfn, reg->nr_pages, kctx->as_nr);
+ case KBASE_MEM_TYPE_IMPORTED_UMM: {
+ size_t nr_phys_pages = reg->nr_pages;
+ size_t nr_virt_pages = reg->nr_pages;
+ /* If the region has import padding and falls under the threshold for
+ * issuing a partial GPU cache flush, we want to reduce the number of
+ * physical pages that get flushed.
+ *
+ * This is symmetric with the case of mapping the memory, which first maps
+ * each imported physical page to a separate virtual page, and then
+ * maps the single aliasing sink page to each of the virtual padding
+ * pages.
+ */
+ if (reg->flags & KBASE_REG_IMPORT_PAD)
+ nr_phys_pages = alloc->nents + 1;
+
+ err = kbase_mmu_teardown_imported_pages(kctx->kbdev, &kctx->mmu,
+ reg->start_pfn, alloc->pages,
+ nr_phys_pages, nr_virt_pages,
+ kctx->as_nr);
+ }
break;
- default:
- err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu,
- reg->start_pfn, kbase_reg_current_backed_size(reg),
- kctx->as_nr);
+ case KBASE_MEM_TYPE_IMPORTED_USER_BUF: {
+ size_t nr_reg_pages = kbase_reg_current_backed_size(reg);
+
+ err = kbase_mmu_teardown_imported_pages(kctx->kbdev, &kctx->mmu,
+ reg->start_pfn, alloc->pages,
+ nr_reg_pages, nr_reg_pages,
+ kctx->as_nr);
+ }
+ break;
+ default: {
+ size_t nr_reg_pages = kbase_reg_current_backed_size(reg);
+
+ err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn,
+ alloc->pages, nr_reg_pages, nr_reg_pages,
+ kctx->as_nr);
+ }
break;
}
/* Update tracking, and other cleanup, depending on memory type. */
- switch (reg->gpu_alloc->type) {
+ switch (alloc->type) {
case KBASE_MEM_TYPE_ALIAS:
/* We mark the source allocs as unmapped from the GPU when
* putting reg's allocs
*/
break;
case KBASE_MEM_TYPE_IMPORTED_USER_BUF: {
- struct kbase_alloc_import_user_buf *user_buf =
- &reg->gpu_alloc->imported.user_buf;
-
- if (user_buf->current_mapping_usage_count & PINNED_ON_IMPORT) {
- user_buf->current_mapping_usage_count &=
- ~PINNED_ON_IMPORT;
-
- /* The allocation could still have active mappings. */
- if (user_buf->current_mapping_usage_count == 0) {
- kbase_jd_user_buf_unmap(kctx, reg->gpu_alloc,
- (reg->flags & (KBASE_REG_CPU_WR |
- KBASE_REG_GPU_WR)));
- }
+ struct kbase_alloc_import_user_buf *user_buf = &alloc->imported.user_buf;
+
+ if (user_buf->current_mapping_usage_count & PINNED_ON_IMPORT) {
+ user_buf->current_mapping_usage_count &= ~PINNED_ON_IMPORT;
+
+ /* The allocation could still have active mappings. */
+ if (user_buf->current_mapping_usage_count == 0) {
+ kbase_jd_user_buf_unmap(kctx, alloc, reg);
}
}
+ }
fallthrough;
default:
kbase_mem_phy_alloc_gpu_unmapped(reg->gpu_alloc);
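
The IMPORTED_UMM teardown in the hunk above distinguishes virtual from physical page counts when the region carries import padding. Worked numbers (hypothetical sizes):

    /* A dma-buf of alloc->nents = 100 pages imported into a region of
     * reg->nr_pages = 128, with KBASE_REG_IMPORT_PAD set:
     *
     *   nr_virt_pages = 128            every PTE in the region is torn down
     *   nr_phys_pages = 100 + 1 = 101  the 100 real pages plus one entry for
     *                                  the shared aliasing sink page, so only
     *                                  those addresses get cache-flushed
     */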
@@ -2007,7 +2032,8 @@ void kbase_sync_single(struct kbase_context *kctx,
BUG_ON(!cpu_page);
BUG_ON(offset + size > PAGE_SIZE);
- dma_addr = kbase_dma_addr(cpu_page) + offset;
+ dma_addr = kbase_dma_addr_from_tagged(t_cpu_pa) + offset;
+
if (sync_fn == KBASE_SYNC_TO_CPU)
dma_sync_single_for_cpu(kctx->kbdev->dev, dma_addr,
size, DMA_BIDIRECTIONAL);
@@ -2018,29 +2044,30 @@ void kbase_sync_single(struct kbase_context *kctx,
void *src = NULL;
void *dst = NULL;
struct page *gpu_page;
+ dma_addr_t dma_addr;
if (WARN(!gpu_pa, "No GPU PA found for infinite cache op"))
return;
gpu_page = pfn_to_page(PFN_DOWN(gpu_pa));
+ dma_addr = kbase_dma_addr_from_tagged(t_gpu_pa) + offset;
if (sync_fn == KBASE_SYNC_TO_DEVICE) {
- src = ((unsigned char *)kmap(cpu_page)) + offset;
- dst = ((unsigned char *)kmap(gpu_page)) + offset;
+ src = ((unsigned char *)kbase_kmap(cpu_page)) + offset;
+ dst = ((unsigned char *)kbase_kmap(gpu_page)) + offset;
} else if (sync_fn == KBASE_SYNC_TO_CPU) {
- dma_sync_single_for_cpu(kctx->kbdev->dev,
- kbase_dma_addr(gpu_page) + offset,
- size, DMA_BIDIRECTIONAL);
- src = ((unsigned char *)kmap(gpu_page)) + offset;
- dst = ((unsigned char *)kmap(cpu_page)) + offset;
+ dma_sync_single_for_cpu(kctx->kbdev->dev, dma_addr, size,
+ DMA_BIDIRECTIONAL);
+ src = ((unsigned char *)kbase_kmap(gpu_page)) + offset;
+ dst = ((unsigned char *)kbase_kmap(cpu_page)) + offset;
}
+
memcpy(dst, src, size);
- kunmap(gpu_page);
- kunmap(cpu_page);
+ kbase_kunmap(gpu_page, src);
+ kbase_kunmap(cpu_page, dst);
if (sync_fn == KBASE_SYNC_TO_DEVICE)
- dma_sync_single_for_device(kctx->kbdev->dev,
- kbase_dma_addr(gpu_page) + offset,
- size, DMA_BIDIRECTIONAL);
+ dma_sync_single_for_device(kctx->kbdev->dev, dma_addr, size,
+ DMA_BIDIRECTIONAL);
}
}
@@ -2186,29 +2213,27 @@ int kbase_mem_free_region(struct kbase_context *kctx, struct kbase_va_region *re
__func__, (void *)reg, (void *)kctx);
lockdep_assert_held(&kctx->reg_lock);
- if (reg->flags & KBASE_REG_NO_USER_FREE) {
+ if (kbase_va_region_is_no_user_free(reg)) {
dev_warn(kctx->kbdev->dev, "Attempt to free GPU memory whose freeing by user space is forbidden!\n");
return -EINVAL;
}
- /*
- * Unlink the physical allocation before unmaking it evictable so
- * that the allocation isn't grown back to its last backed size
- * as we're going to unmap it anyway.
- */
- reg->cpu_alloc->reg = NULL;
- if (reg->cpu_alloc != reg->gpu_alloc)
- reg->gpu_alloc->reg = NULL;
-
- /*
- * If a region has been made evictable then we must unmake it
+ /* If a region has been made evictable then we must unmake it
* before trying to free it.
* If the memory hasn't been reclaimed it will be unmapped and freed
* below, if it has been reclaimed then the operations below are no-ops.
*/
if (reg->flags & KBASE_REG_DONT_NEED) {
- KBASE_DEBUG_ASSERT(reg->cpu_alloc->type ==
- KBASE_MEM_TYPE_NATIVE);
+ WARN_ON(reg->cpu_alloc->type != KBASE_MEM_TYPE_NATIVE);
+ mutex_lock(&kctx->jit_evict_lock);
+ /* Unlink the physical allocation before unmaking it evictable so
+ * that the allocation isn't grown back to its last backed size
+ * as we're going to unmap it anyway.
+ */
+ reg->cpu_alloc->reg = NULL;
+ if (reg->cpu_alloc != reg->gpu_alloc)
+ reg->gpu_alloc->reg = NULL;
+ mutex_unlock(&kctx->jit_evict_lock);
kbase_mem_evictable_unmake(reg->gpu_alloc);
}
@@ -2219,8 +2244,8 @@ int kbase_mem_free_region(struct kbase_context *kctx, struct kbase_va_region *re
}
#if MALI_USE_CSF
- if (((reg->flags & KBASE_REG_ZONE_MASK) == KBASE_REG_ZONE_FIXED_VA) ||
- ((reg->flags & KBASE_REG_ZONE_MASK) == KBASE_REG_ZONE_EXEC_FIXED_VA)) {
+ if (((kbase_bits_to_zone(reg->flags)) == FIXED_VA_ZONE) ||
+ ((kbase_bits_to_zone(reg->flags)) == EXEC_FIXED_VA_ZONE)) {
if (reg->flags & KBASE_REG_FIXED_ADDRESS)
atomic64_dec(&kctx->num_fixed_allocs);
else
@@ -2268,7 +2293,7 @@ int kbase_mem_free(struct kbase_context *kctx, u64 gpu_addr)
__func__);
return -EINVAL;
}
- kbase_gpu_vm_lock(kctx);
+ kbase_gpu_vm_lock_with_pmode_sync(kctx);
if (gpu_addr >= BASE_MEM_COOKIE_BASE &&
gpu_addr < BASE_MEM_FIRST_FREE_ADDRESS) {
@@ -2297,7 +2322,7 @@ int kbase_mem_free(struct kbase_context *kctx, u64 gpu_addr)
goto out_unlock;
}
- if ((reg->flags & KBASE_REG_ZONE_MASK) == KBASE_REG_ZONE_SAME_VA) {
+ if ((kbase_bits_to_zone(reg->flags)) == SAME_VA_ZONE) {
/* SAME_VA must be freed through munmap */
dev_warn(kctx->kbdev->dev, "%s called on SAME_VA memory 0x%llX", __func__,
gpu_addr);
@@ -2308,7 +2333,7 @@ int kbase_mem_free(struct kbase_context *kctx, u64 gpu_addr)
}
out_unlock:
- kbase_gpu_vm_unlock(kctx);
+ kbase_gpu_vm_unlock_with_pmode_sync(kctx);
return err;
}
@@ -2407,8 +2432,11 @@ int kbase_update_region_flags(struct kbase_context *kctx,
if (flags & BASEP_MEM_PERMANENT_KERNEL_MAPPING)
reg->flags |= KBASE_REG_PERMANENT_KERNEL_MAPPING;
- if (flags & BASEP_MEM_NO_USER_FREE)
- reg->flags |= KBASE_REG_NO_USER_FREE;
+ if (flags & BASEP_MEM_NO_USER_FREE) {
+ kbase_gpu_vm_lock(kctx);
+ kbase_va_region_no_user_free_inc(reg);
+ kbase_gpu_vm_unlock(kctx);
+ }
if (flags & BASE_MEM_GPU_VA_SAME_4GB_PAGE)
reg->flags |= KBASE_REG_GPU_VA_SAME_4GB_PAGE;
@@ -2457,21 +2485,18 @@ int kbase_alloc_phy_pages_helper(struct kbase_mem_phy_alloc *alloc,
* allocation is visible to the OOM killer
*/
kbase_process_page_usage_inc(kctx, nr_pages_requested);
+ kbase_trace_gpu_mem_usage_inc(kctx->kbdev, kctx, nr_pages_requested);
tp = alloc->pages + alloc->nents;
-#ifdef CONFIG_MALI_2MB_ALLOC
/* Check if we have enough pages requested so we can allocate a large
* page (512 * 4KB = 2MB )
*/
- if (nr_left >= (SZ_2M / SZ_4K)) {
+ if (kbdev->pagesize_2mb && nr_left >= (SZ_2M / SZ_4K)) {
int nr_lp = nr_left / (SZ_2M / SZ_4K);
- res = kbase_mem_pool_alloc_pages(
- &kctx->mem_pools.large[alloc->group_id],
- nr_lp * (SZ_2M / SZ_4K),
- tp,
- true);
+ res = kbase_mem_pool_alloc_pages(&kctx->mem_pools.large[alloc->group_id],
+ nr_lp * (SZ_2M / SZ_4K), tp, true, kctx->task);
if (res > 0) {
nr_left -= res;
@@ -2525,7 +2550,7 @@ int kbase_alloc_phy_pages_helper(struct kbase_mem_phy_alloc *alloc,
err = kbase_mem_pool_grow(
&kctx->mem_pools.large[alloc->group_id],
- 1);
+ 1, kctx->task);
if (err)
break;
} while (1);
@@ -2566,13 +2591,11 @@ int kbase_alloc_phy_pages_helper(struct kbase_mem_phy_alloc *alloc,
}
}
}
-no_new_partial:
-#endif
+no_new_partial:
if (nr_left) {
- res = kbase_mem_pool_alloc_pages(
- &kctx->mem_pools.small[alloc->group_id],
- nr_left, tp, false);
+ res = kbase_mem_pool_alloc_pages(&kctx->mem_pools.small[alloc->group_id], nr_left,
+ tp, false, kctx->task);
if (res <= 0)
goto alloc_failed;
}
@@ -2584,8 +2607,6 @@ no_new_partial:
alloc->nents += nr_pages_requested;
- kbase_trace_gpu_mem_usage_inc(kctx->kbdev, kctx, nr_pages_requested);
-
done:
return 0;
@@ -2595,19 +2616,13 @@ alloc_failed:
size_t nr_pages_to_free = nr_pages_requested - nr_left;
alloc->nents += nr_pages_to_free;
-
- kbase_process_page_usage_inc(kctx, nr_pages_to_free);
- atomic_add(nr_pages_to_free, &kctx->used_pages);
- atomic_add(nr_pages_to_free,
- &kctx->kbdev->memdev.used_pages);
-
kbase_free_phy_pages_helper(alloc, nr_pages_to_free);
}
- kbase_process_page_usage_dec(kctx, nr_pages_requested);
- atomic_sub(nr_pages_requested, &kctx->used_pages);
- atomic_sub(nr_pages_requested,
- &kctx->kbdev->memdev.used_pages);
+ kbase_trace_gpu_mem_usage_dec(kctx->kbdev, kctx, nr_left);
+ kbase_process_page_usage_dec(kctx, nr_left);
+ atomic_sub(nr_left, &kctx->used_pages);
+ atomic_sub(nr_left, &kctx->kbdev->memdev.used_pages);
invalid_request:
return -ENOMEM;
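
The reworked alloc_failed accounting is equivalent to the old code: kbase_free_phy_pages_helper() itself decrements the usage counters for the pages it frees (which is why the old path pre-incremented them first), so only the never-allocated remainder nr_left needs explicit un-accounting:

    /* Old path, net effect on the counters:
     *   +nr_pages_to_free - nr_pages_requested
     * = +(nr_pages_requested - nr_left) - nr_pages_requested
     * = -nr_left
     * which is exactly what the new path subtracts directly.
     */
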
@@ -2631,18 +2646,17 @@ struct tagged_addr *kbase_alloc_phy_pages_helper_locked(
lockdep_assert_held(&pool->pool_lock);
-#if !defined(CONFIG_MALI_2MB_ALLOC)
- WARN_ON(pool->order);
-#endif
+ kctx = alloc->imported.native.kctx;
+ kbdev = kctx->kbdev;
+
+ if (!kbdev->pagesize_2mb)
+ WARN_ON(pool->order);
if (alloc->reg) {
if (nr_pages_requested > alloc->reg->nr_pages - alloc->nents)
goto invalid_request;
}
- kctx = alloc->imported.native.kctx;
- kbdev = kctx->kbdev;
-
lockdep_assert_held(&kctx->mem_partials_lock);
if (nr_pages_requested == 0)
@@ -2657,12 +2671,12 @@ struct tagged_addr *kbase_alloc_phy_pages_helper_locked(
* allocation is visible to the OOM killer
*/
kbase_process_page_usage_inc(kctx, nr_pages_requested);
+ kbase_trace_gpu_mem_usage_inc(kctx->kbdev, kctx, nr_pages_requested);
tp = alloc->pages + alloc->nents;
new_pages = tp;
-#ifdef CONFIG_MALI_2MB_ALLOC
- if (pool->order) {
+ if (kbdev->pagesize_2mb && pool->order) {
int nr_lp = nr_left / (SZ_2M / SZ_4K);
res = kbase_mem_pool_alloc_pages_locked(pool,
@@ -2746,15 +2760,12 @@ struct tagged_addr *kbase_alloc_phy_pages_helper_locked(
if (nr_left)
goto alloc_failed;
} else {
-#endif
res = kbase_mem_pool_alloc_pages_locked(pool,
nr_left,
tp);
if (res <= 0)
goto alloc_failed;
-#ifdef CONFIG_MALI_2MB_ALLOC
}
-#endif
KBASE_TLSTREAM_AUX_PAGESALLOC(
kbdev,
@@ -2763,8 +2774,6 @@ struct tagged_addr *kbase_alloc_phy_pages_helper_locked(
alloc->nents += nr_pages_requested;
- kbase_trace_gpu_mem_usage_inc(kctx->kbdev, kctx, nr_pages_requested);
-
done:
return new_pages;
@@ -2775,8 +2784,7 @@ alloc_failed:
struct tagged_addr *start_free = alloc->pages + alloc->nents;
-#ifdef CONFIG_MALI_2MB_ALLOC
- if (pool->order) {
+ if (kbdev->pagesize_2mb && pool->order) {
while (nr_pages_to_free) {
if (is_huge_head(*start_free)) {
kbase_mem_pool_free_pages_locked(
@@ -2794,17 +2802,15 @@ alloc_failed:
}
}
} else {
-#endif
kbase_mem_pool_free_pages_locked(pool,
nr_pages_to_free,
start_free,
false, /* not dirty */
true); /* return to pool */
-#ifdef CONFIG_MALI_2MB_ALLOC
}
-#endif
}
+ kbase_trace_gpu_mem_usage_dec(kctx->kbdev, kctx, nr_pages_requested);
kbase_process_page_usage_dec(kctx, nr_pages_requested);
atomic_sub(nr_pages_requested, &kctx->used_pages);
atomic_sub(nr_pages_requested, &kctx->kbdev->memdev.used_pages);
@@ -3064,6 +3070,13 @@ KBASE_EXPORT_TEST_API(kbase_free_phy_pages_helper_locked);
/**
* kbase_jd_user_buf_unpin_pages - Release the pinned pages of a user buffer.
* @alloc: The allocation for the imported user buffer.
+ *
+ * This must only be called when terminating an alloc, when its refcount
+ * (number of users) has become 0. This also ensures it is only called once all
+ * CPU mappings have been closed.
+ *
+ * Instead, call kbase_jd_user_buf_unmap() if you need to unpin pages on active
+ * allocations.
*/
static void kbase_jd_user_buf_unpin_pages(struct kbase_mem_phy_alloc *alloc);
#endif
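
A sketch of the termination-only contract described above, assuming the alloc's final kbase_mem_phy_alloc_put() fires a kref release callback (the callback name here is illustrative, not the driver's):

    static void phy_alloc_release(struct kref *kref)
    {
        struct kbase_mem_phy_alloc *alloc =
            container_of(kref, struct kbase_mem_phy_alloc, kref);

        /* Refcount is now 0: no users and no CPU mappings remain, so the
         * pinned pages of an imported user buffer may finally be released.
         */
        kbase_jd_user_buf_unpin_pages(alloc);
        /* ... free the pages array and remaining alloc state ... */
    }
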
@@ -3194,9 +3207,32 @@ out_rollback:
out_term:
return -1;
}
-
KBASE_EXPORT_TEST_API(kbase_alloc_phy_pages);
+void kbase_set_phy_alloc_page_status(struct kbase_mem_phy_alloc *alloc,
+ enum kbase_page_status status)
+{
+ u32 i = 0;
+
+ for (; i < alloc->nents; i++) {
+ struct tagged_addr phys = alloc->pages[i];
+ struct kbase_page_metadata *page_md = kbase_page_private(as_page(phys));
+
+ /* Skip the 4KB page that is part of a large page, as the large page is
+ * excluded from the migration process.
+ */
+ if (is_huge(phys) || is_partial(phys))
+ continue;
+
+ if (!page_md)
+ continue;
+
+ spin_lock(&page_md->migrate_lock);
+ page_md->status = PAGE_STATUS_SET(page_md->status, (u8)status);
+ spin_unlock(&page_md->migrate_lock);
+ }
+}
+
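
kbase_set_phy_alloc_page_status() is used later in this patch to move whole allocations in and out of the page-migration state machine, e.g. when a region is recycled from the JIT evict list:

    if (kbase_is_page_migration_enabled())
        kbase_set_phy_alloc_page_status(reg->gpu_alloc, ALLOCATED_MAPPED);

and symmetrically with NOT_MOVABLE when an inactive region is returned to the JIT pool, so the shrinker (not the migrator) is what eventually frees it.
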
bool kbase_check_alloc_flags(unsigned long flags)
{
/* Only known input flags should be set. */
@@ -3437,30 +3473,36 @@ int kbase_check_alloc_sizes(struct kbase_context *kctx, unsigned long flags,
#undef KBASE_MSG_PRE
}
-/**
- * Acquire the per-context region list lock
- * @kctx: KBase context
- */
void kbase_gpu_vm_lock(struct kbase_context *kctx)
{
KBASE_DEBUG_ASSERT(kctx != NULL);
mutex_lock(&kctx->reg_lock);
}
-
KBASE_EXPORT_TEST_API(kbase_gpu_vm_lock);
-/**
- * Release the per-context region list lock
- * @kctx: KBase context
- */
+void kbase_gpu_vm_lock_with_pmode_sync(struct kbase_context *kctx)
+{
+#if MALI_USE_CSF
+ down_read(&kctx->kbdev->csf.pmode_sync_sem);
+#endif
+ kbase_gpu_vm_lock(kctx);
+}
+
void kbase_gpu_vm_unlock(struct kbase_context *kctx)
{
KBASE_DEBUG_ASSERT(kctx != NULL);
mutex_unlock(&kctx->reg_lock);
}
-
KBASE_EXPORT_TEST_API(kbase_gpu_vm_unlock);
+void kbase_gpu_vm_unlock_with_pmode_sync(struct kbase_context *kctx)
+{
+ kbase_gpu_vm_unlock(kctx);
+#if MALI_USE_CSF
+ up_read(&kctx->kbdev->csf.pmode_sync_sem);
+#endif
+}
+
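
The *_with_pmode_sync variants must be used as a pair: on CSF GPUs they additionally hold csf.pmode_sync_sem for reading across the whole critical section, so VA updates cannot race with protected-mode entry. Usage follows kbase_mem_free() above:

    kbase_gpu_vm_lock_with_pmode_sync(kctx);
    /* ... region teardown that must not race with protected mode ... */
    kbase_gpu_vm_unlock_with_pmode_sync(kctx);
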
#if IS_ENABLED(CONFIG_DEBUG_FS)
struct kbase_jit_debugfs_data {
int (*func)(struct kbase_jit_debugfs_data *data);
@@ -3688,12 +3730,7 @@ void kbase_jit_debugfs_init(struct kbase_context *kctx)
/* prevent unprivileged use of debug file system
* in old kernel version
*/
-#if (KERNEL_VERSION(4, 7, 0) <= LINUX_VERSION_CODE)
- /* only for newer kernel version debug file system is safe */
const mode_t mode = 0444;
-#else
- const mode_t mode = 0400;
-#endif
/* Caller already ensures this, but we keep the pattern for
* maintenance safety.
@@ -3767,7 +3804,15 @@ static void kbase_jit_destroy_worker(struct work_struct *work)
mutex_unlock(&kctx->jit_evict_lock);
kbase_gpu_vm_lock(kctx);
- reg->flags &= ~KBASE_REG_NO_USER_FREE;
+
+ /*
+ * Incrementing the refcount is prevented on JIT regions.
+ * If/when this ever changes we would need to compensate
+ * by implementing "free on putting the last reference",
+ * but only for JIT regions.
+ */
+ WARN_ON(atomic_read(&reg->no_user_free_count) > 1);
+ kbase_va_region_no_user_free_dec(reg);
kbase_mem_free_region(kctx, reg);
kbase_gpu_vm_unlock(kctx);
} while (1);
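
KBASE_REG_NO_USER_FREE has become a counted property (no_user_free_count) rather than a flag bit, so each taker drops its own reference instead of clearing a shared bit. A minimal sketch of the pairing, as kbase_update_region_flags() and the JIT teardown paths in this patch use it (both sides run with the VM lock held):

    kbase_gpu_vm_lock(kctx);
    kbase_va_region_no_user_free_inc(reg);  /* forbid freeing from user space */
    kbase_gpu_vm_unlock(kctx);

    /* ... later, when the kernel-side user of the region is done ... */
    kbase_va_region_no_user_free_dec(reg);  /* region may be freed again */
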
@@ -3782,6 +3827,7 @@ int kbase_jit_init(struct kbase_context *kctx)
INIT_WORK(&kctx->jit_work, kbase_jit_destroy_worker);
#if MALI_USE_CSF
+ mutex_init(&kctx->csf.kcpu_queues.jit_lock);
INIT_LIST_HEAD(&kctx->csf.kcpu_queues.jit_cmds_head);
INIT_LIST_HEAD(&kctx->csf.kcpu_queues.jit_blocked_queues);
#else /* !MALI_USE_CSF */
@@ -4020,25 +4066,18 @@ static int kbase_jit_grow(struct kbase_context *kctx,
if (reg->gpu_alloc->nents >= info->commit_pages)
goto done;
- /* Grow the backing */
- old_size = reg->gpu_alloc->nents;
-
/* Allocate some more pages */
delta = info->commit_pages - reg->gpu_alloc->nents;
pages_required = delta;
-#ifdef CONFIG_MALI_2MB_ALLOC
- if (pages_required >= (SZ_2M / SZ_4K)) {
+ if (kctx->kbdev->pagesize_2mb && pages_required >= (SZ_2M / SZ_4K)) {
pool = &kctx->mem_pools.large[kctx->jit_group_id];
/* Round up to number of 2 MB pages required */
pages_required += ((SZ_2M / SZ_4K) - 1);
pages_required /= (SZ_2M / SZ_4K);
} else {
-#endif
pool = &kctx->mem_pools.small[kctx->jit_group_id];
-#ifdef CONFIG_MALI_2MB_ALLOC
}
-#endif
if (reg->cpu_alloc != reg->gpu_alloc)
pages_required *= 2;
@@ -4059,7 +4098,7 @@ static int kbase_jit_grow(struct kbase_context *kctx,
spin_unlock(&kctx->mem_partials_lock);
kbase_gpu_vm_unlock(kctx);
- ret = kbase_mem_pool_grow(pool, pool_delta);
+ ret = kbase_mem_pool_grow(pool, pool_delta, kctx->task);
kbase_gpu_vm_lock(kctx);
if (ret)
@@ -4069,6 +4108,17 @@ static int kbase_jit_grow(struct kbase_context *kctx,
kbase_mem_pool_lock(pool);
}
+ if (reg->gpu_alloc->nents >= info->commit_pages) {
+ kbase_mem_pool_unlock(pool);
+ spin_unlock(&kctx->mem_partials_lock);
+ dev_info(
+ kctx->kbdev->dev,
+ "JIT alloc grown beyond the required number of initially required pages, this grow no longer needed.");
+ goto done;
+ }
+
+ old_size = reg->gpu_alloc->nents;
+ delta = info->commit_pages - old_size;
gpu_pages = kbase_alloc_phy_pages_helper_locked(reg->gpu_alloc, pool,
delta, &prealloc_sas[0]);
if (!gpu_pages) {
@@ -4219,11 +4269,11 @@ static bool jit_allow_allocate(struct kbase_context *kctx,
const struct base_jit_alloc_info *info,
bool ignore_pressure_limit)
{
-#if MALI_USE_CSF
- lockdep_assert_held(&kctx->csf.kcpu_queues.lock);
-#else
+#if !MALI_USE_CSF
lockdep_assert_held(&kctx->jctx.lock);
-#endif
+#else /* MALI_USE_CSF */
+ lockdep_assert_held(&kctx->csf.kcpu_queues.jit_lock);
+#endif /* !MALI_USE_CSF */
#if MALI_JIT_PRESSURE_LIMIT_BASE
if (!ignore_pressure_limit &&
@@ -4314,25 +4364,25 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx,
*/
const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_SYNC;
-#if MALI_USE_CSF
- lockdep_assert_held(&kctx->csf.kcpu_queues.lock);
-#else
+#if !MALI_USE_CSF
lockdep_assert_held(&kctx->jctx.lock);
-#endif
+#else /* MALI_USE_CSF */
+ lockdep_assert_held(&kctx->csf.kcpu_queues.jit_lock);
+#endif /* !MALI_USE_CSF */
if (!jit_allow_allocate(kctx, info, ignore_pressure_limit))
return NULL;
-#ifdef CONFIG_MALI_2MB_ALLOC
- /* Preallocate memory for the sub-allocation structs */
- for (i = 0; i != ARRAY_SIZE(prealloc_sas); ++i) {
- prealloc_sas[i] = kmalloc(sizeof(*prealloc_sas[i]), GFP_KERNEL);
- if (!prealloc_sas[i])
- goto end;
+ if (kctx->kbdev->pagesize_2mb) {
+ /* Preallocate memory for the sub-allocation structs */
+ for (i = 0; i != ARRAY_SIZE(prealloc_sas); ++i) {
+ prealloc_sas[i] = kmalloc(sizeof(*prealloc_sas[i]), GFP_KERNEL);
+ if (!prealloc_sas[i])
+ goto end;
+ }
}
-#endif
- kbase_gpu_vm_lock(kctx);
+ kbase_gpu_vm_lock_with_pmode_sync(kctx);
mutex_lock(&kctx->jit_evict_lock);
/*
@@ -4414,12 +4464,12 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx,
kbase_jit_done_phys_increase(kctx, needed_pages);
#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */
- kbase_gpu_vm_unlock(kctx);
+ kbase_gpu_vm_unlock_with_pmode_sync(kctx);
if (ret < 0) {
/*
* An update to an allocation from the pool failed,
- * chances are slim a new allocation would fair any
+ * chances are slim a new allocation would fare any
* better so return the allocation to the pool and
* return the function with failure.
*/
@@ -4441,6 +4491,17 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx,
mutex_unlock(&kctx->jit_evict_lock);
reg = NULL;
goto end;
+ } else {
+ /* A suitable JIT allocation existed on the evict list, so we need
+ * to make sure that the NOT_MOVABLE property is cleared.
+ */
+ if (kbase_is_page_migration_enabled()) {
+ kbase_gpu_vm_lock(kctx);
+ mutex_lock(&kctx->jit_evict_lock);
+ kbase_set_phy_alloc_page_status(reg->gpu_alloc, ALLOCATED_MAPPED);
+ mutex_unlock(&kctx->jit_evict_lock);
+ kbase_gpu_vm_unlock(kctx);
+ }
}
} else {
/* No suitable JIT allocation was found so create a new one */
@@ -4468,7 +4529,7 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx,
#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */
mutex_unlock(&kctx->jit_evict_lock);
- kbase_gpu_vm_unlock(kctx);
+ kbase_gpu_vm_unlock_with_pmode_sync(kctx);
reg = kbase_mem_alloc(kctx, info->va_pages, info->commit_pages, info->extension,
&flags, &gpu_addr, mmu_sync_info);
@@ -4497,6 +4558,29 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx,
}
}
+ /* Similarly to tiler heap init, there is a short window of time
+ * where the (either recycled or newly allocated, in our case) region has
+ * "no user free" count incremented but is still missing the DONT_NEED flag, and
+ * doesn't yet have the ACTIVE_JIT_ALLOC flag either. Temporarily leaking the
+ * allocation is the least bad option that doesn't lead to a security issue down the
+ * line (it will eventually be cleaned up during context termination).
+ *
+ * We also need to call kbase_gpu_vm_lock regardless, as we're updating the region
+ * flags.
+ */
+ kbase_gpu_vm_lock(kctx);
+ if (unlikely(atomic_read(&reg->no_user_free_count) > 1)) {
+ kbase_gpu_vm_unlock(kctx);
+ dev_err(kctx->kbdev->dev, "JIT region has no_user_free_count > 1!\n");
+
+ mutex_lock(&kctx->jit_evict_lock);
+ list_move(&reg->jit_node, &kctx->jit_pool_head);
+ mutex_unlock(&kctx->jit_evict_lock);
+
+ reg = NULL;
+ goto end;
+ }
+
trace_mali_jit_alloc(reg, info->id);
kctx->jit_current_allocations++;
@@ -4514,6 +4598,7 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx,
kbase_jit_report_update_pressure(kctx, reg, info->va_pages,
KBASE_JIT_REPORT_ON_ALLOC_OR_FREE);
#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */
+ kbase_gpu_vm_unlock(kctx);
end:
for (i = 0; i != ARRAY_SIZE(prealloc_sas); ++i)
@@ -4526,6 +4611,12 @@ void kbase_jit_free(struct kbase_context *kctx, struct kbase_va_region *reg)
{
u64 old_pages;
+#if !MALI_USE_CSF
+ lockdep_assert_held(&kctx->jctx.lock);
+#else /* MALI_USE_CSF */
+ lockdep_assert_held(&kctx->csf.kcpu_queues.jit_lock);
+#endif /* !MALI_USE_CSF */
+
/* JIT id not immediately available here, so use 0u */
trace_mali_jit_free(reg, 0u);
@@ -4540,9 +4631,9 @@ void kbase_jit_free(struct kbase_context *kctx, struct kbase_va_region *reg)
u64 delta = old_pages - new_size;
if (delta) {
- mutex_lock(&kctx->reg_lock);
+ kbase_gpu_vm_lock_with_pmode_sync(kctx);
kbase_mem_shrink(kctx, reg, old_pages - delta);
- mutex_unlock(&kctx->reg_lock);
+ kbase_gpu_vm_unlock_with_pmode_sync(kctx);
}
}
@@ -4578,12 +4669,18 @@ void kbase_jit_free(struct kbase_context *kctx, struct kbase_va_region *reg)
list_move(&reg->jit_node, &kctx->jit_pool_head);
+ /* Inactive JIT regions should be freed by the shrinker and not impacted
+ * by page migration. Once freed, they will enter into the page migration
+ * state machine via the mempools.
+ */
+ if (kbase_is_page_migration_enabled())
+ kbase_set_phy_alloc_page_status(reg->gpu_alloc, NOT_MOVABLE);
mutex_unlock(&kctx->jit_evict_lock);
}
void kbase_jit_backing_lost(struct kbase_va_region *reg)
{
- struct kbase_context *kctx = kbase_reg_flags_to_kctx(reg);
+ struct kbase_context *kctx = kbase_reg_to_kctx(reg);
if (WARN_ON(!kctx))
return;
@@ -4624,7 +4721,14 @@ bool kbase_jit_evict(struct kbase_context *kctx)
mutex_unlock(&kctx->jit_evict_lock);
if (reg) {
- reg->flags &= ~KBASE_REG_NO_USER_FREE;
+ /*
+ * Incrementing the refcount is prevented on JIT regions.
+ * If/when this ever changes we would need to compensate
+ * by implementing "free on putting the last reference",
+ * but only for JIT regions.
+ */
+ WARN_ON(atomic_read(&reg->no_user_free_count) > 1);
+ kbase_va_region_no_user_free_dec(reg);
kbase_mem_free_region(kctx, reg);
}
@@ -4636,8 +4740,7 @@ void kbase_jit_term(struct kbase_context *kctx)
struct kbase_va_region *walker;
/* Free all allocations for this context */
-
- kbase_gpu_vm_lock(kctx);
+ kbase_gpu_vm_lock_with_pmode_sync(kctx);
mutex_lock(&kctx->jit_evict_lock);
/* Free all allocations from the pool */
while (!list_empty(&kctx->jit_pool_head)) {
@@ -4646,7 +4749,14 @@ void kbase_jit_term(struct kbase_context *kctx)
list_del(&walker->jit_node);
list_del_init(&walker->gpu_alloc->evict_node);
mutex_unlock(&kctx->jit_evict_lock);
- walker->flags &= ~KBASE_REG_NO_USER_FREE;
+ /*
+ * Incrementing the refcount is prevented on JIT regions.
+ * If/when this ever changes we would need to compensate
+ * by implementing "free on putting the last reference",
+ * but only for JIT regions.
+ */
+ WARN_ON(atomic_read(&walker->no_user_free_count) > 1);
+ kbase_va_region_no_user_free_dec(walker);
kbase_mem_free_region(kctx, walker);
mutex_lock(&kctx->jit_evict_lock);
}
@@ -4658,7 +4768,14 @@ void kbase_jit_term(struct kbase_context *kctx)
list_del(&walker->jit_node);
list_del_init(&walker->gpu_alloc->evict_node);
mutex_unlock(&kctx->jit_evict_lock);
- walker->flags &= ~KBASE_REG_NO_USER_FREE;
+ /*
+ * Incrementing the refcount is prevented on JIT regions.
+ * If/when this ever changes we would need to compensate
+ * by implementing "free on putting the last reference",
+ * but only for JIT regions.
+ */
+ WARN_ON(atomic_read(&walker->no_user_free_count) > 1);
+ kbase_va_region_no_user_free_dec(walker);
kbase_mem_free_region(kctx, walker);
mutex_lock(&kctx->jit_evict_lock);
}
@@ -4666,7 +4783,7 @@ void kbase_jit_term(struct kbase_context *kctx)
WARN_ON(kctx->jit_phys_pages_to_be_allocated);
#endif
mutex_unlock(&kctx->jit_evict_lock);
- kbase_gpu_vm_unlock(kctx);
+ kbase_gpu_vm_unlock_with_pmode_sync(kctx);
/*
* Flush the freeing of allocations whose backing has been freed
@@ -4772,7 +4889,23 @@ void kbase_unpin_user_buf_page(struct page *page)
#if MALI_USE_CSF
static void kbase_jd_user_buf_unpin_pages(struct kbase_mem_phy_alloc *alloc)
{
- if (alloc->nents) {
+ /* In CSF builds, we keep pages pinned until the last reference is
+ * released on the alloc. A refcount of 0 also means we can be sure
+ * that all CPU mappings have been closed on this alloc, and no more
+ * mappings of it will be created.
+ *
+ * Further, the WARN() below captures the restriction that this
+ * function will not handle anything other than the alloc termination
+ * path, because the caller of kbase_mem_phy_alloc_put() is not
+ * required to hold the kctx's reg_lock, and so we could not handle
+ * removing an existing CPU mapping here.
+ *
+ * Refer to this function's kernel-doc comments for alternatives for
+ * unpinning a User buffer.
+ */
+
+ if (alloc->nents && !WARN(kref_read(&alloc->kref) != 0,
+ "must only be called on terminating an allocation")) {
struct page **pages = alloc->imported.user_buf.pages;
long i;
@@ -4780,6 +4913,8 @@ static void kbase_jd_user_buf_unpin_pages(struct kbase_mem_phy_alloc *alloc)
for (i = 0; i < alloc->nents; i++)
kbase_unpin_user_buf_page(pages[i]);
+
+ alloc->nents = 0;
}
}
#endif
@@ -4795,6 +4930,8 @@ int kbase_jd_user_buf_pin_pages(struct kbase_context *kctx,
long i;
int write;
+ lockdep_assert_held(&kctx->reg_lock);
+
if (WARN_ON(alloc->type != KBASE_MEM_TYPE_IMPORTED_USER_BUF))
return -EINVAL;
@@ -4810,18 +4947,7 @@ int kbase_jd_user_buf_pin_pages(struct kbase_context *kctx,
write = reg->flags & (KBASE_REG_CPU_WR | KBASE_REG_GPU_WR);
-#if KERNEL_VERSION(4, 6, 0) > LINUX_VERSION_CODE
- pinned_pages = get_user_pages(NULL, mm, address, alloc->imported.user_buf.nr_pages,
-#if KERNEL_VERSION(4, 4, 168) <= LINUX_VERSION_CODE && \
-KERNEL_VERSION(4, 5, 0) > LINUX_VERSION_CODE
- write ? FOLL_WRITE : 0, pages, NULL);
-#else
- write, 0, pages, NULL);
-#endif
-#elif KERNEL_VERSION(4, 9, 0) > LINUX_VERSION_CODE
- pinned_pages = get_user_pages_remote(NULL, mm, address, alloc->imported.user_buf.nr_pages,
- write, 0, pages, NULL);
-#elif KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE
+#if KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE
pinned_pages = get_user_pages_remote(NULL, mm, address, alloc->imported.user_buf.nr_pages,
write ? FOLL_WRITE : 0, pages, NULL);
#elif KERNEL_VERSION(5, 9, 0) > LINUX_VERSION_CODE
@@ -4836,6 +4962,9 @@ KERNEL_VERSION(4, 5, 0) > LINUX_VERSION_CODE
return pinned_pages;
if (pinned_pages != alloc->imported.user_buf.nr_pages) {
+ /* Above code already ensures there will not have been a CPU
+ * mapping by ensuring alloc->nents is 0
+ */
for (i = 0; i < pinned_pages; i++)
kbase_unpin_user_buf_page(pages[i]);
return -ENOMEM;
@@ -4849,43 +4978,65 @@ KERNEL_VERSION(4, 5, 0) > LINUX_VERSION_CODE
static int kbase_jd_user_buf_map(struct kbase_context *kctx,
struct kbase_va_region *reg)
{
- long pinned_pages;
+ int err;
+ long pinned_pages = 0;
struct kbase_mem_phy_alloc *alloc;
struct page **pages;
struct tagged_addr *pa;
- long i;
- unsigned long address;
+ long i, dma_mapped_pages;
struct device *dev;
- unsigned long offset;
- unsigned long local_size;
unsigned long gwt_mask = ~0;
- int err = kbase_jd_user_buf_pin_pages(kctx, reg);
-
/* Calls to this function are inherently asynchronous, with respect to
* MMU operations.
*/
const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC;
+ bool write;
+ enum dma_data_direction dma_dir;
+
+ /* If neither the CPU nor the GPU needs write access, use DMA_TO_DEVICE
+ * to avoid potentially-destructive CPU cache invalidates that could
+	 * cause corruption of user data.
+ */
+ write = reg->flags & (KBASE_REG_CPU_WR | KBASE_REG_GPU_WR);
+ dma_dir = write ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE;
+
+ lockdep_assert_held(&kctx->reg_lock);
+
+ err = kbase_jd_user_buf_pin_pages(kctx, reg);
if (err)
return err;
alloc = reg->gpu_alloc;
pa = kbase_get_gpu_phy_pages(reg);
- address = alloc->imported.user_buf.address;
pinned_pages = alloc->nents;
pages = alloc->imported.user_buf.pages;
dev = kctx->kbdev->dev;
- offset = address & ~PAGE_MASK;
- local_size = alloc->imported.user_buf.size;
+ /* Manual CPU cache synchronization.
+ *
+ * The driver disables automatic CPU cache synchronization because the
+ * memory pages that enclose the imported region may also contain
+ * sub-regions which are not imported and that are allocated and used
+ * by the user process. This may be the case of memory at the beginning
+ * of the first page and at the end of the last page. Automatic CPU cache
+ * synchronization would force some operations on those memory allocations,
+ * unbeknown to the user process: in particular, a CPU cache invalidate
+ * upon unmapping would destroy the content of dirty CPU caches and cause
+ * the user process to lose CPU writes to the non-imported sub-regions.
+ *
+ * When the GPU claims ownership of the imported memory buffer, it shall
+ * commit CPU writes for the whole of all pages that enclose the imported
+ * region, otherwise the initial content of memory would be wrong.
+ */
for (i = 0; i < pinned_pages; i++) {
dma_addr_t dma_addr;
- unsigned long min;
-
- min = MIN(PAGE_SIZE - offset, local_size);
- dma_addr = dma_map_page(dev, pages[i],
- offset, min,
- DMA_BIDIRECTIONAL);
+#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
+ dma_addr = dma_map_page(dev, pages[i], 0, PAGE_SIZE, dma_dir);
+#else
+ dma_addr = dma_map_page_attrs(dev, pages[i], 0, PAGE_SIZE, dma_dir,
+ DMA_ATTR_SKIP_CPU_SYNC);
+#endif
err = dma_mapping_error(dev, dma_addr);
if (err)
goto unwind;
@@ -4893,8 +5044,7 @@ static int kbase_jd_user_buf_map(struct kbase_context *kctx,
alloc->imported.user_buf.dma_addrs[i] = dma_addr;
pa[i] = as_tagged(page_to_phys(pages[i]));
- local_size -= min;
- offset = 0;
+ dma_sync_single_for_device(dev, dma_addr, PAGE_SIZE, dma_dir);
}
#ifdef CONFIG_MALI_CINSTR_GWT
@@ -4902,23 +5052,44 @@ static int kbase_jd_user_buf_map(struct kbase_context *kctx,
gwt_mask = ~KBASE_REG_GPU_WR;
#endif
- err = kbase_mmu_insert_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn,
- pa, kbase_reg_current_backed_size(reg),
- reg->flags & gwt_mask, kctx->as_nr,
- alloc->group_id, mmu_sync_info);
+ err = kbase_mmu_insert_pages_skip_status_update(kctx->kbdev, &kctx->mmu, reg->start_pfn, pa,
+ kbase_reg_current_backed_size(reg),
+ reg->flags & gwt_mask, kctx->as_nr,
+ alloc->group_id, mmu_sync_info, NULL);
if (err == 0)
return 0;
/* fall down */
unwind:
alloc->nents = 0;
- while (i--) {
- dma_unmap_page(kctx->kbdev->dev,
- alloc->imported.user_buf.dma_addrs[i],
- PAGE_SIZE, DMA_BIDIRECTIONAL);
+ dma_mapped_pages = i;
+	/* Run the unmap loop in the same order as the map loop, and perform
+	 * CPU cache synchronization again to re-write the content of dirty CPU
+	 * caches to memory. This is a precautionary measure in case a GPU job
+	 * has taken
+ * advantage of a partially GPU-mapped range to write and corrupt the
+ * content of memory, either inside or outside the imported region.
+ *
+ * Notice that this error recovery path doesn't try to be optimal and just
+ * flushes the entire page range.
+ */
+ for (i = 0; i < dma_mapped_pages; i++) {
+ dma_addr_t dma_addr = alloc->imported.user_buf.dma_addrs[i];
+
+ dma_sync_single_for_device(dev, dma_addr, PAGE_SIZE, dma_dir);
+#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
+ dma_unmap_page(dev, dma_addr, PAGE_SIZE, dma_dir);
+#else
+ dma_unmap_page_attrs(dev, dma_addr, PAGE_SIZE, dma_dir, DMA_ATTR_SKIP_CPU_SYNC);
+#endif
}
- while (++i < pinned_pages) {
+	/* The user buffer could already have been pinned before
+	 * entering this function, and hence there could potentially be CPU
+	 * mappings of it.
+ */
+ kbase_mem_shrink_cpu_mapping(kctx, reg, 0, pinned_pages);
+
+ for (i = 0; i < pinned_pages; i++) {
kbase_unpin_user_buf_page(pages[i]);
pages[i] = NULL;
}
@@ -4926,34 +5097,165 @@ unwind:
return err;
}
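
On kernels that provide dma_map_page_attrs() (4.10 and later), the map loop above reduces to: map with automatic CPU cache maintenance suppressed, then explicitly commit CPU writes for the full page. A standalone sketch of that pattern, using DMA_BIDIRECTIONAL (the driver substitutes DMA_TO_DEVICE for read-only imports, as above):

    dma_addr_t dma_addr;

    dma_addr = dma_map_page_attrs(dev, page, 0, PAGE_SIZE, DMA_BIDIRECTIONAL,
                                  DMA_ATTR_SKIP_CPU_SYNC);
    if (dma_mapping_error(dev, dma_addr))
        return -ENOMEM;

    /* Commit any dirty CPU cache lines so the GPU sees current content. */
    dma_sync_single_for_device(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
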
+/* user_buf_sync_read_only_page - This function handles syncing a single page that has read
+ * access only, on both the CPU and GPU, so it is ready to be unmapped.
+ * @kctx: kbase context
+ * @imported_size: the number of bytes to sync
+ * @dma_addr: DMA address of the bytes to be sync'd
+ * @offset_within_page: (unused) offset of the bytes within the page. Passed so that the calling
+ * signature is identical to user_buf_sync_writable_page().
+ */
+static void user_buf_sync_read_only_page(struct kbase_context *kctx, unsigned long imported_size,
+ dma_addr_t dma_addr, unsigned long offset_within_page)
+{
+ /* Manual cache synchronization.
+ *
+ * Writes from neither the CPU nor GPU are possible via this mapping,
+	 * so we just sync the imported range to the device.
+ */
+ dma_sync_single_for_device(kctx->kbdev->dev, dma_addr, imported_size, DMA_TO_DEVICE);
+}
+
+/* user_buf_sync_writable_page - This function handles syncing a single page that has read
+ * and write access, from either (or both of) the CPU and GPU,
+ * so it is ready to be unmapped.
+ * @kctx: kbase context
+ * @imported_size: the number of bytes to unmap
+ * @dma_addr: DMA address of the bytes to be unmapped
+ * @offset_within_page: offset of the bytes within the page. This is the offset to the subrange of
+ * the memory that is "imported" and so is intended for GPU access. Areas of
+ * the page outside of this - whilst still GPU accessible - are not intended
+ * for use by GPU work, and should also not be modified as the userspace CPU
+ * threads may be modifying them.
+ */
+static void user_buf_sync_writable_page(struct kbase_context *kctx, unsigned long imported_size,
+ dma_addr_t dma_addr, unsigned long offset_within_page)
+{
+ /* Manual CPU cache synchronization.
+ *
+ * When the GPU returns ownership of the buffer to the CPU, the driver
+ * needs to treat imported and non-imported memory differently.
+ *
+ * The first case to consider is non-imported sub-regions at the
+ * beginning of the first page and at the end of last page. For these
+ * sub-regions: CPU cache shall be committed with a clean+invalidate,
+ * in order to keep the last CPU write.
+ *
+ * Imported region prefers the opposite treatment: this memory has been
+ * legitimately mapped and used by the GPU, hence GPU writes shall be
+ * committed to memory, while CPU cache shall be invalidated to make
+ * sure that CPU reads the correct memory content.
+ *
+	 * The following diagram shows the expected value of the variables
+	 * used in this function in the corner case of an imported region enclosed
+ * by a single memory page:
+ *
+	 * page boundary ->|---------- | <- dma_addr (initial value)
+	 *                 |           |
+	 *                 | - - - - - | <- offset_within_page
+	 *                 |XXXXXXXXXXX|\
+	 *                 |XXXXXXXXXXX| \
+	 *                 |XXXXXXXXXXX| }- imported_size
+	 *                 |XXXXXXXXXXX| /
+	 *                 |XXXXXXXXXXX|/
+	 *                 | - - - - - | <- offset_within_page + imported_size
+	 *                 |           |\
+	 *                 |           | }- PAGE_SIZE - imported_size -
+	 *                 |           |/        offset_within_page
+	 *                 |           |
+	 * page boundary ->|-----------|
+ *
+ * If the imported region is enclosed by more than one page, then
+ * offset_within_page = 0 for any page after the first.
+ */
+
+ /* Only for first page: handle non-imported range at the beginning. */
+ if (offset_within_page > 0) {
+ dma_sync_single_for_device(kctx->kbdev->dev, dma_addr, offset_within_page,
+ DMA_BIDIRECTIONAL);
+ dma_addr += offset_within_page;
+ }
+
+ /* For every page: handle imported range. */
+ if (imported_size > 0)
+ dma_sync_single_for_cpu(kctx->kbdev->dev, dma_addr, imported_size,
+ DMA_BIDIRECTIONAL);
+
+ /* Only for last page (that may coincide with first page):
+ * handle non-imported range at the end.
+ */
+ if ((imported_size + offset_within_page) < PAGE_SIZE) {
+ dma_addr += imported_size;
+ dma_sync_single_for_device(kctx->kbdev->dev, dma_addr,
+ PAGE_SIZE - imported_size - offset_within_page,
+ DMA_BIDIRECTIONAL);
+ }
+}
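
A worked example of the three sync ranges, for an import that starts at offset 0x200 into its only page and is 0x600 bytes long (PAGE_SIZE == 0x1000; da is that page's DMA address):

    dma_sync_single_for_device(dev, da,          0x200, DMA_BIDIRECTIONAL); /* head   */
    dma_sync_single_for_cpu(dev,    da + 0x200,  0x600, DMA_BIDIRECTIONAL); /* import */
    dma_sync_single_for_device(dev, da + 0x800,  0x800, DMA_BIDIRECTIONAL); /* tail   */

Both non-imported ends are cleaned to memory, preserving userspace CPU writes, while the imported middle is invalidated so the CPU reads what the GPU wrote.
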
+
/* This function would also perform the work of unpinning pages on Job Manager
* GPUs, which implies that a call to kbase_jd_user_buf_pin_pages() will NOT
* have a corresponding call to kbase_jd_user_buf_unpin_pages().
*/
-static void kbase_jd_user_buf_unmap(struct kbase_context *kctx,
- struct kbase_mem_phy_alloc *alloc, bool writeable)
+static void kbase_jd_user_buf_unmap(struct kbase_context *kctx, struct kbase_mem_phy_alloc *alloc,
+ struct kbase_va_region *reg)
{
long i;
struct page **pages;
- unsigned long size = alloc->imported.user_buf.size;
+ unsigned long offset_within_page = alloc->imported.user_buf.address & ~PAGE_MASK;
+ unsigned long remaining_size = alloc->imported.user_buf.size;
+ bool writable = (reg->flags & (KBASE_REG_CPU_WR | KBASE_REG_GPU_WR));
+
+ lockdep_assert_held(&kctx->reg_lock);
KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_IMPORTED_USER_BUF);
pages = alloc->imported.user_buf.pages;
+
+#if !MALI_USE_CSF
+ kbase_mem_shrink_cpu_mapping(kctx, reg, 0, alloc->nents);
+#endif
+
for (i = 0; i < alloc->imported.user_buf.nr_pages; i++) {
- unsigned long local_size;
+ unsigned long imported_size = MIN(remaining_size, PAGE_SIZE - offset_within_page);
+ /* Notice: this is a temporary variable that is used for DMA sync
+ * operations, and that could be incremented by an offset if the
+ * current page contains both imported and non-imported memory
+ * sub-regions.
+ *
+ * It is valid to add an offset to this value, because the offset
+ * is always kept within the physically contiguous dma-mapped range
+ * and there's no need to translate to physical address to offset it.
+ *
+ * This variable is not going to be used for the actual DMA unmap
+ * operation, that shall always use the original DMA address of the
+ * whole memory page.
+ */
dma_addr_t dma_addr = alloc->imported.user_buf.dma_addrs[i];
+ enum dma_data_direction dma_dir = writable ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE;
+
+ if (writable)
+ user_buf_sync_writable_page(kctx, imported_size, dma_addr,
+ offset_within_page);
+ else
+ user_buf_sync_read_only_page(kctx, imported_size, dma_addr,
+ offset_within_page);
- local_size = MIN(size, PAGE_SIZE - (dma_addr & ~PAGE_MASK));
- dma_unmap_page(kctx->kbdev->dev, dma_addr, local_size,
- DMA_BIDIRECTIONAL);
- if (writeable)
+ /* Notice: use the original DMA address to unmap the whole memory page. */
+#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
+ dma_unmap_page(kctx->kbdev->dev, alloc->imported.user_buf.dma_addrs[i], PAGE_SIZE,
+ dma_dir);
+#else
+ dma_unmap_page_attrs(kctx->kbdev->dev, alloc->imported.user_buf.dma_addrs[i],
+ PAGE_SIZE, dma_dir, DMA_ATTR_SKIP_CPU_SYNC);
+#endif
+ if (writable)
set_page_dirty_lock(pages[i]);
#if !MALI_USE_CSF
kbase_unpin_user_buf_page(pages[i]);
pages[i] = NULL;
#endif
- size -= local_size;
+ remaining_size -= imported_size;
+ offset_within_page = 0;
}
#if !MALI_USE_CSF
alloc->nents = 0;
@@ -4964,7 +5266,8 @@ int kbase_mem_copy_to_pinned_user_pages(struct page **dest_pages,
void *src_page, size_t *to_copy, unsigned int nr_pages,
unsigned int *target_page_nr, size_t offset)
{
- void *target_page = kmap(dest_pages[*target_page_nr]);
+ void *target_page = kbase_kmap(dest_pages[*target_page_nr]);
+
size_t chunk = PAGE_SIZE-offset;
if (!target_page) {
@@ -4977,13 +5280,13 @@ int kbase_mem_copy_to_pinned_user_pages(struct page **dest_pages,
memcpy(target_page + offset, src_page, chunk);
*to_copy -= chunk;
- kunmap(dest_pages[*target_page_nr]);
+ kbase_kunmap(dest_pages[*target_page_nr], target_page);
*target_page_nr += 1;
if (*target_page_nr >= nr_pages || *to_copy == 0)
return 0;
- target_page = kmap(dest_pages[*target_page_nr]);
+ target_page = kbase_kmap(dest_pages[*target_page_nr]);
if (!target_page) {
pr_err("%s: kmap failure", __func__);
return -ENOMEM;
@@ -4995,16 +5298,16 @@ int kbase_mem_copy_to_pinned_user_pages(struct page **dest_pages,
memcpy(target_page, src_page + PAGE_SIZE-offset, chunk);
*to_copy -= chunk;
- kunmap(dest_pages[*target_page_nr]);
+ kbase_kunmap(dest_pages[*target_page_nr], target_page);
return 0;
}
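
kbase_kmap()/kbase_kunmap() come from linux/version_compat_defs.h; note that kbase_kunmap() takes the mapped address as well as the page, which is what kunmap_local() needs on kernels where the wrapper presumably uses kmap_local_page(). A plausible shape for such a wrapper (an assumption for illustration, not the actual header):

    static inline void kbase_kunmap(struct page *p, void *addr)
    {
    #if KERNEL_VERSION(5, 11, 0) <= LINUX_VERSION_CODE
        kunmap_local(addr);   /* local mappings are keyed by address */
    #else
        (void)addr;
        kunmap(p);            /* classic kmap() is keyed by the page */
    #endif
    }
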
-struct kbase_mem_phy_alloc *kbase_map_external_resource(
- struct kbase_context *kctx, struct kbase_va_region *reg,
- struct mm_struct *locked_mm)
+int kbase_map_external_resource(struct kbase_context *kctx, struct kbase_va_region *reg,
+ struct mm_struct *locked_mm)
{
- int err;
+ int err = 0;
+ struct kbase_mem_phy_alloc *alloc = reg->gpu_alloc;
lockdep_assert_held(&kctx->reg_lock);
@@ -5013,7 +5316,7 @@ struct kbase_mem_phy_alloc *kbase_map_external_resource(
case KBASE_MEM_TYPE_IMPORTED_USER_BUF: {
if ((reg->gpu_alloc->imported.user_buf.mm != locked_mm) &&
(!reg->gpu_alloc->nents))
- goto exit;
+ return -EINVAL;
reg->gpu_alloc->imported.user_buf.current_mapping_usage_count++;
if (reg->gpu_alloc->imported.user_buf
@@ -5021,7 +5324,7 @@ struct kbase_mem_phy_alloc *kbase_map_external_resource(
err = kbase_jd_user_buf_map(kctx, reg);
if (err) {
reg->gpu_alloc->imported.user_buf.current_mapping_usage_count--;
- goto exit;
+ return err;
}
}
}
@@ -5029,21 +5332,30 @@ struct kbase_mem_phy_alloc *kbase_map_external_resource(
case KBASE_MEM_TYPE_IMPORTED_UMM: {
err = kbase_mem_umm_map(kctx, reg);
if (err)
- goto exit;
+ return err;
break;
}
default:
- goto exit;
+ dev_dbg(kctx->kbdev->dev,
+ "Invalid external resource GPU allocation type (%x) on mapping",
+ alloc->type);
+ return -EINVAL;
}
- return kbase_mem_phy_alloc_get(reg->gpu_alloc);
-exit:
- return NULL;
+ kbase_va_region_alloc_get(kctx, reg);
+ kbase_mem_phy_alloc_get(alloc);
+ return err;
}
-void kbase_unmap_external_resource(struct kbase_context *kctx,
- struct kbase_va_region *reg, struct kbase_mem_phy_alloc *alloc)
+void kbase_unmap_external_resource(struct kbase_context *kctx, struct kbase_va_region *reg)
{
+	/* gpu_alloc was used in kbase_map_external_resource(), so we need to use it for the
+ * unmapping operation.
+ */
+ struct kbase_mem_phy_alloc *alloc = reg->gpu_alloc;
+
+ lockdep_assert_held(&kctx->reg_lock);
+
switch (alloc->type) {
case KBASE_MEM_TYPE_IMPORTED_UMM: {
kbase_mem_umm_unmap(kctx, reg, alloc);
@@ -5053,28 +5365,29 @@ void kbase_unmap_external_resource(struct kbase_context *kctx,
alloc->imported.user_buf.current_mapping_usage_count--;
if (alloc->imported.user_buf.current_mapping_usage_count == 0) {
- bool writeable = true;
-
- if (!kbase_is_region_invalid_or_free(reg) &&
- reg->gpu_alloc == alloc)
- kbase_mmu_teardown_pages(
- kctx->kbdev,
- &kctx->mmu,
- reg->start_pfn,
- kbase_reg_current_backed_size(reg),
- kctx->as_nr);
-
- if (reg && ((reg->flags & (KBASE_REG_CPU_WR | KBASE_REG_GPU_WR)) == 0))
- writeable = false;
+ if (!kbase_is_region_invalid_or_free(reg)) {
+ kbase_mmu_teardown_imported_pages(
+ kctx->kbdev, &kctx->mmu, reg->start_pfn, alloc->pages,
+ kbase_reg_current_backed_size(reg),
+ kbase_reg_current_backed_size(reg), kctx->as_nr);
+ }
- kbase_jd_user_buf_unmap(kctx, alloc, writeable);
+ kbase_jd_user_buf_unmap(kctx, alloc, reg);
+ }
}
- }
break;
default:
- break;
+ WARN(1, "Invalid external resource GPU allocation type (%x) on unmapping",
+ alloc->type);
+ return;
}
kbase_mem_phy_alloc_put(alloc);
+ kbase_va_region_alloc_put(kctx, reg);
+}
+
+static inline u64 kbasep_get_va_gpu_addr(struct kbase_va_region *reg)
+{
+ return reg->start_pfn << PAGE_SHIFT;
}
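
kbase_map_external_resource() now returns an error code and takes its own references on both the VA region and its physical alloc; kbase_unmap_external_resource() drops both. Per the lockdep asserts, both run under kctx->reg_lock. A minimal usage sketch:

    /* Caller holds kctx->reg_lock. */
    if (!kbase_map_external_resource(kctx, reg, NULL)) {
        /* ... the GPU may now access the imported memory ... */
        kbase_unmap_external_resource(kctx, reg);
    }
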
struct kbase_ctx_ext_res_meta *kbase_sticky_resource_acquire(
@@ -5090,7 +5403,7 @@ struct kbase_ctx_ext_res_meta *kbase_sticky_resource_acquire(
* metadata which matches the region which is being acquired.
*/
list_for_each_entry(walker, &kctx->ext_res_meta_head, ext_res_node) {
- if (walker->gpu_addr == gpu_addr) {
+ if (kbasep_get_va_gpu_addr(walker->reg) == gpu_addr) {
meta = walker;
meta->ref++;
break;
@@ -5102,8 +5415,7 @@ struct kbase_ctx_ext_res_meta *kbase_sticky_resource_acquire(
struct kbase_va_region *reg;
/* Find the region */
- reg = kbase_region_tracker_find_region_enclosing_address(
- kctx, gpu_addr);
+ reg = kbase_region_tracker_find_region_enclosing_address(kctx, gpu_addr);
if (kbase_is_region_invalid_or_free(reg))
goto failed;
@@ -5111,18 +5423,18 @@ struct kbase_ctx_ext_res_meta *kbase_sticky_resource_acquire(
meta = kzalloc(sizeof(*meta), GFP_KERNEL);
if (!meta)
goto failed;
-
/*
* Fill in the metadata object and acquire a reference
* for the physical resource.
*/
- meta->alloc = kbase_map_external_resource(kctx, reg, NULL);
- meta->ref = 1;
+ meta->reg = reg;
- if (!meta->alloc)
+ /* Map the external resource to the GPU allocation of the region
+ * and acquire the reference to the VA region
+ */
+ if (kbase_map_external_resource(kctx, meta->reg, NULL))
goto fail_map;
-
- meta->gpu_addr = reg->start_pfn << PAGE_SHIFT;
+ meta->ref = 1;
list_add(&meta->ext_res_node, &kctx->ext_res_meta_head);
}
@@ -5147,7 +5459,7 @@ find_sticky_resource_meta(struct kbase_context *kctx, u64 gpu_addr)
* metadata which matches the region which is being released.
*/
list_for_each_entry(walker, &kctx->ext_res_meta_head, ext_res_node)
- if (walker->gpu_addr == gpu_addr)
+ if (kbasep_get_va_gpu_addr(walker->reg) == gpu_addr)
return walker;
return NULL;
@@ -5156,14 +5468,7 @@ find_sticky_resource_meta(struct kbase_context *kctx, u64 gpu_addr)
static void release_sticky_resource_meta(struct kbase_context *kctx,
struct kbase_ctx_ext_res_meta *meta)
{
- struct kbase_va_region *reg;
-
- /* Drop the physical memory reference and free the metadata. */
- reg = kbase_region_tracker_find_region_enclosing_address(
- kctx,
- meta->gpu_addr);
-
- kbase_unmap_external_resource(kctx, reg, meta->alloc);
+ kbase_unmap_external_resource(kctx, meta->reg);
list_del(&meta->ext_res_node);
kfree(meta);
}