Diffstat (limited to 'dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mem.c')
-rw-r--r-- | dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mem.c | 1160
1 file changed, 787 insertions, 373 deletions
diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mem.c b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mem.c index 4a1004b..a68e4ea 100644 --- a/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mem.c +++ b/dvalin/kernel/drivers/gpu/arm/midgard/mali_kbase_mem.c @@ -1,11 +1,12 @@ +// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note /* * - * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved. + * (C) COPYRIGHT 2010-2021 ARM Limited. All rights reserved. * * This program is free software and is provided to you under the terms of the * GNU General Public License version 2 as published by the Free Software * Foundation, and any use by you of this program is subject to the terms - * of such GNU licence. + * of such GNU license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,8 +17,6 @@ * along with this program; if not, you can access it online at * http://www.gnu.org/licenses/gpl-2.0.html. * - * SPDX-License-Identifier: GPL-2.0 - * */ /** @@ -29,7 +28,7 @@ #include <linux/compat.h> #include <linux/version.h> #include <linux/log2.h> -#ifdef CONFIG_OF +#if IS_ENABLED(CONFIG_OF) #include <linux/of_platform.h> #endif @@ -43,6 +42,7 @@ #include <mali_kbase_mem_pool_group.h> #include <mmu/mali_kbase_mmu.h> #include <mali_kbase_config_defaults.h> +#include <mali_kbase_trace_gpu_mem.h> /* * Alignment of objects allocated by the GPU inside a just-in-time memory @@ -89,7 +89,7 @@ static size_t kbase_get_num_cpu_va_bits(struct kbase_context *kctx) #error "Unknown CPU VA width for this architecture" #endif -#ifdef CONFIG_64BIT +#if IS_ENABLED(CONFIG_64BIT) if (kbase_ctx_flag(kctx, KCTX_COMPAT)) cpu_va_bits = 32; #endif @@ -98,27 +98,34 @@ static size_t kbase_get_num_cpu_va_bits(struct kbase_context *kctx) } /* This function finds out which RB tree the given pfn from the GPU VA belongs - * to based on the memory zone the pfn refers to */ + * to based on the memory zone the pfn refers to + */ static struct rb_root *kbase_gpu_va_to_rbtree(struct kbase_context *kctx, u64 gpu_pfn) { struct rb_root *rbtree = NULL; + struct kbase_reg_zone *exec_va_zone = + kbase_ctx_reg_zone_get(kctx, KBASE_REG_ZONE_EXEC_VA); /* The gpu_pfn can only be greater than the starting pfn of the EXEC_VA * zone if this has been initialized. 
*/ - if (gpu_pfn >= kctx->exec_va_start) + if (gpu_pfn >= exec_va_zone->base_pfn) rbtree = &kctx->reg_rbtree_exec; else { u64 same_va_end; -#ifdef CONFIG_64BIT - if (kbase_ctx_flag(kctx, KCTX_COMPAT)) +#if IS_ENABLED(CONFIG_64BIT) + if (kbase_ctx_flag(kctx, KCTX_COMPAT)) { #endif /* CONFIG_64BIT */ same_va_end = KBASE_REG_ZONE_CUSTOM_VA_BASE; -#ifdef CONFIG_64BIT - else - same_va_end = kctx->same_va_end; +#if IS_ENABLED(CONFIG_64BIT) + } else { + struct kbase_reg_zone *same_va_zone = + kbase_ctx_reg_zone_get(kctx, + KBASE_REG_ZONE_SAME_VA); + same_va_end = kbase_reg_zone_end_pfn(same_va_zone); + } #endif /* CONFIG_64BIT */ if (gpu_pfn >= same_va_end) @@ -228,7 +235,7 @@ struct kbase_va_region *kbase_region_tracker_find_region_enclosing_address( u64 gpu_pfn = gpu_addr >> PAGE_SHIFT; struct rb_root *rbtree = NULL; - KBASE_DEBUG_ASSERT(NULL != kctx); + KBASE_DEBUG_ASSERT(kctx != NULL); lockdep_assert_held(&kctx->reg_lock); @@ -288,7 +295,8 @@ static struct kbase_va_region *kbase_region_tracker_find_region_meeting_reqs( struct rb_root *rbtree = NULL; /* Note that this search is a linear search, as we do not have a target - address in mind, so does not benefit from the rbtree search */ + * address in mind, so does not benefit from the rbtree search + */ rbtree = reg_reqs->rbtree; for (rbnode = rb_first(rbtree); rbnode; rbnode = rb_next(rbnode)) { @@ -303,7 +311,8 @@ static struct kbase_va_region *kbase_region_tracker_find_region_meeting_reqs( * (start_pfn + align_mask) & ~(align_mask) * * Otherwise, it aligns to n*align + offset, for the - * lowest value n that makes this still >start_pfn */ + * lowest value n that makes this still >start_pfn + */ start_pfn += align_mask; start_pfn -= (start_pfn - align_offset) & (align_mask); @@ -341,7 +350,8 @@ static struct kbase_va_region *kbase_region_tracker_find_region_meeting_reqs( } /** - * @brief Remove a region object from the global list. + * Remove a region object from the global list. + * @reg: Region object to remove * * The region reg is removed, possibly by merging with other free and * compatible adjacent regions. 
It must be called with the context @@ -367,8 +377,9 @@ int kbase_remove_va_region(struct kbase_va_region *reg) if (rbprev) { prev = rb_entry(rbprev, struct kbase_va_region, rblink); if (prev->flags & KBASE_REG_FREE) { - /* We're compatible with the previous VMA, - * merge with it */ + /* We're compatible with the previous VMA, merge with + * it + */ WARN_ON((prev->flags & KBASE_REG_ZONE_MASK) != (reg->flags & KBASE_REG_ZONE_MASK)); prev->nr_pages += reg->nr_pages; @@ -511,8 +522,8 @@ int kbase_add_va_region(struct kbase_context *kctx, int gpu_pc_bits = kbdev->gpu_props.props.core_props.log2_program_counter_size; - KBASE_DEBUG_ASSERT(NULL != kctx); - KBASE_DEBUG_ASSERT(NULL != reg); + KBASE_DEBUG_ASSERT(kctx != NULL); + KBASE_DEBUG_ASSERT(reg != NULL); lockdep_assert_held(&kctx->reg_lock); @@ -614,13 +625,15 @@ int kbase_add_va_region_rbtree(struct kbase_device *kbdev, size_t align_offset = align; size_t align_mask = align - 1; +#if !MALI_USE_CSF if ((reg->flags & KBASE_REG_TILER_ALIGN_TOP)) { WARN(align > 1, "%s with align %lx might not be honored for KBASE_REG_TILER_ALIGN_TOP memory", __func__, (unsigned long)align); - align_mask = reg->extent - 1; - align_offset = reg->extent - reg->initial_commit; + align_mask = reg->extension - 1; + align_offset = reg->extension - reg->initial_commit; } +#endif /* !MALI_USE_CSF */ tmp = kbase_region_tracker_find_region_meeting_reqs(reg, nr_pages, align_offset, align_mask, @@ -643,7 +656,7 @@ exit: return err; } -/** +/* * @brief Initialize the internal region tracker data structure. */ static void kbase_region_tracker_ds_init(struct kbase_context *kctx, @@ -698,6 +711,9 @@ void kbase_region_tracker_term(struct kbase_context *kctx) kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_same); kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_custom); kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_exec); +#if MALI_USE_CSF + WARN_ON(!list_empty(&kctx->csf.event_pages_head)); +#endif kbase_gpu_vm_unlock(kctx); } @@ -720,23 +736,26 @@ int kbase_region_tracker_init(struct kbase_context *kctx) u64 custom_va_size = KBASE_REG_ZONE_CUSTOM_VA_SIZE; u64 gpu_va_limit = (1ULL << kctx->kbdev->gpu_props.mmu.va_bits) >> PAGE_SHIFT; u64 same_va_pages; + u64 same_va_base = 1u; int err; /* Take the lock as kbase_free_alloced_region requires it */ kbase_gpu_vm_lock(kctx); - same_va_pages = (1ULL << (same_va_bits - PAGE_SHIFT)) - 1; + same_va_pages = (1ULL << (same_va_bits - PAGE_SHIFT)) - same_va_base; /* all have SAME_VA */ - same_va_reg = kbase_alloc_free_region(&kctx->reg_rbtree_same, 1, - same_va_pages, - KBASE_REG_ZONE_SAME_VA); + same_va_reg = + kbase_alloc_free_region(&kctx->reg_rbtree_same, same_va_base, + same_va_pages, KBASE_REG_ZONE_SAME_VA); if (!same_va_reg) { err = -ENOMEM; goto fail_unlock; } + kbase_ctx_reg_zone_init(kctx, KBASE_REG_ZONE_SAME_VA, same_va_base, + same_va_pages); -#ifdef CONFIG_64BIT +#if IS_ENABLED(CONFIG_64BIT) /* 32-bit clients have custom VA zones */ if (kbase_ctx_flag(kctx, KCTX_COMPAT)) { #endif @@ -760,19 +779,28 @@ int kbase_region_tracker_init(struct kbase_context *kctx) err = -ENOMEM; goto fail_free_same_va; } -#ifdef CONFIG_64BIT + kbase_ctx_reg_zone_init(kctx, KBASE_REG_ZONE_CUSTOM_VA, + KBASE_REG_ZONE_CUSTOM_VA_BASE, + custom_va_size); +#if IS_ENABLED(CONFIG_64BIT) } else { custom_va_size = 0; } #endif + /* EXEC_VA zone's codepaths are slightly easier when its base_pfn is + * initially U64_MAX + */ + kbase_ctx_reg_zone_init(kctx, KBASE_REG_ZONE_EXEC_VA, U64_MAX, 0u); + /* Other zones are 0: kbase_create_context() uses vzalloc 
*/ kbase_region_tracker_ds_init(kctx, same_va_reg, custom_va_reg); - kctx->same_va_end = same_va_pages + 1; - kctx->gpu_va_end = kctx->same_va_end + custom_va_size; - kctx->exec_va_start = U64_MAX; + kctx->gpu_va_end = same_va_base + same_va_pages + custom_va_size; kctx->jit_va = false; +#if MALI_USE_CSF + INIT_LIST_HEAD(&kctx->csf.event_pages_head); +#endif kbase_gpu_vm_unlock(kctx); return 0; @@ -784,44 +812,147 @@ fail_unlock: return err; } -#ifdef CONFIG_64BIT +static bool kbase_has_exec_va_zone_locked(struct kbase_context *kctx) +{ + struct kbase_reg_zone *exec_va_zone; + + lockdep_assert_held(&kctx->reg_lock); + exec_va_zone = kbase_ctx_reg_zone_get(kctx, KBASE_REG_ZONE_EXEC_VA); + + return (exec_va_zone->base_pfn != U64_MAX); +} + +bool kbase_has_exec_va_zone(struct kbase_context *kctx) +{ + bool has_exec_va_zone; + + kbase_gpu_vm_lock(kctx); + has_exec_va_zone = kbase_has_exec_va_zone_locked(kctx); + kbase_gpu_vm_unlock(kctx); + + return has_exec_va_zone; +} + +/** + * Determine if any allocations have been made on a context's region tracker + * @kctx: KBase context + * + * Check the context to determine if any allocations have been made yet from + * any of its zones. This check should be done before resizing a zone, e.g. to + * make space to add a second zone. + * + * Whilst a zone without allocations can be resized whilst other zones have + * allocations, we still check all of @kctx 's zones anyway: this is a stronger + * guarantee and should be adhered to when creating new zones anyway. + * + * Allocations from kbdev zones are not counted. + * + * Return: true if any allocs exist on any zone, false otherwise + */ +static bool kbase_region_tracker_has_allocs(struct kbase_context *kctx) +{ + unsigned int zone_idx; + + lockdep_assert_held(&kctx->reg_lock); + + for (zone_idx = 0; zone_idx < KBASE_REG_ZONE_MAX; ++zone_idx) { + struct kbase_reg_zone *zone; + struct kbase_va_region *reg; + u64 zone_base_addr; + unsigned long zone_bits = KBASE_REG_ZONE(zone_idx); + unsigned long reg_zone; + + zone = kbase_ctx_reg_zone_get(kctx, zone_bits); + zone_base_addr = zone->base_pfn << PAGE_SHIFT; + + reg = kbase_region_tracker_find_region_base_address( + kctx, zone_base_addr); + + if (!zone->va_size_pages) { + WARN(reg, + "Should not have found a region that starts at 0x%.16llx for zone 0x%lx", + (unsigned long long)zone_base_addr, zone_bits); + continue; + } + + if (WARN(!reg, + "There should always be a region that starts at 0x%.16llx for zone 0x%lx, couldn't find it", + (unsigned long long)zone_base_addr, zone_bits)) + return true; /* Safest return value */ + + reg_zone = reg->flags & KBASE_REG_ZONE_MASK; + if (WARN(reg_zone != zone_bits, + "The region that starts at 0x%.16llx should be in zone 0x%lx but was found in the wrong zone 0x%lx", + (unsigned long long)zone_base_addr, zone_bits, + reg_zone)) + return true; /* Safest return value */ + + /* Unless the region is completely free, of the same size as + * the original zone, then it has allocs + */ + if ((!(reg->flags & KBASE_REG_FREE)) || + (reg->nr_pages != zone->va_size_pages)) + return true; + } + + /* All zones are the same size as originally made, so there are no + * allocs + */ + return false; +} + +#if IS_ENABLED(CONFIG_64BIT) static int kbase_region_tracker_init_jit_64(struct kbase_context *kctx, u64 jit_va_pages) { - struct kbase_va_region *same_va; + struct kbase_va_region *same_va_reg; + struct kbase_reg_zone *same_va_zone; + u64 same_va_zone_base_addr; + const unsigned long same_va_zone_bits = KBASE_REG_ZONE_SAME_VA; struct 
kbase_va_region *custom_va_reg; + u64 jit_va_start; lockdep_assert_held(&kctx->reg_lock); - /* First verify that a JIT_VA zone has not been created already. */ - if (kctx->jit_va) - return -EINVAL; - /* - * Modify the same VA free region after creation. Be careful to ensure - * that allocations haven't been made as they could cause an overlap - * to happen with existing same VA allocations and the custom VA zone. + * Modify the same VA free region after creation. The caller has + * ensured that allocations haven't been made, as any allocations could + * cause an overlap to happen with existing same VA allocations and the + * custom VA zone. */ - same_va = kbase_region_tracker_find_region_base_address(kctx, - PAGE_SIZE); - if (!same_va) + same_va_zone = kbase_ctx_reg_zone_get(kctx, same_va_zone_bits); + same_va_zone_base_addr = same_va_zone->base_pfn << PAGE_SHIFT; + + same_va_reg = kbase_region_tracker_find_region_base_address( + kctx, same_va_zone_base_addr); + if (WARN(!same_va_reg, + "Already found a free region at the start of every zone, but now cannot find any region for zone base 0x%.16llx zone 0x%lx", + (unsigned long long)same_va_zone_base_addr, same_va_zone_bits)) return -ENOMEM; - if (same_va->nr_pages < jit_va_pages || kctx->same_va_end < jit_va_pages) + /* kbase_region_tracker_has_allocs() in the caller has already ensured + * that all of the zones have no allocs, so no need to check that again + * on same_va_reg + */ + WARN_ON((!(same_va_reg->flags & KBASE_REG_FREE)) || + same_va_reg->nr_pages != same_va_zone->va_size_pages); + + if (same_va_reg->nr_pages < jit_va_pages || + same_va_zone->va_size_pages < jit_va_pages) return -ENOMEM; /* It's safe to adjust the same VA zone now */ - same_va->nr_pages -= jit_va_pages; - kctx->same_va_end -= jit_va_pages; + same_va_reg->nr_pages -= jit_va_pages; + same_va_zone->va_size_pages -= jit_va_pages; + jit_va_start = kbase_reg_zone_end_pfn(same_va_zone); /* * Create a custom VA zone at the end of the VA for allocations which * JIT can use so it doesn't have to allocate VA from the kernel. */ - custom_va_reg = kbase_alloc_free_region(&kctx->reg_rbtree_custom, - kctx->same_va_end, - jit_va_pages, - KBASE_REG_ZONE_CUSTOM_VA); + custom_va_reg = + kbase_alloc_free_region(&kctx->reg_rbtree_custom, jit_va_start, + jit_va_pages, KBASE_REG_ZONE_CUSTOM_VA); /* * The context will be destroyed if we fail here so no point @@ -829,6 +960,11 @@ static int kbase_region_tracker_init_jit_64(struct kbase_context *kctx, */ if (!custom_va_reg) return -ENOMEM; + /* Since this is 64-bit, the custom zone will not have been + * initialized, so initialize it now + */ + kbase_ctx_reg_zone_init(kctx, KBASE_REG_ZONE_CUSTOM_VA, jit_va_start, + jit_va_pages); kbase_region_tracker_insert(custom_va_reg); return 0; @@ -847,16 +983,34 @@ int kbase_region_tracker_init_jit(struct kbase_context *kctx, u64 jit_va_pages, if (group_id < 0 || group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS) return -EINVAL; -#if MALI_JIT_PRESSURE_LIMIT if (phys_pages_limit > jit_va_pages) -#else - if (phys_pages_limit != jit_va_pages) -#endif /* MALI_JIT_PRESSURE_LIMIT */ return -EINVAL; +#if MALI_JIT_PRESSURE_LIMIT_BASE + if (phys_pages_limit != jit_va_pages) + kbase_ctx_flag_set(kctx, KCTX_JPL_ENABLED); +#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ + kbase_gpu_vm_lock(kctx); -#ifdef CONFIG_64BIT + /* Verify that a JIT_VA zone has not been created already. */ + if (kctx->jit_va) { + err = -EINVAL; + goto exit_unlock; + } + + /* If in 64-bit, we always lookup the SAME_VA zone. 
To ensure it has no + * allocs, we can ensure there are no allocs anywhere. + * + * This check is also useful in 32-bit, just to make sure init of the + * zone is always done before any allocs. + */ + if (kbase_region_tracker_has_allocs(kctx)) { + err = -ENOMEM; + goto exit_unlock; + } + +#if IS_ENABLED(CONFIG_64BIT) if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) err = kbase_region_tracker_init_jit_64(kctx, jit_va_pages); #endif @@ -870,13 +1024,14 @@ int kbase_region_tracker_init_jit(struct kbase_context *kctx, u64 jit_va_pages, kctx->trim_level = trim_level; kctx->jit_va = true; kctx->jit_group_id = group_id; -#if MALI_JIT_PRESSURE_LIMIT +#if MALI_JIT_PRESSURE_LIMIT_BASE kctx->jit_phys_pages_limit = phys_pages_limit; dev_dbg(kctx->kbdev->dev, "phys_pages_limit set to %llu\n", phys_pages_limit); -#endif /* MALI_JIT_PRESSURE_LIMIT */ +#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ } +exit_unlock: kbase_gpu_vm_unlock(kctx); return err; @@ -884,24 +1039,33 @@ int kbase_region_tracker_init_jit(struct kbase_context *kctx, u64 jit_va_pages, int kbase_region_tracker_init_exec(struct kbase_context *kctx, u64 exec_va_pages) { - struct kbase_va_region *shrinking_va_reg; struct kbase_va_region *exec_va_reg; - u64 exec_va_start, exec_va_base_addr; + struct kbase_reg_zone *exec_va_zone; + struct kbase_reg_zone *target_zone; + struct kbase_va_region *target_reg; + u64 target_zone_base_addr; + unsigned long target_zone_bits; + u64 exec_va_start; int err; - /* The EXEC_VA zone shall be created by making space at the end of the - * address space. Firstly, verify that the number of EXEC_VA pages - * requested by the client is reasonable and then make sure that it is - * not greater than the address space itself before calculating the base - * address of the new zone. + /* The EXEC_VA zone shall be created by making space either: + * - for 64-bit clients, at the end of the process's address space + * - for 32-bit clients, in the CUSTOM zone + * + * Firstly, verify that the number of EXEC_VA pages requested by the + * client is reasonable and then make sure that it is not greater than + * the address space itself before calculating the base address of the + * new zone. */ if (exec_va_pages == 0 || exec_va_pages > KBASE_REG_ZONE_EXEC_VA_MAX_PAGES) return -EINVAL; kbase_gpu_vm_lock(kctx); - /* First verify that a JIT_VA zone has not been created already. */ - if (kctx->jit_va) { + /* Verify that we've not already created a EXEC_VA zone, and that the + * EXEC_VA zone must come before JIT's CUSTOM_VA. 
+ */ + if (kbase_has_exec_va_zone_locked(kctx) || kctx->jit_va) { err = -EPERM; goto exit_unlock; } @@ -911,28 +1075,50 @@ int kbase_region_tracker_init_exec(struct kbase_context *kctx, u64 exec_va_pages goto exit_unlock; } - exec_va_start = kctx->gpu_va_end - exec_va_pages; - exec_va_base_addr = exec_va_start << PAGE_SHIFT; - - shrinking_va_reg = kbase_region_tracker_find_region_enclosing_address(kctx, - exec_va_base_addr); - if (!shrinking_va_reg) { + /* Verify no allocations have already been made */ + if (kbase_region_tracker_has_allocs(kctx)) { err = -ENOMEM; goto exit_unlock; } - /* Make sure that the EXEC_VA region is still uninitialized */ - if ((shrinking_va_reg->flags & KBASE_REG_ZONE_MASK) == - KBASE_REG_ZONE_EXEC_VA) { - err = -EPERM; +#if IS_ENABLED(CONFIG_64BIT) + if (kbase_ctx_flag(kctx, KCTX_COMPAT)) { +#endif + /* 32-bit client: take from CUSTOM_VA zone */ + target_zone_bits = KBASE_REG_ZONE_CUSTOM_VA; +#if IS_ENABLED(CONFIG_64BIT) + } else { + /* 64-bit client: take from SAME_VA zone */ + target_zone_bits = KBASE_REG_ZONE_SAME_VA; + } +#endif + target_zone = kbase_ctx_reg_zone_get(kctx, target_zone_bits); + target_zone_base_addr = target_zone->base_pfn << PAGE_SHIFT; + + target_reg = kbase_region_tracker_find_region_base_address( + kctx, target_zone_base_addr); + if (WARN(!target_reg, + "Already found a free region at the start of every zone, but now cannot find any region for zone base 0x%.16llx zone 0x%lx", + (unsigned long long)target_zone_base_addr, target_zone_bits)) { + err = -ENOMEM; goto exit_unlock; } + /* kbase_region_tracker_has_allocs() above has already ensured that all + * of the zones have no allocs, so no need to check that again on + * target_reg + */ + WARN_ON((!(target_reg->flags & KBASE_REG_FREE)) || + target_reg->nr_pages != target_zone->va_size_pages); - if (shrinking_va_reg->nr_pages <= exec_va_pages) { + if (target_reg->nr_pages <= exec_va_pages || + target_zone->va_size_pages <= exec_va_pages) { err = -ENOMEM; goto exit_unlock; } + /* Taken from the end of the target zone */ + exec_va_start = kbase_reg_zone_end_pfn(target_zone) - exec_va_pages; + exec_va_reg = kbase_alloc_free_region(&kctx->reg_rbtree_exec, exec_va_start, exec_va_pages, @@ -941,13 +1127,17 @@ int kbase_region_tracker_init_exec(struct kbase_context *kctx, u64 exec_va_pages err = -ENOMEM; goto exit_unlock; } + /* Update EXEC_VA zone + * + * not using kbase_ctx_reg_zone_init() - it was already initialized + */ + exec_va_zone = kbase_ctx_reg_zone_get(kctx, KBASE_REG_ZONE_EXEC_VA); + exec_va_zone->base_pfn = exec_va_start; + exec_va_zone->va_size_pages = exec_va_pages; - shrinking_va_reg->nr_pages -= exec_va_pages; -#ifdef CONFIG_64BIT - if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) - kctx->same_va_end -= exec_va_pages; -#endif - kctx->exec_va_start = exec_va_start; + /* Update target zone and corresponding region */ + target_reg->nr_pages -= exec_va_pages; + target_zone->va_size_pages -= exec_va_pages; kbase_region_tracker_insert(exec_va_reg); err = 0; @@ -957,12 +1147,40 @@ exit_unlock: return err; } +#if MALI_USE_CSF +void kbase_mcu_shared_interface_region_tracker_term(struct kbase_device *kbdev) +{ + kbase_region_tracker_term_rbtree(&kbdev->csf.shared_reg_rbtree); +} + +int kbase_mcu_shared_interface_region_tracker_init(struct kbase_device *kbdev) +{ + struct kbase_va_region *shared_reg; + u64 shared_reg_start_pfn; + u64 shared_reg_size; + + shared_reg_start_pfn = KBASE_REG_ZONE_MCU_SHARED_BASE; + shared_reg_size = KBASE_REG_ZONE_MCU_SHARED_SIZE; + + kbdev->csf.shared_reg_rbtree = 
RB_ROOT; + + shared_reg = kbase_alloc_free_region(&kbdev->csf.shared_reg_rbtree, + shared_reg_start_pfn, + shared_reg_size, + KBASE_REG_ZONE_MCU_SHARED); + if (!shared_reg) + return -ENOMEM; + + kbase_region_tracker_insert(shared_reg); + return 0; +} +#endif int kbase_mem_init(struct kbase_device *kbdev) { int err = 0; struct kbasep_mem_device *memdev; -#ifdef CONFIG_OF +#if IS_ENABLED(CONFIG_OF) struct device_node *mgm_node = NULL; #endif @@ -976,6 +1194,12 @@ int kbase_mem_init(struct kbase_device *kbdev) /* Initialize memory usage */ atomic_set(&memdev->used_pages, 0); + spin_lock_init(&kbdev->gpu_mem_usage_lock); + kbdev->total_gpu_pages = 0; + kbdev->process_root = RB_ROOT; + kbdev->dma_buf_root = RB_ROOT; + mutex_init(&kbdev->dma_buf_lock); + #ifdef IR_THRESHOLD atomic_set(&memdev->ir_threshold, IR_THRESHOLD); #else @@ -984,7 +1208,7 @@ int kbase_mem_init(struct kbase_device *kbdev) kbdev->mgm_dev = &kbase_native_mgm_dev; -#ifdef CONFIG_OF +#if IS_ENABLED(CONFIG_OF) /* Check to see whether or not a platform-specific memory group manager * is configured and available. */ @@ -1053,13 +1277,22 @@ void kbase_mem_term(struct kbase_device *kbdev) kbase_mem_pool_group_term(&kbdev->mem_pools); + WARN_ON(kbdev->total_gpu_pages); + WARN_ON(!RB_EMPTY_ROOT(&kbdev->process_root)); + WARN_ON(!RB_EMPTY_ROOT(&kbdev->dma_buf_root)); + mutex_destroy(&kbdev->dma_buf_lock); + if (kbdev->mgm_dev) module_put(kbdev->mgm_dev->owner); } KBASE_EXPORT_TEST_API(kbase_mem_term); /** - * @brief Allocate a free region object. + * Allocate a free region object. + * @rbtree: Backlink to the red-black tree of memory regions. + * @start_pfn: The Page Frame Number in GPU virtual address space. + * @nr_pages: The size of the region in pages. + * @zone: KBASE_REG_ZONE_CUSTOM_VA or KBASE_REG_ZONE_SAME_VA * * The allocated object is not part of any list yet, and is flagged as * KBASE_REG_FREE. No mapping is allocated yet. @@ -1132,7 +1365,8 @@ static struct kbase_context *kbase_reg_flags_to_kctx( } /** - * @brief Free a region object. + * Free a region object. + * @reg: Region * * The described region must be freed of any mapping. 
* @@ -1143,6 +1377,13 @@ static struct kbase_context *kbase_reg_flags_to_kctx( */ void kbase_free_alloced_region(struct kbase_va_region *reg) { +#if MALI_USE_CSF + if ((reg->flags & KBASE_REG_ZONE_MASK) == + KBASE_REG_ZONE_MCU_SHARED) { + kfree(reg); + return; + } +#endif if (!(reg->flags & KBASE_REG_FREE)) { struct kbase_context *kctx = kbase_reg_flags_to_kctx(reg); @@ -1152,8 +1393,12 @@ void kbase_free_alloced_region(struct kbase_va_region *reg) if (WARN_ON(kbase_is_region_invalid(reg))) return; - dev_dbg(kctx->kbdev->dev, "Freeing memory region %p\n", + dev_dbg(kctx->kbdev->dev, "Freeing memory region %pK\n", (void *)reg); +#if MALI_USE_CSF + if (reg->flags & KBASE_REG_CSF_EVENT) + kbase_unlink_event_mem_page(kctx, reg); +#endif mutex_lock(&kctx->jit_evict_lock); @@ -1233,8 +1478,8 @@ int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg, u64 else attr = KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_WRITE_ALLOC); - KBASE_DEBUG_ASSERT(NULL != kctx); - KBASE_DEBUG_ASSERT(NULL != reg); + KBASE_DEBUG_ASSERT(kctx != NULL); + KBASE_DEBUG_ASSERT(reg != NULL); err = kbase_add_va_region(kctx, reg, addr, nr_pages, align); if (err) @@ -1260,7 +1505,9 @@ int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg, u64 if (err) goto bad_insert; - kbase_mem_phy_alloc_gpu_mapped(alloc->imported.alias.aliased[i].alloc); + /* Note: mapping count is tracked at alias + * creation time + */ } else { err = kbase_mmu_insert_single_page(kctx, reg->start_pfn + i * stride, @@ -1319,13 +1566,6 @@ bad_insert: reg->start_pfn, reg->nr_pages, kctx->as_nr); - if (alloc->type == KBASE_MEM_TYPE_ALIAS) { - KBASE_DEBUG_ASSERT(alloc->imported.alias.aliased); - while (i--) - if (alloc->imported.alias.aliased[i].alloc) - kbase_mem_phy_alloc_gpu_unmapped(alloc->imported.alias.aliased[i].alloc); - } - kbase_remove_va_region(reg); return err; @@ -1339,7 +1579,6 @@ static void kbase_jd_user_buf_unmap(struct kbase_context *kctx, int kbase_gpu_munmap(struct kbase_context *kctx, struct kbase_va_region *reg) { int err = 0; - size_t i; if (reg->start_pfn == 0) return 0; @@ -1364,10 +1603,9 @@ int kbase_gpu_munmap(struct kbase_context *kctx, struct kbase_va_region *reg) /* Update tracking, and other cleanup, depending on memory type. 
*/ switch (reg->gpu_alloc->type) { case KBASE_MEM_TYPE_ALIAS: - KBASE_DEBUG_ASSERT(reg->gpu_alloc->imported.alias.aliased); - for (i = 0; i < reg->gpu_alloc->imported.alias.nents; i++) - if (reg->gpu_alloc->imported.alias.aliased[i].alloc) - kbase_mem_phy_alloc_gpu_unmapped(reg->gpu_alloc->imported.alias.aliased[i].alloc); + /* We mark the source allocs as unmapped from the GPU when + * putting reg's allocs + */ break; case KBASE_MEM_TYPE_IMPORTED_USER_BUF: { struct kbase_alloc_import_user_buf *user_buf = @@ -1404,7 +1642,7 @@ static struct kbase_cpu_mapping *kbasep_find_enclosing_cpu_mapping( unsigned long map_start; size_t map_size; - lockdep_assert_held(¤t->mm->mmap_sem); + lockdep_assert_held(kbase_mem_get_process_mmap_lock()); if ((uintptr_t) uaddr + size < (uintptr_t) uaddr) /* overflow check */ return NULL; @@ -1676,9 +1914,9 @@ int kbase_mem_free_region(struct kbase_context *kctx, struct kbase_va_region *re { int err; - KBASE_DEBUG_ASSERT(NULL != kctx); - KBASE_DEBUG_ASSERT(NULL != reg); - dev_dbg(kctx->kbdev->dev, "%s %p in kctx %p\n", + KBASE_DEBUG_ASSERT(kctx != NULL); + KBASE_DEBUG_ASSERT(reg != NULL); + dev_dbg(kctx->kbdev->dev, "%s %pK in kctx %pK\n", __func__, (void *)reg, (void *)kctx); lockdep_assert_held(&kctx->reg_lock); @@ -1724,7 +1962,9 @@ int kbase_mem_free_region(struct kbase_context *kctx, struct kbase_va_region *re KBASE_EXPORT_TEST_API(kbase_mem_free_region); /** - * @brief Free the region from the GPU and unregister it. + * Free the region from the GPU and unregister it. + * @kctx: KBase context + * @gpu_addr: GPU address to free * * This function implements the free operation on a memory segment. * It will loudly fail if called with outstanding mappings. @@ -1735,7 +1975,7 @@ int kbase_mem_free(struct kbase_context *kctx, u64 gpu_addr) struct kbase_va_region *reg; KBASE_DEBUG_ASSERT(kctx != NULL); - dev_dbg(kctx->kbdev->dev, "%s 0x%llx in kctx %p\n", + dev_dbg(kctx->kbdev->dev, "%s 0x%llx in kctx %pK\n", __func__, gpu_addr, (void *)kctx); if ((gpu_addr & ~PAGE_MASK) && (gpu_addr >= PAGE_SIZE)) { @@ -1743,7 +1983,7 @@ int kbase_mem_free(struct kbase_context *kctx, u64 gpu_addr) return -EINVAL; } - if (0 == gpu_addr) { + if (gpu_addr == 0) { dev_warn(kctx->kbdev->dev, "gpu_addr 0 is reserved for the ringbuffer and it's an error to try to free it using kbase_mem_free\n"); return -EINVAL; } @@ -1796,7 +2036,7 @@ KBASE_EXPORT_TEST_API(kbase_mem_free); int kbase_update_region_flags(struct kbase_context *kctx, struct kbase_va_region *reg, unsigned long flags) { - KBASE_DEBUG_ASSERT(NULL != reg); + KBASE_DEBUG_ASSERT(reg != NULL); KBASE_DEBUG_ASSERT((flags & ~((1ul << BASE_MEM_FLAGS_NR_BITS) - 1)) == 0); reg->flags |= kbase_cache_enabled(flags, reg->nr_pages); @@ -1835,9 +2075,25 @@ int kbase_update_region_flags(struct kbase_context *kctx, reg->flags |= KBASE_REG_SHARE_IN; } +#if !MALI_USE_CSF if (flags & BASE_MEM_TILER_ALIGN_TOP) reg->flags |= KBASE_REG_TILER_ALIGN_TOP; +#endif /* !MALI_USE_CSF */ + +#if MALI_USE_CSF + if (flags & BASE_MEM_CSF_EVENT) { + reg->flags |= KBASE_REG_CSF_EVENT; + reg->flags |= KBASE_REG_PERMANENT_KERNEL_MAPPING; + if (!(reg->flags & KBASE_REG_SHARE_BOTH)) { + /* On non coherent platforms need to map as uncached on + * both sides. 
+ */ + reg->flags &= ~KBASE_REG_CPU_CACHED; + reg->flags &= ~KBASE_REG_GPU_CACHED; + } + } +#endif /* Set up default MEMATTR usage */ if (!(reg->flags & KBASE_REG_GPU_CACHED)) { @@ -1851,6 +2107,13 @@ int kbase_update_region_flags(struct kbase_context *kctx, "Can't allocate GPU uncached memory due to MMU in Legacy Mode\n"); return -EINVAL; } +#if MALI_USE_CSF + } else if (reg->flags & KBASE_REG_CSF_EVENT) { + WARN_ON(!(reg->flags & KBASE_REG_SHARE_BOTH)); + + reg->flags |= + KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_SHARED); +#endif } else if (kctx->kbdev->system_coherency == COHERENCY_ACE && (reg->flags & KBASE_REG_SHARE_BOTH)) { reg->flags |= @@ -1905,7 +2168,8 @@ int kbase_alloc_phy_pages_helper(struct kbase_mem_phy_alloc *alloc, &kctx->kbdev->memdev.used_pages); /* Increase mm counters before we allocate pages so that this - * allocation is visible to the OOM killer */ + * allocation is visible to the OOM killer + */ kbase_process_page_usage_inc(kctx, nr_pages_requested); tp = alloc->pages + alloc->nents; @@ -2033,6 +2297,9 @@ no_new_partial: (u64)new_page_count); alloc->nents += nr_pages_requested; + + kbase_trace_gpu_mem_usage_inc(kctx->kbdev, kctx, nr_pages_requested); + done: return 0; @@ -2209,6 +2476,9 @@ struct tagged_addr *kbase_alloc_phy_pages_helper_locked( (u64)new_page_count); alloc->nents += nr_pages_requested; + + kbase_trace_gpu_mem_usage_inc(kctx->kbdev, kctx, nr_pages_requested); + done: return new_pages; @@ -2303,7 +2573,7 @@ int kbase_free_phy_pages_helper( } /* early out if nothing to do */ - if (0 == nr_pages_to_free) + if (nr_pages_to_free == 0) return 0; start_free = alloc->pages + alloc->nents - nr_pages_to_free; @@ -2374,6 +2644,8 @@ int kbase_free_phy_pages_helper( kbdev, kctx->id, (u64)new_page_count); + + kbase_trace_gpu_mem_usage_dec(kctx->kbdev, kctx, freed); } return 0; @@ -2496,9 +2768,19 @@ void kbase_free_phy_pages_helper_locked(struct kbase_mem_phy_alloc *alloc, kbdev, kctx->id, (u64)new_page_count); + + kbase_trace_gpu_mem_usage_dec(kctx->kbdev, kctx, freed); } } +KBASE_EXPORT_TEST_API(kbase_free_phy_pages_helper_locked); +#if MALI_USE_CSF +/** + * kbase_jd_user_buf_unpin_pages - Release the pinned pages of a user buffer. + * @alloc: The allocation for the imported user buffer. 
+ */ +static void kbase_jd_user_buf_unpin_pages(struct kbase_mem_phy_alloc *alloc); +#endif void kbase_mem_kref_free(struct kref *kref) { @@ -2540,8 +2822,10 @@ void kbase_mem_kref_free(struct kref *kref) aliased = alloc->imported.alias.aliased; if (aliased) { for (i = 0; i < alloc->imported.alias.nents; i++) - if (aliased[i].alloc) + if (aliased[i].alloc) { + kbase_mem_phy_alloc_gpu_unmapped(aliased[i].alloc); kbase_mem_phy_alloc_put(aliased[i].alloc); + } vfree(aliased); } break; @@ -2558,12 +2842,17 @@ void kbase_mem_kref_free(struct kref *kref) alloc->imported.umm.dma_attachment, alloc->imported.umm.sgt, DMA_BIDIRECTIONAL); + kbase_remove_dma_buf_usage(alloc->imported.umm.kctx, + alloc); } dma_buf_detach(alloc->imported.umm.dma_buf, alloc->imported.umm.dma_attachment); dma_buf_put(alloc->imported.umm.dma_buf); break; case KBASE_MEM_TYPE_IMPORTED_USER_BUF: +#if MALI_USE_CSF + kbase_jd_user_buf_unpin_pages(alloc); +#endif if (alloc->imported.user_buf.mm) mmdrop(alloc->imported.user_buf.mm); if (alloc->properties & KBASE_MEM_PHY_ALLOC_LARGE) @@ -2587,7 +2876,7 @@ KBASE_EXPORT_TEST_API(kbase_mem_kref_free); int kbase_alloc_phy_pages(struct kbase_va_region *reg, size_t vsize, size_t size) { - KBASE_DEBUG_ASSERT(NULL != reg); + KBASE_DEBUG_ASSERT(reg != NULL); KBASE_DEBUG_ASSERT(vsize > 0); /* validate user provided arguments */ @@ -2600,7 +2889,7 @@ int kbase_alloc_phy_pages(struct kbase_va_region *reg, size_t vsize, size_t size if ((size_t) vsize > ((size_t) -1 / sizeof(*reg->cpu_alloc->pages))) goto out_term; - KBASE_DEBUG_ASSERT(0 != vsize); + KBASE_DEBUG_ASSERT(vsize != 0); if (kbase_alloc_phy_pages_helper(reg->cpu_alloc, size) != 0) goto out_term; @@ -2643,22 +2932,37 @@ bool kbase_check_alloc_flags(unsigned long flags) /* GPU executable memory cannot: * - Be written by the GPU * - Be grown on GPU page fault - * - Have the top of its initial commit aligned to 'extent' */ + */ + if ((flags & BASE_MEM_PROT_GPU_EX) && (flags & + (BASE_MEM_PROT_GPU_WR | BASE_MEM_GROW_ON_GPF))) + return false; + +#if !MALI_USE_CSF + /* GPU executable memory also cannot have the top of its initial + * commit aligned to 'extension' + */ if ((flags & BASE_MEM_PROT_GPU_EX) && (flags & - (BASE_MEM_PROT_GPU_WR | BASE_MEM_GROW_ON_GPF | - BASE_MEM_TILER_ALIGN_TOP))) + BASE_MEM_TILER_ALIGN_TOP)) return false; +#endif /* !MALI_USE_CSF */ /* To have an allocation lie within a 4GB chunk is required only for - * TLS memory, which will never be used to contain executable code - * and also used for Tiler heap. + * TLS memory, which will never be used to contain executable code. */ if ((flags & BASE_MEM_GPU_VA_SAME_4GB_PAGE) && (flags & - (BASE_MEM_PROT_GPU_EX | BASE_MEM_TILER_ALIGN_TOP))) + BASE_MEM_PROT_GPU_EX)) return false; +#if !MALI_USE_CSF + /* TLS memory should also not be used for tiler heap */ + if ((flags & BASE_MEM_GPU_VA_SAME_4GB_PAGE) && (flags & + BASE_MEM_TILER_ALIGN_TOP)) + return false; +#endif /* !MALI_USE_CSF */ + /* GPU should have at least read or write access otherwise there is no - reason for allocating. */ + * reason for allocating. 
+ */ if ((flags & (BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR)) == 0) return false; @@ -2666,14 +2970,15 @@ bool kbase_check_alloc_flags(unsigned long flags) if ((flags & BASE_MEM_IMPORT_SHARED) == BASE_MEM_IMPORT_SHARED) return false; - /* BASE_MEM_IMPORT_SYNC_ON_MAP_UNMAP is only valid for imported - * memory */ + /* BASE_MEM_IMPORT_SYNC_ON_MAP_UNMAP is only valid for imported memory + */ if ((flags & BASE_MEM_IMPORT_SYNC_ON_MAP_UNMAP) == BASE_MEM_IMPORT_SYNC_ON_MAP_UNMAP) return false; /* Should not combine BASE_MEM_COHERENT_LOCAL with - * BASE_MEM_COHERENT_SYSTEM */ + * BASE_MEM_COHERENT_SYSTEM + */ if ((flags & (BASE_MEM_COHERENT_LOCAL | BASE_MEM_COHERENT_SYSTEM)) == (BASE_MEM_COHERENT_LOCAL | BASE_MEM_COHERENT_SYSTEM)) return false; @@ -2699,12 +3004,15 @@ bool kbase_check_import_flags(unsigned long flags) if (flags & BASE_MEM_GROW_ON_GPF) return false; +#if !MALI_USE_CSF /* Imported memory cannot be aligned to the end of its initial commit */ if (flags & BASE_MEM_TILER_ALIGN_TOP) return false; +#endif /* !MALI_USE_CSF */ /* GPU should have at least read or write access otherwise there is no - reason for importing. */ + * reason for importing. + */ if ((flags & (BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR)) == 0) return false; @@ -2716,19 +3024,19 @@ bool kbase_check_import_flags(unsigned long flags) } int kbase_check_alloc_sizes(struct kbase_context *kctx, unsigned long flags, - u64 va_pages, u64 commit_pages, u64 large_extent) + u64 va_pages, u64 commit_pages, u64 large_extension) { struct device *dev = kctx->kbdev->dev; int gpu_pc_bits = kctx->kbdev->gpu_props.props.core_props.log2_program_counter_size; u64 gpu_pc_pages_max = 1ULL << gpu_pc_bits >> PAGE_SHIFT; struct kbase_va_region test_reg; - /* kbase_va_region's extent member can be of variable size, so check against that type */ - test_reg.extent = large_extent; + /* kbase_va_region's extension member can be of variable size, so check against that type */ + test_reg.extension = large_extension; #define KBASE_MSG_PRE "GPU allocation attempted with " - if (0 == va_pages) { + if (va_pages == 0) { dev_warn(dev, KBASE_MSG_PRE "0 va_pages!"); return -EINVAL; } @@ -2740,7 +3048,8 @@ int kbase_check_alloc_sizes(struct kbase_context *kctx, unsigned long flags, } /* Note: commit_pages is checked against va_pages during - * kbase_alloc_phy_pages() */ + * kbase_alloc_phy_pages() + */ /* Limit GPU executable allocs to GPU PC size */ if ((flags & BASE_MEM_PROT_GPU_EX) && (va_pages > gpu_pc_pages_max)) { @@ -2751,47 +3060,73 @@ int kbase_check_alloc_sizes(struct kbase_context *kctx, unsigned long flags, return -EINVAL; } - if ((flags & (BASE_MEM_GROW_ON_GPF | BASE_MEM_TILER_ALIGN_TOP)) && - test_reg.extent == 0) { - dev_warn(dev, KBASE_MSG_PRE "BASE_MEM_GROW_ON_GPF or BASE_MEM_TILER_ALIGN_TOP but extent == 0\n"); + if ((flags & BASE_MEM_GROW_ON_GPF) && (test_reg.extension == 0)) { + dev_warn(dev, KBASE_MSG_PRE + "BASE_MEM_GROW_ON_GPF but extension == 0\n"); + return -EINVAL; + } + +#if !MALI_USE_CSF + if ((flags & BASE_MEM_TILER_ALIGN_TOP) && (test_reg.extension == 0)) { + dev_warn(dev, KBASE_MSG_PRE + "BASE_MEM_TILER_ALIGN_TOP but extension == 0\n"); return -EINVAL; } if (!(flags & (BASE_MEM_GROW_ON_GPF | BASE_MEM_TILER_ALIGN_TOP)) && - test_reg.extent != 0) { - dev_warn(dev, KBASE_MSG_PRE "neither BASE_MEM_GROW_ON_GPF nor BASE_MEM_TILER_ALIGN_TOP set but extent != 0\n"); + test_reg.extension != 0) { + dev_warn( + dev, KBASE_MSG_PRE + "neither BASE_MEM_GROW_ON_GPF nor BASE_MEM_TILER_ALIGN_TOP set but extension != 0\n"); + return -EINVAL; + 
} +#else + if (!(flags & BASE_MEM_GROW_ON_GPF) && test_reg.extension != 0) { + dev_warn(dev, KBASE_MSG_PRE + "BASE_MEM_GROW_ON_GPF not set but extension != 0\n"); return -EINVAL; } +#endif /* !MALI_USE_CSF */ +#if !MALI_USE_CSF /* BASE_MEM_TILER_ALIGN_TOP memory has a number of restrictions */ if (flags & BASE_MEM_TILER_ALIGN_TOP) { #define KBASE_MSG_PRE_FLAG KBASE_MSG_PRE "BASE_MEM_TILER_ALIGN_TOP and " - unsigned long small_extent; - - if (large_extent > BASE_MEM_TILER_ALIGN_TOP_EXTENT_MAX_PAGES) { - dev_warn(dev, KBASE_MSG_PRE_FLAG "extent==%lld pages exceeds limit %lld", - (unsigned long long)large_extent, - BASE_MEM_TILER_ALIGN_TOP_EXTENT_MAX_PAGES); + unsigned long small_extension; + + if (large_extension > + BASE_MEM_TILER_ALIGN_TOP_EXTENSION_MAX_PAGES) { + dev_warn(dev, + KBASE_MSG_PRE_FLAG + "extension==%lld pages exceeds limit %lld", + (unsigned long long)large_extension, + BASE_MEM_TILER_ALIGN_TOP_EXTENSION_MAX_PAGES); return -EINVAL; } /* For use with is_power_of_2, which takes unsigned long, so - * must ensure e.g. on 32-bit kernel it'll fit in that type */ - small_extent = (unsigned long)large_extent; + * must ensure e.g. on 32-bit kernel it'll fit in that type + */ + small_extension = (unsigned long)large_extension; - if (!is_power_of_2(small_extent)) { - dev_warn(dev, KBASE_MSG_PRE_FLAG "extent==%ld not a non-zero power of 2", - small_extent); + if (!is_power_of_2(small_extension)) { + dev_warn(dev, + KBASE_MSG_PRE_FLAG + "extension==%ld not a non-zero power of 2", + small_extension); return -EINVAL; } - if (commit_pages > large_extent) { - dev_warn(dev, KBASE_MSG_PRE_FLAG "commit_pages==%ld exceeds extent==%ld", - (unsigned long)commit_pages, - (unsigned long)large_extent); + if (commit_pages > large_extension) { + dev_warn(dev, + KBASE_MSG_PRE_FLAG + "commit_pages==%ld exceeds extension==%ld", + (unsigned long)commit_pages, + (unsigned long)large_extension); return -EINVAL; } #undef KBASE_MSG_PRE_FLAG } +#endif /* !MALI_USE_CSF */ if ((flags & BASE_MEM_GPU_VA_SAME_4GB_PAGE) && (va_pages > (BASE_MEM_PFN_MASK_4GB + 1))) { @@ -2805,7 +3140,8 @@ int kbase_check_alloc_sizes(struct kbase_context *kctx, unsigned long flags, } /** - * @brief Acquire the per-context region list lock + * Acquire the per-context region list lock + * @kctx: KBase context */ void kbase_gpu_vm_lock(struct kbase_context *kctx) { @@ -2816,7 +3152,8 @@ void kbase_gpu_vm_lock(struct kbase_context *kctx) KBASE_EXPORT_TEST_API(kbase_gpu_vm_lock); /** - * @brief Release the per-context region list lock + * Release the per-context region list lock + * @kctx: KBase context */ void kbase_gpu_vm_unlock(struct kbase_context *kctx) { @@ -2826,7 +3163,7 @@ void kbase_gpu_vm_unlock(struct kbase_context *kctx) KBASE_EXPORT_TEST_API(kbase_gpu_vm_unlock); -#ifdef CONFIG_DEBUG_FS +#if IS_ENABLED(CONFIG_DEBUG_FS) struct kbase_jit_debugfs_data { int (*func)(struct kbase_jit_debugfs_data *); struct mutex lock; @@ -2879,7 +3216,7 @@ static ssize_t kbase_jit_debugfs_common_read(struct file *file, } size = scnprintf(data->buffer, sizeof(data->buffer), - "%llu,%llu,%llu", data->active_value, + "%llu,%llu,%llu\n", data->active_value, data->pool_value, data->destroy_value); } @@ -2983,19 +3320,23 @@ static int kbase_jit_debugfs_phys_get(struct kbase_jit_debugfs_data *data) KBASE_JIT_DEBUGFS_DECLARE(kbase_jit_debugfs_phys_fops, kbase_jit_debugfs_phys_get); -#if MALI_JIT_PRESSURE_LIMIT +#if MALI_JIT_PRESSURE_LIMIT_BASE static int kbase_jit_debugfs_used_get(struct kbase_jit_debugfs_data *data) { struct kbase_context *kctx = 
data->kctx; struct kbase_va_region *reg; +#if !MALI_USE_CSF mutex_lock(&kctx->jctx.lock); +#endif /* !MALI_USE_CSF */ mutex_lock(&kctx->jit_evict_lock); list_for_each_entry(reg, &kctx->jit_active_head, jit_node) { data->active_value += reg->used_pages; } mutex_unlock(&kctx->jit_evict_lock); +#if !MALI_USE_CSF mutex_unlock(&kctx->jctx.lock); +#endif /* !MALI_USE_CSF */ return 0; } @@ -3012,7 +3353,9 @@ static int kbase_jit_debugfs_trim_get(struct kbase_jit_debugfs_data *data) struct kbase_context *kctx = data->kctx; struct kbase_va_region *reg; +#if !MALI_USE_CSF mutex_lock(&kctx->jctx.lock); +#endif /* !MALI_USE_CSF */ kbase_gpu_vm_lock(kctx); mutex_lock(&kctx->jit_evict_lock); list_for_each_entry(reg, &kctx->jit_active_head, jit_node) { @@ -3031,14 +3374,16 @@ static int kbase_jit_debugfs_trim_get(struct kbase_jit_debugfs_data *data) } mutex_unlock(&kctx->jit_evict_lock); kbase_gpu_vm_unlock(kctx); +#if !MALI_USE_CSF mutex_unlock(&kctx->jctx.lock); +#endif /* !MALI_USE_CSF */ return 0; } KBASE_JIT_DEBUGFS_DECLARE(kbase_jit_debugfs_trim_fops, kbase_jit_debugfs_trim_get); -#endif /* MALI_JIT_PRESSURE_LIMIT */ +#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ void kbase_jit_debugfs_init(struct kbase_context *kctx) { @@ -3078,7 +3423,7 @@ void kbase_jit_debugfs_init(struct kbase_context *kctx) */ debugfs_create_file("mem_jit_phys", mode, kctx->kctx_dentry, kctx, &kbase_jit_debugfs_phys_fops); -#if MALI_JIT_PRESSURE_LIMIT +#if MALI_JIT_PRESSURE_LIMIT_BASE /* * Debugfs entry for getting the number of pages used * by JIT allocations for estimating the physical pressure @@ -3093,7 +3438,7 @@ void kbase_jit_debugfs_init(struct kbase_context *kctx) */ debugfs_create_file("mem_jit_trim", mode, kctx->kctx_dentry, kctx, &kbase_jit_debugfs_trim_fops); -#endif /* MALI_JIT_PRESSURE_LIMIT */ +#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ } #endif /* CONFIG_DEBUG_FS */ @@ -3138,8 +3483,13 @@ int kbase_jit_init(struct kbase_context *kctx) INIT_LIST_HEAD(&kctx->jit_destroy_head); INIT_WORK(&kctx->jit_work, kbase_jit_destroy_worker); +#if MALI_USE_CSF + INIT_LIST_HEAD(&kctx->csf.kcpu_queues.jit_cmds_head); + INIT_LIST_HEAD(&kctx->csf.kcpu_queues.jit_blocked_queues); +#else /* !MALI_USE_CSF */ INIT_LIST_HEAD(&kctx->jctx.jit_atoms_head); INIT_LIST_HEAD(&kctx->jctx.jit_pending_alloc); +#endif /* MALI_USE_CSF */ mutex_unlock(&kctx->jit_evict_lock); kctx->jit_max_allocations = 0; @@ -3153,25 +3503,29 @@ int kbase_jit_init(struct kbase_context *kctx) * allocation and also, if BASE_JIT_ALLOC_MEM_TILER_ALIGN_TOP is set, meets * the alignment requirements. 
*/ -static bool meet_size_and_tiler_align_top_requirements(struct kbase_context *kctx, - struct kbase_va_region *walker, const struct base_jit_alloc_info *info) +static bool meet_size_and_tiler_align_top_requirements( + const struct kbase_va_region *walker, + const struct base_jit_alloc_info *info) { bool meet_reqs = true; if (walker->nr_pages != info->va_pages) meet_reqs = false; - else if (info->flags & BASE_JIT_ALLOC_MEM_TILER_ALIGN_TOP) { - size_t align = info->extent; + +#if !MALI_USE_CSF + if (meet_reqs && (info->flags & BASE_JIT_ALLOC_MEM_TILER_ALIGN_TOP)) { + size_t align = info->extension; size_t align_mask = align - 1; if ((walker->start_pfn + info->commit_pages) & align_mask) meet_reqs = false; } +#endif /* !MALI_USE_CSF */ return meet_reqs; } -#if MALI_JIT_PRESSURE_LIMIT +#if MALI_JIT_PRESSURE_LIMIT_BASE /* Function will guarantee *@freed will not exceed @pages_needed */ static int kbase_mem_jit_trim_pages_from_region(struct kbase_context *kctx, @@ -3185,7 +3539,9 @@ static int kbase_mem_jit_trim_pages_from_region(struct kbase_context *kctx, size_t to_free = 0u; size_t max_allowed_pages = old_pages; +#if !MALI_USE_CSF lockdep_assert_held(&kctx->jctx.lock); +#endif /* !MALI_USE_CSF */ lockdep_assert_held(&kctx->reg_lock); /* Is this a JIT allocation that has been reported on? */ @@ -3213,20 +3569,20 @@ static int kbase_mem_jit_trim_pages_from_region(struct kbase_context *kctx, KBASE_GPU_ALLOCATED_OBJECT_ALIGN_BYTES); } else if (reg->flags & KBASE_REG_TILER_ALIGN_TOP) { /* The GPU could report being ready to write to the next - * 'extent' sized chunk, but didn't actually write to it, so we - * can report up to 'extent' size pages more than the backed + * 'extension' sized chunk, but didn't actually write to it, so we + * can report up to 'extension' size pages more than the backed * size. * * Note, this is allowed to exceed reg->nr_pages. */ - max_allowed_pages += reg->extent; + max_allowed_pages += reg->extension; /* Also note that in these GPUs, the GPU may make a large (>1 * page) initial allocation but not actually write out to all * of it. Hence it might report that a much higher amount of * memory was used than actually was written to. This does not * result in a real warning because on growing this memory we - * round up the size of the allocation up to an 'extent' sized + * round up the size of the allocation up to an 'extension' sized * chunk, hence automatically bringing the backed size up to * the reported size. 
*/ @@ -3308,8 +3664,12 @@ static size_t kbase_mem_jit_trim_pages(struct kbase_context *kctx, struct kbase_va_region *reg, *tmp; size_t total_freed = 0; - kbase_gpu_vm_lock(kctx); - mutex_lock(&kctx->jit_evict_lock); +#if !MALI_USE_CSF + lockdep_assert_held(&kctx->jctx.lock); +#endif /* !MALI_USE_CSF */ + lockdep_assert_held(&kctx->reg_lock); + lockdep_assert_held(&kctx->jit_evict_lock); + list_for_each_entry_safe(reg, tmp, &kctx->jit_active_head, jit_node) { int err; size_t freed = 0u; @@ -3328,18 +3688,17 @@ static size_t kbase_mem_jit_trim_pages(struct kbase_context *kctx, if (!pages_needed) break; } - mutex_unlock(&kctx->jit_evict_lock); - kbase_gpu_vm_unlock(kctx); trace_mali_jit_trim(total_freed); return total_freed; } -#endif /* MALI_JIT_PRESSURE_LIMIT */ +#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ static int kbase_jit_grow(struct kbase_context *kctx, - const struct base_jit_alloc_info *info, - struct kbase_va_region *reg) + const struct base_jit_alloc_info *info, + struct kbase_va_region *reg, + struct kbase_sub_alloc **prealloc_sas) { size_t delta; size_t pages_required; @@ -3347,15 +3706,13 @@ static int kbase_jit_grow(struct kbase_context *kctx, struct kbase_mem_pool *pool; int ret = -ENOMEM; struct tagged_addr *gpu_pages; - struct kbase_sub_alloc *prealloc_sas[2] = { NULL, NULL }; - int i; if (info->commit_pages > reg->nr_pages) { /* Attempted to grow larger than maximum size */ return -EINVAL; } - kbase_gpu_vm_lock(kctx); + lockdep_assert_held(&kctx->reg_lock); /* Make the physical backing no longer reclaimable */ if (!kbase_mem_evictable_unmake(reg->gpu_alloc)) @@ -3372,14 +3729,6 @@ static int kbase_jit_grow(struct kbase_context *kctx, pages_required = delta; #ifdef CONFIG_MALI_2MB_ALLOC - /* Preallocate memory for the sub-allocation structs */ - for (i = 0; i != ARRAY_SIZE(prealloc_sas); ++i) { - prealloc_sas[i] = kmalloc(sizeof(*prealloc_sas[i]), - GFP_KERNEL); - if (!prealloc_sas[i]) - goto update_failed; - } - if (pages_required >= (SZ_2M / SZ_4K)) { pool = &kctx->mem_pools.large[kctx->jit_group_id]; /* Round up to number of 2 MB pages required */ @@ -3405,15 +3754,18 @@ static int kbase_jit_grow(struct kbase_context *kctx, */ while (kbase_mem_pool_size(pool) < pages_required) { int pool_delta = pages_required - kbase_mem_pool_size(pool); + int ret; kbase_mem_pool_unlock(pool); spin_unlock(&kctx->mem_partials_lock); + kbase_gpu_vm_unlock(kctx); + ret = kbase_mem_pool_grow(pool, pool_delta); + kbase_gpu_vm_lock(kctx); - if (kbase_mem_pool_grow(pool, pool_delta)) - goto update_failed_unlocked; + if (ret) + goto update_failed; - kbase_gpu_vm_lock(kctx); spin_lock(&kctx->mem_partials_lock); kbase_mem_pool_lock(pool); } @@ -3456,14 +3808,9 @@ done: /* Update attributes of JIT allocation taken from the pool */ reg->initial_commit = info->commit_pages; - reg->extent = info->extent; + reg->extension = info->extension; update_failed: - kbase_gpu_vm_unlock(kctx); -update_failed_unlocked: - for (i = 0; i != ARRAY_SIZE(prealloc_sas); ++i) - kfree(prealloc_sas[i]); - return ret; } @@ -3492,9 +3839,9 @@ static void trace_jit_stats(struct kbase_context *kctx, max_allocations, alloc_count, va_pages, ph_pages); } -#if MALI_JIT_PRESSURE_LIMIT +#if MALI_JIT_PRESSURE_LIMIT_BASE /** - * get_jit_backed_pressure() - calculate the physical backing of all JIT + * get_jit_phys_backing() - calculate the physical backing of all JIT * allocations * * @kctx: Pointer to the kbase context whose active JIT allocations will be @@ -3502,83 +3849,50 @@ static void trace_jit_stats(struct kbase_context *kctx, 
* * Return: number of pages that are committed by JIT allocations */ -static size_t get_jit_backed_pressure(struct kbase_context *kctx) +static size_t get_jit_phys_backing(struct kbase_context *kctx) { - size_t backed_pressure = 0; - int jit_id; - - lockdep_assert_held(&kctx->jctx.lock); + struct kbase_va_region *walker; + size_t backing = 0; - kbase_gpu_vm_lock(kctx); - for (jit_id = 0; jit_id <= BASE_JIT_ALLOC_COUNT; jit_id++) { - struct kbase_va_region *reg = kctx->jit_alloc[jit_id]; + lockdep_assert_held(&kctx->jit_evict_lock); - if (reg && (reg != KBASE_RESERVED_REG_JIT_ALLOC)) { - /* If region has no report, be pessimistic */ - if (reg->used_pages == reg->nr_pages) { - backed_pressure += reg->nr_pages; - } else { - backed_pressure += - kbase_reg_current_backed_size(reg); - } - } + list_for_each_entry(walker, &kctx->jit_active_head, jit_node) { + backing += kbase_reg_current_backed_size(walker); } - kbase_gpu_vm_unlock(kctx); - return backed_pressure; + return backing; } -/** - * jit_trim_necessary_pages() - calculate and trim the least pages possible to - * satisfy a new JIT allocation - * - * @kctx: Pointer to the kbase context - * @info: Pointer to JIT allocation information for the new allocation - * - * Before allocating a new just-in-time memory region or reusing a previous - * one, ensure that the total JIT physical page usage also will not exceed the - * pressure limit. - * - * If there are no reported-on allocations, then we already guarantee this will - * be the case - because our current pressure then only comes from the va_pages - * of each JIT region, hence JIT physical page usage is guaranteed to be - * bounded by this. - * - * However as soon as JIT allocations become "reported on", the pressure is - * lowered to allow new JIT regions to be allocated. It is after such a point - * that the total JIT physical page usage could (either now or in the future on - * a grow-on-GPU-page-fault) exceed the pressure limit, but only on newly - * allocated JIT regions. Hence, trim any "reported on" regions. - * - * Any pages freed will go into the pool and be allocated from there in - * kbase_mem_alloc(). - */ -static void jit_trim_necessary_pages(struct kbase_context *kctx, - const struct base_jit_alloc_info *info) +void kbase_jit_trim_necessary_pages(struct kbase_context *kctx, + size_t needed_pages) { - size_t backed_pressure = 0; - size_t needed_pages = 0; + size_t jit_backing = 0; + size_t pages_to_trim = 0; - backed_pressure = get_jit_backed_pressure(kctx); +#if !MALI_USE_CSF + lockdep_assert_held(&kctx->jctx.lock); +#endif /* !MALI_USE_CSF */ + lockdep_assert_held(&kctx->reg_lock); + lockdep_assert_held(&kctx->jit_evict_lock); + + jit_backing = get_jit_phys_backing(kctx); /* It is possible that this is the case - if this is the first * allocation after "ignore_pressure_limit" allocation. 
*/ - if (backed_pressure > kctx->jit_phys_pages_limit) { - needed_pages += - (backed_pressure - kctx->jit_phys_pages_limit) - + info->va_pages; + if (jit_backing > kctx->jit_phys_pages_limit) { + pages_to_trim += (jit_backing - kctx->jit_phys_pages_limit) + + needed_pages; } else { - size_t backed_diff = - kctx->jit_phys_pages_limit - backed_pressure; + size_t backed_diff = kctx->jit_phys_pages_limit - jit_backing; - if (info->va_pages > backed_diff) - needed_pages += info->va_pages - backed_diff; + if (needed_pages > backed_diff) + pages_to_trim += needed_pages - backed_diff; } - if (needed_pages) { - size_t trimmed_pages = kbase_mem_jit_trim_pages(kctx, - needed_pages); + if (pages_to_trim) { + size_t trimmed_pages = + kbase_mem_jit_trim_pages(kctx, pages_to_trim); /* This should never happen - we already asserted that * we are not violating JIT pressure limit in earlier @@ -3586,10 +3900,10 @@ static void jit_trim_necessary_pages(struct kbase_context *kctx, * must have enough unused pages to satisfy the new * allocation */ - WARN_ON(trimmed_pages < needed_pages); + WARN_ON(trimmed_pages < pages_to_trim); } } -#endif /* MALI_JIT_PRESSURE_LIMIT */ +#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ /** * jit_allow_allocate() - check whether basic conditions are satisfied to allow @@ -3606,10 +3920,14 @@ static bool jit_allow_allocate(struct kbase_context *kctx, const struct base_jit_alloc_info *info, bool ignore_pressure_limit) { +#if MALI_USE_CSF + lockdep_assert_held(&kctx->csf.kcpu_queues.lock); +#else lockdep_assert_held(&kctx->jctx.lock); +#endif -#if MALI_JIT_PRESSURE_LIMIT - if (likely(!ignore_pressure_limit) && +#if MALI_JIT_PRESSURE_LIMIT_BASE + if (!ignore_pressure_limit && ((kctx->jit_phys_pages_limit <= kctx->jit_current_phys_pressure) || (info->va_pages > (kctx->jit_phys_pages_limit - kctx->jit_current_phys_pressure)))) { dev_dbg(kctx->kbdev->dev, @@ -3618,7 +3936,7 @@ static bool jit_allow_allocate(struct kbase_context *kctx, kctx->jit_phys_pages_limit); return false; } -#endif /* MALI_JIT_PRESSURE_LIMIT */ +#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ if (kctx->jit_current_allocations >= kctx->jit_max_allocations) { /* Too many current allocations */ @@ -3644,123 +3962,156 @@ static bool jit_allow_allocate(struct kbase_context *kctx, return true; } +static struct kbase_va_region * +find_reasonable_region(const struct base_jit_alloc_info *info, + struct list_head *pool_head, bool ignore_usage_id) +{ + struct kbase_va_region *closest_reg = NULL; + struct kbase_va_region *walker; + size_t current_diff = SIZE_MAX; + + list_for_each_entry(walker, pool_head, jit_node) { + if ((ignore_usage_id || + walker->jit_usage_id == info->usage_id) && + walker->jit_bin_id == info->bin_id && + meet_size_and_tiler_align_top_requirements(walker, info)) { + size_t min_size, max_size, diff; + + /* + * The JIT allocations VA requirements have been met, + * it's suitable but other allocations might be a + * better fit. 
+ */ + min_size = min_t(size_t, walker->gpu_alloc->nents, + info->commit_pages); + max_size = max_t(size_t, walker->gpu_alloc->nents, + info->commit_pages); + diff = max_size - min_size; + + if (current_diff > diff) { + current_diff = diff; + closest_reg = walker; + } + + /* The allocation is an exact match */ + if (current_diff == 0) + break; + } + } + + return closest_reg; +} + struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx, const struct base_jit_alloc_info *info, bool ignore_pressure_limit) { struct kbase_va_region *reg = NULL; + struct kbase_sub_alloc *prealloc_sas[2] = { NULL, NULL }; + int i; +#if MALI_USE_CSF + lockdep_assert_held(&kctx->csf.kcpu_queues.lock); +#else lockdep_assert_held(&kctx->jctx.lock); +#endif if (!jit_allow_allocate(kctx, info, ignore_pressure_limit)) return NULL; -#if MALI_JIT_PRESSURE_LIMIT - if (!ignore_pressure_limit) - jit_trim_necessary_pages(kctx, info); -#endif /* MALI_JIT_PRESSURE_LIMIT */ +#ifdef CONFIG_MALI_2MB_ALLOC + /* Preallocate memory for the sub-allocation structs */ + for (i = 0; i != ARRAY_SIZE(prealloc_sas); ++i) { + prealloc_sas[i] = kmalloc(sizeof(*prealloc_sas[i]), GFP_KERNEL); + if (!prealloc_sas[i]) + goto end; + } +#endif + kbase_gpu_vm_lock(kctx); mutex_lock(&kctx->jit_evict_lock); /* * Scan the pool for an existing allocation which meets our * requirements and remove it. */ - if (info->usage_id != 0) { + if (info->usage_id != 0) /* First scan for an allocation with the same usage ID */ - struct kbase_va_region *walker; - size_t current_diff = SIZE_MAX; - - list_for_each_entry(walker, &kctx->jit_pool_head, jit_node) { - - if (walker->jit_usage_id == info->usage_id && - walker->jit_bin_id == info->bin_id && - meet_size_and_tiler_align_top_requirements( - kctx, walker, info)) { - size_t min_size, max_size, diff; - - /* - * The JIT allocations VA requirements have been - * met, it's suitable but other allocations - * might be a better fit. - */ - min_size = min_t(size_t, - walker->gpu_alloc->nents, - info->commit_pages); - max_size = max_t(size_t, - walker->gpu_alloc->nents, - info->commit_pages); - diff = max_size - min_size; - - if (current_diff > diff) { - current_diff = diff; - reg = walker; - } - - /* The allocation is an exact match */ - if (current_diff == 0) - break; - } - } - } + reg = find_reasonable_region(info, &kctx->jit_pool_head, false); - if (!reg) { + if (!reg) /* No allocation with the same usage ID, or usage IDs not in * use. Search for an allocation we can reuse. */ - struct kbase_va_region *walker; - size_t current_diff = SIZE_MAX; - - list_for_each_entry(walker, &kctx->jit_pool_head, jit_node) { - - if (walker->jit_bin_id == info->bin_id && - meet_size_and_tiler_align_top_requirements( - kctx, walker, info)) { - size_t min_size, max_size, diff; - - /* - * The JIT allocations VA requirements have been - * met, it's suitable but other allocations - * might be a better fit. - */ - min_size = min_t(size_t, - walker->gpu_alloc->nents, - info->commit_pages); - max_size = max_t(size_t, - walker->gpu_alloc->nents, - info->commit_pages); - diff = max_size - min_size; - - if (current_diff > diff) { - current_diff = diff; - reg = walker; - } - - /* The allocation is an exact match, so stop - * looking. 
- */ - if (current_diff == 0) - break; - } - } - } + reg = find_reasonable_region(info, &kctx->jit_pool_head, true); if (reg) { +#if MALI_JIT_PRESSURE_LIMIT_BASE + size_t needed_pages = 0; +#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ + int ret; + /* * Remove the found region from the pool and add it to the * active list. */ list_move(®->jit_node, &kctx->jit_active_head); + WARN_ON(reg->gpu_alloc->evicted); + /* * Remove the allocation from the eviction list as it's no * longer eligible for eviction. This must be done before * dropping the jit_evict_lock */ list_del_init(®->gpu_alloc->evict_node); + +#if MALI_JIT_PRESSURE_LIMIT_BASE + if (!ignore_pressure_limit) { + if (info->commit_pages > reg->gpu_alloc->nents) + needed_pages = info->commit_pages - + reg->gpu_alloc->nents; + + /* Update early the recycled JIT region's estimate of + * used_pages to ensure it doesn't get trimmed + * undesirably. This is needed as the recycled JIT + * region has been added to the active list but the + * number of used pages for it would be zero, so it + * could get trimmed instead of other allocations only + * to be regrown later resulting in a breach of the JIT + * physical pressure limit. + * Also that trimming would disturb the accounting of + * physical pages, i.e. the VM stats, as the number of + * backing pages would have changed when the call to + * kbase_mem_evictable_unmark_reclaim is made. + * + * The second call to update pressure at the end of + * this function would effectively be a nop. + */ + kbase_jit_report_update_pressure( + kctx, reg, info->va_pages, + KBASE_JIT_REPORT_ON_ALLOC_OR_FREE); + + kbase_jit_request_phys_increase_locked(kctx, + needed_pages); + } +#endif mutex_unlock(&kctx->jit_evict_lock); - if (kbase_jit_grow(kctx, info, reg) < 0) { + /* kbase_jit_grow() can release & reacquire 'kctx->reg_lock', + * so any state protected by that lock might need to be + * re-evaluated if more code is added here in future. + */ + ret = kbase_jit_grow(kctx, info, reg, prealloc_sas); + +#if MALI_JIT_PRESSURE_LIMIT_BASE + if (!ignore_pressure_limit) + kbase_jit_done_phys_increase(kctx, needed_pages); +#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ + + kbase_gpu_vm_unlock(kctx); + + if (ret < 0) { /* * An update to an allocation from the pool failed, * chances are slim a new allocation would fair any @@ -3770,10 +4121,21 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx, dev_dbg(kctx->kbdev->dev, "JIT allocation resize failed: va_pages 0x%llx, commit_pages 0x%llx\n", info->va_pages, info->commit_pages); +#if MALI_JIT_PRESSURE_LIMIT_BASE + /* Undo the early change made to the recycled JIT + * region's estimate of used_pages. 
+ */ + if (!ignore_pressure_limit) { + kbase_jit_report_update_pressure( + kctx, reg, 0, + KBASE_JIT_REPORT_ON_ALLOC_OR_FREE); + } +#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ mutex_lock(&kctx->jit_evict_lock); list_move(®->jit_node, &kctx->jit_pool_head); mutex_unlock(&kctx->jit_evict_lock); - return NULL; + reg = NULL; + goto end; } } else { /* No suitable JIT allocation was found so create a new one */ @@ -3783,15 +4145,28 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx, BASEP_MEM_NO_USER_FREE; u64 gpu_addr; - mutex_unlock(&kctx->jit_evict_lock); - +#if !MALI_USE_CSF if (info->flags & BASE_JIT_ALLOC_MEM_TILER_ALIGN_TOP) flags |= BASE_MEM_TILER_ALIGN_TOP; +#endif /* !MALI_USE_CSF */ flags |= base_mem_group_id_set(kctx->jit_group_id); +#if MALI_JIT_PRESSURE_LIMIT_BASE + if (!ignore_pressure_limit) { + flags |= BASEP_MEM_PERFORM_JIT_TRIM; + /* The corresponding call to 'done_phys_increase' would + * be made inside the kbase_mem_alloc(). + */ + kbase_jit_request_phys_increase_locked( + kctx, info->commit_pages); + } +#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ + + mutex_unlock(&kctx->jit_evict_lock); + kbase_gpu_vm_unlock(kctx); reg = kbase_mem_alloc(kctx, info->va_pages, info->commit_pages, - info->extent, &flags, &gpu_addr); + info->extension, &flags, &gpu_addr); if (!reg) { /* Most likely not enough GPU virtual space left for * the new JIT allocation. @@ -3799,12 +4174,22 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx, dev_dbg(kctx->kbdev->dev, "Failed to allocate JIT memory: va_pages 0x%llx, commit_pages 0x%llx\n", info->va_pages, info->commit_pages); - return NULL; + goto end; } - mutex_lock(&kctx->jit_evict_lock); - list_add(®->jit_node, &kctx->jit_active_head); - mutex_unlock(&kctx->jit_evict_lock); + if (!ignore_pressure_limit) { + /* Due to enforcing of pressure limit, kbase_mem_alloc + * was instructed to perform the trimming which in turn + * would have ensured that the new JIT allocation is + * already in the jit_active_head list, so nothing to + * do here. 
+ */ + WARN_ON(list_empty(®->jit_node)); + } else { + mutex_lock(&kctx->jit_evict_lock); + list_add(®->jit_node, &kctx->jit_active_head); + mutex_unlock(&kctx->jit_evict_lock); + } } trace_mali_jit_alloc(reg, info->id); @@ -3816,13 +4201,18 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx, reg->jit_usage_id = info->usage_id; reg->jit_bin_id = info->bin_id; -#if MALI_JIT_PRESSURE_LIMIT + reg->flags |= KBASE_REG_ACTIVE_JIT_ALLOC; +#if MALI_JIT_PRESSURE_LIMIT_BASE if (info->flags & BASE_JIT_ALLOC_HEAP_INFO_IS_SIZE) reg->flags = reg->flags | KBASE_REG_HEAP_INFO_IS_SIZE; reg->heap_info_gpu_addr = info->heap_info_gpu_addr; kbase_jit_report_update_pressure(kctx, reg, info->va_pages, KBASE_JIT_REPORT_ON_ALLOC_OR_FREE); -#endif /* MALI_JIT_PRESSURE_LIMIT */ +#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ + +end: + for (i = 0; i != ARRAY_SIZE(prealloc_sas); ++i) + kfree(prealloc_sas[i]); return reg; } @@ -3844,15 +4234,18 @@ void kbase_jit_free(struct kbase_context *kctx, struct kbase_va_region *reg) div_u64(old_pages * (100 - kctx->trim_level), 100)); u64 delta = old_pages - new_size; - if (delta) + if (delta) { + mutex_lock(&kctx->reg_lock); kbase_mem_shrink(kctx, reg, old_pages - delta); + mutex_unlock(&kctx->reg_lock); + } } -#if MALI_JIT_PRESSURE_LIMIT +#if MALI_JIT_PRESSURE_LIMIT_BASE reg->heap_info_gpu_addr = 0; kbase_jit_report_update_pressure(kctx, reg, 0, KBASE_JIT_REPORT_ON_ALLOC_OR_FREE); -#endif /* MALI_JIT_PRESSURE_LIMIT */ +#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ kctx->jit_current_allocations--; kctx->jit_current_allocations_per_bin[reg->jit_bin_id]--; @@ -3863,6 +4256,7 @@ void kbase_jit_free(struct kbase_context *kctx, struct kbase_va_region *reg) kbase_gpu_vm_lock(kctx); reg->flags |= KBASE_REG_DONT_NEED; + reg->flags &= ~KBASE_REG_ACTIVE_JIT_ALLOC; kbase_mem_shrink_cpu_mapping(kctx, reg, 0, reg->gpu_alloc->nents); kbase_gpu_vm_unlock(kctx); @@ -3875,6 +4269,7 @@ void kbase_jit_free(struct kbase_context *kctx, struct kbase_va_region *reg) /* This allocation can't already be on a list. 
*/ WARN_ON(!list_empty(®->gpu_alloc->evict_node)); list_add(®->gpu_alloc->evict_node, &kctx->evict_list); + atomic_add(reg->gpu_alloc->nents, &kctx->evict_nents); list_move(®->jit_node, &kctx->jit_pool_head); @@ -3962,6 +4357,9 @@ void kbase_jit_term(struct kbase_context *kctx) kbase_mem_free_region(kctx, walker); mutex_lock(&kctx->jit_evict_lock); } +#if MALI_JIT_PRESSURE_LIMIT_BASE + WARN_ON(kctx->jit_phys_pages_to_be_allocated); +#endif mutex_unlock(&kctx->jit_evict_lock); kbase_gpu_vm_unlock(kctx); @@ -3972,7 +4370,7 @@ void kbase_jit_term(struct kbase_context *kctx) cancel_work_sync(&kctx->jit_work); } -#if MALI_JIT_PRESSURE_LIMIT +#if MALI_JIT_PRESSURE_LIMIT_BASE void kbase_trace_jit_report_gpu_mem_trace_enabled(struct kbase_context *kctx, struct kbase_va_region *reg, unsigned int flags) { @@ -4015,16 +4413,18 @@ void kbase_trace_jit_report_gpu_mem_trace_enabled(struct kbase_context *kctx, out: return; } -#endif /* MALI_JIT_PRESSURE_LIMIT */ +#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ -#if MALI_JIT_PRESSURE_LIMIT +#if MALI_JIT_PRESSURE_LIMIT_BASE void kbase_jit_report_update_pressure(struct kbase_context *kctx, struct kbase_va_region *reg, u64 new_used_pages, unsigned int flags) { u64 diff; +#if !MALI_USE_CSF lockdep_assert_held(&kctx->jctx.lock); +#endif /* !MALI_USE_CSF */ trace_mali_jit_report_pressure(reg, new_used_pages, kctx->jit_current_phys_pressure + new_used_pages - @@ -4053,19 +4453,22 @@ void kbase_jit_report_update_pressure(struct kbase_context *kctx, } } -#endif /* MALI_JIT_PRESSURE_LIMIT */ +#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */ -bool kbase_has_exec_va_zone(struct kbase_context *kctx) +#if MALI_USE_CSF +static void kbase_jd_user_buf_unpin_pages(struct kbase_mem_phy_alloc *alloc) { - bool has_exec_va_zone; + if (alloc->nents) { + struct page **pages = alloc->imported.user_buf.pages; + long i; - kbase_gpu_vm_lock(kctx); - has_exec_va_zone = (kctx->exec_va_start != U64_MAX); - kbase_gpu_vm_unlock(kctx); + WARN_ON(alloc->nents != alloc->imported.user_buf.nr_pages); - return has_exec_va_zone; + for (i = 0; i < alloc->nents; i++) + put_page(pages[i]); + } } - +#endif int kbase_jd_user_buf_pin_pages(struct kbase_context *kctx, struct kbase_va_region *reg) @@ -4090,7 +4493,7 @@ int kbase_jd_user_buf_pin_pages(struct kbase_context *kctx, if (WARN_ON(reg->gpu_alloc->imported.user_buf.mm != current->mm)) return -EINVAL; -#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) +#if KERNEL_VERSION(4, 6, 0) > LINUX_VERSION_CODE pinned_pages = get_user_pages(NULL, mm, address, alloc->imported.user_buf.nr_pages, @@ -4102,24 +4505,30 @@ KERNEL_VERSION(4, 5, 0) > LINUX_VERSION_CODE reg->flags & KBASE_REG_GPU_WR, 0, pages, NULL); #endif -#elif LINUX_VERSION_CODE < KERNEL_VERSION(4, 9, 0) +#elif KERNEL_VERSION(4, 9, 0) > LINUX_VERSION_CODE pinned_pages = get_user_pages_remote(NULL, mm, address, alloc->imported.user_buf.nr_pages, reg->flags & KBASE_REG_GPU_WR, 0, pages, NULL); -#elif LINUX_VERSION_CODE < KERNEL_VERSION(4, 10, 0) +#elif KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE pinned_pages = get_user_pages_remote(NULL, mm, address, alloc->imported.user_buf.nr_pages, reg->flags & KBASE_REG_GPU_WR ? FOLL_WRITE : 0, pages, NULL); -#else +#elif KERNEL_VERSION(5, 9, 0) > LINUX_VERSION_CODE pinned_pages = get_user_pages_remote(NULL, mm, address, alloc->imported.user_buf.nr_pages, reg->flags & KBASE_REG_GPU_WR ? FOLL_WRITE : 0, pages, NULL, NULL); +#else + pinned_pages = get_user_pages_remote(mm, + address, + alloc->imported.user_buf.nr_pages, + reg->flags & KBASE_REG_GPU_WR ? 
FOLL_WRITE : 0, + pages, NULL, NULL); #endif if (pinned_pages <= 0) @@ -4232,12 +4641,16 @@ static void kbase_jd_user_buf_unmap(struct kbase_context *kctx, DMA_BIDIRECTIONAL); if (writeable) set_page_dirty_lock(pages[i]); +#if !MALI_USE_CSF put_page(pages[i]); pages[i] = NULL; +#endif size -= local_size; } +#if !MALI_USE_CSF alloc->nents = 0; +#endif } int kbase_mem_copy_to_pinned_user_pages(struct page **dest_pages, @@ -4296,7 +4709,8 @@ struct kbase_mem_phy_alloc *kbase_map_external_resource( goto exit; reg->gpu_alloc->imported.user_buf.current_mapping_usage_count++; - if (1 == reg->gpu_alloc->imported.user_buf.current_mapping_usage_count) { + if (reg->gpu_alloc->imported.user_buf + .current_mapping_usage_count == 1) { err = kbase_jd_user_buf_map(kctx, reg); if (err) { reg->gpu_alloc->imported.user_buf.current_mapping_usage_count--; @@ -4331,7 +4745,7 @@ void kbase_unmap_external_resource(struct kbase_context *kctx, case KBASE_MEM_TYPE_IMPORTED_USER_BUF: { alloc->imported.user_buf.current_mapping_usage_count--; - if (0 == alloc->imported.user_buf.current_mapping_usage_count) { + if (alloc->imported.user_buf.current_mapping_usage_count == 0) { bool writeable = true; if (!kbase_is_region_invalid_or_free(reg) && |
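A note on the reworked trim calculation near the top of this hunk (jit_trim_necessary_pages): the new code trims only as many backed pages as are needed to keep the JIT physical backing under kctx->jit_phys_pages_limit once the pending request is accounted for. The stand-alone sketch below reproduces just that arithmetic; the function and variable names (pages_to_trim, jit_backing, phys_limit, needed_pages) are illustrative stand-ins, not the driver's identifiers.

#include <assert.h>
#include <stddef.h>
#include <stdio.h>

/*
 * Sketch of the "trim just enough" computation: given the pages currently
 * backing JIT allocations (jit_backing), the physical page limit and the
 * pages the new allocation needs, return how many backed pages must be
 * reclaimed so the limit still holds after the allocation.
 */
static size_t pages_to_trim(size_t jit_backing, size_t phys_limit,
                            size_t needed_pages)
{
        size_t trim = 0;

        if (jit_backing > phys_limit) {
                /* Already over the limit: reclaim the overshoot plus
                 * everything the new allocation will add.
                 */
                trim = (jit_backing - phys_limit) + needed_pages;
        } else {
                size_t headroom = phys_limit - jit_backing;

                /* Under the limit: only trim what the headroom cannot absorb. */
                if (needed_pages > headroom)
                        trim = needed_pages - headroom;
        }

        return trim;
}

int main(void)
{
        assert(pages_to_trim(100, 128, 16) == 0);  /* request fits in headroom  */
        assert(pages_to_trim(120, 128, 16) == 8);  /* partially absorbed        */
        assert(pages_to_trim(140, 128, 16) == 28); /* overshoot + whole request */
        printf("ok\n");
        return 0;
}

Written out this way, the asymmetry is explicit: when the backing already exceeds the limit, the whole request is added on top of the overshoot, otherwise only the part of the request that does not fit in the remaining headroom needs to be trimmed.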
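The diff also folds the two duplicated pool scans in kbase_jit_allocate() into a single helper, find_reasonable_region(), which walks jit_pool_head keeping the candidate whose backed page count is closest to the requested commit size and breaks out early on an exact match. A minimal user-space sketch of that closest-fit selection follows; the struct, the bin_id check and the array-based pool are simplified placeholders for the real constraints (usage_id/bin_id matching and meet_size_and_tiler_align_top_requirements()), not the driver's types.

#include <stddef.h>
#include <stdio.h>

struct fake_region {
        size_t backed_pages;   /* stands in for gpu_alloc->nents */
        int bin_id;
};

/*
 * Closest-fit scan: among candidates that pass the constraint check, keep
 * the one whose backed size differs least from the requested size, and stop
 * as soon as an exact match is found.
 */
static struct fake_region *closest_fit(struct fake_region *pool, size_t count,
                                       int want_bin, size_t want_pages)
{
        struct fake_region *best = NULL;
        size_t best_diff = (size_t)-1;
        size_t i;

        for (i = 0; i < count; i++) {
                size_t have, diff;

                if (pool[i].bin_id != want_bin)
                        continue;   /* placeholder for the real constraints */

                have = pool[i].backed_pages;
                diff = have > want_pages ? have - want_pages
                                         : want_pages - have;

                if (diff < best_diff) {
                        best_diff = diff;
                        best = &pool[i];
                }
                if (best_diff == 0)
                        break;      /* exact match, stop looking */
        }

        return best;
}

int main(void)
{
        struct fake_region pool[] = {
                { .backed_pages = 64,  .bin_id = 1 },
                { .backed_pages = 200, .bin_id = 1 },
                { .backed_pages = 128, .bin_id = 2 },
        };
        struct fake_region *r = closest_fit(pool, 3, 1, 180);

        printf("picked region backed by %zu pages\n", r ? r->backed_pages : 0);
        return 0;
}

The early break mirrors the "if (current_diff == 0) break;" in the new helper, so a region that already matches the requested size short-circuits the rest of the scan.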