path: root/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf.c
Diffstat (limited to 'dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf.c')
-rw-r--r--  dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf.c  |  3069
1 files changed, 3069 insertions, 0 deletions
diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf.c b/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf.c
new file mode 100644
index 0000000..d49e343
--- /dev/null
+++ b/dvalin/kernel/drivers/gpu/arm/midgard/csf/mali_kbase_csf.c
@@ -0,0 +1,3069 @@
+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
+/*
+ *
+ * (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+#include <mali_kbase.h>
+#include <gpu/mali_kbase_gpu_fault.h>
+#include <mali_kbase_reset_gpu.h>
+#include "mali_kbase_csf.h"
+#include "backend/gpu/mali_kbase_pm_internal.h"
+#include <linux/export.h>
+#include <linux/priority_control_manager.h>
+#include <linux/shmem_fs.h>
+#include <uapi/gpu/arm/midgard/csf/mali_gpu_csf_registers.h>
+#include "mali_kbase_csf_tiler_heap.h"
+#include <mmu/mali_kbase_mmu.h>
+#include "mali_kbase_csf_timeout.h"
+#include <csf/ipa_control/mali_kbase_csf_ipa_control.h>
+
+#define CS_REQ_EXCEPTION_MASK (CS_REQ_FAULT_MASK | CS_REQ_FATAL_MASK)
+#define CS_ACK_EXCEPTION_MASK (CS_ACK_FAULT_MASK | CS_ACK_FATAL_MASK)
+#define POWER_DOWN_LATEST_FLUSH_VALUE ((u32)1)
+
+/**
+ * struct kbase_csf_event - CSF event callback.
+ *
+ * This structure belongs to the list of events which is part of a Kbase
+ * context, and describes a callback function with a custom parameter to pass
+ * to it when a CSF event is signalled.
+ *
+ * @link: Link to the rest of the list.
+ * @kctx: Pointer to the Kbase context this event belongs to.
+ * @callback: Callback function to call when a CSF event is signalled.
+ * @param: Parameter to pass to the callback function.
+ */
+struct kbase_csf_event {
+ struct list_head link;
+ struct kbase_context *kctx;
+ kbase_csf_event_callback *callback;
+ void *param;
+};
+
+const u8 kbasep_csf_queue_group_priority_to_relative[BASE_QUEUE_GROUP_PRIORITY_COUNT] = {
+ KBASE_QUEUE_GROUP_PRIORITY_HIGH,
+ KBASE_QUEUE_GROUP_PRIORITY_MEDIUM,
+ KBASE_QUEUE_GROUP_PRIORITY_LOW,
+ KBASE_QUEUE_GROUP_PRIORITY_REALTIME
+};
+const u8 kbasep_csf_relative_to_queue_group_priority[KBASE_QUEUE_GROUP_PRIORITY_COUNT] = {
+ BASE_QUEUE_GROUP_PRIORITY_REALTIME,
+ BASE_QUEUE_GROUP_PRIORITY_HIGH,
+ BASE_QUEUE_GROUP_PRIORITY_MEDIUM,
+ BASE_QUEUE_GROUP_PRIORITY_LOW
+};
+
+static void put_user_pages_mmap_handle(struct kbase_context *kctx,
+ struct kbase_queue *queue)
+{
+ unsigned long cookie_nr;
+
+ lockdep_assert_held(&kctx->csf.lock);
+
+ if (queue->handle == BASEP_MEM_INVALID_HANDLE)
+ return;
+
+ cookie_nr =
+ PFN_DOWN(queue->handle - BASEP_MEM_CSF_USER_IO_PAGES_HANDLE);
+
+ if (!WARN_ON(kctx->csf.user_pages_info[cookie_nr] != queue)) {
+ /* free up cookie */
+ kctx->csf.user_pages_info[cookie_nr] = NULL;
+ bitmap_set(kctx->csf.cookies, cookie_nr, 1);
+ }
+
+ queue->handle = BASEP_MEM_INVALID_HANDLE;
+}
+
+/* Reserve a cookie, to be returned to userspace as a handle for creating
+ * the CPU mapping of the pair of input/output pages and the HW doorbell page.
+ * Returns 0 on success, or a negative error code on failure.
+ */
+static int get_user_pages_mmap_handle(struct kbase_context *kctx,
+ struct kbase_queue *queue)
+{
+ unsigned long cookie, cookie_nr;
+
+ lockdep_assert_held(&kctx->csf.lock);
+
+ if (bitmap_empty(kctx->csf.cookies,
+ KBASE_CSF_NUM_USER_IO_PAGES_HANDLE)) {
+ dev_err(kctx->kbdev->dev,
+ "No csf cookies available for allocation!");
+ return -ENOMEM;
+ }
+
+ /* allocate a cookie */
+ cookie_nr = find_first_bit(kctx->csf.cookies,
+ KBASE_CSF_NUM_USER_IO_PAGES_HANDLE);
+ if (kctx->csf.user_pages_info[cookie_nr]) {
+ dev_err(kctx->kbdev->dev,
+ "Inconsistent state of csf cookies!");
+ return -EINVAL;
+ }
+ kctx->csf.user_pages_info[cookie_nr] = queue;
+ bitmap_clear(kctx->csf.cookies, cookie_nr, 1);
+
+ /* relocate to correct base */
+ cookie = cookie_nr + PFN_DOWN(BASEP_MEM_CSF_USER_IO_PAGES_HANDLE);
+ cookie <<= PAGE_SHIFT;
+
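+ /* The resulting handle is a page-aligned pseudo mmap offset from which
+ * put_user_pages_mmap_handle() can recover the cookie index again via
+ * PFN_DOWN(handle - BASEP_MEM_CSF_USER_IO_PAGES_HANDLE).
+ */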
+ queue->handle = (u64)cookie;
+
+ return 0;
+}
+
+static void gpu_munmap_user_io_pages(struct kbase_context *kctx,
+ struct kbase_va_region *reg)
+{
+ size_t num_pages = 2;
+
+ kbase_mmu_teardown_pages(kctx->kbdev, &kctx->kbdev->csf.mcu_mmu,
+ reg->start_pfn, num_pages, MCU_AS_NR);
+
+ WARN_ON(reg->flags & KBASE_REG_FREE);
+
+ mutex_lock(&kctx->kbdev->csf.reg_lock);
+ kbase_remove_va_region(reg);
+ mutex_unlock(&kctx->kbdev->csf.reg_lock);
+}
+
+static void init_user_io_pages(struct kbase_queue *queue)
+{
+ u32 *input_addr = (u32 *)(queue->user_io_addr);
+ u32 *output_addr = (u32 *)(queue->user_io_addr + PAGE_SIZE);
+
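+ /* Zero both halves of the queue's user I/O region: the input page
+ * holds the insert and extract-init offsets supplied to the firmware,
+ * while the output page holds the extract offset and active flag
+ * reported back by the firmware.
+ */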
+ input_addr[CS_INSERT_LO/4] = 0;
+ input_addr[CS_INSERT_HI/4] = 0;
+
+ input_addr[CS_EXTRACT_INIT_LO/4] = 0;
+ input_addr[CS_EXTRACT_INIT_HI/4] = 0;
+
+ output_addr[CS_EXTRACT_LO/4] = 0;
+ output_addr[CS_EXTRACT_HI/4] = 0;
+
+ output_addr[CS_ACTIVE/4] = 0;
+}
+
+/* Map the input/output pages in the shared interface segment of MCU firmware
+ * address space.
+ */
+static int gpu_mmap_user_io_pages(struct kbase_device *kbdev,
+ struct tagged_addr *phys, struct kbase_va_region *reg)
+{
+ unsigned long mem_flags = KBASE_REG_GPU_RD;
+ const size_t num_pages = 2;
+ int ret;
+
+#if ((KERNEL_VERSION(4, 4, 147) >= LINUX_VERSION_CODE) || \
+ ((KERNEL_VERSION(4, 6, 0) > LINUX_VERSION_CODE) && \
+ (KERNEL_VERSION(4, 5, 0) <= LINUX_VERSION_CODE)))
+ mem_flags |=
+ KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_NON_CACHEABLE);
+#else
+ if (kbdev->system_coherency == COHERENCY_NONE) {
+ mem_flags |=
+ KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_NON_CACHEABLE);
+ } else {
+ mem_flags |= KBASE_REG_SHARE_BOTH |
+ KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_SHARED);
+ }
+#endif
+
+ mutex_lock(&kbdev->csf.reg_lock);
+ ret = kbase_add_va_region_rbtree(kbdev, reg, 0, num_pages, 1);
+ reg->flags &= ~KBASE_REG_FREE;
+ mutex_unlock(&kbdev->csf.reg_lock);
+
+ if (ret)
+ return ret;
+
+ /* Map input page */
+ ret = kbase_mmu_insert_pages(kbdev, &kbdev->csf.mcu_mmu,
+ reg->start_pfn, &phys[0],
+ 1, mem_flags, MCU_AS_NR,
+ KBASE_MEM_GROUP_CSF_IO);
+ if (ret)
+ goto bad_insert;
+
+ /* Map output page, it needs rw access */
+ mem_flags |= KBASE_REG_GPU_WR;
+ ret = kbase_mmu_insert_pages(kbdev, &kbdev->csf.mcu_mmu,
+ reg->start_pfn + 1, &phys[1],
+ 1, mem_flags, MCU_AS_NR,
+ KBASE_MEM_GROUP_CSF_IO);
+ if (ret)
+ goto bad_insert_output_page;
+
+ return 0;
+
+bad_insert_output_page:
+ kbase_mmu_teardown_pages(kbdev, &kbdev->csf.mcu_mmu,
+ reg->start_pfn, 1, MCU_AS_NR);
+bad_insert:
+ mutex_lock(&kbdev->csf.reg_lock);
+ kbase_remove_va_region(reg);
+ mutex_unlock(&kbdev->csf.reg_lock);
+
+ return ret;
+}
+
+static void kernel_unmap_user_io_pages(struct kbase_context *kctx,
+ struct kbase_queue *queue)
+{
+ const size_t num_pages = 2;
+
+ kbase_gpu_vm_lock(kctx);
+
+ vunmap(queue->user_io_addr);
+
+ WARN_ON(num_pages > atomic_read(&kctx->permanent_mapped_pages));
+ atomic_sub(num_pages, &kctx->permanent_mapped_pages);
+
+ kbase_gpu_vm_unlock(kctx);
+}
+
+static int kernel_map_user_io_pages(struct kbase_context *kctx,
+ struct kbase_queue *queue)
+{
+ struct page *page_list[2];
+ pgprot_t cpu_map_prot;
+ int ret = 0;
+ size_t i;
+
+ kbase_gpu_vm_lock(kctx);
+
+ if (ARRAY_SIZE(page_list) > (KBASE_PERMANENTLY_MAPPED_MEM_LIMIT_PAGES -
+ atomic_read(&kctx->permanent_mapped_pages))) {
+ ret = -ENOMEM;
+ goto unlock;
+ }
+
+ /* The pages are mapped to Userspace also, so use the same mapping
+ * attributes as used inside the CPU page fault handler.
+ */
+#if ((KERNEL_VERSION(4, 4, 147) >= LINUX_VERSION_CODE) || \
+ ((KERNEL_VERSION(4, 6, 0) > LINUX_VERSION_CODE) && \
+ (KERNEL_VERSION(4, 5, 0) <= LINUX_VERSION_CODE)))
+ cpu_map_prot = pgprot_device(PAGE_KERNEL);
+#else
+ if (kctx->kbdev->system_coherency == COHERENCY_NONE)
+ cpu_map_prot = pgprot_writecombine(PAGE_KERNEL);
+ else
+ cpu_map_prot = PAGE_KERNEL;
+#endif
+
+ for (i = 0; i < ARRAY_SIZE(page_list); i++)
+ page_list[i] = as_page(queue->phys[i]);
+
+ queue->user_io_addr = vmap(page_list, ARRAY_SIZE(page_list), VM_MAP, cpu_map_prot);
+
+ if (!queue->user_io_addr)
+ ret = -ENOMEM;
+ else
+ atomic_add(ARRAY_SIZE(page_list), &kctx->permanent_mapped_pages);
+
+unlock:
+ kbase_gpu_vm_unlock(kctx);
+ return ret;
+}
+
+static void term_queue_group(struct kbase_queue_group *group);
+static void get_queue(struct kbase_queue *queue);
+static void release_queue(struct kbase_queue *queue);
+
+/**
+ * kbase_csf_free_command_stream_user_pages() - Free the resources allocated
+ * for a queue at the time of bind.
+ *
+ * @kctx: Address of the kbase context within which the queue was created.
+ * @queue: Pointer to the queue to be unlinked.
+ *
+ * This function frees the pair of physical pages allocated for a GPU command
+ * queue, and also releases the hardware doorbell page, which were mapped into
+ * the process address space to enable direct submission of commands to the
+ * hardware. It also releases the reference taken on the queue when the mapping
+ * was created.
+ *
+ * This function is called only when the mapping is being removed, so the
+ * resources for the queue are not freed until the mapping is removed, even
+ * though userspace may already have terminated the queue. The kernel ensures
+ * that termination of the Kbase context is triggered only after the mapping
+ * has been removed.
+ *
+ * If an explicit or implicit unbind was missed by userspace then the mapping
+ * persists; on process exit the kernel itself removes the mapping.
+ */
+static void kbase_csf_free_command_stream_user_pages(struct kbase_context *kctx,
+ struct kbase_queue *queue)
+{
+ const size_t num_pages = 2;
+
+ gpu_munmap_user_io_pages(kctx, queue->reg);
+ kernel_unmap_user_io_pages(kctx, queue);
+
+ kbase_mem_pool_free_pages(
+ &kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_IO],
+ num_pages, queue->phys, true, false);
+
+ kfree(queue->reg);
+ queue->reg = NULL;
+
+ /* If the queue has already been terminated by userspace
+ * then the ref count for queue object will drop to 0 here.
+ */
+ release_queue(queue);
+}
+
+int kbase_csf_alloc_command_stream_user_pages(struct kbase_context *kctx,
+ struct kbase_queue *queue)
+{
+ struct kbase_device *kbdev = kctx->kbdev;
+ struct kbase_va_region *reg;
+ const size_t num_pages = 2;
+ int ret;
+
+ lockdep_assert_held(&kctx->csf.lock);
+
+ reg = kbase_alloc_free_region(&kctx->kbdev->csf.shared_reg_rbtree, 0,
+ num_pages, KBASE_REG_ZONE_MCU_SHARED);
+ if (!reg)
+ return -ENOMEM;
+
+ ret = kbase_mem_pool_alloc_pages(
+ &kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_IO],
+ num_pages, queue->phys, false);
+
+ if (ret != num_pages)
+ goto phys_alloc_failed;
+
+ ret = kernel_map_user_io_pages(kctx, queue);
+ if (ret)
+ goto kernel_map_failed;
+
+ init_user_io_pages(queue);
+
+ ret = gpu_mmap_user_io_pages(kctx->kbdev, queue->phys, reg);
+ if (ret)
+ goto gpu_mmap_failed;
+
+ queue->reg = reg;
+
+ mutex_lock(&kbdev->csf.reg_lock);
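+ /* Restart the running mmap offset from zero if handing out another
+ * BASEP_QUEUE_NR_MMAP_USER_PAGES worth of offsets would overflow the
+ * 32-bit counter.
+ */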
+ if (kbdev->csf.db_file_offsets >
+ (U32_MAX - BASEP_QUEUE_NR_MMAP_USER_PAGES + 1))
+ kbdev->csf.db_file_offsets = 0;
+
+ queue->db_file_offset = kbdev->csf.db_file_offsets;
+ kbdev->csf.db_file_offsets += BASEP_QUEUE_NR_MMAP_USER_PAGES;
+
+ WARN(atomic_read(&queue->refcount) != 1, "Incorrect refcounting for queue object\n");
+ /* This is the second reference taken on the queue object and
+ * would be dropped only when the IO mapping is removed either
+ * explicitly by userspace or implicitly by kernel on process exit.
+ */
+ get_queue(queue);
+ queue->bind_state = KBASE_CSF_QUEUE_BOUND;
+ mutex_unlock(&kbdev->csf.reg_lock);
+
+ return 0;
+
+gpu_mmap_failed:
+ kernel_unmap_user_io_pages(kctx, queue);
+
+kernel_map_failed:
+ kbase_mem_pool_free_pages(
+ &kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_IO],
+ num_pages, queue->phys, false, false);
+
+phys_alloc_failed:
+ kfree(reg);
+
+ return -ENOMEM;
+}
+
+static struct kbase_queue_group *find_queue_group(struct kbase_context *kctx,
+ u8 group_handle)
+{
+ uint index = group_handle;
+
+ lockdep_assert_held(&kctx->csf.lock);
+
+ if (index < MAX_QUEUE_GROUP_NUM && kctx->csf.queue_groups[index]) {
+ if (WARN_ON(kctx->csf.queue_groups[index]->handle != index))
+ return NULL;
+ return kctx->csf.queue_groups[index];
+ }
+
+ return NULL;
+}
+
+int kbase_csf_queue_group_handle_is_valid(struct kbase_context *kctx,
+ u8 group_handle)
+{
+ struct kbase_queue_group *group;
+
+ mutex_lock(&kctx->csf.lock);
+ group = find_queue_group(kctx, group_handle);
+ mutex_unlock(&kctx->csf.lock);
+
+ return group ? 0 : -EINVAL;
+}
+
+static struct kbase_queue *find_queue(struct kbase_context *kctx, u64 base_addr)
+{
+ struct kbase_queue *queue;
+
+ lockdep_assert_held(&kctx->csf.lock);
+
+ list_for_each_entry(queue, &kctx->csf.queue_list, link) {
+ if (base_addr == queue->base_addr)
+ return queue;
+ }
+
+ return NULL;
+}
+
+static void get_queue(struct kbase_queue *queue)
+{
+ WARN_ON(!atomic_inc_not_zero(&queue->refcount));
+}
+
+static void release_queue(struct kbase_queue *queue)
+{
+ lockdep_assert_held(&queue->kctx->csf.lock);
+
+ WARN_ON(atomic_read(&queue->refcount) <= 0);
+
+ if (atomic_dec_and_test(&queue->refcount)) {
+ /* The queue must not still be on the per-context list. */
+ WARN_ON(!list_empty(&queue->link));
+ WARN_ON(queue->group);
+ kfree(queue);
+ }
+}
+
+static void oom_event_worker(struct work_struct *data);
+static void fatal_event_worker(struct work_struct *data);
+
+/* Between reg and reg_ex, one and only one must be null */
+static int csf_queue_register_internal(struct kbase_context *kctx,
+ struct kbase_ioctl_cs_queue_register *reg,
+ struct kbase_ioctl_cs_queue_register_ex *reg_ex)
+{
+ struct kbase_queue *queue;
+ int ret = 0;
+ struct kbase_va_region *region;
+ u64 queue_addr;
+ size_t queue_size;
+
+ /* Only one pointer expected, otherwise coding error */
+ if ((reg == NULL && reg_ex == NULL) || (reg && reg_ex)) {
+ dev_err(kctx->kbdev->dev,
+ "Error, one and only one param-ptr expected!");
+ return -EINVAL;
+ }
+
+ /* struct kbase_ioctl_cs_queue_register_ex contains a full
+ * struct kbase_ioctl_cs_queue_register at its start address, so
+ * the pointer can safely be cast to a pointer to a
+ * kbase_ioctl_cs_queue_register object.
+ */
+ if (reg_ex)
+ reg = (struct kbase_ioctl_cs_queue_register *)reg_ex;
+
+ /* Validate the queue priority */
+ if (reg->priority > BASE_QUEUE_MAX_PRIORITY)
+ return -EINVAL;
+
+ queue_addr = reg->buffer_gpu_addr;
+ queue_size = reg->buffer_size >> PAGE_SHIFT;
+
+ mutex_lock(&kctx->csf.lock);
+
+ /* Check if queue is already registered */
+ if (find_queue(kctx, queue_addr) != NULL) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ /* Check if the queue address is valid */
+ kbase_gpu_vm_lock(kctx);
+ region = kbase_region_tracker_find_region_enclosing_address(kctx,
+ queue_addr);
+
+ if (kbase_is_region_invalid_or_free(region)) {
+ ret = -ENOENT;
+ goto out_unlock_vm;
+ }
+
+ if (queue_size > (region->nr_pages -
+ ((queue_addr >> PAGE_SHIFT) - region->start_pfn))) {
+ ret = -EINVAL;
+ goto out_unlock_vm;
+ }
+
+ /* Check address validity on cs_trace buffer etc. Don't care
+ * if not enabled (i.e. when size is 0).
+ */
+ if (reg_ex && reg_ex->ex_buffer_size) {
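+ /* Round the cs_trace buffer size up to a whole number of pages */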
+ int buf_pages = (reg_ex->ex_buffer_size +
+ (1 << PAGE_SHIFT) - 1) >> PAGE_SHIFT;
+
+ region = kbase_region_tracker_find_region_enclosing_address(
+ kctx, reg_ex->ex_buffer_base);
+ if (kbase_is_region_invalid_or_free(region)) {
+ ret = -ENOENT;
+ goto out_unlock_vm;
+ }
+
+ if (buf_pages > (region->nr_pages -
+ ((reg_ex->ex_buffer_base >> PAGE_SHIFT) -
+ region->start_pfn))) {
+ ret = -EINVAL;
+ goto out_unlock_vm;
+ }
+
+ region = kbase_region_tracker_find_region_enclosing_address(
+ kctx, reg_ex->ex_offset_var_addr);
+ if (kbase_is_region_invalid_or_free(region)) {
+ ret = -ENOENT;
+ goto out_unlock_vm;
+ }
+ }
+
+ queue = kzalloc(sizeof(struct kbase_queue), GFP_KERNEL);
+
+ if (!queue) {
+ ret = -ENOMEM;
+ goto out_unlock_vm;
+ }
+
+ queue->kctx = kctx;
+ queue->base_addr = queue_addr;
+ queue->queue_reg = region;
+ queue->size = (queue_size << PAGE_SHIFT);
+ queue->csi_index = KBASEP_IF_NR_INVALID;
+ queue->enabled = false;
+
+ queue->priority = reg->priority;
+ atomic_set(&queue->refcount, 1);
+
+ queue->group = NULL;
+ queue->bind_state = KBASE_CSF_QUEUE_UNBOUND;
+ queue->handle = BASEP_MEM_INVALID_HANDLE;
+ queue->doorbell_nr = KBASEP_USER_DB_NR_INVALID;
+
+ queue->status_wait = 0;
+ queue->sync_ptr = 0;
+ queue->sync_value = 0;
+
+ queue->sb_status = 0;
+ queue->blocked_reason = CS_STATUS_BLOCKED_REASON_REASON_UNBLOCKED;
+
+ INIT_LIST_HEAD(&queue->link);
+ INIT_LIST_HEAD(&queue->error.link);
+ INIT_WORK(&queue->oom_event_work, oom_event_worker);
+ INIT_WORK(&queue->fatal_event_work, fatal_event_worker);
+ list_add(&queue->link, &kctx->csf.queue_list);
+
+ region->flags |= KBASE_REG_NO_USER_FREE;
+
+ /* Initialize the cs_trace configuration parameters. When buffer_size
+ * is 0, tracing is disabled. Only update the fields when tracing is
+ * enabled, otherwise leave them as their default zeros.
+ */
+ if (reg_ex && reg_ex->ex_buffer_size) {
+ u32 cfg = CS_INSTR_CONFIG_EVENT_SIZE_SET(
+ 0, reg_ex->ex_event_size);
+ cfg = CS_INSTR_CONFIG_EVENT_STATE_SET(
+ cfg, reg_ex->ex_event_state);
+
+ queue->trace_cfg = cfg;
+ queue->trace_buffer_size = reg_ex->ex_buffer_size;
+ queue->trace_buffer_base = reg_ex->ex_buffer_base;
+ queue->trace_offset_ptr = reg_ex->ex_offset_var_addr;
+ }
+
+out_unlock_vm:
+ kbase_gpu_vm_unlock(kctx);
+out:
+ mutex_unlock(&kctx->csf.lock);
+
+ return ret;
+}
+
+int kbase_csf_queue_register(struct kbase_context *kctx,
+ struct kbase_ioctl_cs_queue_register *reg)
+{
+ return csf_queue_register_internal(kctx, reg, NULL);
+}
+
+int kbase_csf_queue_register_ex(struct kbase_context *kctx,
+ struct kbase_ioctl_cs_queue_register_ex *reg)
+{
+ struct kbase_csf_global_iface const *const iface =
+ &kctx->kbdev->csf.global_iface;
+ u32 const glb_version = iface->version;
+ u32 instr = iface->instr_features;
+ u8 max_size = GLB_INSTR_FEATURES_EVENT_SIZE_MAX_GET(instr);
+ u32 min_buf_size = (1u << reg->ex_event_size) *
+ GLB_INSTR_FEATURES_OFFSET_UPDATE_RATE_GET(instr);
+
+ /* If the cs_trace command is not supported, the call fails */
+ if (glb_version < kbase_csf_interface_version(1, 1, 0))
+ return -EINVAL;
+
+ /* Validate the cs_trace configuration parameters */
+ if (reg->ex_buffer_size &&
+ ((reg->ex_event_size > max_size) ||
+ (reg->ex_buffer_size & (reg->ex_buffer_size - 1)) ||
+ (reg->ex_buffer_size < min_buf_size)))
+ return -EINVAL;
+
+ return csf_queue_register_internal(kctx, NULL, reg);
+}
+
+static void unbind_queue(struct kbase_context *kctx,
+ struct kbase_queue *queue);
+
+void kbase_csf_queue_terminate(struct kbase_context *kctx,
+ struct kbase_ioctl_cs_queue_terminate *term)
+{
+ struct kbase_device *kbdev = kctx->kbdev;
+ struct kbase_queue *queue;
+ int err;
+ bool reset_prevented = false;
+
+ err = kbase_reset_gpu_prevent_and_wait(kbdev);
+ if (err)
+ dev_warn(
+ kbdev->dev,
+ "Unsuccessful GPU reset detected when terminating queue (buffer_addr=0x%.16llx), attempting to terminate regardless",
+ term->buffer_gpu_addr);
+ else
+ reset_prevented = true;
+
+ mutex_lock(&kctx->csf.lock);
+ queue = find_queue(kctx, term->buffer_gpu_addr);
+
+ if (queue) {
+ unsigned long flags;
+
+ /* As the GPU queue has been terminated by userspace, undo the
+ * actions that were performed when the queue was registered,
+ * i.e. remove the queue from the per-context list and release
+ * the initial reference. Subsequent lookups for the queue in
+ * find_queue() will then fail.
+ */
+ list_del_init(&queue->link);
+
+ /* Stop the CSI to which queue was bound */
+ unbind_queue(kctx, queue);
+
+ kbase_gpu_vm_lock(kctx);
+ if (!WARN_ON(!queue->queue_reg)) {
+ /* After this, userspace will be able to free the
+ * memory for the GPU queue. If userspace missed
+ * terminating the queue, the cleanup happens on
+ * context termination, where teardown of the region
+ * tracker frees up the GPU queue memory.
+ */
+ queue->queue_reg->flags &= ~KBASE_REG_NO_USER_FREE;
+ }
+ kbase_gpu_vm_unlock(kctx);
+
+ spin_lock_irqsave(&kctx->csf.event_lock, flags);
+ dev_dbg(kctx->kbdev->dev,
+ "Remove any pending command queue fatal from context %pK\n",
+ (void *)kctx);
+ list_del_init(&queue->error.link);
+ spin_unlock_irqrestore(&kctx->csf.event_lock, flags);
+
+ release_queue(queue);
+ }
+
+ mutex_unlock(&kctx->csf.lock);
+ if (reset_prevented)
+ kbase_reset_gpu_allow(kbdev);
+}
+
+int kbase_csf_queue_bind(struct kbase_context *kctx, union kbase_ioctl_cs_queue_bind *bind)
+{
+ struct kbase_queue *queue;
+ struct kbase_queue_group *group;
+ u8 max_streams;
+ int ret = -EINVAL;
+
+ mutex_lock(&kctx->csf.lock);
+
+ group = find_queue_group(kctx, bind->in.group_handle);
+ queue = find_queue(kctx, bind->in.buffer_gpu_addr);
+
+ if (!group || !queue)
+ goto out;
+
+ /* For the time being, all CSGs have the same number of CSs
+ * so we check CSG 0 for this number
+ */
+ max_streams = kctx->kbdev->csf.global_iface.groups[0].stream_num;
+
+ if (bind->in.csi_index >= max_streams)
+ goto out;
+
+ if (group->run_state == KBASE_CSF_GROUP_TERMINATED)
+ goto out;
+
+ if (queue->group || group->bound_queues[bind->in.csi_index])
+ goto out;
+
+ ret = get_user_pages_mmap_handle(kctx, queue);
+ if (ret)
+ goto out;
+
+ bind->out.mmap_handle = queue->handle;
+ group->bound_queues[bind->in.csi_index] = queue;
+ queue->group = group;
+ queue->csi_index = bind->in.csi_index;
+ queue->bind_state = KBASE_CSF_QUEUE_BIND_IN_PROGRESS;
+
+out:
+ mutex_unlock(&kctx->csf.lock);
+
+ return ret;
+}
+
+static struct kbase_queue_group *get_bound_queue_group(
+ struct kbase_queue *queue)
+{
+ struct kbase_context *kctx = queue->kctx;
+ struct kbase_queue_group *group;
+
+ if (queue->bind_state == KBASE_CSF_QUEUE_UNBOUND)
+ return NULL;
+
+ if (!queue->group)
+ return NULL;
+
+ if (queue->csi_index == KBASEP_IF_NR_INVALID) {
+ dev_warn(kctx->kbdev->dev, "CS interface index is incorrect\n");
+ return NULL;
+ }
+
+ group = queue->group;
+
+ if (group->bound_queues[queue->csi_index] != queue) {
+ dev_warn(kctx->kbdev->dev, "Incorrect mapping between queues & queue groups\n");
+ return NULL;
+ }
+
+ return group;
+}
+
+void kbase_csf_ring_csg_doorbell(struct kbase_device *kbdev, int slot)
+{
+ if (WARN_ON(slot < 0))
+ return;
+
+ kbase_csf_ring_csg_slots_doorbell(kbdev, (u32) (1 << slot));
+}
+
+void kbase_csf_ring_csg_slots_doorbell(struct kbase_device *kbdev,
+ u32 slot_bitmap)
+{
+ const struct kbase_csf_global_iface *const global_iface =
+ &kbdev->csf.global_iface;
+ const u32 allowed_bitmap =
+ (u32) ((1U << kbdev->csf.global_iface.group_num) - 1);
+ u32 value;
+
+ if (WARN_ON(slot_bitmap > allowed_bitmap))
+ return;
+
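+ /* Toggle the GLB_DB_REQ bits for the requested slots so that they no
+ * longer match GLB_DB_ACK; the resulting mismatch flags a pending
+ * doorbell request for those CSGs to the firmware.
+ */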
+ value = kbase_csf_firmware_global_output(global_iface, GLB_DB_ACK);
+ value ^= slot_bitmap;
+ kbase_csf_firmware_global_input_mask(global_iface, GLB_DB_REQ, value,
+ slot_bitmap);
+
+ kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR);
+}
+
+void kbase_csf_ring_cs_user_doorbell(struct kbase_device *kbdev,
+ struct kbase_queue *queue)
+{
+ mutex_lock(&kbdev->csf.reg_lock);
+
+ if (queue->doorbell_nr != KBASEP_USER_DB_NR_INVALID)
+ kbase_csf_ring_doorbell(kbdev, queue->doorbell_nr);
+
+ mutex_unlock(&kbdev->csf.reg_lock);
+}
+
+void kbase_csf_ring_cs_kernel_doorbell(struct kbase_device *kbdev,
+ int csi_index, int csg_nr,
+ bool ring_csg_doorbell)
+{
+ struct kbase_csf_cmd_stream_group_info *ginfo;
+ u32 value;
+
+ if (WARN_ON(csg_nr < 0) ||
+ WARN_ON(csg_nr >= kbdev->csf.global_iface.group_num))
+ return;
+
+ ginfo = &kbdev->csf.global_iface.groups[csg_nr];
+
+ if (WARN_ON(csi_index < 0) ||
+ WARN_ON(csi_index >= ginfo->stream_num))
+ return;
+
+ value = kbase_csf_firmware_csg_output(ginfo, CSG_DB_ACK);
+ value ^= (1 << csi_index);
+ kbase_csf_firmware_csg_input_mask(ginfo, CSG_DB_REQ, value,
+ 1 << csi_index);
+
+ if (likely(ring_csg_doorbell))
+ kbase_csf_ring_csg_doorbell(kbdev, csg_nr);
+}
+
+int kbase_csf_queue_kick(struct kbase_context *kctx,
+ struct kbase_ioctl_cs_queue_kick *kick)
+{
+ struct kbase_device *kbdev = kctx->kbdev;
+ struct kbase_queue_group *group;
+ struct kbase_queue *queue;
+ int err = 0;
+
+ err = kbase_reset_gpu_prevent_and_wait(kbdev);
+ if (err) {
+ dev_warn(
+ kbdev->dev,
+ "Unsuccessful GPU reset detected when kicking queue (buffer_addr=0x%.16llx)",
+ kick->buffer_gpu_addr);
+ return err;
+ }
+
+ mutex_lock(&kctx->csf.lock);
+ queue = find_queue(kctx, kick->buffer_gpu_addr);
+ if (!queue)
+ err = -EINVAL;
+
+ if (!err) {
+ group = get_bound_queue_group(queue);
+ if (!group) {
+ dev_err(kctx->kbdev->dev, "queue not bound\n");
+ err = -EINVAL;
+ }
+ }
+
+ if (!err)
+ err = kbase_csf_scheduler_queue_start(queue);
+ mutex_unlock(&kctx->csf.lock);
+ kbase_reset_gpu_allow(kbdev);
+
+ return err;
+}
+
+static void unbind_stopped_queue(struct kbase_context *kctx,
+ struct kbase_queue *queue)
+{
+ lockdep_assert_held(&kctx->csf.lock);
+
+ if (queue->bind_state != KBASE_CSF_QUEUE_UNBOUND) {
+ unsigned long flags;
+
+ kbase_csf_scheduler_spin_lock(kctx->kbdev, &flags);
+ bitmap_clear(queue->group->protm_pending_bitmap,
+ queue->csi_index, 1);
+ KBASE_KTRACE_ADD_CSF_GRP_Q(kctx->kbdev, PROTM_PENDING_CLEAR,
+ queue->group, queue, queue->group->protm_pending_bitmap[0]);
+ queue->group->bound_queues[queue->csi_index] = NULL;
+ queue->group = NULL;
+ kbase_csf_scheduler_spin_unlock(kctx->kbdev, flags);
+
+ put_user_pages_mmap_handle(kctx, queue);
+ queue->bind_state = KBASE_CSF_QUEUE_UNBOUND;
+ }
+}
+
+/**
+ * unbind_queue() - Remove the linkage between a GPU command queue and the group
+ * to which it was bound or being bound.
+ *
+ * @kctx: Address of the kbase context within which the queue was created.
+ * @queue: Pointer to the queue to be unlinked.
+ *
+ * This function will also send the stop request to firmware for the CS
+ * if the group to which the GPU command queue was bound is scheduled.
+ *
+ * This function is called when:
+ * - a queue is being unbound. This happens when the IO mapping
+ *   created on bind is removed explicitly by userspace or when the
+ *   process is exiting.
+ * - a queue group which still has queues bound to it is being
+ *   terminated. This can happen on an explicit terminate request from
+ *   userspace or when the kbase context is being terminated.
+ * - a queue is being terminated without completing the bind operation.
+ *   This can happen if the queue group is terminated after the
+ *   CS_QUEUE_BIND ioctl but before the second part of the bind
+ *   operation, which creates the IO mapping, is initiated.
+ * - there is a failure in executing the second part of the bind
+ *   operation, inside the mmap handler, which creates the IO mapping
+ *   for the queue.
+ */
+static void unbind_queue(struct kbase_context *kctx, struct kbase_queue *queue)
+{
+ kbase_reset_gpu_assert_failed_or_prevented(kctx->kbdev);
+ lockdep_assert_held(&kctx->csf.lock);
+
+ if (queue->bind_state != KBASE_CSF_QUEUE_UNBOUND) {
+ if (queue->bind_state == KBASE_CSF_QUEUE_BOUND)
+ kbase_csf_scheduler_queue_stop(queue);
+
+ unbind_stopped_queue(kctx, queue);
+ }
+}
+
+void kbase_csf_queue_unbind(struct kbase_queue *queue)
+{
+ struct kbase_context *kctx = queue->kctx;
+
+ lockdep_assert_held(&kctx->csf.lock);
+
+ /* As the process itself is exiting, the queue group can be terminated,
+ * which is much faster than stopping the individual queues. This
+ * ensures a faster exit for the process, especially in the case where
+ * a CSI gets stuck.
+ * The CSI STOP request waits for the in-flight work to drain, whereas
+ * the CSG TERM request results in an immediate abort or cancellation
+ * of the pending work.
+ */
+ if (current->flags & PF_EXITING) {
+ struct kbase_queue_group *group = get_bound_queue_group(queue);
+
+ if (group)
+ term_queue_group(group);
+
+ WARN_ON(queue->bind_state != KBASE_CSF_QUEUE_UNBOUND);
+ } else {
+ unbind_queue(kctx, queue);
+ }
+
+ /* Free the resources, if allocated for this queue. */
+ if (queue->reg)
+ kbase_csf_free_command_stream_user_pages(kctx, queue);
+}
+
+void kbase_csf_queue_unbind_stopped(struct kbase_queue *queue)
+{
+ struct kbase_context *kctx = queue->kctx;
+
+ lockdep_assert_held(&kctx->csf.lock);
+
+ WARN_ON(queue->bind_state == KBASE_CSF_QUEUE_BOUND);
+ unbind_stopped_queue(kctx, queue);
+
+ /* Free the resources, if allocated for this queue. */
+ if (queue->reg)
+ kbase_csf_free_command_stream_user_pages(kctx, queue);
+}
+
+/**
+ * find_free_group_handle() - Find a free handle for a queue group
+ *
+ * @kctx: Address of the kbase context within which the queue group
+ * is to be created.
+ *
+ * Return: a queue group handle on success, or a negative error code on failure.
+ */
+static int find_free_group_handle(struct kbase_context *const kctx)
+{
+ /* find the available index in the array of CSGs per this context */
+ int idx, group_handle = -ENOMEM;
+
+ lockdep_assert_held(&kctx->csf.lock);
+
+ for (idx = 0;
+ (idx != MAX_QUEUE_GROUP_NUM) && (group_handle < 0);
+ idx++) {
+ if (!kctx->csf.queue_groups[idx])
+ group_handle = idx;
+ }
+
+ return group_handle;
+}
+
+/**
+ * iface_has_enough_streams() - Check that at least one CSG supports
+ * a given number of CSs
+ *
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
+ * @cs_min: Minimum number of CSs required.
+ *
+ * Return: true if at least one CSG supports the given number
+ * of CSs (or more); otherwise false.
+ */
+static bool iface_has_enough_streams(struct kbase_device *const kbdev,
+ u32 const cs_min)
+{
+ bool has_enough = false;
+ struct kbase_csf_cmd_stream_group_info *const groups =
+ kbdev->csf.global_iface.groups;
+ const u32 group_num = kbdev->csf.global_iface.group_num;
+ u32 i;
+
+ for (i = 0; (i < group_num) && !has_enough; i++) {
+ if (groups[i].stream_num >= cs_min)
+ has_enough = true;
+ }
+
+ return has_enough;
+}
+
+/**
+ * create_normal_suspend_buffer() - Create normal-mode suspend buffer per
+ * queue group
+ *
+ * @kctx: Pointer to kbase context where the queue group is created at
+ * @s_buf: Pointer to suspend buffer that is attached to queue group
+ *
+ * Return: 0 if suspend buffer is successfully allocated and reflected to GPU
+ * MMU page table. Otherwise -ENOMEM.
+ */
+static int create_normal_suspend_buffer(struct kbase_context *const kctx,
+ struct kbase_normal_suspend_buffer *s_buf)
+{
+ struct kbase_va_region *reg = NULL;
+ const unsigned long mem_flags = KBASE_REG_GPU_RD | KBASE_REG_GPU_WR;
+ const size_t nr_pages =
+ PFN_UP(kctx->kbdev->csf.global_iface.groups[0].suspend_size);
+ int err = 0;
+
+ lockdep_assert_held(&kctx->csf.lock);
+
+ /* Allocate and initialize Region Object */
+ reg = kbase_alloc_free_region(&kctx->kbdev->csf.shared_reg_rbtree, 0,
+ nr_pages, KBASE_REG_ZONE_MCU_SHARED);
+
+ if (!reg)
+ return -ENOMEM;
+
+ s_buf->phy = kcalloc(nr_pages, sizeof(*s_buf->phy), GFP_KERNEL);
+
+ if (!s_buf->phy) {
+ err = -ENOMEM;
+ goto phy_alloc_failed;
+ }
+
+ /* Get physical page for a normal suspend buffer */
+ err = kbase_mem_pool_alloc_pages(
+ &kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_FW],
+ nr_pages, &s_buf->phy[0], false);
+
+ if (err < 0)
+ goto phy_pages_alloc_failed;
+
+ /* Insert Region Object into rbtree and make virtual address available
+ * to map it to physical page
+ */
+ mutex_lock(&kctx->kbdev->csf.reg_lock);
+ err = kbase_add_va_region_rbtree(kctx->kbdev, reg, 0, nr_pages, 1);
+ reg->flags &= ~KBASE_REG_FREE;
+ mutex_unlock(&kctx->kbdev->csf.reg_lock);
+
+ if (err)
+ goto add_va_region_failed;
+
+ /* Update MMU table */
+ err = kbase_mmu_insert_pages(kctx->kbdev, &kctx->kbdev->csf.mcu_mmu,
+ reg->start_pfn, &s_buf->phy[0],
+ nr_pages, mem_flags,
+ MCU_AS_NR, KBASE_MEM_GROUP_CSF_FW);
+ if (err)
+ goto mmu_insert_failed;
+
+ s_buf->reg = reg;
+
+ return 0;
+
+mmu_insert_failed:
+ mutex_lock(&kctx->kbdev->csf.reg_lock);
+ WARN_ON(kbase_remove_va_region(reg));
+ mutex_unlock(&kctx->kbdev->csf.reg_lock);
+
+add_va_region_failed:
+ kbase_mem_pool_free_pages(
+ &kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], nr_pages,
+ &s_buf->phy[0], false, false);
+
+phy_pages_alloc_failed:
+ kfree(s_buf->phy);
+phy_alloc_failed:
+ kfree(reg);
+
+ return err;
+}
+
+/**
+ * create_protected_suspend_buffer() - Create protected-mode suspend buffer
+ * per queue group
+ *
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
+ * @s_buf: Pointer to suspend buffer that is attached to queue group
+ *
+ * Return: 0 if suspend buffer is successfully allocated and reflected to GPU
+ * MMU page table. Otherwise -ENOMEM.
+ */
+static int create_protected_suspend_buffer(struct kbase_device *const kbdev,
+ struct kbase_protected_suspend_buffer *s_buf)
+{
+ struct kbase_va_region *reg = NULL;
+ struct tagged_addr *phys = NULL;
+ const unsigned long mem_flags = KBASE_REG_GPU_RD | KBASE_REG_GPU_WR;
+ const size_t nr_pages =
+ PFN_UP(kbdev->csf.global_iface.groups[0].suspend_size);
+ int err = 0;
+
+ /* Allocate and initialize Region Object */
+ reg = kbase_alloc_free_region(&kbdev->csf.shared_reg_rbtree, 0,
+ nr_pages, KBASE_REG_ZONE_MCU_SHARED);
+
+ if (!reg)
+ return -ENOMEM;
+
+ phys = kcalloc(nr_pages, sizeof(*phys), GFP_KERNEL);
+ if (!phys) {
+ err = -ENOMEM;
+ goto phy_alloc_failed;
+ }
+
+ s_buf->pma = kbase_csf_protected_memory_alloc(kbdev, phys,
+ nr_pages);
+ if (s_buf->pma == NULL) {
+ err = -ENOMEM;
+ goto pma_alloc_failed;
+ }
+
+ /* Insert Region Object into rbtree and make virtual address available
+ * to map it to physical page
+ */
+ mutex_lock(&kbdev->csf.reg_lock);
+ err = kbase_add_va_region_rbtree(kbdev, reg, 0, nr_pages, 1);
+ reg->flags &= ~KBASE_REG_FREE;
+ mutex_unlock(&kbdev->csf.reg_lock);
+
+ if (err)
+ goto add_va_region_failed;
+
+ /* Update MMU table */
+ err = kbase_mmu_insert_pages(kbdev, &kbdev->csf.mcu_mmu,
+ reg->start_pfn, phys,
+ nr_pages, mem_flags, MCU_AS_NR,
+ KBASE_MEM_GROUP_CSF_FW);
+ if (err)
+ goto mmu_insert_failed;
+
+ s_buf->reg = reg;
+ kfree(phys);
+ return 0;
+
+mmu_insert_failed:
+ mutex_lock(&kbdev->csf.reg_lock);
+ WARN_ON(kbase_remove_va_region(reg));
+ mutex_unlock(&kbdev->csf.reg_lock);
+
+add_va_region_failed:
+ kbase_csf_protected_memory_free(kbdev, s_buf->pma, nr_pages);
+pma_alloc_failed:
+ kfree(phys);
+phy_alloc_failed:
+ kfree(reg);
+
+ return err;
+}
+
+static void timer_event_worker(struct work_struct *data);
+static void protm_event_worker(struct work_struct *data);
+static void term_normal_suspend_buffer(struct kbase_context *const kctx,
+ struct kbase_normal_suspend_buffer *s_buf);
+
+/**
+ * create_suspend_buffers - Setup normal and protected mode
+ * suspend buffers.
+ *
+ * @kctx: Address of the kbase context within which the queue group
+ * is to be created.
+ * @group: Pointer to GPU command queue group data.
+ *
+ * Return: 0 if suspend buffers are successfully allocated. Otherwise -ENOMEM.
+ */
+static int create_suspend_buffers(struct kbase_context *const kctx,
+ struct kbase_queue_group * const group)
+{
+ int err = 0;
+
+ if (create_normal_suspend_buffer(kctx, &group->normal_suspend_buf)) {
+ dev_err(kctx->kbdev->dev, "Failed to create normal suspend buffer\n");
+ return -ENOMEM;
+ }
+
+ if (kctx->kbdev->csf.pma_dev) {
+ err = create_protected_suspend_buffer(kctx->kbdev,
+ &group->protected_suspend_buf);
+ if (err) {
+ term_normal_suspend_buffer(kctx,
+ &group->normal_suspend_buf);
+ dev_err(kctx->kbdev->dev, "Failed to create protected suspend buffer\n");
+ }
+ } else {
+ group->protected_suspend_buf.reg = NULL;
+ }
+
+ return err;
+}
+
+/**
+ * generate_group_uid() - Generate an ID for a queue group and CSG that is
+ * unique across all kbase devices and contexts.
+ *
+ * Return: A unique ID in the form of an unsigned 32-bit integer
+ */
+static u32 generate_group_uid(void)
+{
+ /* use first KBase device to store max UID */
+ struct kbase_device *kbdev = kbase_find_device(-1);
+ u32 uid = 1;
+
+ if (kbdev)
+ uid = (u32) atomic_inc_return(&kbdev->group_max_uid_in_devices);
+ else
+ WARN(1, "NULL kbase device pointer in group UID generation");
+
+ return uid;
+}
+
+/**
+ * create_queue_group() - Create a queue group
+ *
+ * @kctx: Address of the kbase context within which the queue group
+ * is to be created.
+ * @create: Address of a structure which contains details of the
+ * queue group which is to be created.
+ *
+ * Return: a queue group handle on success, or a negative error code on failure.
+ */
+static int create_queue_group(struct kbase_context *const kctx,
+ union kbase_ioctl_cs_queue_group_create *const create)
+{
+ int group_handle = find_free_group_handle(kctx);
+
+ if (group_handle < 0) {
+ dev_err(kctx->kbdev->dev,
+ "All queue group handles are already in use\n");
+ } else {
+ struct kbase_queue_group * const group =
+ kmalloc(sizeof(struct kbase_queue_group),
+ GFP_KERNEL);
+
+ lockdep_assert_held(&kctx->csf.lock);
+
+ if (!group) {
+ dev_err(kctx->kbdev->dev, "Failed to allocate a queue\n");
+ group_handle = -ENOMEM;
+ } else {
+ int err = 0;
+
+ group->kctx = kctx;
+ group->handle = group_handle;
+ group->csg_nr = KBASEP_CSG_NR_INVALID;
+
+ group->tiler_mask = create->in.tiler_mask;
+ group->fragment_mask = create->in.fragment_mask;
+ group->compute_mask = create->in.compute_mask;
+
+ group->tiler_max = create->in.tiler_max;
+ group->fragment_max = create->in.fragment_max;
+ group->compute_max = create->in.compute_max;
+ group->priority = kbase_csf_priority_queue_group_priority_to_relative(
+ kbase_csf_priority_check(kctx->kbdev, create->in.priority));
+ group->doorbell_nr = KBASEP_USER_DB_NR_INVALID;
+ group->faulted = false;
+
+ group->group_uid = generate_group_uid();
+ create->out.group_uid = group->group_uid;
+
+ INIT_LIST_HEAD(&group->link);
+ INIT_LIST_HEAD(&group->link_to_schedule);
+ INIT_LIST_HEAD(&group->error_fatal.link);
+ INIT_LIST_HEAD(&group->error_timeout.link);
+ INIT_LIST_HEAD(&group->error_tiler_oom.link);
+ INIT_WORK(&group->timer_event_work, timer_event_worker);
+ INIT_WORK(&group->protm_event_work, protm_event_worker);
+ bitmap_zero(group->protm_pending_bitmap,
+ MAX_SUPPORTED_STREAMS_PER_GROUP);
+
+ group->run_state = KBASE_CSF_GROUP_INACTIVE;
+ err = create_suspend_buffers(kctx, group);
+
+ if (err < 0) {
+ kfree(group);
+ group_handle = err;
+ } else {
+ int j;
+
+ kctx->csf.queue_groups[group_handle] = group;
+ for (j = 0; j < MAX_SUPPORTED_STREAMS_PER_GROUP;
+ j++)
+ group->bound_queues[j] = NULL;
+ }
+ }
+ }
+
+ return group_handle;
+}
+
+int kbase_csf_queue_group_create(struct kbase_context *const kctx,
+ union kbase_ioctl_cs_queue_group_create *const create)
+{
+ int err = 0;
+ const u32 tiler_count = hweight64(create->in.tiler_mask);
+ const u32 fragment_count = hweight64(create->in.fragment_mask);
+ const u32 compute_count = hweight64(create->in.compute_mask);
+
+ mutex_lock(&kctx->csf.lock);
+
+ if ((create->in.tiler_max > tiler_count) ||
+ (create->in.fragment_max > fragment_count) ||
+ (create->in.compute_max > compute_count)) {
+ dev_err(kctx->kbdev->dev,
+ "Invalid maximum number of endpoints for a queue group\n");
+ err = -EINVAL;
+ } else if (create->in.priority >= BASE_QUEUE_GROUP_PRIORITY_COUNT) {
+ dev_err(kctx->kbdev->dev, "Invalid queue group priority %u\n",
+ (unsigned int)create->in.priority);
+ err = -EINVAL;
+ } else if (!iface_has_enough_streams(kctx->kbdev, create->in.cs_min)) {
+ dev_err(kctx->kbdev->dev,
+ "No CSG has at least %d CSs\n",
+ create->in.cs_min);
+ err = -EINVAL;
+ } else {
+ /* For a CSG that satisfies the condition of having the needed
+ * number of CSs, check whether it also meets the requirement that
+ * at least one of its CSs has the iterator of the needed type
+ * (note: for CSF v1.0 all CSs in a CSG have access to the same
+ * iterators).
+ */
+ const int group_handle = create_queue_group(kctx, create);
+
+ if (group_handle >= 0)
+ create->out.group_handle = group_handle;
+ else
+ err = group_handle;
+ }
+
+ mutex_unlock(&kctx->csf.lock);
+
+ return err;
+}
+
+/**
+ * term_normal_suspend_buffer() - Free normal-mode suspend buffer of queue group
+ *
+ * @kctx: Pointer to kbase context where queue group belongs to
+ * @s_buf: Pointer to queue group suspend buffer to be freed
+ */
+static void term_normal_suspend_buffer(struct kbase_context *const kctx,
+ struct kbase_normal_suspend_buffer *s_buf)
+{
+ const size_t nr_pages =
+ PFN_UP(kctx->kbdev->csf.global_iface.groups[0].suspend_size);
+
+ lockdep_assert_held(&kctx->csf.lock);
+
+ WARN_ON(kbase_mmu_teardown_pages(
+ kctx->kbdev, &kctx->kbdev->csf.mcu_mmu,
+ s_buf->reg->start_pfn, nr_pages, MCU_AS_NR));
+
+ WARN_ON(s_buf->reg->flags & KBASE_REG_FREE);
+
+ mutex_lock(&kctx->kbdev->csf.reg_lock);
+ WARN_ON(kbase_remove_va_region(s_buf->reg));
+ mutex_unlock(&kctx->kbdev->csf.reg_lock);
+
+ kbase_mem_pool_free_pages(
+ &kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_FW],
+ nr_pages, &s_buf->phy[0], false, false);
+
+ kfree(s_buf->phy);
+ s_buf->phy = NULL;
+ kfree(s_buf->reg);
+ s_buf->reg = NULL;
+}
+
+/**
+ * term_protected_suspend_buffer() - Free protected-mode suspend buffer of
+ * queue group
+ *
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
+ * @s_buf: Pointer to queue group suspend buffer to be freed
+ */
+static void term_protected_suspend_buffer(struct kbase_device *const kbdev,
+ struct kbase_protected_suspend_buffer *s_buf)
+{
+ const size_t nr_pages =
+ PFN_UP(kbdev->csf.global_iface.groups[0].suspend_size);
+
+ WARN_ON(kbase_mmu_teardown_pages(
+ kbdev, &kbdev->csf.mcu_mmu,
+ s_buf->reg->start_pfn, nr_pages, MCU_AS_NR));
+
+ WARN_ON(s_buf->reg->flags & KBASE_REG_FREE);
+
+ mutex_lock(&kbdev->csf.reg_lock);
+ WARN_ON(kbase_remove_va_region(s_buf->reg));
+ mutex_unlock(&kbdev->csf.reg_lock);
+
+ kbase_csf_protected_memory_free(kbdev, s_buf->pma, nr_pages);
+ s_buf->pma = NULL;
+ kfree(s_buf->reg);
+ s_buf->reg = NULL;
+}
+
+void kbase_csf_term_descheduled_queue_group(struct kbase_queue_group *group)
+{
+ struct kbase_context *kctx = group->kctx;
+
+ /* Currently each group supports the same number of CS */
+ u32 max_streams =
+ kctx->kbdev->csf.global_iface.groups[0].stream_num;
+ u32 i;
+
+ lockdep_assert_held(&kctx->csf.lock);
+
+ WARN_ON(group->run_state != KBASE_CSF_GROUP_INACTIVE &&
+ group->run_state != KBASE_CSF_GROUP_FAULT_EVICTED);
+
+ for (i = 0; i < max_streams; i++) {
+ struct kbase_queue *queue =
+ group->bound_queues[i];
+
+ /* The group is already being evicted from the scheduler */
+ if (queue)
+ unbind_stopped_queue(kctx, queue);
+ }
+
+ term_normal_suspend_buffer(kctx, &group->normal_suspend_buf);
+ if (kctx->kbdev->csf.pma_dev)
+ term_protected_suspend_buffer(kctx->kbdev,
+ &group->protected_suspend_buf);
+
+ group->run_state = KBASE_CSF_GROUP_TERMINATED;
+}
+
+/**
+ * term_queue_group - Terminate a GPU command queue group.
+ *
+ * @group: Pointer to GPU command queue group data.
+ *
+ * Terminates a GPU command queue group. From the userspace perspective the
+ * group still exists, but no new queues can be bound to it. Userspace can
+ * still add work to queues bound to the group, but it will not be executed.
+ * (This is because the IO mapping created upon binding such queues is still
+ * intact.)
+ */
+static void term_queue_group(struct kbase_queue_group *group)
+{
+ struct kbase_context *kctx = group->kctx;
+
+ kbase_reset_gpu_assert_failed_or_prevented(kctx->kbdev);
+ lockdep_assert_held(&kctx->csf.lock);
+
+ /* Stop the group and evict it from the scheduler */
+ kbase_csf_scheduler_group_deschedule(group);
+
+ if (group->run_state == KBASE_CSF_GROUP_TERMINATED)
+ return;
+
+ dev_dbg(kctx->kbdev->dev, "group %d terminating", group->handle);
+
+ kbase_csf_term_descheduled_queue_group(group);
+}
+
+static void cancel_queue_group_events(struct kbase_queue_group *group)
+{
+ cancel_work_sync(&group->timer_event_work);
+ cancel_work_sync(&group->protm_event_work);
+}
+
+void kbase_csf_queue_group_terminate(struct kbase_context *kctx,
+ u8 group_handle)
+{
+ struct kbase_queue_group *group;
+ int err;
+ bool reset_prevented = false;
+ struct kbase_device *const kbdev = kctx->kbdev;
+
+ err = kbase_reset_gpu_prevent_and_wait(kbdev);
+ if (err)
+ dev_warn(
+ kbdev->dev,
+ "Unsuccessful GPU reset detected when terminating group %d, attempting to terminate regardless",
+ group_handle);
+ else
+ reset_prevented = true;
+
+ mutex_lock(&kctx->csf.lock);
+
+ group = find_queue_group(kctx, group_handle);
+
+ if (group) {
+ unsigned long flags;
+
+ spin_lock_irqsave(&kctx->csf.event_lock, flags);
+
+ dev_dbg(kbdev->dev,
+ "Remove any pending group fatal error from context %pK\n",
+ (void *)group->kctx);
+
+ list_del_init(&group->error_tiler_oom.link);
+ list_del_init(&group->error_timeout.link);
+ list_del_init(&group->error_fatal.link);
+ spin_unlock_irqrestore(&kctx->csf.event_lock, flags);
+
+ term_queue_group(group);
+ kctx->csf.queue_groups[group_handle] = NULL;
+ }
+
+ mutex_unlock(&kctx->csf.lock);
+ if (reset_prevented)
+ kbase_reset_gpu_allow(kbdev);
+
+ if (!group)
+ return;
+
+ /* Cancel any pending event callbacks. If one is in progress
+ * then this thread waits synchronously for it to complete (which
+ * is why we must unlock the context first). We already ensured
+ * that no more callbacks can be enqueued by terminating the group.
+ */
+ cancel_queue_group_events(group);
+ kfree(group);
+}
+
+int kbase_csf_queue_group_suspend(struct kbase_context *kctx,
+ struct kbase_suspend_copy_buffer *sus_buf,
+ u8 group_handle)
+{
+ struct kbase_device *const kbdev = kctx->kbdev;
+ int err;
+ struct kbase_queue_group *group;
+
+ err = kbase_reset_gpu_prevent_and_wait(kbdev);
+ if (err) {
+ dev_warn(
+ kbdev->dev,
+ "Unsuccessful GPU reset detected when suspending group %d",
+ group_handle);
+ return err;
+ }
+ mutex_lock(&kctx->csf.lock);
+
+ group = find_queue_group(kctx, group_handle);
+ if (group)
+ err = kbase_csf_scheduler_group_copy_suspend_buf(group,
+ sus_buf);
+ else
+ err = -EINVAL;
+
+ mutex_unlock(&kctx->csf.lock);
+ kbase_reset_gpu_allow(kbdev);
+
+ return err;
+}
+
+/**
+ * add_error() - Add an error to the list of errors to report to user space
+ *
+ * @kctx: Address of a base context associated with a GPU address space.
+ * @error: Address of the item to be added to the context's pending error list.
+ * @data: Error data to be returned to userspace.
+ *
+ * Does not wake up the event queue blocking a user thread in kbase_poll. This
+ * is to make it more efficient to add multiple errors.
+ *
+ * The added error must not already be on the context's list of errors waiting
+ * to be reported (e.g. because a previous error concerning the same object has
+ * not yet been reported).
+ */
+static void add_error(struct kbase_context *const kctx,
+ struct kbase_csf_notification *const error,
+ struct base_csf_notification const *const data)
+{
+ unsigned long flags;
+
+ if (WARN_ON(!kctx))
+ return;
+
+ if (WARN_ON(!error))
+ return;
+
+ if (WARN_ON(!data))
+ return;
+
+ spin_lock_irqsave(&kctx->csf.event_lock, flags);
+
+ if (!WARN_ON(!list_empty(&error->link))) {
+ error->data = *data;
+ list_add_tail(&error->link, &kctx->csf.error_list);
+ dev_dbg(kctx->kbdev->dev,
+ "Added error %pK of type %d in context %pK\n",
+ (void *)error, data->type, (void *)kctx);
+ }
+
+ spin_unlock_irqrestore(&kctx->csf.event_lock, flags);
+}
+
+void kbase_csf_add_group_fatal_error(
+ struct kbase_queue_group *const group,
+ struct base_gpu_queue_group_error const *const err_payload)
+{
+ struct base_csf_notification error;
+
+ if (WARN_ON(!group))
+ return;
+
+ if (WARN_ON(!err_payload))
+ return;
+
+ error = (struct base_csf_notification) {
+ .type = BASE_CSF_NOTIFICATION_GPU_QUEUE_GROUP_ERROR,
+ .payload = {
+ .csg_error = {
+ .handle = group->handle,
+ .error = *err_payload
+ }
+ }
+ };
+
+ add_error(group->kctx, &group->error_fatal, &error);
+}
+
+void kbase_csf_active_queue_groups_reset(struct kbase_device *kbdev,
+ struct kbase_context *kctx)
+{
+ struct list_head evicted_groups;
+ struct kbase_queue_group *group;
+ int i;
+
+ INIT_LIST_HEAD(&evicted_groups);
+
+ mutex_lock(&kctx->csf.lock);
+
+ kbase_csf_scheduler_evict_ctx_slots(kbdev, kctx, &evicted_groups);
+ while (!list_empty(&evicted_groups)) {
+ group = list_first_entry(&evicted_groups,
+ struct kbase_queue_group, link);
+
+ dev_dbg(kbdev->dev, "Context %d_%d active group %d terminated",
+ kctx->tgid, kctx->id, group->handle);
+ kbase_csf_term_descheduled_queue_group(group);
+ list_del_init(&group->link);
+ }
+
+ /* Acting on the queue groups that are pending to be terminated. */
+ for (i = 0; i < MAX_QUEUE_GROUP_NUM; i++) {
+ group = kctx->csf.queue_groups[i];
+ if (group &&
+ group->run_state == KBASE_CSF_GROUP_FAULT_EVICTED)
+ kbase_csf_term_descheduled_queue_group(group);
+ }
+
+ mutex_unlock(&kctx->csf.lock);
+}
+
+int kbase_csf_ctx_init(struct kbase_context *kctx)
+{
+ struct kbase_device *kbdev = kctx->kbdev;
+ int err = -ENOMEM;
+
+ INIT_LIST_HEAD(&kctx->csf.event_callback_list);
+ INIT_LIST_HEAD(&kctx->csf.queue_list);
+ INIT_LIST_HEAD(&kctx->csf.link);
+ INIT_LIST_HEAD(&kctx->csf.error_list);
+
+ spin_lock_init(&kctx->csf.event_lock);
+ kctx->csf.user_reg_vma = NULL;
+ mutex_lock(&kbdev->pm.lock);
+ /* The inode information for the /dev/malixx file is not available at
+ * the time of device probe, as the inode is created when the device
+ * node is created by udevd (through mknod).
+ */
+ if (kctx->filp) {
+ if (!kbdev->csf.mali_file_inode)
+ kbdev->csf.mali_file_inode = kctx->filp->f_inode;
+
+ /* inode is unique for a file */
+ WARN_ON(kbdev->csf.mali_file_inode != kctx->filp->f_inode);
+ }
+ mutex_unlock(&kbdev->pm.lock);
+
+ /* Mark all the cookies as 'free' */
+ bitmap_fill(kctx->csf.cookies, KBASE_CSF_NUM_USER_IO_PAGES_HANDLE);
+
+ kctx->csf.wq = alloc_workqueue("mali_kbase_csf_wq",
+ WQ_UNBOUND, 1);
+
+ if (likely(kctx->csf.wq)) {
+ err = kbase_csf_scheduler_context_init(kctx);
+
+ if (likely(!err)) {
+ err = kbase_csf_kcpu_queue_context_init(kctx);
+
+ if (likely(!err)) {
+ err = kbase_csf_tiler_heap_context_init(kctx);
+
+ if (likely(!err))
+ mutex_init(&kctx->csf.lock);
+ else
+ kbase_csf_kcpu_queue_context_term(kctx);
+ }
+
+ if (unlikely(err))
+ kbase_csf_scheduler_context_term(kctx);
+ }
+
+ if (unlikely(err))
+ destroy_workqueue(kctx->csf.wq);
+ }
+
+ return err;
+}
+
+void kbase_csf_ctx_handle_fault(struct kbase_context *kctx,
+ struct kbase_fault *fault)
+{
+ int gr;
+ bool reported = false;
+ struct base_gpu_queue_group_error err_payload;
+ int err;
+ struct kbase_device *kbdev;
+
+ if (WARN_ON(!kctx))
+ return;
+
+ if (WARN_ON(!fault))
+ return;
+
+ kbdev = kctx->kbdev;
+ err = kbase_reset_gpu_try_prevent(kbdev);
+ /* Regardless of whether reset failed or is currently happening, exit
+ * early
+ */
+ if (err)
+ return;
+
+ err_payload = (struct base_gpu_queue_group_error) {
+ .error_type = BASE_GPU_QUEUE_GROUP_ERROR_FATAL,
+ .payload = {
+ .fatal_group = {
+ .sideband = fault->addr,
+ .status = fault->status,
+ }
+ }
+ };
+
+ mutex_lock(&kctx->csf.lock);
+
+ for (gr = 0; gr < MAX_QUEUE_GROUP_NUM; gr++) {
+ struct kbase_queue_group *const group =
+ kctx->csf.queue_groups[gr];
+
+ if (group && group->run_state != KBASE_CSF_GROUP_TERMINATED) {
+ term_queue_group(group);
+ kbase_csf_add_group_fatal_error(group, &err_payload);
+ reported = true;
+ }
+ }
+
+ mutex_unlock(&kctx->csf.lock);
+
+ if (reported)
+ kbase_event_wakeup(kctx);
+
+ kbase_reset_gpu_allow(kbdev);
+}
+
+void kbase_csf_ctx_term(struct kbase_context *kctx)
+{
+ struct kbase_device *kbdev = kctx->kbdev;
+ struct kbase_as *as = NULL;
+ unsigned long flags;
+ u32 i;
+ int err;
+ bool reset_prevented = false;
+
+ /* As the kbase context is terminating, its debugfs sub-directory has
+ * already been removed, as have the debugfs files created for queue
+ * groups and kcpu queues, so there is no need to remove those debugfs
+ * files explicitly.
+ */
+ kbase_csf_event_wait_remove_all(kctx);
+
+ /* Wait for a GPU reset if it is happening, prevent it if not happening */
+ err = kbase_reset_gpu_prevent_and_wait(kbdev);
+ if (err)
+ dev_warn(
+ kbdev->dev,
+ "Unsuccessful GPU reset detected when terminating csf context (%d_%d), attempting to terminate regardless",
+ kctx->tgid, kctx->id);
+ else
+ reset_prevented = true;
+
+ mutex_lock(&kctx->csf.lock);
+ /* Iterate through the queue groups that were not terminated by
+ * userspace and issue the term request to firmware for them.
+ */
+ for (i = 0; i < MAX_QUEUE_GROUP_NUM; i++) {
+ if (kctx->csf.queue_groups[i])
+ term_queue_group(kctx->csf.queue_groups[i]);
+ }
+ mutex_unlock(&kctx->csf.lock);
+
+ if (reset_prevented)
+ kbase_reset_gpu_allow(kbdev);
+
+ /* Now that all queue groups have been terminated, there can be no
+ * more OoM or timer event interrupts but there can be inflight work
+ * items. Destroying the wq will implicitly flush those work items.
+ */
+ destroy_workqueue(kctx->csf.wq);
+
+ /* Wait for the firmware error work item to also finish as it could
+ * be affecting this outgoing context also.
+ */
+ flush_work(&kctx->kbdev->csf.fw_error_work);
+
+ /* A work item to handle page_fault/bus_fault/gpu_fault could be
+ * pending for the outgoing context. Flush the workqueue that will
+ * execute that work item.
+ */
+ spin_lock_irqsave(&kctx->kbdev->hwaccess_lock, flags);
+ if (kctx->as_nr != KBASEP_AS_NR_INVALID)
+ as = &kctx->kbdev->as[kctx->as_nr];
+ spin_unlock_irqrestore(&kctx->kbdev->hwaccess_lock, flags);
+ if (as)
+ flush_workqueue(as->pf_wq);
+
+ mutex_lock(&kctx->csf.lock);
+
+ for (i = 0; i < MAX_QUEUE_GROUP_NUM; i++) {
+ kfree(kctx->csf.queue_groups[i]);
+ kctx->csf.queue_groups[i] = NULL;
+ }
+
+ /* Iterate through the queues that were not terminated by
+ * userspace and do the required cleanup for them.
+ */
+ while (!list_empty(&kctx->csf.queue_list)) {
+ struct kbase_queue *queue;
+
+ queue = list_first_entry(&kctx->csf.queue_list,
+ struct kbase_queue, link);
+
+ /* The reference held when the IO mapping was created on bind
+ * has already been dropped, otherwise the termination of the
+ * Kbase context itself would not have kicked in. So there
+ * should be only one reference left, taken when the queue
+ * was registered.
+ */
+ if (atomic_read(&queue->refcount) != 1)
+ dev_warn(kctx->kbdev->dev,
+ "Releasing queue with incorrect refcounting!\n");
+ list_del_init(&queue->link);
+ release_queue(queue);
+ }
+
+ mutex_unlock(&kctx->csf.lock);
+
+ kbase_csf_tiler_heap_context_term(kctx);
+ kbase_csf_kcpu_queue_context_term(kctx);
+ kbase_csf_scheduler_context_term(kctx);
+
+ mutex_destroy(&kctx->csf.lock);
+}
+
+int kbase_csf_event_wait_add(struct kbase_context *kctx,
+ kbase_csf_event_callback *callback, void *param)
+{
+ int err = -ENOMEM;
+ struct kbase_csf_event *event =
+ kzalloc(sizeof(struct kbase_csf_event), GFP_KERNEL);
+
+ if (event) {
+ unsigned long flags;
+
+ event->kctx = kctx;
+ event->callback = callback;
+ event->param = param;
+
+ spin_lock_irqsave(&kctx->csf.event_lock, flags);
+ list_add_tail(&event->link, &kctx->csf.event_callback_list);
+ dev_dbg(kctx->kbdev->dev,
+ "Added event handler %pK with param %pK\n", event,
+ event->param);
+ spin_unlock_irqrestore(&kctx->csf.event_lock, flags);
+
+ err = 0;
+ }
+
+ return err;
+}
+
+void kbase_csf_event_wait_remove(struct kbase_context *kctx,
+ kbase_csf_event_callback *callback, void *param)
+{
+ struct kbase_csf_event *event;
+ unsigned long flags;
+
+ spin_lock_irqsave(&kctx->csf.event_lock, flags);
+
+ list_for_each_entry(event, &kctx->csf.event_callback_list, link) {
+ if ((event->callback == callback) && (event->param == param)) {
+ list_del(&event->link);
+ dev_dbg(kctx->kbdev->dev,
+ "Removed event handler %pK with param %pK\n",
+ event, event->param);
+ kfree(event);
+ break;
+ }
+ }
+ spin_unlock_irqrestore(&kctx->csf.event_lock, flags);
+}
+
+bool kbase_csf_read_error(struct kbase_context *kctx,
+ struct base_csf_notification *event_data)
+{
+ bool got_event = true;
+ struct kbase_csf_notification *error_data = NULL;
+ unsigned long flags;
+
+ spin_lock_irqsave(&kctx->csf.event_lock, flags);
+
+ if (likely(!list_empty(&kctx->csf.error_list))) {
+ error_data = list_first_entry(&kctx->csf.error_list,
+ struct kbase_csf_notification, link);
+ list_del_init(&error_data->link);
+ *event_data = error_data->data;
+ dev_dbg(kctx->kbdev->dev, "Dequeued error %pK in context %pK\n",
+ (void *)error_data, (void *)kctx);
+ } else {
+ got_event = false;
+ }
+
+ spin_unlock_irqrestore(&kctx->csf.event_lock, flags);
+
+ return got_event;
+}
+
+bool kbase_csf_error_pending(struct kbase_context *kctx)
+{
+ bool event_pended = false;
+ unsigned long flags;
+
+ spin_lock_irqsave(&kctx->csf.event_lock, flags);
+ event_pended = !list_empty(&kctx->csf.error_list);
+ dev_dbg(kctx->kbdev->dev, "%s error is pending in context %pK\n",
+ event_pended ? "An" : "No", (void *)kctx);
+ spin_unlock_irqrestore(&kctx->csf.event_lock, flags);
+
+ return event_pended;
+}
+
+void kbase_csf_event_signal(struct kbase_context *kctx, bool notify_gpu)
+{
+ struct kbase_csf_event *event, *next_event;
+ unsigned long flags;
+
+ dev_dbg(kctx->kbdev->dev,
+ "Signal event (%s GPU notify) for context %pK\n",
+ notify_gpu ? "with" : "without", (void *)kctx);
+
+ /* First set the event count to flag that an event is pending and
+ * wake up the event handling thread.
+ */
+ atomic_set(&kctx->event_count, 1);
+ kbase_event_wakeup(kctx);
+
+ /* Signal the CSF firmware to ensure that pending command stream synch
+ * object wait operations are re-evaluated.
+ * A write to GLB_DOORBELL suffices, as the spec says that all pending
+ * synch object wait operations are re-evaluated on a write to any
+ * CS_DOORBELL/GLB_DOORBELL register.
+ */
+ if (notify_gpu) {
+ spin_lock_irqsave(&kctx->kbdev->hwaccess_lock, flags);
+ if (kctx->kbdev->pm.backend.gpu_powered)
+ kbase_csf_ring_doorbell(kctx->kbdev, CSF_KERNEL_DOORBELL_NR);
+ KBASE_KTRACE_ADD(kctx->kbdev, SYNC_UPDATE_EVENT_NOTIFY_GPU, kctx, 0u);
+ spin_unlock_irqrestore(&kctx->kbdev->hwaccess_lock, flags);
+ }
+
+ /* Now invoke the callbacks registered on backend side.
+ * Allow item removal inside the loop, if requested by the callback.
+ */
+ spin_lock_irqsave(&kctx->csf.event_lock, flags);
+
+ list_for_each_entry_safe(
+ event, next_event, &kctx->csf.event_callback_list, link) {
+ enum kbase_csf_event_callback_action action;
+
+ dev_dbg(kctx->kbdev->dev,
+ "Calling event handler %pK with param %pK\n",
+ (void *)event, event->param);
+ action = event->callback(event->param);
+ if (action == KBASE_CSF_EVENT_CALLBACK_REMOVE) {
+ list_del(&event->link);
+ kfree(event);
+ }
+ }
+
+ spin_unlock_irqrestore(&kctx->csf.event_lock, flags);
+}
+
+void kbase_csf_event_wait_remove_all(struct kbase_context *kctx)
+{
+ struct kbase_csf_event *event, *next_event;
+ unsigned long flags;
+
+ spin_lock_irqsave(&kctx->csf.event_lock, flags);
+
+ list_for_each_entry_safe(
+ event, next_event, &kctx->csf.event_callback_list, link) {
+ list_del(&event->link);
+ dev_dbg(kctx->kbdev->dev,
+ "Removed event handler %pK with param %pK\n",
+ (void *)event, event->param);
+ kfree(event);
+ }
+
+ spin_unlock_irqrestore(&kctx->csf.event_lock, flags);
+}
+
+/**
+ * handle_oom_event - Handle the OoM event generated by the firmware for the
+ * CSI.
+ *
+ * @kctx: Pointer to the kbase context in which the tiler heap was initialized.
+ * @stream: Pointer to the structure containing info provided by the firmware
+ * about the CSI.
+ *
+ * This function handles the OoM event request from the firmware for the
+ * CS. It retrieves the address of the heap context and the heap's
+ * statistics (like the number of render passes in-flight) from the CS's
+ * kernel output page and passes them to the tiler heap function to allocate
+ * a new chunk.
+ * It also updates the CS's kernel input page with the address of the newly
+ * allocated chunk.
+ *
+ * Return: 0 if the request was handled successfully, otherwise a negative
+ * error code.
+ */
+static int handle_oom_event(struct kbase_context *const kctx,
+ struct kbase_csf_cmd_stream_info const *const stream)
+{
+ u64 gpu_heap_va =
+ kbase_csf_firmware_cs_output(stream, CS_HEAP_ADDRESS_LO) |
+ ((u64)kbase_csf_firmware_cs_output(stream, CS_HEAP_ADDRESS_HI) << 32);
+ const u32 vt_start =
+ kbase_csf_firmware_cs_output(stream, CS_HEAP_VT_START);
+ const u32 vt_end =
+ kbase_csf_firmware_cs_output(stream, CS_HEAP_VT_END);
+ const u32 frag_end =
+ kbase_csf_firmware_cs_output(stream, CS_HEAP_FRAG_END);
+ u32 renderpasses_in_flight;
+ u32 pending_frag_count;
+ u64 new_chunk_ptr;
+ int err;
+
+ if ((frag_end > vt_end) || (vt_end >= vt_start)) {
+ dev_warn(kctx->kbdev->dev, "Invalid Heap statistics provided by firmware: vt_start %u, vt_end %u, frag_end %u\n",
+ vt_start, vt_end, frag_end);
+ return -EINVAL;
+ }
+
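+ /* The heap statistics are running counts of render passes: vt_start
+ * passes have started vertex/tiling work, vt_end have completed it and
+ * frag_end have completed fragment work, so their differences give the
+ * number of render passes still in flight and those with fragment work
+ * outstanding.
+ */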
+ renderpasses_in_flight = vt_start - frag_end;
+ pending_frag_count = vt_end - frag_end;
+
+ err = kbase_csf_tiler_heap_alloc_new_chunk(kctx,
+ gpu_heap_va, renderpasses_in_flight, pending_frag_count, &new_chunk_ptr);
+
+ /* It is okay to acknowledge with a NULL chunk (firmware will then wait
+ * for the fragment jobs to complete and release chunks)
+ */
+ if (err == -EBUSY)
+ new_chunk_ptr = 0;
+ else if (err)
+ return err;
+
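+ /* Pass the new chunk (or a NULL pointer if none could be allocated yet)
+ * back to the firmware by writing it to both the heap start and end
+ * registers in the CS kernel input page.
+ */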
+ kbase_csf_firmware_cs_input(stream, CS_TILER_HEAP_START_LO,
+ new_chunk_ptr & 0xFFFFFFFF);
+ kbase_csf_firmware_cs_input(stream, CS_TILER_HEAP_START_HI,
+ new_chunk_ptr >> 32);
+
+ kbase_csf_firmware_cs_input(stream, CS_TILER_HEAP_END_LO,
+ new_chunk_ptr & 0xFFFFFFFF);
+ kbase_csf_firmware_cs_input(stream, CS_TILER_HEAP_END_HI,
+ new_chunk_ptr >> 32);
+
+ return 0;
+}
+
+/**
+ * report_tiler_oom_error - Report a CSG error due to a tiler heap OOM event
+ *
+ * @group: Pointer to the GPU command queue group that encountered the error
+ */
+static void report_tiler_oom_error(struct kbase_queue_group *group)
+{
+ struct base_csf_notification const
+ error = { .type = BASE_CSF_NOTIFICATION_GPU_QUEUE_GROUP_ERROR,
+ .payload = {
+ .csg_error = {
+ .handle = group->handle,
+ .error = {
+ .error_type =
+ BASE_GPU_QUEUE_GROUP_ERROR_TILER_HEAP_OOM,
+ } } } };
+
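+ /* Queue the error notification for the context (add_error() is defined
+ * earlier in this file) and wake up the event handling thread so that
+ * userspace can dequeue it.
+ */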
+ add_error(group->kctx, &group->error_tiler_oom, &error);
+ kbase_event_wakeup(group->kctx);
+}
+
+/**
+ * kbase_queue_oom_event - Handle tiler out-of-memory for a GPU command queue.
+ *
+ * @queue: Pointer to queue for which out-of-memory event was received.
+ *
+ * Called with the CSF lock held for the kbase context that owns the affected
+ * GPU virtual address space. Do not call in interrupt context.
+ *
+ * Handles tiler out-of-memory for a GPU command queue and then clears the
+ * notification to allow the firmware to report out-of-memory again in the
+ * future. If the out-of-memory condition was successfully handled then this
+ * function rings the relevant doorbell to notify the firmware; otherwise, it
+ * terminates the GPU command queue group to which the queue is bound. See
+ * term_queue_group() for details.
+ */
+static void kbase_queue_oom_event(struct kbase_queue *const queue)
+{
+ struct kbase_context *const kctx = queue->kctx;
+ struct kbase_device *const kbdev = kctx->kbdev;
+ struct kbase_queue_group *group;
+ int slot_num, err;
+ struct kbase_csf_cmd_stream_group_info const *ginfo;
+ struct kbase_csf_cmd_stream_info const *stream;
+ int csi_index = queue->csi_index;
+ u32 cs_oom_ack, cs_oom_req;
+
+ lockdep_assert_held(&kctx->csf.lock);
+
+ group = get_bound_queue_group(queue);
+ if (!group) {
+ dev_warn(kctx->kbdev->dev, "queue not bound\n");
+ return;
+ }
+
+ kbase_csf_scheduler_lock(kbdev);
+
+ slot_num = kbase_csf_scheduler_group_get_slot(group);
+
+ /* The group could have gone off slot before this work item got
+ * a chance to execute.
+ */
+ if (slot_num < 0)
+ goto unlock;
+
+ /* If the bound group is on slot but the kctx has been marked as
+ * disabled due to an address-space fault, the group is pending
+ * termination, so skip the in-flight OoM operation.
+ */
+ if (kbase_ctx_flag(kctx, KCTX_AS_DISABLED_ON_FAULT))
+ goto unlock;
+
+ ginfo = &kbdev->csf.global_iface.groups[slot_num];
+ stream = &ginfo->streams[csi_index];
+ cs_oom_ack = kbase_csf_firmware_cs_output(stream, CS_ACK) &
+ CS_ACK_TILER_OOM_MASK;
+ cs_oom_req = kbase_csf_firmware_cs_input_read(stream, CS_REQ) &
+ CS_REQ_TILER_OOM_MASK;
+
+ /* The group could have already undergone suspend-resume cycle before
+ * this work item got a chance to execute. On CSG resume the CS_ACK
+ * register is set by firmware to reflect the CS_REQ register, which
+ * implies that all events signaled before suspension are implicitly
+ * acknowledged.
+ * A new OoM event is expected to be generated after resume.
+ */
+ if (cs_oom_ack == cs_oom_req)
+ goto unlock;
+
+ err = handle_oom_event(kctx, stream);
+
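+ /* Acknowledge the OoM event by making the TILER_OOM bit of CS_REQ equal
+ * to the value read from CS_ACK, regardless of whether a new chunk could
+ * be provided.
+ */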
+ kbase_csf_firmware_cs_input_mask(stream, CS_REQ, cs_oom_ack,
+ CS_REQ_TILER_OOM_MASK);
+
+ if (err) {
+ dev_warn(
+ kbdev->dev,
+ "Queue group to be terminated, couldn't handle the OoM event\n");
+ kbase_csf_scheduler_unlock(kbdev);
+ term_queue_group(group);
+ report_tiler_oom_error(group);
+ return;
+ }
+
+ kbase_csf_ring_cs_kernel_doorbell(kbdev, csi_index, slot_num, true);
+unlock:
+ kbase_csf_scheduler_unlock(kbdev);
+}
+
+/**
+ * oom_event_worker - Tiler out-of-memory handler called from a workqueue.
+ *
+ * @data: Pointer to a work_struct embedded in GPU command queue data.
+ *
+ * Handles a tiler out-of-memory condition for a GPU command queue and then
+ * releases a reference that was added to prevent the queue being destroyed
+ * while this work item was pending on a workqueue.
+ */
+static void oom_event_worker(struct work_struct *data)
+{
+ struct kbase_queue *queue =
+ container_of(data, struct kbase_queue, oom_event_work);
+ struct kbase_context *kctx = queue->kctx;
+ struct kbase_device *const kbdev = kctx->kbdev;
+
+ int err = kbase_reset_gpu_try_prevent(kbdev);
+ /* Regardless of whether the GPU reset failed or is currently happening,
+ * exit early.
+ */
+ if (err)
+ return;
+
+ mutex_lock(&kctx->csf.lock);
+
+ kbase_queue_oom_event(queue);
+ release_queue(queue);
+
+ mutex_unlock(&kctx->csf.lock);
+ kbase_reset_gpu_allow(kbdev);
+}
+
+/**
+ * report_group_timeout_error - Report the timeout error for the group to userspace.
+ *
+ * @group: Pointer to the group for which timeout error occurred
+ */
+static void report_group_timeout_error(struct kbase_queue_group *const group)
+{
+ struct base_csf_notification const
+ error = { .type = BASE_CSF_NOTIFICATION_GPU_QUEUE_GROUP_ERROR,
+ .payload = {
+ .csg_error = {
+ .handle = group->handle,
+ .error = {
+ .error_type =
+ BASE_GPU_QUEUE_GROUP_ERROR_TIMEOUT,
+ } } } };
+
+ dev_warn(group->kctx->kbdev->dev,
+ "Notify the event notification thread, forward progress timeout (%llu cycles)\n",
+ kbase_csf_timeout_get(group->kctx->kbdev));
+
+ add_error(group->kctx, &group->error_timeout, &error);
+ kbase_event_wakeup(group->kctx);
+}
+
+/**
+ * timer_event_worker - Handle the progress timeout error for the group
+ *
+ * @data: Pointer to a work_struct embedded in GPU command queue group data.
+ *
+ * Terminate the CSG and report the error to userspace
+ */
+static void timer_event_worker(struct work_struct *data)
+{
+ struct kbase_queue_group *const group =
+ container_of(data, struct kbase_queue_group, timer_event_work);
+ struct kbase_context *const kctx = group->kctx;
+ bool reset_prevented = false;
+ int err = kbase_reset_gpu_prevent_and_wait(kctx->kbdev);
+
+ if (err)
+ dev_warn(
+ kctx->kbdev->dev,
+ "Unsuccessful GPU reset detected when terminating group %d on progress timeout, attempting to terminate regardless",
+ group->handle);
+ else
+ reset_prevented = true;
+
+ mutex_lock(&kctx->csf.lock);
+
+ term_queue_group(group);
+ report_group_timeout_error(group);
+
+ mutex_unlock(&kctx->csf.lock);
+ if (reset_prevented)
+ kbase_reset_gpu_allow(kctx->kbdev);
+}
+
+/**
+ * handle_progress_timer_event - Progress timer timeout event handler.
+ *
+ * @group: Pointer to GPU queue group for which the timeout event is received.
+ *
+ * Enqueue a work item to terminate the group and notify the event notification
+ * thread of the progress timeout fault for the GPU command queue group.
+ */
+static void handle_progress_timer_event(struct kbase_queue_group *const group)
+{
+ queue_work(group->kctx->csf.wq, &group->timer_event_work);
+}
+
+/**
+ * protm_event_worker - Protected mode switch request event handler
+ * called from a workqueue.
+ *
+ * @data: Pointer to a work_struct embedded in GPU command queue group data.
+ *
+ * Request to switch to protected mode.
+ */
+static void protm_event_worker(struct work_struct *data)
+{
+ struct kbase_queue_group *const group =
+ container_of(data, struct kbase_queue_group, protm_event_work);
+
+ KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, PROTM_EVENT_WORKER_BEGIN,
+ group, 0u);
+ kbase_csf_scheduler_group_protm_enter(group);
+ KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, PROTM_EVENT_WORKER_END,
+ group, 0u);
+}
+
+static void report_queue_fatal_error(struct kbase_queue *const queue,
+ u32 cs_fatal, u64 cs_fatal_info,
+ u8 group_handle)
+{
+ struct base_csf_notification error =
+ { .type = BASE_CSF_NOTIFICATION_GPU_QUEUE_GROUP_ERROR,
+ .payload = {
+ .csg_error = {
+ .handle = group_handle,
+ .error = {
+ .error_type =
+ BASE_GPU_QUEUE_GROUP_QUEUE_ERROR_FATAL,
+ .payload = {
+ .fatal_queue = {
+ .sideband =
+ cs_fatal_info,
+ .status = cs_fatal,
+ .csi_index =
+ queue->csi_index,
+ } } } } } };
+
+ add_error(queue->kctx, &queue->error, &error);
+ kbase_event_wakeup(queue->kctx);
+}
+
+/**
+ * handle_fault_event - Handler for CS fault.
+ *
+ * @queue: Pointer to queue for which fault event was received.
+ * @stream: Pointer to the structure containing info provided by the
+ * firmware about the CSI.
+ *
+ * Prints meaningful CS fault information.
+ */
+static void
+handle_fault_event(struct kbase_queue *const queue,
+ struct kbase_csf_cmd_stream_info const *const stream)
+{
+ const u32 cs_fault = kbase_csf_firmware_cs_output(stream, CS_FAULT);
+ const u64 cs_fault_info =
+ kbase_csf_firmware_cs_output(stream, CS_FAULT_INFO_LO) |
+ ((u64)kbase_csf_firmware_cs_output(stream, CS_FAULT_INFO_HI)
+ << 32);
+ const u8 cs_fault_exception_type =
+ CS_FAULT_EXCEPTION_TYPE_GET(cs_fault);
+ const u32 cs_fault_exception_data =
+ CS_FAULT_EXCEPTION_DATA_GET(cs_fault);
+ const u64 cs_fault_info_exception_data =
+ CS_FAULT_INFO_EXCEPTION_DATA_GET(cs_fault_info);
+ struct kbase_device *const kbdev = queue->kctx->kbdev;
+
+ kbase_csf_scheduler_spin_lock_assert_held(kbdev);
+
+ dev_warn(kbdev->dev,
+ "Ctx %d_%d Group %d CSG %d CSI: %d\n"
+ "CS_FAULT.EXCEPTION_TYPE: 0x%x (%s)\n"
+ "CS_FAULT.EXCEPTION_DATA: 0x%x\n"
+ "CS_FAULT_INFO.EXCEPTION_DATA: 0x%llx\n",
+ queue->kctx->tgid, queue->kctx->id, queue->group->handle,
+ queue->group->csg_nr, queue->csi_index,
+ cs_fault_exception_type,
+ kbase_gpu_exception_name(cs_fault_exception_type),
+ cs_fault_exception_data, cs_fault_info_exception_data);
+
+ if (cs_fault_exception_type ==
+ CS_FAULT_EXCEPTION_TYPE_RESOURCE_EVICTION_TIMEOUT)
+ report_queue_fatal_error(queue, GPU_EXCEPTION_TYPE_SW_FAULT_2,
+ 0, queue->group->handle);
+}
+
+/**
+ * fatal_event_worker - Handle the fatal error for the GPU queue
+ *
+ * @data: Pointer to a work_struct embedded in GPU command queue.
+ *
+ * Terminate the CSG and report the error to userspace.
+ */
+static void fatal_event_worker(struct work_struct *const data)
+{
+ struct kbase_queue *const queue =
+ container_of(data, struct kbase_queue, fatal_event_work);
+ struct kbase_context *const kctx = queue->kctx;
+ struct kbase_device *const kbdev = kctx->kbdev;
+ struct kbase_queue_group *group;
+ u8 group_handle;
+ bool reset_prevented = false;
+ int err = kbase_reset_gpu_prevent_and_wait(kbdev);
+
+ if (err)
+ dev_warn(
+ kbdev->dev,
+ "Unsuccessful GPU reset detected when terminating group to handle fatal event, attempting to terminate regardless");
+ else
+ reset_prevented = true;
+
+ mutex_lock(&kctx->csf.lock);
+
+ group = get_bound_queue_group(queue);
+ if (!group) {
+ dev_warn(kbdev->dev, "queue not bound when handling fatal event");
+ goto unlock;
+ }
+
+ group_handle = group->handle;
+ term_queue_group(group);
+ report_queue_fatal_error(queue, queue->cs_fatal, queue->cs_fatal_info,
+ group_handle);
+
+unlock:
+ release_queue(queue);
+ mutex_unlock(&kctx->csf.lock);
+ if (reset_prevented)
+ kbase_reset_gpu_allow(kbdev);
+}
+
+/**
+ * handle_fatal_event - Handler for CS fatal.
+ *
+ * @queue: Pointer to queue for which fatal event was received.
+ * @stream: Pointer to the structure containing info provided by the
+ * firmware about the CSI.
+ *
+ * Prints meaningful CS fatal information and enqueues a work item to
+ * terminate the group and report the fatal error to user space.
+ */
+static void
+handle_fatal_event(struct kbase_queue *const queue,
+ struct kbase_csf_cmd_stream_info const *const stream)
+{
+ const u32 cs_fatal = kbase_csf_firmware_cs_output(stream, CS_FATAL);
+ const u64 cs_fatal_info =
+ kbase_csf_firmware_cs_output(stream, CS_FATAL_INFO_LO) |
+ ((u64)kbase_csf_firmware_cs_output(stream, CS_FATAL_INFO_HI)
+ << 32);
+ const u32 cs_fatal_exception_type =
+ CS_FATAL_EXCEPTION_TYPE_GET(cs_fatal);
+ const u32 cs_fatal_exception_data =
+ CS_FATAL_EXCEPTION_DATA_GET(cs_fatal);
+ const u64 cs_fatal_info_exception_data =
+ CS_FATAL_INFO_EXCEPTION_DATA_GET(cs_fatal_info);
+ struct kbase_device *const kbdev = queue->kctx->kbdev;
+
+ kbase_csf_scheduler_spin_lock_assert_held(kbdev);
+
+ dev_warn(kbdev->dev,
+ "Ctx %d_%d Group %d CSG %d CSI: %d\n"
+ "CS_FATAL.EXCEPTION_TYPE: 0x%x (%s)\n"
+ "CS_FATAL.EXCEPTION_DATA: 0x%x\n"
+ "CS_FATAL_INFO.EXCEPTION_DATA: 0x%llx\n",
+ queue->kctx->tgid, queue->kctx->id, queue->group->handle,
+ queue->group->csg_nr, queue->csi_index,
+ cs_fatal_exception_type,
+ kbase_gpu_exception_name(cs_fatal_exception_type),
+ cs_fatal_exception_data, cs_fatal_info_exception_data);
+
+ if (cs_fatal_exception_type ==
+ CS_FATAL_EXCEPTION_TYPE_FIRMWARE_INTERNAL_ERROR) {
+ queue_work(system_wq, &kbdev->csf.fw_error_work);
+ } else {
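+ /* Take a reference on the queue so that it cannot be freed while
+ * the fatal event work item is pending; the reference is dropped
+ * by fatal_event_worker(), or here if the work item was already
+ * queued.
+ */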
+ get_queue(queue);
+ queue->cs_fatal = cs_fatal;
+ queue->cs_fatal_info = cs_fatal_info;
+ if (!queue_work(queue->kctx->csf.wq, &queue->fatal_event_work))
+ release_queue(queue);
+ }
+}
+
+/**
+ * handle_queue_exception_event - Handler for CS fatal/fault exception events.
+ *
+ * @queue: Pointer to queue for which fatal/fault event was received.
+ * @cs_req: Value of the CS_REQ register from the CS's input page.
+ * @cs_ack: Value of the CS_ACK register from the CS's output page.
+ */
+static void handle_queue_exception_event(struct kbase_queue *const queue,
+ const u32 cs_req, const u32 cs_ack)
+{
+ struct kbase_csf_cmd_stream_group_info const *ginfo;
+ struct kbase_csf_cmd_stream_info const *stream;
+ struct kbase_context *const kctx = queue->kctx;
+ struct kbase_device *const kbdev = kctx->kbdev;
+ struct kbase_queue_group *group = queue->group;
+ int csi_index = queue->csi_index;
+ int slot_num = group->csg_nr;
+
+ kbase_csf_scheduler_spin_lock_assert_held(kbdev);
+
+ ginfo = &kbdev->csf.global_iface.groups[slot_num];
+ stream = &ginfo->streams[csi_index];
+
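+ /* A mismatch between a CS_ACK bit and the corresponding CS_REQ bit
+ * indicates an unacknowledged event; handle it and then acknowledge
+ * it by copying the CS_ACK bit into CS_REQ.
+ */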
+ if ((cs_ack & CS_ACK_FATAL_MASK) != (cs_req & CS_REQ_FATAL_MASK)) {
+ handle_fatal_event(queue, stream);
+ kbase_csf_firmware_cs_input_mask(stream, CS_REQ, cs_ack,
+ CS_REQ_FATAL_MASK);
+ }
+
+ if ((cs_ack & CS_ACK_FAULT_MASK) != (cs_req & CS_REQ_FAULT_MASK)) {
+ handle_fault_event(queue, stream);
+ kbase_csf_firmware_cs_input_mask(stream, CS_REQ, cs_ack,
+ CS_REQ_FAULT_MASK);
+ kbase_csf_ring_cs_kernel_doorbell(kbdev, csi_index, slot_num, true);
+ }
+}
+
+/**
+ * process_cs_interrupts - Process interrupts for a CS.
+ *
+ * @group: Pointer to GPU command queue group data.
+ * @ginfo: The CSG interface provided by the firmware.
+ * @irqreq: CSG's IRQ request bitmask (one bit per CS).
+ * @irqack: CSG's IRQ acknowledge bitmask (one bit per CS).
+ *
+ * If the interrupt request bitmask differs from the acknowledge bitmask
+ * then the firmware is notifying the host of an event concerning those
+ * CSs indicated by bits whose value differs. The actions required
+ * are then determined by examining which notification flags differ between
+ * the request and acknowledge registers for the individual CS(s).
+ */
+static void process_cs_interrupts(struct kbase_queue_group *const group,
+ struct kbase_csf_cmd_stream_group_info const *const ginfo,
+ u32 const irqreq, u32 const irqack)
+{
+ struct kbase_device *const kbdev = group->kctx->kbdev;
+ u32 remaining = irqreq ^ irqack;
+ bool protm_pend = false;
+ const bool group_suspending =
+ !kbase_csf_scheduler_group_events_enabled(kbdev, group);
+
+ kbase_csf_scheduler_spin_lock_assert_held(kbdev);
+
+ while (remaining != 0) {
+ int const i = ffs(remaining) - 1;
+ struct kbase_queue *const queue = group->bound_queues[i];
+
+ remaining &= ~(1 << i);
+
+ /* The queue pointer can be NULL. If it isn't NULL then it cannot
+ * disappear, since the scheduler spinlock is held and a bound queue
+ * must first be unbound (which requires the scheduler spinlock)
+ * before it can be freed.
+ */
+ if (queue && !WARN_ON(queue->csi_index != i)) {
+ struct kbase_csf_cmd_stream_info const *const stream =
+ &ginfo->streams[i];
+ u32 const cs_req = kbase_csf_firmware_cs_input_read(
+ stream, CS_REQ);
+ u32 const cs_ack =
+ kbase_csf_firmware_cs_output(stream, CS_ACK);
+ struct workqueue_struct *wq = group->kctx->csf.wq;
+
+ if ((cs_req & CS_REQ_EXCEPTION_MASK) ^
+ (cs_ack & CS_ACK_EXCEPTION_MASK)) {
+ KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_FAULT_INTERRUPT, group, queue, cs_req ^ cs_ack);
+ handle_queue_exception_event(queue, cs_req, cs_ack);
+ }
+
+ /* PROTM_PEND and TILER_OOM can be safely ignored
+ * because they will be raised again if the group
+ * is assigned a CSG slot in the future.
+ */
+ if (group_suspending) {
+ u32 const cs_req_remain = cs_req & ~CS_REQ_EXCEPTION_MASK;
+ u32 const cs_ack_remain = cs_ack & ~CS_ACK_EXCEPTION_MASK;
+
+ KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_IGNORED_INTERRUPTS_GROUP_SUSPEND,
+ group, queue, cs_req_remain ^ cs_ack_remain);
+ continue;
+ }
+
+ if (((cs_req & CS_REQ_TILER_OOM_MASK) ^
+ (cs_ack & CS_ACK_TILER_OOM_MASK))) {
+ get_queue(queue);
+ KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_TILER_OOM_INTERRUPT, group, queue,
+ cs_req ^ cs_ack);
+ if (WARN_ON(!queue_work(wq, &queue->oom_event_work))) {
+ /* The work item should not already be
+ * queued; there can be only one
+ * pending OoM event for a queue.
+ */
+ release_queue(queue);
+ }
+ }
+
+ if ((cs_req & CS_REQ_PROTM_PEND_MASK) ^
+ (cs_ack & CS_ACK_PROTM_PEND_MASK)) {
+ KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_PROTM_PEND_INTERRUPT, group, queue,
+ cs_req ^ cs_ack);
+
+ dev_dbg(kbdev->dev,
+ "Protected mode entry request for queue on csi %d bound to group-%d on slot %d",
+ queue->csi_index, group->handle,
+ group->csg_nr);
+
+ bitmap_set(group->protm_pending_bitmap, i, 1);
+ KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, PROTM_PENDING_SET, group, queue,
+ group->protm_pending_bitmap[0]);
+ protm_pend = true;
+ }
+ }
+ }
+
+ if (protm_pend)
+ queue_work(group->kctx->csf.wq, &group->protm_event_work);
+}
+
+/**
+ * process_csg_interrupts - Process interrupts for a CSG.
+ *
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
+ * @csg_nr: CSG number.
+ *
+ * Handles interrupts for a CSG and for CSs within it.
+ *
+ * If the CSG's request register value differs from its acknowledge register
+ * then the firmware is notifying the host of an event concerning the whole
+ * group. The actions required are then determined by examining which
+ * notification flags differ between those two register values.
+ *
+ * See process_cs_interrupts() for details of per-stream interrupt handling.
+ */
+static void process_csg_interrupts(struct kbase_device *const kbdev,
+ int const csg_nr)
+{
+ struct kbase_csf_cmd_stream_group_info *ginfo;
+ struct kbase_queue_group *group = NULL;
+ u32 req, ack, irqreq, irqack;
+
+ kbase_csf_scheduler_spin_lock_assert_held(kbdev);
+
+ if (WARN_ON(csg_nr >= kbdev->csf.global_iface.group_num))
+ return;
+
+ KBASE_KTRACE_ADD(kbdev, CSG_INTERRUPT_PROCESS, NULL, csg_nr);
+
+ ginfo = &kbdev->csf.global_iface.groups[csg_nr];
+ req = kbase_csf_firmware_csg_input_read(ginfo, CSG_REQ);
+ ack = kbase_csf_firmware_csg_output(ginfo, CSG_ACK);
+ irqreq = kbase_csf_firmware_csg_output(ginfo, CSG_IRQ_REQ);
+ irqack = kbase_csf_firmware_csg_input_read(ginfo, CSG_IRQ_ACK);
+
+ /* There may not be any pending CSG/CS interrupts to process */
+ if ((req == ack) && (irqreq == irqack))
+ goto out;
+
+ /* Immediately set the IRQ_ACK bits to be the same as the IRQ_REQ bits
+ * before examining the CS_ACK & CS_REQ bits. This ensures that the
+ * Host doesn't miss an interrupt for a CS in the race scenario where,
+ * whilst the Host is servicing an interrupt for that CS, the firmware
+ * sends another interrupt for it.
+ */
+ kbase_csf_firmware_csg_input(ginfo, CSG_IRQ_ACK, irqreq);
+
+ group = kbase_csf_scheduler_get_group_on_slot(kbdev, csg_nr);
+
+ /* The group pointer can be NULL here if interrupts for the group
+ * (like SYNC_UPDATE, IDLE notification) were delayed and arrived
+ * just after the suspension of the group completed. If it is not
+ * NULL then the group cannot disappear, even if userspace tries to
+ * terminate it whilst this code is running, as the scheduler
+ * spinlock is held and freeing a group that is resident on a CSG
+ * slot requires that spinlock.
+ */
+ if (!group)
+ goto out;
+
+ if (WARN_ON(kbase_csf_scheduler_group_get_slot_locked(group) != csg_nr))
+ goto out;
+
+ if ((req ^ ack) & CSG_REQ_SYNC_UPDATE_MASK) {
+ kbase_csf_firmware_csg_input_mask(ginfo,
+ CSG_REQ, ack, CSG_REQ_SYNC_UPDATE_MASK);
+
+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SYNC_UPDATE_INTERRUPT, group, req ^ ack);
+ kbase_csf_event_signal_cpu_only(group->kctx);
+ }
+
+ if ((req ^ ack) & CSG_REQ_IDLE_MASK) {
+ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
+
+ kbase_csf_firmware_csg_input_mask(ginfo, CSG_REQ, ack,
+ CSG_REQ_IDLE_MASK);
+
+ set_bit(csg_nr, scheduler->csg_slots_idle_mask);
+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_IDLE_SET, group,
+ scheduler->csg_slots_idle_mask[0]);
+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_IDLE_INTERRUPT, group, req ^ ack);
+ dev_dbg(kbdev->dev, "Idle notification received for Group %u on slot %d\n",
+ group->handle, csg_nr);
+
+ /* Check if the scheduling tick can be advanced */
+ if (kbase_csf_scheduler_all_csgs_idle(kbdev) &&
+ !scheduler->gpu_idle_fw_timer_enabled) {
+ kbase_csf_scheduler_advance_tick_nolock(kbdev);
+ }
+ }
+
+ if ((req ^ ack) & CSG_REQ_PROGRESS_TIMER_EVENT_MASK) {
+ kbase_csf_firmware_csg_input_mask(ginfo, CSG_REQ, ack,
+ CSG_REQ_PROGRESS_TIMER_EVENT_MASK);
+
+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_PROGRESS_TIMER_INTERRUPT,
+ group, req ^ ack);
+ dev_info(kbdev->dev,
+ "Timeout notification received for group %u of ctx %d_%d on slot %d\n",
+ group->handle, group->kctx->tgid, group->kctx->id, csg_nr);
+
+ handle_progress_timer_event(group);
+ }
+
+ process_cs_interrupts(group, ginfo, irqreq, irqack);
+
+out:
+ /* group may still be NULL here */
+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_INTERRUPT_PROCESS_END, group,
+ ((u64)req ^ ack) | (((u64)irqreq ^ irqack) << 32));
+}
+
+/**
+ * process_prfcnt_interrupts - Process performance counter interrupts.
+ *
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface.
+ * @glb_req: Global request register value.
+ * @glb_ack: Global acknowledge register value.
+ *
+ * Handles interrupts issued by the firmware that relate to the performance
+ * counters, for example on completion of a performance counter sample. The
+ * scheduler spinlock is expected to be already held when calling this
+ * function.
+ */
+static void process_prfcnt_interrupts(struct kbase_device *kbdev, u32 glb_req,
+ u32 glb_ack)
+{
+ const struct kbase_csf_global_iface *const global_iface =
+ &kbdev->csf.global_iface;
+
+ lockdep_assert_held(&kbdev->csf.scheduler.interrupt_lock);
+
+ /* Process PRFCNT_SAMPLE interrupt. */
+ if (kbdev->csf.hwcnt.request_pending &&
+ ((glb_req & GLB_REQ_PRFCNT_SAMPLE_MASK) ==
+ (glb_ack & GLB_REQ_PRFCNT_SAMPLE_MASK))) {
+ kbdev->csf.hwcnt.request_pending = false;
+
+ dev_dbg(kbdev->dev, "PRFCNT_SAMPLE done interrupt received.");
+
+ kbase_hwcnt_backend_csf_on_prfcnt_sample(
+ &kbdev->hwcnt_gpu_iface);
+ }
+
+ /* Process PRFCNT_ENABLE interrupt. */
+ if (kbdev->csf.hwcnt.enable_pending &&
+ ((glb_req & GLB_REQ_PRFCNT_ENABLE_MASK) ==
+ (glb_ack & GLB_REQ_PRFCNT_ENABLE_MASK))) {
+ kbdev->csf.hwcnt.enable_pending = false;
+
+ dev_dbg(kbdev->dev,
+ "PRFCNT_ENABLE status changed interrupt received.");
+
+ if (glb_ack & GLB_REQ_PRFCNT_ENABLE_MASK)
+ kbase_hwcnt_backend_csf_on_prfcnt_enable(
+ &kbdev->hwcnt_gpu_iface);
+ else
+ kbase_hwcnt_backend_csf_on_prfcnt_disable(
+ &kbdev->hwcnt_gpu_iface);
+ }
+
+ /* Process PRFCNT_THRESHOLD interrupt. */
+ if ((glb_req ^ glb_ack) & GLB_REQ_PRFCNT_THRESHOLD_MASK) {
+ dev_dbg(kbdev->dev, "PRFCNT_THRESHOLD interrupt received.");
+
+ kbase_hwcnt_backend_csf_on_prfcnt_threshold(
+ &kbdev->hwcnt_gpu_iface);
+
+ /* Set the GLB_REQ.PRFCNT_THRESHOLD flag back to
+ * the same value as GLB_ACK.PRFCNT_THRESHOLD
+ * flag in order to enable reporting of another
+ * PRFCNT_THRESHOLD event.
+ */
+ kbase_csf_firmware_global_input_mask(
+ global_iface, GLB_REQ, glb_ack,
+ GLB_REQ_PRFCNT_THRESHOLD_MASK);
+ }
+
+ /* Process PRFCNT_OVERFLOW interrupt. */
+ if ((glb_req ^ glb_ack) & GLB_REQ_PRFCNT_OVERFLOW_MASK) {
+ dev_dbg(kbdev->dev, "PRFCNT_OVERFLOW interrupt received.");
+
+ kbase_hwcnt_backend_csf_on_prfcnt_overflow(
+ &kbdev->hwcnt_gpu_iface);
+
+ /* Set the GLB_REQ.PRFCNT_OVERFLOW flag back to
+ * the same value as GLB_ACK.PRFCNT_OVERFLOW
+ * flag in order to enable reporting of another
+ * PRFCNT_OVERFLOW event.
+ */
+ kbase_csf_firmware_global_input_mask(
+ global_iface, GLB_REQ, glb_ack,
+ GLB_REQ_PRFCNT_OVERFLOW_MASK);
+ }
+}
+
+void kbase_csf_interrupt(struct kbase_device *kbdev, u32 val)
+{
+ unsigned long flags;
+ u32 remaining = val;
+
+ lockdep_assert_held(&kbdev->hwaccess_lock);
+
+ KBASE_KTRACE_ADD(kbdev, CSF_INTERRUPT, NULL, val);
+ kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), val);
+
+ if (val & JOB_IRQ_GLOBAL_IF) {
+ const struct kbase_csf_global_iface *const global_iface =
+ &kbdev->csf.global_iface;
+ struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
+
+ kbdev->csf.interrupt_received = true;
+ remaining &= ~JOB_IRQ_GLOBAL_IF;
+
+ if (!kbdev->csf.firmware_reloaded)
+ kbase_csf_firmware_reload_completed(kbdev);
+ else if (global_iface->output) {
+ u32 glb_req, glb_ack;
+
+ kbase_csf_scheduler_spin_lock(kbdev, &flags);
+ glb_req = kbase_csf_firmware_global_input_read(
+ global_iface, GLB_REQ);
+ glb_ack = kbase_csf_firmware_global_output(
+ global_iface, GLB_ACK);
+ KBASE_KTRACE_ADD(kbdev, GLB_REQ_ACQ, NULL, glb_req ^ glb_ack);
+
+ if ((glb_req ^ glb_ack) & GLB_REQ_PROTM_EXIT_MASK) {
+ dev_dbg(kbdev->dev, "Protected mode exit interrupt received");
+ kbase_csf_firmware_global_input_mask(
+ global_iface, GLB_REQ, glb_ack,
+ GLB_REQ_PROTM_EXIT_MASK);
+ WARN_ON(!kbase_csf_scheduler_protected_mode_in_use(kbdev));
+ KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_EXIT_PROTM, scheduler->active_protm_grp, 0u);
+ scheduler->active_protm_grp = NULL;
+ kbdev->protected_mode = false;
+ kbase_ipa_control_protm_exited(kbdev);
+ kbase_hwcnt_backend_csf_protm_exited(
+ &kbdev->hwcnt_gpu_iface);
+ }
+
+ /* Handle IDLE Hysteresis notification event */
+ if ((glb_req ^ glb_ack) & GLB_REQ_IDLE_EVENT_MASK) {
+ int non_idle_offslot_grps;
+ bool can_suspend_on_idle;
+
+ dev_dbg(kbdev->dev, "Idle-hysteresis event flagged");
+ kbase_csf_firmware_global_input_mask(
+ global_iface, GLB_REQ, glb_ack,
+ GLB_REQ_IDLE_EVENT_MASK);
+
+ non_idle_offslot_grps = atomic_read(&scheduler->non_idle_offslot_grps);
+ can_suspend_on_idle = kbase_pm_idle_groups_sched_suspendable(kbdev);
+ KBASE_KTRACE_ADD(kbdev, SCHEDULER_CAN_IDLE, NULL,
+ ((u64)(u32)non_idle_offslot_grps) | (((u64)can_suspend_on_idle) << 32));
+
+ if (!non_idle_offslot_grps) {
+ if (can_suspend_on_idle)
+ queue_work(system_highpri_wq,
+ &scheduler->gpu_idle_work);
+ } else {
+ /* Advance the scheduling tick to get
+ * the non-idle suspended groups loaded
+ * soon.
+ */
+ kbase_csf_scheduler_advance_tick_nolock(
+ kbdev);
+ }
+ }
+
+ process_prfcnt_interrupts(kbdev, glb_req, glb_ack);
+
+ kbase_csf_scheduler_spin_unlock(kbdev, flags);
+
+ /* Invoke the MCU state machine as a state transition
+ * might have completed.
+ */
+ kbase_pm_update_state(kbdev);
+ }
+
+ if (!remaining) {
+ wake_up_all(&kbdev->csf.event_wait);
+ KBASE_KTRACE_ADD(kbdev, CSF_INTERRUPT_END, NULL, val);
+ return;
+ }
+ }
+
+ kbase_csf_scheduler_spin_lock(kbdev, &flags);
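+ /* Each remaining set bit in the IRQ status corresponds to a CSG with a
+ * pending interrupt; process them one by one under the scheduler
+ * spinlock.
+ */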
+ while (remaining != 0) {
+ int const csg_nr = ffs(remaining) - 1;
+
+ process_csg_interrupts(kbdev, csg_nr);
+ remaining &= ~(1 << csg_nr);
+ }
+ kbase_csf_scheduler_spin_unlock(kbdev, flags);
+
+ wake_up_all(&kbdev->csf.event_wait);
+ KBASE_KTRACE_ADD(kbdev, CSF_INTERRUPT_END, NULL, val);
+}
+
+void kbase_csf_doorbell_mapping_term(struct kbase_device *kbdev)
+{
+ if (kbdev->csf.db_filp) {
+ struct page *page = as_page(kbdev->csf.dummy_db_page);
+
+ kbase_mem_pool_free(
+ &kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW],
+ page, false);
+
+ fput(kbdev->csf.db_filp);
+ }
+}
+
+int kbase_csf_doorbell_mapping_init(struct kbase_device *kbdev)
+{
+ struct tagged_addr phys;
+ struct file *filp;
+ int ret;
+
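+ /* The shmem file and the dummy page allocated below appear to be used
+ * to back CPU mappings of doorbell pages when a real doorbell page is
+ * not available; see the users of db_filp and dummy_db_page elsewhere
+ * in the driver for the exact details.
+ */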
+ filp = shmem_file_setup("mali csf", MAX_LFS_FILESIZE, VM_NORESERVE);
+ if (IS_ERR(filp))
+ return PTR_ERR(filp);
+
+ ret = kbase_mem_pool_alloc_pages(
+ &kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW],
+ 1, &phys, false);
+
+ if (ret <= 0) {
+ fput(filp);
+ return ret;
+ }
+
+ kbdev->csf.db_filp = filp;
+ kbdev->csf.dummy_db_page = phys;
+ kbdev->csf.db_file_offsets = 0;
+
+ return 0;
+}
+
+void kbase_csf_free_dummy_user_reg_page(struct kbase_device *kbdev)
+{
+ if (as_phys_addr_t(kbdev->csf.dummy_user_reg_page)) {
+ struct page *page = as_page(kbdev->csf.dummy_user_reg_page);
+
+ kbase_mem_pool_free(
+ &kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], page,
+ false);
+ }
+}
+
+int kbase_csf_setup_dummy_user_reg_page(struct kbase_device *kbdev)
+{
+ struct tagged_addr phys;
+ struct page *page;
+ u32 *addr;
+ int ret;
+
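+ /* A zero tagged address marks the dummy page as not yet allocated;
+ * kbase_csf_free_dummy_user_reg_page() checks for this before freeing.
+ */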
+ kbdev->csf.dummy_user_reg_page = as_tagged(0);
+
+ ret = kbase_mem_pool_alloc_pages(
+ &kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], 1, &phys,
+ false);
+
+ if (ret <= 0)
+ return ret;
+
+ page = as_page(phys);
+ addr = kmap_atomic(page);
+
+ /* Write a special value for the latest flush register inside the
+ * dummy page
+ */
+ addr[LATEST_FLUSH / sizeof(u32)] = POWER_DOWN_LATEST_FLUSH_VALUE;
+
+ kbase_sync_single_for_device(kbdev, kbase_dma_addr(page), sizeof(u32),
+ DMA_BIDIRECTIONAL);
+ kunmap_atomic(addr);
+
+ kbdev->csf.dummy_user_reg_page = phys;
+
+ return 0;
+}
+
+u8 kbase_csf_priority_check(struct kbase_device *kbdev, u8 req_priority)
+{
+ struct priority_control_manager_device *pcm_device = kbdev->pcm_dev;
+ u8 out_priority = req_priority;
+
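+ /* If a priority control manager is registered, convert the requested
+ * queue group priority to the scheduler's relative priority scale, let
+ * the PCM check (and possibly override) it for the current process,
+ * and convert the result back. Without a PCM the requested priority
+ * is used as-is.
+ */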
+ if (pcm_device) {
+ req_priority = kbase_csf_priority_queue_group_priority_to_relative(req_priority);
+ out_priority = pcm_device->ops.pcm_scheduler_priority_check(pcm_device, current, req_priority);
+ out_priority = kbase_csf_priority_relative_to_queue_group_priority(out_priority);
+ }
+
+ return out_priority;
+}
+