Diffstat (limited to 'mali_kbase/csf/mali_kbase_csf_csg_debugfs.c')
-rw-r--r-- | mali_kbase/csf/mali_kbase_csf_csg_debugfs.c | 308
1 file changed, 194 insertions, 114 deletions
diff --git a/mali_kbase/csf/mali_kbase_csf_csg_debugfs.c b/mali_kbase/csf/mali_kbase_csf_csg_debugfs.c
index 2075797..c94e656 100644
--- a/mali_kbase/csf/mali_kbase_csf_csg_debugfs.c
+++ b/mali_kbase/csf/mali_kbase_csf_csg_debugfs.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 /*
  *
- * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -23,11 +23,137 @@
 #include <mali_kbase.h>
 #include <linux/seq_file.h>
 #include <linux/delay.h>
-#include <csf/mali_kbase_csf_trace_buffer.h>
 #include <backend/gpu/mali_kbase_pm_internal.h>
 
 #if IS_ENABLED(CONFIG_DEBUG_FS)
 #include "mali_kbase_csf_tl_reader.h"
+#include <linux/version_compat_defs.h>
+
+/* Wait time to be used cumulatively for all the CSG slots.
+ * Since scheduler lock is held when STATUS_UPDATE request is sent, there won't be
+ * any other Host request pending on the FW side and usually FW would be responsive
+ * to the Doorbell IRQs as it won't do any polling for a long time and also it won't
+ * have to wait for any HW state transition to complete for publishing the status.
+ * So it is reasonable to expect that handling of STATUS_UPDATE request would be
+ * relatively very quick.
+ */
+#define STATUS_UPDATE_WAIT_TIMEOUT 500
+
+/* The bitmask of CSG slots for which the STATUS_UPDATE request completed.
+ * The access to it is serialized with scheduler lock, so at a time it would
+ * get used either for "active_groups" or per context "groups" debugfs file.
+ */
+static DECLARE_BITMAP(csg_slots_status_updated, MAX_SUPPORTED_CSGS);
+
+static
+bool csg_slot_status_update_finish(struct kbase_device *kbdev, u32 csg_nr)
+{
+        struct kbase_csf_cmd_stream_group_info const *const ginfo =
+                &kbdev->csf.global_iface.groups[csg_nr];
+
+        return !((kbase_csf_firmware_csg_input_read(ginfo, CSG_REQ) ^
+                  kbase_csf_firmware_csg_output(ginfo, CSG_ACK)) &
+                 CSG_REQ_STATUS_UPDATE_MASK);
+}
+
+static
+bool csg_slots_status_update_finish(struct kbase_device *kbdev,
+                                    const unsigned long *slots_mask)
+{
+        const u32 max_csg_slots = kbdev->csf.global_iface.group_num;
+        bool changed = false;
+        u32 csg_nr;
+
+        lockdep_assert_held(&kbdev->csf.scheduler.lock);
+
+        for_each_set_bit(csg_nr, slots_mask, max_csg_slots) {
+                if (csg_slot_status_update_finish(kbdev, csg_nr)) {
+                        set_bit(csg_nr, csg_slots_status_updated);
+                        changed = true;
+                }
+        }
+
+        return changed;
+}
+
+static void wait_csg_slots_status_update_finish(struct kbase_device *kbdev,
+                                                unsigned long *slots_mask)
+{
+        const u32 max_csg_slots = kbdev->csf.global_iface.group_num;
+        long remaining = kbase_csf_timeout_in_jiffies(STATUS_UPDATE_WAIT_TIMEOUT);
+
+        lockdep_assert_held(&kbdev->csf.scheduler.lock);
+
+        bitmap_zero(csg_slots_status_updated, max_csg_slots);
+
+        while (!bitmap_empty(slots_mask, max_csg_slots) && remaining) {
+                remaining = wait_event_timeout(kbdev->csf.event_wait,
+                        csg_slots_status_update_finish(kbdev, slots_mask),
+                        remaining);
+                if (likely(remaining)) {
+                        bitmap_andnot(slots_mask, slots_mask,
+                                csg_slots_status_updated, max_csg_slots);
+                } else {
+                        dev_warn(kbdev->dev,
+                                 "STATUS_UPDATE request timed out for slots 0x%lx",
+                                 slots_mask[0]);
+                }
+        }
+}
+
+void kbase_csf_debugfs_update_active_groups_status(struct kbase_device *kbdev)
+{
+        u32 max_csg_slots = kbdev->csf.global_iface.group_num;
+        DECLARE_BITMAP(used_csgs, MAX_SUPPORTED_CSGS) = { 0 };
+        u32 csg_nr;
+        unsigned long flags;
+
+        lockdep_assert_held(&kbdev->csf.scheduler.lock);
+
+        /* Global doorbell ring for CSG STATUS_UPDATE request or User doorbell
+         * ring for Extract offset update, shall not be made when MCU has been
+         * put to sleep otherwise it will undesirably make MCU exit the sleep
+         * state. Also it isn't really needed as FW will implicitly update the
+         * status of all on-slot groups when MCU sleep request is sent to it.
+         */
+        if (kbdev->csf.scheduler.state == SCHED_SLEEPING) {
+                /* Wait for the MCU sleep request to complete. */
+                kbase_pm_wait_for_desired_state(kbdev);
+                bitmap_copy(csg_slots_status_updated,
+                            kbdev->csf.scheduler.csg_inuse_bitmap, max_csg_slots);
+                return;
+        }
+
+        for (csg_nr = 0; csg_nr < max_csg_slots; csg_nr++) {
+                struct kbase_queue_group *const group =
+                        kbdev->csf.scheduler.csg_slots[csg_nr].resident_group;
+                if (!group)
+                        continue;
+                /* Ring the User doorbell for FW to update the Extract offset */
+                kbase_csf_ring_doorbell(kbdev, group->doorbell_nr);
+                set_bit(csg_nr, used_csgs);
+        }
+
+        /* Return early if there are no on-slot groups */
+        if (bitmap_empty(used_csgs, max_csg_slots))
+                return;
+
+        kbase_csf_scheduler_spin_lock(kbdev, &flags);
+        for_each_set_bit(csg_nr, used_csgs, max_csg_slots) {
+                struct kbase_csf_cmd_stream_group_info const *const ginfo =
+                        &kbdev->csf.global_iface.groups[csg_nr];
+                kbase_csf_firmware_csg_input_mask(ginfo, CSG_REQ,
+                        ~kbase_csf_firmware_csg_output(ginfo, CSG_ACK),
+                        CSG_REQ_STATUS_UPDATE_MASK);
+        }
+
+        BUILD_BUG_ON(MAX_SUPPORTED_CSGS > (sizeof(used_csgs[0]) * BITS_PER_BYTE));
+        kbase_csf_ring_csg_slots_doorbell(kbdev, used_csgs[0]);
+        kbase_csf_scheduler_spin_unlock(kbdev, flags);
+        wait_csg_slots_status_update_finish(kbdev, used_csgs);
+        /* Wait for the User doobell ring to take effect */
+        msleep(100);
+}
 
 #define MAX_SCHED_STATE_STRING_LEN (16)
 static const char *scheduler_state_to_string(struct kbase_device *kbdev,
@@ -77,16 +203,32 @@ static const char *blocked_reason_to_string(u32 reason_id)
 	return cs_blocked_reason[reason_id];
 }
 
+static bool sb_source_supported(u32 glb_version)
+{
+        bool supported = false;
+
+        if (((GLB_VERSION_MAJOR_GET(glb_version) == 3) &&
+             (GLB_VERSION_MINOR_GET(glb_version) >= 5)) ||
+            ((GLB_VERSION_MAJOR_GET(glb_version) == 2) &&
+             (GLB_VERSION_MINOR_GET(glb_version) >= 6)) ||
+            ((GLB_VERSION_MAJOR_GET(glb_version) == 1) &&
+             (GLB_VERSION_MINOR_GET(glb_version) >= 3)))
+                supported = true;
+
+        return supported;
+}
+
 static void kbasep_csf_scheduler_dump_active_queue_cs_status_wait(
-        struct seq_file *file, u32 wait_status, u32 wait_sync_value,
-        u64 wait_sync_live_value, u64 wait_sync_pointer, u32 sb_status,
-        u32 blocked_reason)
+        struct seq_file *file, u32 glb_version, u32 wait_status, u32 wait_sync_value,
+        u64 wait_sync_live_value, u64 wait_sync_pointer, u32 sb_status, u32 blocked_reason)
 {
 #define WAITING "Waiting"
 #define NOT_WAITING "Not waiting"
 
         seq_printf(file, "SB_MASK: %d\n",
                    CS_STATUS_WAIT_SB_MASK_GET(wait_status));
+        if (sb_source_supported(glb_version))
+                seq_printf(file, "SB_SOURCE: %d\n", CS_STATUS_WAIT_SB_SOURCE_GET(wait_status));
         seq_printf(file, "PROGRESS_WAIT: %s\n",
                    CS_STATUS_WAIT_PROGRESS_WAIT_GET(wait_status) ?
                    WAITING : NOT_WAITING);
@@ -145,7 +287,8 @@ static void kbasep_csf_scheduler_dump_active_cs_trace(struct seq_file *file,
 static void kbasep_csf_scheduler_dump_active_queue(struct seq_file *file,
                                         struct kbase_queue *queue)
 {
-        u32 *addr;
+        u64 *addr;
+        u32 *addr32;
         u64 cs_extract;
         u64 cs_insert;
         u32 cs_active;
@@ -156,20 +299,25 @@ static void kbasep_csf_scheduler_dump_active_queue(struct seq_file *file,
         struct kbase_vmap_struct *mapping;
         u64 *evt;
         u64 wait_sync_live_value;
+        u32 glb_version;
 
         if (!queue)
                 return;
 
+        glb_version = queue->kctx->kbdev->csf.global_iface.version;
+
         if (WARN_ON(queue->csi_index == KBASEP_IF_NR_INVALID ||
                     !queue->group))
                 return;
 
-        addr = (u32 *)queue->user_io_addr;
-        cs_insert = addr[CS_INSERT_LO/4] | ((u64)addr[CS_INSERT_HI/4] << 32);
+        addr = queue->user_io_addr;
+        cs_insert = addr[CS_INSERT_LO / sizeof(*addr)];
+
+        addr = queue->user_io_addr + PAGE_SIZE / sizeof(*addr);
+        cs_extract = addr[CS_EXTRACT_LO / sizeof(*addr)];
 
-        addr = (u32 *)(queue->user_io_addr + PAGE_SIZE);
-        cs_extract = addr[CS_EXTRACT_LO/4] | ((u64)addr[CS_EXTRACT_HI/4] << 32);
-        cs_active = addr[CS_ACTIVE/4];
+        addr32 = (u32 *)(queue->user_io_addr + PAGE_SIZE / sizeof(*addr));
+        cs_active = addr32[CS_ACTIVE / sizeof(*addr32)];
 
 #define KBASEP_CSF_DEBUGFS_CS_HEADER_USER_IO \
         "Bind Idx, Ringbuf addr, Size, Prio, Insert offset, Extract offset, Active, Doorbell\n"
@@ -200,9 +348,8 @@ static void kbasep_csf_scheduler_dump_active_queue(struct seq_file *file,
                         }
 
                         kbasep_csf_scheduler_dump_active_queue_cs_status_wait(
-                                file, wait_status, wait_sync_value,
-                                wait_sync_live_value, wait_sync_pointer,
-                                sb_status, blocked_reason);
+                                file, glb_version, wait_status, wait_sync_value,
+                                wait_sync_live_value, wait_sync_pointer, sb_status, blocked_reason);
                 }
         } else {
                 struct kbase_device const *const kbdev =
@@ -257,9 +404,8 @@ static void kbasep_csf_scheduler_dump_active_queue(struct seq_file *file,
                 }
 
                 kbasep_csf_scheduler_dump_active_queue_cs_status_wait(
-                        file, wait_status, wait_sync_value,
-                        wait_sync_live_value, wait_sync_pointer, sb_status,
-                        blocked_reason);
+                        file, glb_version, wait_status, wait_sync_value, wait_sync_live_value,
+                        wait_sync_pointer, sb_status, blocked_reason);
                 /* Dealing with cs_trace */
                 if (kbase_csf_scheduler_queue_has_trace(queue))
                         kbasep_csf_scheduler_dump_active_cs_trace(file, stream);
@@ -270,54 +416,6 @@ static void kbasep_csf_scheduler_dump_active_queue(struct seq_file *file,
         seq_puts(file, "\n");
 }
 
-static void update_active_group_status(struct seq_file *file,
-                struct kbase_queue_group *const group)
-{
-        struct kbase_device *const kbdev = group->kctx->kbdev;
-        struct kbase_csf_cmd_stream_group_info const *const ginfo =
-                &kbdev->csf.global_iface.groups[group->csg_nr];
-        long remaining = kbase_csf_timeout_in_jiffies(kbdev->csf.fw_timeout_ms);
-        unsigned long flags;
-
-        /* Global doorbell ring for CSG STATUS_UPDATE request or User doorbell
-         * ring for Extract offset update, shall not be made when MCU has been
-         * put to sleep otherwise it will undesirably make MCU exit the sleep
-         * state. Also it isn't really needed as FW will implicitly update the
-         * status of all on-slot groups when MCU sleep request is sent to it.
-         */
-        if (kbdev->csf.scheduler.state == SCHED_SLEEPING)
-                return;
-
-        /* Ring the User doobell shared between the queues bound to this
-         * group, to have FW update the CS_EXTRACT for all the queues
-         * bound to the group. Ring early so that FW gets adequate time
-         * for the handling.
-         */
-        kbase_csf_ring_doorbell(kbdev, group->doorbell_nr);
-
-        kbase_csf_scheduler_spin_lock(kbdev, &flags);
-        kbase_csf_firmware_csg_input_mask(ginfo, CSG_REQ,
-                        ~kbase_csf_firmware_csg_output(ginfo, CSG_ACK),
-                        CSG_REQ_STATUS_UPDATE_MASK);
-        kbase_csf_scheduler_spin_unlock(kbdev, flags);
-        kbase_csf_ring_csg_doorbell(kbdev, group->csg_nr);
-
-        remaining = wait_event_timeout(kbdev->csf.event_wait,
-                !((kbase_csf_firmware_csg_input_read(ginfo, CSG_REQ) ^
-                   kbase_csf_firmware_csg_output(ginfo, CSG_ACK)) &
-                  CSG_REQ_STATUS_UPDATE_MASK), remaining);
-
-        if (!remaining) {
-                dev_err(kbdev->dev,
-                        "Timed out for STATUS_UPDATE on group %d on slot %d",
-                        group->handle, group->csg_nr);
-
-                seq_printf(file, "*** Warn: Timed out for STATUS_UPDATE on slot %d\n",
-                        group->csg_nr);
-                seq_puts(file, "*** The following group-record is likely stale\n");
-        }
-}
-
 static void kbasep_csf_scheduler_dump_active_group(struct seq_file *file,
                 struct kbase_queue_group *const group)
 {
@@ -331,8 +429,6 @@ static void kbasep_csf_scheduler_dump_active_group(struct seq_file *file,
                 u8 slot_priority =
                         kbdev->csf.scheduler.csg_slots[group->csg_nr].priority;
 
-                update_active_group_status(file, group);
-
                 ep_c = kbase_csf_firmware_csg_output(ginfo, CSG_STATUS_EP_CURRENT);
                 ep_r = kbase_csf_firmware_csg_output(ginfo, CSG_STATUS_EP_REQ);
 
@@ -348,25 +444,25 @@ static void kbasep_csf_scheduler_dump_active_group(struct seq_file *file,
                                 CSG_STATUS_STATE_IDLE_MASK)
                         idle = 'Y';
 
-                seq_puts(file, "GroupID, CSG NR, CSG Prio, Run State, Priority, C_EP(Alloc/Req), F_EP(Alloc/Req), T_EP(Alloc/Req), Exclusive, Idle\n");
-                seq_printf(file, "%7d, %6d, %8d, %9d, %8d, %11d/%3d, %11d/%3d, %11d/%3d, %9c, %4c\n",
-                        group->handle,
-                        group->csg_nr,
-                        slot_priority,
-                        group->run_state,
-                        group->priority,
-                        CSG_STATUS_EP_CURRENT_COMPUTE_EP_GET(ep_c),
-                        CSG_STATUS_EP_REQ_COMPUTE_EP_GET(ep_r),
-                        CSG_STATUS_EP_CURRENT_FRAGMENT_EP_GET(ep_c),
-                        CSG_STATUS_EP_REQ_FRAGMENT_EP_GET(ep_r),
-                        CSG_STATUS_EP_CURRENT_TILER_EP_GET(ep_c),
-                        CSG_STATUS_EP_REQ_TILER_EP_GET(ep_r),
-                        exclusive,
-                        idle);
-
-                /* Wait for the User doobell ring to take effect */
-                if (kbdev->csf.scheduler.state != SCHED_SLEEPING)
-                        msleep(100);
+                if (!test_bit(group->csg_nr, csg_slots_status_updated)) {
+                        seq_printf(file, "*** Warn: Timed out for STATUS_UPDATE on slot %d\n",
+                                   group->csg_nr);
+                        seq_puts(file, "*** The following group-record is likely stale\n");
+                }
+                seq_puts(
+                        file,
+                        "GroupID, CSG NR, CSG Prio, Run State, Priority, C_EP(Alloc/Req), F_EP(Alloc/Req), T_EP(Alloc/Req), Exclusive, Idle\n");
+                seq_printf(
+                        file,
+                        "%7d, %6d, %8d, %9d, %8d, %11d/%3d, %11d/%3d, %11d/%3d, %9c, %4c\n",
+                        group->handle, group->csg_nr, slot_priority, group->run_state,
+                        group->priority, CSG_STATUS_EP_CURRENT_COMPUTE_EP_GET(ep_c),
+                        CSG_STATUS_EP_REQ_COMPUTE_EP_GET(ep_r),
+                        CSG_STATUS_EP_CURRENT_FRAGMENT_EP_GET(ep_c),
+                        CSG_STATUS_EP_REQ_FRAGMENT_EP_GET(ep_r),
+                        CSG_STATUS_EP_CURRENT_TILER_EP_GET(ep_c),
+                        CSG_STATUS_EP_REQ_TILER_EP_GET(ep_r), exclusive, idle);
+
         } else {
                 seq_puts(file, "GroupID, CSG NR, Run State, Priority\n");
                 seq_printf(file, "%7d, %6d, %9d, %8d\n",
@@ -404,22 +500,19 @@ static int kbasep_csf_queue_group_debugfs_show(struct seq_file *file,
 {
         u32 gr;
         struct kbase_context *const kctx = file->private;
-        struct kbase_device *const kbdev = kctx->kbdev;
+        struct kbase_device *kbdev;
 
         if (WARN_ON(!kctx))
                 return -EINVAL;
 
+        kbdev = kctx->kbdev;
+
         seq_printf(file, "MALI_CSF_CSG_DEBUGFS_VERSION: v%u\n",
                    MALI_CSF_CSG_DEBUGFS_VERSION);
 
-        mutex_lock(&kctx->csf.lock);
+        rt_mutex_lock(&kctx->csf.lock);
         kbase_csf_scheduler_lock(kbdev);
-        if (kbdev->csf.scheduler.state == SCHED_SLEEPING) {
-                /* Wait for the MCU sleep request to complete. Please refer the
-                 * update_active_group_status() function for the explanation.
-                 */
-                kbase_pm_wait_for_desired_state(kbdev);
-        }
+        kbase_csf_debugfs_update_active_groups_status(kbdev);
 
         for (gr = 0; gr < MAX_QUEUE_GROUP_NUM; gr++) {
                 struct kbase_queue_group *const group = kctx->csf.queue_groups[gr];
@@ -428,7 +521,7 @@ static int kbasep_csf_queue_group_debugfs_show(struct seq_file *file,
                         kbasep_csf_scheduler_dump_active_group(file, group);
         }
         kbase_csf_scheduler_unlock(kbdev);
-        mutex_unlock(&kctx->csf.lock);
+        rt_mutex_unlock(&kctx->csf.lock);
         return 0;
 }
 
@@ -453,12 +546,7 @@ static int kbasep_csf_scheduler_dump_active_groups(struct seq_file *file,
                    MALI_CSF_CSG_DEBUGFS_VERSION);
 
         kbase_csf_scheduler_lock(kbdev);
-        if (kbdev->csf.scheduler.state == SCHED_SLEEPING) {
-                /* Wait for the MCU sleep request to complete. Please refer the
-                 * update_active_group_status() function for the explanation.
-                 */
-                kbase_pm_wait_for_desired_state(kbdev);
-        }
+        kbase_csf_debugfs_update_active_groups_status(kbdev);
         for (csg_nr = 0; csg_nr < num_groups; csg_nr++) {
                 struct kbase_queue_group *const group =
                         kbdev->csf.scheduler.csg_slots[csg_nr].resident_group;
@@ -500,11 +588,7 @@ static const struct file_operations kbasep_csf_queue_group_debugfs_fops = {
 void kbase_csf_queue_group_debugfs_init(struct kbase_context *kctx)
 {
         struct dentry *file;
-#if (KERNEL_VERSION(4, 7, 0) <= LINUX_VERSION_CODE)
         const mode_t mode = 0444;
-#else
-        const mode_t mode = 0400;
-#endif
 
         if (WARN_ON(!kctx || IS_ERR_OR_NULL(kctx->kctx_dentry)))
                 return;
@@ -556,14 +640,11 @@ static int kbasep_csf_debugfs_scheduling_timer_kick_set(
         return 0;
 }
 
-DEFINE_SIMPLE_ATTRIBUTE(kbasep_csf_debugfs_scheduling_timer_enabled_fops,
-                        &kbasep_csf_debugfs_scheduling_timer_enabled_get,
-                        &kbasep_csf_debugfs_scheduling_timer_enabled_set,
-                        "%llu\n");
-DEFINE_SIMPLE_ATTRIBUTE(kbasep_csf_debugfs_scheduling_timer_kick_fops,
-                        NULL,
-                        &kbasep_csf_debugfs_scheduling_timer_kick_set,
-                        "%llu\n");
+DEFINE_DEBUGFS_ATTRIBUTE(kbasep_csf_debugfs_scheduling_timer_enabled_fops,
+                         &kbasep_csf_debugfs_scheduling_timer_enabled_get,
+                         &kbasep_csf_debugfs_scheduling_timer_enabled_set, "%llu\n");
+DEFINE_DEBUGFS_ATTRIBUTE(kbasep_csf_debugfs_scheduling_timer_kick_fops, NULL,
+                         &kbasep_csf_debugfs_scheduling_timer_kick_set, "%llu\n");
 
 /**
  * kbase_csf_debugfs_scheduler_state_get() - Get the state of scheduler.
@@ -671,7 +752,6 @@ void kbase_csf_debugfs_init(struct kbase_device *kbdev)
                             &kbasep_csf_debugfs_scheduler_state_fops);
 
         kbase_csf_tl_reader_debugfs_init(kbdev);
-        kbase_csf_firmware_trace_buffer_debugfs_init(kbdev);
 }
 
 #else
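
The batched STATUS_UPDATE wait introduced above relies on the CSF request/acknowledge convention: the host flips the STATUS_UPDATE bit in CSG_REQ so that it no longer matches CSG_ACK, rings the doorbell, and the firmware echoes the bit back into CSG_ACK once the group status has been published, so csg_slot_status_update_finish() treats the request as complete exactly when (CSG_REQ ^ CSG_ACK) has no STATUS_UPDATE bit set. A minimal host-side model of that handshake, in plain C with hypothetical names and an arbitrary placeholder bit (none of the kbase or firmware interfaces are used), might look like this:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Placeholder bit for the STATUS_UPDATE field; the real value is
 * CSG_REQ_STATUS_UPDATE_MASK from the CSF interface headers.
 */
#define STATUS_UPDATE_MASK (1u << 28)

static uint32_t csg_req; /* host-written input word, models CSG_REQ */
static uint32_t csg_ack; /* firmware-written output word, models CSG_ACK */

/* Host: make the request bit differ from the ack bit (i.e. toggle it),
 * as the kbase_csf_firmware_csg_input_mask() call in the patch does.
 */
static void host_request_status_update(void)
{
        csg_req = (csg_req & ~STATUS_UPDATE_MASK) | (~csg_ack & STATUS_UPDATE_MASK);
}

/* Firmware: publish the group status, then echo the request bit into ACK. */
static void firmware_ack_status_update(void)
{
        csg_ack = (csg_ack & ~STATUS_UPDATE_MASK) | (csg_req & STATUS_UPDATE_MASK);
}

/* Completion test used by csg_slot_status_update_finish(): the request is
 * done when the REQ and ACK bits match again.
 */
static bool status_update_done(void)
{
        return !((csg_req ^ csg_ack) & STATUS_UPDATE_MASK);
}

int main(void)
{
        host_request_status_update();
        printf("after request: done=%d\n", status_update_done()); /* prints 0 */

        firmware_ack_status_update();
        printf("after ack:     done=%d\n", status_update_done()); /* prints 1 */
        return 0;
}

In the driver the firmware side runs asynchronously, which is why the patch parks the caller on kbdev->csf.event_wait with the cumulative STATUS_UPDATE_WAIT_TIMEOUT and retires completed slots from the bitmap on each wakeup instead of polling.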
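The change from a u32 view of queue->user_io_addr to a u64 view means CS_INSERT and CS_EXTRACT are now read with a single 64-bit load indexed by CS_INSERT_LO / sizeof(*addr), rather than stitched together from LO/HI halves; presumably this avoids the two 32-bit reads observing a half-updated pointer. A small stand-alone illustration of the two indexing schemes (hypothetical offsets, little-endian layout assumed, not driver code):

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/* Hypothetical byte offsets of a 64-bit INSERT pointer inside a shared page;
 * the real CS_INSERT_LO/HI offsets come from the CSF interface headers.
 */
#define INSERT_LO 0x10
#define INSERT_HI 0x14

int main(void)
{
        /* Stand-in for the user I/O page, declared as u64 so both views are aligned. */
        union {
                uint64_t u64[8];
                uint32_t u32[16];
        } page = { .u64 = { 0 } };

        /* Firmware/hardware publishes the 64-bit insert pointer (little-endian layout). */
        page.u64[INSERT_LO / sizeof(uint64_t)] = 0x123456789abcdef0ULL;

        /* Old scheme: two 32-bit reads stitched together; a concurrent 64-bit
         * update could be seen half old, half new between the two loads.
         */
        uint64_t stitched = page.u32[INSERT_LO / 4] |
                            ((uint64_t)page.u32[INSERT_HI / 4] << 32);

        /* New scheme: one aligned 64-bit load, index scaled by the element size,
         * mirroring the CS_INSERT_LO / sizeof(*addr) indexing in the patch.
         */
        uint64_t direct = page.u64[INSERT_LO / sizeof(uint64_t)];

        printf("stitched=%" PRIx64 " direct=%" PRIx64 "\n", stitched, direct);
        return 0;
}

On a little-endian host both reads print the same value; the difference only matters when the 64-bit word can be updated concurrently, which is the situation for the insert/extract offsets shared with firmware.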
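The switch at the bottom of the diff from DEFINE_SIMPLE_ATTRIBUTE to DEFINE_DEBUGFS_ATTRIBUTE follows the upstream debugfs guidance: the debugfs-specific macro wraps the get/set callbacks with debugfs_file_get()/debugfs_file_put() so the file can be removed safely while it is in use, and it is normally paired with debugfs_create_file_unsafe(). A generic sketch of that pattern (hypothetical module and file names, not taken from the kbase driver):

#include <linux/debugfs.h>
#include <linux/module.h>

/* Hypothetical example module; names are illustrative, not from the driver. */
static u64 example_value;
static struct dentry *example_dir;

/* get/set callbacks use the usual u64-based simple-attribute prototypes. */
static int example_get(void *data, u64 *val)
{
        *val = example_value;
        return 0;
}

static int example_set(void *data, u64 val)
{
        example_value = val;
        return 0;
}

DEFINE_DEBUGFS_ATTRIBUTE(example_fops, example_get, example_set, "%llu\n");

static int __init example_init(void)
{
        example_dir = debugfs_create_dir("example", NULL);
        /* The _unsafe variant is intentional: DEFINE_DEBUGFS_ATTRIBUTE already
         * wraps the callbacks with debugfs_file_get()/debugfs_file_put().
         */
        debugfs_create_file_unsafe("value", 0644, example_dir, NULL, &example_fops);
        return 0;
}

static void __exit example_exit(void)
{
        debugfs_remove_recursive(example_dir);
}

module_init(example_init);
module_exit(example_exit);
MODULE_LICENSE("GPL");

Reads and writes of <debugfs>/example/value then go through example_get()/example_set() with the "%llu\n" formatting, the same shape as the scheduling_timer_enabled and scheduling_timer_kick files in this patch.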