Diffstat (limited to 'mali_kbase/backend/gpu/mali_kbase_model_dummy.c')
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_model_dummy.c  741
1 file changed, 469 insertions, 272 deletions
diff --git a/mali_kbase/backend/gpu/mali_kbase_model_dummy.c b/mali_kbase/backend/gpu/mali_kbase_model_dummy.c
index 603ffcf..46bcdc7 100644
--- a/mali_kbase/backend/gpu/mali_kbase_model_dummy.c
+++ b/mali_kbase/backend/gpu/mali_kbase_model_dummy.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -62,8 +62,9 @@
* document
*/
#include <mali_kbase.h>
+#include <device/mali_kbase_device.h>
#include <gpu/mali_kbase_gpu_regmap.h>
-#include <backend/gpu/mali_kbase_model_dummy.h>
+#include <backend/gpu/mali_kbase_model_linux.h>
#include <mali_kbase_mem_linux.h>
#if MALI_USE_CSF
@@ -80,71 +81,23 @@ static bool ipa_control_timer_enabled;
#endif
#define LO_MASK(M) ((M) & 0xFFFFFFFF)
-
-static u32 get_implementation_register(u32 reg)
-{
- switch (reg) {
- case GPU_CONTROL_REG(SHADER_PRESENT_LO):
- return LO_MASK(DUMMY_IMPLEMENTATION_SHADER_PRESENT);
- case GPU_CONTROL_REG(TILER_PRESENT_LO):
- return LO_MASK(DUMMY_IMPLEMENTATION_TILER_PRESENT);
- case GPU_CONTROL_REG(L2_PRESENT_LO):
- return LO_MASK(DUMMY_IMPLEMENTATION_L2_PRESENT);
- case GPU_CONTROL_REG(STACK_PRESENT_LO):
- return LO_MASK(DUMMY_IMPLEMENTATION_STACK_PRESENT);
-
- case GPU_CONTROL_REG(SHADER_PRESENT_HI):
- case GPU_CONTROL_REG(TILER_PRESENT_HI):
- case GPU_CONTROL_REG(L2_PRESENT_HI):
- case GPU_CONTROL_REG(STACK_PRESENT_HI):
- /* *** FALLTHROUGH *** */
- default:
- return 0;
- }
-}
-
-struct {
- unsigned long prfcnt_base;
- u32 *prfcnt_base_cpu;
- struct kbase_device *kbdev;
- struct tagged_addr *pages;
- size_t page_count;
-
- u32 time;
-
- struct {
- u32 jm;
- u32 tiler;
- u32 l2;
- u32 shader;
- } prfcnt_en;
-
- u64 l2_present;
- u64 shader_present;
-
#if !MALI_USE_CSF
- u64 jm_counters[KBASE_DUMMY_MODEL_COUNTER_PER_CORE];
-#else
- u64 cshw_counters[KBASE_DUMMY_MODEL_COUNTER_PER_CORE];
-#endif /* !MALI_USE_CSF */
- u64 tiler_counters[KBASE_DUMMY_MODEL_COUNTER_PER_CORE];
- u64 l2_counters[KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS *
- KBASE_DUMMY_MODEL_COUNTER_PER_CORE];
- u64 shader_counters[KBASE_DUMMY_MODEL_MAX_SHADER_CORES *
- KBASE_DUMMY_MODEL_COUNTER_PER_CORE];
+#define HI_MASK(M) ((M) & 0xFFFFFFFF00000000)
+#endif
-} performance_counters = {
- .l2_present = DUMMY_IMPLEMENTATION_L2_PRESENT,
- .shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT,
-};
+/* Construct a value for the THREAD_FEATURES register, *except* the two most
+ * significant bits, which are set to IMPLEMENTATION_MODEL in
+ * midgard_model_read_reg().
+ */
+#if MALI_USE_CSF
+#define THREAD_FEATURES_PARTIAL(MAX_REGISTERS, MAX_TASK_QUEUE, MAX_TG_SPLIT) \
+ ((MAX_REGISTERS) | ((MAX_TASK_QUEUE) << 24))
+#else
+#define THREAD_FEATURES_PARTIAL(MAX_REGISTERS, MAX_TASK_QUEUE, MAX_TG_SPLIT) \
+ ((MAX_REGISTERS) | ((MAX_TASK_QUEUE) << 16) | ((MAX_TG_SPLIT) << 24))
+#endif
-struct job_slot {
- int job_active;
- int job_queued;
- int job_complete_irq_asserted;
- int job_irq_mask;
- int job_disabled;
-};
+struct error_status_t hw_error_status;
/**
* struct control_reg_values_t - control register values specific to the GPU being 'emulated'
@@ -162,6 +115,9 @@ struct job_slot {
* @mmu_features: MMU features
* @gpu_features_lo: GPU features (low)
* @gpu_features_hi: GPU features (high)
+ * @shader_present: Available shader bitmap
+ * @stack_present: Core stack present bitmap
+ *
*/
struct control_reg_values_t {
const char *name;
@@ -176,16 +132,32 @@ struct control_reg_values_t {
u32 mmu_features;
u32 gpu_features_lo;
u32 gpu_features_hi;
+ u32 shader_present;
+ u32 stack_present;
+};
+
+struct job_slot {
+ int job_active;
+ int job_queued;
+ int job_complete_irq_asserted;
+ int job_irq_mask;
+ int job_disabled;
};
struct dummy_model_t {
int reset_completed;
int reset_completed_mask;
+#if !MALI_USE_CSF
int prfcnt_sample_completed;
+#endif /* !MALI_USE_CSF */
int power_changed_mask; /* 2bits: _ALL,_SINGLE */
int power_changed; /* 1bit */
bool clean_caches_completed;
bool clean_caches_completed_irq_enabled;
+#if MALI_USE_CSF
+ bool flush_pa_range_completed;
+ bool flush_pa_range_completed_irq_enabled;
+#endif
int power_on; /* 6bits: SHADER[4],TILER,L2 */
u32 stack_power_on_lo;
u32 coherency_enable;
@@ -196,45 +168,6 @@ struct dummy_model_t {
void *data;
};
-void gpu_device_set_data(void *model, void *data)
-{
- struct dummy_model_t *dummy = (struct dummy_model_t *)model;
-
- dummy->data = data;
-}
-
-void *gpu_device_get_data(void *model)
-{
- struct dummy_model_t *dummy = (struct dummy_model_t *)model;
-
- return dummy->data;
-}
-
-#define signal_int(m, s) m->slots[(s)].job_complete_irq_asserted = 1
-
-/* SCons should pass in a default GPU, but other ways of building (e.g.
- * in-tree) won't, so define one here in case.
- */
-#ifndef CONFIG_MALI_NO_MALI_DEFAULT_GPU
-#define CONFIG_MALI_NO_MALI_DEFAULT_GPU "tMIx"
-#endif
-
-static char *no_mali_gpu = CONFIG_MALI_NO_MALI_DEFAULT_GPU;
-module_param(no_mali_gpu, charp, 0000);
-MODULE_PARM_DESC(no_mali_gpu, "GPU to identify as");
-
-/* Construct a value for the THREAD_FEATURES register, *except* the two most
- * significant bits, which are set to IMPLEMENTATION_MODEL in
- * midgard_model_read_reg().
- */
-#if MALI_USE_CSF
-#define THREAD_FEATURES_PARTIAL(MAX_REGISTERS, MAX_TASK_QUEUE, MAX_TG_SPLIT) \
- ((MAX_REGISTERS) | ((MAX_TASK_QUEUE) << 24))
-#else
-#define THREAD_FEATURES_PARTIAL(MAX_REGISTERS, MAX_TASK_QUEUE, MAX_TG_SPLIT) \
- ((MAX_REGISTERS) | ((MAX_TASK_QUEUE) << 16) | ((MAX_TG_SPLIT) << 24))
-#endif
-
/* Array associating GPU names with control register values. The first
* one is used in the case of no match.
*/
@@ -251,6 +184,8 @@ static const struct control_reg_values_t all_control_reg_values[] = {
.mmu_features = 0x2830,
.gpu_features_lo = 0,
.gpu_features_hi = 0,
+ .shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT,
+ .stack_present = DUMMY_IMPLEMENTATION_STACK_PRESENT,
},
{
.name = "tHEx",
@@ -264,6 +199,8 @@ static const struct control_reg_values_t all_control_reg_values[] = {
.mmu_features = 0x2830,
.gpu_features_lo = 0,
.gpu_features_hi = 0,
+ .shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT,
+ .stack_present = DUMMY_IMPLEMENTATION_STACK_PRESENT,
},
{
.name = "tSIx",
@@ -277,6 +214,8 @@ static const struct control_reg_values_t all_control_reg_values[] = {
.mmu_features = 0x2821,
.gpu_features_lo = 0,
.gpu_features_hi = 0,
+ .shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT,
+ .stack_present = DUMMY_IMPLEMENTATION_STACK_PRESENT,
},
{
.name = "tDVx",
@@ -290,6 +229,8 @@ static const struct control_reg_values_t all_control_reg_values[] = {
.mmu_features = 0x2821,
.gpu_features_lo = 0,
.gpu_features_hi = 0,
+ .shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT,
+ .stack_present = DUMMY_IMPLEMENTATION_STACK_PRESENT,
},
{
.name = "tNOx",
@@ -303,6 +244,8 @@ static const struct control_reg_values_t all_control_reg_values[] = {
.mmu_features = 0x2830,
.gpu_features_lo = 0,
.gpu_features_hi = 0,
+ .shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT,
+ .stack_present = DUMMY_IMPLEMENTATION_STACK_PRESENT,
},
{
.name = "tGOx_r0p0",
@@ -316,6 +259,8 @@ static const struct control_reg_values_t all_control_reg_values[] = {
.mmu_features = 0x2830,
.gpu_features_lo = 0,
.gpu_features_hi = 0,
+ .shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT,
+ .stack_present = DUMMY_IMPLEMENTATION_STACK_PRESENT,
},
{
.name = "tGOx_r1p0",
@@ -330,6 +275,8 @@ static const struct control_reg_values_t all_control_reg_values[] = {
.mmu_features = 0x2823,
.gpu_features_lo = 0,
.gpu_features_hi = 0,
+ .shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT,
+ .stack_present = DUMMY_IMPLEMENTATION_STACK_PRESENT,
},
{
.name = "tTRx",
@@ -343,6 +290,8 @@ static const struct control_reg_values_t all_control_reg_values[] = {
.mmu_features = 0x2830,
.gpu_features_lo = 0,
.gpu_features_hi = 0,
+ .shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT,
+ .stack_present = DUMMY_IMPLEMENTATION_STACK_PRESENT,
},
{
.name = "tNAx",
@@ -356,6 +305,8 @@ static const struct control_reg_values_t all_control_reg_values[] = {
.mmu_features = 0x2830,
.gpu_features_lo = 0,
.gpu_features_hi = 0,
+ .shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT,
+ .stack_present = DUMMY_IMPLEMENTATION_STACK_PRESENT,
},
{
.name = "tBEx",
@@ -369,6 +320,8 @@ static const struct control_reg_values_t all_control_reg_values[] = {
.mmu_features = 0x2830,
.gpu_features_lo = 0,
.gpu_features_hi = 0,
+ .shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT_TBEX,
+ .stack_present = DUMMY_IMPLEMENTATION_STACK_PRESENT,
},
{
.name = "tBAx",
@@ -382,19 +335,8 @@ static const struct control_reg_values_t all_control_reg_values[] = {
.mmu_features = 0x2830,
.gpu_features_lo = 0,
.gpu_features_hi = 0,
- },
- {
- .name = "tDUx",
- .gpu_id = GPU_ID2_MAKE(10, 2, 0, 1, 0, 0, 0),
- .as_present = 0xFF,
- .thread_max_threads = 0x180,
- .thread_max_workgroup_size = 0x180,
- .thread_max_barrier_size = 0x180,
- .thread_features = THREAD_FEATURES_PARTIAL(0x6000, 4, 0),
- .tiler_features = 0x809,
- .mmu_features = 0x2830,
- .gpu_features_lo = 0,
- .gpu_features_hi = 0,
+ .shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT,
+ .stack_present = DUMMY_IMPLEMENTATION_STACK_PRESENT,
},
{
.name = "tODx",
@@ -408,6 +350,8 @@ static const struct control_reg_values_t all_control_reg_values[] = {
.mmu_features = 0x2830,
.gpu_features_lo = 0,
.gpu_features_hi = 0,
+ .shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT_TODX,
+ .stack_present = DUMMY_IMPLEMENTATION_STACK_PRESENT,
},
{
.name = "tGRx",
@@ -422,6 +366,8 @@ static const struct control_reg_values_t all_control_reg_values[] = {
.mmu_features = 0x2830,
.gpu_features_lo = 0,
.gpu_features_hi = 0,
+ .shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT,
+ .stack_present = DUMMY_IMPLEMENTATION_STACK_PRESENT,
},
{
.name = "tVAx",
@@ -436,6 +382,8 @@ static const struct control_reg_values_t all_control_reg_values[] = {
.mmu_features = 0x2830,
.gpu_features_lo = 0,
.gpu_features_hi = 0,
+ .shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT,
+ .stack_present = DUMMY_IMPLEMENTATION_STACK_PRESENT,
},
{
.name = "tTUx",
@@ -450,10 +398,95 @@ static const struct control_reg_values_t all_control_reg_values[] = {
.mmu_features = 0x2830,
.gpu_features_lo = 0xf,
.gpu_features_hi = 0,
+ .shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT_TTUX,
+ .stack_present = 0xF,
+ },
+ {
+ .name = "tTIx",
+ .gpu_id = GPU_ID2_MAKE(12, 8, 1, 0, 0, 0, 0),
+ .as_present = 0xFF,
+ .thread_max_threads = 0x800,
+ .thread_max_workgroup_size = 0x400,
+ .thread_max_barrier_size = 0x400,
+ .thread_features = THREAD_FEATURES_PARTIAL(0x10000, 16, 0),
+ .core_features = 0x1, /* core_1e64fma4tex */
+ .tiler_features = 0x809,
+ .mmu_features = 0x2830,
+ .gpu_features_lo = 0xf,
+ .gpu_features_hi = 0,
+ .shader_present = DUMMY_IMPLEMENTATION_SHADER_PRESENT_TTIX,
+ .stack_present = 0xF,
},
};
-struct error_status_t hw_error_status;
+static struct {
+ spinlock_t access_lock;
+#if !MALI_USE_CSF
+ unsigned long prfcnt_base;
+#endif /* !MALI_USE_CSF */
+ u32 *prfcnt_base_cpu;
+
+ u32 time;
+
+ struct gpu_model_prfcnt_en prfcnt_en;
+
+ u64 l2_present;
+ u64 shader_present;
+
+#if !MALI_USE_CSF
+ u64 jm_counters[KBASE_DUMMY_MODEL_COUNTER_PER_CORE];
+#else
+ u64 cshw_counters[KBASE_DUMMY_MODEL_COUNTER_PER_CORE];
+#endif /* !MALI_USE_CSF */
+ u64 tiler_counters[KBASE_DUMMY_MODEL_COUNTER_PER_CORE];
+ u64 l2_counters[KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS *
+ KBASE_DUMMY_MODEL_COUNTER_PER_CORE];
+ u64 shader_counters[KBASE_DUMMY_MODEL_MAX_SHADER_CORES *
+ KBASE_DUMMY_MODEL_COUNTER_PER_CORE];
+} performance_counters;
+
+static u32 get_implementation_register(u32 reg,
+ const struct control_reg_values_t *const control_reg_values)
+{
+ switch (reg) {
+ case GPU_CONTROL_REG(SHADER_PRESENT_LO):
+ return LO_MASK(control_reg_values->shader_present);
+ case GPU_CONTROL_REG(TILER_PRESENT_LO):
+ return LO_MASK(DUMMY_IMPLEMENTATION_TILER_PRESENT);
+ case GPU_CONTROL_REG(L2_PRESENT_LO):
+ return LO_MASK(DUMMY_IMPLEMENTATION_L2_PRESENT);
+ case GPU_CONTROL_REG(STACK_PRESENT_LO):
+ return LO_MASK(control_reg_values->stack_present);
+
+ case GPU_CONTROL_REG(SHADER_PRESENT_HI):
+ case GPU_CONTROL_REG(TILER_PRESENT_HI):
+ case GPU_CONTROL_REG(L2_PRESENT_HI):
+ case GPU_CONTROL_REG(STACK_PRESENT_HI):
+ /* *** FALLTHROUGH *** */
+ default:
+ return 0;
+ }
+}
+
+void gpu_device_set_data(void *model, void *data)
+{
+ struct dummy_model_t *dummy = (struct dummy_model_t *)model;
+
+ dummy->data = data;
+}
+
+void *gpu_device_get_data(void *model)
+{
+ struct dummy_model_t *dummy = (struct dummy_model_t *)model;
+
+ return dummy->data;
+}
+
+#define signal_int(m, s) m->slots[(s)].job_complete_irq_asserted = 1
+
+static char *no_mali_gpu = CONFIG_MALI_NO_MALI_DEFAULT_GPU;
+module_param(no_mali_gpu, charp, 0000);
+MODULE_PARM_DESC(no_mali_gpu, "GPU to identify as");
#if MALI_USE_CSF
static u32 gpu_model_get_prfcnt_value(enum kbase_ipa_core_type core_type,
@@ -464,6 +497,7 @@ static u32 gpu_model_get_prfcnt_value(enum kbase_ipa_core_type core_type,
u32 event_index;
u64 value = 0;
u32 core;
+ unsigned long flags;
if (WARN_ON(core_type >= KBASE_IPA_CORE_TYPE_NUM))
return 0;
@@ -475,17 +509,20 @@ static u32 gpu_model_get_prfcnt_value(enum kbase_ipa_core_type core_type,
(ipa_ctl_select_config[core_type] >> (cnt_idx * 8)) & 0xFF;
/* Currently only primary counter blocks are supported */
- if (WARN_ON(event_index >= 64))
+ if (WARN_ON(event_index >=
+ (KBASE_DUMMY_MODEL_COUNTER_HEADER_DWORDS + KBASE_DUMMY_MODEL_COUNTER_PER_CORE)))
return 0;
/* The actual events start index 4 onwards. Spec also says PRFCNT_EN,
* TIMESTAMP_LO or TIMESTAMP_HI pseudo-counters do not make sense for
* IPA counters. If selected, the value returned for them will be zero.
*/
- if (WARN_ON(event_index <= 3))
+ if (WARN_ON(event_index < KBASE_DUMMY_MODEL_COUNTER_HEADER_DWORDS))
return 0;
- event_index -= 4;
+ event_index -= KBASE_DUMMY_MODEL_COUNTER_HEADER_DWORDS;
+
+ spin_lock_irqsave(&performance_counters.access_lock, flags);
switch (core_type) {
case KBASE_IPA_CORE_TYPE_CSHW:
@@ -514,28 +551,46 @@ static u32 gpu_model_get_prfcnt_value(enum kbase_ipa_core_type core_type,
event_index += KBASE_DUMMY_MODEL_COUNTER_PER_CORE;
}
+ spin_unlock_irqrestore(&performance_counters.access_lock, flags);
+
if (is_low_word)
return (value & U32_MAX);
else
return (value >> 32);
}
+#endif /* MALI_USE_CSF */
-void gpu_model_clear_prfcnt_values(void)
+/**
+ * gpu_model_clear_prfcnt_values_nolock - Clear performance counter values
+ *
+ * Sets all performance counter values to zero. The performance counter access
+ * lock must be held when calling this function.
+ */
+static void gpu_model_clear_prfcnt_values_nolock(void)
{
- memset(performance_counters.cshw_counters, 0,
- sizeof(performance_counters.cshw_counters));
-
- memset(performance_counters.tiler_counters, 0,
- sizeof(performance_counters.tiler_counters));
-
- memset(performance_counters.l2_counters, 0,
- sizeof(performance_counters.l2_counters));
-
+ lockdep_assert_held(&performance_counters.access_lock);
+#if !MALI_USE_CSF
+ memset(performance_counters.jm_counters, 0, sizeof(performance_counters.jm_counters));
+#else
+ memset(performance_counters.cshw_counters, 0, sizeof(performance_counters.cshw_counters));
+#endif /* !MALI_USE_CSF */
+ memset(performance_counters.tiler_counters, 0, sizeof(performance_counters.tiler_counters));
+ memset(performance_counters.l2_counters, 0, sizeof(performance_counters.l2_counters));
memset(performance_counters.shader_counters, 0,
sizeof(performance_counters.shader_counters));
}
+
+#if MALI_USE_CSF
+void gpu_model_clear_prfcnt_values(void)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&performance_counters.access_lock, flags);
+ gpu_model_clear_prfcnt_values_nolock();
+ spin_unlock_irqrestore(&performance_counters.access_lock, flags);
+}
KBASE_EXPORT_TEST_API(gpu_model_clear_prfcnt_values);
-#endif
+#endif /* MALI_USE_CSF */
/**
* gpu_model_dump_prfcnt_blocks() - Dump performance counter values to buffer
@@ -545,17 +600,20 @@ KBASE_EXPORT_TEST_API(gpu_model_clear_prfcnt_values);
* @block_count: Number of blocks to dump
* @prfcnt_enable_mask: Counter enable mask
* @blocks_present: Available blocks bit mask
+ *
+ * The performance counter access lock must be held before calling this
+ * function.
*/
-static void gpu_model_dump_prfcnt_blocks(u64 *values, u32 *out_index,
- u32 block_count,
- u32 prfcnt_enable_mask,
- u64 blocks_present)
+static void gpu_model_dump_prfcnt_blocks(u64 *values, u32 *out_index, u32 block_count,
+ u32 prfcnt_enable_mask, u64 blocks_present)
{
u32 block_idx, counter;
u32 counter_value = 0;
u32 *prfcnt_base;
u32 index = 0;
+ lockdep_assert_held(&performance_counters.access_lock);
+
prfcnt_base = performance_counters.prfcnt_base_cpu;
for (block_idx = 0; block_idx < block_count; block_idx++) {
@@ -594,35 +652,18 @@ static void gpu_model_dump_prfcnt_blocks(u64 *values, u32 *out_index,
}
}
-/**
- * gpu_model_sync_dummy_prfcnt() - Synchronize dumped performance counter values
- *
- * Used to ensure counter values are not lost if cache invalidation is performed
- * prior to reading.
- */
-static void gpu_model_sync_dummy_prfcnt(void)
-{
- int i;
- struct page *pg;
-
- for (i = 0; i < performance_counters.page_count; i++) {
- pg = as_page(performance_counters.pages[i]);
- kbase_sync_single_for_device(performance_counters.kbdev,
- kbase_dma_addr(pg), PAGE_SIZE,
- DMA_BIDIRECTIONAL);
- }
-}
-
-static void midgard_model_dump_prfcnt(void)
+static void gpu_model_dump_nolock(void)
{
u32 index = 0;
+ lockdep_assert_held(&performance_counters.access_lock);
+
#if !MALI_USE_CSF
- gpu_model_dump_prfcnt_blocks(performance_counters.jm_counters, &index,
- 1, 0xffffffff, 0x1);
+ gpu_model_dump_prfcnt_blocks(performance_counters.jm_counters, &index, 1,
+ performance_counters.prfcnt_en.fe, 0x1);
#else
- gpu_model_dump_prfcnt_blocks(performance_counters.cshw_counters, &index,
- 1, 0xffffffff, 0x1);
+ gpu_model_dump_prfcnt_blocks(performance_counters.cshw_counters, &index, 1,
+ performance_counters.prfcnt_en.fe, 0x1);
#endif /* !MALI_USE_CSF */
gpu_model_dump_prfcnt_blocks(performance_counters.tiler_counters,
&index, 1,
@@ -637,12 +678,48 @@ static void midgard_model_dump_prfcnt(void)
performance_counters.prfcnt_en.shader,
performance_counters.shader_present);
- gpu_model_sync_dummy_prfcnt();
+ /* Counter values are cleared after each dump */
+ gpu_model_clear_prfcnt_values_nolock();
/* simulate a 'long' time between samples */
performance_counters.time += 10;
}
+#if !MALI_USE_CSF
+static void midgard_model_dump_prfcnt(void)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&performance_counters.access_lock, flags);
+ gpu_model_dump_nolock();
+ spin_unlock_irqrestore(&performance_counters.access_lock, flags);
+}
+#else
+void gpu_model_prfcnt_dump_request(u32 *sample_buf, struct gpu_model_prfcnt_en enable_maps)
+{
+ unsigned long flags;
+
+ if (WARN_ON(!sample_buf))
+ return;
+
+ spin_lock_irqsave(&performance_counters.access_lock, flags);
+ performance_counters.prfcnt_base_cpu = sample_buf;
+ performance_counters.prfcnt_en = enable_maps;
+ gpu_model_dump_nolock();
+ spin_unlock_irqrestore(&performance_counters.access_lock, flags);
+}
+
+void gpu_model_glb_request_job_irq(void *model)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&hw_error_status.access_lock, flags);
+ hw_error_status.job_irq_status |= JOB_IRQ_GLOBAL_IF;
+ spin_unlock_irqrestore(&hw_error_status.access_lock, flags);
+ gpu_device_raise_irq(model, MODEL_LINUX_JOB_IRQ);
+}
+#endif /* !MALI_USE_CSF */
+
static void init_register_statuses(struct dummy_model_t *dummy)
{
int i;
@@ -671,8 +748,10 @@ static void init_register_statuses(struct dummy_model_t *dummy)
performance_counters.time = 0;
}
-static void update_register_statuses(struct dummy_model_t *dummy, int job_slot)
+static void update_register_statuses(struct dummy_model_t *dummy, unsigned int job_slot)
{
+ lockdep_assert_held(&hw_error_status.access_lock);
+
if (hw_error_status.errors_mask & IS_A_JOB_ERROR) {
if (job_slot == hw_error_status.current_job_slot) {
#if !MALI_USE_CSF
@@ -922,6 +1001,7 @@ static void update_job_irq_js_state(struct dummy_model_t *dummy, int mask)
{
int i;
+ lockdep_assert_held(&hw_error_status.access_lock);
pr_debug("%s", "Updating the JS_ACTIVE register");
for (i = 0; i < NUM_SLOTS; i++) {
@@ -967,6 +1047,21 @@ static const struct control_reg_values_t *find_control_reg_values(const char *gp
size_t i;
const struct control_reg_values_t *ret = NULL;
+ /* Edge case for tGOx, as it has 2 entries in the table for its R0 and R1
+ * revisions respectively. As none of them are named "tGOx" the name comparison
+ * needs to be fixed in these cases. CONFIG_GPU_HWVER should be one of "r0p0"
+ * or "r1p0" and is derived from the DDK's build configuration. In cases
+ * where it is unavailable, it defaults to tGOx r1p0.
+ */
+ if (!strcmp(gpu, "tGOx")) {
+#ifdef CONFIG_GPU_HWVER
+ if (!strcmp(CONFIG_GPU_HWVER, "r0p0"))
+ gpu = "tGOx_r0p0";
+ else if (!strcmp(CONFIG_GPU_HWVER, "r1p0"))
+#endif /* CONFIG_GPU_HWVER defined */
+ gpu = "tGOx_r1p0";
+ }
+
for (i = 0; i < ARRAY_SIZE(all_control_reg_values); ++i) {
const struct control_reg_values_t * const fcrv = &all_control_reg_values[i];
@@ -986,17 +1081,29 @@ static const struct control_reg_values_t *find_control_reg_values(const char *gp
return ret;
}
-void *midgard_model_create(const void *config)
+void *midgard_model_create(struct kbase_device *kbdev)
{
struct dummy_model_t *dummy = NULL;
+ spin_lock_init(&hw_error_status.access_lock);
+ spin_lock_init(&performance_counters.access_lock);
+
dummy = kzalloc(sizeof(*dummy), GFP_KERNEL);
if (dummy) {
dummy->job_irq_js_state = 0;
init_register_statuses(dummy);
dummy->control_reg_values = find_control_reg_values(no_mali_gpu);
+ performance_counters.l2_present = get_implementation_register(
+ GPU_CONTROL_REG(L2_PRESENT_LO), dummy->control_reg_values);
+ performance_counters.shader_present = get_implementation_register(
+ GPU_CONTROL_REG(SHADER_PRESENT_LO), dummy->control_reg_values);
+
+ gpu_device_set_data(dummy, kbdev);
+
+ dev_info(kbdev->dev, "Using Dummy Model");
}
+
return dummy;
}
@@ -1009,18 +1116,24 @@ static void midgard_model_get_outputs(void *h)
{
struct dummy_model_t *dummy = (struct dummy_model_t *)h;
+ lockdep_assert_held(&hw_error_status.access_lock);
+
if (hw_error_status.job_irq_status)
- gpu_device_raise_irq(dummy, GPU_DUMMY_JOB_IRQ);
+ gpu_device_raise_irq(dummy, MODEL_LINUX_JOB_IRQ);
if ((dummy->power_changed && dummy->power_changed_mask) ||
(dummy->reset_completed & dummy->reset_completed_mask) ||
hw_error_status.gpu_error_irq ||
- (dummy->clean_caches_completed && dummy->clean_caches_completed_irq_enabled) ||
- dummy->prfcnt_sample_completed)
- gpu_device_raise_irq(dummy, GPU_DUMMY_GPU_IRQ);
+#if !MALI_USE_CSF
+ dummy->prfcnt_sample_completed ||
+#else
+ (dummy->flush_pa_range_completed && dummy->flush_pa_range_completed_irq_enabled) ||
+#endif
+ (dummy->clean_caches_completed && dummy->clean_caches_completed_irq_enabled))
+ gpu_device_raise_irq(dummy, MODEL_LINUX_GPU_IRQ);
if (hw_error_status.mmu_irq_rawstat & hw_error_status.mmu_irq_mask)
- gpu_device_raise_irq(dummy, GPU_DUMMY_MMU_IRQ);
+ gpu_device_raise_irq(dummy, MODEL_LINUX_MMU_IRQ);
}
static void midgard_model_update(void *h)
@@ -1028,6 +1141,8 @@ static void midgard_model_update(void *h)
struct dummy_model_t *dummy = (struct dummy_model_t *)h;
int i;
+ lockdep_assert_held(&hw_error_status.access_lock);
+
for (i = 0; i < NUM_SLOTS; i++) {
if (!dummy->slots[i].job_active)
continue;
@@ -1074,6 +1189,8 @@ static void invalidate_active_jobs(struct dummy_model_t *dummy)
{
int i;
+ lockdep_assert_held(&hw_error_status.access_lock);
+
for (i = 0; i < NUM_SLOTS; i++) {
if (dummy->slots[i].job_active) {
hw_error_status.job_irq_rawstat |= (1 << (16 + i));
@@ -1083,13 +1200,17 @@ static void invalidate_active_jobs(struct dummy_model_t *dummy)
}
}
-u8 midgard_model_write_reg(void *h, u32 addr, u32 value)
+void midgard_model_write_reg(void *h, u32 addr, u32 value)
{
+ unsigned long flags;
struct dummy_model_t *dummy = (struct dummy_model_t *)h;
+
+ spin_lock_irqsave(&hw_error_status.access_lock, flags);
+
#if !MALI_USE_CSF
if ((addr >= JOB_CONTROL_REG(JOB_SLOT0)) &&
(addr < (JOB_CONTROL_REG(JOB_SLOT15) + 0x80))) {
- int slot_idx = (addr >> 7) & 0xf;
+ unsigned int slot_idx = (addr >> 7) & 0xf;
KBASE_DEBUG_ASSERT(slot_idx < NUM_SLOTS);
if (addr == JOB_SLOT_REG(slot_idx, JS_HEAD_NEXT_LO)) {
@@ -1176,6 +1297,9 @@ u8 midgard_model_write_reg(void *h, u32 addr, u32 value)
dummy->reset_completed_mask = (value >> 8) & 0x01;
dummy->power_changed_mask = (value >> 9) & 0x03;
dummy->clean_caches_completed_irq_enabled = (value & (1u << 17)) != 0u;
+#if MALI_USE_CSF
+ dummy->flush_pa_range_completed_irq_enabled = (value & (1u << 20)) != 0u;
+#endif
} else if (addr == GPU_CONTROL_REG(COHERENCY_ENABLE)) {
dummy->coherency_enable = value;
} else if (addr == GPU_CONTROL_REG(GPU_IRQ_CLEAR)) {
@@ -1188,8 +1312,16 @@ u8 midgard_model_write_reg(void *h, u32 addr, u32 value)
if (value & (1 << 17))
dummy->clean_caches_completed = false;
- if (value & (1 << 16))
+
+#if MALI_USE_CSF
+ if (value & (1u << 20))
+ dummy->flush_pa_range_completed = false;
+#endif /* MALI_USE_CSF */
+
+#if !MALI_USE_CSF
+ if (value & PRFCNT_SAMPLE_COMPLETED) /* (1 << 16) */
dummy->prfcnt_sample_completed = 0;
+#endif /* !MALI_USE_CSF */
/*update error status */
hw_error_status.gpu_error_irq &= ~(value);
@@ -1214,21 +1346,42 @@ u8 midgard_model_write_reg(void *h, u32 addr, u32 value)
pr_debug("clean caches requested");
dummy->clean_caches_completed = true;
break;
+#if MALI_USE_CSF
+ case GPU_COMMAND_FLUSH_PA_RANGE_CLN_INV_L2:
+ case GPU_COMMAND_FLUSH_PA_RANGE_CLN_INV_L2_LSC:
+ case GPU_COMMAND_FLUSH_PA_RANGE_CLN_INV_FULL:
+ pr_debug("pa range flush requested");
+ dummy->flush_pa_range_completed = true;
+ break;
+#endif /* MALI_USE_CSF */
+#if !MALI_USE_CSF
case GPU_COMMAND_PRFCNT_SAMPLE:
midgard_model_dump_prfcnt();
dummy->prfcnt_sample_completed = 1;
+#endif /* !MALI_USE_CSF */
default:
break;
}
+#if MALI_USE_CSF
+ } else if (addr >= GPU_CONTROL_REG(GPU_COMMAND_ARG0_LO) &&
+ addr <= GPU_CONTROL_REG(GPU_COMMAND_ARG1_HI)) {
+ /* Writes ignored */
+#endif
} else if (addr == GPU_CONTROL_REG(L2_CONFIG)) {
dummy->l2_config = value;
}
#if MALI_USE_CSF
- else if (addr >= GPU_CONTROL_REG(CSF_HW_DOORBELL_PAGE_OFFSET) &&
- addr < GPU_CONTROL_REG(CSF_HW_DOORBELL_PAGE_OFFSET +
- (CSF_NUM_DOORBELL * CSF_HW_DOORBELL_PAGE_SIZE))) {
- if (addr == GPU_CONTROL_REG(CSF_HW_DOORBELL_PAGE_OFFSET))
+ else if (addr >= CSF_HW_DOORBELL_PAGE_OFFSET &&
+ addr < CSF_HW_DOORBELL_PAGE_OFFSET +
+ (CSF_NUM_DOORBELL * CSF_HW_DOORBELL_PAGE_SIZE)) {
+ if (addr == CSF_HW_DOORBELL_PAGE_OFFSET)
hw_error_status.job_irq_status = JOB_IRQ_GLOBAL_IF;
+ } else if ((addr >= GPU_CONTROL_REG(SYSC_ALLOC0)) &&
+ (addr < GPU_CONTROL_REG(SYSC_ALLOC(SYSC_ALLOC_COUNT)))) {
+ /* Do nothing */
+ } else if ((addr >= GPU_CONTROL_REG(ASN_HASH_0)) &&
+ (addr < GPU_CONTROL_REG(ASN_HASH(ASN_HASH_COUNT)))) {
+ /* Do nothing */
} else if (addr == IPA_CONTROL_REG(COMMAND)) {
pr_debug("Received IPA_CONTROL command");
} else if (addr == IPA_CONTROL_REG(TIMER)) {
@@ -1249,14 +1402,13 @@ u8 midgard_model_write_reg(void *h, u32 addr, u32 value)
}
}
#endif
- else if (addr == MMU_REG(MMU_IRQ_MASK)) {
+ else if (addr == MMU_CONTROL_REG(MMU_IRQ_MASK)) {
hw_error_status.mmu_irq_mask = value;
- } else if (addr == MMU_REG(MMU_IRQ_CLEAR)) {
+ } else if (addr == MMU_CONTROL_REG(MMU_IRQ_CLEAR)) {
hw_error_status.mmu_irq_rawstat &= (~value);
- } else if ((addr >= MMU_AS_REG(0, AS_TRANSTAB_LO)) &&
- (addr <= MMU_AS_REG(15, AS_STATUS))) {
- int mem_addr_space = (addr - MMU_AS_REG(0, AS_TRANSTAB_LO))
- >> 6;
+ } else if ((addr >= MMU_STAGE1_REG(MMU_AS_REG(0, AS_TRANSTAB_LO))) &&
+ (addr <= MMU_STAGE1_REG(MMU_AS_REG(15, AS_STATUS)))) {
+ int mem_addr_space = (addr - MMU_STAGE1_REG(MMU_AS_REG(0, AS_TRANSTAB_LO))) >> 6;
switch (addr & 0x3F) {
case AS_COMMAND:
@@ -1346,20 +1498,24 @@ u8 midgard_model_write_reg(void *h, u32 addr, u32 value)
mem_addr_space, addr, value);
break;
}
- } else if (addr >= GPU_CONTROL_REG(PRFCNT_BASE_LO) &&
- addr <= GPU_CONTROL_REG(PRFCNT_MMU_L2_EN)) {
+ } else {
switch (addr) {
+#if !MALI_USE_CSF
case PRFCNT_BASE_LO:
- performance_counters.prfcnt_base |= value;
+ performance_counters.prfcnt_base =
+ HI_MASK(performance_counters.prfcnt_base) | value;
+ performance_counters.prfcnt_base_cpu =
+ (u32 *)(uintptr_t)performance_counters.prfcnt_base;
break;
case PRFCNT_BASE_HI:
- performance_counters.prfcnt_base |= ((u64) value) << 32;
+ performance_counters.prfcnt_base =
+ LO_MASK(performance_counters.prfcnt_base) | (((u64)value) << 32);
+ performance_counters.prfcnt_base_cpu =
+ (u32 *)(uintptr_t)performance_counters.prfcnt_base;
break;
-#if !MALI_USE_CSF
case PRFCNT_JM_EN:
- performance_counters.prfcnt_en.jm = value;
+ performance_counters.prfcnt_en.fe = value;
break;
-#endif /* !MALI_USE_CSF */
case PRFCNT_SHADER_EN:
performance_counters.prfcnt_en.shader = value;
break;
@@ -1369,9 +1525,7 @@ u8 midgard_model_write_reg(void *h, u32 addr, u32 value)
case PRFCNT_MMU_L2_EN:
performance_counters.prfcnt_en.l2 = value;
break;
- }
- } else {
- switch (addr) {
+#endif /* !MALI_USE_CSF */
case TILER_PWRON_LO:
dummy->power_on |= (value & 1) << 1;
/* Also ensure L2 is powered on */
@@ -1379,7 +1533,8 @@ u8 midgard_model_write_reg(void *h, u32 addr, u32 value)
dummy->power_changed = 1;
break;
case SHADER_PWRON_LO:
- dummy->power_on |= (value & 0xF) << 2;
+ dummy->power_on |=
+ (value & dummy->control_reg_values->shader_present) << 2;
dummy->power_changed = 1;
break;
case L2_PWRON_LO:
@@ -1395,7 +1550,8 @@ u8 midgard_model_write_reg(void *h, u32 addr, u32 value)
dummy->power_changed = 1;
break;
case SHADER_PWROFF_LO:
- dummy->power_on &= ~((value & 0xF) << 2);
+ dummy->power_on &=
+ ~((value & dummy->control_reg_values->shader_present) << 2);
dummy->power_changed = 1;
break;
case L2_PWROFF_LO:
@@ -1416,6 +1572,7 @@ u8 midgard_model_write_reg(void *h, u32 addr, u32 value)
case PWR_OVERRIDE0:
#if !MALI_USE_CSF
case JM_CONFIG:
+ case PRFCNT_CONFIG:
#else /* !MALI_USE_CSF */
case CSF_CONFIG:
#endif /* !MALI_USE_CSF */
@@ -1434,13 +1591,16 @@ u8 midgard_model_write_reg(void *h, u32 addr, u32 value)
midgard_model_update(dummy);
midgard_model_get_outputs(dummy);
-
- return 1;
+ spin_unlock_irqrestore(&hw_error_status.access_lock, flags);
}
-u8 midgard_model_read_reg(void *h, u32 addr, u32 * const value)
+void midgard_model_read_reg(void *h, u32 addr, u32 *const value)
{
+ unsigned long flags;
struct dummy_model_t *dummy = (struct dummy_model_t *)h;
+
+ spin_lock_irqsave(&hw_error_status.access_lock, flags);
+
*value = 0; /* 0 by default */
#if !MALI_USE_CSF
if (addr == JOB_CONTROL_REG(JOB_IRQ_JS_STATE)) {
@@ -1475,24 +1635,44 @@ u8 midgard_model_read_reg(void *h, u32 addr, u32 * const value)
#endif /* !MALI_USE_CSF */
else if (addr == GPU_CONTROL_REG(GPU_IRQ_MASK)) {
*value = (dummy->reset_completed_mask << 8) |
- (dummy->power_changed_mask << 9) | (1 << 7) | 1;
+ ((dummy->clean_caches_completed_irq_enabled ? 1u : 0u) << 17) |
+#if MALI_USE_CSF
+ ((dummy->flush_pa_range_completed_irq_enabled ? 1u : 0u) << 20) |
+#endif
+ (dummy->power_changed_mask << 9) | (1 << 7) | 1;
pr_debug("GPU_IRQ_MASK read %x", *value);
} else if (addr == GPU_CONTROL_REG(GPU_IRQ_RAWSTAT)) {
*value = (dummy->power_changed << 9) | (dummy->power_changed << 10) |
(dummy->reset_completed << 8) |
+#if !MALI_USE_CSF
+ (dummy->prfcnt_sample_completed ? PRFCNT_SAMPLE_COMPLETED : 0) |
+#endif /* !MALI_USE_CSF */
((dummy->clean_caches_completed ? 1u : 0u) << 17) |
- (dummy->prfcnt_sample_completed << 16) | hw_error_status.gpu_error_irq;
+#if MALI_USE_CSF
+ ((dummy->flush_pa_range_completed ? 1u : 0u) << 20) |
+#endif
+ hw_error_status.gpu_error_irq;
pr_debug("GPU_IRQ_RAWSTAT read %x", *value);
} else if (addr == GPU_CONTROL_REG(GPU_IRQ_STATUS)) {
*value = ((dummy->power_changed && (dummy->power_changed_mask & 0x1)) << 9) |
((dummy->power_changed && (dummy->power_changed_mask & 0x2)) << 10) |
((dummy->reset_completed & dummy->reset_completed_mask) << 8) |
+#if !MALI_USE_CSF
+ (dummy->prfcnt_sample_completed ? PRFCNT_SAMPLE_COMPLETED : 0) |
+#endif /* !MALI_USE_CSF */
(((dummy->clean_caches_completed &&
dummy->clean_caches_completed_irq_enabled) ?
1u :
0u)
<< 17) |
- (dummy->prfcnt_sample_completed << 16) | hw_error_status.gpu_error_irq;
+#if MALI_USE_CSF
+ (((dummy->flush_pa_range_completed &&
+ dummy->flush_pa_range_completed_irq_enabled) ?
+ 1u :
+ 0u)
+ << 20) |
+#endif
+ hw_error_status.gpu_error_irq;
pr_debug("GPU_IRQ_STAT read %x", *value);
} else if (addr == GPU_CONTROL_REG(GPU_STATUS)) {
*value = 0;
@@ -1504,8 +1684,18 @@ u8 midgard_model_read_reg(void *h, u32 addr, u32 * const value)
*value = hw_error_status.gpu_fault_status;
} else if (addr == GPU_CONTROL_REG(L2_CONFIG)) {
*value = dummy->l2_config;
- } else if ((addr >= GPU_CONTROL_REG(SHADER_PRESENT_LO)) &&
- (addr <= GPU_CONTROL_REG(L2_MMU_CONFIG))) {
+ }
+#if MALI_USE_CSF
+ else if ((addr >= GPU_CONTROL_REG(SYSC_ALLOC0)) &&
+ (addr < GPU_CONTROL_REG(SYSC_ALLOC(SYSC_ALLOC_COUNT)))) {
+ *value = 0;
+ } else if ((addr >= GPU_CONTROL_REG(ASN_HASH_0)) &&
+ (addr < GPU_CONTROL_REG(ASN_HASH(ASN_HASH_COUNT)))) {
+ *value = 0;
+ }
+#endif
+ else if ((addr >= GPU_CONTROL_REG(SHADER_PRESENT_LO)) &&
+ (addr <= GPU_CONTROL_REG(L2_MMU_CONFIG))) {
switch (addr) {
case GPU_CONTROL_REG(SHADER_PRESENT_LO):
case GPU_CONTROL_REG(SHADER_PRESENT_HI):
@@ -1515,27 +1705,27 @@ u8 midgard_model_read_reg(void *h, u32 addr, u32 * const value)
case GPU_CONTROL_REG(L2_PRESENT_HI):
case GPU_CONTROL_REG(STACK_PRESENT_LO):
case GPU_CONTROL_REG(STACK_PRESENT_HI):
- *value = get_implementation_register(addr);
+ *value = get_implementation_register(addr, dummy->control_reg_values);
break;
case GPU_CONTROL_REG(SHADER_READY_LO):
*value = (dummy->power_on >> 0x02) &
- get_implementation_register(
- GPU_CONTROL_REG(SHADER_PRESENT_LO));
+ get_implementation_register(GPU_CONTROL_REG(SHADER_PRESENT_LO),
+ dummy->control_reg_values);
break;
case GPU_CONTROL_REG(TILER_READY_LO):
*value = (dummy->power_on >> 0x01) &
- get_implementation_register(
- GPU_CONTROL_REG(TILER_PRESENT_LO));
+ get_implementation_register(GPU_CONTROL_REG(TILER_PRESENT_LO),
+ dummy->control_reg_values);
break;
case GPU_CONTROL_REG(L2_READY_LO):
*value = dummy->power_on &
- get_implementation_register(
- GPU_CONTROL_REG(L2_PRESENT_LO));
+ get_implementation_register(GPU_CONTROL_REG(L2_PRESENT_LO),
+ dummy->control_reg_values);
break;
case GPU_CONTROL_REG(STACK_READY_LO):
*value = dummy->stack_power_on_lo &
- get_implementation_register(
- GPU_CONTROL_REG(STACK_PRESENT_LO));
+ get_implementation_register(GPU_CONTROL_REG(STACK_PRESENT_LO),
+ dummy->control_reg_values);
break;
case GPU_CONTROL_REG(SHADER_READY_HI):
@@ -1729,10 +1919,9 @@ u8 midgard_model_read_reg(void *h, u32 addr, u32 * const value)
} else if (addr >= GPU_CONTROL_REG(CYCLE_COUNT_LO)
&& addr <= GPU_CONTROL_REG(TIMESTAMP_HI)) {
*value = 0;
- } else if (addr >= MMU_AS_REG(0, AS_TRANSTAB_LO)
- && addr <= MMU_AS_REG(15, AS_STATUS)) {
- int mem_addr_space = (addr - MMU_AS_REG(0, AS_TRANSTAB_LO))
- >> 6;
+ } else if (addr >= MMU_STAGE1_REG(MMU_AS_REG(0, AS_TRANSTAB_LO)) &&
+ addr <= MMU_STAGE1_REG(MMU_AS_REG(15, AS_STATUS))) {
+ int mem_addr_space = (addr - MMU_STAGE1_REG(MMU_AS_REG(0, AS_TRANSTAB_LO))) >> 6;
switch (addr & 0x3F) {
case AS_TRANSTAB_LO:
@@ -1776,11 +1965,11 @@ u8 midgard_model_read_reg(void *h, u32 addr, u32 * const value)
*value = 0;
break;
}
- } else if (addr == MMU_REG(MMU_IRQ_MASK)) {
+ } else if (addr == MMU_CONTROL_REG(MMU_IRQ_MASK)) {
*value = hw_error_status.mmu_irq_mask;
- } else if (addr == MMU_REG(MMU_IRQ_RAWSTAT)) {
+ } else if (addr == MMU_CONTROL_REG(MMU_IRQ_RAWSTAT)) {
*value = hw_error_status.mmu_irq_rawstat;
- } else if (addr == MMU_REG(MMU_IRQ_STATUS)) {
+ } else if (addr == MMU_CONTROL_REG(MMU_IRQ_STATUS)) {
*value = hw_error_status.mmu_irq_mask &
hw_error_status.mmu_irq_rawstat;
}
@@ -1788,8 +1977,7 @@ u8 midgard_model_read_reg(void *h, u32 addr, u32 * const value)
else if (addr == IPA_CONTROL_REG(STATUS)) {
*value = (ipa_control_timer_enabled << 31);
} else if ((addr >= IPA_CONTROL_REG(VALUE_CSHW_REG_LO(0))) &&
- (addr <= IPA_CONTROL_REG(VALUE_CSHW_REG_HI(
- IPA_CTL_MAX_VAL_CNT_IDX)))) {
+ (addr <= IPA_CONTROL_REG(VALUE_CSHW_REG_HI(IPA_CTL_MAX_VAL_CNT_IDX)))) {
u32 counter_index =
(addr - IPA_CONTROL_REG(VALUE_CSHW_REG_LO(0))) >> 3;
bool is_low_word =
@@ -1798,8 +1986,7 @@ u8 midgard_model_read_reg(void *h, u32 addr, u32 * const value)
*value = gpu_model_get_prfcnt_value(KBASE_IPA_CORE_TYPE_CSHW,
counter_index, is_low_word);
} else if ((addr >= IPA_CONTROL_REG(VALUE_MEMSYS_REG_LO(0))) &&
- (addr <= IPA_CONTROL_REG(VALUE_MEMSYS_REG_HI(
- IPA_CTL_MAX_VAL_CNT_IDX)))) {
+ (addr <= IPA_CONTROL_REG(VALUE_MEMSYS_REG_HI(IPA_CTL_MAX_VAL_CNT_IDX)))) {
u32 counter_index =
(addr - IPA_CONTROL_REG(VALUE_MEMSYS_REG_LO(0))) >> 3;
bool is_low_word =
@@ -1808,8 +1995,7 @@ u8 midgard_model_read_reg(void *h, u32 addr, u32 * const value)
*value = gpu_model_get_prfcnt_value(KBASE_IPA_CORE_TYPE_MEMSYS,
counter_index, is_low_word);
} else if ((addr >= IPA_CONTROL_REG(VALUE_TILER_REG_LO(0))) &&
- (addr <= IPA_CONTROL_REG(VALUE_TILER_REG_HI(
- IPA_CTL_MAX_VAL_CNT_IDX)))) {
+ (addr <= IPA_CONTROL_REG(VALUE_TILER_REG_HI(IPA_CTL_MAX_VAL_CNT_IDX)))) {
u32 counter_index =
(addr - IPA_CONTROL_REG(VALUE_TILER_REG_LO(0))) >> 3;
bool is_low_word =
@@ -1818,8 +2004,7 @@ u8 midgard_model_read_reg(void *h, u32 addr, u32 * const value)
*value = gpu_model_get_prfcnt_value(KBASE_IPA_CORE_TYPE_TILER,
counter_index, is_low_word);
} else if ((addr >= IPA_CONTROL_REG(VALUE_SHADER_REG_LO(0))) &&
- (addr <= IPA_CONTROL_REG(VALUE_SHADER_REG_HI(
- IPA_CTL_MAX_VAL_CNT_IDX)))) {
+ (addr <= IPA_CONTROL_REG(VALUE_SHADER_REG_HI(IPA_CTL_MAX_VAL_CNT_IDX)))) {
u32 counter_index =
(addr - IPA_CONTROL_REG(VALUE_SHADER_REG_LO(0))) >> 3;
bool is_low_word =
@@ -1840,18 +2025,18 @@ u8 midgard_model_read_reg(void *h, u32 addr, u32 * const value)
*value = 0;
}
+ spin_unlock_irqrestore(&hw_error_status.access_lock, flags);
CSTD_UNUSED(dummy);
-
- return 1;
}
-static u32 set_user_sample_core_type(u64 *counters,
- u32 *usr_data_start, u32 usr_data_offset,
- u32 usr_data_size, u32 core_count)
+static u32 set_user_sample_core_type(u64 *counters, u32 *usr_data_start, u32 usr_data_offset,
+ u32 usr_data_size, u32 core_count)
{
u32 sample_size;
u32 *usr_data = NULL;
+ lockdep_assert_held(&performance_counters.access_lock);
+
sample_size =
core_count * KBASE_DUMMY_MODEL_COUNTER_PER_CORE * sizeof(u32);
@@ -1866,11 +2051,7 @@ static u32 set_user_sample_core_type(u64 *counters,
u32 i;
for (i = 0; i < loop_cnt; i++) {
- if (copy_from_user(&counters[i], &usr_data[i],
- sizeof(u32))) {
- model_error_log(KBASE_CORE, "Unable to set counter sample 2");
- break;
- }
+ counters[i] = usr_data[i];
}
}
@@ -1884,6 +2065,8 @@ static u32 set_kernel_sample_core_type(u64 *counters,
u32 sample_size;
u64 *usr_data = NULL;
+ lockdep_assert_held(&performance_counters.access_lock);
+
sample_size =
core_count * KBASE_DUMMY_MODEL_COUNTER_PER_CORE * sizeof(u64);
@@ -1900,49 +2083,70 @@ static u32 set_kernel_sample_core_type(u64 *counters,
}
/* Counter values injected through ioctl are of 32 bits */
-void gpu_model_set_dummy_prfcnt_sample(u32 *usr_data, u32 usr_data_size)
+int gpu_model_set_dummy_prfcnt_user_sample(u32 __user *data, u32 size)
{
+ unsigned long flags;
+ u32 *user_data;
u32 offset = 0;
+ if (data == NULL || size == 0 || size > KBASE_DUMMY_MODEL_COUNTER_TOTAL * sizeof(u32))
+ return -EINVAL;
+
+ /* copy_from_user might sleep so can't be called from inside a spinlock
+ * allocate a temporary buffer for user data and copy to that before taking
+ * the lock
+ */
+ user_data = kmalloc(size, GFP_KERNEL);
+ if (!user_data)
+ return -ENOMEM;
+
+ if (copy_from_user(user_data, data, size)) {
+ model_error_log(KBASE_CORE, "Unable to copy prfcnt data from userspace");
+ kfree(user_data);
+ return -EINVAL;
+ }
+
+ spin_lock_irqsave(&performance_counters.access_lock, flags);
#if !MALI_USE_CSF
- offset = set_user_sample_core_type(performance_counters.jm_counters,
- usr_data, offset, usr_data_size, 1);
+ offset = set_user_sample_core_type(performance_counters.jm_counters, user_data, offset,
+ size, 1);
#else
- offset = set_user_sample_core_type(performance_counters.cshw_counters,
- usr_data, offset, usr_data_size, 1);
+ offset = set_user_sample_core_type(performance_counters.cshw_counters, user_data, offset,
+ size, 1);
#endif /* !MALI_USE_CSF */
- offset = set_user_sample_core_type(performance_counters.tiler_counters,
- usr_data, offset, usr_data_size,
- hweight64(DUMMY_IMPLEMENTATION_TILER_PRESENT));
- offset = set_user_sample_core_type(performance_counters.l2_counters,
- usr_data, offset, usr_data_size,
- KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS);
- offset = set_user_sample_core_type(performance_counters.shader_counters,
- usr_data, offset, usr_data_size,
- KBASE_DUMMY_MODEL_MAX_SHADER_CORES);
+ offset = set_user_sample_core_type(performance_counters.tiler_counters, user_data, offset,
+ size, hweight64(DUMMY_IMPLEMENTATION_TILER_PRESENT));
+ offset = set_user_sample_core_type(performance_counters.l2_counters, user_data, offset,
+ size, KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS);
+ offset = set_user_sample_core_type(performance_counters.shader_counters, user_data, offset,
+ size, KBASE_DUMMY_MODEL_MAX_SHADER_CORES);
+ spin_unlock_irqrestore(&performance_counters.access_lock, flags);
+
+ kfree(user_data);
+ return 0;
}
/* Counter values injected through kutf are of 64 bits */
-void gpu_model_set_dummy_prfcnt_kernel_sample(u64 *usr_data, u32 usr_data_size)
+void gpu_model_set_dummy_prfcnt_kernel_sample(u64 *data, u32 size)
{
+ unsigned long flags;
u32 offset = 0;
+ spin_lock_irqsave(&performance_counters.access_lock, flags);
#if !MALI_USE_CSF
- offset = set_kernel_sample_core_type(performance_counters.jm_counters,
- usr_data, offset, usr_data_size, 1);
+ offset = set_kernel_sample_core_type(performance_counters.jm_counters, data, offset, size,
+ 1);
#else
- offset = set_kernel_sample_core_type(performance_counters.cshw_counters,
- usr_data, offset, usr_data_size, 1);
+ offset = set_kernel_sample_core_type(performance_counters.cshw_counters, data, offset, size,
+ 1);
#endif /* !MALI_USE_CSF */
- offset = set_kernel_sample_core_type(performance_counters.tiler_counters,
- usr_data, offset, usr_data_size,
- hweight64(DUMMY_IMPLEMENTATION_TILER_PRESENT));
- offset = set_kernel_sample_core_type(performance_counters.l2_counters,
- usr_data, offset, usr_data_size,
- hweight64(performance_counters.l2_present));
- offset = set_kernel_sample_core_type(performance_counters.shader_counters,
- usr_data, offset, usr_data_size,
- hweight64(performance_counters.shader_present));
+ offset = set_kernel_sample_core_type(performance_counters.tiler_counters, data, offset,
+ size, hweight64(DUMMY_IMPLEMENTATION_TILER_PRESENT));
+ offset = set_kernel_sample_core_type(performance_counters.l2_counters, data, offset, size,
+ hweight64(performance_counters.l2_present));
+ offset = set_kernel_sample_core_type(performance_counters.shader_counters, data, offset,
+ size, hweight64(performance_counters.shader_present));
+ spin_unlock_irqrestore(&performance_counters.access_lock, flags);
}
KBASE_EXPORT_TEST_API(gpu_model_set_dummy_prfcnt_kernel_sample);
@@ -1977,21 +2181,12 @@ void gpu_model_set_dummy_prfcnt_cores(struct kbase_device *kbdev,
}
KBASE_EXPORT_TEST_API(gpu_model_set_dummy_prfcnt_cores);
-void gpu_model_set_dummy_prfcnt_base_cpu(u32 *base, struct kbase_device *kbdev,
- struct tagged_addr *pages,
- size_t page_count)
-{
- performance_counters.prfcnt_base_cpu = base;
- performance_counters.kbdev = kbdev;
- performance_counters.pages = pages;
- performance_counters.page_count = page_count;
-}
-
int gpu_model_control(void *model,
struct kbase_model_control_params *params)
{
struct dummy_model_t *dummy = (struct dummy_model_t *)model;
int i;
+ unsigned long flags;
if (params->command == KBASE_MC_DISABLE_JOBS) {
for (i = 0; i < NUM_SLOTS; i++)
@@ -2000,8 +2195,10 @@ int gpu_model_control(void *model,
return -EINVAL;
}
+ spin_lock_irqsave(&hw_error_status.access_lock, flags);
midgard_model_update(dummy);
midgard_model_get_outputs(dummy);
+ spin_unlock_irqrestore(&hw_error_status.access_lock, flags);
return 0;
}