1 files changed, 240 insertions, 31 deletions
diff --git a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_metrics.c b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_metrics.c
index de3babe..69e8dd3 100644
--- a/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_metrics.c
+++ b/dvalin/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_pm_metrics.c
@@ -1,11 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 /*
  *
- * (C) COPYRIGHT 2011-2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2011-2021 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
  * Foundation, and any use by you of this program is subject to the terms
- * of such GNU licence.
+ * of such GNU license.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -16,12 +17,8 @@
  * along with this program; if not, you can access it online at
  * http://www.gnu.org/licenses/gpl-2.0.html.
  *
- * SPDX-License-Identifier: GPL-2.0
- *
  */
 
-
-
 /*
  * Metrics for power management
  */
@@ -29,22 +26,28 @@
 #include <mali_kbase.h>
 #include <mali_kbase_pm.h>
 #include <backend/gpu/mali_kbase_pm_internal.h>
+
+#if MALI_USE_CSF
+#include "backend/gpu/mali_kbase_clk_rate_trace_mgr.h"
+#include <csf/ipa_control/mali_kbase_csf_ipa_control.h>
+#else
 #include <backend/gpu/mali_kbase_jm_rb.h>
+#endif /* !MALI_USE_CSF */
+
 #include <backend/gpu/mali_kbase_pm_defs.h>
 #include <mali_linux_trace.h>
 
-/* When VSync is being hit aim for utilisation between 70-90% */
-#define KBASE_PM_VSYNC_MIN_UTILISATION          70
-#define KBASE_PM_VSYNC_MAX_UTILISATION          90
-/* Otherwise aim for 10-40% */
-#define KBASE_PM_NO_VSYNC_MIN_UTILISATION       10
-#define KBASE_PM_NO_VSYNC_MAX_UTILISATION       40
-
 /* Shift used for kbasep_pm_metrics_data.time_busy/idle - units of (1 << 8) ns
  * This gives a maximum period between samples of 2^(32+8)/100 ns = slightly
- * under 11s. Exceeding this will cause overflow */
+ * under 11s. Exceeding this will cause overflow
+ */
 #define KBASE_PM_TIME_SHIFT			8
 
+#if MALI_USE_CSF
+/* To get the GPU_ACTIVE value in nano seconds unit */
+#define GPU_ACTIVE_SCALING_FACTOR ((u64)1E9)
+#endif
+
 #ifdef CONFIG_MALI_MIDGARD_DVFS
 static enum hrtimer_restart dvfs_callback(struct hrtimer *timer)
 {
@@ -71,11 +74,45 @@ static enum hrtimer_restart dvfs_callback(struct hrtimer *timer)
 
 int kbasep_pm_metrics_init(struct kbase_device *kbdev)
 {
-	KBASE_DEBUG_ASSERT(kbdev != NULL);
+#if MALI_USE_CSF
+	struct kbase_ipa_control_perf_counter perf_counter;
+	int err;
 
+	/* One counter group */
+	const size_t NUM_PERF_COUNTERS = 1;
+
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
 	kbdev->pm.backend.metrics.kbdev = kbdev;
+	kbdev->pm.backend.metrics.time_period_start = ktime_get();
+	kbdev->pm.backend.metrics.values.time_busy = 0;
+	kbdev->pm.backend.metrics.values.time_idle = 0;
+	kbdev->pm.backend.metrics.values.time_in_protm = 0;
+
+	perf_counter.scaling_factor = GPU_ACTIVE_SCALING_FACTOR;
 
+	/* Normalize values by GPU frequency */
+	perf_counter.gpu_norm = true;
+
+	/* We need the GPU_ACTIVE counter, which is in the CSHW group */
+	perf_counter.type = KBASE_IPA_CORE_TYPE_CSHW;
+
+	/* We need the GPU_ACTIVE counter */
+	perf_counter.idx = GPU_ACTIVE_CNT_IDX;
+
+	err = kbase_ipa_control_register(
+		kbdev, &perf_counter, NUM_PERF_COUNTERS,
+		&kbdev->pm.backend.metrics.ipa_control_client);
+	if (err) {
+		dev_err(kbdev->dev,
+			"Failed to register IPA with kbase_ipa_control: err=%d",
+			err);
+		return -1;
+	}
+#else
+	KBASE_DEBUG_ASSERT(kbdev != NULL);
+	kbdev->pm.backend.metrics.kbdev = kbdev;
 	kbdev->pm.backend.metrics.time_period_start = ktime_get();
+
 	kbdev->pm.backend.metrics.gpu_active = false;
 	kbdev->pm.backend.metrics.active_cl_ctx[0] = 0;
 	kbdev->pm.backend.metrics.active_cl_ctx[1] = 0;
@@ -89,16 +126,25 @@ int kbasep_pm_metrics_init(struct kbase_device *kbdev)
 	kbdev->pm.backend.metrics.values.busy_cl[1] = 0;
 	kbdev->pm.backend.metrics.values.busy_gl = 0;
 
+#endif
 	spin_lock_init(&kbdev->pm.backend.metrics.lock);
 
 #ifdef CONFIG_MALI_MIDGARD_DVFS
 	hrtimer_init(&kbdev->pm.backend.metrics.timer, CLOCK_MONOTONIC,
 							HRTIMER_MODE_REL);
 	kbdev->pm.backend.metrics.timer.function = dvfs_callback;
-
+	kbdev->pm.backend.metrics.initialized = true;
 	kbase_pm_metrics_start(kbdev);
 #endif /* CONFIG_MALI_MIDGARD_DVFS */
 
+#if MALI_USE_CSF
+	/* The sanity check on the GPU_ACTIVE performance counter
+	 * is skipped for Juno platforms that have timing problems.
+	 */
+	kbdev->pm.backend.metrics.skip_gpu_active_sanity_check =
+		of_machine_is_compatible("arm,juno");
+#endif
+
 	return 0;
 }
 KBASE_EXPORT_TEST_API(kbasep_pm_metrics_init);
@@ -115,7 +161,13 @@ void kbasep_pm_metrics_term(struct kbase_device *kbdev)
 	spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags);
 
 	hrtimer_cancel(&kbdev->pm.backend.metrics.timer);
+	kbdev->pm.backend.metrics.initialized = false;
 #endif /* CONFIG_MALI_MIDGARD_DVFS */
+
+#if MALI_USE_CSF
+	kbase_ipa_control_unregister(
+		kbdev, kbdev->pm.backend.metrics.ipa_control_client);
+#endif
 }
 
 KBASE_EXPORT_TEST_API(kbasep_pm_metrics_term);
@@ -123,8 +175,117 @@ KBASE_EXPORT_TEST_API(kbasep_pm_metrics_term);
 /* caller needs to hold kbdev->pm.backend.metrics.lock before calling this
  * function
  */
+#if MALI_USE_CSF
+#if defined(CONFIG_MALI_DEVFREQ) || defined(CONFIG_MALI_MIDGARD_DVFS)
+static void kbase_pm_get_dvfs_utilisation_calc(struct kbase_device *kbdev)
+{
+	int err;
+	u64 gpu_active_counter;
+	u64 protected_time;
+	ktime_t now;
+
+	lockdep_assert_held(&kbdev->pm.backend.metrics.lock);
+
+	/* Query IPA_CONTROL for the latest GPU-active and protected-time
+	 * info.
+	 */
+	err = kbase_ipa_control_query(
+		kbdev, kbdev->pm.backend.metrics.ipa_control_client,
+		&gpu_active_counter, 1, &protected_time);
+
+	/* Read the timestamp after reading the GPU_ACTIVE counter value.
+	 * This ensures the time gap between the 2 reads is consistent for
+	 * a meaningful comparison between the increment of GPU_ACTIVE and
+	 * elapsed time. The lock taken inside kbase_ipa_control_query()
+	 * function can cause lot of variation.
+	 */
+	now = ktime_get();
+
+	if (err) {
+		dev_err(kbdev->dev,
+			"Failed to query the increment of GPU_ACTIVE counter: err=%d",
+			err);
+	} else {
+		u64 diff_ns;
+		s64 diff_ns_signed;
+		u32 ns_time;
+		ktime_t diff = ktime_sub(
+			now, kbdev->pm.backend.metrics.time_period_start);
+
+		diff_ns_signed = ktime_to_ns(diff);
+
+		if (diff_ns_signed < 0)
+			return;
+
+		diff_ns = (u64)diff_ns_signed;
+
+#if !IS_ENABLED(CONFIG_MALI_NO_MALI)
+		/* The GPU_ACTIVE counter shouldn't clock-up more time than has
+		 * actually elapsed - but still some margin needs to be given
+		 * when doing the comparison. There could be some drift between
+		 * the CPU and GPU clock.
+		 *
+		 * Can do the check only in a real driver build, as an arbitrary
+		 * value for GPU_ACTIVE can be fed into dummy model in no_mali
+		 * configuration which may not correspond to the real elapsed
+		 * time.
+		 */
+		if (!kbdev->pm.backend.metrics.skip_gpu_active_sanity_check) {
+			/* Use a margin value that is approximately 1% of the time
+			 * difference.
+			 */
+			u64 margin_ns = diff_ns >> 6;
+			if (gpu_active_counter > (diff_ns + margin_ns)) {
+				dev_info(
+					kbdev->dev,
+					"GPU activity takes longer than time interval: %llu ns > %llu ns",
+					(unsigned long long)gpu_active_counter,
+					(unsigned long long)diff_ns);
+			}
+		}
+#endif
+		/* Calculate time difference in units of 256ns */
+		ns_time = (u32)(diff_ns >> KBASE_PM_TIME_SHIFT);
+
+		/* Add protected_time to gpu_active_counter so that time in
+		 * protected mode is included in the apparent GPU active time,
+		 * then convert it from units of 1ns to units of 256ns, to
+		 * match what JM GPUs use. The assumption is made here that the
+		 * GPU is 100% busy while in protected mode, so we should add
+		 * this since the GPU can't (and thus won't) update these
+		 * counters while it's actually in protected mode.
+		 *
+		 * Perform the add after dividing each value down, to reduce
+		 * the chances of overflows.
+		 */
+		protected_time >>= KBASE_PM_TIME_SHIFT;
+		gpu_active_counter >>= KBASE_PM_TIME_SHIFT;
+		gpu_active_counter += protected_time;
+
+		/* Ensure the following equations don't go wrong if ns_time is
+		 * slightly larger than gpu_active_counter somehow
+		 */
+		gpu_active_counter = MIN(gpu_active_counter, ns_time);
+
+		kbdev->pm.backend.metrics.values.time_busy +=
+			gpu_active_counter;
+
+		kbdev->pm.backend.metrics.values.time_idle +=
+			ns_time - gpu_active_counter;
+
+		/* Also make time in protected mode available explicitly,
+		 * so users of this data have this info, too.
+		 */
+		kbdev->pm.backend.metrics.values.time_in_protm +=
+			protected_time;
+	}
+
+	kbdev->pm.backend.metrics.time_period_start = now;
+}
+#endif /* defined(CONFIG_MALI_DEVFREQ) || defined(CONFIG_MALI_MIDGARD_DVFS) */
+#else
 static void kbase_pm_get_dvfs_utilisation_calc(struct kbase_device *kbdev,
-								ktime_t now)
+					       ktime_t now)
 {
 	ktime_t diff;
 
@@ -149,12 +310,13 @@ static void kbase_pm_get_dvfs_utilisation_calc(struct kbase_device *kbdev,
 		if (kbdev->pm.backend.metrics.active_gl_ctx[2])
 			kbdev->pm.backend.metrics.values.busy_gl += ns_time;
 	} else {
-		kbdev->pm.backend.metrics.values.time_idle += (u32) (ktime_to_ns(diff)
-							>> KBASE_PM_TIME_SHIFT);
+		kbdev->pm.backend.metrics.values.time_idle +=
+			(u32)(ktime_to_ns(diff) >> KBASE_PM_TIME_SHIFT);
 	}
 
 	kbdev->pm.backend.metrics.time_period_start = now;
 }
+#endif  /* MALI_USE_CSF */
 
 #if defined(CONFIG_MALI_DEVFREQ) || defined(CONFIG_MALI_MIDGARD_DVFS)
 void kbase_pm_get_dvfs_metrics(struct kbase_device *kbdev,
@@ -165,14 +327,23 @@ void kbase_pm_get_dvfs_metrics(struct kbase_device *kbdev,
 	unsigned long flags;
 
 	spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags);
+#if MALI_USE_CSF
+	kbase_pm_get_dvfs_utilisation_calc(kbdev);
+#else
 	kbase_pm_get_dvfs_utilisation_calc(kbdev, ktime_get());
+#endif
 
 	memset(diff, 0, sizeof(*diff));
 	diff->time_busy = cur->time_busy - last->time_busy;
 	diff->time_idle = cur->time_idle - last->time_idle;
+
+#if MALI_USE_CSF
+	diff->time_in_protm = cur->time_in_protm - last->time_in_protm;
+#else
 	diff->busy_cl[0] = cur->busy_cl[0] - last->busy_cl[0];
 	diff->busy_cl[1] = cur->busy_cl[1] - last->busy_cl[1];
 	diff->busy_gl = cur->busy_gl - last->busy_gl;
+#endif
 
 	*last = *cur;
 
@@ -184,26 +355,42 @@ KBASE_EXPORT_TEST_API(kbase_pm_get_dvfs_metrics);
 #ifdef CONFIG_MALI_MIDGARD_DVFS
 void kbase_pm_get_dvfs_action(struct kbase_device *kbdev)
 {
-	int utilisation, util_gl_share;
-	int util_cl_share[2];
-	int busy;
+	int utilisation;
 	struct kbasep_pm_metrics *diff;
+#if !MALI_USE_CSF
+	int busy;
+	int util_gl_share;
+	int util_cl_share[2];
+#endif
 
 	KBASE_DEBUG_ASSERT(kbdev != NULL);
 
 	diff = &kbdev->pm.backend.metrics.dvfs_diff;
 
-	kbase_pm_get_dvfs_metrics(kbdev, &kbdev->pm.backend.metrics.dvfs_last, diff);
+	kbase_pm_get_dvfs_metrics(kbdev, &kbdev->pm.backend.metrics.dvfs_last,
+				  diff);
 
 	utilisation = (100 * diff->time_busy) /
 			max(diff->time_busy + diff->time_idle, 1u);
 
+#if !MALI_USE_CSF
 	busy = max(diff->busy_gl + diff->busy_cl[0] + diff->busy_cl[1], 1u);
+
 	util_gl_share = (100 * diff->busy_gl) / busy;
 	util_cl_share[0] = (100 * diff->busy_cl[0]) / busy;
 	util_cl_share[1] = (100 * diff->busy_cl[1]) / busy;
 
-	kbase_platform_dvfs_event(kbdev, utilisation, util_gl_share, util_cl_share);
+	kbase_platform_dvfs_event(kbdev, utilisation, util_gl_share,
+				  util_cl_share);
+#else
+	/* Note that, at present, we don't pass protected-mode time to the
+	 * platform here. It's unlikely to be useful, however, as the platform
+	 * probably just cares whether the GPU is busy or not; time in
+	 * protected mode is already added to busy-time at this point, though,
+	 * so we should be good.
+	 */
+	kbase_platform_dvfs_event(kbdev, utilisation);
+#endif
 }
 
 bool kbase_pm_metrics_is_active(struct kbase_device *kbdev)
@@ -224,11 +411,20 @@ KBASE_EXPORT_TEST_API(kbase_pm_metrics_is_active);
 void kbase_pm_metrics_start(struct kbase_device *kbdev)
 {
 	unsigned long flags;
+	bool update = true;
+
+	if (unlikely(!kbdev->pm.backend.metrics.initialized))
+		return;
 
 	spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags);
-	kbdev->pm.backend.metrics.timer_active = true;
+	if (!kbdev->pm.backend.metrics.timer_active)
+		kbdev->pm.backend.metrics.timer_active = true;
+	else
+		update = false;
 	spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags);
-	hrtimer_start(&kbdev->pm.backend.metrics.timer,
+
+	if (update)
+		hrtimer_start(&kbdev->pm.backend.metrics.timer,
 			HR_TIMER_DELAY_MSEC(kbdev->pm.dvfs_period),
 			HRTIMER_MODE_REL);
 }
@@ -236,16 +432,26 @@ void kbase_pm_metrics_start(struct kbase_device *kbdev)
 void kbase_pm_metrics_stop(struct kbase_device *kbdev)
 {
 	unsigned long flags;
+	bool update = true;
+
+	if (unlikely(!kbdev->pm.backend.metrics.initialized))
+		return;
 
 	spin_lock_irqsave(&kbdev->pm.backend.metrics.lock, flags);
-	kbdev->pm.backend.metrics.timer_active = false;
+	if (kbdev->pm.backend.metrics.timer_active)
+		kbdev->pm.backend.metrics.timer_active = false;
+	else
+		update = false;
 	spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags);
-	hrtimer_cancel(&kbdev->pm.backend.metrics.timer);
+
+	if (update)
+		hrtimer_cancel(&kbdev->pm.backend.metrics.timer);
 }
 
 
 #endif /* CONFIG_MALI_MIDGARD_DVFS */
 
+#if !MALI_USE_CSF
 /**
  * kbase_pm_metrics_active_calc - Update PM active counts based on currently
  *                                running atoms
@@ -270,7 +476,8 @@ static void kbase_pm_metrics_active_calc(struct kbase_device *kbdev)
 		struct kbase_jd_atom *katom = kbase_gpu_inspect(kbdev, js, 0);
 
 		/* Head atom may have just completed, so if it isn't running
-		 * then try the next atom */
+		 * then try the next atom
+		 */
 		if (katom && katom->gpu_rb_state != KBASE_ATOM_GPU_RB_SUBMITTED)
 			katom = kbase_gpu_inspect(kbdev, js, 1);
 
@@ -309,10 +516,12 @@ void kbase_pm_metrics_update(struct kbase_device *kbdev, ktime_t *timestamp)
 		timestamp = &now;
 	}
 
-	/* Track how long CL and/or GL jobs have been busy for */
+	/* Track how much of time has been spent busy or idle. For JM GPUs,
+	 * this also evaluates how long CL and/or GL jobs have been busy for.
+	 */
 	kbase_pm_get_dvfs_utilisation_calc(kbdev, *timestamp);
 
 	kbase_pm_metrics_active_calc(kbdev);
-
 	spin_unlock_irqrestore(&kbdev->pm.backend.metrics.lock, flags);
 }
+#endif /* !MALI_USE_CSF */