summaryrefslogtreecommitdiff
path: root/mali_kbase/tl/backend/mali_kbase_timeline_csf.c
blob: a6062f170ff98c30b4acc71690b285138e837129 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
 *
 * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
 *
 * This program is free software and is provided to you under the terms of the
 * GNU General Public License version 2 as published by the Free Software
 * Foundation, and any use by you of this program is subject to the terms
 * of such GNU license.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, you can access it online at
 * http://www.gnu.org/licenses/gpl-2.0.html.
 *
 */

#include <tl/mali_kbase_tracepoints.h>
#include <tl/mali_kbase_timeline.h>
#include <tl/mali_kbase_timeline_priv.h>

#include <mali_kbase.h>

#define GPU_FEATURES_CROSS_STREAM_SYNC_MASK (1ull << 3ull)

/**
 * kbase_create_timeline_objects - Summarize the device's current object-model
 *                                 state into the timeline streams.
 * @kbdev: Kbase device whose state is to be summarized.
 *
 * Emits tracepoints describing all currently-existing timeline objects
 * (address spaces, the GPU/device itself, resident CSG slots, contexts and
 * their KCPU queues) so that a newly attached timeline client can reconstruct
 * the object model from scratch. Static objects go into the summary stream
 * (retransmitted to every new client); per-context objects deliberately go
 * into the body stream — see the comments below for why.
 *
 * Lock ordering used here: tl_kctx_list_lock -> csf.scheduler.lock, then per
 * context: kcpu_queues.lock -> mmu_hw_mutex. NOTE(review): this ordering is
 * assumed to match the rest of the driver — confirm against other call sites.
 */
void kbase_create_timeline_objects(struct kbase_device *kbdev)
{
	unsigned int as_nr;
	unsigned int slot_i;
	struct kbase_context *kctx;
	struct kbase_timeline *timeline = kbdev->timeline;
	/* Summary stream: contents are replayed to each new timeline reader. */
	struct kbase_tlstream *summary =
		&kbdev->timeline->streams[TL_STREAM_TYPE_OBJ_SUMMARY];
	/* 1 if the GPU advertises cross-stream sync in its feature register,
	 * 0 otherwise (bit 3 of gpu_features, per the mask defined above).
	 */
	u32 const kbdev_has_cross_stream_sync =
		(kbdev->gpu_props.props.raw_props.gpu_features &
		 GPU_FEATURES_CROSS_STREAM_SYNC_MASK) ?
			1 :
			0;
	u32 const arch_maj = (kbdev->gpu_props.props.raw_props.gpu_id &
			      GPU_ID2_ARCH_MAJOR) >>
			     GPU_ID2_ARCH_MAJOR_SHIFT;
	/* Scoreboard entry count differs by architecture generation:
	 * 16 entries from arch major 11 onwards, 8 before that.
	 */
	u32 const num_sb_entries = arch_maj >= 11 ? 16 : 8;
	/* GPU sleep capability is only meaningful when runtime PM is built in;
	 * report false otherwise.
	 */
	u32 const supports_gpu_sleep =
#ifdef KBASE_PM_RUNTIME
		kbdev->pm.backend.gpu_sleep_supported;
#else
		false;
#endif /* KBASE_PM_RUNTIME */

	/* Summarize the Address Space objects. */
	for (as_nr = 0; as_nr < kbdev->nr_hw_address_spaces; as_nr++)
		__kbase_tlstream_tl_new_as(summary, &kbdev->as[as_nr], as_nr);

	/* Create Legacy GPU object to track in AOM for dumping */
	__kbase_tlstream_tl_new_gpu(summary,
			kbdev,
			kbdev->gpu_props.props.raw_props.gpu_id,
			kbdev->gpu_props.num_cores);


	/* Tie each address space's lifetime to the GPU object just created. */
	for (as_nr = 0; as_nr < kbdev->nr_hw_address_spaces; as_nr++)
		__kbase_tlstream_tl_lifelink_as_gpu(summary,
				&kbdev->as[as_nr],
				kbdev);

	/* Trace the creation of a new kbase device and set its properties. */
	__kbase_tlstream_tl_kbase_new_device(summary, kbdev->gpu_props.props.raw_props.gpu_id,
					     kbdev->gpu_props.num_cores,
					     kbdev->csf.global_iface.group_num,
					     kbdev->nr_hw_address_spaces, num_sb_entries,
					     kbdev_has_cross_stream_sync, supports_gpu_sleep);

	/* Lock the context list, to ensure no changes to the list are made
	 * while we're summarizing the contexts and their contents.
	 */
	mutex_lock(&timeline->tl_kctx_list_lock);

	/* Hold the scheduler lock while we emit the current state
	 * We also need to continue holding the lock until after the first body
	 * stream tracepoints are emitted to ensure we don't change the
	 * scheduler until after then
	 */
	mutex_lock(&kbdev->csf.scheduler.lock);

	/* Summarize which queue group (if any) is resident on each CSG slot. */
	for (slot_i = 0; slot_i < kbdev->csf.global_iface.group_num; slot_i++) {

		struct kbase_queue_group *group =
			kbdev->csf.scheduler.csg_slots[slot_i].resident_group;

		if (group)
			__kbase_tlstream_tl_kbase_device_program_csg(
				summary,
				kbdev->gpu_props.props.raw_props.gpu_id,
				group->kctx->id, group->handle, slot_i, 0);
	}

	/* Reset body stream buffers while holding the kctx lock.
	 * As we are holding the lock, we can guarantee that no kctx creation or
	 * deletion tracepoints can be fired from outside of this function by
	 * some other thread.
	 */
	kbase_timeline_streams_body_reset(timeline);

	mutex_unlock(&kbdev->csf.scheduler.lock);

	/* For each context in the device... */
	list_for_each_entry(kctx, &timeline->tl_kctx_list, tl_kctx_list_node) {
		size_t i;
		struct kbase_tlstream *body =
			&timeline->streams[TL_STREAM_TYPE_OBJ];

		/* Lock the context's KCPU queues, to ensure no KCPU-queue
		 * related actions can occur in this context from now on.
		 */
		mutex_lock(&kctx->csf.kcpu_queues.lock);

		/* Acquire the MMU lock, to ensure we don't get a concurrent
		 * address space assignment while summarizing this context's
		 * address space.
		 */
		mutex_lock(&kbdev->mmu_hw_mutex);

		/* Trace the context itself into the body stream, not the
		 * summary stream.
		 * We place this in the body to ensure it is ordered after any
		 * other tracepoints related to the contents of the context that
		 * might have been fired before acquiring all of the per-context
		 * locks.
		 * This ensures that those tracepoints will not actually affect
		 * the object model state, as they reference a context that
		 * hasn't been traced yet. They may, however, cause benign
		 * errors to be emitted.
		 */
		__kbase_tlstream_tl_kbase_new_ctx(body, kctx->id,
				kbdev->gpu_props.props.raw_props.gpu_id);

		/* Also trace with the legacy AOM tracepoint for dumping */
		__kbase_tlstream_tl_new_ctx(body,
				kctx,
				kctx->id,
				(u32)(kctx->tgid));

		/* Trace the currently assigned address space */
		if (kctx->as_nr != KBASEP_AS_NR_INVALID)
			__kbase_tlstream_tl_kbase_ctx_assign_as(body, kctx->id,
				kctx->as_nr);


		/* Trace all KCPU queues in the context into the body stream.
		 * As we acquired the KCPU lock after resetting the body stream,
		 * it's possible that some KCPU-related events for this context
		 * occurred between that reset and now.
		 * These will cause errors to be emitted when parsing the
		 * timeline, but they will not affect the correctness of the
		 * object model.
		 */
		for (i = 0; i < KBASEP_MAX_KCPU_QUEUES; i++) {
			/* Sparse array: unused slots are NULL. */
			const struct kbase_kcpu_command_queue *kcpu_queue =
				kctx->csf.kcpu_queues.array[i];

			if (kcpu_queue)
				__kbase_tlstream_tl_kbase_new_kcpuqueue(
					body, kcpu_queue, kcpu_queue->id, kcpu_queue->kctx->id,
					kcpu_queue->num_pending_cmds);
		}

		/* Release in reverse acquisition order. */
		mutex_unlock(&kbdev->mmu_hw_mutex);
		mutex_unlock(&kctx->csf.kcpu_queues.lock);

		/* Now that all per-context locks for this context have been
		 * released, any per-context tracepoints that are fired from
		 * any other threads will go into the body stream after
		 * everything that was just summarised into the body stream in
		 * this iteration of the loop, so will start to correctly update
		 * the object model state.
		 */
	}

	mutex_unlock(&timeline->tl_kctx_list_lock);

	/* Static object are placed into summary packet that needs to be
	 * transmitted first. Flush all streams to make it available to
	 * user space.
	 */
	kbase_timeline_streams_flush(timeline);
}