/*
 * This program is free software and is provided to you under the terms of the
 * GNU General Public License version 2 as published by the Free Software
 * Foundation, and any use by you of this program is subject to the terms
 * of such GNU licence.
 *
 * A copy of the licence is included with the program, and can also be obtained
 * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
 * Boston, MA  02110-1301, USA.
 */

#include <linux/mman.h>
#include <linux/version_compat_defs.h>
#include <mali_kbase.h>

/* mali_kbase_mmap.c
 *
 * This file contains the Linux-specific implementation of the
 * kbase_context_get_unmapped_area() interface.
 */


/**
 * align_and_check() - Align the specified pointer to the provided alignment and
 *                     check that it is still in range.
 * @gap_end:        Highest possible start address for allocation (end of gap in
 *                  address space)
 * @gap_start:      Start address of current memory area / gap in address space
 * @info:           vm_unmapped_area_info structure passed by the caller,
 *                  containing alignment, length and limits for the allocation
 * @is_shader_code: True if the allocation is for shader code (which has
 *                  additional alignment requirements)
 * @is_same_4gb_page: True if the allocation needs to reside completely within
 *                    a 4GB chunk
 *
 * Return: true if gap_end is now aligned correctly and is still in range,
 *         false otherwise
 */
static bool align_and_check(unsigned long *gap_end, unsigned long gap_start,
		struct vm_unmapped_area_info *info, bool is_shader_code,
		bool is_same_4gb_page)
{
	/* Compute highest gap address at the desired alignment */
	(*gap_end) -= info->length;
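	/* Round the start down so that it is congruent to align_offset
	 * modulo (align_mask + 1).
	 */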
	(*gap_end) -= (*gap_end - info->align_offset) & info->align_mask;

	if (is_shader_code) {
		/* Executable (shader) allocations must not start or end
		 * exactly on a 4GB boundary. If the aligned gap does, nudge
		 * it down by the alignment offset (or by the allocation
		 * length when no offset is set), and fail if a boundary is
		 * still hit.
		 */
		if ((*gap_end & BASE_MEM_MASK_4GB) == 0)
			(*gap_end) -= (info->align_offset ? info->align_offset :
					info->length);
		if (((*gap_end + info->length) & BASE_MEM_MASK_4GB) == 0)
			(*gap_end) -= (info->align_offset ? info->align_offset :
					info->length);

		if (!(*gap_end & BASE_MEM_MASK_4GB) || !((*gap_end +
				info->length) & BASE_MEM_MASK_4GB))
			return false;
	} else if (is_same_4gb_page) {
		unsigned long start = *gap_end;
		unsigned long end = *gap_end + info->length;
		unsigned long mask = ~((unsigned long)U32_MAX);

		/* Check if 4GB boundary is straddled */
		if ((start & mask) != ((end - 1) & mask)) {
			unsigned long offset = end - (end & mask);
			/* This is to ensure that alignment doesn't get
			 * disturbed in an attempt to prevent straddling at
			 * 4GB boundary. The GPU VA is aligned to 2MB when the
			 * allocation size is > 2MB and there is enough CPU &
			 * GPU virtual space.
			 */
			unsigned long rounded_offset =
					ALIGN(offset, info->align_mask + 1);

			start -= rounded_offset;
			end -= rounded_offset;

			*gap_end = start;

			/* The preceding 4GB boundary shall not get straddled,
			 * even after accounting for the alignment, as the
			 * size of allocation is limited to 4GB and the initial
			 * start location was already aligned.
			 */
			WARN_ON((start & mask) != ((end - 1) & mask));
		}
	}


	if ((*gap_end < info->low_limit) || (*gap_end < gap_start))
		return false;

	return true;
}

/**
 * kbase_unmapped_area_topdown() - allocates new areas top-down from
 *                                 below the stack limit.
 * @info:              Information about the memory area to allocate.
 * @is_shader_code:    Boolean which denotes whether the allocated area is
 *                      intended for use by a shader core, in which case
 *                      special alignment requirements apply.
 * @is_same_4gb_page: Boolean which indicates whether the allocated area needs
 *                    to reside completely within a 4GB chunk.
 *
 * The unmapped_area_topdown() function in the Linux kernel is not exported
 * using the EXPORT_SYMBOL_GPL macro. To allow us to call this function from a
 * module, and to make use of the fact that some of the requirements for the
 * unmapped area are known in advance, we implemented an extended version of
 * this function and prefixed it with 'kbase_'.
 *
 * The difference in the call parameter list comes from the fact that
 * kbase_unmapped_area_topdown() is called with additional parameters which
 * indicate whether the allocation is for shader core memory, which has
 * additional alignment requirements, and whether the allocation is allowed
 * to straddle a 4GB boundary.
 *
 * The modification of the original Linux function lies in how the computation
 * of the highest gap address at the desired alignment is performed once a
 * gap with the desired properties is found. For this purpose the helper
 * function align_and_check() is introduced. Besides computing the gap end at
 * the desired alignment, it also performs additional checks: for executable
 * shader core memory it ensures that the gap does not start or end on a 4GB
 * boundary, and for memory that must reside within a single 4GB chunk it
 * ensures that no 4GB boundary is straddled.
 *
 * Return: address of the found gap end (high limit) if area is found;
 *         -ENOMEM if search is unsuccessful
 */
static unsigned long kbase_unmapped_area_topdown(struct vm_unmapped_area_info
		*info, bool is_shader_code, bool is_same_4gb_page)
{
#if (KERNEL_VERSION(6, 1, 0) > LINUX_VERSION_CODE)
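	/* Kernels before 6.1 track VMAs in an rbtree augmented with
	 * per-subtree maximum gap sizes (rb_subtree_gap); walk it top-down
	 * looking for a large enough gap below high_limit.
	 */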
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma;
	unsigned long length, low_limit, high_limit, gap_start, gap_end;

	/* Adjust search length to account for worst case alignment overhead */
	length = info->length + info->align_mask;
	if (length < info->length)
		return -ENOMEM;

	/*
	 * Adjust search limits by the desired length.
	 * See implementation comment at top of unmapped_area().
	 */
	gap_end = info->high_limit;
	if (gap_end < length)
		return -ENOMEM;
	high_limit = gap_end - length;

	if (info->low_limit > high_limit)
		return -ENOMEM;
	low_limit = info->low_limit + length;

	/* Check highest gap, which does not precede any rbtree node */
	gap_start = mm->highest_vm_end;
	if (gap_start <= high_limit) {
		if (align_and_check(&gap_end, gap_start, info,
				is_shader_code, is_same_4gb_page))
			return gap_end;
	}

	/* Check if rbtree root looks promising */
	if (RB_EMPTY_ROOT(&mm->mm_rb))
		return -ENOMEM;
	vma = rb_entry(mm->mm_rb.rb_node, struct vm_area_struct, vm_rb);
	if (vma->rb_subtree_gap < length)
		return -ENOMEM;

	while (true) {
		/* Visit right subtree if it looks promising */
		gap_start = vma->vm_prev ? vma->vm_prev->vm_end : 0;
		if (gap_start <= high_limit && vma->vm_rb.rb_right) {
			struct vm_area_struct *right =
				rb_entry(vma->vm_rb.rb_right,
					 struct vm_area_struct, vm_rb);
			if (right->rb_subtree_gap >= length) {
				vma = right;
				continue;
			}
		}

check_current:
		/* Check if current node has a suitable gap */
		gap_end = vma->vm_start;
		if (gap_end < low_limit)
			return -ENOMEM;
		if (gap_start <= high_limit && gap_end - gap_start >= length) {
			/* We found a suitable gap. Clip it with the original
			 * high_limit.
			 */
			if (gap_end > info->high_limit)
				gap_end = info->high_limit;

			if (align_and_check(&gap_end, gap_start, info,
					is_shader_code, is_same_4gb_page))
				return gap_end;
		}

		/* Visit left subtree if it looks promising */
		if (vma->vm_rb.rb_left) {
			struct vm_area_struct *left =
				rb_entry(vma->vm_rb.rb_left,
					 struct vm_area_struct, vm_rb);
			if (left->rb_subtree_gap >= length) {
				vma = left;
				continue;
			}
		}

		/* Go back up the rbtree to find next candidate node */
		while (true) {
			struct rb_node *prev = &vma->vm_rb;

			if (!rb_parent(prev))
				return -ENOMEM;
			vma = rb_entry(rb_parent(prev),
				       struct vm_area_struct, vm_rb);
			if (prev == vma->vm_rb.rb_right) {
				gap_start = vma->vm_prev ?
					vma->vm_prev->vm_end : 0;
				goto check_current;
			}
		}
	}
#else
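	/* Kernels 6.1 and later track VMAs in a maple tree rather than an
	 * rbtree; search it directly for a suitably sized gap, highest
	 * first.
	 */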
	unsigned long length, high_limit, gap_start, gap_end;

	MA_STATE(mas, &current->mm->mm_mt, 0, 0);
	/* Adjust search length to account for worst case alignment overhead */
	length = info->length + info->align_mask;
	if (length < info->length)
		return -ENOMEM;

	/*
	 * Adjust search limits by the desired length.
	 * See implementation comment at top of unmapped_area().
	 */
	gap_end = info->high_limit;
	if (gap_end < length)
		return -ENOMEM;
	high_limit = gap_end - length;

	if (info->low_limit > high_limit)
		return -ENOMEM;

	while (true) {
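		/* Find the highest remaining gap of at least 'length' bytes
		 * within the limits, then test it against the alignment
		 * constraints.
		 */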
		if (mas_empty_area_rev(&mas, info->low_limit, info->high_limit - 1, length))
			return -ENOMEM;
		gap_end = mas.last + 1;
		gap_start = mas.min;

		if (align_and_check(&gap_end, gap_start, info, is_shader_code, is_same_4gb_page))
			return gap_end;
	}
#endif
	return -ENOMEM;
}


/* This function is based on the Linux kernel's arch_get_unmapped_area(), but
 * is simplified slightly. The modifications come from the fact that some
 * values about the memory area are known in advance.
 */
unsigned long kbase_context_get_unmapped_area(struct kbase_context *const kctx,
		const unsigned long addr, const unsigned long len,
		const unsigned long pgoff, const unsigned long flags)
{
	struct mm_struct *mm = current->mm;
	struct vm_unmapped_area_info info;
	unsigned long align_offset = 0;
	unsigned long align_mask = 0;
#if (KERNEL_VERSION(6, 1, 0) <= LINUX_VERSION_CODE)
	unsigned long high_limit = arch_get_mmap_base(addr, mm->mmap_base);
	unsigned long low_limit = max_t(unsigned long, PAGE_SIZE, kbase_mmap_min_addr);
#else
	unsigned long high_limit = mm->mmap_base;
	unsigned long low_limit = PAGE_SIZE;
#endif
	int cpu_va_bits = BITS_PER_LONG;
	int gpu_pc_bits =
	      kctx->kbdev->gpu_props.props.core_props.log2_program_counter_size;
	bool is_shader_code = false;
	bool is_same_4gb_page = false;
	unsigned long ret;

	/* the 'nolock' form is used here:
	 * - the base_pfn of the SAME_VA zone does not change
	 * - in normal use, va_size_pages is constant once the first allocation
	 *   begins
	 *
	 * However, in abnormal use this function could be processing whilst
	 * another new zone is being set up in a different thread (e.g. to
	 * borrow part of the SAME_VA zone). In the worst case, this path may
	 * witness a higher SAME_VA end_pfn than the code setting up the new
	 * zone.
	 *
	 * This is safe because once we reach the main allocation functions,
	 * we'll see the updated SAME_VA end_pfn, determine that there is no
	 * free region at the address found here (because the same_va_end_addr
	 * used here was too large), and fail the allocation gracefully.
	 */
	struct kbase_reg_zone *zone =
		kbase_ctx_reg_zone_get_nolock(kctx, KBASE_REG_ZONE_SAME_VA);
	u64 same_va_end_addr = kbase_reg_zone_end_pfn(zone) << PAGE_SHIFT;
#if (KERNEL_VERSION(6, 1, 0) <= LINUX_VERSION_CODE)
	const unsigned long mmap_end = arch_get_mmap_end(addr, len, flags);

	/* requested length too big for entire address space */
	if (len > mmap_end - kbase_mmap_min_addr)
		return -ENOMEM;
#endif

	/* err on fixed address */
	if ((flags & MAP_FIXED) || addr)
		return -EINVAL;

#if IS_ENABLED(CONFIG_64BIT)
	/* too big? */
	if (len > TASK_SIZE - SZ_2M)
		return -ENOMEM;

	if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) {
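		/* Keep the mapping below the end of the SAME_VA zone so the
		 * CPU virtual address can also be used as the GPU virtual
		 * address.
		 */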
		high_limit =
			min_t(unsigned long, high_limit, same_va_end_addr);

		/* If there's enough (> 33 bits) of GPU VA space, align
		 * to 2MB boundaries.
		 */
		if (kctx->kbdev->gpu_props.mmu.va_bits > 33) {
			if (len >= SZ_2M) {
				align_offset = SZ_2M;
				align_mask = SZ_2M - 1;
			}
		}

		low_limit = SZ_2M;
	} else {
		cpu_va_bits = 32;
	}
#endif /* CONFIG_64BIT */
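	/* Offsets in the cookie range identify a pending region whose flags
	 * select the extra alignment constraints applied below.
	 */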
	if ((PFN_DOWN(BASE_MEM_COOKIE_BASE) <= pgoff) &&
		(PFN_DOWN(BASE_MEM_FIRST_FREE_ADDRESS) > pgoff)) {
		int cookie = pgoff - PFN_DOWN(BASE_MEM_COOKIE_BASE);
		struct kbase_va_region *reg;

		/* Need to hold gpu vm lock when using reg */
		kbase_gpu_vm_lock(kctx);
		reg = kctx->pending_regions[cookie];
		if (!reg) {
			kbase_gpu_vm_unlock(kctx);
			return -EINVAL;
		}
		if (!(reg->flags & KBASE_REG_GPU_NX)) {
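			/* Executable (shader) regions are aligned to the
			 * range addressable by the GPU program counter.
			 */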
			if (cpu_va_bits > gpu_pc_bits) {
				align_offset = 1ULL << gpu_pc_bits;
				align_mask = align_offset - 1;
				is_shader_code = true;
			}
#if !MALI_USE_CSF
		} else if (reg->flags & KBASE_REG_TILER_ALIGN_TOP) {
			unsigned long extension_bytes =
				(unsigned long)(reg->extension
						<< PAGE_SHIFT);
			/* kbase_check_alloc_sizes() already satisfies
			 * these checks, but they're here to avoid
			 * maintenance hazards due to the assumptions
			 * involved
			 */
			WARN_ON(reg->extension >
				(ULONG_MAX >> PAGE_SHIFT));
			WARN_ON(reg->initial_commit > (ULONG_MAX >> PAGE_SHIFT));
			WARN_ON(!is_power_of_2(extension_bytes));
			align_mask = extension_bytes - 1;
			align_offset =
				extension_bytes -
				(reg->initial_commit << PAGE_SHIFT);
#endif /* !MALI_USE_CSF */
		} else if (reg->flags & KBASE_REG_GPU_VA_SAME_4GB_PAGE) {
			is_same_4gb_page = true;
		}
		kbase_gpu_vm_unlock(kctx);
#ifndef CONFIG_64BIT
	} else {
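		/* Outside the cookie range on 32-bit kernels, fall back to
		 * the standard allocator.
		 */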
		return current->mm->get_unmapped_area(
			kctx->filp, addr, len, pgoff, flags);
#endif
	}

	info.flags = 0;
	info.length = len;
	info.low_limit = low_limit;
	info.high_limit = high_limit;
	info.align_offset = align_offset;
	info.align_mask = align_mask;

	ret = kbase_unmapped_area_topdown(&info, is_shader_code,
			is_same_4gb_page);

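	/* If the search below mmap_base failed and the SAME_VA zone extends
	 * above it, retry once over the wider range.
	 */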
	if (IS_ERR_VALUE(ret) && high_limit == mm->mmap_base &&
	    high_limit < same_va_end_addr) {
#if (KERNEL_VERSION(6, 1, 0) <= LINUX_VERSION_CODE)
		/* Retry above TASK_UNMAPPED_BASE */
		info.low_limit = TASK_UNMAPPED_BASE;
		info.high_limit = min_t(u64, mmap_end, same_va_end_addr);
#else
		/* Retry above mmap_base */
		info.low_limit = mm->mmap_base;
		info.high_limit = min_t(u64, TASK_SIZE, same_va_end_addr);
#endif

		ret = kbase_unmapped_area_topdown(&info, is_shader_code,
				is_same_4gb_page);
	}

	return ret;
}