author     Robin Peng <robinpeng@google.com>    2022-12-02 06:52:11 +0000
committer  Robin Peng <robinpeng@google.com>    2022-12-02 06:52:13 +0000
commit     7c092b1c1812a0b223467f59f3b91f8765855085 (patch)
tree       f19f8b6412314091c870bab760aa97085cf99c1f
parent     341f2795f010fc080dab418c12c5e19318a99828 (diff)
parent     02b5dfab573c04bd46100461c4998818fe9dd89c (diff)
download   gs-android-gs-raviole-5.10-android13-qpr1.tar.gz
Merge SHA: 02b5dfab573c UPSTREAM: binder: Gracefully handle BINDER_TYPE_FDA objects with num_fds=0

Bug: 233569354
Bug: 137131904 (ACK)
Bug: 161210518 (ACK)
Bug: 245928838 (ACK)
Bug: 257443051 (ACK)
Bug: 257685302 (ACK)
Bug: 260704658 (ACK)
Change-Id: I31e4d0399a3b8f1ae0e1996749e6888ea714b1e2
Signed-off-by: Robin Peng <robinpeng@google.com>
-rw-r--r--  drivers/android/binder.c          437
-rw-r--r--  drivers/video/fbdev/pxa3xx-gcu.c    2
-rw-r--r--  include/linux/mm.h                  6
-rw-r--r--  mm/Kconfig                          2
-rw-r--r--  mm/filemap.c                       63
-rw-r--r--  mm/khugepaged.c                     8
-rw-r--r--  mm/madvise.c                        6
-rw-r--r--  mm/memory.c                        58
-rw-r--r--  mm/mempolicy.c                      2
-rw-r--r--  mm/mremap.c                        12
10 files changed, 494 insertions, 102 deletions
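
The binder.c hunks below carry the headline fix of this merge: binder_translate_fd_array() now bails out as soon as a BINDER_TYPE_FDA object reports num_fds == 0, so an empty fd array never queues a zero-length entry on the pointer-fixup list (whose kernel-doc below notes that a skip_size of 0 means "write fixup_data at this offset"). What follows is a minimal, hedged sketch of that guard only; example_translate_fd_array() is a hypothetical stand-in, and the real code is in the binder_translate_fd_array() hunk.

/* Sketch of the num_fds == 0 guard added by this patch (everything else
 * elided); see binder_translate_fd_array() in the diff for the real code.
 */
static int example_translate_fd_array(struct binder_fd_array_object *fda)
{
	if (fda->num_fds == 0)
		return 0;	/* empty array: nothing to translate or fix up */

	/* ... size/alignment checks and per-fd translation ... */
	return 0;
}
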
diff --git a/drivers/android/binder.c b/drivers/android/binder.c
index 07330027e6c5..f0d8989a2130 100644
--- a/drivers/android/binder.c
+++ b/drivers/android/binder.c
@@ -1813,15 +1813,21 @@ static void binder_cleanup_transaction(struct binder_transaction *t,
/**
* binder_get_object() - gets object and checks for valid metadata
* @proc: binder_proc owning the buffer
+ * @u: sender's user pointer to base of buffer
* @buffer: binder_buffer that we're parsing.
* @offset: offset in the @buffer at which to validate an object.
* @object: struct binder_object to read into
*
- * Return: If there's a valid metadata object at @offset in @buffer, the
+ * Copy the binder object at the given offset into @object. If @u is
+ * provided then the copy is from the sender's buffer. If not, then
+ * it is copied from the target's @buffer.
+ *
+ * Return: If there's a valid metadata object at @offset, the
* size of that object. Otherwise, it returns zero. The object
* is read into the struct binder_object pointed to by @object.
*/
static size_t binder_get_object(struct binder_proc *proc,
+ const void __user *u,
struct binder_buffer *buffer,
unsigned long offset,
struct binder_object *object)
@@ -1831,10 +1837,16 @@ static size_t binder_get_object(struct binder_proc *proc,
size_t object_size = 0;
read_size = min_t(size_t, sizeof(*object), buffer->data_size - offset);
- if (offset > buffer->data_size || read_size < sizeof(*hdr) ||
- binder_alloc_copy_from_buffer(&proc->alloc, object, buffer,
- offset, read_size))
+ if (offset > buffer->data_size || read_size < sizeof(*hdr))
return 0;
+ if (u) {
+ if (copy_from_user(object, u + offset, read_size))
+ return 0;
+ } else {
+ if (binder_alloc_copy_from_buffer(&proc->alloc, object, buffer,
+ offset, read_size))
+ return 0;
+ }
/* Ok, now see if we read a complete object. */
hdr = &object->hdr;
@@ -1907,7 +1919,7 @@ static struct binder_buffer_object *binder_validate_ptr(
b, buffer_offset,
sizeof(object_offset)))
return NULL;
- object_size = binder_get_object(proc, b, object_offset, object);
+ object_size = binder_get_object(proc, NULL, b, object_offset, object);
if (!object_size || object->hdr.type != BINDER_TYPE_PTR)
return NULL;
if (object_offsetp)
@@ -1972,7 +1984,8 @@ static bool binder_validate_fixup(struct binder_proc *proc,
unsigned long buffer_offset;
struct binder_object last_object;
struct binder_buffer_object *last_bbo;
- size_t object_size = binder_get_object(proc, b, last_obj_offset,
+ size_t object_size = binder_get_object(proc, NULL, b,
+ last_obj_offset,
&last_object);
if (object_size != sizeof(*last_bbo))
return false;
@@ -2087,7 +2100,7 @@ static void binder_transaction_buffer_release(struct binder_proc *proc,
if (!binder_alloc_copy_from_buffer(&proc->alloc, &object_offset,
buffer, buffer_offset,
sizeof(object_offset)))
- object_size = binder_get_object(proc, buffer,
+ object_size = binder_get_object(proc, NULL, buffer,
object_offset, &object);
if (object_size == 0) {
pr_err("transaction release %d bad object at offset %lld, size %zd\n",
@@ -2425,16 +2438,266 @@ err_fd_not_accepted:
return ret;
}
-static int binder_translate_fd_array(struct binder_fd_array_object *fda,
+/**
+ * struct binder_ptr_fixup - data to be fixed-up in target buffer
+ * @offset offset in target buffer to fixup
+ * @skip_size bytes to skip in copy (fixup will be written later)
+ * @fixup_data data to write at fixup offset
+ * @node list node
+ *
+ * This is used for the pointer fixup list (pf) which is created and consumed
+ * during binder_transaction() and is only accessed locally. No
+ * locking is necessary.
+ *
+ * The list is ordered by @offset.
+ */
+struct binder_ptr_fixup {
+ binder_size_t offset;
+ size_t skip_size;
+ binder_uintptr_t fixup_data;
+ struct list_head node;
+};
+
+/**
+ * struct binder_sg_copy - scatter-gather data to be copied
+ * @offset offset in target buffer
+ * @sender_uaddr user address in source buffer
+ * @length bytes to copy
+ * @node list node
+ *
+ * This is used for the sg copy list (sgc) which is created and consumed
+ * during binder_transaction() and is only accessed locally. No
+ * locking is necessary.
+ *
+ * The list is ordered by @offset.
+ */
+struct binder_sg_copy {
+ binder_size_t offset;
+ const void __user *sender_uaddr;
+ size_t length;
+ struct list_head node;
+};
+
+/**
+ * binder_do_deferred_txn_copies() - copy and fixup scatter-gather data
+ * @alloc: binder_alloc associated with @buffer
+ * @buffer: binder buffer in target process
+ * @sgc_head: list_head of scatter-gather copy list
+ * @pf_head: list_head of pointer fixup list
+ *
+ * Processes all elements of @sgc_head, applying fixups from @pf_head
+ * and copying the scatter-gather data from the source process' user
+ * buffer to the target's buffer. It is expected that the list creation
+ * and processing all occurs during binder_transaction() so these lists
+ * are only accessed in local context.
+ *
+ * Return: 0=success, else -errno
+ */
+static int binder_do_deferred_txn_copies(struct binder_alloc *alloc,
+ struct binder_buffer *buffer,
+ struct list_head *sgc_head,
+ struct list_head *pf_head)
+{
+ int ret = 0;
+ struct binder_sg_copy *sgc, *tmpsgc;
+ struct binder_ptr_fixup *tmppf;
+ struct binder_ptr_fixup *pf =
+ list_first_entry_or_null(pf_head, struct binder_ptr_fixup,
+ node);
+
+ list_for_each_entry_safe(sgc, tmpsgc, sgc_head, node) {
+ size_t bytes_copied = 0;
+
+ while (bytes_copied < sgc->length) {
+ size_t copy_size;
+ size_t bytes_left = sgc->length - bytes_copied;
+ size_t offset = sgc->offset + bytes_copied;
+
+ /*
+ * We copy up to the fixup (pointed to by pf)
+ */
+ copy_size = pf ? min(bytes_left, (size_t)pf->offset - offset)
+ : bytes_left;
+ if (!ret && copy_size)
+ ret = binder_alloc_copy_user_to_buffer(
+ alloc, buffer,
+ offset,
+ sgc->sender_uaddr + bytes_copied,
+ copy_size);
+ bytes_copied += copy_size;
+ if (copy_size != bytes_left) {
+ BUG_ON(!pf);
+ /* we stopped at a fixup offset */
+ if (pf->skip_size) {
+ /*
+ * we are just skipping. This is for
+ * BINDER_TYPE_FDA where the translated
+ * fds will be fixed up when we get
+ * to target context.
+ */
+ bytes_copied += pf->skip_size;
+ } else {
+ /* apply the fixup indicated by pf */
+ if (!ret)
+ ret = binder_alloc_copy_to_buffer(
+ alloc, buffer,
+ pf->offset,
+ &pf->fixup_data,
+ sizeof(pf->fixup_data));
+ bytes_copied += sizeof(pf->fixup_data);
+ }
+ list_del(&pf->node);
+ kfree(pf);
+ pf = list_first_entry_or_null(pf_head,
+ struct binder_ptr_fixup, node);
+ }
+ }
+ list_del(&sgc->node);
+ kfree(sgc);
+ }
+ list_for_each_entry_safe(pf, tmppf, pf_head, node) {
+ BUG_ON(pf->skip_size == 0);
+ list_del(&pf->node);
+ kfree(pf);
+ }
+ BUG_ON(!list_empty(sgc_head));
+
+ return ret > 0 ? -EINVAL : ret;
+}
+
+/**
+ * binder_cleanup_deferred_txn_lists() - free specified lists
+ * @sgc_head: list_head of scatter-gather copy list
+ * @pf_head: list_head of pointer fixup list
+ *
+ * Called to clean up @sgc_head and @pf_head if there is an
+ * error.
+ */
+static void binder_cleanup_deferred_txn_lists(struct list_head *sgc_head,
+ struct list_head *pf_head)
+{
+ struct binder_sg_copy *sgc, *tmpsgc;
+ struct binder_ptr_fixup *pf, *tmppf;
+
+ list_for_each_entry_safe(sgc, tmpsgc, sgc_head, node) {
+ list_del(&sgc->node);
+ kfree(sgc);
+ }
+ list_for_each_entry_safe(pf, tmppf, pf_head, node) {
+ list_del(&pf->node);
+ kfree(pf);
+ }
+}
+
+/**
+ * binder_defer_copy() - queue a scatter-gather buffer for copy
+ * @sgc_head: list_head of scatter-gather copy list
+ * @offset: binder buffer offset in target process
+ * @sender_uaddr: user address in source process
+ * @length: bytes to copy
+ *
+ * Specify a scatter-gather block to be copied. The actual copy must
+ * be deferred until all the needed fixups are identified and queued.
+ * Then the copy and fixups are done together so un-translated values
+ * from the source are never visible in the target buffer.
+ *
+ * We are guaranteed that repeated calls to this function will have
+ * monotonically increasing @offset values so the list will naturally
+ * be ordered.
+ *
+ * Return: 0=success, else -errno
+ */
+static int binder_defer_copy(struct list_head *sgc_head, binder_size_t offset,
+ const void __user *sender_uaddr, size_t length)
+{
+ struct binder_sg_copy *bc = kzalloc(sizeof(*bc), GFP_KERNEL);
+
+ if (!bc)
+ return -ENOMEM;
+
+ bc->offset = offset;
+ bc->sender_uaddr = sender_uaddr;
+ bc->length = length;
+ INIT_LIST_HEAD(&bc->node);
+
+ /*
+ * We are guaranteed that the deferred copies are in-order
+ * so just add to the tail.
+ */
+ list_add_tail(&bc->node, sgc_head);
+
+ return 0;
+}
+
+/**
+ * binder_add_fixup() - queue a fixup to be applied to sg copy
+ * @pf_head: list_head of binder ptr fixup list
+ * @offset: binder buffer offset in target process
+ * @fixup: bytes to be copied for fixup
+ * @skip_size: bytes to skip when copying (fixup will be applied later)
+ *
+ * Add the specified fixup to a list ordered by @offset. When copying
+ * the scatter-gather buffers, the fixup will be copied instead of
+ * data from the source buffer. For BINDER_TYPE_FDA fixups, the fixup
+ * will be applied later (in target process context), so we just skip
+ * the bytes specified by @skip_size. If @skip_size is 0, we copy the
+ * value in @fixup.
+ *
+ * This function is called *mostly* in @offset order, but there are
+ * exceptions. Since out-of-order inserts are relatively uncommon,
+ * we insert the new element by searching backward from the tail of
+ * the list.
+ *
+ * Return: 0=success, else -errno
+ */
+static int binder_add_fixup(struct list_head *pf_head, binder_size_t offset,
+ binder_uintptr_t fixup, size_t skip_size)
+{
+ struct binder_ptr_fixup *pf = kzalloc(sizeof(*pf), GFP_KERNEL);
+ struct binder_ptr_fixup *tmppf;
+
+ if (!pf)
+ return -ENOMEM;
+
+ pf->offset = offset;
+ pf->fixup_data = fixup;
+ pf->skip_size = skip_size;
+ INIT_LIST_HEAD(&pf->node);
+
+ /* Fixups are *mostly* added in-order, but there are some
+ * exceptions. Look backwards through list for insertion point.
+ */
+ list_for_each_entry_reverse(tmppf, pf_head, node) {
+ if (tmppf->offset < pf->offset) {
+ list_add(&pf->node, &tmppf->node);
+ return 0;
+ }
+ }
+ /*
+ * if we get here, then the new offset is the lowest so
+ * insert at the head
+ */
+ list_add(&pf->node, pf_head);
+ return 0;
+}
+
+static int binder_translate_fd_array(struct list_head *pf_head,
+ struct binder_fd_array_object *fda,
+ const void __user *sender_ubuffer,
struct binder_buffer_object *parent,
+ struct binder_buffer_object *sender_uparent,
struct binder_transaction *t,
struct binder_thread *thread,
struct binder_transaction *in_reply_to)
{
binder_size_t fdi, fd_buf_size;
binder_size_t fda_offset;
+ const void __user *sender_ufda_base;
struct binder_proc *proc = thread->proc;
- struct binder_proc *target_proc = t->to_proc;
+ int ret;
+
+ if (fda->num_fds == 0)
+ return 0;
fd_buf_size = sizeof(u32) * fda->num_fds;
if (fda->num_fds >= SIZE_MAX / sizeof(u32)) {
@@ -2458,19 +2721,25 @@ static int binder_translate_fd_array(struct binder_fd_array_object *fda,
*/
fda_offset = (parent->buffer - (uintptr_t)t->buffer->user_data) +
fda->parent_offset;
- if (!IS_ALIGNED((unsigned long)fda_offset, sizeof(u32))) {
+ sender_ufda_base = (void __user *)(uintptr_t)sender_uparent->buffer +
+ fda->parent_offset;
+
+ if (!IS_ALIGNED((unsigned long)fda_offset, sizeof(u32)) ||
+ !IS_ALIGNED((unsigned long)sender_ufda_base, sizeof(u32))) {
binder_user_error("%d:%d parent offset not aligned correctly.\n",
proc->pid, thread->pid);
return -EINVAL;
}
+ ret = binder_add_fixup(pf_head, fda_offset, 0, fda->num_fds * sizeof(u32));
+ if (ret)
+ return ret;
+
for (fdi = 0; fdi < fda->num_fds; fdi++) {
u32 fd;
- int ret;
binder_size_t offset = fda_offset + fdi * sizeof(fd);
+ binder_size_t sender_uoffset = fdi * sizeof(fd);
- ret = binder_alloc_copy_from_buffer(&target_proc->alloc,
- &fd, t->buffer,
- offset, sizeof(fd));
+ ret = copy_from_user(&fd, sender_ufda_base + sender_uoffset, sizeof(fd));
if (!ret)
ret = binder_translate_fd(fd, offset, t, thread,
in_reply_to);
@@ -2480,7 +2749,8 @@ static int binder_translate_fd_array(struct binder_fd_array_object *fda,
return 0;
}
-static int binder_fixup_parent(struct binder_transaction *t,
+static int binder_fixup_parent(struct list_head *pf_head,
+ struct binder_transaction *t,
struct binder_thread *thread,
struct binder_buffer_object *bp,
binder_size_t off_start_offset,
@@ -2526,14 +2796,7 @@ static int binder_fixup_parent(struct binder_transaction *t,
}
buffer_offset = bp->parent_offset +
(uintptr_t)parent->buffer - (uintptr_t)b->user_data;
- if (binder_alloc_copy_to_buffer(&target_proc->alloc, b, buffer_offset,
- &bp->buffer, sizeof(bp->buffer))) {
- binder_user_error("%d:%d got transaction with invalid parent offset\n",
- proc->pid, thread->pid);
- return -EINVAL;
- }
-
- return 0;
+ return binder_add_fixup(pf_head, buffer_offset, bp->buffer, 0);
}
/**
@@ -2744,6 +3007,7 @@ static void binder_transaction(struct binder_proc *proc,
binder_size_t off_start_offset, off_end_offset;
binder_size_t off_min;
binder_size_t sg_buf_offset, sg_buf_end_offset;
+ binder_size_t user_offset = 0;
struct binder_proc *target_proc = NULL;
struct binder_thread *target_thread = NULL;
struct binder_node *target_node = NULL;
@@ -2759,6 +3023,12 @@ static void binder_transaction(struct binder_proc *proc,
char *secctx = NULL;
u32 secctx_sz = 0;
bool is_nested = false;
+ struct list_head sgc_head;
+ struct list_head pf_head;
+ const void __user *user_buffer = (const void __user *)
+ (uintptr_t)tr->data.ptr.buffer;
+ INIT_LIST_HEAD(&sgc_head);
+ INIT_LIST_HEAD(&pf_head);
e = binder_transaction_log_add(&binder_transaction_log);
e->debug_id = t_debug_id;
@@ -3103,19 +3373,6 @@ static void binder_transaction(struct binder_proc *proc,
if (binder_alloc_copy_user_to_buffer(
&target_proc->alloc,
- t->buffer, 0,
- (const void __user *)
- (uintptr_t)tr->data.ptr.buffer,
- tr->data_size)) {
- binder_user_error("%d:%d got transaction with invalid data ptr\n",
- proc->pid, thread->pid);
- return_error = BR_FAILED_REPLY;
- return_error_param = -EFAULT;
- return_error_line = __LINE__;
- goto err_copy_data_failed;
- }
- if (binder_alloc_copy_user_to_buffer(
- &target_proc->alloc,
t->buffer,
ALIGN(tr->data_size, sizeof(void *)),
(const void __user *)
@@ -3158,6 +3415,7 @@ static void binder_transaction(struct binder_proc *proc,
size_t object_size;
struct binder_object object;
binder_size_t object_offset;
+ binder_size_t copy_size;
if (binder_alloc_copy_from_buffer(&target_proc->alloc,
&object_offset,
@@ -3169,8 +3427,27 @@ static void binder_transaction(struct binder_proc *proc,
return_error_line = __LINE__;
goto err_bad_offset;
}
- object_size = binder_get_object(target_proc, t->buffer,
- object_offset, &object);
+
+ /*
+ * Copy the source user buffer up to the next object
+ * that will be processed.
+ */
+ copy_size = object_offset - user_offset;
+ if (copy_size && (user_offset > object_offset ||
+ binder_alloc_copy_user_to_buffer(
+ &target_proc->alloc,
+ t->buffer, user_offset,
+ user_buffer + user_offset,
+ copy_size))) {
+ binder_user_error("%d:%d got transaction with invalid data ptr\n",
+ proc->pid, thread->pid);
+ return_error = BR_FAILED_REPLY;
+ return_error_param = -EFAULT;
+ return_error_line = __LINE__;
+ goto err_copy_data_failed;
+ }
+ object_size = binder_get_object(target_proc, user_buffer,
+ t->buffer, object_offset, &object);
if (object_size == 0 || object_offset < off_min) {
binder_user_error("%d:%d got transaction with invalid offset (%lld, min %lld max %lld) or object.\n",
proc->pid, thread->pid,
@@ -3182,6 +3459,11 @@ static void binder_transaction(struct binder_proc *proc,
return_error_line = __LINE__;
goto err_bad_offset;
}
+ /*
+ * Set offset to the next buffer fragment to be
+ * copied
+ */
+ user_offset = object_offset + object_size;
hdr = &object.hdr;
off_min = object_offset + object_size;
@@ -3244,6 +3526,8 @@ static void binder_transaction(struct binder_proc *proc,
case BINDER_TYPE_FDA: {
struct binder_object ptr_object;
binder_size_t parent_offset;
+ struct binder_object user_object;
+ size_t user_parent_size;
struct binder_fd_array_object *fda =
to_binder_fd_array_object(hdr);
size_t num_valid = (buffer_offset - off_start_offset) /
@@ -3275,11 +3559,35 @@ static void binder_transaction(struct binder_proc *proc,
return_error_line = __LINE__;
goto err_bad_parent;
}
- ret = binder_translate_fd_array(fda, parent, t, thread,
- in_reply_to);
- if (ret < 0) {
+ /*
+ * We need to read the user version of the parent
+ * object to get the original user offset
+ */
+ user_parent_size =
+ binder_get_object(proc, user_buffer, t->buffer,
+ parent_offset, &user_object);
+ if (user_parent_size != sizeof(user_object.bbo)) {
+ binder_user_error("%d:%d invalid ptr object size: %zd vs %zd\n",
+ proc->pid, thread->pid,
+ user_parent_size,
+ sizeof(user_object.bbo));
+ return_error = BR_FAILED_REPLY;
+ return_error_param = -EINVAL;
+ return_error_line = __LINE__;
+ goto err_bad_parent;
+ }
+ ret = binder_translate_fd_array(&pf_head, fda,
+ user_buffer, parent,
+ &user_object.bbo, t,
+ thread, in_reply_to);
+ if (!ret)
+ ret = binder_alloc_copy_to_buffer(&target_proc->alloc,
+ t->buffer,
+ object_offset,
+ fda, sizeof(*fda));
+ if (ret) {
return_error = BR_FAILED_REPLY;
- return_error_param = ret;
+ return_error_param = ret > 0 ? -EINVAL : ret;
return_error_line = __LINE__;
goto err_translate_failed;
}
@@ -3301,19 +3609,14 @@ static void binder_transaction(struct binder_proc *proc,
return_error_line = __LINE__;
goto err_bad_offset;
}
- if (binder_alloc_copy_user_to_buffer(
- &target_proc->alloc,
- t->buffer,
- sg_buf_offset,
- (const void __user *)
- (uintptr_t)bp->buffer,
- bp->length)) {
- binder_user_error("%d:%d got transaction with invalid offsets ptr\n",
- proc->pid, thread->pid);
- return_error_param = -EFAULT;
+ ret = binder_defer_copy(&sgc_head, sg_buf_offset,
+ (const void __user *)(uintptr_t)bp->buffer,
+ bp->length);
+ if (ret) {
return_error = BR_FAILED_REPLY;
+ return_error_param = ret;
return_error_line = __LINE__;
- goto err_copy_data_failed;
+ goto err_translate_failed;
}
/* Fixup buffer pointer to target proc address space */
bp->buffer = (uintptr_t)
@@ -3322,7 +3625,8 @@ static void binder_transaction(struct binder_proc *proc,
num_valid = (buffer_offset - off_start_offset) /
sizeof(binder_size_t);
- ret = binder_fixup_parent(t, thread, bp,
+ ret = binder_fixup_parent(&pf_head, t,
+ thread, bp,
off_start_offset,
num_valid,
last_fixup_obj_off,
@@ -3349,6 +3653,30 @@ static void binder_transaction(struct binder_proc *proc,
goto err_bad_object_type;
}
}
+ /* Done processing objects, copy the rest of the buffer */
+ if (binder_alloc_copy_user_to_buffer(
+ &target_proc->alloc,
+ t->buffer, user_offset,
+ user_buffer + user_offset,
+ tr->data_size - user_offset)) {
+ binder_user_error("%d:%d got transaction with invalid data ptr\n",
+ proc->pid, thread->pid);
+ return_error = BR_FAILED_REPLY;
+ return_error_param = -EFAULT;
+ return_error_line = __LINE__;
+ goto err_copy_data_failed;
+ }
+
+ ret = binder_do_deferred_txn_copies(&target_proc->alloc, t->buffer,
+ &sgc_head, &pf_head);
+ if (ret) {
+ binder_user_error("%d:%d got transaction with invalid offsets ptr\n",
+ proc->pid, thread->pid);
+ return_error = BR_FAILED_REPLY;
+ return_error_param = ret;
+ return_error_line = __LINE__;
+ goto err_copy_data_failed;
+ }
if (t->buffer->oneway_spam_suspect)
tcomplete->type = BINDER_WORK_TRANSACTION_ONEWAY_SPAM_SUSPECT;
else
@@ -3430,6 +3758,7 @@ err_bad_object_type:
err_bad_offset:
err_bad_parent:
err_copy_data_failed:
+ binder_cleanup_deferred_txn_lists(&sgc_head, &pf_head);
binder_free_txn_fixups(t);
trace_binder_transaction_failed_buffer_release(t->buffer);
binder_transaction_buffer_release(target_proc, NULL, t->buffer,
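
Taken together, the rest of the binder.c changes defer all scatter-gather copying: while binder_transaction() walks the offsets array it only queues work, binder_defer_copy() for BINDER_TYPE_PTR payloads and binder_add_fixup() for pointer and fd-array fixups, and binder_do_deferred_txn_copies() later interleaves the copies with the fixups so untranslated sender values never become visible in the target buffer. Below is a hedged, distilled sketch of the ordered-insert technique binder_add_fixup() relies on; struct example_fixup and example_add_fixup_sorted() are illustrative names only.

/* Fixups arrive mostly in increasing offset order, so the insertion point
 * is found by scanning backward from the tail (mirrors binder_add_fixup()).
 */
struct example_fixup {
	binder_size_t offset;
	struct list_head node;
};

static void example_add_fixup_sorted(struct list_head *pf_head,
				     struct example_fixup *pf)
{
	struct example_fixup *tmppf;

	list_for_each_entry_reverse(tmppf, pf_head, node) {
		if (tmppf->offset < pf->offset) {
			list_add(&pf->node, &tmppf->node);
			return;
		}
	}
	/* lowest offset seen so far: insert at the head */
	list_add(&pf->node, pf_head);
}
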
diff --git a/drivers/video/fbdev/pxa3xx-gcu.c b/drivers/video/fbdev/pxa3xx-gcu.c
index 4279e13a3b58..3f2306419947 100644
--- a/drivers/video/fbdev/pxa3xx-gcu.c
+++ b/drivers/video/fbdev/pxa3xx-gcu.c
@@ -381,7 +381,7 @@ pxa3xx_gcu_write(struct file *file, const char *buff,
struct pxa3xx_gcu_batch *buffer;
struct pxa3xx_gcu_priv *priv = to_pxa3xx_gcu_priv(file);
- int words = count / 4;
+ size_t words = count / 4;
/* Does not need to be atomic. There's a lock in user space,
* but anyhow, this is just for statistics. */
diff --git a/include/linux/mm.h b/include/linux/mm.h
index c93e896ab4d8..37e346e14819 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1759,6 +1759,12 @@ int generic_access_phys(struct vm_area_struct *vma, unsigned long addr,
#ifdef CONFIG_SPECULATIVE_PAGE_FAULT
static inline void vm_write_begin(struct vm_area_struct *vma)
{
+ /*
+ * Isolated vma might be freed without exclusive mmap_lock but
+ * speculative page fault handler still needs to know it was changed.
+ */
+ if (!RB_EMPTY_NODE(&vma->vm_rb))
+ mmap_assert_write_locked(vma->vm_mm);
/*
* The reads never spins and preemption
* disablement is not required.
diff --git a/mm/Kconfig b/mm/Kconfig
index 27cdf23dd46e..68456c420da5 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -843,7 +843,7 @@ config SPECULATIVE_PAGE_FAULT
bool "Speculative page faults"
default y
depends on ARCH_SUPPORTS_SPECULATIVE_PAGE_FAULT
- depends on MMU && SMP
+ depends on MMU && SMP && !NUMA
help
Try to handle user space page faults without holding the mmap_sem.
diff --git a/mm/filemap.c b/mm/filemap.c
index 632734e9f8f1..9a42edafb5fb 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2706,7 +2706,9 @@ static struct file *do_async_mmap_readahead(struct vm_fault *vmf,
* it in the page cache, and handles the special cases reasonably without
* having a lot of duplicated code.
*
- * vma->vm_mm->mmap_lock must be held on entry.
+ * If FAULT_FLAG_SPECULATIVE is set, this function runs with elevated vma
+ * refcount and with mmap lock not held.
+ * Otherwise, vma->vm_mm->mmap_lock must be held on entry.
*
* If our return value has VM_FAULT_RETRY set, it's because the mmap_lock
* may be dropped before doing I/O or by lock_page_maybe_drop_mmap().
@@ -2732,6 +2734,47 @@ vm_fault_t filemap_fault(struct vm_fault *vmf)
vm_fault_t ret = 0;
bool retry = false;
+ if (vmf->flags & FAULT_FLAG_SPECULATIVE) {
+ page = find_get_page(mapping, offset);
+ if (unlikely(!page) || unlikely(PageReadahead(page)))
+ return VM_FAULT_RETRY;
+
+ if (!trylock_page(page))
+ return VM_FAULT_RETRY;
+
+ if (unlikely(compound_head(page)->mapping != mapping))
+ goto page_unlock;
+ VM_BUG_ON_PAGE(page_to_pgoff(page) != offset, page);
+ if (unlikely(!PageUptodate(page)))
+ goto page_unlock;
+
+ max_off = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
+ if (unlikely(offset >= max_off))
+ goto page_unlock;
+
+ /*
+ * Update readahead mmap_miss statistic.
+ *
+ * Note that we are not sure if finish_fault() will
+ * manage to complete the transaction. If it fails,
+ * we'll come back to filemap_fault() non-speculative
+ * case which will update mmap_miss a second time.
+ * This is not ideal, we would prefer to guarantee the
+ * update will happen exactly once.
+ */
+ if (!(vmf->vma->vm_flags & VM_RAND_READ) && ra->ra_pages) {
+ unsigned int mmap_miss = READ_ONCE(ra->mmap_miss);
+ if (mmap_miss)
+ WRITE_ONCE(ra->mmap_miss, --mmap_miss);
+ }
+
+ vmf->page = page;
+ return VM_FAULT_LOCKED;
+page_unlock:
+ unlock_page(page);
+ return VM_FAULT_RETRY;
+ }
+
max_off = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
if (unlikely(offset >= max_off))
return VM_FAULT_SIGBUS;
@@ -2874,11 +2917,6 @@ static bool filemap_map_pmd(struct vm_fault *vmf, struct page *page)
}
if (pmd_none(*vmf->pmd)) {
- if (vmf->flags & FAULT_FLAG_SPECULATIVE) {
- unlock_page(page);
- put_page(page);
- return true;
- }
vmf->ptl = pmd_lock(mm, vmf->pmd);
if (likely(pmd_none(*vmf->pmd))) {
mm_inc_nr_ptes(mm);
@@ -2976,20 +3014,16 @@ vm_fault_t filemap_map_pages(struct vm_fault *vmf,
XA_STATE(xas, &mapping->i_pages, start_pgoff);
struct page *head, *page;
unsigned int mmap_miss = READ_ONCE(file->f_ra.mmap_miss);
- vm_fault_t ret = 0;
+ vm_fault_t ret = (vmf->flags & FAULT_FLAG_SPECULATIVE) ?
+ VM_FAULT_RETRY : 0;
rcu_read_lock();
head = first_map_page(mapping, &xas, end_pgoff);
if (!head)
goto out;
- if (filemap_map_pmd(vmf, head)) {
- if (pmd_none(*vmf->pmd) &&
- vmf->flags & FAULT_FLAG_SPECULATIVE) {
- ret = VM_FAULT_RETRY;
- goto out;
- }
-
+ if (!(vmf->flags & FAULT_FLAG_SPECULATIVE) &&
+ filemap_map_pmd(vmf, head)) {
ret = VM_FAULT_NOPAGE;
goto out;
}
@@ -2998,7 +3032,6 @@ vm_fault_t filemap_map_pages(struct vm_fault *vmf,
if (!pte_map_lock_addr(vmf, addr)) {
unlock_page(head);
put_page(head);
- ret = VM_FAULT_RETRY;
goto out;
}
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 8008e6c2714e..b40bd0efd37e 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -1472,6 +1472,7 @@ void collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr)
if (!pmd)
goto drop_hpage;
+ vm_write_begin(vma);
start_pte = pte_offset_map_lock(mm, pmd, haddr, &ptl);
/* step 1: check all mapped PTEs are to the right huge page */
@@ -1521,6 +1522,7 @@ void collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr)
ptl = pmd_lock(vma->vm_mm, pmd);
_pmd = pmdp_collapse_flush(vma, haddr, pmd);
spin_unlock(ptl);
+ vm_write_end(vma);
mm_dec_nr_ptes(mm);
pte_free(mm, pmd_pgtable(_pmd));
@@ -1531,6 +1533,7 @@ drop_hpage:
abort:
pte_unmap_unlock(start_pte, ptl);
+ vm_write_end(vma);
goto drop_hpage;
}
@@ -1602,10 +1605,13 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff)
*/
if (mmap_write_trylock(mm)) {
if (!khugepaged_test_exit(mm)) {
- spinlock_t *ptl = pmd_lock(mm, pmd);
+ spinlock_t *ptl;
+ vm_write_begin(vma);
+ ptl = pmd_lock(mm, pmd);
/* assume page table is clear */
_pmd = pmdp_collapse_flush(vma, addr, pmd);
spin_unlock(ptl);
+ vm_write_end(vma);
mm_dec_nr_ptes(mm);
pte_free(mm, pmd_pgtable(_pmd));
}
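
The khugepaged hunks add the writer side of that convention: page-table collapse and retraction are now bracketed by vm_write_begin()/vm_write_end() on the VMA so a concurrent speculative fault sees the sequence change and retries. A compact, hedged sketch of the bracketing; example_retract() is hypothetical and condenses the retract_page_tables() hunk above.

/* Sketch only: bump the VMA sequence around the page-table retraction so
 * SPF readers back off, as done in collapse_pte_mapped_thp() and
 * retract_page_tables() above.
 */
static void example_retract(struct vm_area_struct *vma, pmd_t *pmd,
			    unsigned long addr)
{
	struct mm_struct *mm = vma->vm_mm;
	spinlock_t *ptl;
	pmd_t _pmd;

	vm_write_begin(vma);
	ptl = pmd_lock(mm, pmd);
	_pmd = pmdp_collapse_flush(vma, addr, pmd);
	spin_unlock(ptl);
	vm_write_end(vma);

	mm_dec_nr_ptes(mm);
	pte_free(mm, pmd_pgtable(_pmd));
}
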
diff --git a/mm/madvise.c b/mm/madvise.c
index 8920a7125389..efb279cb6396 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -497,11 +497,9 @@ static void madvise_cold_page_range(struct mmu_gather *tlb,
.tlb = tlb,
};
- vm_write_begin(vma);
tlb_start_vma(tlb, vma);
walk_page_range(vma->vm_mm, addr, end, &cold_walk_ops, &walk_private);
tlb_end_vma(tlb, vma);
- vm_write_end(vma);
}
static long madvise_cold(struct vm_area_struct *vma,
@@ -532,11 +530,9 @@ static void madvise_pageout_page_range(struct mmu_gather *tlb,
.tlb = tlb,
};
- vm_write_begin(vma);
tlb_start_vma(tlb, vma);
walk_page_range(vma->vm_mm, addr, end, &cold_walk_ops, &walk_private);
tlb_end_vma(tlb, vma);
- vm_write_end(vma);
}
static inline bool can_do_pageout(struct vm_area_struct *vma)
@@ -739,12 +735,10 @@ static int madvise_free_single_vma(struct vm_area_struct *vma,
update_hiwater_rss(mm);
mmu_notifier_invalidate_range_start(&range);
- vm_write_begin(vma);
tlb_start_vma(&tlb, vma);
walk_page_range(vma->vm_mm, range.start, range.end,
&madvise_free_walk_ops, &tlb);
tlb_end_vma(&tlb, vma);
- vm_write_end(vma);
mmu_notifier_invalidate_range_end(&range);
tlb_finish_mmu(&tlb, range.start, range.end);
diff --git a/mm/memory.c b/mm/memory.c
index a038d72a8110..95a43d08e240 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1457,7 +1457,6 @@ void unmap_page_range(struct mmu_gather *tlb,
unsigned long next;
BUG_ON(addr >= end);
- vm_write_begin(vma);
tlb_start_vma(tlb, vma);
pgd = pgd_offset(vma->vm_mm, addr);
do {
@@ -1467,7 +1466,6 @@ void unmap_page_range(struct mmu_gather *tlb,
next = zap_p4d_range(tlb, vma, pgd, addr, next, details);
} while (pgd++, addr = next, addr != end);
tlb_end_vma(tlb, vma);
- vm_write_end(vma);
}
@@ -3348,6 +3346,8 @@ static vm_fault_t do_wp_page(struct vm_fault *vmf)
if (userfaultfd_pte_wp(vma, *vmf->pte)) {
pte_unmap_unlock(vmf->pte, vmf->ptl);
+ if (vmf->flags & FAULT_FLAG_SPECULATIVE)
+ return VM_FAULT_RETRY;
return handle_userfault(vmf, VM_UFFD_WP);
}
@@ -3560,6 +3560,11 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
vm_fault_t ret;
void *shadow = NULL;
+ if (vmf->flags & FAULT_FLAG_SPECULATIVE) {
+ pte_unmap(vmf->pte);
+ return VM_FAULT_RETRY;
+ }
+
ret = pte_unmap_same(vmf);
if (ret) {
/*
@@ -3818,6 +3823,10 @@ static vm_fault_t do_anonymous_page(struct vm_fault *vmf)
if (vmf->vma_flags & VM_SHARED)
return VM_FAULT_SIGBUS;
+ /* Do not check unstable pmd, if it's changed will retry later */
+ if (vmf->flags & FAULT_FLAG_SPECULATIVE)
+ goto skip_pmd_checks;
+
/*
* Use pte_alloc() instead of pte_alloc_map(). We can't run
* pte_offset_map() on pmds where a huge pmd might be created
@@ -3835,6 +3844,7 @@ static vm_fault_t do_anonymous_page(struct vm_fault *vmf)
if (unlikely(pmd_trans_unstable(vmf->pmd)))
return 0;
+skip_pmd_checks:
/* Use the zero-page for reads */
if (!(vmf->flags & FAULT_FLAG_WRITE) &&
!mm_forbids_zeropage(vma->vm_mm)) {
@@ -3942,6 +3952,10 @@ static vm_fault_t __do_fault(struct vm_fault *vmf)
struct vm_area_struct *vma = vmf->vma;
vm_fault_t ret;
+ /* Do not check unstable pmd, if it's changed will retry later */
+ if (vmf->flags & FAULT_FLAG_SPECULATIVE)
+ goto skip_pmd_checks;
+
/*
* Preallocate pte before we take page_lock because this might lead to
* deadlocks for memcg reclaim which waits for pages under writeback:
@@ -3964,6 +3978,7 @@ static vm_fault_t __do_fault(struct vm_fault *vmf)
smp_wmb(); /* See comment in __pte_alloc() */
}
+skip_pmd_checks:
ret = vma->vm_ops->fault(vmf);
if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY |
VM_FAULT_DONE_COW)))
@@ -4130,7 +4145,11 @@ vm_fault_t finish_fault(struct vm_fault *vmf)
return ret;
}
- if (pmd_none(*vmf->pmd) && !(vmf->flags & FAULT_FLAG_SPECULATIVE)) {
+ /* Do not check unstable pmd, if it's changed will retry later */
+ if (vmf->flags & FAULT_FLAG_SPECULATIVE)
+ goto skip_pmd_checks;
+
+ if (pmd_none(*vmf->pmd)) {
if (PageTransCompound(page)) {
ret = do_set_pmd(vmf, page);
if (ret != VM_FAULT_FALLBACK)
@@ -4154,6 +4173,7 @@ vm_fault_t finish_fault(struct vm_fault *vmf)
if (pmd_devmap_trans_unstable(vmf->pmd))
return 0;
+skip_pmd_checks:
if (!pte_map_lock(vmf))
return VM_FAULT_RETRY;
@@ -4253,7 +4273,8 @@ static vm_fault_t do_fault_around(struct vm_fault *vmf)
end_pgoff = min3(end_pgoff, vma_pages(vmf->vma) + vmf->vma->vm_pgoff - 1,
start_pgoff + nr_pages - 1);
- if (pmd_none(*vmf->pmd)) {
+ if (!(vmf->flags & FAULT_FLAG_SPECULATIVE) &&
+ pmd_none(*vmf->pmd)) {
vmf->prealloc_pte = pte_alloc_one(vmf->vma->vm_mm);
if (!vmf->prealloc_pte)
return VM_FAULT_OOM;
@@ -4610,24 +4631,19 @@ static vm_fault_t handle_pte_fault(struct vm_fault *vmf)
pte_t entry;
vm_fault_t ret = 0;
+ /* Do not check unstable pmd, if it's changed will retry later */
+ if (vmf->flags & FAULT_FLAG_SPECULATIVE)
+ goto skip_pmd_checks;
+
if (unlikely(pmd_none(*vmf->pmd))) {
/*
- * In the case of the speculative page fault handler we abort
- * the speculative path immediately as the pmd is probably
- * in the way to be converted in a huge one. We will try
- * again holding the mmap_sem (which implies that the collapse
- * operation is done).
- */
- if (vmf->flags & FAULT_FLAG_SPECULATIVE)
- return VM_FAULT_RETRY;
- /*
* Leave __pte_alloc() until later: because vm_ops->fault may
* want to allocate huge page, and if we expose page table
* for an instant, it will be difficult to retract from
* concurrent faults and from rmap lookups.
*/
vmf->pte = NULL;
- } else if (!(vmf->flags & FAULT_FLAG_SPECULATIVE)) {
+ } else {
/*
* If a huge pmd materialized under us just retry later. Use
* pmd_trans_unstable() via pmd_devmap_trans_unstable() instead
@@ -4669,6 +4685,7 @@ static vm_fault_t handle_pte_fault(struct vm_fault *vmf)
}
}
+skip_pmd_checks:
if (!vmf->pte) {
if (vma_is_anonymous(vmf->vma))
return do_anonymous_page(vmf);
@@ -4953,11 +4970,13 @@ static vm_fault_t ___handle_speculative_fault(struct mm_struct *mm,
vmf.vma_flags = READ_ONCE(vmf.vma->vm_flags);
vmf.vma_page_prot = READ_ONCE(vmf.vma->vm_page_prot);
+#ifdef CONFIG_USERFAULTFD
/* Can't call userland page fault handler in the speculative path */
- if (unlikely(vmf.vma_flags & VM_UFFD_MISSING)) {
+ if (unlikely(vmf.vma_flags & __VM_UFFD_FLAGS)) {
trace_spf_vma_notsup(_RET_IP_, vmf.vma, address);
return VM_FAULT_RETRY;
}
+#endif
if (vmf.vma_flags & VM_GROWSDOWN || vmf.vma_flags & VM_GROWSUP) {
/*
@@ -4996,11 +5015,10 @@ static vm_fault_t ___handle_speculative_fault(struct mm_struct *mm,
pol = __get_vma_policy(vmf.vma, address);
if (!pol)
pol = get_task_policy(current);
- if (!pol)
- if (pol && pol->mode == MPOL_INTERLEAVE) {
- trace_spf_vma_notsup(_RET_IP_, vmf.vma, address);
- return VM_FAULT_RETRY;
- }
+ if (pol && pol->mode == MPOL_INTERLEAVE) {
+ trace_spf_vma_notsup(_RET_IP_, vmf.vma, address);
+ return VM_FAULT_RETRY;
+ }
#endif
/*
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 3b2584088d28..f91327ebd167 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -657,11 +657,9 @@ unsigned long change_prot_numa(struct vm_area_struct *vma,
{
int nr_updated;
- vm_write_begin(vma);
nr_updated = change_protection(vma, addr, end, PAGE_NONE, MM_CP_PROT_NUMA);
if (nr_updated)
count_vm_numa_events(NUMA_PTE_UPDATES, nr_updated);
- vm_write_end(vma);
return nr_updated;
}
diff --git a/mm/mremap.c b/mm/mremap.c
index 12ca03d66311..3847f5875c81 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -210,7 +210,11 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
drop_rmap_locks(vma);
}
-#ifdef CONFIG_HAVE_MOVE_PMD
+/*
+ * Speculative page fault handlers will not detect page table changes done
+ * without ptl locking.
+ */
+#if defined(CONFIG_HAVE_MOVE_PMD) && !defined(CONFIG_SPECULATIVE_PAGE_FAULT)
static bool move_normal_pmd(struct vm_area_struct *vma, unsigned long old_addr,
unsigned long new_addr, pmd_t *old_pmd, pmd_t *new_pmd)
{
@@ -277,7 +281,11 @@ static inline bool move_normal_pmd(struct vm_area_struct *vma,
}
#endif
-#ifdef CONFIG_HAVE_MOVE_PUD
+/*
+ * Speculative page fault handlers will not detect page table changes done
+ * without ptl locking.
+ */
+#if defined(CONFIG_HAVE_MOVE_PUD) && !defined(CONFIG_SPECULATIVE_PAGE_FAULT)
static bool move_normal_pud(struct vm_area_struct *vma, unsigned long old_addr,
unsigned long new_addr, pud_t *old_pud, pud_t *new_pud)
{