summaryrefslogtreecommitdiff
path: root/mm/huge_memory.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/huge_memory.c')
-rw-r--r--mm/huge_memory.c91
1 files changed, 45 insertions, 46 deletions
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 9efe88ef9702..7ea8da990b9d 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1259,12 +1259,12 @@ int do_huge_pmd_numa_page(struct fault_env *fe, pmd_t pmd)
/* Migration could have started since the pmd_trans_migrating check */
if (!page_locked) {
+ page_nid = -1;
if (!get_page_unless_zero(page))
goto out_unlock;
spin_unlock(fe->ptl);
wait_on_page_locked(page);
put_page(page);
- page_nid = -1;
goto out;
}
@@ -1445,7 +1445,7 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr,
unsigned long new_addr, unsigned long old_end,
- pmd_t *old_pmd, pmd_t *new_pmd, bool *need_flush)
+ pmd_t *old_pmd, pmd_t *new_pmd)
{
spinlock_t *old_ptl, *new_ptl;
pmd_t pmd;
@@ -1476,7 +1476,7 @@ bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr,
if (new_ptl != old_ptl)
spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
pmd = pmdp_huge_get_and_clear(mm, old_addr, old_pmd);
- if (pmd_present(pmd) && pmd_dirty(pmd))
+ if (pmd_present(pmd))
force_flush = true;
VM_BUG_ON(!pmd_none(*new_pmd));
@@ -1487,12 +1487,10 @@ bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr,
pgtable_trans_huge_deposit(mm, new_pmd, pgtable);
}
set_pmd_at(mm, new_addr, new_pmd, pmd_mksoft_dirty(pmd));
- if (new_ptl != old_ptl)
- spin_unlock(new_ptl);
if (force_flush)
flush_tlb_range(vma, old_addr, old_addr + PMD_SIZE);
- else
- *need_flush = true;
+ if (new_ptl != old_ptl)
+ spin_unlock(new_ptl);
spin_unlock(old_ptl);
return true;
}
@@ -1841,7 +1839,7 @@ void vma_adjust_trans_huge(struct vm_area_struct *vma,
}
}
-static void freeze_page(struct page *page)
+static void unmap_page(struct page *page)
{
enum ttu_flags ttu_flags = TTU_IGNORE_MLOCK | TTU_IGNORE_ACCESS |
TTU_RMAP_LOCKED;
@@ -1864,7 +1862,7 @@ static void freeze_page(struct page *page)
VM_BUG_ON_PAGE(ret, page + i - 1);
}
-static void unfreeze_page(struct page *page)
+static void remap_page(struct page *page)
{
int i;
@@ -1878,26 +1876,13 @@ static void __split_huge_page_tail(struct page *head, int tail,
struct page *page_tail = head + tail;
VM_BUG_ON_PAGE(atomic_read(&page_tail->_mapcount) != -1, page_tail);
- VM_BUG_ON_PAGE(page_ref_count(page_tail) != 0, page_tail);
/*
- * tail_page->_refcount is zero and not changing from under us. But
- * get_page_unless_zero() may be running from under us on the
- * tail_page. If we used atomic_set() below instead of atomic_inc() or
- * atomic_add(), we would then run atomic_set() concurrently with
- * get_page_unless_zero(), and atomic_set() is implemented in C not
- * using locked ops. spin_unlock on x86 sometime uses locked ops
- * because of PPro errata 66, 92, so unless somebody can guarantee
- * atomic_set() here would be safe on all archs (and not only on x86),
- * it's safer to use atomic_inc()/atomic_add().
+ * Clone page flags before unfreezing refcount.
+ *
+ * After successful get_page_unless_zero() might follow flags change,
+ * for exmaple lock_page() which set PG_waiters.
*/
- if (PageAnon(head)) {
- page_ref_inc(page_tail);
- } else {
- /* Additional pin to radix tree */
- page_ref_add(page_tail, 2);
- }
-
page_tail->flags &= ~PAGE_FLAGS_CHECK_AT_PREP;
page_tail->flags |= (head->flags &
((1L << PG_referenced) |
@@ -1909,36 +1894,42 @@ static void __split_huge_page_tail(struct page *head, int tail,
(1L << PG_unevictable) |
(1L << PG_dirty)));
- /*
- * After clearing PageTail the gup refcount can be released.
- * Page flags also must be visible before we make the page non-compound.
- */
+ /* ->mapping in first tail page is compound_mapcount */
+ VM_BUG_ON_PAGE(tail > 2 && page_tail->mapping != TAIL_MAPPING,
+ page_tail);
+ page_tail->mapping = head->mapping;
+ page_tail->index = head->index + tail;
+
+ /* Page flags must be visible before we make the page non-compound. */
smp_wmb();
+ /*
+ * Clear PageTail before unfreezing page refcount.
+ *
+ * After successful get_page_unless_zero() might follow put_page()
+ * which needs correct compound_head().
+ */
clear_compound_head(page_tail);
+ /* Finally unfreeze refcount. Additional reference from page cache. */
+ page_ref_unfreeze(page_tail, 1 + (!PageAnon(head) ||
+ PageSwapCache(head)));
+
if (page_is_young(head))
set_page_young(page_tail);
if (page_is_idle(head))
set_page_idle(page_tail);
- /* ->mapping in first tail page is compound_mapcount */
- VM_BUG_ON_PAGE(tail > 2 && page_tail->mapping != TAIL_MAPPING,
- page_tail);
- page_tail->mapping = head->mapping;
-
- page_tail->index = head->index + tail;
page_cpupid_xchg_last(page_tail, page_cpupid_last(head));
lru_add_page_tail(head, page_tail, lruvec, list);
}
static void __split_huge_page(struct page *page, struct list_head *list,
- unsigned long flags)
+ pgoff_t end, unsigned long flags)
{
struct page *head = compound_head(page);
struct zone *zone = page_zone(head);
struct lruvec *lruvec;
- pgoff_t end = -1;
int i;
lruvec = mem_cgroup_page_lruvec(head, zone->zone_pgdat);
@@ -1946,9 +1937,6 @@ static void __split_huge_page(struct page *page, struct list_head *list,
/* complete memcg works before add pages to LRU */
mem_cgroup_split_huge_fixup(head);
- if (!PageAnon(page))
- end = DIV_ROUND_UP(i_size_read(head->mapping->host), PAGE_SIZE);
-
for (i = HPAGE_PMD_NR - 1; i >= 1; i--) {
__split_huge_page_tail(head, i, lruvec, list);
/* Some pages can be beyond i_size: drop them from page cache */
@@ -1973,7 +1961,7 @@ static void __split_huge_page(struct page *page, struct list_head *list,
spin_unlock_irqrestore(zone_lru_lock(page_zone(head)), flags);
- unfreeze_page(head);
+ remap_page(head);
for (i = 0; i < HPAGE_PMD_NR; i++) {
struct page *subpage = head + i;
@@ -2101,6 +2089,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
int count, mapcount, extra_pins, ret;
bool mlocked;
unsigned long flags;
+ pgoff_t end;
VM_BUG_ON_PAGE(is_huge_zero_page(page), page);
VM_BUG_ON_PAGE(!PageLocked(page), page);
@@ -2122,6 +2111,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
goto out;
}
extra_pins = 0;
+ end = -1;
mapping = NULL;
anon_vma_lock_write(anon_vma);
} else {
@@ -2137,10 +2127,19 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
extra_pins = HPAGE_PMD_NR;
anon_vma = NULL;
i_mmap_lock_read(mapping);
+
+ /*
+ *__split_huge_page() may need to trim off pages beyond EOF:
+ * but on 32-bit, i_size_read() takes an irq-unsafe seqlock,
+ * which cannot be nested inside the page tree lock. So note
+ * end now: i_size itself may be changed at any moment, but
+ * head page lock is good enough to serialize the trimming.
+ */
+ end = DIV_ROUND_UP(i_size_read(mapping->host), PAGE_SIZE);
}
/*
- * Racy check if we can split the page, before freeze_page() will
+ * Racy check if we can split the page, before unmap_page() will
* split PMDs
*/
if (total_mapcount(head) != page_count(head) - extra_pins - 1) {
@@ -2149,7 +2148,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
}
mlocked = PageMlocked(page);
- freeze_page(head);
+ unmap_page(head);
VM_BUG_ON_PAGE(compound_mapcount(head), head);
/* Make sure the page is not on per-CPU pagevec as it takes pin */
@@ -2186,7 +2185,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
if (mapping)
__dec_node_page_state(page, NR_SHMEM_THPS);
spin_unlock(&pgdata->split_queue_lock);
- __split_huge_page(page, list, flags);
+ __split_huge_page(page, list, end, flags);
ret = 0;
} else {
if (IS_ENABLED(CONFIG_DEBUG_VM) && mapcount) {
@@ -2201,7 +2200,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
fail: if (mapping)
spin_unlock(&mapping->tree_lock);
spin_unlock_irqrestore(zone_lru_lock(page_zone(head)), flags);
- unfreeze_page(head);
+ remap_page(head);
ret = -EBUSY;
}