@@ -7,6 +7,8 @@
#define VRANGE_NONVOLATILE 0
#define VRANGE_VOLATILE 1
+extern int discard_vpage(struct page *page); /* returns 0 if the page was discarded, 1 otherwise */
+
static inline swp_entry_t swp_entry_mk_vrange_purged(void)
{
return swp_entry(SWP_VRANGE_PURGED, 0);
@@ -225,10 +225,8 @@ static inline void mlock_migrate_page(struct page *newpage, struct page *page)
extern pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma);
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
extern unsigned long vma_address(struct page *page,
struct vm_area_struct *vma);
-#endif
#else /* !CONFIG_MMU */
static inline int mlocked_vma_newpage(struct vm_area_struct *v, struct page *p)
{
@@ -60,6 +60,7 @@
#include <linux/migrate.h>
#include <linux/string.h>
#include <linux/dma-debug.h>
+#include <linux/vrange.h>
#include <asm/io.h>
#include <asm/pgalloc.h>
@@ -3643,6 +3644,8 @@ static int handle_pte_fault(struct mm_struct *mm,
entry = *pte;
if (!pte_present(entry)) {
+ swp_entry_t vrange_entry;
+retry:
if (pte_none(entry)) {
if (vma->vm_ops) {
if (likely(vma->vm_ops->fault))
@@ -3652,6 +3655,24 @@ static int handle_pte_fault(struct mm_struct *mm,
return do_anonymous_page(mm, vma, address,
pte, pmd, flags);
}
+
+ vrange_entry = pte_to_swp_entry(entry);
+ if (unlikely(entry_is_vrange_purged(vrange_entry))) {
+ if (vma->vm_flags & VM_VOLATILE)
+ return VM_FAULT_SIGBUS;
+
+ /* zap pte */
+ ptl = pte_lockptr(mm, pmd);
+ spin_lock(ptl);
+ if (unlikely(!pte_same(*pte, entry)))
+ goto unlock;
+ flush_cache_page(vma, address, pte_pfn(*pte));
+ ptep_clear_flush(vma, address, pte);
+ pte_unmap_unlock(pte, ptl);
+ goto retry;
+ }
+
+
if (pte_file(entry))
return do_nonlinear_fault(mm, vma, address,
pte, pmd, flags, entry);
@@ -728,6 +728,11 @@ int page_referenced_one(struct page *page, struct vm_area_struct *vma,
referenced++;
}
pte_unmap_unlock(pte, ptl);
+ if (vma->vm_flags & VM_VOLATILE) {
+ pra->mapcount = 0;
+ pra->vm_flags |= VM_VOLATILE;
+ return SWAP_FAIL;
+ }
}
if (referenced) {
@@ -43,6 +43,7 @@
#include <linux/sysctl.h>
#include <linux/oom.h>
#include <linux/prefetch.h>
+#include <linux/vrange.h>
#include <asm/tlbflush.h>
#include <asm/div64.h>
@@ -683,6 +684,7 @@ enum page_references {
PAGEREF_RECLAIM,
PAGEREF_RECLAIM_CLEAN,
PAGEREF_KEEP,
+ PAGEREF_DISCARD,
PAGEREF_ACTIVATE,
};
@@ -703,6 +705,13 @@ static enum page_references page_check_references(struct page *page,
if (vm_flags & VM_LOCKED)
return PAGEREF_RECLAIM;
+ /*
+ * If a volatile page reaches the tail of the LRU, discard it
+ * without considering whether to recycle it.
+ */
+ if (vm_flags & VM_VOLATILE)
+ return PAGEREF_DISCARD;
+
if (referenced_ptes) {
if (PageSwapBacked(page))
return PAGEREF_ACTIVATE;
@@ -930,6 +939,9 @@ static unsigned long shrink_page_list(struct list_head *page_list,
switch (references) {
case PAGEREF_ACTIVATE:
goto activate_locked;
+ case PAGEREF_DISCARD:
+ if (may_enter_fs && discard_vpage(page) == 0)
+ goto free_it;
case PAGEREF_KEEP:
goto keep_locked;
case PAGEREF_RECLAIM:
@@ -207,3 +207,100 @@ SYSCALL_DEFINE4(vrange, unsigned long, start,
out:
return ret;
}
+/* Replace @page's pte in @vma, if one is found, with a vrange-purged swap entry; asserts the page is locked. */
+static void try_to_discard_one(struct page *page, struct vm_area_struct *vma)
+{
+ struct mm_struct *mm = vma->vm_mm;
+ pte_t *pte;
+ pte_t pteval;
+ spinlock_t *ptl;
+ unsigned long addr;
+
+ VM_BUG_ON(!PageLocked(page));
+ /* Look up the pte for this page in @vma; bail out if there is none. */
+ addr = vma_address(page, vma);
+ pte = page_check_address(page, mm, addr, &ptl, 0);
+ if (!pte)
+ return;
+
+ BUG_ON(vma->vm_flags & (VM_SPECIAL|VM_LOCKED|VM_MIXEDMAP|VM_HUGETLB));
+ /* Tear down the existing mapping: flush the cache, then clear the pte via ptep_clear_flush(). */
+ flush_cache_page(vma, addr, page_to_pfn(page));
+ pteval = ptep_clear_flush(vma, addr, pte); /* NOTE(review): pteval is never read afterwards */
+ /* Account for the removed mapping in the mm's anon or file page counter. */
+ update_hiwater_rss(mm);
+ if (PageAnon(page))
+ dec_mm_counter(mm, MM_ANONPAGES);
+ else
+ dec_mm_counter(mm, MM_FILEPAGES);
+ /* Drop the rmap and release one page reference (presumably the one held by this mapping). */
+ page_remove_rmap(page);
+ page_cache_release(page);
+ /* Leave a vrange-purged swap entry in the pte slot that was just cleared. */
+ set_pte_at(mm, addr, pte,
+ swp_entry_to_pte(swp_entry_mk_vrange_purged()));
+
+ pte_unmap_unlock(pte, ptl);
+ mmu_notifier_invalidate_page(mm, addr);
+
+}
+/* Purge @page from each VM_VOLATILE vma found by walking its anon_vma interval tree. */
+/* Returns -1 if page_lock_anon_vma_read() yields NULL, otherwise 0 (even when no vma was volatile). */
+static int try_to_discard_anon_vpage(struct page *page)
+{
+ struct anon_vma *anon_vma;
+ struct anon_vma_chain *avc;
+ pgoff_t pgoff;
+
+ anon_vma = page_lock_anon_vma_read(page);
+ if (!anon_vma)
+ return -1;
+ /* Scale the page index by (PAGE_CACHE_SHIFT - PAGE_SHIFT) for the interval-tree lookup. */
+ pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
+ /*
+ * While iterating over the loop, some processes could see a page as
+ * purged while others could see a page as not-purged because we have
+ * no global lock between parent and child for protecting vrange system
+ * call during this loop. But it's not a problem because the page is
+ * not *SHARED* page but *COW* page so parent and child can see other
+ * data anytime. The worst case by this race is a page was purged
+ * but couldn't be discarded so it makes unnecessary page fault but
+ * it wouldn't be severe.
+ */
+ anon_vma_interval_tree_foreach(avc, &anon_vma->rb_root, pgoff, pgoff) {
+ struct vm_area_struct *vma = avc->vma;
+ /* Mappings in vmas without VM_VOLATILE are left untouched. */
+ if (!(vma->vm_flags & VM_VOLATILE))
+ continue;
+ try_to_discard_one(page, vma);
+ }
+ page_unlock_anon_vma_read(anon_vma);
+ return 0;
+}
+/* Dispatch on page type: only PageAnon pages are handled. */
+/* Returns try_to_discard_anon_vpage()'s result for anonymous pages, -1 for anything else. */
+static int try_to_discard_vpage(struct page *page)
+{
+ if (PageAnon(page))
+ return try_to_discard_anon_vpage(page);
+ return -1;
+}
+/* Try to purge a locked, non-LRU page from its volatile mappings and freeze its refcount. */
+/* Returns 0 with the page unlocked on success; returns 1 with the page still locked otherwise. */
+int discard_vpage(struct page *page)
+{
+ VM_BUG_ON(!PageLocked(page));
+ VM_BUG_ON(PageLRU(page));
+ /* try_to_discard_vpage() returns 0 when the purge pass ran. */
+ if (!try_to_discard_vpage(page)) {
+ if (PageSwapCache(page))
+ try_to_free_swap(page);
+ /* NOTE(review): presumably succeeds only if exactly one reference remains - confirm. */
+ if (page_freeze_refs(page, 1)) {
+ unlock_page(page);
+ return 0;
+ }
+ }
+
+ return 1;
+}
This patch adds the hooks in the vmscan logic to discard volatile pages and mark their pte as purged. This is a simplified implementation that uses some of the logic from Minchan's earlier efforts, so credit to Minchan for his work. Signed-off-by: John Stultz <john.stultz@linaro.org> --- include/linux/vrange.h | 2 ++ mm/internal.h | 2 -- mm/memory.c | 21 +++++++++++ mm/rmap.c | 5 +++ mm/vmscan.c | 12 +++++++ mm/vrange.c | 97 ++++++++++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 137 insertions(+), 2 deletions(-)