@@ -223,7 +223,8 @@ int do_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
return -EINVAL;
/* Return error if mode is not supported */
- if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
+ if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |
+ FALLOC_FL_MARK_VOLATILE | FALLOC_FL_UNMARK_VOLATILE))
return -EOPNOTSUPP;
/* Punch hole must have keep size set */
@@ -1,9 +1,10 @@
#ifndef _FALLOC_H_
#define _FALLOC_H_
-#define FALLOC_FL_KEEP_SIZE 0x01 /* default is extend size */
-#define FALLOC_FL_PUNCH_HOLE 0x02 /* de-allocates range */
-
+#define FALLOC_FL_KEEP_SIZE 0x01 /* default is extend size */
+#define FALLOC_FL_PUNCH_HOLE 0x02 /* de-allocates range */
+#define FALLOC_FL_MARK_VOLATILE 0x04 /* mark range volatile */
+#define FALLOC_FL_UNMARK_VOLATILE 0x08 /* mark range non-volatile */
#ifdef __KERNEL__
/*
@@ -64,6 +64,7 @@ static struct vfsmount *shm_mnt;
#include <linux/highmem.h>
#include <linux/seq_file.h>
#include <linux/magic.h>
+#include <linux/volatile.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
@@ -624,11 +625,79 @@ static int shmem_setattr(struct dentry *dentry, struct iattr *attr)
return error;
}
+static DEFINE_VOLATILE_FS_HEAD(shmem_volatile_head);
+
+/*
+ * shmem_mark_volatile - mark a byte range of a tmpfs inode as volatile
+ * @inode:  inode whose mapping (inode->i_data) the range belongs to
+ * @offset: byte offset of the start of the range
+ * @len:    length of the range in bytes
+ *
+ * The byte positions @offset and @offset + @len are shifted down by
+ * PAGE_CACHE_SHIFT to page indexes and registered with
+ * volatile_range_add() under the shmem_volatile_head lock.
+ *
+ * A positive return from volatile_range_add() is treated as a request to
+ * purge the range right away (presumably because it merged with a range
+ * that had already been purged -- confirm against the volatile range
+ * code); the pages are then dropped here and 0 is returned.
+ *
+ * NOTE(review): a start offset that is not page aligned is rounded down,
+ * so the marked range begins at the start of that page.
+ *
+ * Returns 0 on success, or the non-positive value returned by
+ * volatile_range_add().
+ */
+static int shmem_mark_volatile(struct inode *inode, loff_t offset, loff_t len)
+{
+	loff_t lstart, lend;
+	int ret;
+
+	lstart = offset >> PAGE_CACHE_SHIFT;
+	lend = (offset+len) >> PAGE_CACHE_SHIFT;
+
+	volatile_range_lock(&shmem_volatile_head);
+	ret = volatile_range_add(&shmem_volatile_head, &inode->i_data,
+					lstart, lend);
+	if (ret > 0) { /* immediately purge */
+		/*
+		 * Truncate up to the last byte before page index lend, the
+		 * same end shmem_purge_one_volatile_range() uses.  Going to
+		 * ((lend+1) << PAGE_CACHE_SHIFT) - 1 would discard a page
+		 * beyond the range the caller asked for.
+		 *
+		 * Skip the call for an empty page range: with lend == 0 the
+		 * computed end would be -1, which the truncate helpers
+		 * interpret as "through end of file".
+		 */
+		if (lend > lstart)
+			shmem_truncate_range(inode,
+					lstart << PAGE_CACHE_SHIFT,
+					(lend << PAGE_CACHE_SHIFT) - 1);
+		ret = 0;
+	}
+	volatile_range_unlock(&shmem_volatile_head);
+
+	return ret;
+}
+
+/*
+ * shmem_unmark_volatile - remove the volatile marking from a byte range
+ * @inode:  inode whose mapping (inode->i_data) the range belongs to
+ * @offset: byte offset of the start of the range
+ * @len:    length of the range in bytes
+ *
+ * The byte positions @offset and @offset + @len are shifted down by
+ * PAGE_CACHE_SHIFT to page indexes and passed to volatile_range_remove()
+ * while the shmem_volatile_head lock is held.
+ *
+ * Returns whatever volatile_range_remove() returned.
+ */
+static int shmem_unmark_volatile(struct inode *inode, loff_t offset, loff_t len)
+{
+	loff_t first_pg = offset >> PAGE_CACHE_SHIFT;
+	loff_t last_pg = (offset + len) >> PAGE_CACHE_SHIFT;
+	int status;
+
+	volatile_range_lock(&shmem_volatile_head);
+	status = volatile_range_remove(&shmem_volatile_head, &inode->i_data,
+					first_pg, last_pg);
+	volatile_range_unlock(&shmem_volatile_head);
+
+	return status;
+}
+
+/*
+ * shmem_clear_volatile - call volatile_range_clear() for an inode's mapping
+ * @inode: inode whose i_data mapping is passed to volatile_range_clear()
+ *
+ * The call is made with the shmem_volatile_head lock held.
+ */
+static void shmem_clear_volatile(struct inode *inode)
+{
+	struct address_space *mapping = &inode->i_data;
+
+	volatile_range_lock(&shmem_volatile_head);
+	volatile_range_clear(&shmem_volatile_head, mapping);
+	volatile_range_unlock(&shmem_volatile_head);
+}
+
+
+
+/*
+ * shmem_purge_one_volatile_range - pluck one volatile range and drop its pages
+ *
+ * Asks volatile_ranges_pluck_lru() for a range (mapping plus start/end page
+ * indexes) and, when it reports one, truncates the corresponding bytes of
+ * the owning inode.  Everything runs under the shmem_volatile_head lock.
+ *
+ * NOTE(review): the truncate runs with the volatile range lock held;
+ * callers on the writeback path should confirm shmem_truncate_range()
+ * cannot block on a page they already hold locked.
+ *
+ * Returns the value of volatile_ranges_pluck_lru(); non-zero is taken to
+ * mean a range was plucked.
+ */
+static int shmem_purge_one_volatile_range(void)
+{
+	struct address_space *mapping;
+	loff_t start, end;
+	int ret;
+
+	volatile_range_lock(&shmem_volatile_head);
+	ret = volatile_ranges_pluck_lru(&shmem_volatile_head, &mapping,
+					&start, &end);
+	/*
+	 * Only truncate a non-empty page range.  For end == 0 the last-byte
+	 * expression below evaluates to -1, which the truncate helpers
+	 * interpret as "through end of file" and would wipe the whole inode
+	 * instead of just the volatile range.
+	 */
+	if (ret && end > start)
+		shmem_truncate_range(mapping->host,
+					start << PAGE_CACHE_SHIFT,
+					(end << PAGE_CACHE_SHIFT) - 1);
+	volatile_range_unlock(&shmem_volatile_head);
+
+	return ret;
+}
+
+
static void shmem_evict_inode(struct inode *inode)
{
struct shmem_inode_info *info = SHMEM_I(inode);
struct shmem_xattr *xattr, *nxattr;
+ shmem_clear_volatile(inode);
+
if (inode->i_mapping->a_ops == &shmem_aops) {
shmem_unacct_size(info->flags, inode->i_size);
inode->i_size = 0;
@@ -776,6 +845,7 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc)
struct inode *inode;
swp_entry_t swap;
pgoff_t index;
+ int purged;
BUG_ON(!PageLocked(page));
mapping = page->mapping;
@@ -830,6 +900,12 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc)
SetPageUptodate(page);
}
+
+ /* If we have volatile pages, try purging them first */
+ purged = shmem_purge_one_volatile_range();
+ if (purged)
+ goto redirty;
+
swap = get_swap_page();
if (!swap.val)
goto redirty;
@@ -1789,6 +1865,14 @@ static long shmem_fallocate(struct file *file, int mode, loff_t offset,
/* No need to unmap again: hole-punching leaves COWed pages */
error = 0;
goto out;
+ } else if (mode & FALLOC_FL_MARK_VOLATILE) {
+ /* Mark pages volatile, sort of delayed hole punching */
+ error = shmem_mark_volatile(inode, offset, len);
+ goto out;
+ } else if (mode & FALLOC_FL_UNMARK_VOLATILE) {
+ /* Mark pages non-volatile, return error if pages were purged */
+ error = shmem_unmark_volatile(inode, offset, len);
+ goto out;
}
/* We need to check rlimit even when FALLOC_FL_KEEP_SIZE */
This patch enables FALLOC_FL_MARK_VOLATILE/UNMARK_VOLATILE functionality for tmpfs making use of the volatile range management code. Conceptually, FALLOC_FL_MARK_VOLATILE is like a delayed FALLOC_FL_PUNCH_HOLE. This allows applications that have data caches that can be re-created to tell the kernel that some memory contains data that is useful in the future, but can be recreated if needed, so if the kernel needs, it can zap the memory without having to swap it out. In use, applications use FALLOC_FL_MARK_VOLATILE to mark page ranges as volatile when they are not in use. Then later if they want to reuse the data, they use FALLOC_FL_UNMARK_VOLATILE, which will return an error if the data has been purged. This is very much influenced by the Android Ashmem interface by Robert Love so credits to him and the Android developers. In many cases the code & logic come directly from the ashmem patch. The intent of this patch is to allow for ashmem-like behavior, but embeds the idea a little deeper into the VM code. This is a reworked version of the fadvise volatile idea submitted earlier to the list. Thanks to Dave Chinner for suggesting to rework the idea in this fashion. Also thanks to Dmitry Adamushko for continued review and bug reporting, and Dave Hansen for help with the original design and mentoring me in the VM code. v3: * Fix off by one issue when truncating page ranges * Use Dave Hansen's suggestion to use shmem_writepage to trigger range purging instead of using a shrinker. 
CC: Andrew Morton <akpm@linux-foundation.org> CC: Android Kernel Team <kernel-team@android.com> CC: Robert Love <rlove@google.com> CC: Mel Gorman <mel@csn.ul.ie> CC: Hugh Dickins <hughd@google.com> CC: Dave Hansen <dave@linux.vnet.ibm.com> CC: Rik van Riel <riel@redhat.com> CC: Dmitry Adamushko <dmitry.adamushko@gmail.com> CC: Dave Chinner <david@fromorbit.com> CC: Neil Brown <neilb@suse.de> CC: Andrea Righi <andrea@betterlinux.com> CC: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> CC: Taras Glek <tgek@mozilla.com> CC: Mike Hommey <mh@glandium.org> CC: Jan Kara <jack@suse.cz> CC: KOSAKI Motohiro <kosaki.motohiro@gmail.com> Signed-off-by: John Stultz <john.stultz@linaro.org> --- fs/open.c | 3 +- include/linux/falloc.h | 7 ++-- mm/shmem.c | 84 ++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 90 insertions(+), 4 deletions(-)