@@ -777,6 +777,36 @@ static int gfs2_fsync(struct file *file,
return ret ? ret : ret1;
}
+static inline bool should_fault_in_pages(ssize_t ret, struct iov_iter *i,
+ size_t *prev_count,
+ size_t *window_size)
+{
+ char __user *p = i->iov[0].iov_base + i->iov_offset;
+ size_t count = iov_iter_count(i);
+ int pages = 1;
+
+ if (likely(!count))
+ return false;
+ if (ret <= 0 && ret != -EFAULT)
+ return false;
+ if (!iter_is_iovec(i))
+ return false;
+
+ if (*prev_count != count || !*window_size) {
+ int pages, nr_dirtied;
+
+ pages = min_t(int, BIO_MAX_VECS,
+ DIV_ROUND_UP(iov_iter_count(i), PAGE_SIZE));
+ nr_dirtied = max(current->nr_dirtied_pause -
+ current->nr_dirtied, 1);
+ pages = min(pages, nr_dirtied);
+ }
+
+ *prev_count = count;
+ *window_size = (size_t)PAGE_SIZE * pages - offset_in_page(p);
+ return true;
+}
+
static ssize_t gfs2_file_direct_read(struct kiocb *iocb, struct iov_iter *to,
struct gfs2_holder *gh)
{
@@ -841,9 +871,17 @@ static ssize_t gfs2_file_read_iter(struc
{
struct gfs2_inode *ip;
struct gfs2_holder gh;
+ size_t prev_count = 0, window_size = 0;
size_t written = 0;
ssize_t ret;
+ /*
+ * In this function, we disable page faults when we're holding the
+ * inode glock while doing I/O. If a page fault occurs, we indicate
+ * that the inode glock may be dropped, fault in the pages manually,
+ * and retry.
+ */
+
if (iocb->ki_flags & IOCB_DIRECT) {
ret = gfs2_file_direct_read(iocb, to, &gh);
if (likely(ret != -ENOTBLK))
@@ -865,13 +903,34 @@ static ssize_t gfs2_file_read_iter(struc
}
ip = GFS2_I(iocb->ki_filp->f_mapping->host);
gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &gh);
+retry:
ret = gfs2_glock_nq(&gh);
if (ret)
goto out_uninit;
+retry_under_glock:
+ pagefault_disable();
ret = generic_file_read_iter(iocb, to);
+ pagefault_enable();
if (ret > 0)
written += ret;
- gfs2_glock_dq(&gh);
+
+ if (should_fault_in_pages(ret, to, &prev_count, &window_size)) {
+ size_t leftover;
+
+ gfs2_holder_allow_demote(&gh);
+ leftover = fault_in_iov_iter_writeable(to, window_size);
+ gfs2_holder_disallow_demote(&gh);
+ if (leftover != window_size) {
+ if (!gfs2_holder_queued(&gh)) {
+ if (written)
+ goto out_uninit;
+ goto retry;
+ }
+ goto retry_under_glock;
+ }
+ }
+ if (gfs2_holder_queued(&gh))
+ gfs2_glock_dq(&gh);
out_uninit:
gfs2_holder_uninit(&gh);
return written ? written : ret;
@@ -886,8 +945,17 @@ static ssize_t gfs2_file_buffered_write(
struct gfs2_inode *ip = GFS2_I(inode);
struct gfs2_sbd *sdp = GFS2_SB(inode);
struct gfs2_holder *statfs_gh = NULL;
+ size_t prev_count = 0, window_size = 0;
+ size_t read = 0;
ssize_t ret;
+ /*
+ * In this function, we disable page faults when we're holding the
+ * inode glock while doing I/O. If a page fault occurs, we indicate
+ * that the inode glock may be dropped, fault in the pages manually,
+ * and retry.
+ */
+
if (inode == sdp->sd_rindex) {
statfs_gh = kmalloc(sizeof(*statfs_gh), GFP_NOFS);
if (!statfs_gh)
@@ -895,10 +963,11 @@ static ssize_t gfs2_file_buffered_write(
}
gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, gh);
+retry:
ret = gfs2_glock_nq(gh);
if (ret)
goto out_uninit;
-
+retry_under_glock:
if (inode == sdp->sd_rindex) {
struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode);
@@ -909,21 +978,41 @@ static ssize_t gfs2_file_buffered_write(
}
current->backing_dev_info = inode_to_bdi(inode);
+ pagefault_disable();
ret = iomap_file_buffered_write(iocb, from, &gfs2_iomap_ops);
+ pagefault_enable();
current->backing_dev_info = NULL;
- if (ret > 0)
+ if (ret > 0) {
iocb->ki_pos += ret;
+ read += ret;
+ }
if (inode == sdp->sd_rindex)
gfs2_glock_dq_uninit(statfs_gh);
+ if (should_fault_in_pages(ret, from, &prev_count, &window_size)) {
+ size_t leftover;
+
+ gfs2_holder_allow_demote(gh);
+ leftover = fault_in_iov_iter_readable(from, window_size);
+ gfs2_holder_disallow_demote(gh);
+ if (leftover != window_size) {
+ if (!gfs2_holder_queued(gh)) {
+ if (read)
+ goto out_uninit;
+ goto retry;
+ }
+ goto retry_under_glock;
+ }
+ }
out_unlock:
- gfs2_glock_dq(gh);
+ if (gfs2_holder_queued(gh))
+ gfs2_glock_dq(gh);
out_uninit:
gfs2_holder_uninit(gh);
if (statfs_gh)
kfree(statfs_gh);
- return ret;
+ return read ? read : ret;
}
/**