Message ID | 163977198611.2082978.4748242515627648240.stgit@warthog.procyon.org.uk |
---|---|
State | Superseded |
Headers | show |
Series | ceph: Make ceph_netfs_issue_op() handle inlined data (untested) | expand |
On Fri, 2021-12-17 at 20:13 +0000, David Howells wrote: > Here's a first stab at making ceph_netfs_issue_op() handle inlined data on > page 0. The code that's upstream *ought* to be doing this in > ceph_readpage() as the page isn't pinned and could get discarded under > memory pressure from what I can see. > > Signed-off-by: David Howells <dhowells@redhat.com> > cc: ceph-devel@vger.kernel.org > --- > > fs/ceph/addr.c | 79 ++++++++++++++++++++++++++++++++++++++++++++------------ > 1 file changed, 62 insertions(+), 17 deletions(-) > > diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c > index 553e2b5653f3..b72f77fe32f2 100644 > --- a/fs/ceph/addr.c > +++ b/fs/ceph/addr.c > @@ -244,6 +244,61 @@ static void finish_netfs_read(struct ceph_osd_request *req) > iput(req->r_inode); > } > > +static bool ceph_netfs_issue_op_inline(struct netfs_read_subrequest *subreq) > +{ > + struct netfs_read_request *rreq = subreq->rreq; > + struct inode *inode = rreq->inode; > + struct ceph_mds_reply_info_parsed *rinfo; > + struct ceph_mds_reply_info_in *iinfo; > + struct ceph_mds_request *req; > + struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(inode->i_sb); > + struct ceph_inode_info *ci = ceph_inode(inode); > + struct iov_iter iter; > + ssize_t err = 0; > + size_t len; > + > + __set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags); > + __clear_bit(NETFS_SREQ_WRITE_TO_CACHE, &subreq->flags); > + > + if (subreq->start >= inode->i_size || subreq->start >= 4096) > + goto out; > + > + /* We need to fetch the inline data. 
*/ > + req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_GETATTR, USE_ANY_MDS); > + if (IS_ERR(req)) { > + err = PTR_ERR(req); > + goto out; > + } > + req->r_ino1 = ci->i_vino; > + req->r_args.getattr.mask = cpu_to_le32(CEPH_STAT_CAP_INLINE_DATA); > + req->r_num_caps = 2; > + > + err = ceph_mdsc_do_request(mdsc, NULL, req); > + if (err < 0) > + goto out; > + > + rinfo = &req->r_reply_info; > + iinfo = &rinfo->targeti; > + if (iinfo->inline_version == CEPH_INLINE_NONE) { > + /* The data got uninlined */ > + ceph_mdsc_put_request(req); > + return false; > + } > + > + len = min_t(size_t, 4096 - subreq->start, iinfo->inline_len); > + iov_iter_xarray(&iter, READ, &rreq->mapping->i_pages, subreq->start, len); > + > + err = copy_to_iter(iinfo->inline_data, len, &iter); > + if (err == 0) > + err = -EFAULT; > + > + ceph_mdsc_put_request(req); > + > +out: > + netfs_subreq_terminated(subreq, err, false); > + return true; > +} > + > static void ceph_netfs_issue_op(struct netfs_read_subrequest *subreq) > { > struct netfs_read_request *rreq = subreq->rreq; > @@ -258,6 +313,10 @@ static void ceph_netfs_issue_op(struct netfs_read_subrequest *subreq) > int err = 0; > u64 len = subreq->len; > > + if (ci->i_inline_version != CEPH_INLINE_NONE && > + ceph_netfs_issue_op_inline(subreq)) > + return; > + > req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout, vino, subreq->start, &len, > 0, 1, CEPH_OSD_OP_READ, > CEPH_OSD_FLAG_READ | fsc->client->osdc.client->options->read_from_replica, > @@ -331,23 +390,9 @@ static int ceph_readpage(struct file *file, struct page *subpage) > size_t len = folio_size(folio); > u64 off = folio_file_pos(folio); > > - if (ci->i_inline_version != CEPH_INLINE_NONE) { > - /* > - * Uptodate inline data should have been added > - * into page cache while getting Fcr caps. 
> - */ > - if (off == 0) { > - folio_unlock(folio); > - return -EINVAL; > - } > - zero_user_segment(&folio->page, 0, folio_size(folio)); > - folio_mark_uptodate(folio); > - folio_unlock(folio); > - return 0; > - } > - > - dout("readpage ino %llx.%llx file %p off %llu len %zu folio %p index %lu\n", > - vino.ino, vino.snap, file, off, len, folio, folio_index(folio)); > + if (ci->i_inline_version == CEPH_INLINE_NONE) > + dout("readpage ino %llx.%llx file %p off %llu len %zu folio %p index %lu\n", > + vino.ino, vino.snap, file, off, len, folio, folio_index(folio)); > > return netfs_readpage(file, folio, &ceph_netfs_read_ops, NULL); > } > > This also looks good to me. I'll plan to do some testing with it on top of the other patches you sent and see how it goes. It may be best to just toss these into a branch based on top of your fscache-rewrite branch, and I can pull that into an integration branch for testing locally. I'll have to work out a test environment with inline support too, and make sure we can exercise this codepath.
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 553e2b5653f3..b72f77fe32f2 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -244,6 +244,61 @@ static void finish_netfs_read(struct ceph_osd_request *req) iput(req->r_inode); } +static bool ceph_netfs_issue_op_inline(struct netfs_read_subrequest *subreq) +{ + struct netfs_read_request *rreq = subreq->rreq; + struct inode *inode = rreq->inode; + struct ceph_mds_reply_info_parsed *rinfo; + struct ceph_mds_reply_info_in *iinfo; + struct ceph_mds_request *req; + struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(inode->i_sb); + struct ceph_inode_info *ci = ceph_inode(inode); + struct iov_iter iter; + ssize_t err = 0; + size_t len; + + __set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags); + __clear_bit(NETFS_SREQ_WRITE_TO_CACHE, &subreq->flags); + + if (subreq->start >= inode->i_size || subreq->start >= 4096) + goto out; + + /* We need to fetch the inline data. */ + req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_GETATTR, USE_ANY_MDS); + if (IS_ERR(req)) { + err = PTR_ERR(req); + goto out; + } + req->r_ino1 = ci->i_vino; + req->r_args.getattr.mask = cpu_to_le32(CEPH_STAT_CAP_INLINE_DATA); + req->r_num_caps = 2; + + err = ceph_mdsc_do_request(mdsc, NULL, req); + if (err < 0) + goto out; + + rinfo = &req->r_reply_info; + iinfo = &rinfo->targeti; + if (iinfo->inline_version == CEPH_INLINE_NONE) { + /* The data got uninlined */ + ceph_mdsc_put_request(req); + return false; + } + + len = min_t(size_t, 4096 - subreq->start, iinfo->inline_len); + iov_iter_xarray(&iter, READ, &rreq->mapping->i_pages, subreq->start, len); + + err = copy_to_iter(iinfo->inline_data, len, &iter); + if (err == 0) + err = -EFAULT; + + ceph_mdsc_put_request(req); + +out: + netfs_subreq_terminated(subreq, err, false); + return true; +} + static void ceph_netfs_issue_op(struct netfs_read_subrequest *subreq) { struct netfs_read_request *rreq = subreq->rreq; @@ -258,6 +313,10 @@ static void ceph_netfs_issue_op(struct netfs_read_subrequest *subreq) int err = 0; 
u64 len = subreq->len; + if (ci->i_inline_version != CEPH_INLINE_NONE && + ceph_netfs_issue_op_inline(subreq)) + return; + req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout, vino, subreq->start, &len, 0, 1, CEPH_OSD_OP_READ, CEPH_OSD_FLAG_READ | fsc->client->osdc.client->options->read_from_replica, @@ -331,23 +390,9 @@ static int ceph_readpage(struct file *file, struct page *subpage) size_t len = folio_size(folio); u64 off = folio_file_pos(folio); - if (ci->i_inline_version != CEPH_INLINE_NONE) { - /* - * Uptodate inline data should have been added - * into page cache while getting Fcr caps. - */ - if (off == 0) { - folio_unlock(folio); - return -EINVAL; - } - zero_user_segment(&folio->page, 0, folio_size(folio)); - folio_mark_uptodate(folio); - folio_unlock(folio); - return 0; - } - - dout("readpage ino %llx.%llx file %p off %llu len %zu folio %p index %lu\n", - vino.ino, vino.snap, file, off, len, folio, folio_index(folio)); + if (ci->i_inline_version == CEPH_INLINE_NONE) + dout("readpage ino %llx.%llx file %p off %llu len %zu folio %p index %lu\n", + vino.ino, vino.snap, file, off, len, folio, folio_index(folio)); return netfs_readpage(file, folio, &ceph_netfs_read_ops, NULL); }
Here's a first stab at making ceph_netfs_issue_op() handle inlined data on page 0. From what I can see, the code that's upstream *ought* to be doing this in ceph_readpage(), as the page isn't pinned and could get discarded under memory pressure. Signed-off-by: David Howells <dhowells@redhat.com> cc: ceph-devel@vger.kernel.org --- fs/ceph/addr.c | 79 ++++++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 62 insertions(+), 17 deletions(-)