Message ID | 20250116052317.485356-1-viro@zeniv.linux.org.uk |
---|---|
State | Superseded |
Headers | show |
Series | [v2,01/20] make sure that DNAME_INLINE_LEN is a multiple of word size | expand |
On Thu 16-01-25 05:23:01, Al Viro wrote: > kept separate from the previous commit to keep the noise separate > from actual changes... > > Signed-off-by: Al Viro <viro@zeniv.linux.org.uk> Looks good. Feel free to add: Reviewed-by: Jan Kara <jack@suse.cz> Honza > --- > fs/dcache.c | 22 ++++++++++------------ > 1 file changed, 10 insertions(+), 12 deletions(-) > > diff --git a/fs/dcache.c b/fs/dcache.c > index f387dc97df86..6f36d3e8c739 100644 > --- a/fs/dcache.c > +++ b/fs/dcache.c > @@ -296,10 +296,8 @@ static inline int dentry_cmp(const struct dentry *dentry, const unsigned char *c > } > > struct external_name { > - struct { > - atomic_t count; // ->count and ->head can't be combined > - struct rcu_head head; // see take_dentry_name_snapshot() > - } u; > + struct rcu_head head; // ->head and ->count can't be combined > + atomic_t count; // see take_dentry_name_snapshot() > unsigned char name[]; > }; > > @@ -344,7 +342,7 @@ void take_dentry_name_snapshot(struct name_snapshot *name, struct dentry *dentry > struct external_name *p; > p = container_of(s, struct external_name, name[0]); > // get a valid reference > - if (unlikely(!atomic_inc_not_zero(&p->u.count))) > + if (unlikely(!atomic_inc_not_zero(&p->count))) > goto retry; > name->name.name = s; > } > @@ -361,8 +359,8 @@ void release_dentry_name_snapshot(struct name_snapshot *name) > if (unlikely(name->name.name != name->inline_name.string)) { > struct external_name *p; > p = container_of(name->name.name, struct external_name, name[0]); > - if (unlikely(atomic_dec_and_test(&p->u.count))) > - kfree_rcu(p, u.head); > + if (unlikely(atomic_dec_and_test(&p->count))) > + kfree_rcu(p, head); > } > } > EXPORT_SYMBOL(release_dentry_name_snapshot); > @@ -400,7 +398,7 @@ static void dentry_free(struct dentry *dentry) > WARN_ON(!hlist_unhashed(&dentry->d_u.d_alias)); > if (unlikely(dname_external(dentry))) { > struct external_name *p = external_name(dentry); > - if (likely(atomic_dec_and_test(&p->u.count))) { > + if (likely(atomic_dec_and_test(&p->count))) { > call_rcu(&dentry->d_u.d_rcu, __d_free_external); > return; > } > @@ -1681,7 +1679,7 @@ static struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name) > kmem_cache_free(dentry_cache, dentry); > return NULL; > } > - atomic_set(&p->u.count, 1); > + atomic_set(&p->count, 1); > dname = p->name; > } else { > dname = dentry->d_shortname.string; > @@ -2774,15 +2772,15 @@ static void copy_name(struct dentry *dentry, struct dentry *target) > if (unlikely(dname_external(dentry))) > old_name = external_name(dentry); > if (unlikely(dname_external(target))) { > - atomic_inc(&external_name(target)->u.count); > + atomic_inc(&external_name(target)->count); > dentry->d_name = target->d_name; > } else { > dentry->d_shortname = target->d_shortname; > dentry->d_name.name = dentry->d_shortname.string; > dentry->d_name.hash_len = target->d_name.hash_len; > } > - if (old_name && likely(atomic_dec_and_test(&old_name->u.count))) > - kfree_rcu(old_name, u.head); > + if (old_name && likely(atomic_dec_and_test(&old_name->count))) > + kfree_rcu(old_name, head); > } > > /* > -- > 2.39.5 >
Al Viro <viro@zeniv.linux.org.uk> writes: > ... and check the "name might be unstable" predicate > the right way. > > Signed-off-by: Al Viro <viro@zeniv.linux.org.uk> > --- > fs/libfs.c | 15 ++++++++------- > 1 file changed, 8 insertions(+), 7 deletions(-) > > diff --git a/fs/libfs.c b/fs/libfs.c > index 748ac5923154..3ad1b1b7fed6 100644 > --- a/fs/libfs.c > +++ b/fs/libfs.c > @@ -1789,7 +1789,7 @@ int generic_ci_d_compare(const struct dentry *dentry, unsigned int len, > { > const struct dentry *parent; > const struct inode *dir; > - char strbuf[DNAME_INLINE_LEN]; > + union shortname_store strbuf; > struct qstr qstr; > > /* > @@ -1809,22 +1809,23 @@ int generic_ci_d_compare(const struct dentry *dentry, unsigned int len, > if (!dir || !IS_CASEFOLDED(dir)) > return 1; > > + qstr.len = len; > + qstr.name = str; > /* > * If the dentry name is stored in-line, then it may be concurrently > * modified by a rename. If this happens, the VFS will eventually retry > * the lookup, so it doesn't matter what ->d_compare() returns. > * However, it's unsafe to call utf8_strncasecmp() with an unstable > * string. Therefore, we have to copy the name into a temporary buffer. This part of the comment needs updating since there is no more copying. > + * As above, len is guaranteed to match str, so the shortname case > + * is exactly when str points to ->d_shortname. > */ > - if (len <= DNAME_INLINE_LEN - 1) { > - memcpy(strbuf, str, len); > - strbuf[len] = 0; > - str = strbuf; > + if (qstr.name == dentry->d_shortname.string) { > + strbuf = dentry->d_shortname; // NUL is guaranteed to be in there > + qstr.name = strbuf.string; > /* prevent compiler from optimizing out the temporary buffer */ > barrier(); If I read the code correctly, I admit I don't understand how this guarantees the stability. Aren't you just assigning qstr.name back the same value it had in case of an inlined name through a bounce pointer? The previous implementation made sense to me, since the memcpy only accessed each character once, and we guaranteed the terminating character explicitly, but I'm having a hard time with this version. > } > - qstr.len = len; > - qstr.name = str; > > return utf8_strncasecmp(dentry->d_sb->s_encoding, name, &qstr); > }
Al Viro <viro@zeniv.linux.org.uk> writes: > On Thu, Jan 16, 2025 at 10:38:53AM -0500, Gabriel Krisman Bertazi wrote: >> > * If the dentry name is stored in-line, then it may be concurrently >> > * modified by a rename. If this happens, the VFS will eventually retry >> > * the lookup, so it doesn't matter what ->d_compare() returns. >> > * However, it's unsafe to call utf8_strncasecmp() with an unstable >> > * string. Therefore, we have to copy the name into a temporary buffer. >> >> This part of the comment needs updating since there is no more copying. >> >> > + * As above, len is guaranteed to match str, so the shortname case >> > + * is exactly when str points to ->d_shortname. >> > */ >> > - if (len <= DNAME_INLINE_LEN - 1) { >> > - memcpy(strbuf, str, len); >> > - strbuf[len] = 0; >> > - str = strbuf; >> > + if (qstr.name == dentry->d_shortname.string) { >> > + strbuf = dentry->d_shortname; // NUL is guaranteed to be in there >> > + qstr.name = strbuf.string; >> > /* prevent compiler from optimizing out the temporary buffer */ >> > barrier(); >> >> If I read the code correctly, I admit I don't understand how this >> guarantees the stability. Aren't you just assigning qstr.name back the >> same value it had in case of an inlined name through a bounce pointer? >> The previous implementation made sense to me, since the memcpy only >> accessed each character once, and we guaranteed the terminating >> character explicitly, but I'm having a hard time with this version. > > This > strbuf = dentry->d_shortname; // NUL is guaranteed to be in there > copies the entire array. No bounce pointers of any sort; we copy > the array contents, all 40 bytes of it. And yes, struct (or union, > in this case) assignment generates better code than manual memcpy() > here. Ah. I read that as: unsigned char *strbuf = &dentry->d_shortname Thanks for explaining. Makes sense to me: Reviewed-by: Gabriel Krisman Bertazi <gabriel@krisman.be>
On Thu, 2025-01-16 at 05:23 +0000, Al Viro wrote: > Pass the stable name all the way down to ->rpc_ops->lookup() instances. > > Note that passing &dentry->d_name is safe in e.g. nfs_lookup() - it *is* > stable there, as it is in ->create() et.al. > > dget_parent() in nfs_instantiate() should be redundant - it'd better be > stable there; if it's not, we have more trouble, since ->d_name would > also be unsafe in such case. > > nfs_submount() and nfs4_submount() may or may not require fixes - if > they ever get moved on server with fhandle preserved, we are in trouble > there... > > UAF window is fairly narrow here and exfiltration requires the ability > to watch the traffic. > > Signed-off-by: Al Viro <viro@zeniv.linux.org.uk> > --- > fs/nfs/dir.c | 14 ++++++++------ > fs/nfs/namespace.c | 2 +- > fs/nfs/nfs3proc.c | 5 ++--- > fs/nfs/nfs4proc.c | 20 ++++++++++---------- > fs/nfs/proc.c | 6 +++--- > include/linux/nfs_xdr.h | 2 +- > 6 files changed, 25 insertions(+), 24 deletions(-) > > diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c > index c28983ee75ca..2b04038b0e40 100644 > --- a/fs/nfs/dir.c > +++ b/fs/nfs/dir.c > @@ -1672,7 +1672,7 @@ nfs_lookup_revalidate_delegated(struct inode *dir, struct dentry *dentry, > return nfs_lookup_revalidate_done(dir, dentry, inode, 1); > } > > -static int nfs_lookup_revalidate_dentry(struct inode *dir, > +static int nfs_lookup_revalidate_dentry(struct inode *dir, const struct qstr *name, > struct dentry *dentry, > struct inode *inode, unsigned int flags) > { > @@ -1690,7 +1690,7 @@ static int nfs_lookup_revalidate_dentry(struct inode *dir, > goto out; > > dir_verifier = nfs_save_change_attribute(dir); > - ret = NFS_PROTO(dir)->lookup(dir, dentry, fhandle, fattr); > + ret = NFS_PROTO(dir)->lookup(dir, dentry, name, fhandle, fattr); > if (ret < 0) > goto out; > > @@ -1775,7 +1775,7 @@ nfs_do_lookup_revalidate(struct inode *dir, const struct qstr *name, > if (NFS_STALE(inode)) > goto out_bad; > > - return nfs_lookup_revalidate_dentry(dir, dentry, inode, flags); > + return nfs_lookup_revalidate_dentry(dir, name, dentry, inode, flags); > out_valid: > return nfs_lookup_revalidate_done(dir, dentry, inode, 1); > out_bad: > @@ -1970,7 +1970,8 @@ struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, unsigned in > > dir_verifier = nfs_save_change_attribute(dir); > trace_nfs_lookup_enter(dir, dentry, flags); > - error = NFS_PROTO(dir)->lookup(dir, dentry, fhandle, fattr); > + error = NFS_PROTO(dir)->lookup(dir, dentry, &dentry->d_name, > + fhandle, fattr); > if (error == -ENOENT) { > if (nfs_server_capable(dir, NFS_CAP_CASE_INSENSITIVE)) > dir_verifier = inode_peek_iversion_raw(dir); > @@ -2246,7 +2247,7 @@ nfs4_lookup_revalidate(struct inode *dir, const struct qstr *name, > reval_dentry: > if (flags & LOOKUP_RCU) > return -ECHILD; > - return nfs_lookup_revalidate_dentry(dir, dentry, inode, flags); > + return nfs_lookup_revalidate_dentry(dir, name, dentry, inode, flags); > > full_reval: > return nfs_do_lookup_revalidate(dir, name, dentry, flags); > @@ -2305,7 +2306,8 @@ nfs_add_or_obtain(struct dentry *dentry, struct nfs_fh *fhandle, > d_drop(dentry); > > if (fhandle->size == 0) { > - error = NFS_PROTO(dir)->lookup(dir, dentry, fhandle, fattr); > + error = NFS_PROTO(dir)->lookup(dir, dentry, &dentry->d_name, > + fhandle, fattr); > if (error) > goto out_error; > } > diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c > index 2d53574da605..973aed9cc5fe 100644 > --- a/fs/nfs/namespace.c > +++ b/fs/nfs/namespace.c > @@ -308,7 +308,7 @@ int nfs_submount(struct fs_context *fc, struct nfs_server *server) > int err; > > /* Look it up again to get its attributes */ > - err = server->nfs_client->rpc_ops->lookup(d_inode(parent), dentry, > + err = server->nfs_client->rpc_ops->lookup(d_inode(parent), dentry, &dentry->d_name, > ctx->mntfh, ctx->clone_data.fattr); > dput(parent); > if (err != 0) > diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c > index 1566163c6d85..ce70768e0201 100644 > --- a/fs/nfs/nfs3proc.c > +++ b/fs/nfs/nfs3proc.c > @@ -192,7 +192,7 @@ __nfs3_proc_lookup(struct inode *dir, const char *name, size_t len, > } > > static int > -nfs3_proc_lookup(struct inode *dir, struct dentry *dentry, > +nfs3_proc_lookup(struct inode *dir, struct dentry *dentry, const struct qstr *name, > struct nfs_fh *fhandle, struct nfs_fattr *fattr) > { > unsigned short task_flags = 0; > @@ -202,8 +202,7 @@ nfs3_proc_lookup(struct inode *dir, struct dentry *dentry, > task_flags |= RPC_TASK_TIMEOUT; > > dprintk("NFS call lookup %pd2\n", dentry); > - return __nfs3_proc_lookup(dir, dentry->d_name.name, > - dentry->d_name.len, fhandle, fattr, > + return __nfs3_proc_lookup(dir, name->name, name->len, fhandle, fattr, > task_flags); > } > > diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c > index 405f17e6e0b4..4d85068e820d 100644 > --- a/fs/nfs/nfs4proc.c > +++ b/fs/nfs/nfs4proc.c > @@ -4536,15 +4536,15 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr, > } > > static int _nfs4_proc_lookup(struct rpc_clnt *clnt, struct inode *dir, > - struct dentry *dentry, struct nfs_fh *fhandle, > - struct nfs_fattr *fattr) > + struct dentry *dentry, const struct qstr *name, > + struct nfs_fh *fhandle, struct nfs_fattr *fattr) > { > struct nfs_server *server = NFS_SERVER(dir); > int status; > struct nfs4_lookup_arg args = { > .bitmask = server->attr_bitmask, > .dir_fh = NFS_FH(dir), > - .name = &dentry->d_name, > + .name = name, > }; > struct nfs4_lookup_res res = { > .server = server, > @@ -4586,17 +4586,16 @@ static void nfs_fixup_secinfo_attributes(struct nfs_fattr *fattr) > } > > static int nfs4_proc_lookup_common(struct rpc_clnt **clnt, struct inode *dir, > - struct dentry *dentry, struct nfs_fh *fhandle, > - struct nfs_fattr *fattr) > + struct dentry *dentry, const struct qstr *name, > + struct nfs_fh *fhandle, struct nfs_fattr *fattr) > { > struct nfs4_exception exception = { > .interruptible = true, > }; > struct rpc_clnt *client = *clnt; > - const struct qstr *name = &dentry->d_name; > int err; > do { > - err = _nfs4_proc_lookup(client, dir, dentry, fhandle, fattr); > + err = _nfs4_proc_lookup(client, dir, dentry, name, fhandle, fattr); > trace_nfs4_lookup(dir, name, err); > switch (err) { > case -NFS4ERR_BADNAME: > @@ -4631,13 +4630,13 @@ static int nfs4_proc_lookup_common(struct rpc_clnt **clnt, struct inode *dir, > return err; > } > > -static int nfs4_proc_lookup(struct inode *dir, struct dentry *dentry, > +static int nfs4_proc_lookup(struct inode *dir, struct dentry *dentry, const struct qstr *name, > struct nfs_fh *fhandle, struct nfs_fattr *fattr) > { > int status; > struct rpc_clnt *client = NFS_CLIENT(dir); > > - status = nfs4_proc_lookup_common(&client, dir, dentry, fhandle, fattr); > + status = nfs4_proc_lookup_common(&client, dir, dentry, name, fhandle, fattr); > if (client != NFS_CLIENT(dir)) { > rpc_shutdown_client(client); > nfs_fixup_secinfo_attributes(fattr); > @@ -4652,7 +4651,8 @@ nfs4_proc_lookup_mountpoint(struct inode *dir, struct dentry *dentry, > struct rpc_clnt *client = NFS_CLIENT(dir); > int status; > > - status = nfs4_proc_lookup_common(&client, dir, dentry, fhandle, fattr); > + status = nfs4_proc_lookup_common(&client, dir, dentry, &dentry->d_name, > + fhandle, fattr); > if (status < 0) > return ERR_PTR(status); > return (client == NFS_CLIENT(dir)) ? rpc_clone_client(client) : client; > diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c > index 6c09cd090c34..77920a2e3cef 100644 > --- a/fs/nfs/proc.c > +++ b/fs/nfs/proc.c > @@ -153,13 +153,13 @@ nfs_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr, > } > > static int > -nfs_proc_lookup(struct inode *dir, struct dentry *dentry, > +nfs_proc_lookup(struct inode *dir, struct dentry *dentry, const struct qstr *name, > struct nfs_fh *fhandle, struct nfs_fattr *fattr) > { > struct nfs_diropargs arg = { > .fh = NFS_FH(dir), > - .name = dentry->d_name.name, > - .len = dentry->d_name.len > + .name = name->name, > + .len = name->len > }; > struct nfs_diropok res = { > .fh = fhandle, > diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h > index 559273a0f16d..08b62bbf59f0 100644 > --- a/include/linux/nfs_xdr.h > +++ b/include/linux/nfs_xdr.h > @@ -1785,7 +1785,7 @@ struct nfs_rpc_ops { > struct nfs_fattr *, struct inode *); > int (*setattr) (struct dentry *, struct nfs_fattr *, > struct iattr *); > - int (*lookup) (struct inode *, struct dentry *, > + int (*lookup) (struct inode *, struct dentry *, const struct qstr *, > struct nfs_fh *, struct nfs_fattr *); > int (*lookupp) (struct inode *, struct nfs_fh *, > struct nfs_fattr *); Reviewed-by: Jeff Layton <jlayton@kernel.org>
On Thu, 2025-01-16 at 05:23 +0000, Al Viro wrote: > Signed-off-by: Al Viro <viro@zeniv.linux.org.uk> > --- > fs/fat/namei_vfat.c | 13 ++++--------- > 1 file changed, 4 insertions(+), 9 deletions(-) > > diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c > index f9cbd5c6f932..926c26e90ef8 100644 > --- a/fs/fat/namei_vfat.c > +++ b/fs/fat/namei_vfat.c > @@ -43,14 +43,9 @@ static inline void vfat_d_version_set(struct dentry *dentry, > * If it happened, the negative dentry isn't actually negative > * anymore. So, drop it. > */ > -static int vfat_revalidate_shortname(struct dentry *dentry) > +static bool vfat_revalidate_shortname(struct dentry *dentry, struct inode *dir) > { > - int ret = 1; > - spin_lock(&dentry->d_lock); > - if (!inode_eq_iversion(d_inode(dentry->d_parent), vfat_d_version(dentry))) > - ret = 0; > - spin_unlock(&dentry->d_lock); > - return ret; > + return inode_eq_iversion(dir, vfat_d_version(dentry)); > } > > static int vfat_revalidate(struct inode *dir, const struct qstr *name, > @@ -62,7 +57,7 @@ static int vfat_revalidate(struct inode *dir, const struct qstr *name, > /* This is not negative dentry. Always valid. */ > if (d_really_is_positive(dentry)) > return 1; > - return vfat_revalidate_shortname(dentry); > + return vfat_revalidate_shortname(dentry, dir); > } > > static int vfat_revalidate_ci(struct inode *dir, const struct qstr *name, > @@ -99,7 +94,7 @@ static int vfat_revalidate_ci(struct inode *dir, const struct qstr *name, > if (flags & (LOOKUP_CREATE | LOOKUP_RENAME_TARGET)) > return 0; > > - return vfat_revalidate_shortname(dentry); > + return vfat_revalidate_shortname(dentry, dir); > } > > /* returns the length of a struct qstr, ignoring trailing dots */ Reviewed-by: Jeff Layton <jlayton@kernel.org> Al, you can also add my R-b to 1-12 too. Nice work!
On Thu, 2025-01-16 at 05:23 +0000, Al Viro wrote: > No need to mess with the boilerplate for obtaining what we already > have. Note that ceph is one of the "will want a path from filesystem > root if we want to talk to server" cases, so the name of the last > component is of little use - it is passed to fscrypt_d_revalidate() > and it's used to deal with (also crypt-related) case in request > marshalling, when encrypted name turns out to be too long. The former > is not a problem, but the latter is racy; that part will be handled > in the next commit. > > Reviewed-by: Viacheslav Dubeyko <Slava.Dubeyko@ibm.com> > Signed-off-by: Al Viro <viro@zeniv.linux.org.uk> > --- > fs/ceph/dir.c | 22 ++++------------------ > 1 file changed, 4 insertions(+), 18 deletions(-) > > diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c > index c4c71c24221b..dc5f55bebad7 100644 > --- a/fs/ceph/dir.c > +++ b/fs/ceph/dir.c > @@ -1940,30 +1940,19 @@ static int dir_lease_is_valid(struct inode *dir, struct dentry *dentry, > /* > * Check if cached dentry can be trusted. > */ > -static int ceph_d_revalidate(struct inode *parent_dir, const struct qstr *name, > +static int ceph_d_revalidate(struct inode *dir, const struct qstr *name, > struct dentry *dentry, unsigned int flags) > { > struct ceph_mds_client *mdsc = ceph_sb_to_fs_client(dentry->d_sb)->mdsc; > struct ceph_client *cl = mdsc->fsc->client; > int valid = 0; > - struct dentry *parent; > - struct inode *dir, *inode; > + struct inode *inode; > > - valid = fscrypt_d_revalidate(parent_dir, name, dentry, flags); > + valid = fscrypt_d_revalidate(dir, name, dentry, flags); > if (valid <= 0) > return valid; > > - if (flags & LOOKUP_RCU) { > - parent = READ_ONCE(dentry->d_parent); > - dir = d_inode_rcu(parent); > - if (!dir) > - return -ECHILD; > - inode = d_inode_rcu(dentry); > - } else { > - parent = dget_parent(dentry); > - dir = d_inode(parent); > - inode = d_inode(dentry); > - } > + inode = d_inode_rcu(dentry); > > doutc(cl, "%p '%pd' inode %p offset 0x%llx nokey %d\n", > dentry, dentry, inode, ceph_dentry(dentry)->offset, > @@ -2039,9 +2028,6 @@ static int ceph_d_revalidate(struct inode *parent_dir, const struct qstr *name, > doutc(cl, "%p '%pd' %s\n", dentry, dentry, valid ? "valid" : "invalid"); > if (!valid) > ceph_dir_clear_complete(dir); > - > - if (!(flags & LOOKUP_RCU)) > - dput(parent); > return valid; > } > Reviewed-by: Jeff Layton <jlayton@kernel.org>
On Thu, 2025-01-16 at 05:23 +0000, Al Viro wrote: > ->d_revalidate() often needs to access dentry parent and name; that has > to be done carefully, since the locking environment varies from caller > to caller. We are not guaranteed that dentry in question will not be > moved right under us - not unless the filesystem is such that nothing > on it ever gets renamed. > > It can be dealt with, but that results in boilerplate code that isn't > even needed - the callers normally have just found the dentry via dcache > lookup and want to verify that it's in the right place; they already > have the values of ->d_parent and ->d_name stable. There is a couple > of exceptions (overlayfs and, to less extent, ecryptfs), but for the > majority of calls that song and dance is not needed at all. > > It's easier to make ecryptfs and overlayfs find and pass those values if > there's a ->d_revalidate() instance to be called, rather than doing that > in the instances. > > This commit only changes the calling conventions; making use of supplied > values is left to followups. > > NOTE: some instances need more than just the parent - things like CIFS > may need to build an entire path from filesystem root, so they need > more precautions than the usual boilerplate. This series doesn't > do anything to that need - these filesystems have to keep their locking > mechanisms (rename_lock loops, use of dentry_path_raw(), private rwsem > a-la v9fs). > > Signed-off-by: Al Viro <viro@zeniv.linux.org.uk> > --- > Documentation/filesystems/locking.rst | 3 ++- > Documentation/filesystems/porting.rst | 13 +++++++++++++ > Documentation/filesystems/vfs.rst | 3 ++- > fs/9p/vfs_dentry.c | 10 ++++++++-- > fs/afs/dir.c | 6 ++++-- > fs/ceph/dir.c | 5 +++-- > fs/coda/dir.c | 3 ++- > fs/crypto/fname.c | 3 ++- > fs/ecryptfs/dentry.c | 18 ++++++++++++++---- > fs/exfat/namei.c | 3 ++- > fs/fat/namei_vfat.c | 6 ++++-- > fs/fuse/dir.c | 3 ++- > fs/gfs2/dentry.c | 7 +++++-- > fs/hfs/sysdep.c | 3 ++- > fs/jfs/namei.c | 3 ++- > fs/kernfs/dir.c | 3 ++- > fs/namei.c | 18 ++++++++++-------- > fs/nfs/dir.c | 9 ++++++--- > fs/ocfs2/dcache.c | 3 ++- > fs/orangefs/dcache.c | 3 ++- > fs/overlayfs/super.c | 22 ++++++++++++++++++++-- > fs/proc/base.c | 6 ++++-- > fs/proc/fd.c | 3 ++- > fs/proc/generic.c | 6 ++++-- > fs/proc/proc_sysctl.c | 3 ++- > fs/smb/client/dir.c | 3 ++- > fs/tracefs/inode.c | 3 ++- > fs/vboxsf/dir.c | 3 ++- > include/linux/dcache.h | 3 ++- > include/linux/fscrypt.h | 7 ++++--- > 30 files changed, 133 insertions(+), 51 deletions(-) > > diff --git a/Documentation/filesystems/locking.rst b/Documentation/filesystems/locking.rst > index f5e3676db954..146e7d8aa736 100644 > --- a/Documentation/filesystems/locking.rst > +++ b/Documentation/filesystems/locking.rst > @@ -17,7 +17,8 @@ dentry_operations > > prototypes:: > > - int (*d_revalidate)(struct dentry *, unsigned int); > + int (*d_revalidate)(struct inode *, const struct qstr *, > + struct dentry *, unsigned int); > int (*d_weak_revalidate)(struct dentry *, unsigned int); > int (*d_hash)(const struct dentry *, struct qstr *); > int (*d_compare)(const struct dentry *, > diff --git a/Documentation/filesystems/porting.rst b/Documentation/filesystems/porting.rst > index 9ab2a3d6f2b4..b50c3ce36ef2 100644 > --- a/Documentation/filesystems/porting.rst > +++ b/Documentation/filesystems/porting.rst > @@ -1141,3 +1141,16 @@ pointer are gone. > > set_blocksize() takes opened struct file instead of struct block_device now > and it *must* be opened exclusive. > + > +--- > + > +** mandatory** > + > +->d_revalidate() gets two extra arguments - inode of parent directory and > +name our dentry is expected to have. Both are stable (dir is pinned in > +non-RCU case and will stay around during the call in RCU case, and name > +is guaranteed to stay unchanging). Your instance doesn't have to use > +either, but it often helps to avoid a lot of painful boilerplate. > +NOTE: if you need something like full path from the root of filesystem, > +you are still on your own - this assists with simple cases, but it's not > +magic. > diff --git a/Documentation/filesystems/vfs.rst b/Documentation/filesystems/vfs.rst > index 0b18af3f954e..7c352ebaae98 100644 > --- a/Documentation/filesystems/vfs.rst > +++ b/Documentation/filesystems/vfs.rst > @@ -1251,7 +1251,8 @@ defined: > .. code-block:: c > > struct dentry_operations { > - int (*d_revalidate)(struct dentry *, unsigned int); > + int (*d_revalidate)(struct inode *, const struct qstr *, > + struct dentry *, unsigned int); > int (*d_weak_revalidate)(struct dentry *, unsigned int); > int (*d_hash)(const struct dentry *, struct qstr *); > int (*d_compare)(const struct dentry *, > diff --git a/fs/9p/vfs_dentry.c b/fs/9p/vfs_dentry.c > index 01338d4c2d9e..872c1abe3295 100644 > --- a/fs/9p/vfs_dentry.c > +++ b/fs/9p/vfs_dentry.c > @@ -61,7 +61,7 @@ static void v9fs_dentry_release(struct dentry *dentry) > p9_fid_put(hlist_entry(p, struct p9_fid, dlist)); > } > > -static int v9fs_lookup_revalidate(struct dentry *dentry, unsigned int flags) > +static int __v9fs_lookup_revalidate(struct dentry *dentry, unsigned int flags) > { > struct p9_fid *fid; > struct inode *inode; > @@ -99,9 +99,15 @@ static int v9fs_lookup_revalidate(struct dentry *dentry, unsigned int flags) > return 1; > } > > +static int v9fs_lookup_revalidate(struct inode *dir, const struct qstr *name, > + struct dentry *dentry, unsigned int flags) > +{ > + return __v9fs_lookup_revalidate(dentry, flags); > +} > + > const struct dentry_operations v9fs_cached_dentry_operations = { > .d_revalidate = v9fs_lookup_revalidate, > - .d_weak_revalidate = v9fs_lookup_revalidate, > + .d_weak_revalidate = __v9fs_lookup_revalidate, > .d_delete = v9fs_cached_dentry_delete, > .d_release = v9fs_dentry_release, > }; > diff --git a/fs/afs/dir.c b/fs/afs/dir.c > index ada363af5aab..9780013cd83a 100644 > --- a/fs/afs/dir.c > +++ b/fs/afs/dir.c > @@ -22,7 +22,8 @@ static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry, > unsigned int flags); > static int afs_dir_open(struct inode *inode, struct file *file); > static int afs_readdir(struct file *file, struct dir_context *ctx); > -static int afs_d_revalidate(struct dentry *dentry, unsigned int flags); > +static int afs_d_revalidate(struct inode *dir, const struct qstr *name, > + struct dentry *dentry, unsigned int flags); > static int afs_d_delete(const struct dentry *dentry); > static void afs_d_iput(struct dentry *dentry, struct inode *inode); > static bool afs_lookup_one_filldir(struct dir_context *ctx, const char *name, int nlen, > @@ -1093,7 +1094,8 @@ static int afs_d_revalidate_rcu(struct dentry *dentry) > * - NOTE! the hit can be a negative hit too, so we can't assume we have an > * inode > */ > -static int afs_d_revalidate(struct dentry *dentry, unsigned int flags) > +static int afs_d_revalidate(struct inode *parent_dir, const struct qstr *name, > + struct dentry *dentry, unsigned int flags) > { > struct afs_vnode *vnode, *dir; > struct afs_fid fid; > diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c > index 0bf388e07a02..c4c71c24221b 100644 > --- a/fs/ceph/dir.c > +++ b/fs/ceph/dir.c > @@ -1940,7 +1940,8 @@ static int dir_lease_is_valid(struct inode *dir, struct dentry *dentry, > /* > * Check if cached dentry can be trusted. > */ > -static int ceph_d_revalidate(struct dentry *dentry, unsigned int flags) > +static int ceph_d_revalidate(struct inode *parent_dir, const struct qstr *name, > + struct dentry *dentry, unsigned int flags) > { > struct ceph_mds_client *mdsc = ceph_sb_to_fs_client(dentry->d_sb)->mdsc; > struct ceph_client *cl = mdsc->fsc->client; > @@ -1948,7 +1949,7 @@ static int ceph_d_revalidate(struct dentry *dentry, unsigned int flags) > struct dentry *parent; > struct inode *dir, *inode; > > - valid = fscrypt_d_revalidate(dentry, flags); > + valid = fscrypt_d_revalidate(parent_dir, name, dentry, flags); > if (valid <= 0) > return valid; > > diff --git a/fs/coda/dir.c b/fs/coda/dir.c > index 4e552ba7bd43..a3e2dfeedfbf 100644 > --- a/fs/coda/dir.c > +++ b/fs/coda/dir.c > @@ -445,7 +445,8 @@ static int coda_readdir(struct file *coda_file, struct dir_context *ctx) > } > > /* called when a cache lookup succeeds */ > -static int coda_dentry_revalidate(struct dentry *de, unsigned int flags) > +static int coda_dentry_revalidate(struct inode *dir, const struct qstr *name, > + struct dentry *de, unsigned int flags) > { > struct inode *inode; > struct coda_inode_info *cii; > diff --git a/fs/crypto/fname.c b/fs/crypto/fname.c > index 0ad52fbe51c9..389f5b2bf63b 100644 > --- a/fs/crypto/fname.c > +++ b/fs/crypto/fname.c > @@ -574,7 +574,8 @@ EXPORT_SYMBOL_GPL(fscrypt_fname_siphash); > * Validate dentries in encrypted directories to make sure we aren't potentially > * caching stale dentries after a key has been added. > */ > -int fscrypt_d_revalidate(struct dentry *dentry, unsigned int flags) > +int fscrypt_d_revalidate(struct inode *parent_dir, const struct qstr *name, > + struct dentry *dentry, unsigned int flags) > { > struct dentry *dir; > int err; > diff --git a/fs/ecryptfs/dentry.c b/fs/ecryptfs/dentry.c > index acaa0825e9bb..1dfd5b81d831 100644 > --- a/fs/ecryptfs/dentry.c > +++ b/fs/ecryptfs/dentry.c > @@ -17,7 +17,9 @@ > > /** > * ecryptfs_d_revalidate - revalidate an ecryptfs dentry > - * @dentry: The ecryptfs dentry > + * @dir: inode of expected parent > + * @name: expected name > + * @dentry: dentry to revalidate > * @flags: lookup flags > * > * Called when the VFS needs to revalidate a dentry. This > @@ -28,7 +30,8 @@ > * Returns 1 if valid, 0 otherwise. > * > */ > -static int ecryptfs_d_revalidate(struct dentry *dentry, unsigned int flags) > +static int ecryptfs_d_revalidate(struct inode *dir, const struct qstr *name, > + struct dentry *dentry, unsigned int flags) > { > struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry); > int rc = 1; > @@ -36,8 +39,15 @@ static int ecryptfs_d_revalidate(struct dentry *dentry, unsigned int flags) > if (flags & LOOKUP_RCU) > return -ECHILD; > > - if (lower_dentry->d_flags & DCACHE_OP_REVALIDATE) > - rc = lower_dentry->d_op->d_revalidate(lower_dentry, flags); > + if (lower_dentry->d_flags & DCACHE_OP_REVALIDATE) { > + struct inode *lower_dir = ecryptfs_inode_to_lower(dir); > + struct name_snapshot n; > + > + take_dentry_name_snapshot(&n, lower_dentry); > + rc = lower_dentry->d_op->d_revalidate(lower_dir, &n.name, > + lower_dentry, flags); > + release_dentry_name_snapshot(&n); > + } > > if (d_really_is_positive(dentry)) { > struct inode *inode = d_inode(dentry); > diff --git a/fs/exfat/namei.c b/fs/exfat/namei.c > index 97d2774760fe..e3b4feccba07 100644 > --- a/fs/exfat/namei.c > +++ b/fs/exfat/namei.c > @@ -31,7 +31,8 @@ static inline void exfat_d_version_set(struct dentry *dentry, > * If it happened, the negative dentry isn't actually negative anymore. So, > * drop it. > */ > -static int exfat_d_revalidate(struct dentry *dentry, unsigned int flags) > +static int exfat_d_revalidate(struct inode *dir, const struct qstr *name, > + struct dentry *dentry, unsigned int flags) > { > int ret; > > diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c > index 15bf32c21ac0..f9cbd5c6f932 100644 > --- a/fs/fat/namei_vfat.c > +++ b/fs/fat/namei_vfat.c > @@ -53,7 +53,8 @@ static int vfat_revalidate_shortname(struct dentry *dentry) > return ret; > } > > -static int vfat_revalidate(struct dentry *dentry, unsigned int flags) > +static int vfat_revalidate(struct inode *dir, const struct qstr *name, > + struct dentry *dentry, unsigned int flags) > { > if (flags & LOOKUP_RCU) > return -ECHILD; > @@ -64,7 +65,8 @@ static int vfat_revalidate(struct dentry *dentry, unsigned int flags) > return vfat_revalidate_shortname(dentry); > } > > -static int vfat_revalidate_ci(struct dentry *dentry, unsigned int flags) > +static int vfat_revalidate_ci(struct inode *dir, const struct qstr *name, > + struct dentry *dentry, unsigned int flags) > { > if (flags & LOOKUP_RCU) > return -ECHILD; > diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c > index 494ac372ace0..d9e9f26917eb 100644 > --- a/fs/fuse/dir.c > +++ b/fs/fuse/dir.c > @@ -192,7 +192,8 @@ static void fuse_lookup_init(struct fuse_conn *fc, struct fuse_args *args, > * the lookup once more. If the lookup results in the same inode, > * then refresh the attributes, timeouts and mark the dentry valid. > */ > -static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags) > +static int fuse_dentry_revalidate(struct inode *dir, const struct qstr *name, > + struct dentry *entry, unsigned int flags) > { > struct inode *inode; > struct dentry *parent; > diff --git a/fs/gfs2/dentry.c b/fs/gfs2/dentry.c > index 2e215e8c3c88..86c338901fab 100644 > --- a/fs/gfs2/dentry.c > +++ b/fs/gfs2/dentry.c > @@ -21,7 +21,9 @@ > > /** > * gfs2_drevalidate - Check directory lookup consistency > - * @dentry: the mapping to check > + * @dir: expected parent directory inode > + * @name: expexted name > + * @dentry: dentry to check > * @flags: lookup flags > * > * Check to make sure the lookup necessary to arrive at this inode from its > @@ -30,7 +32,8 @@ > * Returns: 1 if the dentry is ok, 0 if it isn't > */ > > -static int gfs2_drevalidate(struct dentry *dentry, unsigned int flags) > +static int gfs2_drevalidate(struct inode *dir, const struct qstr *name, > + struct dentry *dentry, unsigned int flags) > { > struct dentry *parent; > struct gfs2_sbd *sdp; > diff --git a/fs/hfs/sysdep.c b/fs/hfs/sysdep.c > index 76fa02e3835b..ef54fc8093cf 100644 > --- a/fs/hfs/sysdep.c > +++ b/fs/hfs/sysdep.c > @@ -13,7 +13,8 @@ > > /* dentry case-handling: just lowercase everything */ > > -static int hfs_revalidate_dentry(struct dentry *dentry, unsigned int flags) > +static int hfs_revalidate_dentry(struct inode *dir, const struct qstr *name, > + struct dentry *dentry, unsigned int flags) > { > struct inode *inode; > int diff; > diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c > index d68a4e6ac345..fc8ede43afde 100644 > --- a/fs/jfs/namei.c > +++ b/fs/jfs/namei.c > @@ -1576,7 +1576,8 @@ static int jfs_ci_compare(const struct dentry *dentry, > return result; > } > > -static int jfs_ci_revalidate(struct dentry *dentry, unsigned int flags) > +static int jfs_ci_revalidate(struct inode *dir, const struct qstr *name, > + struct dentry *dentry, unsigned int flags) > { > /* > * This is not negative dentry. Always valid. > diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c > index 458519e416fe..5f0f8b95f44c 100644 > --- a/fs/kernfs/dir.c > +++ b/fs/kernfs/dir.c > @@ -1109,7 +1109,8 @@ struct kernfs_node *kernfs_create_empty_dir(struct kernfs_node *parent, > return ERR_PTR(rc); > } > > -static int kernfs_dop_revalidate(struct dentry *dentry, unsigned int flags) > +static int kernfs_dop_revalidate(struct inode *dir, const struct qstr *name, > + struct dentry *dentry, unsigned int flags) > { > struct kernfs_node *kn; > struct kernfs_root *root; > diff --git a/fs/namei.c b/fs/namei.c > index 9d30c7aa9aa6..77e5d136faaf 100644 > --- a/fs/namei.c > +++ b/fs/namei.c > @@ -921,10 +921,11 @@ static bool try_to_unlazy_next(struct nameidata *nd, struct dentry *dentry) > return false; > } > > -static inline int d_revalidate(struct dentry *dentry, unsigned int flags) > +static inline int d_revalidate(struct inode *dir, const struct qstr *name, > + struct dentry *dentry, unsigned int flags) > { > if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE)) > - return dentry->d_op->d_revalidate(dentry, flags); > + return dentry->d_op->d_revalidate(dir, name, dentry, flags); I know I sent a R-b for this, but one question: Suppose we get back a positive result (dentry is still good), but the name and dentry->d_name no longer match. Do we need to do any special handling in that case? > else > return 1; > } > @@ -1652,7 +1653,7 @@ static struct dentry *lookup_dcache(const struct qstr *name, > { > struct dentry *dentry = d_lookup(dir, name); > if (dentry) { > - int error = d_revalidate(dentry, flags); > + int error = d_revalidate(dir->d_inode, name, dentry, flags); > if (unlikely(error <= 0)) { > if (!error) > d_invalidate(dentry); > @@ -1737,19 +1738,20 @@ static struct dentry *lookup_fast(struct nameidata *nd) > if (read_seqcount_retry(&parent->d_seq, nd->seq)) > return ERR_PTR(-ECHILD); > > - status = d_revalidate(dentry, nd->flags); > + status = d_revalidate(nd->inode, &nd->last, dentry, nd->flags); > if (likely(status > 0)) > return dentry; > if (!try_to_unlazy_next(nd, dentry)) > return ERR_PTR(-ECHILD); > if (status == -ECHILD) > /* we'd been told to redo it in non-rcu mode */ > - status = d_revalidate(dentry, nd->flags); > + status = d_revalidate(nd->inode, &nd->last, > + dentry, nd->flags); > } else { > dentry = __d_lookup(parent, &nd->last); > if (unlikely(!dentry)) > return NULL; > - status = d_revalidate(dentry, nd->flags); > + status = d_revalidate(nd->inode, &nd->last, dentry, nd->flags); > } > if (unlikely(status <= 0)) { > if (!status) > @@ -1777,7 +1779,7 @@ static struct dentry *__lookup_slow(const struct qstr *name, > if (IS_ERR(dentry)) > return dentry; > if (unlikely(!d_in_lookup(dentry))) { > - int error = d_revalidate(dentry, flags); > + int error = d_revalidate(inode, name, dentry, flags); > if (unlikely(error <= 0)) { > if (!error) { > d_invalidate(dentry); > @@ -3575,7 +3577,7 @@ static struct dentry *lookup_open(struct nameidata *nd, struct file *file, > if (d_in_lookup(dentry)) > break; > > - error = d_revalidate(dentry, nd->flags); > + error = d_revalidate(dir_inode, &nd->last, dentry, nd->flags); > if (likely(error > 0)) > break; > if (error) > diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c > index 492cffd9d3d8..9910d9796f4c 100644 > --- a/fs/nfs/dir.c > +++ b/fs/nfs/dir.c > @@ -1814,7 +1814,8 @@ __nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags, > return ret; > } > > -static int nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags) > +static int nfs_lookup_revalidate(struct inode *dir, const struct qstr *name, > + struct dentry *dentry, unsigned int flags) > { > return __nfs_lookup_revalidate(dentry, flags, nfs_do_lookup_revalidate); > } > @@ -2025,7 +2026,8 @@ void nfs_d_prune_case_insensitive_aliases(struct inode *inode) > EXPORT_SYMBOL_GPL(nfs_d_prune_case_insensitive_aliases); > > #if IS_ENABLED(CONFIG_NFS_V4) > -static int nfs4_lookup_revalidate(struct dentry *, unsigned int); > +static int nfs4_lookup_revalidate(struct inode *, const struct qstr *, > + struct dentry *, unsigned int); > > const struct dentry_operations nfs4_dentry_operations = { > .d_revalidate = nfs4_lookup_revalidate, > @@ -2260,7 +2262,8 @@ nfs4_do_lookup_revalidate(struct inode *dir, struct dentry *dentry, > return nfs_do_lookup_revalidate(dir, dentry, flags); > } > > -static int nfs4_lookup_revalidate(struct dentry *dentry, unsigned int flags) > +static int nfs4_lookup_revalidate(struct inode *dir, const struct qstr *name, > + struct dentry *dentry, unsigned int flags) > { > return __nfs_lookup_revalidate(dentry, flags, > nfs4_do_lookup_revalidate); > diff --git a/fs/ocfs2/dcache.c b/fs/ocfs2/dcache.c > index a9b8688aaf30..ecb1ce6301c4 100644 > --- a/fs/ocfs2/dcache.c > +++ b/fs/ocfs2/dcache.c > @@ -32,7 +32,8 @@ void ocfs2_dentry_attach_gen(struct dentry *dentry) > } > > > -static int ocfs2_dentry_revalidate(struct dentry *dentry, unsigned int flags) > +static int ocfs2_dentry_revalidate(struct inode *dir, const struct qstr *name, > + struct dentry *dentry, unsigned int flags) > { > struct inode *inode; > int ret = 0; /* if all else fails, just return false */ > diff --git a/fs/orangefs/dcache.c b/fs/orangefs/dcache.c > index 395a00ed8ac7..c32c9a86e8d0 100644 > --- a/fs/orangefs/dcache.c > +++ b/fs/orangefs/dcache.c > @@ -92,7 +92,8 @@ static int orangefs_revalidate_lookup(struct dentry *dentry) > * > * Should return 1 if dentry can still be trusted, else 0. > */ > -static int orangefs_d_revalidate(struct dentry *dentry, unsigned int flags) > +static int orangefs_d_revalidate(struct inode *dir, const struct qstr *name, > + struct dentry *dentry, unsigned int flags) > { > int ret; > unsigned long time = (unsigned long) dentry->d_fsdata; > diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c > index fe511192f83c..86ae6f6da36b 100644 > --- a/fs/overlayfs/super.c > +++ b/fs/overlayfs/super.c > @@ -91,7 +91,24 @@ static int ovl_revalidate_real(struct dentry *d, unsigned int flags, bool weak) > if (d->d_flags & DCACHE_OP_WEAK_REVALIDATE) > ret = d->d_op->d_weak_revalidate(d, flags); > } else if (d->d_flags & DCACHE_OP_REVALIDATE) { > - ret = d->d_op->d_revalidate(d, flags); > + struct dentry *parent; > + struct inode *dir; > + struct name_snapshot n; > + > + if (flags & LOOKUP_RCU) { > + parent = READ_ONCE(d->d_parent); > + dir = d_inode_rcu(parent); > + if (!dir) > + return -ECHILD; > + } else { > + parent = dget_parent(d); > + dir = d_inode(parent); > + } > + take_dentry_name_snapshot(&n, d); > + ret = d->d_op->d_revalidate(dir, &n.name, d, flags); > + release_dentry_name_snapshot(&n); > + if (!(flags & LOOKUP_RCU)) > + dput(parent); > if (!ret) { > if (!(flags & LOOKUP_RCU)) > d_invalidate(d); > @@ -127,7 +144,8 @@ static int ovl_dentry_revalidate_common(struct dentry *dentry, > return ret; > } > > -static int ovl_dentry_revalidate(struct dentry *dentry, unsigned int flags) > +static int ovl_dentry_revalidate(struct inode *dir, const struct qstr *name, > + struct dentry *dentry, unsigned int flags) > { > return ovl_dentry_revalidate_common(dentry, flags, false); > } > diff --git a/fs/proc/base.c b/fs/proc/base.c > index 0edf14a9840e..fb5493d0edf0 100644 > --- a/fs/proc/base.c > +++ b/fs/proc/base.c > @@ -2058,7 +2058,8 @@ void pid_update_inode(struct task_struct *task, struct inode *inode) > * performed a setuid(), etc. > * > */ > -static int pid_revalidate(struct dentry *dentry, unsigned int flags) > +static int pid_revalidate(struct inode *dir, const struct qstr *name, > + struct dentry *dentry, unsigned int flags) > { > struct inode *inode; > struct task_struct *task; > @@ -2191,7 +2192,8 @@ static int dname_to_vma_addr(struct dentry *dentry, > return 0; > } > > -static int map_files_d_revalidate(struct dentry *dentry, unsigned int flags) > +static int map_files_d_revalidate(struct inode *dir, const struct qstr *name, > + struct dentry *dentry, unsigned int flags) > { > unsigned long vm_start, vm_end; > bool exact_vma_exists = false; > diff --git a/fs/proc/fd.c b/fs/proc/fd.c > index 24baf23e864f..37aa778d1af7 100644 > --- a/fs/proc/fd.c > +++ b/fs/proc/fd.c > @@ -140,7 +140,8 @@ static void tid_fd_update_inode(struct task_struct *task, struct inode *inode, > security_task_to_inode(task, inode); > } > > -static int tid_fd_revalidate(struct dentry *dentry, unsigned int flags) > +static int tid_fd_revalidate(struct inode *dir, const struct qstr *name, > + struct dentry *dentry, unsigned int flags) > { > struct task_struct *task; > struct inode *inode; > diff --git a/fs/proc/generic.c b/fs/proc/generic.c > index dbe82cf23ee4..8ec90826a49e 100644 > --- a/fs/proc/generic.c > +++ b/fs/proc/generic.c > @@ -216,7 +216,8 @@ void proc_free_inum(unsigned int inum) > ida_free(&proc_inum_ida, inum - PROC_DYNAMIC_FIRST); > } > > -static int proc_misc_d_revalidate(struct dentry *dentry, unsigned int flags) > +static int proc_misc_d_revalidate(struct inode *dir, const struct qstr *name, > + struct dentry *dentry, unsigned int flags) > { > if (flags & LOOKUP_RCU) > return -ECHILD; > @@ -343,7 +344,8 @@ static const struct file_operations proc_dir_operations = { > .iterate_shared = proc_readdir, > }; > > -static int proc_net_d_revalidate(struct dentry *dentry, unsigned int flags) > +static int proc_net_d_revalidate(struct inode *dir, const struct qstr *name, > + struct dentry *dentry, unsigned int flags) > { > return 0; > } > diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c > index 27a283d85a6e..cc9d74a06ff0 100644 > --- a/fs/proc/proc_sysctl.c > +++ b/fs/proc/proc_sysctl.c > @@ -884,7 +884,8 @@ static const struct inode_operations proc_sys_dir_operations = { > .getattr = proc_sys_getattr, > }; > > -static int proc_sys_revalidate(struct dentry *dentry, unsigned int flags) > +static int proc_sys_revalidate(struct inode *dir, const struct qstr *name, > + struct dentry *dentry, unsigned int flags) > { > if (flags & LOOKUP_RCU) > return -ECHILD; > diff --git a/fs/smb/client/dir.c b/fs/smb/client/dir.c > index 864b194dbaa0..8c5d44ee91ed 100644 > --- a/fs/smb/client/dir.c > +++ b/fs/smb/client/dir.c > @@ -737,7 +737,8 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry, > } > > static int > -cifs_d_revalidate(struct dentry *direntry, unsigned int flags) > +cifs_d_revalidate(struct inode *dir, const struct qstr *name, > + struct dentry *direntry, unsigned int flags) > { > struct inode *inode; > int rc; > diff --git a/fs/tracefs/inode.c b/fs/tracefs/inode.c > index cfc614c638da..53214499e384 100644 > --- a/fs/tracefs/inode.c > +++ b/fs/tracefs/inode.c > @@ -457,7 +457,8 @@ static void tracefs_d_release(struct dentry *dentry) > eventfs_d_release(dentry); > } > > -static int tracefs_d_revalidate(struct dentry *dentry, unsigned int flags) > +static int tracefs_d_revalidate(struct inode *inode, const struct qstr *name, > + struct dentry *dentry, unsigned int flags) > { > struct eventfs_inode *ei = dentry->d_fsdata; > > diff --git a/fs/vboxsf/dir.c b/fs/vboxsf/dir.c > index 5f1a14d5b927..a859ac9b74ba 100644 > --- a/fs/vboxsf/dir.c > +++ b/fs/vboxsf/dir.c > @@ -192,7 +192,8 @@ const struct file_operations vboxsf_dir_fops = { > * This is called during name resolution/lookup to check if the @dentry in > * the cache is still valid. the job is handled by vboxsf_inode_revalidate. > */ > -static int vboxsf_dentry_revalidate(struct dentry *dentry, unsigned int flags) > +static int vboxsf_dentry_revalidate(struct inode *dir, const struct qstr *name, > + struct dentry *dentry, unsigned int flags) > { > if (flags & LOOKUP_RCU) > return -ECHILD; > diff --git a/include/linux/dcache.h b/include/linux/dcache.h > index 8bc567a35718..4a6bdadf2f29 100644 > --- a/include/linux/dcache.h > +++ b/include/linux/dcache.h > @@ -144,7 +144,8 @@ enum d_real_type { > }; > > struct dentry_operations { > - int (*d_revalidate)(struct dentry *, unsigned int); > + int (*d_revalidate)(struct inode *, const struct qstr *, > + struct dentry *, unsigned int); > int (*d_weak_revalidate)(struct dentry *, unsigned int); > int (*d_hash)(const struct dentry *, struct qstr *); > int (*d_compare)(const struct dentry *, > diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h > index 772f822dc6b8..18855cb44b1c 100644 > --- a/include/linux/fscrypt.h > +++ b/include/linux/fscrypt.h > @@ -192,7 +192,8 @@ struct fscrypt_operations { > unsigned int *num_devs); > }; > > -int fscrypt_d_revalidate(struct dentry *dentry, unsigned int flags); > +int fscrypt_d_revalidate(struct inode *dir, const struct qstr *name, > + struct dentry *dentry, unsigned int flags); > > static inline struct fscrypt_inode_info * > fscrypt_get_inode_info(const struct inode *inode) > @@ -711,8 +712,8 @@ static inline u64 fscrypt_fname_siphash(const struct inode *dir, > return 0; > } > > -static inline int fscrypt_d_revalidate(struct dentry *dentry, > - unsigned int flags) > +static inline int fscrypt_d_revalidate(struct inode *dir, const struct qstr *name, > + struct dentry *dentry, unsigned int flags) > { > return 1; > }
On Fri, Jan 17, 2025 at 01:55:01PM -0500, Jeff Layton wrote: > > +static inline int d_revalidate(struct inode *dir, const struct qstr *name, > > + struct dentry *dentry, unsigned int flags) > > { > > if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE)) > > - return dentry->d_op->d_revalidate(dentry, flags); > > + return dentry->d_op->d_revalidate(dir, name, dentry, flags); > > I know I sent a R-b for this, but one question: > > Suppose we get back a positive result (dentry is still good), but the > name and dentry->d_name no longer match. Do we need to do any special > handling in that case? Not really - it's the same situation we'd have if it got renamed right after we'd concluded that everything's fine, after all. We can't spin there indefinitely, rechecking the name for changes ;-) In RCU mode ->d_seq mismatch guaranteed to catch that, in non-RCU... well, there's really nothing we could do - rename could've bloody well happened just as we'd completed pathname resolution.
On Wed, Jan 22, 2025 at 08:27:41PM +0000, David Howells wrote: > Al Viro <viro@zeniv.linux.org.uk> wrote: > > > - _enter("{%lu},%p{%pd},", dir->i_ino, dentry, dentry); > > + _enter("{%lu},{%s},", dir->i_ino, name->name); > > I don't think that name->name is guaranteed to be NUL-terminated after > name->len characters. The following: > > _enter("{%lu},{%*s},", dir->i_ino, name->len, name->name); > > might be better, though: > > _enter("{%lu},{%*.*s},", dir->i_ino, name->len, name->len, name->name); > > might be necessary. Good catch (and that definitely needs to be documented in previous commit), but what's wrong with _enter("{%lu},{%.*s},", dir->i_ino, name->len, name->name); After looking through the rest of the series, fuse and orangefs patches need to be adjusted. Not caught in testing since there similar braino manifests as stray invalidates ;-/
On Wed, Jan 22, 2025 at 09:01:24PM +0000, Al Viro wrote: > On Wed, Jan 22, 2025 at 08:27:41PM +0000, David Howells wrote: > > Al Viro <viro@zeniv.linux.org.uk> wrote: > > > > > - _enter("{%lu},%p{%pd},", dir->i_ino, dentry, dentry); > > > + _enter("{%lu},{%s},", dir->i_ino, name->name); > > > > I don't think that name->name is guaranteed to be NUL-terminated after > > name->len characters. The following: > > > > _enter("{%lu},{%*s},", dir->i_ino, name->len, name->name); > > > > might be better, though: > > > > _enter("{%lu},{%*.*s},", dir->i_ino, name->len, name->len, name->name); > > > > might be necessary. > > Good catch (and that definitely needs to be documented in previous commit), > but what's wrong with > _enter("{%lu},{%.*s},", dir->i_ino, name->len, name->name); IOW, are you OK with the following? commit bf61e4013ab1cb9a819303faca018e7b7cbaf3e7 Author: Al Viro <viro@zeniv.linux.org.uk> Date: Fri Jan 3 00:27:27 2025 -0500 afs_d_revalidate(): use stable name and parent inode passed by caller No need to bother with boilerplate for obtaining the latter and for the former we really should not count upon ->d_name.name remaining stable under us. Reviewed-by: Jeff Layton <jlayton@kernel.org> Signed-off-by: Al Viro <viro@zeniv.linux.org.uk> diff --git a/fs/afs/dir.c b/fs/afs/dir.c index 9780013cd83a..e04cffe4beb1 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c @@ -607,19 +607,19 @@ static bool afs_lookup_one_filldir(struct dir_context *ctx, const char *name, * Do a lookup of a single name in a directory * - just returns the FID the dentry name maps to if found */ -static int afs_do_lookup_one(struct inode *dir, struct dentry *dentry, +static int afs_do_lookup_one(struct inode *dir, const struct qstr *name, struct afs_fid *fid, struct key *key, afs_dataversion_t *_dir_version) { struct afs_super_info *as = dir->i_sb->s_fs_info; struct afs_lookup_one_cookie cookie = { .ctx.actor = afs_lookup_one_filldir, - .name = dentry->d_name, + .name = *name, .fid.vid = as->volume->vid }; int ret; - _enter("{%lu},%p{%pd},", dir->i_ino, dentry, dentry); + _enter("{%lu},{%.*s},", dir->i_ino, name->len, name->name); /* search the directory */ ret = afs_dir_iterate(dir, &cookie.ctx, key, _dir_version); @@ -1052,21 +1052,12 @@ static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry, /* * Check the validity of a dentry under RCU conditions. */ -static int afs_d_revalidate_rcu(struct dentry *dentry) +static int afs_d_revalidate_rcu(struct afs_vnode *dvnode, struct dentry *dentry) { - struct afs_vnode *dvnode; - struct dentry *parent; - struct inode *dir; long dir_version, de_version; _enter("%p", dentry); - /* Check the parent directory is still valid first. */ - parent = READ_ONCE(dentry->d_parent); - dir = d_inode_rcu(parent); - if (!dir) - return -ECHILD; - dvnode = AFS_FS_I(dir); if (test_bit(AFS_VNODE_DELETED, &dvnode->flags)) return -ECHILD; @@ -1097,9 +1088,8 @@ static int afs_d_revalidate_rcu(struct dentry *dentry) static int afs_d_revalidate(struct inode *parent_dir, const struct qstr *name, struct dentry *dentry, unsigned int flags) { - struct afs_vnode *vnode, *dir; + struct afs_vnode *vnode, *dir = AFS_FS_I(parent_dir); struct afs_fid fid; - struct dentry *parent; struct inode *inode; struct key *key; afs_dataversion_t dir_version, invalid_before; @@ -1107,7 +1097,7 @@ static int afs_d_revalidate(struct inode *parent_dir, const struct qstr *name, int ret; if (flags & LOOKUP_RCU) - return afs_d_revalidate_rcu(dentry); + return afs_d_revalidate_rcu(dir, dentry); if (d_really_is_positive(dentry)) { vnode = AFS_FS_I(d_inode(dentry)); @@ -1122,14 +1112,9 @@ static int afs_d_revalidate(struct inode *parent_dir, const struct qstr *name, if (IS_ERR(key)) key = NULL; - /* Hold the parent dentry so we can peer at it */ - parent = dget_parent(dentry); - dir = AFS_FS_I(d_inode(parent)); - /* validate the parent directory */ ret = afs_validate(dir, key); if (ret == -ERESTARTSYS) { - dput(parent); key_put(key); return ret; } @@ -1157,7 +1142,7 @@ static int afs_d_revalidate(struct inode *parent_dir, const struct qstr *name, afs_stat_v(dir, n_reval); /* search the directory for this vnode */ - ret = afs_do_lookup_one(&dir->netfs.inode, dentry, &fid, key, &dir_version); + ret = afs_do_lookup_one(&dir->netfs.inode, name, &fid, key, &dir_version); switch (ret) { case 0: /* the filename maps to something */ @@ -1201,22 +1186,19 @@ static int afs_d_revalidate(struct inode *parent_dir, const struct qstr *name, goto out_valid; default: - _debug("failed to iterate dir %pd: %d", - parent, ret); + _debug("failed to iterate parent %pd2: %d", dentry, ret); goto not_found; } out_valid: dentry->d_fsdata = (void *)(unsigned long)dir_version; out_valid_noupdate: - dput(parent); key_put(key); _leave(" = 1 [valid]"); return 1; not_found: _debug("dropping dentry %pd2", dentry); - dput(parent); key_put(key); _leave(" = 0 [bad]");
Al Viro <viro@zeniv.linux.org.uk> wrote: > IOW, are you OK with the following? > > commit bf61e4013ab1cb9a819303faca018e7b7cbaf3e7 > Author: Al Viro <viro@zeniv.linux.org.uk> > Date: Fri Jan 3 00:27:27 2025 -0500 > > afs_d_revalidate(): use stable name and parent inode passed by caller > > No need to bother with boilerplate for obtaining the latter and for > the former we really should not count upon ->d_name.name remaining > stable under us. > > Reviewed-by: Jeff Layton <jlayton@kernel.org> > Signed-off-by: Al Viro <viro@zeniv.linux.org.uk> Acked-by: David Howells <dhowells@redhat.com>
diff --git a/fs/dcache.c b/fs/dcache.c index b4d5e9e1e43d..ea0f0bea511b 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -2748,9 +2748,7 @@ static void swap_names(struct dentry *dentry, struct dentry *target) /* * Both are internal. */ - unsigned int i; - BUILD_BUG_ON(!IS_ALIGNED(DNAME_INLINE_LEN, sizeof(long))); - for (i = 0; i < DNAME_INLINE_LEN / sizeof(long); i++) { + for (int i = 0; i < DNAME_INLINE_WORDS; i++) { swap(((long *) &dentry->d_iname)[i], ((long *) &target->d_iname)[i]); } diff --git a/include/linux/dcache.h b/include/linux/dcache.h index bff956f7b2b9..42dd89beaf4e 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -68,15 +68,17 @@ extern const struct qstr dotdot_name; * large memory footprint increase). */ #ifdef CONFIG_64BIT -# define DNAME_INLINE_LEN 40 /* 192 bytes */ +# define DNAME_INLINE_WORDS 5 /* 192 bytes */ #else # ifdef CONFIG_SMP -# define DNAME_INLINE_LEN 36 /* 128 bytes */ +# define DNAME_INLINE_WORDS 9 /* 128 bytes */ # else -# define DNAME_INLINE_LEN 44 /* 128 bytes */ +# define DNAME_INLINE_WORDS 11 /* 128 bytes */ # endif #endif +#define DNAME_INLINE_LEN (DNAME_INLINE_WORDS*sizeof(unsigned long)) + #define d_lock d_lockref.lock struct dentry {