diff mbox series

[2/2,v3] ceph: add ceph_lock_info support for file_lock

Message ID 20221118020642.472484-3-xiubli@redhat.com
State New
Headers show
Series ceph: fix the use-after-free bug for file_lock | expand

Commit Message

Xiubo Li Nov. 18, 2022, 2:06 a.m. UTC
From: Xiubo Li <xiubli@redhat.com>

When ceph releases a file_lock it tries to get the inode pointer from
fl->fl_file, but that memory could already have been freed by another
thread in filp_close(), because the VFS layer does not take a reference
on the file for fl->fl_file.

Switch to using a ceph-dedicated lock info structure to track the inode.

In ceph_fl_release_lock() we should skip all the operations if
fl->fl_u.ceph_fl.fl_inode is not set, which should mean the file_lock
is a request file_lock. fl->fl_u.ceph_fl.fl_inode is set when the lock
is inserted into the inode lock list, which is when the lock is copied.

Cc: stable@vger.kernel.org
Cc: Jeff Layton <jlayton@kernel.org>
URL: https://tracker.ceph.com/issues/57986
Signed-off-by: Xiubo Li <xiubli@redhat.com>
---
 fs/ceph/locks.c                 | 20 ++++++++++++++++++--
 include/linux/ceph/ceph_fs_fl.h | 17 +++++++++++++++++
 include/linux/fs.h              |  2 ++
 3 files changed, 37 insertions(+), 2 deletions(-)
 create mode 100644 include/linux/ceph/ceph_fs_fl.h

Comments

Ilya Dryomov Dec. 12, 2022, 5:56 p.m. UTC | #1
On Fri, Nov 18, 2022 at 3:07 AM <xiubli@redhat.com> wrote:
>
> From: Xiubo Li <xiubli@redhat.com>
>
> When ceph releasing the file_lock it will try to get the inode pointer
> from the fl->fl_file, which the memory could already be released by
> another thread in filp_close(). Because in VFS layer the fl->fl_file
> doesn't increase the file's reference counter.
>
> Will switch to use ceph dedicate lock info to track the inode.
>
> And in ceph_fl_release_lock() we should skip all the operations if
> the fl->fl_u.ceph_fl.fl_inode is not set, which should come from
> the request file_lock. And we will set fl->fl_u.ceph_fl.fl_inode when
> inserting it to the inode lock list, which is when copying the lock.
>
> Cc: stable@vger.kernel.org
> Cc: Jeff Layton <jlayton@kernel.org>
> URL: https://tracker.ceph.com/issues/57986
> Signed-off-by: Xiubo Li <xiubli@redhat.com>
> ---
>  fs/ceph/locks.c                 | 20 ++++++++++++++++++--
>  include/linux/ceph/ceph_fs_fl.h | 17 +++++++++++++++++
>  include/linux/fs.h              |  2 ++
>  3 files changed, 37 insertions(+), 2 deletions(-)
>  create mode 100644 include/linux/ceph/ceph_fs_fl.h
>
> diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c
> index b191426bf880..621f38f10a88 100644
> --- a/fs/ceph/locks.c
> +++ b/fs/ceph/locks.c
> @@ -34,18 +34,34 @@ static void ceph_fl_copy_lock(struct file_lock *dst, struct file_lock *src)
>  {
>         struct inode *inode = file_inode(dst->fl_file);
>         atomic_inc(&ceph_inode(inode)->i_filelock_ref);
> +       dst->fl_u.ceph_fl.fl_inode = igrab(inode);
>  }
>
> +/*
> + * Do not use the 'fl->fl_file' in release function, which
> + * is possibly already released by another thread.
> + */
>  static void ceph_fl_release_lock(struct file_lock *fl)
>  {
> -       struct inode *inode = file_inode(fl->fl_file);
> -       struct ceph_inode_info *ci = ceph_inode(inode);
> +       struct inode *inode = fl->fl_u.ceph_fl.fl_inode;
> +       struct ceph_inode_info *ci;
> +
> +       /*
> +        * If inode is NULL it should be a request file_lock,
> +        * nothing we can do.
> +        */
> +       if (!inode)
> +               return;
> +
> +       ci = ceph_inode(inode);
>         if (atomic_dec_and_test(&ci->i_filelock_ref)) {
>                 /* clear error when all locks are released */
>                 spin_lock(&ci->i_ceph_lock);
>                 ci->i_ceph_flags &= ~CEPH_I_ERROR_FILELOCK;
>                 spin_unlock(&ci->i_ceph_lock);
>         }
> +       fl->fl_u.ceph_fl.fl_inode = NULL;
> +       iput(inode);
>  }
>
>  static const struct file_lock_operations ceph_fl_lock_ops = {
> diff --git a/include/linux/ceph/ceph_fs_fl.h b/include/linux/ceph/ceph_fs_fl.h
> new file mode 100644
> index 000000000000..ad1cf96329f9
> --- /dev/null
> +++ b/include/linux/ceph/ceph_fs_fl.h
> @@ -0,0 +1,17 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +/*
> + * ceph_fs_fl.h - Ceph lock info
> + *
> + * LGPL2
> + */
> +
> +#ifndef CEPH_FS_FL_H
> +#define CEPH_FS_FL_H
> +
> +#include <linux/fs.h>
> +
> +struct ceph_lock_info {
> +       struct inode *fl_inode;
> +};
> +
> +#endif
> diff --git a/include/linux/fs.h b/include/linux/fs.h
> index d6cb42b7e91c..2b03d5e375d7 100644
> --- a/include/linux/fs.h
> +++ b/include/linux/fs.h
> @@ -1066,6 +1066,7 @@ bool opens_in_grace(struct net *);
>
>  /* that will die - we need it for nfs_lock_info */
>  #include <linux/nfs_fs_i.h>
> +#include <linux/ceph/ceph_fs_fl.h>
>
>  /*
>   * struct file_lock represents a generic "file lock". It's used to represent
> @@ -1119,6 +1120,7 @@ struct file_lock {
>                         int state;              /* state of grant or error if -ve */
>                         unsigned int    debug_id;
>                 } afs;
> +               struct ceph_lock_info   ceph_fl;

Hi Xiubo and Jeff,

Xiubo, instead of defining struct ceph_lock_info and including
a CephFS-specific header file in linux/fs.h, I think we should repeat
what was done for AFS -- particularly given that ceph_lock_info ends up
being a dummy type that isn't mentioned anywhere else.

Jeff, could you please ack this with your file locking hat on?

Thanks,

                Ilya
Jeff Layton Dec. 12, 2022, 6:02 p.m. UTC | #2
On Mon, 2022-12-12 at 18:56 +0100, Ilya Dryomov wrote:
> On Fri, Nov 18, 2022 at 3:07 AM <xiubli@redhat.com> wrote:
> > 
> > From: Xiubo Li <xiubli@redhat.com>
> > 
> > When ceph releasing the file_lock it will try to get the inode pointer
> > from the fl->fl_file, which the memory could already be released by
> > another thread in filp_close(). Because in VFS layer the fl->fl_file
> > doesn't increase the file's reference counter.
> > 
> > Will switch to use ceph dedicate lock info to track the inode.
> > 
> > And in ceph_fl_release_lock() we should skip all the operations if
> > the fl->fl_u.ceph_fl.fl_inode is not set, which should come from
> > the request file_lock. And we will set fl->fl_u.ceph_fl.fl_inode when
> > inserting it to the inode lock list, which is when copying the lock.
> > 
> > Cc: stable@vger.kernel.org
> > Cc: Jeff Layton <jlayton@kernel.org>
> > URL: https://tracker.ceph.com/issues/57986
> > Signed-off-by: Xiubo Li <xiubli@redhat.com>
> > ---
> >  fs/ceph/locks.c                 | 20 ++++++++++++++++++--
> >  include/linux/ceph/ceph_fs_fl.h | 17 +++++++++++++++++
> >  include/linux/fs.h              |  2 ++
> >  3 files changed, 37 insertions(+), 2 deletions(-)
> >  create mode 100644 include/linux/ceph/ceph_fs_fl.h
> > 
> > diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c
> > index b191426bf880..621f38f10a88 100644
> > --- a/fs/ceph/locks.c
> > +++ b/fs/ceph/locks.c
> > @@ -34,18 +34,34 @@ static void ceph_fl_copy_lock(struct file_lock *dst, struct file_lock *src)
> >  {
> >         struct inode *inode = file_inode(dst->fl_file);
> >         atomic_inc(&ceph_inode(inode)->i_filelock_ref);
> > +       dst->fl_u.ceph_fl.fl_inode = igrab(inode);
> >  }
> > 
> > +/*
> > + * Do not use the 'fl->fl_file' in release function, which
> > + * is possibly already released by another thread.
> > + */
> >  static void ceph_fl_release_lock(struct file_lock *fl)
> >  {
> > -       struct inode *inode = file_inode(fl->fl_file);
> > -       struct ceph_inode_info *ci = ceph_inode(inode);
> > +       struct inode *inode = fl->fl_u.ceph_fl.fl_inode;
> > +       struct ceph_inode_info *ci;
> > +
> > +       /*
> > +        * If inode is NULL it should be a request file_lock,
> > +        * nothing we can do.
> > +        */
> > +       if (!inode)
> > +               return;
> > +
> > +       ci = ceph_inode(inode);
> >         if (atomic_dec_and_test(&ci->i_filelock_ref)) {
> >                 /* clear error when all locks are released */
> >                 spin_lock(&ci->i_ceph_lock);
> >                 ci->i_ceph_flags &= ~CEPH_I_ERROR_FILELOCK;
> >                 spin_unlock(&ci->i_ceph_lock);
> >         }
> > +       fl->fl_u.ceph_fl.fl_inode = NULL;
> > +       iput(inode);
> >  }
> > 
> >  static const struct file_lock_operations ceph_fl_lock_ops = {
> > diff --git a/include/linux/ceph/ceph_fs_fl.h b/include/linux/ceph/ceph_fs_fl.h
> > new file mode 100644
> > index 000000000000..ad1cf96329f9
> > --- /dev/null
> > +++ b/include/linux/ceph/ceph_fs_fl.h
> > @@ -0,0 +1,17 @@
> > +/* SPDX-License-Identifier: GPL-2.0 */
> > +/*
> > + * ceph_fs_fl.h - Ceph lock info
> > + *
> > + * LGPL2
> > + */
> > +
> > +#ifndef CEPH_FS_FL_H
> > +#define CEPH_FS_FL_H
> > +
> > +#include <linux/fs.h>
> > +
> > +struct ceph_lock_info {
> > +       struct inode *fl_inode;
> > +};
> > +
> > +#endif
> > diff --git a/include/linux/fs.h b/include/linux/fs.h
> > index d6cb42b7e91c..2b03d5e375d7 100644
> > --- a/include/linux/fs.h
> > +++ b/include/linux/fs.h
> > @@ -1066,6 +1066,7 @@ bool opens_in_grace(struct net *);
> > 
> >  /* that will die - we need it for nfs_lock_info */
> >  #include <linux/nfs_fs_i.h>
> > +#include <linux/ceph/ceph_fs_fl.h>
> > 
> >  /*
> >   * struct file_lock represents a generic "file lock". It's used to represent
> > @@ -1119,6 +1120,7 @@ struct file_lock {
> >                         int state;              /* state of grant or error if -ve */
> >                         unsigned int    debug_id;
> >                 } afs;
> > +               struct ceph_lock_info   ceph_fl;
> 
> Hi Xiubo and Jeff,
> 
> Xiubo, instead of defining struct ceph_lock_info and including
> a CephFS-specific header file in linux/fs.h, I think we should repeat
> what was done for AFS -- particularly given that ceph_lock_info ends up
> being a dummy type that isn't mentioned anywhere else.
> 
> Jeff, could you please ack this with your file locking hat on?
> 

ACK. I think that would be cleaner.

Thanks
Xiubo Li Dec. 13, 2022, 1:25 a.m. UTC | #3
On 13/12/2022 02:02, Jeff Layton wrote:
> On Mon, 2022-12-12 at 18:56 +0100, Ilya Dryomov wrote:
>> On Fri, Nov 18, 2022 at 3:07 AM <xiubli@redhat.com> wrote:
>>> From: Xiubo Li <xiubli@redhat.com>
>>>
>>> When ceph releasing the file_lock it will try to get the inode pointer
>>> from the fl->fl_file, which the memory could already be released by
>>> another thread in filp_close(). Because in VFS layer the fl->fl_file
>>> doesn't increase the file's reference counter.
>>>
>>> Will switch to use ceph dedicate lock info to track the inode.
>>>
>>> And in ceph_fl_release_lock() we should skip all the operations if
>>> the fl->fl_u.ceph_fl.fl_inode is not set, which should come from
>>> the request file_lock. And we will set fl->fl_u.ceph_fl.fl_inode when
>>> inserting it to the inode lock list, which is when copying the lock.
>>>
>>> Cc: stable@vger.kernel.org
>>> Cc: Jeff Layton <jlayton@kernel.org>
>>> URL: https://tracker.ceph.com/issues/57986
>>> Signed-off-by: Xiubo Li <xiubli@redhat.com>
>>> ---
>>>   fs/ceph/locks.c                 | 20 ++++++++++++++++++--
>>>   include/linux/ceph/ceph_fs_fl.h | 17 +++++++++++++++++
>>>   include/linux/fs.h              |  2 ++
>>>   3 files changed, 37 insertions(+), 2 deletions(-)
>>>   create mode 100644 include/linux/ceph/ceph_fs_fl.h
>>>
>>> diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c
>>> index b191426bf880..621f38f10a88 100644
>>> --- a/fs/ceph/locks.c
>>> +++ b/fs/ceph/locks.c
>>> @@ -34,18 +34,34 @@ static void ceph_fl_copy_lock(struct file_lock *dst, struct file_lock *src)
>>>   {
>>>          struct inode *inode = file_inode(dst->fl_file);
>>>          atomic_inc(&ceph_inode(inode)->i_filelock_ref);
>>> +       dst->fl_u.ceph_fl.fl_inode = igrab(inode);
>>>   }
>>>
>>> +/*
>>> + * Do not use the 'fl->fl_file' in release function, which
>>> + * is possibly already released by another thread.
>>> + */
>>>   static void ceph_fl_release_lock(struct file_lock *fl)
>>>   {
>>> -       struct inode *inode = file_inode(fl->fl_file);
>>> -       struct ceph_inode_info *ci = ceph_inode(inode);
>>> +       struct inode *inode = fl->fl_u.ceph_fl.fl_inode;
>>> +       struct ceph_inode_info *ci;
>>> +
>>> +       /*
>>> +        * If inode is NULL it should be a request file_lock,
>>> +        * nothing we can do.
>>> +        */
>>> +       if (!inode)
>>> +               return;
>>> +
>>> +       ci = ceph_inode(inode);
>>>          if (atomic_dec_and_test(&ci->i_filelock_ref)) {
>>>                  /* clear error when all locks are released */
>>>                  spin_lock(&ci->i_ceph_lock);
>>>                  ci->i_ceph_flags &= ~CEPH_I_ERROR_FILELOCK;
>>>                  spin_unlock(&ci->i_ceph_lock);
>>>          }
>>> +       fl->fl_u.ceph_fl.fl_inode = NULL;
>>> +       iput(inode);
>>>   }
>>>
>>>   static const struct file_lock_operations ceph_fl_lock_ops = {
>>> diff --git a/include/linux/ceph/ceph_fs_fl.h b/include/linux/ceph/ceph_fs_fl.h
>>> new file mode 100644
>>> index 000000000000..ad1cf96329f9
>>> --- /dev/null
>>> +++ b/include/linux/ceph/ceph_fs_fl.h
>>> @@ -0,0 +1,17 @@
>>> +/* SPDX-License-Identifier: GPL-2.0 */
>>> +/*
>>> + * ceph_fs_fl.h - Ceph lock info
>>> + *
>>> + * LGPL2
>>> + */
>>> +
>>> +#ifndef CEPH_FS_FL_H
>>> +#define CEPH_FS_FL_H
>>> +
>>> +#include <linux/fs.h>
>>> +
>>> +struct ceph_lock_info {
>>> +       struct inode *fl_inode;
>>> +};
>>> +
>>> +#endif
>>> diff --git a/include/linux/fs.h b/include/linux/fs.h
>>> index d6cb42b7e91c..2b03d5e375d7 100644
>>> --- a/include/linux/fs.h
>>> +++ b/include/linux/fs.h
>>> @@ -1066,6 +1066,7 @@ bool opens_in_grace(struct net *);
>>>
>>>   /* that will die - we need it for nfs_lock_info */
>>>   #include <linux/nfs_fs_i.h>
>>> +#include <linux/ceph/ceph_fs_fl.h>
>>>
>>>   /*
>>>    * struct file_lock represents a generic "file lock". It's used to represent
>>> @@ -1119,6 +1120,7 @@ struct file_lock {
>>>                          int state;              /* state of grant or error if -ve */
>>>                          unsigned int    debug_id;
>>>                  } afs;
>>> +               struct ceph_lock_info   ceph_fl;
>> Hi Xiubo and Jeff,
>>
>> Xiubo, instead of defining struct ceph_lock_info and including
>> a CephFS-specific header file in linux/fs.h, I think we should repeat
>> what was done for AFS -- particularly given that ceph_lock_info ends up
>> being a dummy type that isn't mentioned anywhere else.
>>
>> Jeff, could you please ack this with your file locking hat on?
>>
> ACK. I think that would be cleaner.

Sure, will fix this.

Thanks,

- Xiubo


> Thanks
diff mbox series

Patch

diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c
index b191426bf880..621f38f10a88 100644
--- a/fs/ceph/locks.c
+++ b/fs/ceph/locks.c
@@ -34,18 +34,34 @@  static void ceph_fl_copy_lock(struct file_lock *dst, struct file_lock *src)
 {
 	struct inode *inode = file_inode(dst->fl_file);
 	atomic_inc(&ceph_inode(inode)->i_filelock_ref);
+	dst->fl_u.ceph_fl.fl_inode = igrab(inode);
 }
 
+/*
+ * Do not use the 'fl->fl_file' in release function, which
+ * is possibly already released by another thread.
+ */
 static void ceph_fl_release_lock(struct file_lock *fl)
 {
-	struct inode *inode = file_inode(fl->fl_file);
-	struct ceph_inode_info *ci = ceph_inode(inode);
+	struct inode *inode = fl->fl_u.ceph_fl.fl_inode;
+	struct ceph_inode_info *ci;
+
+	/*
+	 * If inode is NULL it should be a request file_lock,
+	 * nothing we can do.
+	 */
+	if (!inode)
+		return;
+
+	ci = ceph_inode(inode);
 	if (atomic_dec_and_test(&ci->i_filelock_ref)) {
 		/* clear error when all locks are released */
 		spin_lock(&ci->i_ceph_lock);
 		ci->i_ceph_flags &= ~CEPH_I_ERROR_FILELOCK;
 		spin_unlock(&ci->i_ceph_lock);
 	}
+	fl->fl_u.ceph_fl.fl_inode = NULL;
+	iput(inode);
 }
 
 static const struct file_lock_operations ceph_fl_lock_ops = {
diff --git a/include/linux/ceph/ceph_fs_fl.h b/include/linux/ceph/ceph_fs_fl.h
new file mode 100644
index 000000000000..ad1cf96329f9
--- /dev/null
+++ b/include/linux/ceph/ceph_fs_fl.h
@@ -0,0 +1,17 @@ 
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * ceph_fs_fl.h - Ceph lock info
+ *
+ * LGPL2
+ */
+
+#ifndef CEPH_FS_FL_H
+#define CEPH_FS_FL_H
+
+#include <linux/fs.h>
+
+struct ceph_lock_info {
+	struct inode *fl_inode;
+};
+
+#endif
diff --git a/include/linux/fs.h b/include/linux/fs.h
index d6cb42b7e91c..2b03d5e375d7 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1066,6 +1066,7 @@  bool opens_in_grace(struct net *);
 
 /* that will die - we need it for nfs_lock_info */
 #include <linux/nfs_fs_i.h>
+#include <linux/ceph/ceph_fs_fl.h>
 
 /*
  * struct file_lock represents a generic "file lock". It's used to represent
@@ -1119,6 +1120,7 @@  struct file_lock {
 			int state;		/* state of grant or error if -ve */
 			unsigned int	debug_id;
 		} afs;
+		struct ceph_lock_info	ceph_fl;
 	} fl_u;
 } __randomize_layout;