Message ID | 20201028142004.GA100353@mtl-vdi-166.wap.labs.mlnx
State      | New
Series     | vhost: Use mutex to protect vq_irq setup
On 2020/10/28 10:20 PM, Eli Cohen wrote:
> Both irq_bypass_register_producer() and irq_bypass_unregister_producer()
> require process context to run. Change the call context lock from
> spinlock to mutex to protect the setup process and avoid deadlocks.
>
> Fixes: 265a0ad8731d ("vhost: introduce vhost_vring_call")
> Signed-off-by: Eli Cohen <elic@nvidia.com>

Hi Eli:

During review we spotted that the spinlock is not necessary: the setup
path is already protected by the vq mutex. So the lock was removed in
this commit:

https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=86e182fe12ee5869022614457037097c70fe2ed1

Thanks

> [...]
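For reference, here is a hedged sketch of the protection Jason describes. The names below are illustrative stand-ins, not verbatim kernel code, and it assumes what the linked commit relies on: the VHOST_SET_VRING_CALL handler and the vq irq setup/teardown paths all run under the per-virtqueue mutex.

#include <linux/mutex.h>
#include <linux/eventfd.h>

/* Illustrative stand-in for struct vhost_virtqueue. */
struct vq_sketch {
	struct mutex mutex;		/* per-virtqueue lock */
	struct eventfd_ctx *call_ctx;	/* call eventfd */
};

/* With ctx_lock removed, the existing vq mutex alone serializes
 * updates of the call eventfd against vq irq setup/teardown.
 */
static void set_vring_call_sketch(struct vq_sketch *vq,
				  struct eventfd_ctx *new_ctx)
{
	struct eventfd_ctx *old;

	mutex_lock(&vq->mutex);
	old = vq->call_ctx;
	vq->call_ctx = new_ctx;		/* publish the new eventfd */
	mutex_unlock(&vq->mutex);

	if (old)
		eventfd_ctx_put(old);	/* release the replaced eventfd */
}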
On Thu, Oct 29, 2020 at 03:03:24PM +0800, Jason Wang wrote:
> Hi Eli:
>
> During review we spotted that the spinlock is not necessary: the setup
> path is already protected by the vq mutex. So the lock was removed in
> this commit:
>
> https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=86e182fe12ee5869022614457037097c70fe2ed1
>
> Thanks

I see, thanks.

BTW, while testing irq bypassing, I noticed that qemu started crashing
and I failed to boot the VM. Is that a known issue? I checked using the
qemu master branch, updated yesterday.

Any ideas how to check this further?
Did anyone actually check that irq bypassing works?

> [...]
On 2020/10/29 3:37 PM, Eli Cohen wrote:
> On Thu, Oct 29, 2020 at 03:03:24PM +0800, Jason Wang wrote:
>> [...]
> I see, thanks.
>
> BTW, while testing irq bypassing, I noticed that qemu started crashing
> and I failed to boot the VM. Is that a known issue? I checked using the
> qemu master branch, updated yesterday.

Not known yet.

> Any ideas how to check this further?

It would be helpful if you could paste the calltrace here.

> Did anyone actually check that irq bypassing works?

Yes, Ling Shan tested it via the IFCVF driver.

Thanks

> [...]
On Thu, Oct 29, 2020 at 03:39:24PM +0800, Jason Wang wrote:
> On 2020/10/29 3:37 PM, Eli Cohen wrote:
>> [...]
>> Any ideas how to check this further?
>
> It would be helpful if you could paste the calltrace here.

I am not too familiar with qemu. Assuming I am using virsh start to
boot the VM, how can I get the call trace?

>> Did anyone actually check that irq bypassing works?
>
> Yes, Ling Shan tested it via the IFCVF driver.
>
> Thanks
>
> [...]
On 2020/10/29 3:50 PM, Eli Cohen wrote:
> On Thu, Oct 29, 2020 at 03:39:24PM +0800, Jason Wang wrote:
>> [...]
>> It would be helpful if you could paste the calltrace here.
>>
> I am not too familiar with qemu. Assuming I am using virsh start to
> boot the VM, how can I get the call trace?

You probably need to configure qemu with --enable-debug. Then after the
VM is launched, you can use gdb to attach to the qemu process; gdb may
then report a calltrace if qemu crashes.

Thanks
On Thu, Oct 29, 2020 at 04:08:24PM +0800, Jason Wang wrote:
> On 2020/10/29 3:50 PM, Eli Cohen wrote:
>> [...]
>> I am not too familiar with qemu. Assuming I am using virsh start to
>> boot the VM, how can I get the call trace?
>
> You probably need to configure qemu with --enable-debug. Then after the
> VM is launched, you can use gdb to attach to the qemu process; gdb may
> then report a calltrace if qemu crashes.
>
> Thanks

I run qemu from the console (no virsh) and I get this message:

*** stack smashing detected ***: terminated
Aborted (core dumped)

When I run coredumpctl debug on the core file I see this backtrace:

#0  __GI_raise (sig=sig@entry=6) at ../sysdeps/unix/sysv/linux/raise.c:50
#1  0x00007f0ca5b95895 in __GI_abort () at abort.c:79
#2  0x00007f0ca5bf0857 in __libc_message (action=action@entry=do_abort, fmt=fmt@entry=0x7f0ca5d01c14 "*** %s ***: terminated\n") at ../sysdeps/posix/libc_fatal.c:155
#3  0x00007f0ca5c8177a in __GI___fortify_fail (msg=msg@entry=0x7f0ca5d01bfc "stack smashing detected") at fortify_fail.c:26
#4  0x00007f0ca5c81746 in __stack_chk_fail () at stack_chk_fail.c:24
#5  0x000055ce01cd4d4e in vhost_vdpa_set_backend_cap (dev=0x55ce03800370) at ../hw/virtio/vhost-vdpa.c:256
#6  0x000055ce01cbc42c in vhost_dev_set_features (dev=dev@entry=0x55ce03800370, enable_log=<optimized out>) at ../hw/virtio/vhost.c:820
#7  0x000055ce01cbf5b8 in vhost_dev_start (hdev=hdev@entry=0x55ce03800370, vdev=vdev@entry=0x55ce045edc70) at ../hw/virtio/vhost.c:1701
#8  0x000055ce01a57eab in vhost_net_start_one (dev=0x55ce045edc70, net=0x55ce03800370) at ../hw/net/vhost_net.c:246
#9  vhost_net_start (dev=dev@entry=0x55ce045edc70, ncs=0x55ce04601510, total_queues=total_queues@entry=1) at ../hw/net/vhost_net.c:351
#10 0x000055ce01cdafbc in virtio_net_vhost_status (status=<optimized out>, n=0x55ce045edc70) at ../hw/net/virtio-net.c:281
#11 virtio_net_set_status (vdev=0x55ce045edc70, status=<optimized out>) at ../hw/net/virtio-net.c:362
#12 0x000055ce01c7015b in virtio_set_status (vdev=vdev@entry=0x55ce045edc70, val=val@entry=15 '\017') at ../hw/virtio/virtio.c:1957
#13 0x000055ce01bdf4e8 in virtio_pci_common_write (opaque=0x55ce045e5ae0, addr=<optimized out>, val=<optimized out>, size=<optimized out>) at ../hw/virtio/virtio-pci.c:1258
#14 0x000055ce01ce05fc in memory_region_write_accessor (mr=mr@entry=0x55ce045e64c0, addr=20, value=value@entry=0x7f0c9ec6f7b8, size=size@entry=1, shift=<optimized out>, mask=mask@entry=255, attrs=...) at ../softmmu/memory.c:484
#15 0x000055ce01cdf11e in access_with_adjusted_size (addr=addr@entry=20, value=value@entry=0x7f0c9ec6f7b8, size=size@entry=1, access_size_min=<optimized out>, access_size_max=<optimized out>, access_fn=0x55ce01ce0570 <memory_region_write_accessor>, mr=0x55ce045e64c0, attrs=...) at ../softmmu/memory.c:545
#16 0x000055ce01ce2933 in memory_region_dispatch_write (mr=mr@entry=0x55ce045e64c0, addr=20, data=<optimized out>, op=<optimized out>, attrs=attrs@entry=...) at ../softmmu/memory.c:1494
#17 0x000055ce01c81380 in flatview_write_continue (fv=fv@entry=0x7f0980000b90, addr=addr@entry=4261412884, attrs=attrs@entry=..., ptr=ptr@entry=0x7f0ca674f028, len=len@entry=1, addr1=<optimized out>, l=<optimized out>, mr=0x55ce045e64c0) at /images/eli/src/newgits/qemu/include/qemu/host-utils.h:164
#18 0x000055ce01c842c5 in flatview_write (len=1, buf=0x7f0ca674f028, attrs=..., addr=4261412884, fv=0x7f0980000b90) at ../softmmu/physmem.c:2807
#19 address_space_write (as=0x55ce02740800 <address_space_memory>, addr=4261412884, attrs=..., buf=buf@entry=0x7f0ca674f028, len=1) at ../softmmu/physmem.c:2899
#20 0x000055ce01c8435a in address_space_rw (as=<optimized out>, addr=<optimized out>, attrs=..., attrs@entry=..., buf=buf@entry=0x7f0ca674f028, len=<optimized out>, is_write=<optimized out>) at ../softmmu/physmem.c:2909
#21 0x000055ce01cb0d76 in kvm_cpu_exec (cpu=cpu@entry=0x55ce03827620) at ../accel/kvm/kvm-all.c:2539
#22 0x000055ce01d2ea75 in kvm_vcpu_thread_fn (arg=arg@entry=0x55ce03827620) at ../accel/kvm/kvm-cpus.c:49
#23 0x000055ce01f05559 in qemu_thread_start (args=0x7f0c9ec6f9b0) at ../util/qemu-thread-posix.c:521
#24 0x00007f0ca5d43432 in start_thread (arg=<optimized out>) at pthread_create.c:477
#25 0x00007f0ca5c71913 in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:95

The stack smashing report at frame 5 looks to me like a false positive.
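For readers unfamiliar with this failure mode, here is a purely hypothetical C illustration, not qemu's actual vhost_vdpa_set_backend_cap() code: __stack_chk_fail() fires when a function's stack canary is clobbered, for example by an ioctl whose kernel side writes more bytes than the stack variable passed to it can hold.

#include <stdint.h>
#include <sys/ioctl.h>

/* Hypothetical sketch: assume 'request' is an ioctl whose kernel
 * handler copies a 64-bit value to the user-supplied pointer.
 */
static int get_features_buggy(int fd, unsigned long request)
{
	uint32_t features;	/* BUG in this sketch: kernel writes 8 bytes */

	/* The extra 4 bytes land beyond 'features' on the stack and
	 * corrupt the compiler-inserted canary; on return the process
	 * aborts with "*** stack smashing detected ***".
	 */
	if (ioctl(fd, request, &features) < 0)
		return -1;

	return 0;
}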
On Thu, Oct 29, 2020 at 09:37:17AM +0200, Eli Cohen wrote:
> On Thu, Oct 29, 2020 at 03:03:24PM +0800, Jason Wang wrote:
>> [...]
> I see, thanks.
>
> BTW, while testing irq bypassing, I noticed that qemu started crashing
> and I failed to boot the VM. Is that a known issue? I checked using the
> qemu master branch, updated yesterday.
>
> Any ideas how to check this further?
> Did anyone actually check that irq bypassing works?

Confused. Is the crash related to this patch somehow?

> [...]
diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c
index be783592fe58..0a744f2b6e76 100644
--- a/drivers/vhost/vdpa.c
+++ b/drivers/vhost/vdpa.c
@@ -98,26 +98,26 @@ static void vhost_vdpa_setup_vq_irq(struct vhost_vdpa *v, u16 qid)
 		return;
 
 	irq = ops->get_vq_irq(vdpa, qid);
-	spin_lock(&vq->call_ctx.ctx_lock);
+	mutex_lock(&vq->call_ctx.ctx_lock);
 	irq_bypass_unregister_producer(&vq->call_ctx.producer);
 	if (!vq->call_ctx.ctx || irq < 0) {
-		spin_unlock(&vq->call_ctx.ctx_lock);
+		mutex_unlock(&vq->call_ctx.ctx_lock);
 		return;
 	}
 
 	vq->call_ctx.producer.token = vq->call_ctx.ctx;
 	vq->call_ctx.producer.irq = irq;
 	ret = irq_bypass_register_producer(&vq->call_ctx.producer);
-	spin_unlock(&vq->call_ctx.ctx_lock);
+	mutex_unlock(&vq->call_ctx.ctx_lock);
 }
 
 static void vhost_vdpa_unsetup_vq_irq(struct vhost_vdpa *v, u16 qid)
 {
 	struct vhost_virtqueue *vq = &v->vqs[qid];
 
-	spin_lock(&vq->call_ctx.ctx_lock);
+	mutex_lock(&vq->call_ctx.ctx_lock);
 	irq_bypass_unregister_producer(&vq->call_ctx.producer);
-	spin_unlock(&vq->call_ctx.ctx_lock);
+	mutex_unlock(&vq->call_ctx.ctx_lock);
 }
 
 static void vhost_vdpa_reset(struct vhost_vdpa *v)
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index 9ad45e1d27f0..938239e11455 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -302,7 +302,7 @@ static void vhost_vring_call_reset(struct vhost_vring_call *call_ctx)
 {
 	call_ctx->ctx = NULL;
 	memset(&call_ctx->producer, 0x0, sizeof(struct irq_bypass_producer));
-	spin_lock_init(&call_ctx->ctx_lock);
+	mutex_init(&call_ctx->ctx_lock);
 }
 
 static void vhost_vq_reset(struct vhost_dev *dev,
@@ -1650,9 +1650,9 @@ long vhost_vring_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *argp)
 			break;
 		}
 
-		spin_lock(&vq->call_ctx.ctx_lock);
+		mutex_lock(&vq->call_ctx.ctx_lock);
 		swap(ctx, vq->call_ctx.ctx);
-		spin_unlock(&vq->call_ctx.ctx_lock);
+		mutex_unlock(&vq->call_ctx.ctx_lock);
 		break;
 	case VHOST_SET_VRING_ERR:
 		if (copy_from_user(&f, argp, sizeof f)) {
diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
index 9032d3c2a9f4..e8855ea04205 100644
--- a/drivers/vhost/vhost.h
+++ b/drivers/vhost/vhost.h
@@ -64,7 +64,8 @@ enum vhost_uaddr_type {
 struct vhost_vring_call {
 	struct eventfd_ctx *ctx;
 	struct irq_bypass_producer producer;
-	spinlock_t ctx_lock;
+	/* protect vq irq setup */
+	struct mutex ctx_lock;
 };
 
 /* The virtqueue structure describes a queue attached to a device. */
Both irq_bypass_register_producer() and irq_bypass_unregister_producer()
require process context to run. Change the call context lock from
spinlock to mutex to protect the setup process and avoid deadlocks.

Fixes: 265a0ad8731d ("vhost: introduce vhost_vring_call")
Signed-off-by: Eli Cohen <elic@nvidia.com>
---
 drivers/vhost/vdpa.c  | 10 +++++-----
 drivers/vhost/vhost.c |  6 +++---
 drivers/vhost/vhost.h |  3 ++-
 3 files changed, 10 insertions(+), 9 deletions(-)
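As background for the commit message above, here is a minimal sketch of the context problem being fixed. It assumes only that irq_bypass_register_producer() may sleep (it serializes on a mutex internally in virt/lib/irqbypass.c); the surrounding structure is illustrative, not the patched code itself.

#include <linux/mutex.h>
#include <linux/spinlock.h>
#include <linux/irqbypass.h>

/* Illustrative stand-in for struct vhost_vring_call. */
struct call_ctx_sketch {
	struct irq_bypass_producer producer;
	spinlock_t old_lock;	/* before the patch */
	struct mutex new_lock;	/* after the patch  */
};

static void setup_irq_buggy(struct call_ctx_sketch *cc)
{
	spin_lock(&cc->old_lock);	/* enters atomic context */
	/* May sleep: triggers "BUG: sleeping function called from
	 * invalid context" (with CONFIG_DEBUG_ATOMIC_SLEEP) or worse.
	 */
	irq_bypass_register_producer(&cc->producer);
	spin_unlock(&cc->old_lock);
}

static void setup_irq_fixed(struct call_ctx_sketch *cc)
{
	mutex_lock(&cc->new_lock);	/* stays in process context */
	irq_bypass_register_producer(&cc->producer);	/* sleeping is fine */
	mutex_unlock(&cc->new_lock);
}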