Message ID | 1516626103-1969-3-git-send-email-rogerq@ti.com |
---|---|
State | New |
Headers | show |
Series | usb: dwc3: Fix dual role during system suspend/resume | expand |
Hi, On 1/22/2018 6:31 PM, Roger Quadros wrote: > Adding/removing host/gadget controller before .pm_complete() > causes a lock-up. Let's prevent any dual-role state change > between .pm_prepare() and .pm_complete() to fix this. What kind of lock-up are you seeing? Some hardware lockup or software deadlock? IMO using a freezable_wq for drd_work should address that? > > Signed-off-by: Roger Quadros <rogerq@ti.com> > --- > drivers/usb/dwc3/core.c | 31 +++++++++++++++++++++++++++++++ > drivers/usb/dwc3/core.h | 5 +++++ > drivers/usb/dwc3/drd.c | 10 ++++++---- > 3 files changed, 42 insertions(+), 4 deletions(-) > > diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c > index 42379cc..85388dd 100644 > --- a/drivers/usb/dwc3/core.c > +++ b/drivers/usb/dwc3/core.c > @@ -1414,6 +1414,33 @@ static int dwc3_runtime_idle(struct device *dev) > #endif /* CONFIG_PM */ > > #ifdef CONFIG_PM_SLEEP > +static int dwc3_prepare(struct device *dev) > +{ > + struct dwc3 *dwc = dev_get_drvdata(dev); > + unsigned long flags; > + > + if (dwc->dr_mode == USB_DR_MODE_OTG) { > + spin_lock_irqsave(&dwc->lock, flags); > + dwc->dr_keep_role = true; > + spin_unlock_irqrestore(&dwc->lock, flags); > + } > + > + return 0; > +} > + > +static void dwc3_complete(struct device *dev) > +{ > + struct dwc3 *dwc = dev_get_drvdata(dev); > + unsigned long flags; > + > + if (dwc->dr_mode == USB_DR_MODE_OTG) { > + spin_lock_irqsave(&dwc->lock, flags); > + dwc->dr_keep_role = false; > + spin_unlock_irqrestore(&dwc->lock, flags); > + dwc3_drd_update(dwc); > + } > +} > + > static int dwc3_suspend(struct device *dev) > { > struct dwc3 *dwc = dev_get_drvdata(dev); > @@ -1451,6 +1478,10 @@ static const struct dev_pm_ops dwc3_dev_pm_ops = { > SET_SYSTEM_SLEEP_PM_OPS(dwc3_suspend, dwc3_resume) > SET_RUNTIME_PM_OPS(dwc3_runtime_suspend, dwc3_runtime_resume, > dwc3_runtime_idle) > +#ifdef CONFIG_PM_SLEEP > + .prepare = dwc3_prepare, > + .complete = dwc3_complete, > +#endif > }; > > #ifdef CONFIG_OF > diff --git a/drivers/usb/dwc3/core.h b/drivers/usb/dwc3/core.h > index 4a4a4c9..f5eb474 100644 > --- a/drivers/usb/dwc3/core.h > +++ b/drivers/usb/dwc3/core.h > @@ -786,6 +786,7 @@ struct dwc3_scratchpad_array { > * @dr_mode: requested mode of operation > * @current_dr_role: current role of operation when in dual-role mode > * @desired_dr_role: desired role of operation when in dual-role mode > + * @dr_keep_role: keep the current dual-role irrespective of ID changes > * @edev: extcon handle > * @edev_nb: extcon notifier > * @hsphy_mode: UTMI phy mode, one of following: > @@ -901,6 +902,7 @@ struct dwc3 { > enum usb_dr_mode dr_mode; > u32 current_dr_role; > u32 desired_dr_role; > + bool dr_keep_role; > struct extcon_dev *edev; > struct notifier_block edev_nb; > enum usb_phy_interface hsphy_mode; > @@ -1227,11 +1229,14 @@ static inline int dwc3_send_gadget_generic_command(struct dwc3 *dwc, > #if IS_ENABLED(CONFIG_USB_DWC3_DUAL_ROLE) > int dwc3_drd_init(struct dwc3 *dwc); > void dwc3_drd_exit(struct dwc3 *dwc); > +void dwc3_drd_update(struct dwc3 *dwc); > #else > static inline int dwc3_drd_init(struct dwc3 *dwc) > { return 0; } > static inline void dwc3_drd_exit(struct dwc3 *dwc) > { } > +static inline void dwc3_drd_update(struct dwc3 *dwc); > +{ } > #endif > > /* power management interface */ > diff --git a/drivers/usb/dwc3/drd.c b/drivers/usb/dwc3/drd.c > index cc8ab9a..177a8be 100644 > --- a/drivers/usb/dwc3/drd.c > +++ b/drivers/usb/dwc3/drd.c > @@ -13,7 +13,7 @@ > #include "core.h" > #include "gadget.h" > > -static void dwc3_drd_update(struct dwc3 *dwc) > +void dwc3_drd_update(struct dwc3 *dwc) > { > int id; > > @@ -31,9 +31,11 @@ static int dwc3_drd_notifier(struct notifier_block *nb, > { > struct dwc3 *dwc = container_of(nb, struct dwc3, edev_nb); > > - dwc3_set_mode(dwc, event ? > - DWC3_GCTL_PRTCAP_HOST : > - DWC3_GCTL_PRTCAP_DEVICE); > + if (!dwc->dr_keep_role) { > + dwc3_set_mode(dwc, event ? > + DWC3_GCTL_PRTCAP_HOST : > + DWC3_GCTL_PRTCAP_DEVICE); > + } > > return NOTIFY_DONE; > } -- The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum, a Linux Foundation Collaborative Project
Hi Manu, On 23/01/18 05:45, Manu Gautam wrote: > Hi, > > > On 1/22/2018 6:31 PM, Roger Quadros wrote: >> Adding/removing host/gadget controller before .pm_complete() >> causes a lock-up. Let's prevent any dual-role state change >> between .pm_prepare() and .pm_complete() to fix this. > > What kind of lock-up are you seeing? Some hardware lockup or software deadlock? > IMO using a freezable_wq for drd_work should address that? > I was seeing a software deadlock. freezable_wq is a good idea. I'll try it out. > >> <snip> -- cheers, -roger Texas Instruments Finland Oy, Porkkalankatu 22, 00180 Helsinki. Y-tunnus/Business ID: 0615521-4. Kotipaikka/Domicile: Helsinki
On 23/01/18 14:41, Roger Quadros wrote: > Hi Manu, > > On 23/01/18 05:45, Manu Gautam wrote: >> Hi, >> >> >> On 1/22/2018 6:31 PM, Roger Quadros wrote: >>> Adding/removing host/gadget controller before .pm_complete() >>> causes a lock-up. Let's prevent any dual-role state change >>> between .pm_prepare() and .pm_complete() to fix this. >> >> What kind of lock-up are you seeing? Some hardware lockup or software deadlock? >> IMO using a freezable_wq for drd_work should address that? >> > > I was seeing a software deadlock. freezable_wq is a good idea. I'll try it out. using freezable_wq doesn't get rid of the deadlock. If I use freezable_wq plus add some delay before I do a dwc3_host_init() in the work function then it starts to work. As dependence on delay looks fragile so I'll stick to the current implementation based on .pm_prepare/complete(). -- cheers, -roger Texas Instruments Finland Oy, Porkkalankatu 22, 00180 Helsinki. Y-tunnus/Business ID: 0615521-4. Kotipaikka/Domicile: Helsinki
Hi, On 24/01/18 14:19, Roger Quadros wrote: > On 23/01/18 14:41, Roger Quadros wrote: >> Hi Manu, >> >> On 23/01/18 05:45, Manu Gautam wrote: >>> Hi, >>> >>> >>> On 1/22/2018 6:31 PM, Roger Quadros wrote: >>>> Adding/removing host/gadget controller before .pm_complete() >>>> causes a lock-up. Let's prevent any dual-role state change >>>> between .pm_prepare() and .pm_complete() to fix this. >>> >>> What kind of lock-up are you seeing? Some hardware lockup or software deadlock? >>> IMO using a freezable_wq for drd_work should address that? >>> >> >> I was seeing a software deadlock. freezable_wq is a good idea. I'll try it out. > > using freezable_wq doesn't get rid of the deadlock. > If I use freezable_wq plus add some delay before I do a dwc3_host_init() > in the work function then it starts to work. > > As dependence on delay looks fragile so I'll stick to the current implementation > based on .pm_prepare/complete(). > So I was able to reproduce the lock up with my series as well. On further investigation this is what I see. There are 2 different scenarios. 1) controller in host mode prior to system suspend and switches to device mode during resume. In this case when we call dwc3_host_exit() before tasks are thawed xhci_plat_remove() seems to lock up at the second usb_remove_hcd() call. This issue is resolved by using system_freezable_wq for the _dwc3_set_mode() function. 2) controller in device mode prior to system suspend and switches to host mode during resume. In this case we sleep indefinitely in _dwc3_set_mode due to dwc3_set_mode()->dwc3_gadget_exit()->usb_del_gadget_udc()->udc_stop()->dwc3_gadget_stop()->wait_event_lock_irq() This is not resolved by moving the dwc3_set_mode() call to .pm_complete() nor via the system_freezable_wq. One way I could fix this is like so. Felipe, could you please suggest a better way? Maybe we need to do this in dwc3_gadget_exit() before calling usb_del_gadget_udc() ? diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c index b417d9a..0c903c1 100644 --- a/drivers/usb/dwc3/core.c +++ b/drivers/usb/dwc3/core.c @@ -109,6 +109,7 @@ static void __dwc3_set_mode(struct work_struct *work) struct dwc3 *dwc = work_to_dwc(work); unsigned long flags; int ret; + int epnum; if (!dwc->desired_dr_role) return; @@ -124,6 +125,17 @@ static void __dwc3_set_mode(struct work_struct *work) dwc3_host_exit(dwc); break; case DWC3_GCTL_PRTCAP_DEVICE: + spin_lock_irqsave(&dwc->lock, flags); + for (epnum = 2; epnum < DWC3_ENDPOINTS_NUM; epnum++) { + struct dwc3_ep *dep = dwc->eps[epnum]; + + if (!dep) + continue; + + dep->flags &= ~DWC3_EP_END_TRANSFER_PENDING; + } + spin_unlock_irqrestore(&dwc->lock, flags); + dwc3_gadget_exit(dwc); dwc3_event_buffers_cleanup(dwc); break; -- -- cheers, -roger Texas Instruments Finland Oy, Porkkalankatu 22, 00180 Helsinki. Y-tunnus/Business ID: 0615521-4. Kotipaikka/Domicile: Helsinki
Hi, Roger Quadros <rogerq@ti.com> writes: > Adding/removing host/gadget controller before .pm_complete() > causes a lock-up. Let's prevent any dual-role state change > between .pm_prepare() and .pm_complete() to fix this. > > Signed-off-by: Roger Quadros <rogerq@ti.com> > --- > drivers/usb/dwc3/core.c | 31 +++++++++++++++++++++++++++++++ > drivers/usb/dwc3/core.h | 5 +++++ > drivers/usb/dwc3/drd.c | 10 ++++++---- > 3 files changed, 42 insertions(+), 4 deletions(-) > > diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c > index 42379cc..85388dd 100644 > --- a/drivers/usb/dwc3/core.c > +++ b/drivers/usb/dwc3/core.c > @@ -1414,6 +1414,33 @@ static int dwc3_runtime_idle(struct device *dev) > #endif /* CONFIG_PM */ > > #ifdef CONFIG_PM_SLEEP > +static int dwc3_prepare(struct device *dev) > +{ > + struct dwc3 *dwc = dev_get_drvdata(dev); > + unsigned long flags; > + > + if (dwc->dr_mode == USB_DR_MODE_OTG) { > + spin_lock_irqsave(&dwc->lock, flags); > + dwc->dr_keep_role = true; > + spin_unlock_irqrestore(&dwc->lock, flags); > + } > + > + return 0; > +} > + > +static void dwc3_complete(struct device *dev) > +{ > + struct dwc3 *dwc = dev_get_drvdata(dev); > + unsigned long flags; > + > + if (dwc->dr_mode == USB_DR_MODE_OTG) { > + spin_lock_irqsave(&dwc->lock, flags); > + dwc->dr_keep_role = false; > + spin_unlock_irqrestore(&dwc->lock, flags); > + dwc3_drd_update(dwc); > + } > +} wouldn't it be far easier to just disable OTG IRQ in prepare? and, perhaps, also flush_work_sync() ? -- balbi
Felipe, On 12/02/18 10:54, Felipe Balbi wrote: > > Hi, > > Roger Quadros <rogerq@ti.com> writes: >> Adding/removing host/gadget controller before .pm_complete() >> causes a lock-up. Let's prevent any dual-role state change >> between .pm_prepare() and .pm_complete() to fix this. >> >> Signed-off-by: Roger Quadros <rogerq@ti.com> >> --- >> drivers/usb/dwc3/core.c | 31 +++++++++++++++++++++++++++++++ >> drivers/usb/dwc3/core.h | 5 +++++ >> drivers/usb/dwc3/drd.c | 10 ++++++---- >> 3 files changed, 42 insertions(+), 4 deletions(-) >> >> diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c >> index 42379cc..85388dd 100644 >> --- a/drivers/usb/dwc3/core.c >> +++ b/drivers/usb/dwc3/core.c >> @@ -1414,6 +1414,33 @@ static int dwc3_runtime_idle(struct device *dev) >> #endif /* CONFIG_PM */ >> >> #ifdef CONFIG_PM_SLEEP >> +static int dwc3_prepare(struct device *dev) >> +{ >> + struct dwc3 *dwc = dev_get_drvdata(dev); >> + unsigned long flags; >> + >> + if (dwc->dr_mode == USB_DR_MODE_OTG) { >> + spin_lock_irqsave(&dwc->lock, flags); >> + dwc->dr_keep_role = true; >> + spin_unlock_irqrestore(&dwc->lock, flags); >> + } >> + >> + return 0; >> +} >> + >> +static void dwc3_complete(struct device *dev) >> +{ >> + struct dwc3 *dwc = dev_get_drvdata(dev); >> + unsigned long flags; >> + >> + if (dwc->dr_mode == USB_DR_MODE_OTG) { >> + spin_lock_irqsave(&dwc->lock, flags); >> + dwc->dr_keep_role = false; >> + spin_unlock_irqrestore(&dwc->lock, flags); >> + dwc3_drd_update(dwc); >> + } >> +} > > wouldn't it be far easier to just disable OTG IRQ in prepare? and, > perhaps, also flush_work_sync() ? > There was some more discussion on this here [1]. Apparently using system_freezable_wq and a patch mentioned at end of [1] is sufficient as well [1] https://lkml.org/lkml/2018/1/25/384 Could you please share your view there? -- cheers, -roger Texas Instruments Finland Oy, Porkkalankatu 22, 00180 Helsinki. Y-tunnus/Business ID: 0615521-4. Kotipaikka/Domicile: Helsinki
Felipe, On 25/01/18 18:11, Roger Quadros wrote: > Hi, > > On 24/01/18 14:19, Roger Quadros wrote: >> On 23/01/18 14:41, Roger Quadros wrote: >>> Hi Manu, >>> >>> On 23/01/18 05:45, Manu Gautam wrote: >>>> Hi, >>>> >>>> >>>> On 1/22/2018 6:31 PM, Roger Quadros wrote: >>>>> Adding/removing host/gadget controller before .pm_complete() >>>>> causes a lock-up. Let's prevent any dual-role state change >>>>> between .pm_prepare() and .pm_complete() to fix this. >>>> >>>> What kind of lock-up are you seeing? Some hardware lockup or software deadlock? >>>> IMO using a freezable_wq for drd_work should address that? >>>> >>> >>> I was seeing a software deadlock. freezable_wq is a good idea. I'll try it out. >> >> using freezable_wq doesn't get rid of the deadlock. >> If I use freezable_wq plus add some delay before I do a dwc3_host_init() >> in the work function then it starts to work. >> >> As dependence on delay looks fragile so I'll stick to the current implementation >> based on .pm_prepare/complete(). >> > > So I was able to reproduce the lock up with my series as well. On further investigation > this is what I see. > > There are 2 different scenarios. > > 1) controller in host mode prior to system suspend and switches to device mode during resume. > > In this case when we call dwc3_host_exit() before tasks are thawed > xhci_plat_remove() seems to lock up at the second usb_remove_hcd() call. > This issue is resolved by using system_freezable_wq for the _dwc3_set_mode() function. > > > 2) controller in device mode prior to system suspend and switches to host mode during resume. > > In this case we sleep indefinitely in _dwc3_set_mode due to > dwc3_set_mode()->dwc3_gadget_exit()->usb_del_gadget_udc()->udc_stop()->dwc3_gadget_stop()->wait_event_lock_irq() > > This is not resolved by moving the dwc3_set_mode() call to .pm_complete() nor via the system_freezable_wq. > > One way I could fix this is like so. > > Felipe, could you please suggest a better way? > Maybe we need to do this in dwc3_gadget_exit() before calling usb_del_gadget_udc() ? Once you let me know your opinion I can revise this series. Thanks. > > diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c > index b417d9a..0c903c1 100644 > --- a/drivers/usb/dwc3/core.c > +++ b/drivers/usb/dwc3/core.c > @@ -109,6 +109,7 @@ static void __dwc3_set_mode(struct work_struct *work) > struct dwc3 *dwc = work_to_dwc(work); > unsigned long flags; > int ret; > + int epnum; > > if (!dwc->desired_dr_role) > return; > @@ -124,6 +125,17 @@ static void __dwc3_set_mode(struct work_struct *work) > dwc3_host_exit(dwc); > break; > case DWC3_GCTL_PRTCAP_DEVICE: > + spin_lock_irqsave(&dwc->lock, flags); > + for (epnum = 2; epnum < DWC3_ENDPOINTS_NUM; epnum++) { > + struct dwc3_ep *dep = dwc->eps[epnum]; > + > + if (!dep) > + continue; > + > + dep->flags &= ~DWC3_EP_END_TRANSFER_PENDING; > + } > + spin_unlock_irqrestore(&dwc->lock, flags); > + > dwc3_gadget_exit(dwc); > dwc3_event_buffers_cleanup(dwc); > break; > -- cheers, -roger Texas Instruments Finland Oy, Porkkalankatu 22, 00180 Helsinki. Y-tunnus/Business ID: 0615521-4. Kotipaikka/Domicile: Helsinki
diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c index 42379cc..85388dd 100644 --- a/drivers/usb/dwc3/core.c +++ b/drivers/usb/dwc3/core.c @@ -1414,6 +1414,33 @@ static int dwc3_runtime_idle(struct device *dev) #endif /* CONFIG_PM */ #ifdef CONFIG_PM_SLEEP +static int dwc3_prepare(struct device *dev) +{ + struct dwc3 *dwc = dev_get_drvdata(dev); + unsigned long flags; + + if (dwc->dr_mode == USB_DR_MODE_OTG) { + spin_lock_irqsave(&dwc->lock, flags); + dwc->dr_keep_role = true; + spin_unlock_irqrestore(&dwc->lock, flags); + } + + return 0; +} + +static void dwc3_complete(struct device *dev) +{ + struct dwc3 *dwc = dev_get_drvdata(dev); + unsigned long flags; + + if (dwc->dr_mode == USB_DR_MODE_OTG) { + spin_lock_irqsave(&dwc->lock, flags); + dwc->dr_keep_role = false; + spin_unlock_irqrestore(&dwc->lock, flags); + dwc3_drd_update(dwc); + } +} + static int dwc3_suspend(struct device *dev) { struct dwc3 *dwc = dev_get_drvdata(dev); @@ -1451,6 +1478,10 @@ static const struct dev_pm_ops dwc3_dev_pm_ops = { SET_SYSTEM_SLEEP_PM_OPS(dwc3_suspend, dwc3_resume) SET_RUNTIME_PM_OPS(dwc3_runtime_suspend, dwc3_runtime_resume, dwc3_runtime_idle) +#ifdef CONFIG_PM_SLEEP + .prepare = dwc3_prepare, + .complete = dwc3_complete, +#endif }; #ifdef CONFIG_OF diff --git a/drivers/usb/dwc3/core.h b/drivers/usb/dwc3/core.h index 4a4a4c9..f5eb474 100644 --- a/drivers/usb/dwc3/core.h +++ b/drivers/usb/dwc3/core.h @@ -786,6 +786,7 @@ struct dwc3_scratchpad_array { * @dr_mode: requested mode of operation * @current_dr_role: current role of operation when in dual-role mode * @desired_dr_role: desired role of operation when in dual-role mode + * @dr_keep_role: keep the current dual-role irrespective of ID changes * @edev: extcon handle * @edev_nb: extcon notifier * @hsphy_mode: UTMI phy mode, one of following: @@ -901,6 +902,7 @@ struct dwc3 { enum usb_dr_mode dr_mode; u32 current_dr_role; u32 desired_dr_role; + bool dr_keep_role; struct extcon_dev *edev; struct notifier_block edev_nb; enum usb_phy_interface hsphy_mode; @@ -1227,11 +1229,14 @@ static inline int dwc3_send_gadget_generic_command(struct dwc3 *dwc, #if IS_ENABLED(CONFIG_USB_DWC3_DUAL_ROLE) int dwc3_drd_init(struct dwc3 *dwc); void dwc3_drd_exit(struct dwc3 *dwc); +void dwc3_drd_update(struct dwc3 *dwc); #else static inline int dwc3_drd_init(struct dwc3 *dwc) { return 0; } static inline void dwc3_drd_exit(struct dwc3 *dwc) { } +static inline void dwc3_drd_update(struct dwc3 *dwc); +{ } #endif /* power management interface */ diff --git a/drivers/usb/dwc3/drd.c b/drivers/usb/dwc3/drd.c index cc8ab9a..177a8be 100644 --- a/drivers/usb/dwc3/drd.c +++ b/drivers/usb/dwc3/drd.c @@ -13,7 +13,7 @@ #include "core.h" #include "gadget.h" -static void dwc3_drd_update(struct dwc3 *dwc) +void dwc3_drd_update(struct dwc3 *dwc) { int id; @@ -31,9 +31,11 @@ static int dwc3_drd_notifier(struct notifier_block *nb, { struct dwc3 *dwc = container_of(nb, struct dwc3, edev_nb); - dwc3_set_mode(dwc, event ? - DWC3_GCTL_PRTCAP_HOST : - DWC3_GCTL_PRTCAP_DEVICE); + if (!dwc->dr_keep_role) { + dwc3_set_mode(dwc, event ? + DWC3_GCTL_PRTCAP_HOST : + DWC3_GCTL_PRTCAP_DEVICE); + } return NOTIFY_DONE; }
Adding/removing host/gadget controller before .pm_complete() causes a lock-up. Let's prevent any dual-role state change between .pm_prepare() and .pm_complete() to fix this. Signed-off-by: Roger Quadros <rogerq@ti.com> --- drivers/usb/dwc3/core.c | 31 +++++++++++++++++++++++++++++++ drivers/usb/dwc3/core.h | 5 +++++ drivers/usb/dwc3/drd.c | 10 ++++++---- 3 files changed, 42 insertions(+), 4 deletions(-) -- cheers, -roger Texas Instruments Finland Oy, Porkkalankatu 22, 00180 Helsinki. Y-tunnus/Business ID: 0615521-4. Kotipaikka/Domicile: Helsinki