Message ID | 20220921094244.606948-11-daniel.lezcano@linaro.org |
---|---|
State | Superseded |
Headers | show |
Series | Rework the trip points creation | expand |
Hi Daniel, On 21.09.2022 11:42, Daniel Lezcano wrote: > The thermal_zone_get_trip() does the same check as > of_thermal_is_trip_valid(). Replace the call to > of_thermal_is_trip_valid() by thermal_zone_get_trip(). > > Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org> > --- This patch landed in linux next-20220923 as commit 4a71bb8005ba ("thermal/drivers/exynos: Replace of_thermal_is_trip_valid() by thermal_zone_get_trip()"). Unfortunately it causes deadlock on all Exynos based board: ============================================ WARNING: possible recursive locking detected 6.0.0-rc1-00062-g4a71bb8005ba #12855 Not tainted -------------------------------------------- swapper/0/1 is trying to acquire lock: c263c394 (&tz->lock){+.+.}-{3:3}, at: thermal_zone_device_update.part.0+0x114/0x538 but task is already holding lock: c263c394 (&tz->lock){+.+.}-{3:3}, at: thermal_zone_device_update.part.0+0x3c/0x538 other info that might help us debug this: Possible unsafe locking scenario: CPU0 ---- lock(&tz->lock); lock(&tz->lock); *** DEADLOCK *** May be due to missing lock nesting notation 2 locks held by swapper/0/1: #0: c1d5248c (&dev->mutex){....}-{3:3}, at: __driver_attach+0xe4/0x1f0 #1: c263c394 (&tz->lock){+.+.}-{3:3}, at: thermal_zone_device_update.part.0+0x3c/0x538 stack backtrace: CPU: 1 PID: 1 Comm: swapper/0 Not tainted 6.0.0-rc1-00062-g4a71bb8005ba #12855 Hardware name: Samsung Exynos (Flattened Device Tree) unwind_backtrace from show_stack+0x10/0x14 show_stack from dump_stack_lvl+0x58/0x70 dump_stack_lvl from __lock_acquire+0x146c/0x2a7c __lock_acquire from lock_acquire+0x124/0x3e4 lock_acquire from __mutex_lock+0x90/0x948 __mutex_lock from mutex_lock_nested+0x1c/0x24 mutex_lock_nested from thermal_zone_device_update.part.0+0x114/0x538 thermal_zone_device_update.part.0 from thermal_zone_device_set_mode+0x70/0x98 thermal_zone_device_set_mode from thermal_of_zone_register+0x424/0x69c thermal_of_zone_register from devm_thermal_of_zone_register+0x58/0x94 devm_thermal_of_zone_register from exynos_tmu_probe+0x29c/0x728 exynos_tmu_probe from platform_probe+0x5c/0xb8 platform_probe from really_probe+0xe0/0x414 really_probe from __driver_probe_device+0xa0/0x208 __driver_probe_device from driver_probe_device+0x30/0xc0 driver_probe_device from __driver_attach+0xf0/0x1f0 __driver_attach from bus_for_each_dev+0x70/0xb0 bus_for_each_dev from bus_add_driver+0x174/0x218 bus_add_driver from driver_register+0x88/0x11c driver_register from do_one_initcall+0x64/0x380 do_one_initcall from kernel_init_freeable+0x1c0/0x224 kernel_init_freeable from kernel_init+0x18/0x12c kernel_init from ret_from_fork+0x14/0x2c Exception stack(0xf082dfb0 to 0xf082dff8) [deadlock] Something is wrong in locking in the functions from the above stacktrace. > drivers/thermal/samsung/exynos_tmu.c | 9 ++++++--- > 1 file changed, 6 insertions(+), 3 deletions(-) > > diff --git a/drivers/thermal/samsung/exynos_tmu.c b/drivers/thermal/samsung/exynos_tmu.c > index 91e6860b5ec4..34b460092308 100644 > --- a/drivers/thermal/samsung/exynos_tmu.c > +++ b/drivers/thermal/samsung/exynos_tmu.c > @@ -554,13 +554,14 @@ static void exynos4210_tmu_control(struct platform_device *pdev, bool on) > { > struct exynos_tmu_data *data = platform_get_drvdata(pdev); > struct thermal_zone_device *tz = data->tzd; > + struct thermal_trip trip; > unsigned int con, interrupt_en = 0, i; > > con = get_con_reg(data, readl(data->base + EXYNOS_TMU_REG_CONTROL)); > > if (on) { > for (i = 0; i < data->ntrip; i++) { > - if (!of_thermal_is_trip_valid(tz, i)) > + if (thermal_zone_get_trip(tz, i, &trip)) > continue; > > interrupt_en |= > @@ -584,13 +585,14 @@ static void exynos5433_tmu_control(struct platform_device *pdev, bool on) > { > struct exynos_tmu_data *data = platform_get_drvdata(pdev); > struct thermal_zone_device *tz = data->tzd; > + struct thermal_trip trip; > unsigned int con, interrupt_en = 0, pd_det_en, i; > > con = get_con_reg(data, readl(data->base + EXYNOS_TMU_REG_CONTROL)); > > if (on) { > for (i = 0; i < data->ntrip; i++) { > - if (!of_thermal_is_trip_valid(tz, i)) > + if (thermal_zone_get_trip(tz, i, &trip)) > continue; > > interrupt_en |= > @@ -615,13 +617,14 @@ static void exynos7_tmu_control(struct platform_device *pdev, bool on) > { > struct exynos_tmu_data *data = platform_get_drvdata(pdev); > struct thermal_zone_device *tz = data->tzd; > + struct thermal_trip trip; > unsigned int con, interrupt_en = 0, i; > > con = get_con_reg(data, readl(data->base + EXYNOS_TMU_REG_CONTROL)); > > if (on) { > for (i = 0; i < data->ntrip; i++) { > - if (!of_thermal_is_trip_valid(tz, i)) > + if (thermal_zone_get_trip(tz, i, &trip)) > continue; > > interrupt_en |= Best regards
Hi Marek, thanks for reporting On 23/09/2022 16:09, Marek Szyprowski wrote: [ ... ] > Exception stack(0xf082dfb0 to 0xf082dff8) > > [deadlock] > > Something is wrong in locking in the functions from the above stacktrace. Are you sure this deadlock is coming from this patch? Does a revert of this patch solve the issue ?
Hi Daniel, On 23.09.2022 19:40, Daniel Lezcano wrote: > > Hi Marek, > > thanks for reporting > > On 23/09/2022 16:09, Marek Szyprowski wrote: > > [ ... ] > >> Exception stack(0xf082dfb0 to 0xf082dff8) >> >> [deadlock] >> >> Something is wrong in locking in the functions from the above >> stacktrace. > > Are you sure this deadlock is coming from this patch? Does a revert of > this patch solve the issue ? > Ups, my fault. It looks that I've copied SHA from the wrong window while preparing the report. The bisection pointed to the 78ffa3e58d93 ("thermal/core: Add a generic thermal_zone_get_trip() function") commit and I've already found how to fix the deadlock. I will report it again under the proper patch. Best regards
diff --git a/drivers/thermal/samsung/exynos_tmu.c b/drivers/thermal/samsung/exynos_tmu.c index 91e6860b5ec4..34b460092308 100644 --- a/drivers/thermal/samsung/exynos_tmu.c +++ b/drivers/thermal/samsung/exynos_tmu.c @@ -554,13 +554,14 @@ static void exynos4210_tmu_control(struct platform_device *pdev, bool on) { struct exynos_tmu_data *data = platform_get_drvdata(pdev); struct thermal_zone_device *tz = data->tzd; + struct thermal_trip trip; unsigned int con, interrupt_en = 0, i; con = get_con_reg(data, readl(data->base + EXYNOS_TMU_REG_CONTROL)); if (on) { for (i = 0; i < data->ntrip; i++) { - if (!of_thermal_is_trip_valid(tz, i)) + if (thermal_zone_get_trip(tz, i, &trip)) continue; interrupt_en |= @@ -584,13 +585,14 @@ static void exynos5433_tmu_control(struct platform_device *pdev, bool on) { struct exynos_tmu_data *data = platform_get_drvdata(pdev); struct thermal_zone_device *tz = data->tzd; + struct thermal_trip trip; unsigned int con, interrupt_en = 0, pd_det_en, i; con = get_con_reg(data, readl(data->base + EXYNOS_TMU_REG_CONTROL)); if (on) { for (i = 0; i < data->ntrip; i++) { - if (!of_thermal_is_trip_valid(tz, i)) + if (thermal_zone_get_trip(tz, i, &trip)) continue; interrupt_en |= @@ -615,13 +617,14 @@ static void exynos7_tmu_control(struct platform_device *pdev, bool on) { struct exynos_tmu_data *data = platform_get_drvdata(pdev); struct thermal_zone_device *tz = data->tzd; + struct thermal_trip trip; unsigned int con, interrupt_en = 0, i; con = get_con_reg(data, readl(data->base + EXYNOS_TMU_REG_CONTROL)); if (on) { for (i = 0; i < data->ntrip; i++) { - if (!of_thermal_is_trip_valid(tz, i)) + if (thermal_zone_get_trip(tz, i, &trip)) continue; interrupt_en |=
The thermal_zone_get_trip() does the same check as of_thermal_is_trip_valid(). Replace the call to of_thermal_is_trip_valid() by thermal_zone_get_trip(). Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org> --- drivers/thermal/samsung/exynos_tmu.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-)