diff mbox series

[v4,05/23] interconnect: icc-clk: add support for scaling using OPP

Message ID 20230827115033.935089-6-dmitry.baryshkov@linaro.org
State New
Headers show
Series ARM: qcom: apq8064: support CPU frequency scaling | expand

Commit Message

Dmitry Baryshkov Aug. 27, 2023, 11:50 a.m. UTC
Sometimes it might be required to scale the clock using the OPP
framework (e.g. to scale regulators following the required clock rate).
Extend the interconnec-clk framework to handle OPP case in addition to
scaling the clock.

Reviewed-by: Konrad Dybcio <konrad.dybcio@linaro.org>
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
---
 drivers/interconnect/icc-clk.c   | 13 +++++++++++--
 include/linux/interconnect-clk.h |  1 +
 2 files changed, 12 insertions(+), 2 deletions(-)

Comments

Dmitry Baryshkov Aug. 28, 2023, 9:18 p.m. UTC | #1
On Mon, 28 Aug 2023 at 21:10, Stephen Boyd <sboyd@kernel.org> wrote:
>
> Quoting Dmitry Baryshkov (2023-08-27 04:50:15)
> > diff --git a/drivers/interconnect/icc-clk.c b/drivers/interconnect/icc-clk.c
> > index d787f2ea36d9..45ffb068979d 100644
> > --- a/drivers/interconnect/icc-clk.c
> > +++ b/drivers/interconnect/icc-clk.c
> > @@ -25,12 +28,16 @@ struct icc_clk_provider {
> >  static int icc_clk_set(struct icc_node *src, struct icc_node *dst)
> >  {
> >         struct icc_clk_node *qn = src->data;
> > +       unsigned long rate = icc_units_to_bps(src->peak_bw);
> >         int ret;
> >
> >         if (!qn || !qn->clk)
> >                 return 0;
> >
> > -       if (!src->peak_bw) {
> > +       if (qn->opp)
> > +               return dev_pm_opp_set_rate(qn->dev, rate);
>
> Just curious how does lockdep do with this? Doesn't OPP call into
> interconnect code, so lockdep will complain about ABBA?

Interesting question. I should check it.
Dmitry Baryshkov Oct. 3, 2023, 3:31 p.m. UTC | #2
On Tue, 3 Oct 2023 at 17:17, Stephan Gerhold <stephan@gerhold.net> wrote:
>
> On Tue, Oct 03, 2023 at 04:36:11PM +0300, Dmitry Baryshkov wrote:
> > On Tue, 3 Oct 2023 at 16:02, Stephan Gerhold <stephan@gerhold.net> wrote:
> > >
> > > On Tue, Oct 03, 2023 at 11:30:28AM +0300, Dmitry Baryshkov wrote:
> > > > On 28/08/2023 21:09, Stephen Boyd wrote:
> > > > > Quoting Dmitry Baryshkov (2023-08-27 04:50:15)
> > > > > > diff --git a/drivers/interconnect/icc-clk.c b/drivers/interconnect/icc-clk.c
> > > > > > index d787f2ea36d9..45ffb068979d 100644
> > > > > > --- a/drivers/interconnect/icc-clk.c
> > > > > > +++ b/drivers/interconnect/icc-clk.c
> > > > > > @@ -25,12 +28,16 @@ struct icc_clk_provider {
> > > > > >   static int icc_clk_set(struct icc_node *src, struct icc_node *dst)
> > > > > >   {
> > > > > >          struct icc_clk_node *qn = src->data;
> > > > > > +       unsigned long rate = icc_units_to_bps(src->peak_bw);
> > > > > >          int ret;
> > > > > >          if (!qn || !qn->clk)
> > > > > >                  return 0;
> > > > > > -       if (!src->peak_bw) {
> > > > > > +       if (qn->opp)
> > > > > > +               return dev_pm_opp_set_rate(qn->dev, rate);
> > > > >
> > > > > Just curious how does lockdep do with this? Doesn't OPP call into
> > > > > interconnect code, so lockdep will complain about ABBA?
> > > >
> > > > Unfortunately it does. It seems, the icc-clk is not a proper way to go here.
> > > > I will take a look at reusing set_required_opps for this case.
> > > >
> > >
> > > Could you elaborate a bit which locks exactly cause trouble here?
> > > I'm probably missing something here.
> > >
> > > From a quick look at the OPP code I don't see a global lock taken there
> > > for the entire OPP switch sequence, so I'm not sure how this could cause
> > > an ABBA deadlock.
> >
> > For example:
> >
> > [    7.680041] Chain exists of:
> > [    7.680041]   icc_bw_lock --> regulator_ww_class_acquire --> fs_reclaim
> > [    7.680041]
> > [    7.687955]  Possible unsafe locking scenario:
> > [    7.687955]
> > [    7.699039]        CPU0                    CPU1
> > [    7.704752]        ----                    ----
> > [    7.709266]   lock(fs_reclaim);
> > [    7.713779]                                lock(regulator_ww_class_acquire);
> > [    7.716919]                                lock(fs_reclaim);
> > [    7.724204]   lock(icc_bw_lock);
> >
>
> Hm, I'm not entirely sure how to interpret this. Is there really a
> connection between OPP and ICC here? It looks more like ICC <->
> regulator and somehow related to memory allocations (the fs_reclaim).
>
> Was there more output around this?

[    7.362902] ======================================================
[    7.363447] WARNING: possible circular locking dependency detected
[    7.369434] 6.6.0-rc3-next-20230929-08870-g49503b4b9f55 #129 Not tainted
[    7.375604] ------------------------------------------------------
[    7.382453] swapper/0/1 is trying to acquire lock:
[    7.388437] c143445c (icc_bw_lock){+.+.}-{3:3}, at: icc_init+0x0/0x104
[    7.393225]
[    7.393225] but task is already holding lock:
[    7.399730] c1397444 (fs_reclaim){+.+.}-{0:0}, at: icc_init+0x18/0x104
[    7.405547]
[    7.405547] which lock already depends on the new lock.
[    7.405547]
[    7.412077]
[    7.412077] the existing dependency chain (in reverse order) is:
[    7.420310]
[    7.420310] -> #2 (fs_reclaim){+.+.}-{0:0}:
[    7.427767]        fs_reclaim_acquire+0x60/0x94
[    7.433492]        __kmem_cache_alloc_node+0x30/0x2b4
[    7.437742]        __kmalloc_node_track_caller+0x48/0x244
[    7.442954]        kstrdup+0x30/0x58
[    7.448325]        create_regulator+0x70/0x2b8
[    7.451981]        regulator_resolve_supply+0x3bc/0x7f4
[    7.456234]        regulator_register+0x9b0/0xb50
[    7.461352]        devm_regulator_register+0x50/0x8c
[    7.466216]        rpm_reg_probe+0xf4/0x1a0
[    7.471244]        platform_probe+0x5c/0xb0
[    7.475157]        really_probe+0xc8/0x2d8
[    7.479318]        __driver_probe_device+0x88/0x1a0
[    7.483488]        driver_probe_device+0x30/0x108
[    7.488262]        __driver_attach_async_helper+0x4c/0xa0
[    7.493133]        async_run_entry_fn+0x24/0xb0
[    7.498504]        process_one_work+0x208/0x5e4
[    7.502844]        worker_thread+0x1e0/0x4a4
[    7.507356]        kthread+0x100/0x120
[    7.511874]        ret_from_fork+0x14/0x28
[    7.515428]
[    7.515428] -> #1 (regulator_ww_class_acquire){+.+.}-{0:0}:
[    7.519528]        lock_release+0x164/0x340
[    7.526713]        __mutex_unlock_slowpath+0x3c/0x2fc
[    7.530626]        regulator_lock_dependent+0x1a4/0x298
[    7.535839]        regulator_set_voltage+0x30/0xfc
[    7.540866]        krait_l2_set_one_supply+0x28/0x84
[    7.545729]        krait_l2_config_regulators+0x90/0x1c4
[    7.550851]        _set_opp+0xf0/0x438
[    7.556144]        dev_pm_opp_set_rate+0x118/0x224
[    7.559703]        icc_node_add+0xe8/0x15c
[    7.564474]        icc_clk_register+0x150/0x208
[    7.568556]        krait_l2_probe+0x100/0x114
[    7.572988]        platform_probe+0x5c/0xb0
[    7.577495]        really_probe+0xc8/0x2d8
[    7.581487]        __driver_probe_device+0x88/0x1a0
[    7.585658]        driver_probe_device+0x30/0x108
[    7.590433]        __device_attach_driver+0x94/0x10c
[    7.595300]        bus_for_each_drv+0x84/0xd8
[    7.600326]        __device_attach+0xac/0x1a8
[    7.604580]        bus_probe_device+0x8c/0x90
[    7.608919]        device_add+0x5c8/0x7a4
[    7.613265]        of_platform_device_create_pdata+0x90/0xbc
[    7.617260]        qcom_cpufreq_init+0xa8/0x130
[    7.622984]        do_one_initcall+0x68/0x2d8
[    7.627235]        kernel_init_freeable+0x1ac/0x208
[    7.631752]        kernel_init+0x18/0x12c
[    7.636441]        ret_from_fork+0x14/0x28
[    7.640602]
[    7.640602] -> #0 (icc_bw_lock){+.+.}-{3:3}:
[    7.644607]        __lock_acquire+0x1530/0x28fc
[    7.650413]        lock_acquire+0x10c/0x33c
[    7.654757]        icc_init+0x40/0x104
[    7.658917]        do_one_initcall+0x68/0x2d8
[    7.662740]        kernel_init_freeable+0x1ac/0x208
[    7.667168]        kernel_init+0x18/0x12c
[    7.671855]        ret_from_fork+0x14/0x28
[    7.676018]
[    7.676018] other info that might help us debug this:
[    7.676018]
[    7.680041] Chain exists of:
[    7.680041]   icc_bw_lock --> regulator_ww_class_acquire --> fs_reclaim
[    7.680041]
[    7.687955]  Possible unsafe locking scenario:
[    7.687955]
[    7.699039]        CPU0                    CPU1
[    7.704752]        ----                    ----
[    7.709266]   lock(fs_reclaim);
[    7.713779]                                lock(regulator_ww_class_acquire);
[    7.716919]                                lock(fs_reclaim);
[    7.724204]   lock(icc_bw_lock);
[    7.729833]
[    7.729833]  *** DEADLOCK ***
[    7.729833]
[    7.733071] 1 lock held by swapper/0/1:
[    7.738691]  #0: c1397444 (fs_reclaim){+.+.}-{0:0}, at: icc_init+0x18/0x104
[    7.742528]
[    7.742528] stack backtrace:
[    7.749463] CPU: 3 PID: 1 Comm: swapper/0 Not tainted
6.6.0-rc3-next-20230929-08870-g49503b4b9f55 #129
[    7.754010] Hardware name: Generic DT based system
[    7.763183]  unwind_backtrace from show_stack+0x10/0x14
[    7.767957]  show_stack from dump_stack_lvl+0x60/0x90
[    7.773082]  dump_stack_lvl from check_noncircular+0x174/0x18c
[    7.778288]  check_noncircular from __lock_acquire+0x1530/0x28fc
[    7.784018]  __lock_acquire from lock_acquire+0x10c/0x33c
[    7.790178]  lock_acquire from icc_init+0x40/0x104
[    7.795475]  icc_init from do_one_initcall+0x68/0x2d8
[    7.800159]  do_one_initcall from kernel_init_freeable+0x1ac/0x208
[    7.805286]  kernel_init_freeable from kernel_init+0x18/0x12c
[    7.811361]  kernel_init from ret_from_fork+0x14/0x28
[    7.817177] Exception stack(0xf081dfb0 to 0xf081dff8)

> In general, I would expect that adjusting a regulator from an
> interconnect driver should be made possible somehow. Just because the
> RPM firmware or similar typically handles this internally on Qualcomm
> platforms doesn't mean no one else will ever need to do this. If you
> have a higher bandwidth requests, need to increase the clock, which
> again depends on a higher voltage, then you need to be able to do the
> regulator_set_voltage() from the ICC driver somehow.

This kind of dependency is handled by the consumer, not by the ICC
driver. Usually we explicitly put them to the opp tables, matching
rates and volage.

I think I'll still check the required-opps approach. It will require
manually aggregating the L2 rate requirements, but then we will be
safe from such kinds of interdependencies. And maybe then we can also
rewrite msm8996 to use required-opps too.
Stephan Gerhold Oct. 3, 2023, 4:04 p.m. UTC | #3
On Tue, Oct 03, 2023 at 06:31:27PM +0300, Dmitry Baryshkov wrote:
> On Tue, 3 Oct 2023 at 17:17, Stephan Gerhold <stephan@gerhold.net> wrote:
> > On Tue, Oct 03, 2023 at 04:36:11PM +0300, Dmitry Baryshkov wrote:
> > > On Tue, 3 Oct 2023 at 16:02, Stephan Gerhold <stephan@gerhold.net> wrote:
> > > > On Tue, Oct 03, 2023 at 11:30:28AM +0300, Dmitry Baryshkov wrote:
> > > > > On 28/08/2023 21:09, Stephen Boyd wrote:
> > > > > > Quoting Dmitry Baryshkov (2023-08-27 04:50:15)
> > > > > > > diff --git a/drivers/interconnect/icc-clk.c b/drivers/interconnect/icc-clk.c
> > > > > > > index d787f2ea36d9..45ffb068979d 100644
> > > > > > > --- a/drivers/interconnect/icc-clk.c
> > > > > > > +++ b/drivers/interconnect/icc-clk.c
> > > > > > > @@ -25,12 +28,16 @@ struct icc_clk_provider {
> > > > > > >   static int icc_clk_set(struct icc_node *src, struct icc_node *dst)
> > > > > > >   {
> > > > > > >          struct icc_clk_node *qn = src->data;
> > > > > > > +       unsigned long rate = icc_units_to_bps(src->peak_bw);
> > > > > > >          int ret;
> > > > > > >          if (!qn || !qn->clk)
> > > > > > >                  return 0;
> > > > > > > -       if (!src->peak_bw) {
> > > > > > > +       if (qn->opp)
> > > > > > > +               return dev_pm_opp_set_rate(qn->dev, rate);
> > > > > >
> > > > > > Just curious how does lockdep do with this? Doesn't OPP call into
> > > > > > interconnect code, so lockdep will complain about ABBA?
> > > > >
> > > > > Unfortunately it does. It seems, the icc-clk is not a proper way to go here.
> > > > > I will take a look at reusing set_required_opps for this case.
> > > > >
> > > >
> > > > Could you elaborate a bit which locks exactly cause trouble here?
> > > > I'm probably missing something here.
> > > >
> > > > From a quick look at the OPP code I don't see a global lock taken there
> > > > for the entire OPP switch sequence, so I'm not sure how this could cause
> > > > an ABBA deadlock.
> > >
> > > For example:
> > >
> > > [    7.680041] Chain exists of:
> > > [    7.680041]   icc_bw_lock --> regulator_ww_class_acquire --> fs_reclaim
> > > [    7.680041]
> > > [    7.687955]  Possible unsafe locking scenario:
> > > [    7.687955]
> > > [    7.699039]        CPU0                    CPU1
> > > [    7.704752]        ----                    ----
> > > [    7.709266]   lock(fs_reclaim);
> > > [    7.713779]                                lock(regulator_ww_class_acquire);
> > > [    7.716919]                                lock(fs_reclaim);
> > > [    7.724204]   lock(icc_bw_lock);
> > >
> >
> > Hm, I'm not entirely sure how to interpret this. Is there really a
> > connection between OPP and ICC here? It looks more like ICC <->
> > regulator and somehow related to memory allocations (the fs_reclaim).
> >
> > Was there more output around this?
> 
> [    7.362902] ======================================================
> [    7.363447] WARNING: possible circular locking dependency detected
> [    7.369434] 6.6.0-rc3-next-20230929-08870-g49503b4b9f55 #129 Not tainted
> [    7.375604] ------------------------------------------------------
> [    7.382453] swapper/0/1 is trying to acquire lock:
> [    7.388437] c143445c (icc_bw_lock){+.+.}-{3:3}, at: icc_init+0x0/0x104
> [    7.393225]
> [    7.393225] but task is already holding lock:
> [    7.399730] c1397444 (fs_reclaim){+.+.}-{0:0}, at: icc_init+0x18/0x104
> [    7.405547]
> [    7.405547] which lock already depends on the new lock.
> [    7.405547]
> [    7.412077]
> [    7.412077] the existing dependency chain (in reverse order) is:
> [    7.420310]
> [    7.420310] -> #2 (fs_reclaim){+.+.}-{0:0}:
> [    7.427767]        fs_reclaim_acquire+0x60/0x94
> [    7.433492]        __kmem_cache_alloc_node+0x30/0x2b4
> [    7.437742]        __kmalloc_node_track_caller+0x48/0x244
> [    7.442954]        kstrdup+0x30/0x58
> [    7.448325]        create_regulator+0x70/0x2b8
> [    7.451981]        regulator_resolve_supply+0x3bc/0x7f4
> [    7.456234]        regulator_register+0x9b0/0xb50
> [    7.461352]        devm_regulator_register+0x50/0x8c
> [    7.466216]        rpm_reg_probe+0xf4/0x1a0
> [    7.471244]        platform_probe+0x5c/0xb0
> [    7.475157]        really_probe+0xc8/0x2d8
> [    7.479318]        __driver_probe_device+0x88/0x1a0
> [    7.483488]        driver_probe_device+0x30/0x108
> [    7.488262]        __driver_attach_async_helper+0x4c/0xa0
> [    7.493133]        async_run_entry_fn+0x24/0xb0
> [    7.498504]        process_one_work+0x208/0x5e4
> [    7.502844]        worker_thread+0x1e0/0x4a4
> [    7.507356]        kthread+0x100/0x120
> [    7.511874]        ret_from_fork+0x14/0x28
> [    7.515428]
> [    7.515428] -> #1 (regulator_ww_class_acquire){+.+.}-{0:0}:
> [    7.519528]        lock_release+0x164/0x340
> [    7.526713]        __mutex_unlock_slowpath+0x3c/0x2fc
> [    7.530626]        regulator_lock_dependent+0x1a4/0x298
> [    7.535839]        regulator_set_voltage+0x30/0xfc
> [    7.540866]        krait_l2_set_one_supply+0x28/0x84
> [    7.545729]        krait_l2_config_regulators+0x90/0x1c4
> [    7.550851]        _set_opp+0xf0/0x438
> [    7.556144]        dev_pm_opp_set_rate+0x118/0x224
> [    7.559703]        icc_node_add+0xe8/0x15c
> [    7.564474]        icc_clk_register+0x150/0x208
> [    7.568556]        krait_l2_probe+0x100/0x114
> [    7.572988]        platform_probe+0x5c/0xb0
> [    7.577495]        really_probe+0xc8/0x2d8
> [    7.581487]        __driver_probe_device+0x88/0x1a0
> [    7.585658]        driver_probe_device+0x30/0x108
> [    7.590433]        __device_attach_driver+0x94/0x10c
> [    7.595300]        bus_for_each_drv+0x84/0xd8
> [    7.600326]        __device_attach+0xac/0x1a8
> [    7.604580]        bus_probe_device+0x8c/0x90
> [    7.608919]        device_add+0x5c8/0x7a4
> [    7.613265]        of_platform_device_create_pdata+0x90/0xbc
> [    7.617260]        qcom_cpufreq_init+0xa8/0x130
> [    7.622984]        do_one_initcall+0x68/0x2d8
> [    7.627235]        kernel_init_freeable+0x1ac/0x208
> [    7.631752]        kernel_init+0x18/0x12c
> [    7.636441]        ret_from_fork+0x14/0x28
> [    7.640602]
> [    7.640602] -> #0 (icc_bw_lock){+.+.}-{3:3}:
> [    7.644607]        __lock_acquire+0x1530/0x28fc
> [    7.650413]        lock_acquire+0x10c/0x33c
> [    7.654757]        icc_init+0x40/0x104
> [    7.658917]        do_one_initcall+0x68/0x2d8
> [    7.662740]        kernel_init_freeable+0x1ac/0x208
> [    7.667168]        kernel_init+0x18/0x12c
> [    7.671855]        ret_from_fork+0x14/0x28
> [    7.676018]
> [    7.676018] other info that might help us debug this:
> [    7.676018]
> [    7.680041] Chain exists of:
> [    7.680041]   icc_bw_lock --> regulator_ww_class_acquire --> fs_reclaim
> [    7.680041]
> [    7.687955]  Possible unsafe locking scenario:
> [    7.687955]
> [    7.699039]        CPU0                    CPU1
> [    7.704752]        ----                    ----
> [    7.709266]   lock(fs_reclaim);
> [    7.713779]                                lock(regulator_ww_class_acquire);
> [    7.716919]                                lock(fs_reclaim);
> [    7.724204]   lock(icc_bw_lock);
> [    7.729833]
> [    7.729833]  *** DEADLOCK ***
> [    7.729833]
> [    7.733071] 1 lock held by swapper/0/1:
> [    7.738691]  #0: c1397444 (fs_reclaim){+.+.}-{0:0}, at: icc_init+0x18/0x104
> [    7.742528]
> [    7.742528] stack backtrace:
> [    7.749463] CPU: 3 PID: 1 Comm: swapper/0 Not tainted
> 6.6.0-rc3-next-20230929-08870-g49503b4b9f55 #129
> [    7.754010] Hardware name: Generic DT based system
> [    7.763183]  unwind_backtrace from show_stack+0x10/0x14
> [    7.767957]  show_stack from dump_stack_lvl+0x60/0x90
> [    7.773082]  dump_stack_lvl from check_noncircular+0x174/0x18c
> [    7.778288]  check_noncircular from __lock_acquire+0x1530/0x28fc
> [    7.784018]  __lock_acquire from lock_acquire+0x10c/0x33c
> [    7.790178]  lock_acquire from icc_init+0x40/0x104
> [    7.795475]  icc_init from do_one_initcall+0x68/0x2d8
> [    7.800159]  do_one_initcall from kernel_init_freeable+0x1ac/0x208
> [    7.805286]  kernel_init_freeable from kernel_init+0x18/0x12c
> [    7.811361]  kernel_init from ret_from_fork+0x14/0x28
> [    7.817177] Exception stack(0xf081dfb0 to 0xf081dff8)
> 

Thanks! I assume you haven't noticed an actual deadlock (i.e. cpufreq
not progressing) without lockdep?

FWIW I'm not fully convinced that this deadlock can actually happen in
practice for your particular limited use case (i.e. that it is not a
false positive).

> > In general, I would expect that adjusting a regulator from an
> > interconnect driver should be made possible somehow. Just because the
> > RPM firmware or similar typically handles this internally on Qualcomm
> > platforms doesn't mean no one else will ever need to do this. If you
> > have a higher bandwidth requests, need to increase the clock, which
> > again depends on a higher voltage, then you need to be able to do the
> > regulator_set_voltage() from the ICC driver somehow.
> 
> This kind of dependency is handled by the consumer, not by the ICC
> driver. Usually we explicitly put them to the opp tables, matching
> rates and volage.
> 

Right, but I'm talking about the voltage requirements of the aggregated
bus clock. The rate for that is calculated from the different bandwidth
requirements from all the consumers and not just a single one. From the
kernel perspective, the interconnect driver is the consumer of that
clock. On Qualcomm SMD RPM platforms the voltage adjustment for those is
handled behind the scenes by the RPM firmware (no idea about RPMH and
RPM-old).

But one could easily think of a (non-Qualcomm) SoC without RPM, where
the interconnect driver calculates a particular bus rate for multiple
consumers and then needs to vote for a power domain or regulator to be
able to generate that clock safely.

> I think I'll still check the required-opps approach. It will require
> manually aggregating the L2 rate requirements, but then we will be
> safe from such kinds of interdependencies. And maybe then we can also
> rewrite msm8996 to use required-opps too.
> 

Sure, thanks! I think at the moment the required-opps implementation is
somewhat entangled with power domains/genpd but perhaps it's easy enough
to refactor that to be more general.

Thanks,
Stephan
Stephen Boyd Oct. 10, 2023, 4:14 a.m. UTC | #4
Quoting Dmitry Baryshkov (2023-10-03 08:31:27)
> On Tue, 3 Oct 2023 at 17:17, Stephan Gerhold <stephan@gerhold.net> wrote:
> >
> > On Tue, Oct 03, 2023 at 04:36:11PM +0300, Dmitry Baryshkov wrote:
> > > On Tue, 3 Oct 2023 at 16:02, Stephan Gerhold <stephan@gerhold.net> wrote:
> > > >
> > > > On Tue, Oct 03, 2023 at 11:30:28AM +0300, Dmitry Baryshkov wrote:
> > > > > On 28/08/2023 21:09, Stephen Boyd wrote:
> > > > > > Quoting Dmitry Baryshkov (2023-08-27 04:50:15)
> > > > > > > diff --git a/drivers/interconnect/icc-clk.c b/drivers/interconnect/icc-clk.c
> > > > > > > index d787f2ea36d9..45ffb068979d 100644
> > > > > > > --- a/drivers/interconnect/icc-clk.c
> > > > > > > +++ b/drivers/interconnect/icc-clk.c
> > > > > > > @@ -25,12 +28,16 @@ struct icc_clk_provider {
> > > > > > >   static int icc_clk_set(struct icc_node *src, struct icc_node *dst)
> > > > > > >   {
> > > > > > >          struct icc_clk_node *qn = src->data;
> > > > > > > +       unsigned long rate = icc_units_to_bps(src->peak_bw);
> > > > > > >          int ret;
> > > > > > >          if (!qn || !qn->clk)
> > > > > > >                  return 0;
> > > > > > > -       if (!src->peak_bw) {
> > > > > > > +       if (qn->opp)
> > > > > > > +               return dev_pm_opp_set_rate(qn->dev, rate);
> > > > > >
> > > > > > Just curious how does lockdep do with this? Doesn't OPP call into
> > > > > > interconnect code, so lockdep will complain about ABBA?
> > > > >
> > > > > Unfortunately it does. It seems, the icc-clk is not a proper way to go here.
> > > > > I will take a look at reusing set_required_opps for this case.
> > > > >
> > > >
> > > > Could you elaborate a bit which locks exactly cause trouble here?
> > > > I'm probably missing something here.
> > > >
> > > > From a quick look at the OPP code I don't see a global lock taken there
> > > > for the entire OPP switch sequence, so I'm not sure how this could cause
> > > > an ABBA deadlock.
> > >
> > > For example:
> > >
> > > [    7.680041] Chain exists of:
> > > [    7.680041]   icc_bw_lock --> regulator_ww_class_acquire --> fs_reclaim
> > > [    7.680041]
> > > [    7.687955]  Possible unsafe locking scenario:
> > > [    7.687955]
> > > [    7.699039]        CPU0                    CPU1
> > > [    7.704752]        ----                    ----
> > > [    7.709266]   lock(fs_reclaim);
> > > [    7.713779]                                lock(regulator_ww_class_acquire);
> > > [    7.716919]                                lock(fs_reclaim);
> > > [    7.724204]   lock(icc_bw_lock);
> > >
> >
> > Hm, I'm not entirely sure how to interpret this. Is there really a
> > connection between OPP and ICC here? It looks more like ICC <->
> > regulator and somehow related to memory allocations (the fs_reclaim).
> >
> > Was there more output around this?
> 
> [    7.362902] ======================================================
> [    7.363447] WARNING: possible circular locking dependency detected
> [    7.369434] 6.6.0-rc3-next-20230929-08870-g49503b4b9f55 #129 Not tainted
> [    7.375604] ------------------------------------------------------
> [    7.382453] swapper/0/1 is trying to acquire lock:
> [    7.388437] c143445c (icc_bw_lock){+.+.}-{3:3}, at: icc_init+0x0/0x104
> [    7.393225]
> [    7.393225] but task is already holding lock:
> [    7.399730] c1397444 (fs_reclaim){+.+.}-{0:0}, at: icc_init+0x18/0x104
> [    7.405547]
> [    7.405547] which lock already depends on the new lock.
> [    7.405547]
> [    7.412077]
> [    7.412077] the existing dependency chain (in reverse order) is:
> [    7.420310]
> [    7.420310] -> #2 (fs_reclaim){+.+.}-{0:0}:
> [    7.427767]        fs_reclaim_acquire+0x60/0x94
> [    7.433492]        __kmem_cache_alloc_node+0x30/0x2b4
> [    7.437742]        __kmalloc_node_track_caller+0x48/0x244
> [    7.442954]        kstrdup+0x30/0x58
> [    7.448325]        create_regulator+0x70/0x2b8

The regulator core should avoid holding the lock while calling into
kstrdup() and also debugfs file creation APIs.

-Stephen

> [    7.451981]        regulator_resolve_supply+0x3bc/0x7f4
> [    7.456234]        regulator_register+0x9b0/0xb50
> [    7.461352]        devm_regulator_register+0x50/0x8c
> [    7.466216]        rpm_reg_probe+0xf4/0x1a0
> [    7.471244]        platform_probe+0x5c/0xb0
> [    7.475157]        really_probe+0xc8/0x2d8
> [    7.479318]        __driver_probe_device+0x88/0x1a0
> [    7.483488]        driver_probe_device+0x30/0x108
> [    7.488262]        __driver_attach_async_helper+0x4c/0xa0
> [    7.493133]        async_run_entry_fn+0x24/0xb0
> [    7.498504]        process_one_work+0x208/0x5e4
> [    7.502844]        worker_thread+0x1e0/0x4a4
> [    7.507356]        kthread+0x100/0x120
> [    7.511874]        ret_from_fork+0x14/0x28
> [    7.515428]
> [    7.515428] -> #1 (regulator_ww_class_acquire){+.+.}-{0:0}:
> [    7.519528]        lock_release+0x164/0x340
> [    7.526713]        __mutex_unlock_slowpath+0x3c/0x2fc
> [    7.530626]        regulator_lock_dependent+0x1a4/0x298
> [    7.535839]        regulator_set_voltage+0x30/0xfc
> [    7.540866]        krait_l2_set_one_supply+0x28/0x84
> [    7.545729]        krait_l2_config_regulators+0x90/0x1c4
> [    7.550851]        _set_opp+0xf0/0x438
> [    7.556144]        dev_pm_opp_set_rate+0x118/0x224
> [    7.559703]        icc_node_add+0xe8/0x15c
> [    7.564474]        icc_clk_register+0x150/0x208
> [    7.568556]        krait_l2_probe+0x100/0x114
> [    7.572988]        platform_probe+0x5c/0xb0
> [    7.577495]        really_probe+0xc8/0x2d8
> [    7.581487]        __driver_probe_device+0x88/0x1a0
> [    7.585658]        driver_probe_device+0x30/0x108
> [    7.590433]        __device_attach_driver+0x94/0x10c
> [    7.595300]        bus_for_each_drv+0x84/0xd8
> [    7.600326]        __device_attach+0xac/0x1a8
> [    7.604580]        bus_probe_device+0x8c/0x90
> [    7.608919]        device_add+0x5c8/0x7a4
> [    7.613265]        of_platform_device_create_pdata+0x90/0xbc
> [    7.617260]        qcom_cpufreq_init+0xa8/0x130
> [    7.622984]        do_one_initcall+0x68/0x2d8
> [    7.627235]        kernel_init_freeable+0x1ac/0x208
> [    7.631752]        kernel_init+0x18/0x12c
> [    7.636441]        ret_from_fork+0x14/0x28
> [    7.640602]
> [    7.640602] -> #0 (icc_bw_lock){+.+.}-{3:3}:
> [    7.644607]        __lock_acquire+0x1530/0x28fc
> [    7.650413]        lock_acquire+0x10c/0x33c
> [    7.654757]        icc_init+0x40/0x104
> [    7.658917]        do_one_initcall+0x68/0x2d8
> [    7.662740]        kernel_init_freeable+0x1ac/0x208
> [    7.667168]        kernel_init+0x18/0x12c
> [    7.671855]        ret_from_fork+0x14/0x28
> [    7.676018]
> [    7.676018] other info that might help us debug this:
> [    7.676018]
> [    7.680041] Chain exists of:
> [    7.680041]   icc_bw_lock --> regulator_ww_class_acquire --> fs_reclaim
> [    7.680041]
> [    7.687955]  Possible unsafe locking scenario:
> [    7.687955]
> [    7.699039]        CPU0                    CPU1
> [    7.704752]        ----                    ----
> [    7.709266]   lock(fs_reclaim);
> [    7.713779]                                lock(regulator_ww_class_acquire);
> [    7.716919]                                lock(fs_reclaim);
> [    7.724204]   lock(icc_bw_lock);
> [    7.729833]
> [    7.729833]  *** DEADLOCK ***
> [    7.729833]
> [    7.733071] 1 lock held by swapper/0/1:
> [    7.738691]  #0: c1397444 (fs_reclaim){+.+.}-{0:0}, at: icc_init+0x18/0x104
> [    7.742528]
> [    7.742528] stack backtrace:
> [    7.749463] CPU: 3 PID: 1 Comm: swapper/0 Not tainted
> 6.6.0-rc3-next-20230929-08870-g49503b4b9f55 #129
> [    7.754010] Hardware name: Generic DT based system
> [    7.763183]  unwind_backtrace from show_stack+0x10/0x14
> [    7.767957]  show_stack from dump_stack_lvl+0x60/0x90
> [    7.773082]  dump_stack_lvl from check_noncircular+0x174/0x18c
> [    7.778288]  check_noncircular from __lock_acquire+0x1530/0x28fc
> [    7.784018]  __lock_acquire from lock_acquire+0x10c/0x33c
> [    7.790178]  lock_acquire from icc_init+0x40/0x104
> [    7.795475]  icc_init from do_one_initcall+0x68/0x2d8
> [    7.800159]  do_one_initcall from kernel_init_freeable+0x1ac/0x208
> [    7.805286]  kernel_init_freeable from kernel_init+0x18/0x12c
> [    7.811361]  kernel_init from ret_from_fork+0x14/0x28
> [    7.817177] Exception stack(0xf081dfb0 to 0xf081dff8)
>
diff mbox series

Patch

diff --git a/drivers/interconnect/icc-clk.c b/drivers/interconnect/icc-clk.c
index d787f2ea36d9..45ffb068979d 100644
--- a/drivers/interconnect/icc-clk.c
+++ b/drivers/interconnect/icc-clk.c
@@ -7,10 +7,13 @@ 
 #include <linux/device.h>
 #include <linux/interconnect-clk.h>
 #include <linux/interconnect-provider.h>
+#include <linux/pm_opp.h>
 
 struct icc_clk_node {
+	struct device *dev;
 	struct clk *clk;
 	bool enabled;
+	bool opp;
 };
 
 struct icc_clk_provider {
@@ -25,12 +28,16 @@  struct icc_clk_provider {
 static int icc_clk_set(struct icc_node *src, struct icc_node *dst)
 {
 	struct icc_clk_node *qn = src->data;
+	unsigned long rate = icc_units_to_bps(src->peak_bw);
 	int ret;
 
 	if (!qn || !qn->clk)
 		return 0;
 
-	if (!src->peak_bw) {
+	if (qn->opp)
+		return dev_pm_opp_set_rate(qn->dev, rate);
+
+	if (!rate) {
 		if (qn->enabled)
 			clk_disable_unprepare(qn->clk);
 		qn->enabled = false;
@@ -45,7 +52,7 @@  static int icc_clk_set(struct icc_node *src, struct icc_node *dst)
 		qn->enabled = true;
 	}
 
-	return clk_set_rate(qn->clk, icc_units_to_bps(src->peak_bw));
+	return clk_set_rate(qn->clk, rate);
 }
 
 static int icc_clk_get_bw(struct icc_node *node, u32 *avg, u32 *peak)
@@ -106,7 +113,9 @@  struct icc_provider *icc_clk_register(struct device *dev,
 	icc_provider_init(provider);
 
 	for (i = 0, j = 0; i < num_clocks; i++) {
+		qp->clocks[i].dev = dev;
 		qp->clocks[i].clk = data[i].clk;
+		qp->clocks[i].opp = data[i].opp;
 
 		node = icc_node_create(first_id + j);
 		if (IS_ERR(node)) {
diff --git a/include/linux/interconnect-clk.h b/include/linux/interconnect-clk.h
index 0cd80112bea5..c695e5099901 100644
--- a/include/linux/interconnect-clk.h
+++ b/include/linux/interconnect-clk.h
@@ -11,6 +11,7 @@  struct device;
 struct icc_clk_data {
 	struct clk *clk;
 	const char *name;
+	bool opp;
 };
 
 struct icc_provider *icc_clk_register(struct device *dev,