Message ID | 20220830125249.2373416-1-conor.dooley@microchip.com |
---|---|
State | New |
Headers | show |
Series | PolarFire SoC reset controller & clock cleanups | expand |
On 30.08.2022 15:52, Conor Dooley wrote: > Unnoticed in current code, there is an array bounds violation present > during clock registration. This seems to fail gracefully in v6.0-rc1, > and life carrys on. While converting the driver to use standard clock > structs/ops, kernel panics were seen during boot when built with clang: > > [ 0.581754] Unable to handle kernel NULL pointer dereference at virtual address 00000000000000b1 > [ 0.591520] Oops [#1] > [ 0.594045] Modules linked in: > [ 0.597435] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 6.0.0-rc1-00011-g8e1459cf4eca #1 > [ 0.606188] Hardware name: Microchip PolarFire-SoC Icicle Kit (DT) > [ 0.613012] epc : __clk_register+0x4a6/0x85c > [ 0.617759] ra : __clk_register+0x49e/0x85c > [ 0.622489] epc : ffffffff803faf7c ra : ffffffff803faf74 sp : ffffffc80400b720 > [ 0.630466] gp : ffffffff810e93f8 tp : ffffffe77fe60000 t0 : ffffffe77ffb3800 > [ 0.638443] t1 : 000000000000000a t2 : ffffffffffffffff s0 : ffffffc80400b7c0 > [ 0.646420] s1 : 0000000000000001 a0 : 0000000000000001 a1 : 0000000000000000 > [ 0.654396] a2 : 0000000000000001 a3 : 0000000000000000 a4 : 0000000000000000 > [ 0.662373] a5 : ffffffff803a5810 a6 : 0000000200000022 a7 : 0000000000000006 > [ 0.670350] s2 : ffffffff81099d48 s3 : ffffffff80d6e28e s4 : 0000000000000028 > [ 0.678327] s5 : ffffffff810ed3c8 s6 : ffffffff810ed3d0 s7 : ffffffe77ffbc100 > [ 0.686304] s8 : ffffffe77ffb1540 s9 : ffffffe77ffb1540 s10: 0000000000000008 > [ 0.694281] s11: 0000000000000000 t3 : 00000000000000c6 t4 : 0000000000000007 > [ 0.702258] t5 : ffffffff810c78c0 t6 : ffffffe77ff88cd0 > [ 0.708125] status: 0000000200000120 badaddr: 00000000000000b1 cause: 000000000000000d > [ 0.716869] [<ffffffff803fb892>] devm_clk_hw_register+0x62/0xaa > [ 0.723420] [<ffffffff80403412>] mpfs_clk_probe+0x1e0/0x244 > > It fails on "clk_periph_timer" - which uses a different parent, that it > tries to find using the macro: > \#define PARENT_CLK(PARENT) (&mpfs_cfg_clks[CLK_##PARENT].cfg.hw) > > If 
parent is RTCREF, so the macro becomes: &mpfs_cfg_clks[33].cfg.hw > which is well beyond the end of the array. Amazingly, builds with GCC > 11.1 see no problem here, booting correctly and hooking the parent up > etc. Builds with clang-15 do not, with the above panic. > > Drop the macro for the RTCREF and use the array directly to avoid the > panic, using a newly added define that brings the index into the valid > range. > > Fixes: 1c6a7ea32b8c ("clk: microchip: mpfs: add RTCREF clock control") > CC: Nathan Chancellor <nathan@kernel.org> > Signed-off-by: Conor Dooley <conor.dooley@microchip.com> > --- > drivers/clk/microchip/clk-mpfs.c | 5 ++++- > 1 file changed, 4 insertions(+), 1 deletion(-) > > diff --git a/drivers/clk/microchip/clk-mpfs.c b/drivers/clk/microchip/clk-mpfs.c > index 070c3b896559..9e41f07b3fa6 100644 > --- a/drivers/clk/microchip/clk-mpfs.c > +++ b/drivers/clk/microchip/clk-mpfs.c > @@ -27,6 +27,8 @@ > #define MSSPLL_POSTDIV_WIDTH 0x07u > #define MSSPLL_FIXED_DIV 4u > > +#define RTCREF_OFFSET (CLK_RTCREF - CLK_ENVM) > + > struct mpfs_clock_data { > void __iomem *base; > void __iomem *msspll_base; > @@ -381,7 +383,8 @@ static struct mpfs_periph_hw_clock mpfs_periph_clks[] = { > CLK_PERIPH(CLK_MAC0, "clk_periph_mac0", PARENT_CLK(AHB), 1, 0), > CLK_PERIPH(CLK_MAC1, "clk_periph_mac1", PARENT_CLK(AHB), 2, 0), > CLK_PERIPH(CLK_MMC, "clk_periph_mmc", PARENT_CLK(AHB), 3, 0), > - CLK_PERIPH(CLK_TIMER, "clk_periph_timer", PARENT_CLK(RTCREF), 4, 0), > + CLK_PERIPH(CLK_TIMER, "clk_periph_timer", > + &mpfs_cfg_clks[CLK_RTCREF - RTCREF_OFFSET].hw, 4, 0), A matter of personal taste: as clk IDs and clk indexes in the mpfs_cfg_clks[] array are different things (the ID for clk_periph_timer is already different) and CLK_RTCREF - RTCREF_OFFSET here is in the end CLK_ENVM = 3, it may be easier to follow the code if new macros were added, like: #define CLK_CPU_OFF 0 #define CLK_AXI_OFF 1 #define CLK_AHB_OFF 2 #define CLK_RTCREF_OFF 3 and the PARENT_CLK() macro were changed to something as follows: 
#define PARENT_CLK(PARENT) (&mpfs_cfg_clks[CLK_##PARENT##_OFF].hw) > CLK_PERIPH(CLK_MMUART0, "clk_periph_mmuart0", PARENT_CLK(AHB), 5, CLK_IS_CRITICAL), > CLK_PERIPH(CLK_MMUART1, "clk_periph_mmuart1", PARENT_CLK(AHB), 6, 0), > CLK_PERIPH(CLK_MMUART2, "clk_periph_mmuart2", PARENT_CLK(AHB), 7, 0),
On 08/09/2022 07:44, Claudiu Beznea - M18063 wrote: > On 30.08.2022 15:52, Conor Dooley wrote: >> Unnoticed in current code, there is an array bounds violation present >> during clock registration. This seems to fail gracefully in v6.0-rc1, >> and life carrys on. While converting the driver to use standard clock >> structs/ops, kernel panics were seen during boot when built with clang: >> >> [ 0.581754] Unable to handle kernel NULL pointer dereference at virtual address 00000000000000b1 >> [ 0.591520] Oops [#1] >> [ 0.594045] Modules linked in: >> [ 0.597435] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 6.0.0-rc1-00011-g8e1459cf4eca #1 >> [ 0.606188] Hardware name: Microchip PolarFire-SoC Icicle Kit (DT) >> [ 0.613012] epc : __clk_register+0x4a6/0x85c >> [ 0.617759] ra : __clk_register+0x49e/0x85c >> [ 0.622489] epc : ffffffff803faf7c ra : ffffffff803faf74 sp : ffffffc80400b720 >> [ 0.630466] gp : ffffffff810e93f8 tp : ffffffe77fe60000 t0 : ffffffe77ffb3800 >> [ 0.638443] t1 : 000000000000000a t2 : ffffffffffffffff s0 : ffffffc80400b7c0 >> [ 0.646420] s1 : 0000000000000001 a0 : 0000000000000001 a1 : 0000000000000000 >> [ 0.654396] a2 : 0000000000000001 a3 : 0000000000000000 a4 : 0000000000000000 >> [ 0.662373] a5 : ffffffff803a5810 a6 : 0000000200000022 a7 : 0000000000000006 >> [ 0.670350] s2 : ffffffff81099d48 s3 : ffffffff80d6e28e s4 : 0000000000000028 >> [ 0.678327] s5 : ffffffff810ed3c8 s6 : ffffffff810ed3d0 s7 : ffffffe77ffbc100 >> [ 0.686304] s8 : ffffffe77ffb1540 s9 : ffffffe77ffb1540 s10: 0000000000000008 >> [ 0.694281] s11: 0000000000000000 t3 : 00000000000000c6 t4 : 0000000000000007 >> [ 0.702258] t5 : ffffffff810c78c0 t6 : ffffffe77ff88cd0 >> [ 0.708125] status: 0000000200000120 badaddr: 00000000000000b1 cause: 000000000000000d >> [ 0.716869] [<ffffffff803fb892>] devm_clk_hw_register+0x62/0xaa >> [ 0.723420] [<ffffffff80403412>] mpfs_clk_probe+0x1e0/0x244 >> >> It fails on "clk_periph_timer" - which uses a different parent, that it >> tries to find using 
the macro: >> \#define PARENT_CLK(PARENT) (&mpfs_cfg_clks[CLK_##PARENT].cfg.hw) >> >> If parent is RTCREF, so the macro becomes: &mpfs_cfg_clks[33].cfg.hw >> which is well beyond the end of the array. Amazingly, builds with GCC >> 11.1 see no problem here, booting correctly and hooking the parent up >> etc. Builds with clang-15 do not, with the above panic. >> >> Drop the macro for the RTCREF and use the array directly to avoid the >> panic, using a newly added define that brings the index into the valid >> range. >> >> Fixes: 1c6a7ea32b8c ("clk: microchip: mpfs: add RTCREF clock control") >> CC: Nathan Chancellor <nathan@kernel.org> >> Signed-off-by: Conor Dooley <conor.dooley@microchip.com> >> --- >> drivers/clk/microchip/clk-mpfs.c | 5 ++++- >> 1 file changed, 4 insertions(+), 1 deletion(-) >> >> diff --git a/drivers/clk/microchip/clk-mpfs.c b/drivers/clk/microchip/clk-mpfs.c >> index 070c3b896559..9e41f07b3fa6 100644 >> --- a/drivers/clk/microchip/clk-mpfs.c >> +++ b/drivers/clk/microchip/clk-mpfs.c >> @@ -27,6 +27,8 @@ >> #define MSSPLL_POSTDIV_WIDTH 0x07u >> #define MSSPLL_FIXED_DIV 4u >> >> +#define RTCREF_OFFSET (CLK_RTCREF - CLK_ENVM) >> + >> struct mpfs_clock_data { >> void __iomem *base; >> void __iomem *msspll_base; >> @@ -381,7 +383,8 @@ static struct mpfs_periph_hw_clock mpfs_periph_clks[] = { >> CLK_PERIPH(CLK_MAC0, "clk_periph_mac0", PARENT_CLK(AHB), 1, 0), >> CLK_PERIPH(CLK_MAC1, "clk_periph_mac1", PARENT_CLK(AHB), 2, 0), >> CLK_PERIPH(CLK_MMC, "clk_periph_mmc", PARENT_CLK(AHB), 3, 0), >> - CLK_PERIPH(CLK_TIMER, "clk_periph_timer", PARENT_CLK(RTCREF), 4, 0), >> + CLK_PERIPH(CLK_TIMER, "clk_periph_timer", >> + &mpfs_cfg_clks[CLK_RTCREF - RTCREF_OFFSET].hw, 4, 0), > > A personal taste: as clk IDs and clk indexes in mpfs_cfg_clks[] array are > different thing (ID for clk_periph_timer is already different) and the > CLK_RTCREF - RTCREF_OFFSET here is in the end CLK_ENVM = 3 maybe easier to > follow the code would be to add new macros like: > > #define 
CLK_CPU_OFF 0 > #define CLK_AXI_OFF 1 > #define CLK_AHB_OFF 2 > #define CLK_RTCREF_OFF 3 > > and change the CLK_PARENT() macro something as follows: > > #define PARENT_CLK(PARENT) (&mpfs_cfg_clks[CLK_##PARENT##_OFF].hw) Sure, but that is out-of-scope for this fix which needs backporting. > >> CLK_PERIPH(CLK_MMUART0, "clk_periph_mmuart0", PARENT_CLK(AHB), 5, CLK_IS_CRITICAL), >> CLK_PERIPH(CLK_MMUART1, "clk_periph_mmuart1", PARENT_CLK(AHB), 6, 0), >> CLK_PERIPH(CLK_MMUART2, "clk_periph_mmuart2", PARENT_CLK(AHB), 7, 0), >
diff --git a/drivers/clk/microchip/clk-mpfs.c b/drivers/clk/microchip/clk-mpfs.c index 070c3b896559..9e41f07b3fa6 100644 --- a/drivers/clk/microchip/clk-mpfs.c +++ b/drivers/clk/microchip/clk-mpfs.c @@ -27,6 +27,8 @@ #define MSSPLL_POSTDIV_WIDTH 0x07u #define MSSPLL_FIXED_DIV 4u +#define RTCREF_OFFSET (CLK_RTCREF - CLK_ENVM) + struct mpfs_clock_data { void __iomem *base; void __iomem *msspll_base; @@ -381,7 +383,8 @@ static struct mpfs_periph_hw_clock mpfs_periph_clks[] = { CLK_PERIPH(CLK_MAC0, "clk_periph_mac0", PARENT_CLK(AHB), 1, 0), CLK_PERIPH(CLK_MAC1, "clk_periph_mac1", PARENT_CLK(AHB), 2, 0), CLK_PERIPH(CLK_MMC, "clk_periph_mmc", PARENT_CLK(AHB), 3, 0), - CLK_PERIPH(CLK_TIMER, "clk_periph_timer", PARENT_CLK(RTCREF), 4, 0), + CLK_PERIPH(CLK_TIMER, "clk_periph_timer", + &mpfs_cfg_clks[CLK_RTCREF - RTCREF_OFFSET].hw, 4, 0), CLK_PERIPH(CLK_MMUART0, "clk_periph_mmuart0", PARENT_CLK(AHB), 5, CLK_IS_CRITICAL), CLK_PERIPH(CLK_MMUART1, "clk_periph_mmuart1", PARENT_CLK(AHB), 6, 0), CLK_PERIPH(CLK_MMUART2, "clk_periph_mmuart2", PARENT_CLK(AHB), 7, 0),
Unnoticed in current code, there is an array bounds violation present during clock registration. This seems to fail gracefully in v6.0-rc1, and life carries on. While converting the driver to use standard clock structs/ops, kernel panics were seen during boot when built with clang: [ 0.581754] Unable to handle kernel NULL pointer dereference at virtual address 00000000000000b1 [ 0.591520] Oops [#1] [ 0.594045] Modules linked in: [ 0.597435] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 6.0.0-rc1-00011-g8e1459cf4eca #1 [ 0.606188] Hardware name: Microchip PolarFire-SoC Icicle Kit (DT) [ 0.613012] epc : __clk_register+0x4a6/0x85c [ 0.617759] ra : __clk_register+0x49e/0x85c [ 0.622489] epc : ffffffff803faf7c ra : ffffffff803faf74 sp : ffffffc80400b720 [ 0.630466] gp : ffffffff810e93f8 tp : ffffffe77fe60000 t0 : ffffffe77ffb3800 [ 0.638443] t1 : 000000000000000a t2 : ffffffffffffffff s0 : ffffffc80400b7c0 [ 0.646420] s1 : 0000000000000001 a0 : 0000000000000001 a1 : 0000000000000000 [ 0.654396] a2 : 0000000000000001 a3 : 0000000000000000 a4 : 0000000000000000 [ 0.662373] a5 : ffffffff803a5810 a6 : 0000000200000022 a7 : 0000000000000006 [ 0.670350] s2 : ffffffff81099d48 s3 : ffffffff80d6e28e s4 : 0000000000000028 [ 0.678327] s5 : ffffffff810ed3c8 s6 : ffffffff810ed3d0 s7 : ffffffe77ffbc100 [ 0.686304] s8 : ffffffe77ffb1540 s9 : ffffffe77ffb1540 s10: 0000000000000008 [ 0.694281] s11: 0000000000000000 t3 : 00000000000000c6 t4 : 0000000000000007 [ 0.702258] t5 : ffffffff810c78c0 t6 : ffffffe77ff88cd0 [ 0.708125] status: 0000000200000120 badaddr: 00000000000000b1 cause: 000000000000000d [ 0.716869] [<ffffffff803fb892>] devm_clk_hw_register+0x62/0xaa [ 0.723420] [<ffffffff80403412>] mpfs_clk_probe+0x1e0/0x244 It fails on "clk_periph_timer" - which uses a different parent, that it tries to find using the macro: #define PARENT_CLK(PARENT) (&mpfs_cfg_clks[CLK_##PARENT].cfg.hw) If the parent is RTCREF, the macro becomes: &mpfs_cfg_clks[33].cfg.hw which is well beyond the end of the 
array. Amazingly, builds with GCC 11.1 see no problem here, booting correctly and hooking the parent up etc. Builds with clang-15 do not, and hit the above panic. Drop the macro for RTCREF and use the array directly to avoid the panic, using a newly added define that brings the index into the valid range. Fixes: 1c6a7ea32b8c ("clk: microchip: mpfs: add RTCREF clock control") CC: Nathan Chancellor <nathan@kernel.org> Signed-off-by: Conor Dooley <conor.dooley@microchip.com> --- drivers/clk/microchip/clk-mpfs.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-)