diff mbox series

[4/4] arm64: dts: rockchip: Add OPP data for CPU cores on RK3588

Message ID 20240125-rk-dts-additions-v1-4-5879275db36f@gmail.com
State New
Headers show
Series RK3588 and Rock 5B dts additions: thermal, OPP, rfkill and fan | expand

Commit Message

Alexey Charkov Jan. 24, 2024, 8:30 p.m. UTC
By default the CPUs on RK3588 start up in a conservative performance
mode. Add frequency and voltage mappings to the device tree to enable
dynamic scaling via cpufreq.

Signed-off-by: Alexey Charkov <alchark@gmail.com>
---
 arch/arm64/boot/dts/rockchip/rk3588s.dtsi | 209 ++++++++++++++++++++++++++++++
 1 file changed, 209 insertions(+)

Comments

Daniel Lezcano Jan. 25, 2024, 9:30 a.m. UTC | #1
Hi Alexey,

Adding Viresh

On 24/01/2024 21:30, Alexey Charkov wrote:
> By default the CPUs on RK3588 start up in a conservative performance
> mode. Add frequency and voltage mappings to the device tree to enable
> dynamic scaling via cpufreq
> 
> Signed-off-by: Alexey Charkov <alchark@gmail.com>
> ---
>   arch/arm64/boot/dts/rockchip/rk3588s.dtsi | 209 ++++++++++++++++++++++++++++++
>   1 file changed, 209 insertions(+)
> 
> diff --git a/arch/arm64/boot/dts/rockchip/rk3588s.dtsi b/arch/arm64/boot/dts/rockchip/rk3588s.dtsi
> index 131b9eb21398..e605be531a0f 100644
> --- a/arch/arm64/boot/dts/rockchip/rk3588s.dtsi
> +++ b/arch/arm64/boot/dts/rockchip/rk3588s.dtsi
> @@ -97,6 +97,7 @@ cpu_l0: cpu@0 {
>   			clocks = <&scmi_clk SCMI_CLK_CPUL>;
>   			assigned-clocks = <&scmi_clk SCMI_CLK_CPUL>;
>   			assigned-clock-rates = <816000000>;
> +			operating-points-v2 = <&cluster0_opp_table>;
>   			cpu-idle-states = <&CPU_SLEEP>;
>   			i-cache-size = <32768>;
>   			i-cache-line-size = <64>;
> @@ -116,6 +117,7 @@ cpu_l1: cpu@100 {
>   			enable-method = "psci";
>   			capacity-dmips-mhz = <530>;
>   			clocks = <&scmi_clk SCMI_CLK_CPUL>;
> +			operating-points-v2 = <&cluster0_opp_table>;
>   			cpu-idle-states = <&CPU_SLEEP>;
>   			i-cache-size = <32768>;
>   			i-cache-line-size = <64>;
> @@ -135,6 +137,7 @@ cpu_l2: cpu@200 {
>   			enable-method = "psci";
>   			capacity-dmips-mhz = <530>;
>   			clocks = <&scmi_clk SCMI_CLK_CPUL>;
> +			operating-points-v2 = <&cluster0_opp_table>;
>   			cpu-idle-states = <&CPU_SLEEP>;
>   			i-cache-size = <32768>;
>   			i-cache-line-size = <64>;
> @@ -154,6 +157,7 @@ cpu_l3: cpu@300 {
>   			enable-method = "psci";
>   			capacity-dmips-mhz = <530>;
>   			clocks = <&scmi_clk SCMI_CLK_CPUL>;
> +			operating-points-v2 = <&cluster0_opp_table>;
>   			cpu-idle-states = <&CPU_SLEEP>;
>   			i-cache-size = <32768>;
>   			i-cache-line-size = <64>;
> @@ -175,6 +179,7 @@ cpu_b0: cpu@400 {
>   			clocks = <&scmi_clk SCMI_CLK_CPUB01>;
>   			assigned-clocks = <&scmi_clk SCMI_CLK_CPUB01>;
>   			assigned-clock-rates = <816000000>;
> +			operating-points-v2 = <&cluster1_opp_table>;
>   			cpu-idle-states = <&CPU_SLEEP>;
>   			i-cache-size = <65536>;
>   			i-cache-line-size = <64>;
> @@ -194,6 +199,7 @@ cpu_b1: cpu@500 {
>   			enable-method = "psci";
>   			capacity-dmips-mhz = <1024>;
>   			clocks = <&scmi_clk SCMI_CLK_CPUB01>;
> +			operating-points-v2 = <&cluster1_opp_table>;
>   			cpu-idle-states = <&CPU_SLEEP>;
>   			i-cache-size = <65536>;
>   			i-cache-line-size = <64>;
> @@ -215,6 +221,7 @@ cpu_b2: cpu@600 {
>   			clocks = <&scmi_clk SCMI_CLK_CPUB23>;
>   			assigned-clocks = <&scmi_clk SCMI_CLK_CPUB23>;
>   			assigned-clock-rates = <816000000>;
> +			operating-points-v2 = <&cluster2_opp_table>;
>   			cpu-idle-states = <&CPU_SLEEP>;
>   			i-cache-size = <65536>;
>   			i-cache-line-size = <64>;
> @@ -234,6 +241,7 @@ cpu_b3: cpu@700 {
>   			enable-method = "psci";
>   			capacity-dmips-mhz = <1024>;
>   			clocks = <&scmi_clk SCMI_CLK_CPUB23>;
> +			operating-points-v2 = <&cluster2_opp_table>;
>   			cpu-idle-states = <&CPU_SLEEP>;
>   			i-cache-size = <65536>;
>   			i-cache-line-size = <64>;
> @@ -348,6 +356,207 @@ l3_cache: l3-cache {
>   		};
>   	};
>   
> +	cluster0_opp_table: opp-table-cluster0 {
> +		compatible = "operating-points-v2";
> +		opp-shared;
> +
> +		opp-408000000 {
> +			opp-hz = /bits/ 64 <408000000>;
> +			opp-microvolt = <675000 675000 950000>;
> +			clock-latency-ns = <40000>;
> +		};
> +		opp-600000000 {
> +			opp-hz = /bits/ 64 <600000000>;
> +			opp-microvolt = <675000 675000 950000>;
> +			clock-latency-ns = <40000>;
> +		};
> +		opp-816000000 {
> +			opp-hz = /bits/ 64 <816000000>;
> +			opp-microvolt = <675000 675000 950000>;
> +			clock-latency-ns = <40000>;
> +		};
> +		opp-1008000000 {
> +			opp-hz = /bits/ 64 <1008000000>;
> +			opp-microvolt = <675000 675000 950000>;
> +			clock-latency-ns = <40000>;
> +		};

It is not useful to introduce OPPs that share the same voltage. There is
no gain in terms of energy efficiency, because at a fixed voltage the
power consumption scales linearly with the frequency, and therefore with
the compute capacity (P = C x F x V²).

For example, opp-408 consumes 2 bogoWatts and opp-816 consumes 4 
bogoWatts (because of the same voltage).

For a given workload, opp-408 takes 10 sec and opp-816 takes 5 sec
because it is twice as fast.

The energy consumption is:

opp-408 = 10 x 2 = 20 BogoJoules
opp-816 = 5 x 4 = 20 BogoJoules


> +		opp-1200000000 {
> +			opp-hz = /bits/ 64 <1200000000>;
> +			opp-microvolt = <712500 712500 950000>;
> +			clock-latency-ns = <40000>;
> +		};
> +		opp-1416000000 {
> +			opp-hz = /bits/ 64 <1416000000>;
> +			opp-microvolt = <762500 762500 950000>;
> +			clock-latency-ns = <40000>;
> +			opp-suspend;
> +		};
> +		opp-1608000000 {
> +			opp-hz = /bits/ 64 <1608000000>;
> +			opp-microvolt = <850000 850000 950000>;
> +			clock-latency-ns = <40000>;
> +		};
> +		opp-1800000000 {
> +			opp-hz = /bits/ 64 <1800000000>;
> +			opp-microvolt = <950000 950000 950000>;
> +			clock-latency-ns = <40000>;
> +		};
> +	};
> +
> +	cluster1_opp_table: opp-table-cluster1 {
> +		compatible = "operating-points-v2";
> +		opp-shared;
> +
> +		opp-408000000 {
> +			opp-hz = /bits/ 64 <408000000>;
> +			opp-microvolt = <675000 675000 1000000>;
> +			clock-latency-ns = <40000>;
> +			opp-suspend;
> +		};
> +		opp-600000000 {
> +			opp-hz = /bits/ 64 <600000000>;
> +			opp-microvolt = <675000 675000 1000000>;
> +			clock-latency-ns = <40000>;
> +		};
> +		opp-816000000 {
> +			opp-hz = /bits/ 64 <816000000>;
> +			opp-microvolt = <675000 675000 1000000>;
> +			clock-latency-ns = <40000>;
> +		};
> +		opp-1008000000 {
> +			opp-hz = /bits/ 64 <1008000000>;
> +			opp-microvolt = <675000 675000 1000000>;
> +			clock-latency-ns = <40000>;
> +		};

same comment

> +		opp-1200000000 {
> +			opp-hz = /bits/ 64 <1200000000>;
> +			opp-microvolt = <675000 675000 1000000>;
> +			clock-latency-ns = <40000>;
> +		};
> +		opp-1416000000 {
> +			opp-hz = /bits/ 64 <1416000000>;
> +			opp-microvolt = <725000 725000 1000000>;
> +			clock-latency-ns = <40000>;
> +		};
> +		opp-1608000000 {
> +			opp-hz = /bits/ 64 <1608000000>;
> +			opp-microvolt = <762500 762500 1000000>;
> +			clock-latency-ns = <40000>;
> +		};
> +		opp-1800000000 {
> +			opp-hz = /bits/ 64 <1800000000>;
> +			opp-microvolt = <850000 850000 1000000>;
> +			clock-latency-ns = <40000>;
> +		};
> +		opp-2016000000 {
> +			opp-hz = /bits/ 64 <2016000000>;
> +			opp-microvolt = <925000 925000 1000000>;
> +			clock-latency-ns = <40000>;
> +		};
> +		opp-2208000000 {
> +			opp-hz = /bits/ 64 <2208000000>;
> +			opp-microvolt = <987500 987500 1000000>;
> +			clock-latency-ns = <40000>;
> +		};
> +		opp-2256000000 {
> +			opp-hz = /bits/ 64 <2256000000>;
> +			opp-microvolt = <1000000 1000000 1000000>;
> +			clock-latency-ns = <40000>;
> +		};
> +		opp-2304000000 {
> +			opp-hz = /bits/ 64 <2304000000>;
> +			opp-microvolt = <1000000 1000000 1000000>;
> +			clock-latency-ns = <40000>;
> +		};
> +		opp-2352000000 {
> +			opp-hz = /bits/ 64 <2352000000>;
> +			opp-microvolt = <1000000 1000000 1000000>;
> +			clock-latency-ns = <40000>;
> +		};
> +		opp-2400000000 {
> +			opp-hz = /bits/ 64 <2400000000>;
> +			opp-microvolt = <1000000 1000000 1000000>;
> +			clock-latency-ns = <40000>;
> +		};

Same comment

> +	};
> +
> +	cluster2_opp_table: opp-table-cluster2 {
> +		compatible = "operating-points-v2";
> +		opp-shared;
> +
> +		opp-408000000 {
> +			opp-hz = /bits/ 64 <408000000>;
> +			opp-microvolt = <675000 675000 1000000>;
> +			clock-latency-ns = <40000>;
> +			opp-suspend;
> +		};
> +		opp-600000000 {
> +			opp-hz = /bits/ 64 <600000000>;
> +			opp-microvolt = <675000 675000 1000000>;
> +			clock-latency-ns = <40000>;
> +		};
> +		opp-816000000 {
> +			opp-hz = /bits/ 64 <816000000>;
> +			opp-microvolt = <675000 675000 1000000>;
> +			clock-latency-ns = <40000>;
> +		};
> +		opp-1008000000 {
> +			opp-hz = /bits/ 64 <1008000000>;
> +			opp-microvolt = <675000 675000 1000000>;
> +			clock-latency-ns = <40000>;
> +		};
> +		opp-1200000000 {
> +			opp-hz = /bits/ 64 <1200000000>;
> +			opp-microvolt = <675000 675000 1000000>;
> +			clock-latency-ns = <40000>;
> +		};
> +		opp-1416000000 {
> +			opp-hz = /bits/ 64 <1416000000>;
> +			opp-microvolt = <725000 725000 1000000>;
> +			clock-latency-ns = <40000>;
> +		};
> +		opp-1608000000 {
> +			opp-hz = /bits/ 64 <1608000000>;
> +			opp-microvolt = <762500 762500 1000000>;
> +			clock-latency-ns = <40000>;
> +		};
> +		opp-1800000000 {
> +			opp-hz = /bits/ 64 <1800000000>;
> +			opp-microvolt = <850000 850000 1000000>;
> +			clock-latency-ns = <40000>;
> +		};
> +		opp-2016000000 {
> +			opp-hz = /bits/ 64 <2016000000>;
> +			opp-microvolt = <925000 925000 1000000>;
> +			clock-latency-ns = <40000>;
> +		};
> +		opp-2208000000 {
> +			opp-hz = /bits/ 64 <2208000000>;
> +			opp-microvolt = <987500 987500 1000000>;
> +			clock-latency-ns = <40000>;
> +		};
> +		opp-2256000000 {
> +			opp-hz = /bits/ 64 <2256000000>;
> +			opp-microvolt = <1000000 1000000 1000000>;
> +			clock-latency-ns = <40000>;
> +		};
> +		opp-2304000000 {
> +			opp-hz = /bits/ 64 <2304000000>;
> +			opp-microvolt = <1000000 1000000 1000000>;
> +			clock-latency-ns = <40000>;
> +		};
> +		opp-2352000000 {
> +			opp-hz = /bits/ 64 <2352000000>;
> +			opp-microvolt = <1000000 1000000 1000000>;
> +			clock-latency-ns = <40000>;
> +		};
> +		opp-2400000000 {
> +			opp-hz = /bits/ 64 <2400000000>;
> +			opp-microvolt = <1000000 1000000 1000000>;
> +			clock-latency-ns = <40000>;
> +		};

Same comment

> +	};
> +
>   	firmware {
>   		optee: optee {
>   			compatible = "linaro,optee-tz";
>
Alexey Charkov Jan. 25, 2024, 10:17 a.m. UTC | #2
On Thu, Jan 25, 2024 at 1:30 PM Daniel Lezcano
<daniel.lezcano@linaro.org> wrote:
>
>
> Hi Alexey,
>
> Adding Viresh
>
> On 24/01/2024 21:30, Alexey Charkov wrote:
> > By default the CPUs on RK3588 start up in a conservative performance
> > mode. Add frequency and voltage mappings to the device tree to enable
> > dynamic scaling via cpufreq
> >
> > Signed-off-by: Alexey Charkov <alchark@gmail.com>
> > ---
> >   arch/arm64/boot/dts/rockchip/rk3588s.dtsi | 209 ++++++++++++++++++++++++++++++
> >   1 file changed, 209 insertions(+)
> >
> > diff --git a/arch/arm64/boot/dts/rockchip/rk3588s.dtsi b/arch/arm64/boot/dts/rockchip/rk3588s.dtsi
> > index 131b9eb21398..e605be531a0f 100644
> > --- a/arch/arm64/boot/dts/rockchip/rk3588s.dtsi
> > +++ b/arch/arm64/boot/dts/rockchip/rk3588s.dtsi
> > @@ -97,6 +97,7 @@ cpu_l0: cpu@0 {
> >                       clocks = <&scmi_clk SCMI_CLK_CPUL>;
> >                       assigned-clocks = <&scmi_clk SCMI_CLK_CPUL>;
> >                       assigned-clock-rates = <816000000>;
> > +                     operating-points-v2 = <&cluster0_opp_table>;
> >                       cpu-idle-states = <&CPU_SLEEP>;
> >                       i-cache-size = <32768>;
> >                       i-cache-line-size = <64>;
> > @@ -116,6 +117,7 @@ cpu_l1: cpu@100 {
> >                       enable-method = "psci";
> >                       capacity-dmips-mhz = <530>;
> >                       clocks = <&scmi_clk SCMI_CLK_CPUL>;
> > +                     operating-points-v2 = <&cluster0_opp_table>;
> >                       cpu-idle-states = <&CPU_SLEEP>;
> >                       i-cache-size = <32768>;
> >                       i-cache-line-size = <64>;
> > @@ -135,6 +137,7 @@ cpu_l2: cpu@200 {
> >                       enable-method = "psci";
> >                       capacity-dmips-mhz = <530>;
> >                       clocks = <&scmi_clk SCMI_CLK_CPUL>;
> > +                     operating-points-v2 = <&cluster0_opp_table>;
> >                       cpu-idle-states = <&CPU_SLEEP>;
> >                       i-cache-size = <32768>;
> >                       i-cache-line-size = <64>;
> > @@ -154,6 +157,7 @@ cpu_l3: cpu@300 {
> >                       enable-method = "psci";
> >                       capacity-dmips-mhz = <530>;
> >                       clocks = <&scmi_clk SCMI_CLK_CPUL>;
> > +                     operating-points-v2 = <&cluster0_opp_table>;
> >                       cpu-idle-states = <&CPU_SLEEP>;
> >                       i-cache-size = <32768>;
> >                       i-cache-line-size = <64>;
> > @@ -175,6 +179,7 @@ cpu_b0: cpu@400 {
> >                       clocks = <&scmi_clk SCMI_CLK_CPUB01>;
> >                       assigned-clocks = <&scmi_clk SCMI_CLK_CPUB01>;
> >                       assigned-clock-rates = <816000000>;
> > +                     operating-points-v2 = <&cluster1_opp_table>;
> >                       cpu-idle-states = <&CPU_SLEEP>;
> >                       i-cache-size = <65536>;
> >                       i-cache-line-size = <64>;
> > @@ -194,6 +199,7 @@ cpu_b1: cpu@500 {
> >                       enable-method = "psci";
> >                       capacity-dmips-mhz = <1024>;
> >                       clocks = <&scmi_clk SCMI_CLK_CPUB01>;
> > +                     operating-points-v2 = <&cluster1_opp_table>;
> >                       cpu-idle-states = <&CPU_SLEEP>;
> >                       i-cache-size = <65536>;
> >                       i-cache-line-size = <64>;
> > @@ -215,6 +221,7 @@ cpu_b2: cpu@600 {
> >                       clocks = <&scmi_clk SCMI_CLK_CPUB23>;
> >                       assigned-clocks = <&scmi_clk SCMI_CLK_CPUB23>;
> >                       assigned-clock-rates = <816000000>;
> > +                     operating-points-v2 = <&cluster2_opp_table>;
> >                       cpu-idle-states = <&CPU_SLEEP>;
> >                       i-cache-size = <65536>;
> >                       i-cache-line-size = <64>;
> > @@ -234,6 +241,7 @@ cpu_b3: cpu@700 {
> >                       enable-method = "psci";
> >                       capacity-dmips-mhz = <1024>;
> >                       clocks = <&scmi_clk SCMI_CLK_CPUB23>;
> > +                     operating-points-v2 = <&cluster2_opp_table>;
> >                       cpu-idle-states = <&CPU_SLEEP>;
> >                       i-cache-size = <65536>;
> >                       i-cache-line-size = <64>;
> > @@ -348,6 +356,207 @@ l3_cache: l3-cache {
> >               };
> >       };
> >
> > +     cluster0_opp_table: opp-table-cluster0 {
> > +             compatible = "operating-points-v2";
> > +             opp-shared;
> > +
> > +             opp-408000000 {
> > +                     opp-hz = /bits/ 64 <408000000>;
> > +                     opp-microvolt = <675000 675000 950000>;
> > +                     clock-latency-ns = <40000>;
> > +             };
> > +             opp-600000000 {
> > +                     opp-hz = /bits/ 64 <600000000>;
> > +                     opp-microvolt = <675000 675000 950000>;
> > +                     clock-latency-ns = <40000>;
> > +             };
> > +             opp-816000000 {
> > +                     opp-hz = /bits/ 64 <816000000>;
> > +                     opp-microvolt = <675000 675000 950000>;
> > +                     clock-latency-ns = <40000>;
> > +             };
> > +             opp-1008000000 {
> > +                     opp-hz = /bits/ 64 <1008000000>;
> > +                     opp-microvolt = <675000 675000 950000>;
> > +                     clock-latency-ns = <40000>;
> > +             };
>
> It is not useful to introduce OPP with the same voltage. There is no
> gain in terms of energy efficiency as the compute capacity is linearly
> tied with power consumption (P=CxFxV²) in this case.
>
> For example, opp-408 consumes 2 bogoWatts and opp-816 consumes 4
> bogoWatts (because of the same voltage).
>
> For a workload, opp-408 takes 10 sec and opp-816 takes 5 sec because it
> is twice faster.
>
> The energy consumption is:
>
> opp-408 = 10 x 2 = 20 BogoJoules
> opp-816 = 5 x 4 = 20 BogoJoules

I see, thank you. Will drop all "lower frequency - same voltage"
instances and resubmit in the next iteration.

Best regards,
Alexey
Dragan Simic Jan. 26, 2024, 6:32 a.m. UTC | #3
Hello Daniel,

On 2024-01-25 10:30, Daniel Lezcano wrote:
> On 24/01/2024 21:30, Alexey Charkov wrote:
>> By default the CPUs on RK3588 start up in a conservative performance
>> mode. Add frequency and voltage mappings to the device tree to enable
>> dynamic scaling via cpufreq
>> 
>> Signed-off-by: Alexey Charkov <alchark@gmail.com>
>> ---
>>   arch/arm64/boot/dts/rockchip/rk3588s.dtsi | 209 
>> ++++++++++++++++++++++++++++++
>>   1 file changed, 209 insertions(+)
>> 
>> diff --git a/arch/arm64/boot/dts/rockchip/rk3588s.dtsi 
>> b/arch/arm64/boot/dts/rockchip/rk3588s.dtsi
>> index 131b9eb21398..e605be531a0f 100644
>> --- a/arch/arm64/boot/dts/rockchip/rk3588s.dtsi
>> +++ b/arch/arm64/boot/dts/rockchip/rk3588s.dtsi
>> @@ -97,6 +97,7 @@ cpu_l0: cpu@0 {
>>   			clocks = <&scmi_clk SCMI_CLK_CPUL>;
>>   			assigned-clocks = <&scmi_clk SCMI_CLK_CPUL>;
>>   			assigned-clock-rates = <816000000>;
>> +			operating-points-v2 = <&cluster0_opp_table>;
>>   			cpu-idle-states = <&CPU_SLEEP>;
>>   			i-cache-size = <32768>;
>>   			i-cache-line-size = <64>;
>> @@ -116,6 +117,7 @@ cpu_l1: cpu@100 {
>>   			enable-method = "psci";
>>   			capacity-dmips-mhz = <530>;
>>   			clocks = <&scmi_clk SCMI_CLK_CPUL>;
>> +			operating-points-v2 = <&cluster0_opp_table>;
>>   			cpu-idle-states = <&CPU_SLEEP>;
>>   			i-cache-size = <32768>;
>>   			i-cache-line-size = <64>;
>> @@ -135,6 +137,7 @@ cpu_l2: cpu@200 {
>>   			enable-method = "psci";
>>   			capacity-dmips-mhz = <530>;
>>   			clocks = <&scmi_clk SCMI_CLK_CPUL>;
>> +			operating-points-v2 = <&cluster0_opp_table>;
>>   			cpu-idle-states = <&CPU_SLEEP>;
>>   			i-cache-size = <32768>;
>>   			i-cache-line-size = <64>;
>> @@ -154,6 +157,7 @@ cpu_l3: cpu@300 {
>>   			enable-method = "psci";
>>   			capacity-dmips-mhz = <530>;
>>   			clocks = <&scmi_clk SCMI_CLK_CPUL>;
>> +			operating-points-v2 = <&cluster0_opp_table>;
>>   			cpu-idle-states = <&CPU_SLEEP>;
>>   			i-cache-size = <32768>;
>>   			i-cache-line-size = <64>;
>> @@ -175,6 +179,7 @@ cpu_b0: cpu@400 {
>>   			clocks = <&scmi_clk SCMI_CLK_CPUB01>;
>>   			assigned-clocks = <&scmi_clk SCMI_CLK_CPUB01>;
>>   			assigned-clock-rates = <816000000>;
>> +			operating-points-v2 = <&cluster1_opp_table>;
>>   			cpu-idle-states = <&CPU_SLEEP>;
>>   			i-cache-size = <65536>;
>>   			i-cache-line-size = <64>;
>> @@ -194,6 +199,7 @@ cpu_b1: cpu@500 {
>>   			enable-method = "psci";
>>   			capacity-dmips-mhz = <1024>;
>>   			clocks = <&scmi_clk SCMI_CLK_CPUB01>;
>> +			operating-points-v2 = <&cluster1_opp_table>;
>>   			cpu-idle-states = <&CPU_SLEEP>;
>>   			i-cache-size = <65536>;
>>   			i-cache-line-size = <64>;
>> @@ -215,6 +221,7 @@ cpu_b2: cpu@600 {
>>   			clocks = <&scmi_clk SCMI_CLK_CPUB23>;
>>   			assigned-clocks = <&scmi_clk SCMI_CLK_CPUB23>;
>>   			assigned-clock-rates = <816000000>;
>> +			operating-points-v2 = <&cluster2_opp_table>;
>>   			cpu-idle-states = <&CPU_SLEEP>;
>>   			i-cache-size = <65536>;
>>   			i-cache-line-size = <64>;
>> @@ -234,6 +241,7 @@ cpu_b3: cpu@700 {
>>   			enable-method = "psci";
>>   			capacity-dmips-mhz = <1024>;
>>   			clocks = <&scmi_clk SCMI_CLK_CPUB23>;
>> +			operating-points-v2 = <&cluster2_opp_table>;
>>   			cpu-idle-states = <&CPU_SLEEP>;
>>   			i-cache-size = <65536>;
>>   			i-cache-line-size = <64>;
>> @@ -348,6 +356,207 @@ l3_cache: l3-cache {
>>   		};
>>   	};
>>   +	cluster0_opp_table: opp-table-cluster0 {
>> +		compatible = "operating-points-v2";
>> +		opp-shared;
>> +
>> +		opp-408000000 {
>> +			opp-hz = /bits/ 64 <408000000>;
>> +			opp-microvolt = <675000 675000 950000>;
>> +			clock-latency-ns = <40000>;
>> +		};
>> +		opp-600000000 {
>> +			opp-hz = /bits/ 64 <600000000>;
>> +			opp-microvolt = <675000 675000 950000>;
>> +			clock-latency-ns = <40000>;
>> +		};
>> +		opp-816000000 {
>> +			opp-hz = /bits/ 64 <816000000>;
>> +			opp-microvolt = <675000 675000 950000>;
>> +			clock-latency-ns = <40000>;
>> +		};
>> +		opp-1008000000 {
>> +			opp-hz = /bits/ 64 <1008000000>;
>> +			opp-microvolt = <675000 675000 950000>;
>> +			clock-latency-ns = <40000>;
>> +		};
> 
> It is not useful to introduce OPP with the same voltage. There is no
> gain in terms of energy efficiency as the compute capacity is linearly
> tied with power consumption (P=CxFxV²) in this case.
> 
> For example, opp-408 consumes 2 bogoWatts and opp-816 consumes 4
> bogoWatts (because of the same voltage).
> 
> For a workload, opp-408 takes 10 sec and opp-816 takes 5 sec because
> it is twice faster.
> 
> The energy consumption is:
> 
> opp-408 = 10 x 2 = 20 BogoJoules
> opp-816 = 5 x 4 = 20 BogoJoules

I'd respectfully disagree that including multiple OPPs with the same
voltage but different frequencies isn't useful.  Please allow me to
explain.

See, the total amount of consumed energy is, in general, the same for
such OPPs and the same CPU task(s), if we ignore the static leakage
current and such stuff, which isn't important here.  Though, the emphasis
here is on "total", i.e. without taking into account the actual amount of
time required for the exemplified CPU task(s) to complete.  If the total
amount of time is quite short, we aren't going to heat up the package and
the board enough to hit the CPU thermal throttling;  this approach is
also sometimes referred to as "race to idle", which is actually quite
effective for battery-powered mobile devices that tend to load their CPU
cores in bursts, while remaining mostly inactive for the rest of the
time.

However, if the CPU task(s) last long enough to actually saturate the
thermal capacities of the package and the board or the device, we're
getting into the CPU throttling territory, in which running the CPU cores
slower, but still as fast as possible, may actually be beneficial for the
overall CPU performance.  By running the CPU cores slower, we're lowering
the power and "spreading" the total energy consumption over time, i.e.
we're making some time to allow the generated heat to dissipate into the
surroundings.  As we know, having more energy consumed by the SoC means
more heat generated by the SoC, but the resulting temperature of the SoC
depends on how fast the energy is consumed, which corresponds to how fast
the CPUs run;  of course, all that is valid under the reasonable
assumption that the entire cooling setup, including the board
surroundings, remains unchanged all the time.

Having all that in mind, having a few OPPs with the same voltage but
different frequencies can actually help us achieve better CPU
performance.  That way, throttling won't have to slow the CPUs more than
is actually needed to hit and maintain the desired thermal trip
temperatures.

>> +		opp-1200000000 {
>> +			opp-hz = /bits/ 64 <1200000000>;
>> +			opp-microvolt = <712500 712500 950000>;
>> +			clock-latency-ns = <40000>;
>> +		};
>> +		opp-1416000000 {
>> +			opp-hz = /bits/ 64 <1416000000>;
>> +			opp-microvolt = <762500 762500 950000>;
>> +			clock-latency-ns = <40000>;
>> +			opp-suspend;
>> +		};
>> +		opp-1608000000 {
>> +			opp-hz = /bits/ 64 <1608000000>;
>> +			opp-microvolt = <850000 850000 950000>;
>> +			clock-latency-ns = <40000>;
>> +		};
>> +		opp-1800000000 {
>> +			opp-hz = /bits/ 64 <1800000000>;
>> +			opp-microvolt = <950000 950000 950000>;
>> +			clock-latency-ns = <40000>;
>> +		};
>> +	};
>> +
>> +	cluster1_opp_table: opp-table-cluster1 {
>> +		compatible = "operating-points-v2";
>> +		opp-shared;
>> +
>> +		opp-408000000 {
>> +			opp-hz = /bits/ 64 <408000000>;
>> +			opp-microvolt = <675000 675000 1000000>;
>> +			clock-latency-ns = <40000>;
>> +			opp-suspend;
>> +		};
>> +		opp-600000000 {
>> +			opp-hz = /bits/ 64 <600000000>;
>> +			opp-microvolt = <675000 675000 1000000>;
>> +			clock-latency-ns = <40000>;
>> +		};
>> +		opp-816000000 {
>> +			opp-hz = /bits/ 64 <816000000>;
>> +			opp-microvolt = <675000 675000 1000000>;
>> +			clock-latency-ns = <40000>;
>> +		};
>> +		opp-1008000000 {
>> +			opp-hz = /bits/ 64 <1008000000>;
>> +			opp-microvolt = <675000 675000 1000000>;
>> +			clock-latency-ns = <40000>;
>> +		};
> 
> same comment
> 
>> +		opp-1200000000 {
>> +			opp-hz = /bits/ 64 <1200000000>;
>> +			opp-microvolt = <675000 675000 1000000>;
>> +			clock-latency-ns = <40000>;
>> +		};
>> +		opp-1416000000 {
>> +			opp-hz = /bits/ 64 <1416000000>;
>> +			opp-microvolt = <725000 725000 1000000>;
>> +			clock-latency-ns = <40000>;
>> +		};
>> +		opp-1608000000 {
>> +			opp-hz = /bits/ 64 <1608000000>;
>> +			opp-microvolt = <762500 762500 1000000>;
>> +			clock-latency-ns = <40000>;
>> +		};
>> +		opp-1800000000 {
>> +			opp-hz = /bits/ 64 <1800000000>;
>> +			opp-microvolt = <850000 850000 1000000>;
>> +			clock-latency-ns = <40000>;
>> +		};
>> +		opp-2016000000 {
>> +			opp-hz = /bits/ 64 <2016000000>;
>> +			opp-microvolt = <925000 925000 1000000>;
>> +			clock-latency-ns = <40000>;
>> +		};
>> +		opp-2208000000 {
>> +			opp-hz = /bits/ 64 <2208000000>;
>> +			opp-microvolt = <987500 987500 1000000>;
>> +			clock-latency-ns = <40000>;
>> +		};
>> +		opp-2256000000 {
>> +			opp-hz = /bits/ 64 <2256000000>;
>> +			opp-microvolt = <1000000 1000000 1000000>;
>> +			clock-latency-ns = <40000>;
>> +		};
>> +		opp-2304000000 {
>> +			opp-hz = /bits/ 64 <2304000000>;
>> +			opp-microvolt = <1000000 1000000 1000000>;
>> +			clock-latency-ns = <40000>;
>> +		};
>> +		opp-2352000000 {
>> +			opp-hz = /bits/ 64 <2352000000>;
>> +			opp-microvolt = <1000000 1000000 1000000>;
>> +			clock-latency-ns = <40000>;
>> +		};
>> +		opp-2400000000 {
>> +			opp-hz = /bits/ 64 <2400000000>;
>> +			opp-microvolt = <1000000 1000000 1000000>;
>> +			clock-latency-ns = <40000>;
>> +		};
> 
> Same comment
> 
>> +	};
>> +
>> +	cluster2_opp_table: opp-table-cluster2 {
>> +		compatible = "operating-points-v2";
>> +		opp-shared;
>> +
>> +		opp-408000000 {
>> +			opp-hz = /bits/ 64 <408000000>;
>> +			opp-microvolt = <675000 675000 1000000>;
>> +			clock-latency-ns = <40000>;
>> +			opp-suspend;
>> +		};
>> +		opp-600000000 {
>> +			opp-hz = /bits/ 64 <600000000>;
>> +			opp-microvolt = <675000 675000 1000000>;
>> +			clock-latency-ns = <40000>;
>> +		};
>> +		opp-816000000 {
>> +			opp-hz = /bits/ 64 <816000000>;
>> +			opp-microvolt = <675000 675000 1000000>;
>> +			clock-latency-ns = <40000>;
>> +		};
>> +		opp-1008000000 {
>> +			opp-hz = /bits/ 64 <1008000000>;
>> +			opp-microvolt = <675000 675000 1000000>;
>> +			clock-latency-ns = <40000>;
>> +		};
>> +		opp-1200000000 {
>> +			opp-hz = /bits/ 64 <1200000000>;
>> +			opp-microvolt = <675000 675000 1000000>;
>> +			clock-latency-ns = <40000>;
>> +		};
>> +		opp-1416000000 {
>> +			opp-hz = /bits/ 64 <1416000000>;
>> +			opp-microvolt = <725000 725000 1000000>;
>> +			clock-latency-ns = <40000>;
>> +		};
>> +		opp-1608000000 {
>> +			opp-hz = /bits/ 64 <1608000000>;
>> +			opp-microvolt = <762500 762500 1000000>;
>> +			clock-latency-ns = <40000>;
>> +		};
>> +		opp-1800000000 {
>> +			opp-hz = /bits/ 64 <1800000000>;
>> +			opp-microvolt = <850000 850000 1000000>;
>> +			clock-latency-ns = <40000>;
>> +		};
>> +		opp-2016000000 {
>> +			opp-hz = /bits/ 64 <2016000000>;
>> +			opp-microvolt = <925000 925000 1000000>;
>> +			clock-latency-ns = <40000>;
>> +		};
>> +		opp-2208000000 {
>> +			opp-hz = /bits/ 64 <2208000000>;
>> +			opp-microvolt = <987500 987500 1000000>;
>> +			clock-latency-ns = <40000>;
>> +		};
>> +		opp-2256000000 {
>> +			opp-hz = /bits/ 64 <2256000000>;
>> +			opp-microvolt = <1000000 1000000 1000000>;
>> +			clock-latency-ns = <40000>;
>> +		};
>> +		opp-2304000000 {
>> +			opp-hz = /bits/ 64 <2304000000>;
>> +			opp-microvolt = <1000000 1000000 1000000>;
>> +			clock-latency-ns = <40000>;
>> +		};
>> +		opp-2352000000 {
>> +			opp-hz = /bits/ 64 <2352000000>;
>> +			opp-microvolt = <1000000 1000000 1000000>;
>> +			clock-latency-ns = <40000>;
>> +		};
>> +		opp-2400000000 {
>> +			opp-hz = /bits/ 64 <2400000000>;
>> +			opp-microvolt = <1000000 1000000 1000000>;
>> +			clock-latency-ns = <40000>;
>> +		};
> 
> Same comment
> 
>> +	};
>> +
>>   	firmware {
>>   		optee: optee {
>>   			compatible = "linaro,optee-tz";
>>
Alexey Charkov Jan. 26, 2024, 6:44 a.m. UTC | #4
On Fri, Jan 26, 2024 at 10:32 AM Dragan Simic <dsimic@manjaro.org> wrote:
>
> Hello Daniel,
>
> On 2024-01-25 10:30, Daniel Lezcano wrote:
> > On 24/01/2024 21:30, Alexey Charkov wrote:
> >> By default the CPUs on RK3588 start up in a conservative performance
> >> mode. Add frequency and voltage mappings to the device tree to enable
> >> dynamic scaling via cpufreq
> >>
> >> Signed-off-by: Alexey Charkov <alchark@gmail.com>
> >> ---
> >>   arch/arm64/boot/dts/rockchip/rk3588s.dtsi | 209
> >> ++++++++++++++++++++++++++++++
> >>   1 file changed, 209 insertions(+)
> >>
> >> diff --git a/arch/arm64/boot/dts/rockchip/rk3588s.dtsi
> >> b/arch/arm64/boot/dts/rockchip/rk3588s.dtsi
> >> index 131b9eb21398..e605be531a0f 100644
> >> --- a/arch/arm64/boot/dts/rockchip/rk3588s.dtsi
> >> +++ b/arch/arm64/boot/dts/rockchip/rk3588s.dtsi
> >> @@ -97,6 +97,7 @@ cpu_l0: cpu@0 {
> >>                      clocks = <&scmi_clk SCMI_CLK_CPUL>;
> >>                      assigned-clocks = <&scmi_clk SCMI_CLK_CPUL>;
> >>                      assigned-clock-rates = <816000000>;
> >> +                    operating-points-v2 = <&cluster0_opp_table>;
> >>                      cpu-idle-states = <&CPU_SLEEP>;
> >>                      i-cache-size = <32768>;
> >>                      i-cache-line-size = <64>;
> >> @@ -116,6 +117,7 @@ cpu_l1: cpu@100 {
> >>                      enable-method = "psci";
> >>                      capacity-dmips-mhz = <530>;
> >>                      clocks = <&scmi_clk SCMI_CLK_CPUL>;
> >> +                    operating-points-v2 = <&cluster0_opp_table>;
> >>                      cpu-idle-states = <&CPU_SLEEP>;
> >>                      i-cache-size = <32768>;
> >>                      i-cache-line-size = <64>;
> >> @@ -135,6 +137,7 @@ cpu_l2: cpu@200 {
> >>                      enable-method = "psci";
> >>                      capacity-dmips-mhz = <530>;
> >>                      clocks = <&scmi_clk SCMI_CLK_CPUL>;
> >> +                    operating-points-v2 = <&cluster0_opp_table>;
> >>                      cpu-idle-states = <&CPU_SLEEP>;
> >>                      i-cache-size = <32768>;
> >>                      i-cache-line-size = <64>;
> >> @@ -154,6 +157,7 @@ cpu_l3: cpu@300 {
> >>                      enable-method = "psci";
> >>                      capacity-dmips-mhz = <530>;
> >>                      clocks = <&scmi_clk SCMI_CLK_CPUL>;
> >> +                    operating-points-v2 = <&cluster0_opp_table>;
> >>                      cpu-idle-states = <&CPU_SLEEP>;
> >>                      i-cache-size = <32768>;
> >>                      i-cache-line-size = <64>;
> >> @@ -175,6 +179,7 @@ cpu_b0: cpu@400 {
> >>                      clocks = <&scmi_clk SCMI_CLK_CPUB01>;
> >>                      assigned-clocks = <&scmi_clk SCMI_CLK_CPUB01>;
> >>                      assigned-clock-rates = <816000000>;
> >> +                    operating-points-v2 = <&cluster1_opp_table>;
> >>                      cpu-idle-states = <&CPU_SLEEP>;
> >>                      i-cache-size = <65536>;
> >>                      i-cache-line-size = <64>;
> >> @@ -194,6 +199,7 @@ cpu_b1: cpu@500 {
> >>                      enable-method = "psci";
> >>                      capacity-dmips-mhz = <1024>;
> >>                      clocks = <&scmi_clk SCMI_CLK_CPUB01>;
> >> +                    operating-points-v2 = <&cluster1_opp_table>;
> >>                      cpu-idle-states = <&CPU_SLEEP>;
> >>                      i-cache-size = <65536>;
> >>                      i-cache-line-size = <64>;
> >> @@ -215,6 +221,7 @@ cpu_b2: cpu@600 {
> >>                      clocks = <&scmi_clk SCMI_CLK_CPUB23>;
> >>                      assigned-clocks = <&scmi_clk SCMI_CLK_CPUB23>;
> >>                      assigned-clock-rates = <816000000>;
> >> +                    operating-points-v2 = <&cluster2_opp_table>;
> >>                      cpu-idle-states = <&CPU_SLEEP>;
> >>                      i-cache-size = <65536>;
> >>                      i-cache-line-size = <64>;
> >> @@ -234,6 +241,7 @@ cpu_b3: cpu@700 {
> >>                      enable-method = "psci";
> >>                      capacity-dmips-mhz = <1024>;
> >>                      clocks = <&scmi_clk SCMI_CLK_CPUB23>;
> >> +                    operating-points-v2 = <&cluster2_opp_table>;
> >>                      cpu-idle-states = <&CPU_SLEEP>;
> >>                      i-cache-size = <65536>;
> >>                      i-cache-line-size = <64>;
> >> @@ -348,6 +356,207 @@ l3_cache: l3-cache {
> >>              };
> >>      };
> >>   +  cluster0_opp_table: opp-table-cluster0 {
> >> +            compatible = "operating-points-v2";
> >> +            opp-shared;
> >> +
> >> +            opp-408000000 {
> >> +                    opp-hz = /bits/ 64 <408000000>;
> >> +                    opp-microvolt = <675000 675000 950000>;
> >> +                    clock-latency-ns = <40000>;
> >> +            };
> >> +            opp-600000000 {
> >> +                    opp-hz = /bits/ 64 <600000000>;
> >> +                    opp-microvolt = <675000 675000 950000>;
> >> +                    clock-latency-ns = <40000>;
> >> +            };
> >> +            opp-816000000 {
> >> +                    opp-hz = /bits/ 64 <816000000>;
> >> +                    opp-microvolt = <675000 675000 950000>;
> >> +                    clock-latency-ns = <40000>;
> >> +            };
> >> +            opp-1008000000 {
> >> +                    opp-hz = /bits/ 64 <1008000000>;
> >> +                    opp-microvolt = <675000 675000 950000>;
> >> +                    clock-latency-ns = <40000>;
> >> +            };
> >
> > It is not useful to introduce OPP with the same voltage. There is no
> > gain in terms of energy efficiency as the compute capacity is linearly
> > tied with power consumption (P=CxFxV²) in this case.
> >
> > For example, opp-408 consumes 2 bogoWatts and opp-816 consumes 4
> > bogoWatts (because of the same voltage).
> >
> > For a workload, opp-408 takes 10 sec and opp-816 takes 5 sec because
> > it is twice faster.
> >
> > The energy consumption is:
> >
> > opp-408 = 10 x 2 = 20 BogoJoules
> > opp-816 = 5 x 4 = 20 BogoJoules
>
> I'd respectfully disagree that including multiple OPPs with the same
> voltage
> but different frequencies isn't useful.  Please allow me to explain.
>
> See, the total amount of consumed energy is, in general, the same for
> such
> OPPs and the same CPU task(s), if we ignore the static leakage current
> and
> such stuff, which isn't important here.  Though, the emphasis here is on
> "total", i.e. without taking into account the actual amount of time
> required
> for the exemplified CPU task(s) to complete.  If the total amount of
> time
> is quite short, we aren't going to heat up the package and the board
> enough
> to hit the CPU thermal throttling;  this approach is also sometimes
> referred
> to as "race to idle", which is actually quite effective for
> battery-powered
> mobile devices that tend to load their CPU cores in bursts, while
> remaining
> kind of inactive for the remaining time.
>
> However, if the CPU task(s) last long enough to actually saturate the
> thermal
> capacities of the package and the board or the device, we're getting
> into the
> CPU throttling territory, in which running the CPU cores slower, but
> still as
> fast as possible, may actually be beneficial for the overall CPU
> performance.
> By running the CPU cores slower, we're lowering the power and
> "spreading" the
> total energy consumption over time, i.e. we're making some time to allow
> the
> generated heat to dissipate into the surroundings.  As we know, having
> more
> energy consumed by the SoC means more heat generated by the SoC, but the
> resulting temperature of the SoC depends on how fast the energy is
> consumed,
> which equals to how fast the CPUs run;  of course, all that is valid
> under
> the reasonable assumption that the entire cooling setup, including the
> board
> surroundings, remains unchanged all the time.

On the other hand, convective heat dissipation is approximately
proportional to the temperature differential, therefore heating up the
core to a higher temperature over a shorter period of time would let
it dissipate the same joule amount faster. Given that total joules
generated for a particular load are approximately the same for
different frequencies as long as voltage remains the same (as Daniel
pointed out), higher frequency seems to lead to better heat transfer
to the environment for the same load. And also the task completes
sooner, which is probably always good, ceteris paribus.

Not sure how that all changes when throttling enters the game though :)

Best regards,
Alexey
Daniel Lezcano Jan. 26, 2024, 12:56 p.m. UTC | #5
On 26/01/2024 08:49, Dragan Simic wrote:
> On 2024-01-26 08:30, Alexey Charkov wrote:
>> On Fri, Jan 26, 2024 at 11:05 AM Dragan Simic <dsimic@manjaro.org> wrote:
>>> On 2024-01-26 07:44, Alexey Charkov wrote:
>>> > On Fri, Jan 26, 2024 at 10:32 AM Dragan Simic <dsimic@manjaro.org>
>>> > wrote:
>>> >> On 2024-01-25 10:30, Daniel Lezcano wrote:
>>> >> > On 24/01/2024 21:30, Alexey Charkov wrote:
>>> >> >> By default the CPUs on RK3588 start up in a conservative 
>>> performance
>>> >> >> mode. Add frequency and voltage mappings to the device tree to 
>>> enable

[ ... ]

>> Throttling would also lower the voltage at some point, which cools it
>> down much faster!
> 
> Of course, but the key is not to cool (and slow down) the CPU cores too
> much, but just enough to stay within the available thermal envelope,
> which is where the same-voltage, lower-frequency OPPs should shine.

That implies the resulting power is sustainable, which I doubt is the
case.

The voltage scaling makes the cooling effect efficient, not the frequency.

For example:
	opp5 = opp(2GHz, 1V) => 2 BogoWatt
	opp4 = opp(1.9GHz, 1V) => 1.9 BogoWatt
	opp3 = opp(1.8GHz, 0.9V) => 1.458 BogoWatt
	[ other states but we focus on these 3 ]

(ratio = compute capacity lost / power saved)

opp5->opp4 => -5% compute capacity, -5% power, ratio=1
opp4->opp3 => -5% compute capacity, -23.1% power, ratio=0.216

opp5->opp3 => -10% compute capacity, -27.1% power, ratio=0.369

In burst operation (no thermal throttling), opp4 is pointless we agree 
on that.

IMO the following will happen: in burst operation with thermal
throttling we hit the trip point and then the step-wise governor reduces
opp5 -> opp4. We have a slight power reduction but the temperature does
not decrease, so at the next iteration it is throttled to opp3. And in
the end we have opp4 <-> opp3 back and forth instead of opp5 <-> opp3.

It is probable we end up with an equivalent frequency average (or 
compute capacity avg).

opp4 <-> opp3 (longer duration in states, less transitions)
opp5 <-> opp3 (shorter duration in states, more transitions)

Some platforms had their higher OPPs with the same voltage and they 
failed to cool down the CPU in the long run.

Anyway, there is only one way to check it out :)

Alexey, is it possible to compare the compute duration for 'dhrystone'
with and without these same-voltage OPPs? (With a cool-down period
between the tests, in order to start from the same thermal conditions.)



> When the CPU load isn't bursty but steady and high, we don't race to
> idle, but run a marathon instead, so to speak. :)
Alexey Charkov Jan. 26, 2024, 1:44 p.m. UTC | #6
On Fri, Jan 26, 2024 at 4:56 PM Daniel Lezcano
<daniel.lezcano@linaro.org> wrote:
>
> On 26/01/2024 08:49, Dragan Simic wrote:
> > On 2024-01-26 08:30, Alexey Charkov wrote:
> >> On Fri, Jan 26, 2024 at 11:05 AM Dragan Simic <dsimic@manjaro.org> wrote:
> >>> On 2024-01-26 07:44, Alexey Charkov wrote:
> >>> > On Fri, Jan 26, 2024 at 10:32 AM Dragan Simic <dsimic@manjaro.org>
> >>> > wrote:
> >>> >> On 2024-01-25 10:30, Daniel Lezcano wrote:
> >>> >> > On 24/01/2024 21:30, Alexey Charkov wrote:
> >>> >> >> By default the CPUs on RK3588 start up in a conservative
> >>> performance
> >>> >> >> mode. Add frequency and voltage mappings to the device tree to
> >>> enable
>
> [ ... ]
>
> >> Throttling would also lower the voltage at some point, which cools it
> >> down much faster!
> >
> > Of course, but the key is not to cool (and slow down) the CPU cores too
> > much, but just enough to stay within the available thermal envelope,
> > which is where the same-voltage, lower-frequency OPPs should shine.
>
> That implies the resulting power is sustainable which I doubt it is the
> case.
>
> The voltage scaling makes the cooling effect efficient not the frequency.
>
> For example:
>         opp5 = opp(2GHz, 1V) => 2 BogoWatt
>         opp4 = opp(1.9GHz, 1V) => 1.9 BogoWatt
>         opp3 = opp(1.8GHz, 0.9V) => 1.458 BogoWatt
>         [ other states but we focus on these 3 ]
>
> opp5->opp4 => -5% compute capacity, -5% power, ratio=1
> opp4->opp3 => -5% compute capacity, -23.1% power, ratio=21,6
>
> opp5->opp3 => -10% compute capacity, -27.1% power, ratio=36.9
>
> In burst operation (no thermal throttling), opp4 is pointless we agree
> on that.
>
> IMO the following will happen: in burst operation with thermal
> throttling we hit the trip point and then the step wise governor reduces
> opp5 -> opp4. We have slight power reduction but the temperature does
> not decrease, so at the next iteration, it is throttle at opp3. And at
> the end we have opp4 <-> opp3 back and forth instead of opp5 <-> opp3.
>
> It is probable we end up with an equivalent frequency average (or
> compute capacity avg).
>
> opp4 <-> opp3 (longer duration in states, less transitions)
> opp5 <-> opp3 (shorter duration in states, more transitions)
>
> Some platforms had their higher OPPs with the same voltage and they
> failed to cool down the CPU in the long run.
>
> Anyway, there is only one way to check it out :)
>
> Alexey, is it possible to compare the compute duration for 'dhrystone'
> with these voltage OPP and without ? (with a period of cool down between
> the test in order to start at the same thermal condition) ?

Sure, let me try that - would be interesting to see the results. In my
previous tinkering there were cases when the system stayed at 2.35GHz
for all big cores for non-trivial time (using the step-wise thermal
governor), and that's an example of "same voltage, lower frequency".
Other times though it throttled one cluster down to 1.8GHz and kept
the other at 2.4GHz, and was also stationary at those parameters for
extended time. This probably indicates that both of those states use
sustainable power in my cooling setup.

Note though that I still have that tiny heatsink installed (even
though I disable the fan during tests), and in this setup the
temperature drops from 85C to around 70C in a matter of seconds as
soon as the load stops. And if I enable the fan then it balances the
temperature at the control setpoint of 55C using less than full fan
speed with 8 threads of dhrystone running for extended time (and the
PWM value chosen by the step-wise governor stabilizes at 240 out of
255). Looks like my prior assessment that "the fan is not super mighty
vs. the total thermal output" was wrong after all, despite its modest
size :)

Best regards,
Alexey
Dragan Simic Jan. 26, 2024, 8:04 p.m. UTC | #7
On 2024-01-26 13:56, Daniel Lezcano wrote:
> On 26/01/2024 08:49, Dragan Simic wrote:
>> On 2024-01-26 08:30, Alexey Charkov wrote:
>>> On Fri, Jan 26, 2024 at 11:05 AM Dragan Simic <dsimic@manjaro.org> 
>>> wrote:
>>>> On 2024-01-26 07:44, Alexey Charkov wrote:
>>>> > On Fri, Jan 26, 2024 at 10:32 AM Dragan Simic <dsimic@manjaro.org>
>>>> > wrote:
>>>> >> On 2024-01-25 10:30, Daniel Lezcano wrote:
>>>> >> > On 24/01/2024 21:30, Alexey Charkov wrote:
>>>> >> >> By default the CPUs on RK3588 start up in a conservative performance
>>>> >> >> mode. Add frequency and voltage mappings to the device tree to enable
> 
> [ ... ]
> 
>>> Throttling would also lower the voltage at some point, which cools it
>>> down much faster!
>> 
>> Of course, but the key is not to cool (and slow down) the CPU cores 
>> too
>> much, but just enough to stay within the available thermal envelope,
>> which is where the same-voltage, lower-frequency OPPs should shine.
> 
> That implies the resulting power is sustainable which I doubt it is the 
> case.

Hmm, why wouldn't it be sustainable?  Would you elaborate a bit, please?
I mean, there are so many factors that can't be known for sure in 
advance,
so providing additional CPU throttling granularity can only be helpful.

> The voltage scaling makes the cooling effect efficient not the 
> frequency.
> 
> For example:
> 	opp5 = opp(2GHz, 1V) => 2 BogoWatt
> 	opp4 = opp(1.9GHz, 1V) => 1.9 BogoWatt
> 	opp3 = opp(1.8GHz, 0.9V) => 1.458 BogoWatt
> 	[ other states but we focus on these 3 ]
> 
> opp5->opp4 => -5% compute capacity, -5% power, ratio=1
> opp4->opp3 => -5% compute capacity, -23.1% power, ratio=21,6
> 
> opp5->opp3 => -10% compute capacity, -27.1% power, ratio=36.9
> 
> In burst operation (no thermal throttling), opp4 is pointless we agree 
> on that.

Well, if there's no thermal throttling at all, the opp3 is also not
needed.  In an unlikely scenario like that, the opp5 is all we need.

> IMO the following will happen: in burst operation with thermal
> throttling we hit the trip point and then the step wise governor
> reduces opp5 -> opp4. We have slight power reduction but the
> temperature does not decrease, so at the next iteration, it is
> throttle at opp3. And at the end we have opp4 <-> opp3 back and forth
> instead of opp5 <-> opp3.

Why should the temperature not decrease when switching from the opp5
to the opp4?  See, we can't assume or know in advance that reducing
the power consumption by 5% wouldn't do anything;  5% is actually
quite a lot.  If that would do absolutely nothing, then something
else would probably be wrong or not as expected.

Also, for some workloads it might be better to have rather frequent
transitions between the opp4 and the opp3, instead of staying at the
opp3 for longer periods of time.  Running 100 MHz faster can be quite
significant, especially on two CPU cores.

> It is probable we end up with an equivalent frequency average (or
> compute capacity avg).
> 
> opp4 <-> opp3 (longer duration in states, less transitions)
> opp5 <-> opp3 (shorter duration in states, more transitions)
> 
> Some platforms had their higher OPPs with the same voltage and they
> failed to cool down the CPU in the long run.
> 
> Anyway, there is only one way to check it out :)
> 
> Alexey, is it possible to compare the compute duration for 'dhrystone'
> with these voltage OPP and without ? (with a period of cool down
> between the test in order to start at the same thermal condition) ?

I agree that testing and recording as much data as possible is the best
approach.  However, quite frankly, we should run more different tests,
not only one synthetic test.
Alexey Charkov Jan. 27, 2024, 7:41 p.m. UTC | #8
On Sat, Jan 27, 2024 at 12:33 AM Dragan Simic <dsimic@manjaro.org> wrote:
>
> On 2024-01-26 14:44, Alexey Charkov wrote:
> > On Fri, Jan 26, 2024 at 4:56 PM Daniel Lezcano
> > <daniel.lezcano@linaro.org> wrote:
> >> On 26/01/2024 08:49, Dragan Simic wrote:
> >> > On 2024-01-26 08:30, Alexey Charkov wrote:
> >> >> On Fri, Jan 26, 2024 at 11:05 AM Dragan Simic <dsimic@manjaro.org> wrote:
> >> >>> On 2024-01-26 07:44, Alexey Charkov wrote:
> >> >>> > On Fri, Jan 26, 2024 at 10:32 AM Dragan Simic <dsimic@manjaro.org>
> >> >>> > wrote:
> >> >>> >> On 2024-01-25 10:30, Daniel Lezcano wrote:
> >> >>> >> > On 24/01/2024 21:30, Alexey Charkov wrote:
> >> >>> >> >> By default the CPUs on RK3588 start up in a conservative
> >> >>> performance
> >> >>> >> >> mode. Add frequency and voltage mappings to the device tree to
> >> >>> enable
> >>
> >> [ ... ]
> >>
> >> >> Throttling would also lower the voltage at some point, which cools it
> >> >> down much faster!
> >> >
> >> > Of course, but the key is not to cool (and slow down) the CPU cores too
> >> > much, but just enough to stay within the available thermal envelope,
> >> > which is where the same-voltage, lower-frequency OPPs should shine.
> >>
> >> That implies the resulting power is sustainable which I doubt it is
> >> the
> >> case.
> >>
> >> The voltage scaling makes the cooling effect efficient not the
> >> frequency.
> >>
> >> For example:
> >>         opp5 = opp(2GHz, 1V) => 2 BogoWatt
> >>         opp4 = opp(1.9GHz, 1V) => 1.9 BogoWatt
> >>         opp3 = opp(1.8GHz, 0.9V) => 1.458 BogoWatt
> >>         [ other states but we focus on these 3 ]
> >>
> >> opp5->opp4 => -5% compute capacity, -5% power, ratio=1
> >> opp4->opp3 => -5% compute capacity, -23.1% power, ratio=21,6
> >>
> >> opp5->opp3 => -10% compute capacity, -27.1% power, ratio=36.9
> >>
> >> In burst operation (no thermal throttling), opp4 is pointless we agree
> >> on that.
> >>
> >> IMO the following will happen: in burst operation with thermal
> >> throttling we hit the trip point and then the step wise governor
> >> reduces
> >> opp5 -> opp4. We have slight power reduction but the temperature does
> >> not decrease, so at the next iteration, it is throttle at opp3. And at
> >> the end we have opp4 <-> opp3 back and forth instead of opp5 <-> opp3.
> >>
> >> It is probable we end up with an equivalent frequency average (or
> >> compute capacity avg).
> >>
> >> opp4 <-> opp3 (longer duration in states, less transitions)
> >> opp5 <-> opp3 (shorter duration in states, more transitions)
> >>
> >> Some platforms had their higher OPPs with the same voltage and they
> >> failed to cool down the CPU in the long run.
> >>
> >> Anyway, there is only one way to check it out :)
> >>
> >> Alexey, is it possible to compare the compute duration for 'dhrystone'
> >> with these voltage OPP and without ? (with a period of cool down
> >> between
> >> the test in order to start at the same thermal condition) ?
> >
> > Sure, let me try that - would be interesting to see the results. In my
> > previous tinkering there were cases when the system stayed at 2.35GHz
> > for all big cores for non-trivial time (using the step-wise thermal
> > governor), and that's an example of "same voltage, lower frequency".
> > Other times though it throttled one cluster down to 1.8GHz and kept
> > the other at 2.4GHz, and was also stationary at those parameters for
> > extended time. This probably indicates that both of those states use
> > sustainable power in my cooling setup.
>
> IMHO, there are simply too many factors at play, including different
> possible cooling setups, so providing additional CPU throttling
> granularity can only be helpful.  Of course, testing and recording
> data is the way to move forward, but I think we should use a few
> different tests.

Soooo, benchmarking these turned out a bit trickier than I had hoped
for. Apparently, dhrystone uses an unsigned int rather than an
unsigned long for the loops count (or something of that sort), which
means that I can't get it to run enough loops to heat up my chip from
a stable idle state to the throttling state (due to counter
wraparound). So I ended up with a couple of crutches, namely:
 - run dhrystone continuously on 6 out of 8 cores to make the chip
warm enough (`taskset -c 0-5 ./dhrystone -t 6 -r 6000` - note that on
my machine cores 6-7 are usually the first ones to get throttled, due
to whatever thermal peculiarities)
 - wait for the temperature to stabilize (which happens at 79.5C)
 - then run timed dhrystone on the remaining 2 out of 8 cores (big
ones) to see how throttling with different OPP tables affects overall
performance.

In the end, here's what I got with the 'original' OPP table (including
"same voltage - different frequencies" states):
alchark@rock-5b ~ $ taskset -c 6-7 ./dhrystone -t 2 -l 4000000000
duration: 0 seconds
number of threads: 2
number of loops: 4000000000000000
delay between starting threads: 0 seconds

Dhrystone(1.1) time for 1233977344 passes = 29.7
This machine benchmarks at 41481539 dhrystones/second
                           23609 DMIPS
Dhrystone(1.1) time for 1233977344 passes = 29.8
This machine benchmarks at 41476618 dhrystones/second
                           23606 DMIPS

Total dhrystone run time: 30.864492 seconds.

And here's what I got with the 'reduced' OPP table (keeping only the
highest frequency state for each voltage):
alchark@rock-5b ~ $ taskset -c 6-7 ./dhrystone -t 2 -l 4000000000
duration: 0 seconds
number of threads: 2
number of loops: 4000000000000000
delay between starting threads: 0 seconds

Dhrystone(1.1) time for 1233977344 passes = 30.9
This machine benchmarks at 39968549 dhrystones/second
                          22748 DMIPS
Dhrystone(1.1) time for 1233977344 passes = 31.0
This machine benchmarks at 39817431 dhrystones/second
                          22662 DMIPS

Total dhrystone run time: 31.995136 seconds.

Bottom line: removing the lower-frequency OPPs led to a 3.8% drop in
performance in this setup. This is probably far from a reliable
estimate, but I guess it indeed indicates that having lower-frequency
states might be beneficial in some load scenarios.

Note though that several seconds after hitting the throttling
threshold cores 6-7 were oscillating between 1.608GHz and 1.8GHz in
both runs, which implies that the whole difference in performance was
due to different speed of initial throttling (i.e. it might be a
peculiarity of the step-wise thermal governor operation when it has to
go through more cooling states to reach the "steady-state" one). Given
that both 1.608GHz and 1.8GHz have no lower-frequency same-voltage
siblings in either of the OPP tables, it implies that under prolonged
constant load there should be no performance difference at all.

Best regards,
Alexey
Alexey Charkov Jan. 28, 2024, 7:32 p.m. UTC | #9
On Sun, Jan 28, 2024 at 7:06 PM Daniel Lezcano
<daniel.lezcano@linaro.org> wrote:
>
>
> Hi Alexey,

Hi Daniel,

> On 27/01/2024 20:41, Alexey Charkov wrote:
> > On Sat, Jan 27, 2024 at 12:33 AM Dragan Simic <dsimic@manjaro.org> wrote:
> >>
> >> On 2024-01-26 14:44, Alexey Charkov wrote:
> >>> On Fri, Jan 26, 2024 at 4:56 PM Daniel Lezcano
> >>> <daniel.lezcano@linaro.org> wrote:
> >>>> On 26/01/2024 08:49, Dragan Simic wrote:
> >>>>> On 2024-01-26 08:30, Alexey Charkov wrote:
> >>>>>> On Fri, Jan 26, 2024 at 11:05 AM Dragan Simic <dsimic@manjaro.org> wrote:
> >>>>>>> On 2024-01-26 07:44, Alexey Charkov wrote:
> >>>>>>>> On Fri, Jan 26, 2024 at 10:32 AM Dragan Simic <dsimic@manjaro.org>
> >>>>>>>> wrote:
> >>>>>>>>> On 2024-01-25 10:30, Daniel Lezcano wrote:
> >>>>>>>>>> On 24/01/2024 21:30, Alexey Charkov wrote:
> >>>>>>>>>>> By default the CPUs on RK3588 start up in a conservative
> >>>>>>> performance
> >>>>>>>>>>> mode. Add frequency and voltage mappings to the device tree to
> >>>>>>> enable
> >>>>
> >>>> [ ... ]
> >>>>
> >>>>>> Throttling would also lower the voltage at some point, which cools it
> >>>>>> down much faster!
> >>>>>
> >>>>> Of course, but the key is not to cool (and slow down) the CPU cores too
> >>>>> much, but just enough to stay within the available thermal envelope,
> >>>>> which is where the same-voltage, lower-frequency OPPs should shine.
> >>>>
> >>>> That implies the resulting power is sustainable which I doubt it is
> >>>> the
> >>>> case.
> >>>>
> >>>> The voltage scaling makes the cooling effect efficient not the
> >>>> frequency.
> >>>>
> >>>> For example:
> >>>>          opp5 = opp(2GHz, 1V) => 2 BogoWatt
> >>>>          opp4 = opp(1.9GHz, 1V) => 1.9 BogoWatt
> >>>>          opp3 = opp(1.8GHz, 0.9V) => 1.458 BogoWatt
> >>>>          [ other states but we focus on these 3 ]
> >>>>
> >>>> opp5->opp4 => -5% compute capacity, -5% power, ratio=1
> >>>> opp4->opp3 => -5% compute capacity, -23.1% power, ratio=21,6
> >>>>
> >>>> opp5->opp3 => -10% compute capacity, -27.1% power, ratio=36.9
> >>>>
> >>>> In burst operation (no thermal throttling), opp4 is pointless we agree
> >>>> on that.
> >>>>
> >>>> IMO the following will happen: in burst operation with thermal
> >>>> throttling we hit the trip point and then the step wise governor
> >>>> reduces
> >>>> opp5 -> opp4. We have slight power reduction but the temperature does
> >>>> not decrease, so at the next iteration, it is throttle at opp3. And at
> >>>> the end we have opp4 <-> opp3 back and forth instead of opp5 <-> opp3.
> >>>>
> >>>> It is probable we end up with an equivalent frequency average (or
> >>>> compute capacity avg).
> >>>>
> >>>> opp4 <-> opp3 (longer duration in states, less transitions)
> >>>> opp5 <-> opp3 (shorter duration in states, more transitions)
> >>>>
> >>>> Some platforms had their higher OPPs with the same voltage and they
> >>>> failed to cool down the CPU in the long run.
> >>>>
> >>>> Anyway, there is only one way to check it out :)
> >>>>
> >>>> Alexey, is it possible to compare the compute duration for 'dhrystone'
> >>>> with these voltage OPP and without ? (with a period of cool down
> >>>> between
> >>>> the test in order to start at the same thermal condition) ?
> >>>
> >>> Sure, let me try that - would be interesting to see the results. In my
> >>> previous tinkering there were cases when the system stayed at 2.35GHz
> >>> for all big cores for non-trivial time (using the step-wise thermal
> >>> governor), and that's an example of "same voltage, lower frequency".
> >>> Other times though it throttled one cluster down to 1.8GHz and kept
> >>> the other at 2.4GHz, and was also stationary at those parameters for
> >>> extended time. This probably indicates that both of those states use
> >>> sustainable power in my cooling setup.
> >>
> >> IMHO, there are simply too many factors at play, including different
> >> possible cooling setups, so providing additional CPU throttling
> >> granularity can only be helpful.  Of course, testing and recording
> >> data is the way to move forward, but I think we should use a few
> >> different tests.
> >
> > Soooo, benchmarking these turned out a bit trickier than I had hoped
> > for. Apparently, dhrystone uses an unsigned int rather than an
> > unsigned long for the loops count (or something of that sort), which
> > means that I can't get it to run enough loops to heat up my chip from
> > a stable idle state to the throttling state (due to counter
> > wraparound). So I ended up with a couple of crutches, namely:
> >   - run dhrystone continuously on 6 out of 8 cores to make the chip
> > warm enough (`taskset -c 0-5 ./dhrystone -t 6 -r 6000` - note that on
> > my machine cores 6-7 are usually the first ones to get throttled, due
> > to whatever thermal peculiarities)
> >   - wait for the temperature to stabilize (which happens at 79.5C)
> >   - then run timed dhrystone on the remaining 2 out of 6 cores (big
> > ones) to see how throttling with different OPP tables affects overall
> > performance.
>
> Thanks for taking the time to test.
>
> > In the end, here's what I got with the 'original' OPP table (including
> > "same voltage - different frequencies" states):
> > alchark@rock-5b ~ $ taskset -c 6-7 ./dhrystone -t 2 -l 4000000000
> > duration: 0 seconds
> > number of threads: 2
> > number of loops: 4000000000000000
> > delay between starting threads: 0 seconds
> >
> > Dhrystone(1.1) time for 1233977344 passes = 29.7
> > This machine benchmarks at 41481539 dhrystones/second
> >                             23609 DMIPS
> > Dhrystone(1.1) time for 1233977344 passes = 29.8
> > This machine benchmarks at 41476618 dhrystones/second
> >                             23606 DMIPS
> >
> > Total dhrystone run time: 30.864492 seconds.
> >
> > And here's what I got with the 'reduced' OPP table (keeping only the
> > highest frequency state for each voltage):
> > alchark@rock-5b ~ $ taskset -c 6-7 ./dhrystone -t 2 -l 4000000000
> > duration: 0 seconds
> > number of threads: 2
> > number of loops: 4000000000000000
> > delay between starting threads: 0 seconds
> >
> > Dhrystone(1.1) time for 1233977344 passes = 30.9
> > This machine benchmarks at 39968549 dhrystones/second
> >                            22748 DMIPS
> > Dhrystone(1.1) time for 1233977344 passes = 31.0
> > This machine benchmarks at 39817431 dhrystones/second
> >                            22662 DMIPS
> >
> > Total dhrystone run time: 31.995136 seconds.
> >
> > Bottomline: removing the lower-frequency OPPs led to a 3.8% drop in
> > performance in this setup. This is probably far from a reliable
> > estimate, but I guess it indeed indicates that having lower-frequency
> > states might be beneficial in some load scenarios.
>
> What is the duration between these two tests?

Several hours and a couple of reboots. I did the first one, recorded
the results and the temperatures, then rebuilt the dtb the next day,
rebooted with it and did everything again with the other OPP table.

> I would be curious if it is repeatable by inverting the setup (reduced
> OPP table and then original OPP table).

Frankly, I can't see how ordering could have mattered, given that I
let the system cool down completely, and also rebooted it to use a
different dtb, so there shouldn't have been any caching effects. Maybe
there is some outside randomness in the results though - perhaps 5-10
repetitions in each case would have been more statistically
meaningful. But then again to make it statistically meaningful I'd
have to peg the other (non-benchmarked) cores to a static OPP to
ensure the thermal governor doesn't play with them when not asked to -
and it all starts to sound like a rabbit hole :)

> BTW: I used -l 10000 for a ~30 seconds workload more or less on the
> rk3399, may be -l 20000 will be ok for the rk3588.

-l 20000 with two threads also gives me about ~30 seconds runtime...
While -l 200000 completed in 25 seconds *facepalm*

Best regards,
Alexey
diff mbox series

Patch

diff --git a/arch/arm64/boot/dts/rockchip/rk3588s.dtsi b/arch/arm64/boot/dts/rockchip/rk3588s.dtsi
index 131b9eb21398..e605be531a0f 100644
--- a/arch/arm64/boot/dts/rockchip/rk3588s.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3588s.dtsi
@@ -97,6 +97,7 @@  cpu_l0: cpu@0 {
 			clocks = <&scmi_clk SCMI_CLK_CPUL>;
 			assigned-clocks = <&scmi_clk SCMI_CLK_CPUL>;
 			assigned-clock-rates = <816000000>;
+			operating-points-v2 = <&cluster0_opp_table>;
 			cpu-idle-states = <&CPU_SLEEP>;
 			i-cache-size = <32768>;
 			i-cache-line-size = <64>;
@@ -116,6 +117,7 @@  cpu_l1: cpu@100 {
 			enable-method = "psci";
 			capacity-dmips-mhz = <530>;
 			clocks = <&scmi_clk SCMI_CLK_CPUL>;
+			operating-points-v2 = <&cluster0_opp_table>;
 			cpu-idle-states = <&CPU_SLEEP>;
 			i-cache-size = <32768>;
 			i-cache-line-size = <64>;
@@ -135,6 +137,7 @@  cpu_l2: cpu@200 {
 			enable-method = "psci";
 			capacity-dmips-mhz = <530>;
 			clocks = <&scmi_clk SCMI_CLK_CPUL>;
+			operating-points-v2 = <&cluster0_opp_table>;
 			cpu-idle-states = <&CPU_SLEEP>;
 			i-cache-size = <32768>;
 			i-cache-line-size = <64>;
@@ -154,6 +157,7 @@  cpu_l3: cpu@300 {
 			enable-method = "psci";
 			capacity-dmips-mhz = <530>;
 			clocks = <&scmi_clk SCMI_CLK_CPUL>;
+			operating-points-v2 = <&cluster0_opp_table>;
 			cpu-idle-states = <&CPU_SLEEP>;
 			i-cache-size = <32768>;
 			i-cache-line-size = <64>;
@@ -175,6 +179,7 @@  cpu_b0: cpu@400 {
 			clocks = <&scmi_clk SCMI_CLK_CPUB01>;
 			assigned-clocks = <&scmi_clk SCMI_CLK_CPUB01>;
 			assigned-clock-rates = <816000000>;
+			operating-points-v2 = <&cluster1_opp_table>;
 			cpu-idle-states = <&CPU_SLEEP>;
 			i-cache-size = <65536>;
 			i-cache-line-size = <64>;
@@ -194,6 +199,7 @@  cpu_b1: cpu@500 {
 			enable-method = "psci";
 			capacity-dmips-mhz = <1024>;
 			clocks = <&scmi_clk SCMI_CLK_CPUB01>;
+			operating-points-v2 = <&cluster1_opp_table>;
 			cpu-idle-states = <&CPU_SLEEP>;
 			i-cache-size = <65536>;
 			i-cache-line-size = <64>;
@@ -215,6 +221,7 @@  cpu_b2: cpu@600 {
 			clocks = <&scmi_clk SCMI_CLK_CPUB23>;
 			assigned-clocks = <&scmi_clk SCMI_CLK_CPUB23>;
 			assigned-clock-rates = <816000000>;
+			operating-points-v2 = <&cluster2_opp_table>;
 			cpu-idle-states = <&CPU_SLEEP>;
 			i-cache-size = <65536>;
 			i-cache-line-size = <64>;
@@ -234,6 +241,7 @@  cpu_b3: cpu@700 {
 			enable-method = "psci";
 			capacity-dmips-mhz = <1024>;
 			clocks = <&scmi_clk SCMI_CLK_CPUB23>;
+			operating-points-v2 = <&cluster2_opp_table>;
 			cpu-idle-states = <&CPU_SLEEP>;
 			i-cache-size = <65536>;
 			i-cache-line-size = <64>;
@@ -348,6 +356,207 @@  l3_cache: l3-cache {
 		};
 	};
 
+	cluster0_opp_table: opp-table-cluster0 {
+		compatible = "operating-points-v2";
+		opp-shared;
+
+		opp-408000000 {
+			opp-hz = /bits/ 64 <408000000>;
+			opp-microvolt = <675000 675000 950000>;
+			clock-latency-ns = <40000>;
+		};
+		opp-600000000 {
+			opp-hz = /bits/ 64 <600000000>;
+			opp-microvolt = <675000 675000 950000>;
+			clock-latency-ns = <40000>;
+		};
+		opp-816000000 {
+			opp-hz = /bits/ 64 <816000000>;
+			opp-microvolt = <675000 675000 950000>;
+			clock-latency-ns = <40000>;
+		};
+		opp-1008000000 {
+			opp-hz = /bits/ 64 <1008000000>;
+			opp-microvolt = <675000 675000 950000>;
+			clock-latency-ns = <40000>;
+		};
+		opp-1200000000 {
+			opp-hz = /bits/ 64 <1200000000>;
+			opp-microvolt = <712500 712500 950000>;
+			clock-latency-ns = <40000>;
+		};
+		opp-1416000000 {
+			opp-hz = /bits/ 64 <1416000000>;
+			opp-microvolt = <762500 762500 950000>;
+			clock-latency-ns = <40000>;
+			opp-suspend;
+		};
+		opp-1608000000 {
+			opp-hz = /bits/ 64 <1608000000>;
+			opp-microvolt = <850000 850000 950000>;
+			clock-latency-ns = <40000>;
+		};
+		opp-1800000000 {
+			opp-hz = /bits/ 64 <1800000000>;
+			opp-microvolt = <950000 950000 950000>;
+			clock-latency-ns = <40000>;
+		};
+	};
+
+	cluster1_opp_table: opp-table-cluster1 {
+		compatible = "operating-points-v2";
+		opp-shared;
+
+		opp-408000000 {
+			opp-hz = /bits/ 64 <408000000>;
+			opp-microvolt = <675000 675000 1000000>;
+			clock-latency-ns = <40000>;
+			opp-suspend;
+		};
+		opp-600000000 {
+			opp-hz = /bits/ 64 <600000000>;
+			opp-microvolt = <675000 675000 1000000>;
+			clock-latency-ns = <40000>;
+		};
+		opp-816000000 {
+			opp-hz = /bits/ 64 <816000000>;
+			opp-microvolt = <675000 675000 1000000>;
+			clock-latency-ns = <40000>;
+		};
+		opp-1008000000 {
+			opp-hz = /bits/ 64 <1008000000>;
+			opp-microvolt = <675000 675000 1000000>;
+			clock-latency-ns = <40000>;
+		};
+		opp-1200000000 {
+			opp-hz = /bits/ 64 <1200000000>;
+			opp-microvolt = <675000 675000 1000000>;
+			clock-latency-ns = <40000>;
+		};
+		opp-1416000000 {
+			opp-hz = /bits/ 64 <1416000000>;
+			opp-microvolt = <725000 725000 1000000>;
+			clock-latency-ns = <40000>;
+		};
+		opp-1608000000 {
+			opp-hz = /bits/ 64 <1608000000>;
+			opp-microvolt = <762500 762500 1000000>;
+			clock-latency-ns = <40000>;
+		};
+		opp-1800000000 {
+			opp-hz = /bits/ 64 <1800000000>;
+			opp-microvolt = <850000 850000 1000000>;
+			clock-latency-ns = <40000>;
+		};
+		opp-2016000000 {
+			opp-hz = /bits/ 64 <2016000000>;
+			opp-microvolt = <925000 925000 1000000>;
+			clock-latency-ns = <40000>;
+		};
+		opp-2208000000 {
+			opp-hz = /bits/ 64 <2208000000>;
+			opp-microvolt = <987500 987500 1000000>;
+			clock-latency-ns = <40000>;
+		};
+		opp-2256000000 {
+			opp-hz = /bits/ 64 <2256000000>;
+			opp-microvolt = <1000000 1000000 1000000>;
+			clock-latency-ns = <40000>;
+		};
+		opp-2304000000 {
+			opp-hz = /bits/ 64 <2304000000>;
+			opp-microvolt = <1000000 1000000 1000000>;
+			clock-latency-ns = <40000>;
+		};
+		opp-2352000000 {
+			opp-hz = /bits/ 64 <2352000000>;
+			opp-microvolt = <1000000 1000000 1000000>;
+			clock-latency-ns = <40000>;
+		};
+		opp-2400000000 {
+			opp-hz = /bits/ 64 <2400000000>;
+			opp-microvolt = <1000000 1000000 1000000>;
+			clock-latency-ns = <40000>;
+		};
+	};
+
+	cluster2_opp_table: opp-table-cluster2 {
+		compatible = "operating-points-v2";
+		opp-shared;
+
+		opp-408000000 {
+			opp-hz = /bits/ 64 <408000000>;
+			opp-microvolt = <675000 675000 1000000>;
+			clock-latency-ns = <40000>;
+			opp-suspend;
+		};
+		opp-600000000 {
+			opp-hz = /bits/ 64 <600000000>;
+			opp-microvolt = <675000 675000 1000000>;
+			clock-latency-ns = <40000>;
+		};
+		opp-816000000 {
+			opp-hz = /bits/ 64 <816000000>;
+			opp-microvolt = <675000 675000 1000000>;
+			clock-latency-ns = <40000>;
+		};
+		opp-1008000000 {
+			opp-hz = /bits/ 64 <1008000000>;
+			opp-microvolt = <675000 675000 1000000>;
+			clock-latency-ns = <40000>;
+		};
+		opp-1200000000 {
+			opp-hz = /bits/ 64 <1200000000>;
+			opp-microvolt = <675000 675000 1000000>;
+			clock-latency-ns = <40000>;
+		};
+		opp-1416000000 {
+			opp-hz = /bits/ 64 <1416000000>;
+			opp-microvolt = <725000 725000 1000000>;
+			clock-latency-ns = <40000>;
+		};
+		opp-1608000000 {
+			opp-hz = /bits/ 64 <1608000000>;
+			opp-microvolt = <762500 762500 1000000>;
+			clock-latency-ns = <40000>;
+		};
+		opp-1800000000 {
+			opp-hz = /bits/ 64 <1800000000>;
+			opp-microvolt = <850000 850000 1000000>;
+			clock-latency-ns = <40000>;
+		};
+		opp-2016000000 {
+			opp-hz = /bits/ 64 <2016000000>;
+			opp-microvolt = <925000 925000 1000000>;
+			clock-latency-ns = <40000>;
+		};
+		opp-2208000000 {
+			opp-hz = /bits/ 64 <2208000000>;
+			opp-microvolt = <987500 987500 1000000>;
+			clock-latency-ns = <40000>;
+		};
+		opp-2256000000 {
+			opp-hz = /bits/ 64 <2256000000>;
+			opp-microvolt = <1000000 1000000 1000000>;
+			clock-latency-ns = <40000>;
+		};
+		opp-2304000000 {
+			opp-hz = /bits/ 64 <2304000000>;
+			opp-microvolt = <1000000 1000000 1000000>;
+			clock-latency-ns = <40000>;
+		};
+		opp-2352000000 {
+			opp-hz = /bits/ 64 <2352000000>;
+			opp-microvolt = <1000000 1000000 1000000>;
+			clock-latency-ns = <40000>;
+		};
+		opp-2400000000 {
+			opp-hz = /bits/ 64 <2400000000>;
+			opp-microvolt = <1000000 1000000 1000000>;
+			clock-latency-ns = <40000>;
+		};
+	};
+
 	firmware {
 		optee: optee {
 			compatible = "linaro,optee-tz";