diff mbox series

[v3,10/15] cpufreq/amd-pstate: Move limit updating code

Message ID 20241209185248.16301-11-mario.limonciello@amd.com
State New
Headers show
Series amd-pstate fixes and improvements for 6.14 | expand

Commit Message

Mario Limonciello Dec. 9, 2024, 6:52 p.m. UTC
The limit updating code in amd_pstate_epp_update_limit() should not
only apply to EPP updates.  Move it to amd_pstate_update_min_max_limit()
so other callers can benefit as well.

With this move it's not necessary to have clamp_t calls anymore because
the verify callback is called when setting limits.

Reviewed-by: Gautham R. Shenoy <gautham.shenoy@amd.com>
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
---
v2:
 * Drop lowest_perf variable
---
 drivers/cpufreq/amd-pstate.c | 28 +++++-----------------------
 1 file changed, 5 insertions(+), 23 deletions(-)

Comments

Mario Limonciello Dec. 16, 2024, 2:21 p.m. UTC | #1
On 12/16/2024 08:16, Dhananjay Ugwekar wrote:
> Hello Mario,
> 
> On 12/10/2024 12:22 AM, Mario Limonciello wrote:
>> The limit updating code in amd_pstate_epp_update_limit() should not
>> only apply to EPP updates.  Move it to amd_pstate_update_min_max_limit()
>> so other callers can benefit as well.
>>
>> With this move it's not necessary to have clamp_t calls anymore because
>> the verify callback is called when setting limits.
> 
> While testing this series, I observed that with amd_pstate=passive + schedutil governor,
> the scaling_max_freq limits were not being honored and I bisected the issue down to this
> patch.
> 
> I went through the code and noticed that in amd_pstate_adjust_perf(), we set the min_perf
> field in MSR_AMD_CPPC_REQ to "cap_perf" which is equal to cpudata->highest_perf (which is
> equal to 255 for non-preferred cores systems). This didnt seem logical to me and I changed
> cap_perf to cpudata->max_limit_perf which gives us the value updated in scaling_max_freq.
> 
> I think as we removed the redundant clamping code, this pre-existing issue got exposed.
> The below diff fixes the issue for me.
> 
> Please let me know your thoughts on this.
> 
> diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c
> index d7b1de97727a..1ac34e3f1fc5 100644
> --- a/drivers/cpufreq/amd-pstate.c
> +++ b/drivers/cpufreq/amd-pstate.c
> @@ -699,7 +699,7 @@ static void amd_pstate_adjust_perf(unsigned int cpu,
>          if (min_perf < lowest_nonlinear_perf)
>                  min_perf = lowest_nonlinear_perf;
> 
> -       max_perf = cap_perf;
> +       max_perf = cpudata->max_limit_perf;
>          if (max_perf < min_perf)
>                  max_perf = min_perf;

With this change I think you can also drop the comparison afterwards, as 
an optimization right?

As this is already in superm1.git/linux-next after testing can you 
please send a patch relative to superm1.git/linux-next branch?

Thanks!

> 
> Thanks,
> Dhananjay
> 
>>
>> Reviewed-by: Gautham R. Shenoy <gautham.shenoy@amd.com>
>> Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
>> ---
>> v2:
>>   * Drop lowest_perf variable
>> ---
>>   drivers/cpufreq/amd-pstate.c | 28 +++++-----------------------
>>   1 file changed, 5 insertions(+), 23 deletions(-)
>>
>> diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c
>> index 3a3df67c096d5..dc3c45b6f5103 100644
>> --- a/drivers/cpufreq/amd-pstate.c
>> +++ b/drivers/cpufreq/amd-pstate.c
>> @@ -537,10 +537,6 @@ static void amd_pstate_update(struct amd_cpudata *cpudata, u32 min_perf,
>>   	u32 nominal_perf = READ_ONCE(cpudata->nominal_perf);
>>   	u64 value = prev;
>>   
>> -	min_perf = clamp_t(unsigned long, min_perf, cpudata->min_limit_perf,
>> -			cpudata->max_limit_perf);
>> -	max_perf = clamp_t(unsigned long, max_perf, cpudata->min_limit_perf,
>> -			cpudata->max_limit_perf);
>>   	des_perf = clamp_t(unsigned long, des_perf, min_perf, max_perf);
>>   
>>   	max_freq = READ_ONCE(cpudata->max_limit_freq);
>> @@ -607,7 +603,7 @@ static int amd_pstate_verify(struct cpufreq_policy_data *policy_data)
>>   
>>   static int amd_pstate_update_min_max_limit(struct cpufreq_policy *policy)
>>   {
>> -	u32 max_limit_perf, min_limit_perf, lowest_perf, max_perf, max_freq;
>> +	u32 max_limit_perf, min_limit_perf, max_perf, max_freq;
>>   	struct amd_cpudata *cpudata = policy->driver_data;
>>   
>>   	max_perf = READ_ONCE(cpudata->highest_perf);
>> @@ -615,12 +611,8 @@ static int amd_pstate_update_min_max_limit(struct cpufreq_policy *policy)
>>   	max_limit_perf = div_u64(policy->max * max_perf, max_freq);
>>   	min_limit_perf = div_u64(policy->min * max_perf, max_freq);
>>   
>> -	lowest_perf = READ_ONCE(cpudata->lowest_perf);
>> -	if (min_limit_perf < lowest_perf)
>> -		min_limit_perf = lowest_perf;
>> -
>> -	if (max_limit_perf < min_limit_perf)
>> -		max_limit_perf = min_limit_perf;
>> +	if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE)
>> +		min_limit_perf = min(cpudata->nominal_perf, max_limit_perf);
>>   
>>   	WRITE_ONCE(cpudata->max_limit_perf, max_limit_perf);
>>   	WRITE_ONCE(cpudata->min_limit_perf, min_limit_perf);
>> @@ -1562,28 +1554,18 @@ static void amd_pstate_epp_cpu_exit(struct cpufreq_policy *policy)
>>   static int amd_pstate_epp_update_limit(struct cpufreq_policy *policy)
>>   {
>>   	struct amd_cpudata *cpudata = policy->driver_data;
>> -	u32 max_perf, min_perf;
>>   	u64 value;
>>   	s16 epp;
>>   
>> -	max_perf = READ_ONCE(cpudata->highest_perf);
>> -	min_perf = READ_ONCE(cpudata->lowest_perf);
>>   	amd_pstate_update_min_max_limit(policy);
>>   
>> -	max_perf = clamp_t(unsigned long, max_perf, cpudata->min_limit_perf,
>> -			cpudata->max_limit_perf);
>> -	min_perf = clamp_t(unsigned long, min_perf, cpudata->min_limit_perf,
>> -			cpudata->max_limit_perf);
>>   	value = READ_ONCE(cpudata->cppc_req_cached);
>>   
>> -	if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE)
>> -		min_perf = min(cpudata->nominal_perf, max_perf);
>> -
>>   	value &= ~(AMD_CPPC_MAX_PERF_MASK | AMD_CPPC_MIN_PERF_MASK |
>>   		   AMD_CPPC_DES_PERF_MASK);
>> -	value |= FIELD_PREP(AMD_CPPC_MAX_PERF_MASK, max_perf);
>> +	value |= FIELD_PREP(AMD_CPPC_MAX_PERF_MASK, cpudata->max_limit_perf);
>>   	value |= FIELD_PREP(AMD_CPPC_DES_PERF_MASK, 0);
>> -	value |= FIELD_PREP(AMD_CPPC_MIN_PERF_MASK, min_perf);
>> +	value |= FIELD_PREP(AMD_CPPC_MIN_PERF_MASK, cpudata->min_limit_perf);
>>   
>>   	/* Get BIOS pre-defined epp value */
>>   	epp = amd_pstate_get_epp(cpudata, value);
>
Mario Limonciello Dec. 17, 2024, 7:44 p.m. UTC | #2
On 12/17/2024 00:50, Dhananjay Ugwekar wrote:
> On 12/16/2024 9:09 PM, Mario Limonciello wrote:
>> On 12/16/2024 08:45, Dhananjay Ugwekar wrote:
>>> On 12/16/2024 7:51 PM, Mario Limonciello wrote:
>>>> On 12/16/2024 08:16, Dhananjay Ugwekar wrote:
>>>>> Hello Mario,
>>>>>
>>>>> On 12/10/2024 12:22 AM, Mario Limonciello wrote:
>>>>>> The limit updating code in amd_pstate_epp_update_limit() should not
>>>>>> only apply to EPP updates.  Move it to amd_pstate_update_min_max_limit()
>>>>>> so other callers can benefit as well.
>>>>>>
>>>>>> With this move it's not necessary to have clamp_t calls anymore because
>>>>>> the verify callback is called when setting limits.
>>>>>
>>>>> While testing this series, I observed that with amd_pstate=passive + schedutil governor,
>>>>> the scaling_max_freq limits were not being honored and I bisected the issue down to this
>>>>> patch.
>>>>>
>>>>> I went through the code and noticed that in amd_pstate_adjust_perf(), we set the min_perf
>>>>> field in MSR_AMD_CPPC_REQ to "cap_perf" which is equal to cpudata->highest_perf (which is
>>>>> equal to 255 for non-preferred cores systems). This didnt seem logical to me and I changed
>>>>> cap_perf to cpudata->max_limit_perf which gives us the value updated in scaling_max_freq.
>>>>>
>>>>> I think as we removed the redundant clamping code, this pre-existing issue got exposed.
>>>>> The below diff fixes the issue for me.
>>>>>
>>>>> Please let me know your thoughts on this.
>>>>>
>>>>> diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c
>>>>> index d7b1de97727a..1ac34e3f1fc5 100644
>>>>> --- a/drivers/cpufreq/amd-pstate.c
>>>>> +++ b/drivers/cpufreq/amd-pstate.c
>>>>> @@ -699,7 +699,7 @@ static void amd_pstate_adjust_perf(unsigned int cpu,
>>>>>            if (min_perf < lowest_nonlinear_perf)
>>>>>                    min_perf = lowest_nonlinear_perf;
>>> here^^^
>>>>>
>>>>> -       max_perf = cap_perf;
>>>>> +       max_perf = cpudata->max_limit_perf;
>>>>>            if (max_perf < min_perf)
>>>>>                    max_perf = min_perf;
>>>>
>>>> With this change I think you can also drop the comparison afterwards, as an optimization right?
>>>
>>> Umm I think it is possible that scaling_max_freq is set to a value lower than
>>> lowest_nonlinear_freq in that case this if condition would be needed (as min_perf
>>> is being lower bounded at lowest_nonlinear_freq at the location highlighted above).
>>> I would be okay with keeping this check in.
>>
>> Well this feels like a bigger problem actually - why is it forcefully bounded at lowest nonlinear freq?  Performance is going to be awful at that level
> 
> Actually this wont necessarily deteriorate the performance, as we are just restricting
> the min_perf to not go below lowest_nonlinear level. So we are actually ensuring that
> the schedutil doesnt select a des_perf below lowest_nonlinear_perf.
> 
> (hence why commit 5d9a354cf839a ("cpufreq/amd-pstate: Set the initial min_freq to lowest_nonlinear_freq") was done),

Sorry re-reading I didn't get my thought out properly, I meant to say 
performance is going to be bad BELOW that level.  We're in total 
agreement here.

>>
>> but shouldn't we "let" people go below that in passive and guided?  We do for active.
> 
> Yes I agree, we should allow the user to set min limit in the entire frequency range,
> I thought there would've been some reason for restricting this. But I dont see any
> reasoning for this in the blamed commit log as well. I think one reason would be that
> below lowest_nonlinear_freq we dont get real power savings. And schedutil might dip
> into this lower inefficient range if we dont force bound it.

OK I guess to avoid regressions let's leave it as is and do a minimal 
change and we can revisit lifting this restriction later after you get 
testing done with it to see what actually happens.

> 
> Thanks,
> Dhananjay
> 
>>
>>>
>>> Also, what is the behavior if max_perf is set to a value lower than min_perf in
>>> the CPPC_REQ MSR? I guess platform FW would also be smart enough to handle this
>>> implicitly, but cant say for sure.
>>>
>>
>> I would hope so too; but yeah you're right we don't know for sure.
>>
>>>>
>>>> As this is already in superm1.git/linux-next after testing can you please send a patch relative to superm1.git/linux-next branch?
>>>
>>> Sure, I'll send out the patch once we finalize on the above if condition.
>>>
>>> Regards,
>>> Dhananjay
>>>
>>>>
>>>> Thanks!
>>>>
>>>>>
>>>>> Thanks,
>>>>> Dhananjay
>>>>>
>>>>>>
>>>>>> Reviewed-by: Gautham R. Shenoy <gautham.shenoy@amd.com>
>>>>>> Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
>>>>>> ---
>>>>>> v2:
>>>>>>     * Drop lowest_perf variable
>>>>>> ---
>>>>>>     drivers/cpufreq/amd-pstate.c | 28 +++++-----------------------
>>>>>>     1 file changed, 5 insertions(+), 23 deletions(-)
>>>>>>
>>>>>> diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c
>>>>>> index 3a3df67c096d5..dc3c45b6f5103 100644
>>>>>> --- a/drivers/cpufreq/amd-pstate.c
>>>>>> +++ b/drivers/cpufreq/amd-pstate.c
>>>>>> @@ -537,10 +537,6 @@ static void amd_pstate_update(struct amd_cpudata *cpudata, u32 min_perf,
>>>>>>         u32 nominal_perf = READ_ONCE(cpudata->nominal_perf);
>>>>>>         u64 value = prev;
>>>>>>     -    min_perf = clamp_t(unsigned long, min_perf, cpudata->min_limit_perf,
>>>>>> -            cpudata->max_limit_perf);
>>>>>> -    max_perf = clamp_t(unsigned long, max_perf, cpudata->min_limit_perf,
>>>>>> -            cpudata->max_limit_perf);
>>>>>>         des_perf = clamp_t(unsigned long, des_perf, min_perf, max_perf);
>>>>>>           max_freq = READ_ONCE(cpudata->max_limit_freq);
>>>>>> @@ -607,7 +603,7 @@ static int amd_pstate_verify(struct cpufreq_policy_data *policy_data)
>>>>>>       static int amd_pstate_update_min_max_limit(struct cpufreq_policy *policy)
>>>>>>     {
>>>>>> -    u32 max_limit_perf, min_limit_perf, lowest_perf, max_perf, max_freq;
>>>>>> +    u32 max_limit_perf, min_limit_perf, max_perf, max_freq;
>>>>>>         struct amd_cpudata *cpudata = policy->driver_data;
>>>>>>           max_perf = READ_ONCE(cpudata->highest_perf);
>>>>>> @@ -615,12 +611,8 @@ static int amd_pstate_update_min_max_limit(struct cpufreq_policy *policy)
>>>>>>         max_limit_perf = div_u64(policy->max * max_perf, max_freq);
>>>>>>         min_limit_perf = div_u64(policy->min * max_perf, max_freq);
>>>>>>     -    lowest_perf = READ_ONCE(cpudata->lowest_perf);
>>>>>> -    if (min_limit_perf < lowest_perf)
>>>>>> -        min_limit_perf = lowest_perf;
>>>>>> -
>>>>>> -    if (max_limit_perf < min_limit_perf)
>>>>>> -        max_limit_perf = min_limit_perf;
>>>>>> +    if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE)
>>>>>> +        min_limit_perf = min(cpudata->nominal_perf, max_limit_perf);
>>>>>>           WRITE_ONCE(cpudata->max_limit_perf, max_limit_perf);
>>>>>>         WRITE_ONCE(cpudata->min_limit_perf, min_limit_perf);
>>>>>> @@ -1562,28 +1554,18 @@ static void amd_pstate_epp_cpu_exit(struct cpufreq_policy *policy)
>>>>>>     static int amd_pstate_epp_update_limit(struct cpufreq_policy *policy)
>>>>>>     {
>>>>>>         struct amd_cpudata *cpudata = policy->driver_data;
>>>>>> -    u32 max_perf, min_perf;
>>>>>>         u64 value;
>>>>>>         s16 epp;
>>>>>>     -    max_perf = READ_ONCE(cpudata->highest_perf);
>>>>>> -    min_perf = READ_ONCE(cpudata->lowest_perf);
>>>>>>         amd_pstate_update_min_max_limit(policy);
>>>>>>     -    max_perf = clamp_t(unsigned long, max_perf, cpudata->min_limit_perf,
>>>>>> -            cpudata->max_limit_perf);
>>>>>> -    min_perf = clamp_t(unsigned long, min_perf, cpudata->min_limit_perf,
>>>>>> -            cpudata->max_limit_perf);
>>>>>>         value = READ_ONCE(cpudata->cppc_req_cached);
>>>>>>     -    if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE)
>>>>>> -        min_perf = min(cpudata->nominal_perf, max_perf);
>>>>>> -
>>>>>>         value &= ~(AMD_CPPC_MAX_PERF_MASK | AMD_CPPC_MIN_PERF_MASK |
>>>>>>                AMD_CPPC_DES_PERF_MASK);
>>>>>> -    value |= FIELD_PREP(AMD_CPPC_MAX_PERF_MASK, max_perf);
>>>>>> +    value |= FIELD_PREP(AMD_CPPC_MAX_PERF_MASK, cpudata->max_limit_perf);
>>>>>>         value |= FIELD_PREP(AMD_CPPC_DES_PERF_MASK, 0);
>>>>>> -    value |= FIELD_PREP(AMD_CPPC_MIN_PERF_MASK, min_perf);
>>>>>> +    value |= FIELD_PREP(AMD_CPPC_MIN_PERF_MASK, cpudata->min_limit_perf);
>>>>>>           /* Get BIOS pre-defined epp value */
>>>>>>         epp = amd_pstate_get_epp(cpudata, value);
>>>>>
>>>>
>>>
>>
>
diff mbox series

Patch

diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c
index 3a3df67c096d5..dc3c45b6f5103 100644
--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -537,10 +537,6 @@  static void amd_pstate_update(struct amd_cpudata *cpudata, u32 min_perf,
 	u32 nominal_perf = READ_ONCE(cpudata->nominal_perf);
 	u64 value = prev;
 
-	min_perf = clamp_t(unsigned long, min_perf, cpudata->min_limit_perf,
-			cpudata->max_limit_perf);
-	max_perf = clamp_t(unsigned long, max_perf, cpudata->min_limit_perf,
-			cpudata->max_limit_perf);
 	des_perf = clamp_t(unsigned long, des_perf, min_perf, max_perf);
 
 	max_freq = READ_ONCE(cpudata->max_limit_freq);
@@ -607,7 +603,7 @@  static int amd_pstate_verify(struct cpufreq_policy_data *policy_data)
 
 static int amd_pstate_update_min_max_limit(struct cpufreq_policy *policy)
 {
-	u32 max_limit_perf, min_limit_perf, lowest_perf, max_perf, max_freq;
+	u32 max_limit_perf, min_limit_perf, max_perf, max_freq;
 	struct amd_cpudata *cpudata = policy->driver_data;
 
 	max_perf = READ_ONCE(cpudata->highest_perf);
@@ -615,12 +611,8 @@  static int amd_pstate_update_min_max_limit(struct cpufreq_policy *policy)
 	max_limit_perf = div_u64(policy->max * max_perf, max_freq);
 	min_limit_perf = div_u64(policy->min * max_perf, max_freq);
 
-	lowest_perf = READ_ONCE(cpudata->lowest_perf);
-	if (min_limit_perf < lowest_perf)
-		min_limit_perf = lowest_perf;
-
-	if (max_limit_perf < min_limit_perf)
-		max_limit_perf = min_limit_perf;
+	if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE)
+		min_limit_perf = min(cpudata->nominal_perf, max_limit_perf);
 
 	WRITE_ONCE(cpudata->max_limit_perf, max_limit_perf);
 	WRITE_ONCE(cpudata->min_limit_perf, min_limit_perf);
@@ -1562,28 +1554,18 @@  static void amd_pstate_epp_cpu_exit(struct cpufreq_policy *policy)
 static int amd_pstate_epp_update_limit(struct cpufreq_policy *policy)
 {
 	struct amd_cpudata *cpudata = policy->driver_data;
-	u32 max_perf, min_perf;
 	u64 value;
 	s16 epp;
 
-	max_perf = READ_ONCE(cpudata->highest_perf);
-	min_perf = READ_ONCE(cpudata->lowest_perf);
 	amd_pstate_update_min_max_limit(policy);
 
-	max_perf = clamp_t(unsigned long, max_perf, cpudata->min_limit_perf,
-			cpudata->max_limit_perf);
-	min_perf = clamp_t(unsigned long, min_perf, cpudata->min_limit_perf,
-			cpudata->max_limit_perf);
 	value = READ_ONCE(cpudata->cppc_req_cached);
 
-	if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE)
-		min_perf = min(cpudata->nominal_perf, max_perf);
-
 	value &= ~(AMD_CPPC_MAX_PERF_MASK | AMD_CPPC_MIN_PERF_MASK |
 		   AMD_CPPC_DES_PERF_MASK);
-	value |= FIELD_PREP(AMD_CPPC_MAX_PERF_MASK, max_perf);
+	value |= FIELD_PREP(AMD_CPPC_MAX_PERF_MASK, cpudata->max_limit_perf);
 	value |= FIELD_PREP(AMD_CPPC_DES_PERF_MASK, 0);
-	value |= FIELD_PREP(AMD_CPPC_MIN_PERF_MASK, min_perf);
+	value |= FIELD_PREP(AMD_CPPC_MIN_PERF_MASK, cpudata->min_limit_perf);
 
 	/* Get BIOS pre-defined epp value */
 	epp = amd_pstate_get_epp(cpudata, value);