Message ID | 20241209185248.16301-11-mario.limonciello@amd.com |
---|---|
State | New |
Headers | show |
Series | amd-pstate fixes and improvements for 6.14 | expand |
On 12/16/2024 08:16, Dhananjay Ugwekar wrote: > Hello Mario, > > On 12/10/2024 12:22 AM, Mario Limonciello wrote: >> The limit updating code in amd_pstate_epp_update_limit() should not >> only apply to EPP updates. Move it to amd_pstate_update_min_max_limit() >> so other callers can benefit as well. >> >> With this move it's not necessary to have clamp_t calls anymore because >> the verify callback is called when setting limits. > > While testing this series, I observed that with amd_pstate=passive + schedutil governor, > the scaling_max_freq limits were not being honored and I bisected the issue down to this > patch. > > I went through the code and noticed that in amd_pstate_adjust_perf(), we set the min_perf > field in MSR_AMD_CPPC_REQ to "cap_perf" which is equal to cpudata->highest_perf (which is > equal to 255 for non-preferred cores systems). This didnt seem logical to me and I changed > cap_perf to cpudata->max_limit_perf which gives us the value updated in scaling_max_freq. > > I think as we removed the redundant clamping code, this pre-existing issue got exposed. > The below diff fixes the issue for me. > > Please let me know your thoughts on this. > > diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c > index d7b1de97727a..1ac34e3f1fc5 100644 > --- a/drivers/cpufreq/amd-pstate.c > +++ b/drivers/cpufreq/amd-pstate.c > @@ -699,7 +699,7 @@ static void amd_pstate_adjust_perf(unsigned int cpu, > if (min_perf < lowest_nonlinear_perf) > min_perf = lowest_nonlinear_perf; > > - max_perf = cap_perf; > + max_perf = cpudata->max_limit_perf; > if (max_perf < min_perf) > max_perf = min_perf; With this change I think you can also drop the comparison afterwards, as an optimization right? As this is already in superm1.git/linux-next after testing can you please send a patch relative to superm1.git/linux-next branch? Thanks! > > Thanks, > Dhananjay > >> >> Reviewed-by: Gautham R. Shenoy <gautham.shenoy@amd.com> >> Signed-off-by: Mario Limonciello <mario.limonciello@amd.com> >> --- >> v2: >> * Drop lowest_perf variable >> --- >> drivers/cpufreq/amd-pstate.c | 28 +++++----------------------- >> 1 file changed, 5 insertions(+), 23 deletions(-) >> >> diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c >> index 3a3df67c096d5..dc3c45b6f5103 100644 >> --- a/drivers/cpufreq/amd-pstate.c >> +++ b/drivers/cpufreq/amd-pstate.c >> @@ -537,10 +537,6 @@ static void amd_pstate_update(struct amd_cpudata *cpudata, u32 min_perf, >> u32 nominal_perf = READ_ONCE(cpudata->nominal_perf); >> u64 value = prev; >> >> - min_perf = clamp_t(unsigned long, min_perf, cpudata->min_limit_perf, >> - cpudata->max_limit_perf); >> - max_perf = clamp_t(unsigned long, max_perf, cpudata->min_limit_perf, >> - cpudata->max_limit_perf); >> des_perf = clamp_t(unsigned long, des_perf, min_perf, max_perf); >> >> max_freq = READ_ONCE(cpudata->max_limit_freq); >> @@ -607,7 +603,7 @@ static int amd_pstate_verify(struct cpufreq_policy_data *policy_data) >> >> static int amd_pstate_update_min_max_limit(struct cpufreq_policy *policy) >> { >> - u32 max_limit_perf, min_limit_perf, lowest_perf, max_perf, max_freq; >> + u32 max_limit_perf, min_limit_perf, max_perf, max_freq; >> struct amd_cpudata *cpudata = policy->driver_data; >> >> max_perf = READ_ONCE(cpudata->highest_perf); >> @@ -615,12 +611,8 @@ static int amd_pstate_update_min_max_limit(struct cpufreq_policy *policy) >> max_limit_perf = div_u64(policy->max * max_perf, max_freq); >> min_limit_perf = div_u64(policy->min * max_perf, max_freq); >> >> - lowest_perf = READ_ONCE(cpudata->lowest_perf); >> - if (min_limit_perf < lowest_perf) >> - min_limit_perf = lowest_perf; >> - >> - if (max_limit_perf < min_limit_perf) >> - max_limit_perf = min_limit_perf; >> + if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE) >> + min_limit_perf = min(cpudata->nominal_perf, max_limit_perf); >> >> WRITE_ONCE(cpudata->max_limit_perf, max_limit_perf); >> WRITE_ONCE(cpudata->min_limit_perf, min_limit_perf); >> @@ -1562,28 +1554,18 @@ static void amd_pstate_epp_cpu_exit(struct cpufreq_policy *policy) >> static int amd_pstate_epp_update_limit(struct cpufreq_policy *policy) >> { >> struct amd_cpudata *cpudata = policy->driver_data; >> - u32 max_perf, min_perf; >> u64 value; >> s16 epp; >> >> - max_perf = READ_ONCE(cpudata->highest_perf); >> - min_perf = READ_ONCE(cpudata->lowest_perf); >> amd_pstate_update_min_max_limit(policy); >> >> - max_perf = clamp_t(unsigned long, max_perf, cpudata->min_limit_perf, >> - cpudata->max_limit_perf); >> - min_perf = clamp_t(unsigned long, min_perf, cpudata->min_limit_perf, >> - cpudata->max_limit_perf); >> value = READ_ONCE(cpudata->cppc_req_cached); >> >> - if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE) >> - min_perf = min(cpudata->nominal_perf, max_perf); >> - >> value &= ~(AMD_CPPC_MAX_PERF_MASK | AMD_CPPC_MIN_PERF_MASK | >> AMD_CPPC_DES_PERF_MASK); >> - value |= FIELD_PREP(AMD_CPPC_MAX_PERF_MASK, max_perf); >> + value |= FIELD_PREP(AMD_CPPC_MAX_PERF_MASK, cpudata->max_limit_perf); >> value |= FIELD_PREP(AMD_CPPC_DES_PERF_MASK, 0); >> - value |= FIELD_PREP(AMD_CPPC_MIN_PERF_MASK, min_perf); >> + value |= FIELD_PREP(AMD_CPPC_MIN_PERF_MASK, cpudata->min_limit_perf); >> >> /* Get BIOS pre-defined epp value */ >> epp = amd_pstate_get_epp(cpudata, value); >
On 12/17/2024 00:50, Dhananjay Ugwekar wrote: > On 12/16/2024 9:09 PM, Mario Limonciello wrote: >> On 12/16/2024 08:45, Dhananjay Ugwekar wrote: >>> On 12/16/2024 7:51 PM, Mario Limonciello wrote: >>>> On 12/16/2024 08:16, Dhananjay Ugwekar wrote: >>>>> Hello Mario, >>>>> >>>>> On 12/10/2024 12:22 AM, Mario Limonciello wrote: >>>>>> The limit updating code in amd_pstate_epp_update_limit() should not >>>>>> only apply to EPP updates. Move it to amd_pstate_update_min_max_limit() >>>>>> so other callers can benefit as well. >>>>>> >>>>>> With this move it's not necessary to have clamp_t calls anymore because >>>>>> the verify callback is called when setting limits. >>>>> >>>>> While testing this series, I observed that with amd_pstate=passive + schedutil governor, >>>>> the scaling_max_freq limits were not being honored and I bisected the issue down to this >>>>> patch. >>>>> >>>>> I went through the code and noticed that in amd_pstate_adjust_perf(), we set the min_perf >>>>> field in MSR_AMD_CPPC_REQ to "cap_perf" which is equal to cpudata->highest_perf (which is >>>>> equal to 255 for non-preferred cores systems). This didnt seem logical to me and I changed >>>>> cap_perf to cpudata->max_limit_perf which gives us the value updated in scaling_max_freq. >>>>> >>>>> I think as we removed the redundant clamping code, this pre-existing issue got exposed. >>>>> The below diff fixes the issue for me. >>>>> >>>>> Please let me know your thoughts on this. >>>>> >>>>> diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c >>>>> index d7b1de97727a..1ac34e3f1fc5 100644 >>>>> --- a/drivers/cpufreq/amd-pstate.c >>>>> +++ b/drivers/cpufreq/amd-pstate.c >>>>> @@ -699,7 +699,7 @@ static void amd_pstate_adjust_perf(unsigned int cpu, >>>>> if (min_perf < lowest_nonlinear_perf) >>>>> min_perf = lowest_nonlinear_perf; >>> here^^^ >>>>> >>>>> - max_perf = cap_perf; >>>>> + max_perf = cpudata->max_limit_perf; >>>>> if (max_perf < min_perf) >>>>> max_perf = min_perf; >>>> >>>> With this change I think you can also drop the comparison afterwards, as an optimization right? >>> >>> Umm I think it is possible that scaling_max_freq is set to a value lower than >>> lowest_nonlinear_freq in that case this if condition would be needed (as min_perf >>> is being lower bounded at lowest_nonlinear_freq at the location highlighted above). >>> I would be okay with keeping this check in. >> >> Well this feels like a bigger problem actually - why is it forcefully bounded at lowest nonlinear freq? Performance is going to be awful at that level > > Actually this wont necessarily deteriorate the performance, as we are just restricting > the min_perf to not go below lowest_nonlinear level. So we are actually ensuring that > the schedutil doesnt select a des_perf below lowest_nonlinear_perf. > > (hence why commit 5d9a354cf839a ("cpufreq/amd-pstate: Set the initial min_freq to lowest_nonlinear_freq") was done), Sorry re-reading I didn't get my thought out properly, I meant to say performance is going to be bad BELOW that level. We're in total agreement here. >> >> but shouldn't we "let" people go below that in passive and guided? We do for active. > > Yes I agree, we should allow the user to set min limit in the entire frequency range, > I thought there would've been some reason for restricting this. But I dont see any > reasoning for this in the blamed commit log as well. I think one reason would be that > below lowest_nonlinear_freq we dont get real power savings. And schedutil might dip > into this lower inefficient range if we dont force bound it. OK I guess to avoid regressions let's leave it as is and do a minimal change and we can revisit lifting this restriction later after you get testing done with it to see what actually happens. > > Thanks, > Dhananjay > >> >>> >>> Also, what is the behavior if max_perf is set to a value lower than min_perf in >>> the CPPC_REQ MSR? I guess platform FW would also be smart enough to handle this >>> implicitly, but cant say for sure. >>> >> >> I would hope so too; but yeah you're right we don't know for sure. >> >>>> >>>> As this is already in superm1.git/linux-next after testing can you please send a patch relative to superm1.git/linux-next branch? >>> >>> Sure, I'll send out the patch once we finalize on the above if condition. >>> >>> Regards, >>> Dhananjay >>> >>>> >>>> Thanks! >>>> >>>>> >>>>> Thanks, >>>>> Dhananjay >>>>> >>>>>> >>>>>> Reviewed-by: Gautham R. Shenoy <gautham.shenoy@amd.com> >>>>>> Signed-off-by: Mario Limonciello <mario.limonciello@amd.com> >>>>>> --- >>>>>> v2: >>>>>> * Drop lowest_perf variable >>>>>> --- >>>>>> drivers/cpufreq/amd-pstate.c | 28 +++++----------------------- >>>>>> 1 file changed, 5 insertions(+), 23 deletions(-) >>>>>> >>>>>> diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c >>>>>> index 3a3df67c096d5..dc3c45b6f5103 100644 >>>>>> --- a/drivers/cpufreq/amd-pstate.c >>>>>> +++ b/drivers/cpufreq/amd-pstate.c >>>>>> @@ -537,10 +537,6 @@ static void amd_pstate_update(struct amd_cpudata *cpudata, u32 min_perf, >>>>>> u32 nominal_perf = READ_ONCE(cpudata->nominal_perf); >>>>>> u64 value = prev; >>>>>> - min_perf = clamp_t(unsigned long, min_perf, cpudata->min_limit_perf, >>>>>> - cpudata->max_limit_perf); >>>>>> - max_perf = clamp_t(unsigned long, max_perf, cpudata->min_limit_perf, >>>>>> - cpudata->max_limit_perf); >>>>>> des_perf = clamp_t(unsigned long, des_perf, min_perf, max_perf); >>>>>> max_freq = READ_ONCE(cpudata->max_limit_freq); >>>>>> @@ -607,7 +603,7 @@ static int amd_pstate_verify(struct cpufreq_policy_data *policy_data) >>>>>> static int amd_pstate_update_min_max_limit(struct cpufreq_policy *policy) >>>>>> { >>>>>> - u32 max_limit_perf, min_limit_perf, lowest_perf, max_perf, max_freq; >>>>>> + u32 max_limit_perf, min_limit_perf, max_perf, max_freq; >>>>>> struct amd_cpudata *cpudata = policy->driver_data; >>>>>> max_perf = READ_ONCE(cpudata->highest_perf); >>>>>> @@ -615,12 +611,8 @@ static int amd_pstate_update_min_max_limit(struct cpufreq_policy *policy) >>>>>> max_limit_perf = div_u64(policy->max * max_perf, max_freq); >>>>>> min_limit_perf = div_u64(policy->min * max_perf, max_freq); >>>>>> - lowest_perf = READ_ONCE(cpudata->lowest_perf); >>>>>> - if (min_limit_perf < lowest_perf) >>>>>> - min_limit_perf = lowest_perf; >>>>>> - >>>>>> - if (max_limit_perf < min_limit_perf) >>>>>> - max_limit_perf = min_limit_perf; >>>>>> + if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE) >>>>>> + min_limit_perf = min(cpudata->nominal_perf, max_limit_perf); >>>>>> WRITE_ONCE(cpudata->max_limit_perf, max_limit_perf); >>>>>> WRITE_ONCE(cpudata->min_limit_perf, min_limit_perf); >>>>>> @@ -1562,28 +1554,18 @@ static void amd_pstate_epp_cpu_exit(struct cpufreq_policy *policy) >>>>>> static int amd_pstate_epp_update_limit(struct cpufreq_policy *policy) >>>>>> { >>>>>> struct amd_cpudata *cpudata = policy->driver_data; >>>>>> - u32 max_perf, min_perf; >>>>>> u64 value; >>>>>> s16 epp; >>>>>> - max_perf = READ_ONCE(cpudata->highest_perf); >>>>>> - min_perf = READ_ONCE(cpudata->lowest_perf); >>>>>> amd_pstate_update_min_max_limit(policy); >>>>>> - max_perf = clamp_t(unsigned long, max_perf, cpudata->min_limit_perf, >>>>>> - cpudata->max_limit_perf); >>>>>> - min_perf = clamp_t(unsigned long, min_perf, cpudata->min_limit_perf, >>>>>> - cpudata->max_limit_perf); >>>>>> value = READ_ONCE(cpudata->cppc_req_cached); >>>>>> - if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE) >>>>>> - min_perf = min(cpudata->nominal_perf, max_perf); >>>>>> - >>>>>> value &= ~(AMD_CPPC_MAX_PERF_MASK | AMD_CPPC_MIN_PERF_MASK | >>>>>> AMD_CPPC_DES_PERF_MASK); >>>>>> - value |= FIELD_PREP(AMD_CPPC_MAX_PERF_MASK, max_perf); >>>>>> + value |= FIELD_PREP(AMD_CPPC_MAX_PERF_MASK, cpudata->max_limit_perf); >>>>>> value |= FIELD_PREP(AMD_CPPC_DES_PERF_MASK, 0); >>>>>> - value |= FIELD_PREP(AMD_CPPC_MIN_PERF_MASK, min_perf); >>>>>> + value |= FIELD_PREP(AMD_CPPC_MIN_PERF_MASK, cpudata->min_limit_perf); >>>>>> /* Get BIOS pre-defined epp value */ >>>>>> epp = amd_pstate_get_epp(cpudata, value); >>>>> >>>> >>> >> >
diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index 3a3df67c096d5..dc3c45b6f5103 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -537,10 +537,6 @@ static void amd_pstate_update(struct amd_cpudata *cpudata, u32 min_perf, u32 nominal_perf = READ_ONCE(cpudata->nominal_perf); u64 value = prev; - min_perf = clamp_t(unsigned long, min_perf, cpudata->min_limit_perf, - cpudata->max_limit_perf); - max_perf = clamp_t(unsigned long, max_perf, cpudata->min_limit_perf, - cpudata->max_limit_perf); des_perf = clamp_t(unsigned long, des_perf, min_perf, max_perf); max_freq = READ_ONCE(cpudata->max_limit_freq); @@ -607,7 +603,7 @@ static int amd_pstate_verify(struct cpufreq_policy_data *policy_data) static int amd_pstate_update_min_max_limit(struct cpufreq_policy *policy) { - u32 max_limit_perf, min_limit_perf, lowest_perf, max_perf, max_freq; + u32 max_limit_perf, min_limit_perf, max_perf, max_freq; struct amd_cpudata *cpudata = policy->driver_data; max_perf = READ_ONCE(cpudata->highest_perf); @@ -615,12 +611,8 @@ static int amd_pstate_update_min_max_limit(struct cpufreq_policy *policy) max_limit_perf = div_u64(policy->max * max_perf, max_freq); min_limit_perf = div_u64(policy->min * max_perf, max_freq); - lowest_perf = READ_ONCE(cpudata->lowest_perf); - if (min_limit_perf < lowest_perf) - min_limit_perf = lowest_perf; - - if (max_limit_perf < min_limit_perf) - max_limit_perf = min_limit_perf; + if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE) + min_limit_perf = min(cpudata->nominal_perf, max_limit_perf); WRITE_ONCE(cpudata->max_limit_perf, max_limit_perf); WRITE_ONCE(cpudata->min_limit_perf, min_limit_perf); @@ -1562,28 +1554,18 @@ static void amd_pstate_epp_cpu_exit(struct cpufreq_policy *policy) static int amd_pstate_epp_update_limit(struct cpufreq_policy *policy) { struct amd_cpudata *cpudata = policy->driver_data; - u32 max_perf, min_perf; u64 value; s16 epp; - max_perf = READ_ONCE(cpudata->highest_perf); - min_perf = READ_ONCE(cpudata->lowest_perf); amd_pstate_update_min_max_limit(policy); - max_perf = clamp_t(unsigned long, max_perf, cpudata->min_limit_perf, - cpudata->max_limit_perf); - min_perf = clamp_t(unsigned long, min_perf, cpudata->min_limit_perf, - cpudata->max_limit_perf); value = READ_ONCE(cpudata->cppc_req_cached); - if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE) - min_perf = min(cpudata->nominal_perf, max_perf); - value &= ~(AMD_CPPC_MAX_PERF_MASK | AMD_CPPC_MIN_PERF_MASK | AMD_CPPC_DES_PERF_MASK); - value |= FIELD_PREP(AMD_CPPC_MAX_PERF_MASK, max_perf); + value |= FIELD_PREP(AMD_CPPC_MAX_PERF_MASK, cpudata->max_limit_perf); value |= FIELD_PREP(AMD_CPPC_DES_PERF_MASK, 0); - value |= FIELD_PREP(AMD_CPPC_MIN_PERF_MASK, min_perf); + value |= FIELD_PREP(AMD_CPPC_MIN_PERF_MASK, cpudata->min_limit_perf); /* Get BIOS pre-defined epp value */ epp = amd_pstate_get_epp(cpudata, value);