Message ID | 20230106061420.95715-6-perry.yuan@amd.com |
---|---|
State | Superseded |
Headers | show |
Series | Implement AMD Pstate EPP Driver | expand |
[AMD Official Use Only - General] Hi Wyes. > -----Original Message----- > From: Karny, Wyes <Wyes.Karny@amd.com> > Sent: Friday, January 6, 2023 7:19 PM > To: Yuan, Perry <Perry.Yuan@amd.com>; rafael.j.wysocki@intel.com; > Limonciello, Mario <Mario.Limonciello@amd.com>; Huang, Ray > <Ray.Huang@amd.com>; viresh.kumar@linaro.org > Cc: Sharma, Deepak <Deepak.Sharma@amd.com>; Fontenot, Nathan > <Nathan.Fontenot@amd.com>; Deucher, Alexander > <Alexander.Deucher@amd.com>; Huang, Shimmer > <Shimmer.Huang@amd.com>; Du, Xiaojian <Xiaojian.Du@amd.com>; Meng, > Li (Jassmine) <Li.Meng@amd.com>; linux-pm@vger.kernel.org; linux- > kernel@vger.kernel.org > Subject: Re: [PATCH v10 05/12] cpufreq: amd-pstate: implement Pstate EPP > support for the AMD processors > > Hi Perry, > > On 1/6/2023 11:44 AM, Perry Yuan wrote: > ----------------------------------->8----------------------------------- > ----------------------------------->------- > > +static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy) { > > + int min_freq, max_freq, nominal_freq, lowest_nonlinear_freq, ret; > > + struct amd_cpudata *cpudata; > > + struct device *dev; > > + int rc; > > + u64 value; > > Please call `amd_perf_ctl_reset` function here, otherwise amd_pstate would > behave unexpectedly. I will add this reset function call to the EPP driver in v11, together with another Kconfig fix. Thank you. Perry. 
> > > + > > + dev = get_cpu_device(policy->cpu); > > + if (!dev) > > + goto free_cpudata1; > > + > > + cpudata = kzalloc(sizeof(*cpudata), GFP_KERNEL); > > + if (!cpudata) > > + return -ENOMEM; > > + > > + cpudata->cpu = policy->cpu; > > + cpudata->epp_policy = 0; > > + > > + rc = amd_pstate_init_perf(cpudata); > > + if (rc) > > + goto free_cpudata1; > > + > > + min_freq = amd_get_min_freq(cpudata); > > + max_freq = amd_get_max_freq(cpudata); > > + nominal_freq = amd_get_nominal_freq(cpudata); > > + lowest_nonlinear_freq = amd_get_lowest_nonlinear_freq(cpudata); > > + if (min_freq < 0 || max_freq < 0 || min_freq > max_freq) { > > + dev_err(dev, "min_freq(%d) or max_freq(%d) value is > incorrect\n", > > + min_freq, max_freq); > > + ret = -EINVAL; > > + goto free_cpudata1; > > + } > > + > > + policy->cpuinfo.min_freq = min_freq; > > + policy->cpuinfo.max_freq = max_freq; > > + /* It will be updated by governor */ > > + policy->cur = policy->cpuinfo.min_freq; > > + > > + /* Initial processor data capability frequencies */ > > + cpudata->max_freq = max_freq; > > + cpudata->min_freq = min_freq; > > + cpudata->nominal_freq = nominal_freq; > > + cpudata->lowest_nonlinear_freq = lowest_nonlinear_freq; > > + > > + policy->driver_data = cpudata; > > + > > + cpudata->epp_cached = amd_pstate_get_epp(cpudata, value); > > + > > + policy->min = policy->cpuinfo.min_freq; > > + policy->max = policy->cpuinfo.max_freq; > > + > > + /* > > + * Set the policy to powersave to provide a valid fallback value in case > > + * the default cpufreq governor is neither powersave nor > performance. 
> > + */ > > + policy->policy = CPUFREQ_POLICY_POWERSAVE; > > + > > + if (boot_cpu_has(X86_FEATURE_CPPC)) { > > + policy->fast_switch_possible = true; > > + ret = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, > &value); > > + if (ret) > > + return ret; > > + WRITE_ONCE(cpudata->cppc_req_cached, value); > > + > > + ret = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_CAP1, > &value); > > + if (ret) > > + return ret; > > + WRITE_ONCE(cpudata->cppc_cap1_cached, value); > > + } > > + amd_pstate_boost_init(cpudata); > > + > > + return 0; > > + > > +free_cpudata1: > > + kfree(cpudata); > > + return ret; > > +} > > + > > +static int amd_pstate_epp_cpu_exit(struct cpufreq_policy *policy) { > > + pr_debug("CPU %d exiting\n", policy->cpu); > > + policy->fast_switch_possible = false; > > + return 0; > > +} > > + > > +static void amd_pstate_epp_init(unsigned int cpu) { > > + struct cpufreq_policy *policy = cpufreq_cpu_get(cpu); > > + struct amd_cpudata *cpudata = policy->driver_data; > > + u32 max_perf, min_perf; > > + u64 value; > > + s16 epp; > > + > > + max_perf = READ_ONCE(cpudata->highest_perf); > > + min_perf = READ_ONCE(cpudata->lowest_perf); > > + > > + value = READ_ONCE(cpudata->cppc_req_cached); > > + > > + if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE) > > + min_perf = max_perf; > > + > > + /* Initial min/max values for CPPC Performance Controls Register */ > > + value &= ~AMD_CPPC_MIN_PERF(~0L); > > + value |= AMD_CPPC_MIN_PERF(min_perf); > > + > > + value &= ~AMD_CPPC_MAX_PERF(~0L); > > + value |= AMD_CPPC_MAX_PERF(max_perf); > > + > > + /* CPPC EPP feature require to set zero to the desire perf bit */ > > + value &= ~AMD_CPPC_DES_PERF(~0L); > > + value |= AMD_CPPC_DES_PERF(0); > > + > > + if (cpudata->epp_policy == cpudata->policy) > > + goto skip_epp; > > + > > + cpudata->epp_policy = cpudata->policy; > > + > > + if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE) { > > + epp = amd_pstate_get_epp(cpudata, value); > > + if (epp < 0) > > + goto skip_epp; > > + /* 
force the epp value to be zero for performance policy */ > > + epp = 0; > > + } else { > > + /* Get BIOS pre-defined epp value */ > > + epp = amd_pstate_get_epp(cpudata, value); > > + if (epp) > > + goto skip_epp; > > + } > > + /* Set initial EPP value */ > > + if (boot_cpu_has(X86_FEATURE_CPPC)) { > > + value &= ~GENMASK_ULL(31, 24); > > + value |= (u64)epp << 24; > > + } > > + > > +skip_epp: > > + WRITE_ONCE(cpudata->cppc_req_cached, value); > > + amd_pstate_set_epp(cpudata, epp); > > + cpufreq_cpu_put(policy); > > +} > > + > > +static int amd_pstate_epp_set_policy(struct cpufreq_policy *policy) { > > + struct amd_cpudata *cpudata = policy->driver_data; > > + > > + if (!policy->cpuinfo.max_freq) > > + return -ENODEV; > > + > > + pr_debug("set_policy: cpuinfo.max %u policy->max %u\n", > > + policy->cpuinfo.max_freq, policy->max); > > + > > + cpudata->policy = policy->policy; > > + > > + amd_pstate_epp_init(policy->cpu); > > + > > + return 0; > > +} > > + > > +static int amd_pstate_epp_verify_policy(struct cpufreq_policy_data > > +*policy) { > > + cpufreq_verify_within_cpu_limits(policy); > > + pr_debug("policy_max =%d, policy_min=%d\n", policy->max, policy- > >min); > > + return 0; > > +} > > + > > static struct cpufreq_driver amd_pstate_driver = { > > .flags = CPUFREQ_CONST_LOOPS | > CPUFREQ_NEED_UPDATE_LIMITS, > > .verify = amd_pstate_verify, > > @@ -627,6 +973,16 @@ static struct cpufreq_driver amd_pstate_driver = { > > .attr = amd_pstate_attr, > > }; > > > > +static struct cpufreq_driver amd_pstate_epp_driver = { > > + .flags = CPUFREQ_CONST_LOOPS, > > + .verify = amd_pstate_epp_verify_policy, > > + .setpolicy = amd_pstate_epp_set_policy, > > + .init = amd_pstate_epp_cpu_init, > > + .exit = amd_pstate_epp_cpu_exit, > > + .name = "amd_pstate_epp", > > + .attr = amd_pstate_epp_attr, > > +}; > > + > > static int __init amd_pstate_init(void) { > > int ret; > > @@ -655,7 +1011,8 @@ static int __init amd_pstate_init(void) > > /* capability check */ > > if 
(boot_cpu_has(X86_FEATURE_CPPC)) { > > pr_debug("AMD CPPC MSR based functionality is > supported\n"); > > - amd_pstate_driver.adjust_perf = amd_pstate_adjust_perf; > > + if (cppc_state == AMD_PSTATE_PASSIVE) > > + current_pstate_driver->adjust_perf = > amd_pstate_adjust_perf; > > } else { > > pr_debug("AMD CPPC shared memory based functionality is > supported\n"); > > static_call_update(amd_pstate_enable, cppc_enable); @@ - > 666,14 > > +1023,13 @@ static int __init amd_pstate_init(void) > > /* enable amd pstate feature */ > > ret = amd_pstate_enable(true); > > if (ret) { > > - pr_err("failed to enable amd-pstate with return %d\n", ret); > > + pr_err("failed to enable with return %d\n", ret); > > return ret; > > } > > > > - ret = cpufreq_register_driver(&amd_pstate_driver); > > + ret = cpufreq_register_driver(current_pstate_driver); > > if (ret) > > - pr_err("failed to register amd_pstate_driver with > return %d\n", > > - ret); > > + pr_err("failed to register with return %d\n", ret); > > > > return ret; > > } > > @@ -695,6 +1051,12 @@ static int __init amd_pstate_param(char *str) > > if (cppc_state == AMD_PSTATE_DISABLE) > > pr_info("driver is explicitly disabled\n"); > > > > + if (cppc_state == AMD_PSTATE_ACTIVE) > > + current_pstate_driver = &amd_pstate_epp_driver; > > + > > + if (cppc_state == AMD_PSTATE_PASSIVE) > > + current_pstate_driver = &amd_pstate_driver; > > + > > return 0; > > } > > > > diff --git a/include/linux/amd-pstate.h b/include/linux/amd-pstate.h > > index dae2ce0f6735..8341a2a2948a 100644 > > --- a/include/linux/amd-pstate.h > > +++ b/include/linux/amd-pstate.h > > @@ -47,6 +47,10 @@ struct amd_aperf_mperf { > > * @prev: Last Aperf/Mperf/tsc count value read from register > > * @freq: current cpu frequency value > > * @boost_supported: check whether the Processor or SBIOS supports > > boost mode > > + * @epp_policy: Last saved policy used to set energy-performance > > + preference > > + * @epp_cached: Cached CPPC energy-performance preference 
value > > + * @policy: Cpufreq policy value > > + * @cppc_cap1_cached Cached MSR_AMD_CPPC_CAP1 register value > > * > > * The amd_cpudata is key private data for each CPU thread in AMD P- > State, and > > * represents all the attributes and goals that AMD P-State requests at > runtime. > > @@ -72,6 +76,12 @@ struct amd_cpudata { > > > > u64 freq; > > bool boost_supported; > > + > > + /* EPP feature related attributes*/ > > + s16 epp_policy; > > + s16 epp_cached; > > + u32 policy; > > + u64 cppc_cap1_cached; > > }; > > > > /* > > -- > Thanks & Regards, > Wyes
On Fri, Jan 06, 2023 at 02:14:13PM +0800, Yuan, Perry wrote: > From: Perry Yuan <Perry.Yuan@amd.com> > > Add EPP driver support for AMD SoCs which support a dedicated MSR for > CPPC. EPP is used by the DPM controller to configure the frequency that > a core operates at during short periods of activity. > > The SoC EPP targets are configured on a scale from 0 to 255 where 0 > represents maximum performance and 255 represents maximum efficiency. > > The amd-pstate driver exports profile string names to userspace that are > tied to specific EPP values. > > The balance_performance string (0x80) provides the best balance for > efficiency versus power on most systems, but users can choose other > strings to meet their needs as well. > > $ cat /sys/devices/system/cpu/cpufreq/policy0/energy_performance_available_preferences > default performance balance_performance balance_power power > > $ cat /sys/devices/system/cpu/cpufreq/policy0/energy_performance_preference > balance_performance > > To enable the driver,it needs to add `amd_pstate=active` to kernel > command line and kernel will load the active mode epp driver > > Signed-off-by: Perry Yuan <Perry.Yuan@amd.com> > --- > drivers/cpufreq/amd-pstate.c | 374 ++++++++++++++++++++++++++++++++++- > include/linux/amd-pstate.h | 10 + > 2 files changed, 378 insertions(+), 6 deletions(-) > > diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c > index 1a8b31277620..9a92c71af80f 100644 > --- a/drivers/cpufreq/amd-pstate.c > +++ b/drivers/cpufreq/amd-pstate.c > @@ -59,7 +59,9 @@ > * we disable it by default to go acpi-cpufreq on these processors and add a > * module parameter to be able to enable it manually for debugging. 
> */ > +static struct cpufreq_driver *current_pstate_driver; > static struct cpufreq_driver amd_pstate_driver; > +static struct cpufreq_driver amd_pstate_epp_driver; > static int cppc_state = AMD_PSTATE_DISABLE; > > static inline int get_mode_idx_from_str(const char *str, size_t size) > @@ -73,6 +75,114 @@ static inline int get_mode_idx_from_str(const char *str, size_t size) > return -EINVAL; > } > > +static DEFINE_MUTEX(amd_pstate_limits_lock); > +static DEFINE_MUTEX(amd_pstate_driver_lock); > + > +static s16 amd_pstate_get_epp(struct amd_cpudata *cpudata, u64 cppc_req_cached) > +{ > + u64 epp; > + int ret; > + > + if (boot_cpu_has(X86_FEATURE_CPPC)) { > + if (!cppc_req_cached) { > + epp = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, > + &cppc_req_cached); > + if (epp) > + return epp; > + } > + epp = (cppc_req_cached >> 24) & 0xFF; > + } else { > + ret = cppc_get_epp_perf(cpudata->cpu, &epp); > + if (ret < 0) { > + pr_debug("Could not retrieve energy perf value (%d)\n", ret); > + return -EIO; > + } > + } > + > + return (s16)(epp & 0xff); > +} > + > +static int amd_pstate_get_energy_pref_index(struct amd_cpudata *cpudata) > +{ > + s16 epp; > + int index = -EINVAL; > + > + epp = amd_pstate_get_epp(cpudata, 0); > + if (epp < 0) > + return epp; > + > + switch (epp) { > + case HWP_EPP_PERFORMANCE: > + index = EPP_INDEX_PERFORMANCE; > + break; > + case HWP_EPP_BALANCE_PERFORMANCE: > + index = EPP_INDEX_BALANCE_PERFORMANCE; > + break; > + case HWP_EPP_BALANCE_POWERSAVE: > + index = EPP_INDEX_BALANCE_POWERSAVE; > + break; > + case HWP_EPP_POWERSAVE: > + index = EPP_INDEX_POWERSAVE; > + break; > + default: > + break; > + } > + > + return index; > +} > + > +static int amd_pstate_set_epp(struct amd_cpudata *cpudata, u32 epp) > +{ > + int ret; > + struct cppc_perf_ctrls perf_ctrls; > + > + if (boot_cpu_has(X86_FEATURE_CPPC)) { > + u64 value = READ_ONCE(cpudata->cppc_req_cached); > + > + value &= ~GENMASK_ULL(31, 24); > + value |= (u64)epp << 24; > + 
WRITE_ONCE(cpudata->cppc_req_cached, value); > + > + ret = wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, value); > + if (!ret) > + cpudata->epp_cached = epp; > + } else { > + perf_ctrls.energy_perf = epp; > + ret = cppc_set_epp_perf(cpudata->cpu, &perf_ctrls, 1); > + if (ret) { > + pr_debug("failed to set energy perf value (%d)\n", ret); > + return ret; > + } > + cpudata->epp_cached = epp; > + } > + > + return ret; > +} > + > +static int amd_pstate_set_energy_pref_index(struct amd_cpudata *cpudata, > + int pref_index) > +{ > + int epp = -EINVAL; > + int ret; > + > + if (!pref_index) { > + pr_debug("EPP pref_index is invalid\n"); > + return -EINVAL; > + } > + > + if (epp == -EINVAL) > + epp = epp_values[pref_index]; > + > + if (epp > 0 && cpudata->policy == CPUFREQ_POLICY_PERFORMANCE) { > + pr_debug("EPP cannot be set under performance policy\n"); > + return -EBUSY; > + } > + > + ret = amd_pstate_set_epp(cpudata, epp); > + > + return ret; > +} > + > static inline int pstate_enable(bool enable) > { > return wrmsrl_safe(MSR_AMD_CPPC_ENABLE, enable); > @@ -81,11 +191,21 @@ static inline int pstate_enable(bool enable) > static int cppc_enable(bool enable) > { > int cpu, ret = 0; > + struct cppc_perf_ctrls perf_ctrls; > > for_each_present_cpu(cpu) { > ret = cppc_set_enable(cpu, enable); > if (ret) > return ret; > + > + /* Enable autonomous mode for EPP */ > + if (cppc_state == AMD_PSTATE_ACTIVE) { > + /* Set desired perf as zero to allow EPP firmware control */ > + perf_ctrls.desired_perf = 0; > + ret = cppc_set_perf(cpu, &perf_ctrls); > + if (ret) > + return ret; > + } > } > > return ret; > @@ -428,7 +548,7 @@ static void amd_pstate_boost_init(struct amd_cpudata *cpudata) > return; > > cpudata->boost_supported = true; > - amd_pstate_driver.boost_enabled = true; > + current_pstate_driver->boost_enabled = true; > } > > static void amd_perf_ctl_reset(unsigned int cpu) > @@ -602,10 +722,61 @@ static ssize_t show_amd_pstate_highest_perf(struct cpufreq_policy *policy, > 
return sprintf(&buf[0], "%u\n", perf); > } > > +static ssize_t show_energy_performance_available_preferences( > + struct cpufreq_policy *policy, char *buf) > +{ > + int i = 0; > + int offset = 0; > + > + while (energy_perf_strings[i] != NULL) > + offset += sysfs_emit_at(buf, offset, "%s ", energy_perf_strings[i++]); > + > + sysfs_emit_at(buf, offset, "\n"); > + > + return offset; > +} > + > +static ssize_t store_energy_performance_preference( > + struct cpufreq_policy *policy, const char *buf, size_t count) > +{ > + struct amd_cpudata *cpudata = policy->driver_data; > + char str_preference[21]; > + ssize_t ret; > + > + ret = sscanf(buf, "%20s", str_preference); > + if (ret != 1) > + return -EINVAL; > + > + ret = match_string(energy_perf_strings, -1, str_preference); > + if (ret < 0) > + return -EINVAL; > + > + mutex_lock(&amd_pstate_limits_lock); > + ret = amd_pstate_set_energy_pref_index(cpudata, ret); > + mutex_unlock(&amd_pstate_limits_lock); > + > + return ret ?: count; > +} > + > +static ssize_t show_energy_performance_preference( > + struct cpufreq_policy *policy, char *buf) > +{ > + struct amd_cpudata *cpudata = policy->driver_data; > + int preference; > + > + preference = amd_pstate_get_energy_pref_index(cpudata); > + if (preference < 0) > + return preference; > + > + return sysfs_emit(buf, "%s\n", energy_perf_strings[preference]); > +} > + > cpufreq_freq_attr_ro(amd_pstate_max_freq); > cpufreq_freq_attr_ro(amd_pstate_lowest_nonlinear_freq); > > cpufreq_freq_attr_ro(amd_pstate_highest_perf); > +cpufreq_freq_attr_rw(energy_performance_preference); > +cpufreq_freq_attr_ro(energy_performance_available_preferences); > > static struct freq_attr *amd_pstate_attr[] = { > &amd_pstate_max_freq, > @@ -614,6 +785,181 @@ static struct freq_attr *amd_pstate_attr[] = { > NULL, > }; > > +static struct freq_attr *amd_pstate_epp_attr[] = { > + &amd_pstate_max_freq, > + &amd_pstate_lowest_nonlinear_freq, > + &amd_pstate_highest_perf, > + &energy_performance_preference, > + 
&energy_performance_available_preferences, > + NULL, > +}; > + > +static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy) > +{ > + int min_freq, max_freq, nominal_freq, lowest_nonlinear_freq, ret; > + struct amd_cpudata *cpudata; > + struct device *dev; > + int rc; > + u64 value; > + > + dev = get_cpu_device(policy->cpu); > + if (!dev) > + goto free_cpudata1; > + > + cpudata = kzalloc(sizeof(*cpudata), GFP_KERNEL); > + if (!cpudata) > + return -ENOMEM; > + > + cpudata->cpu = policy->cpu; > + cpudata->epp_policy = 0; > + > + rc = amd_pstate_init_perf(cpudata); > + if (rc) > + goto free_cpudata1; > + > + min_freq = amd_get_min_freq(cpudata); > + max_freq = amd_get_max_freq(cpudata); > + nominal_freq = amd_get_nominal_freq(cpudata); > + lowest_nonlinear_freq = amd_get_lowest_nonlinear_freq(cpudata); > + if (min_freq < 0 || max_freq < 0 || min_freq > max_freq) { > + dev_err(dev, "min_freq(%d) or max_freq(%d) value is incorrect\n", > + min_freq, max_freq); > + ret = -EINVAL; > + goto free_cpudata1; > + } > + > + policy->cpuinfo.min_freq = min_freq; > + policy->cpuinfo.max_freq = max_freq; > + /* It will be updated by governor */ > + policy->cur = policy->cpuinfo.min_freq; > + > + /* Initial processor data capability frequencies */ > + cpudata->max_freq = max_freq; > + cpudata->min_freq = min_freq; > + cpudata->nominal_freq = nominal_freq; > + cpudata->lowest_nonlinear_freq = lowest_nonlinear_freq; > + > + policy->driver_data = cpudata; > + > + cpudata->epp_cached = amd_pstate_get_epp(cpudata, value); As the 0day warning pointed out, this should be amd_pstate_get_epp(cpudata, 0). Everything else looks good to me; with those fixed, please feel free to add Acked-by: Huang Rui <ray.huang@amd.com> > + > + policy->min = policy->cpuinfo.min_freq; > + policy->max = policy->cpuinfo.max_freq; > + > + /* > + * Set the policy to powersave to provide a valid fallback value in case > + * the default cpufreq governor is neither powersave nor performance. 
> + */ > + policy->policy = CPUFREQ_POLICY_POWERSAVE; > + > + if (boot_cpu_has(X86_FEATURE_CPPC)) { > + policy->fast_switch_possible = true; > + ret = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, &value); > + if (ret) > + return ret; > + WRITE_ONCE(cpudata->cppc_req_cached, value); > + > + ret = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_CAP1, &value); > + if (ret) > + return ret; > + WRITE_ONCE(cpudata->cppc_cap1_cached, value); > + } > + amd_pstate_boost_init(cpudata); > + > + return 0; > + > +free_cpudata1: > + kfree(cpudata); > + return ret; > +} > + > +static int amd_pstate_epp_cpu_exit(struct cpufreq_policy *policy) > +{ > + pr_debug("CPU %d exiting\n", policy->cpu); > + policy->fast_switch_possible = false; > + return 0; > +} > + > +static void amd_pstate_epp_init(unsigned int cpu) > +{ > + struct cpufreq_policy *policy = cpufreq_cpu_get(cpu); > + struct amd_cpudata *cpudata = policy->driver_data; > + u32 max_perf, min_perf; > + u64 value; > + s16 epp; > + > + max_perf = READ_ONCE(cpudata->highest_perf); > + min_perf = READ_ONCE(cpudata->lowest_perf); > + > + value = READ_ONCE(cpudata->cppc_req_cached); > + > + if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE) > + min_perf = max_perf; > + > + /* Initial min/max values for CPPC Performance Controls Register */ > + value &= ~AMD_CPPC_MIN_PERF(~0L); > + value |= AMD_CPPC_MIN_PERF(min_perf); > + > + value &= ~AMD_CPPC_MAX_PERF(~0L); > + value |= AMD_CPPC_MAX_PERF(max_perf); > + > + /* CPPC EPP feature require to set zero to the desire perf bit */ > + value &= ~AMD_CPPC_DES_PERF(~0L); > + value |= AMD_CPPC_DES_PERF(0); > + > + if (cpudata->epp_policy == cpudata->policy) > + goto skip_epp; > + > + cpudata->epp_policy = cpudata->policy; > + > + if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE) { > + epp = amd_pstate_get_epp(cpudata, value); > + if (epp < 0) > + goto skip_epp; > + /* force the epp value to be zero for performance policy */ > + epp = 0; > + } else { > + /* Get BIOS pre-defined epp value */ > + epp = 
amd_pstate_get_epp(cpudata, value); > + if (epp) > + goto skip_epp; > + } > + /* Set initial EPP value */ > + if (boot_cpu_has(X86_FEATURE_CPPC)) { > + value &= ~GENMASK_ULL(31, 24); > + value |= (u64)epp << 24; > + } > + > +skip_epp: > + WRITE_ONCE(cpudata->cppc_req_cached, value); > + amd_pstate_set_epp(cpudata, epp); > + cpufreq_cpu_put(policy); > +} > + > +static int amd_pstate_epp_set_policy(struct cpufreq_policy *policy) > +{ > + struct amd_cpudata *cpudata = policy->driver_data; > + > + if (!policy->cpuinfo.max_freq) > + return -ENODEV; > + > + pr_debug("set_policy: cpuinfo.max %u policy->max %u\n", > + policy->cpuinfo.max_freq, policy->max); > + > + cpudata->policy = policy->policy; > + > + amd_pstate_epp_init(policy->cpu); > + > + return 0; > +} > + > +static int amd_pstate_epp_verify_policy(struct cpufreq_policy_data *policy) > +{ > + cpufreq_verify_within_cpu_limits(policy); > + pr_debug("policy_max =%d, policy_min=%d\n", policy->max, policy->min); > + return 0; > +} > + > static struct cpufreq_driver amd_pstate_driver = { > .flags = CPUFREQ_CONST_LOOPS | CPUFREQ_NEED_UPDATE_LIMITS, > .verify = amd_pstate_verify, > @@ -627,6 +973,16 @@ static struct cpufreq_driver amd_pstate_driver = { > .attr = amd_pstate_attr, > }; > > +static struct cpufreq_driver amd_pstate_epp_driver = { > + .flags = CPUFREQ_CONST_LOOPS, > + .verify = amd_pstate_epp_verify_policy, > + .setpolicy = amd_pstate_epp_set_policy, > + .init = amd_pstate_epp_cpu_init, > + .exit = amd_pstate_epp_cpu_exit, > + .name = "amd_pstate_epp", > + .attr = amd_pstate_epp_attr, > +}; > + > static int __init amd_pstate_init(void) > { > int ret; > @@ -655,7 +1011,8 @@ static int __init amd_pstate_init(void) > /* capability check */ > if (boot_cpu_has(X86_FEATURE_CPPC)) { > pr_debug("AMD CPPC MSR based functionality is supported\n"); > - amd_pstate_driver.adjust_perf = amd_pstate_adjust_perf; > + if (cppc_state == AMD_PSTATE_PASSIVE) > + current_pstate_driver->adjust_perf = amd_pstate_adjust_perf; > } 
else { > pr_debug("AMD CPPC shared memory based functionality is supported\n"); > static_call_update(amd_pstate_enable, cppc_enable); > @@ -666,14 +1023,13 @@ static int __init amd_pstate_init(void) > /* enable amd pstate feature */ > ret = amd_pstate_enable(true); > if (ret) { > - pr_err("failed to enable amd-pstate with return %d\n", ret); > + pr_err("failed to enable with return %d\n", ret); > return ret; > } > > - ret = cpufreq_register_driver(&amd_pstate_driver); > + ret = cpufreq_register_driver(current_pstate_driver); > if (ret) > - pr_err("failed to register amd_pstate_driver with return %d\n", > - ret); > + pr_err("failed to register with return %d\n", ret); > > return ret; > } > @@ -695,6 +1051,12 @@ static int __init amd_pstate_param(char *str) > if (cppc_state == AMD_PSTATE_DISABLE) > pr_info("driver is explicitly disabled\n"); > > + if (cppc_state == AMD_PSTATE_ACTIVE) > + current_pstate_driver = &amd_pstate_epp_driver; > + > + if (cppc_state == AMD_PSTATE_PASSIVE) > + current_pstate_driver = &amd_pstate_driver; > + > return 0; > } > > diff --git a/include/linux/amd-pstate.h b/include/linux/amd-pstate.h > index dae2ce0f6735..8341a2a2948a 100644 > --- a/include/linux/amd-pstate.h > +++ b/include/linux/amd-pstate.h > @@ -47,6 +47,10 @@ struct amd_aperf_mperf { > * @prev: Last Aperf/Mperf/tsc count value read from register > * @freq: current cpu frequency value > * @boost_supported: check whether the Processor or SBIOS supports boost mode > + * @epp_policy: Last saved policy used to set energy-performance preference > + * @epp_cached: Cached CPPC energy-performance preference value > + * @policy: Cpufreq policy value > + * @cppc_cap1_cached Cached MSR_AMD_CPPC_CAP1 register value > * > * The amd_cpudata is key private data for each CPU thread in AMD P-State, and > * represents all the attributes and goals that AMD P-State requests at runtime. 
> @@ -72,6 +76,12 @@ struct amd_cpudata { > > u64 freq; > bool boost_supported; > + > + /* EPP feature related attributes*/ > + s16 epp_policy; > + s16 epp_cached; > + u32 policy; > + u64 cppc_cap1_cached; > }; > > /* > -- > 2.34.1 >
diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index 1a8b31277620..9a92c71af80f 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -59,7 +59,9 @@ * we disable it by default to go acpi-cpufreq on these processors and add a * module parameter to be able to enable it manually for debugging. */ +static struct cpufreq_driver *current_pstate_driver; static struct cpufreq_driver amd_pstate_driver; +static struct cpufreq_driver amd_pstate_epp_driver; static int cppc_state = AMD_PSTATE_DISABLE; static inline int get_mode_idx_from_str(const char *str, size_t size) @@ -73,6 +75,114 @@ static inline int get_mode_idx_from_str(const char *str, size_t size) return -EINVAL; } +static DEFINE_MUTEX(amd_pstate_limits_lock); +static DEFINE_MUTEX(amd_pstate_driver_lock); + +static s16 amd_pstate_get_epp(struct amd_cpudata *cpudata, u64 cppc_req_cached) +{ + u64 epp; + int ret; + + if (boot_cpu_has(X86_FEATURE_CPPC)) { + if (!cppc_req_cached) { + epp = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, + &cppc_req_cached); + if (epp) + return epp; + } + epp = (cppc_req_cached >> 24) & 0xFF; + } else { + ret = cppc_get_epp_perf(cpudata->cpu, &epp); + if (ret < 0) { + pr_debug("Could not retrieve energy perf value (%d)\n", ret); + return -EIO; + } + } + + return (s16)(epp & 0xff); +} + +static int amd_pstate_get_energy_pref_index(struct amd_cpudata *cpudata) +{ + s16 epp; + int index = -EINVAL; + + epp = amd_pstate_get_epp(cpudata, 0); + if (epp < 0) + return epp; + + switch (epp) { + case HWP_EPP_PERFORMANCE: + index = EPP_INDEX_PERFORMANCE; + break; + case HWP_EPP_BALANCE_PERFORMANCE: + index = EPP_INDEX_BALANCE_PERFORMANCE; + break; + case HWP_EPP_BALANCE_POWERSAVE: + index = EPP_INDEX_BALANCE_POWERSAVE; + break; + case HWP_EPP_POWERSAVE: + index = EPP_INDEX_POWERSAVE; + break; + default: + break; + } + + return index; +} + +static int amd_pstate_set_epp(struct amd_cpudata *cpudata, u32 epp) +{ + int ret; + struct cppc_perf_ctrls 
perf_ctrls; + + if (boot_cpu_has(X86_FEATURE_CPPC)) { + u64 value = READ_ONCE(cpudata->cppc_req_cached); + + value &= ~GENMASK_ULL(31, 24); + value |= (u64)epp << 24; + WRITE_ONCE(cpudata->cppc_req_cached, value); + + ret = wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, value); + if (!ret) + cpudata->epp_cached = epp; + } else { + perf_ctrls.energy_perf = epp; + ret = cppc_set_epp_perf(cpudata->cpu, &perf_ctrls, 1); + if (ret) { + pr_debug("failed to set energy perf value (%d)\n", ret); + return ret; + } + cpudata->epp_cached = epp; + } + + return ret; +} + +static int amd_pstate_set_energy_pref_index(struct amd_cpudata *cpudata, + int pref_index) +{ + int epp = -EINVAL; + int ret; + + if (!pref_index) { + pr_debug("EPP pref_index is invalid\n"); + return -EINVAL; + } + + if (epp == -EINVAL) + epp = epp_values[pref_index]; + + if (epp > 0 && cpudata->policy == CPUFREQ_POLICY_PERFORMANCE) { + pr_debug("EPP cannot be set under performance policy\n"); + return -EBUSY; + } + + ret = amd_pstate_set_epp(cpudata, epp); + + return ret; +} + static inline int pstate_enable(bool enable) { return wrmsrl_safe(MSR_AMD_CPPC_ENABLE, enable); @@ -81,11 +191,21 @@ static inline int pstate_enable(bool enable) static int cppc_enable(bool enable) { int cpu, ret = 0; + struct cppc_perf_ctrls perf_ctrls; for_each_present_cpu(cpu) { ret = cppc_set_enable(cpu, enable); if (ret) return ret; + + /* Enable autonomous mode for EPP */ + if (cppc_state == AMD_PSTATE_ACTIVE) { + /* Set desired perf as zero to allow EPP firmware control */ + perf_ctrls.desired_perf = 0; + ret = cppc_set_perf(cpu, &perf_ctrls); + if (ret) + return ret; + } } return ret; @@ -428,7 +548,7 @@ static void amd_pstate_boost_init(struct amd_cpudata *cpudata) return; cpudata->boost_supported = true; - amd_pstate_driver.boost_enabled = true; + current_pstate_driver->boost_enabled = true; } static void amd_perf_ctl_reset(unsigned int cpu) @@ -602,10 +722,61 @@ static ssize_t show_amd_pstate_highest_perf(struct cpufreq_policy 
*policy,
 	return sprintf(&buf[0], "%u\n", perf);
 }
+static ssize_t show_energy_performance_available_preferences(
+				struct cpufreq_policy *policy, char *buf)
+{
+	int i = 0;
+	int offset = 0;
+
+	while (energy_perf_strings[i] != NULL)
+		offset += sysfs_emit_at(buf, offset, "%s ", energy_perf_strings[i++]);
+
+	offset += sysfs_emit_at(buf, offset, "\n");	/* newline counts too */
+
+	return offset;
+}
+
+static ssize_t store_energy_performance_preference(
+		struct cpufreq_policy *policy, const char *buf, size_t count)
+{
+	struct amd_cpudata *cpudata = policy->driver_data;
+	char str_preference[21];
+	ssize_t ret;
+
+	ret = sscanf(buf, "%20s", str_preference);
+	if (ret != 1)
+		return -EINVAL;
+
+	ret = match_string(energy_perf_strings, -1, str_preference);
+	if (ret < 0)
+		return -EINVAL;
+
+	mutex_lock(&amd_pstate_limits_lock);
+	ret = amd_pstate_set_energy_pref_index(cpudata, ret);
+	mutex_unlock(&amd_pstate_limits_lock);
+
+	return ret ?: count;
+}
+
+static ssize_t show_energy_performance_preference(
+				struct cpufreq_policy *policy, char *buf)
+{
+	struct amd_cpudata *cpudata = policy->driver_data;
+	int preference;
+
+	preference = amd_pstate_get_energy_pref_index(cpudata);
+	if (preference < 0)
+		return preference;
+
+	return sysfs_emit(buf, "%s\n", energy_perf_strings[preference]);
+}
+
 cpufreq_freq_attr_ro(amd_pstate_max_freq);
 cpufreq_freq_attr_ro(amd_pstate_lowest_nonlinear_freq);
 cpufreq_freq_attr_ro(amd_pstate_highest_perf);
+cpufreq_freq_attr_rw(energy_performance_preference);
+cpufreq_freq_attr_ro(energy_performance_available_preferences);
 static struct freq_attr *amd_pstate_attr[] = {
 	&amd_pstate_max_freq,
@@ -614,6 +785,229 @@ static struct freq_attr *amd_pstate_attr[] = {
 	NULL,
 };
 
+/* sysfs attributes exposed per policy by the EPP (active mode) driver */
+static struct freq_attr *amd_pstate_epp_attr[] = {
+	&amd_pstate_max_freq,
+	&amd_pstate_lowest_nonlinear_freq,
+	&amd_pstate_highest_perf,
+	&energy_performance_preference,
+	&energy_performance_available_preferences,
+	NULL,
+};
+
+/*
+ * cpufreq ->init() callback of the EPP driver.
+ *
+ * Allocates the per-CPU amd_cpudata, fills in the frequency limits of
+ * @policy and caches the CPPC request/capability MSRs when the CPU has
+ * X86_FEATURE_CPPC.
+ *
+ * Returns 0 on success or a negative errno; on failure the allocated
+ * amd_cpudata is freed before returning.
+ */
+static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy)
+{
+	int min_freq, max_freq, nominal_freq, lowest_nonlinear_freq, ret;
+	struct amd_cpudata *cpudata;
+	struct device *dev;
+	u64 value;
+
+	dev = get_cpu_device(policy->cpu);
+	if (!dev)
+		return -ENODEV;
+
+	cpudata = kzalloc(sizeof(*cpudata), GFP_KERNEL);
+	if (!cpudata)
+		return -ENOMEM;
+
+	cpudata->cpu = policy->cpu;
+	cpudata->epp_policy = 0;
+
+	ret = amd_pstate_init_perf(cpudata);
+	if (ret)
+		goto free_cpudata1;
+
+	min_freq = amd_get_min_freq(cpudata);
+	max_freq = amd_get_max_freq(cpudata);
+	nominal_freq = amd_get_nominal_freq(cpudata);
+	lowest_nonlinear_freq = amd_get_lowest_nonlinear_freq(cpudata);
+	if (min_freq < 0 || max_freq < 0 || min_freq > max_freq) {
+		dev_err(dev, "min_freq(%d) or max_freq(%d) value is incorrect\n",
+				min_freq, max_freq);
+		ret = -EINVAL;
+		goto free_cpudata1;
+	}
+
+	policy->cpuinfo.min_freq = min_freq;
+	policy->cpuinfo.max_freq = max_freq;
+	/* It will be updated by governor */
+	policy->cur = policy->cpuinfo.min_freq;
+
+	/* Initial processor data capability frequencies */
+	cpudata->max_freq = max_freq;
+	cpudata->min_freq = min_freq;
+	cpudata->nominal_freq = nominal_freq;
+	cpudata->lowest_nonlinear_freq = lowest_nonlinear_freq;
+
+	policy->driver_data = cpudata;
+
+	/*
+	 * Pass 0 instead of an uninitialized local as the cached request.
+	 * NOTE(review): assumes amd_pstate_get_epp() reads the hardware when
+	 * given a zero cached value - confirm against its definition.
+	 */
+	cpudata->epp_cached = amd_pstate_get_epp(cpudata, 0);
+
+	policy->min = policy->cpuinfo.min_freq;
+	policy->max = policy->cpuinfo.max_freq;
+
+	/*
+	 * Set the policy to powersave to provide a valid fallback value in case
+	 * the default cpufreq governor is neither powersave nor performance.
+	 */
+	policy->policy = CPUFREQ_POLICY_POWERSAVE;
+
+	if (boot_cpu_has(X86_FEATURE_CPPC)) {
+		policy->fast_switch_possible = true;
+		ret = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, &value);
+		if (ret)
+			goto free_cpudata1;
+		WRITE_ONCE(cpudata->cppc_req_cached, value);
+
+		ret = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_CAP1, &value);
+		if (ret)
+			goto free_cpudata1;
+		WRITE_ONCE(cpudata->cppc_cap1_cached, value);
+	}
+	amd_pstate_boost_init(cpudata);
+
+	return 0;
+
+free_cpudata1:
+	/* Do not leave a dangling pointer behind in the policy */
+	policy->driver_data = NULL;
+	kfree(cpudata);
+	return ret;
+}
+
+/*
+ * cpufreq ->exit() callback of the EPP driver: clears fast_switch_possible
+ * and releases the amd_cpudata allocated by amd_pstate_epp_cpu_init().
+ */
+static int amd_pstate_epp_cpu_exit(struct cpufreq_policy *policy)
+{
+	struct amd_cpudata *cpudata = policy->driver_data;
+
+	pr_debug("CPU %d exiting\n", policy->cpu);
+	policy->fast_switch_possible = false;
+
+	kfree(cpudata);
+	policy->driver_data = NULL;
+
+	return 0;
+}
+
+/*
+ * Build the CPPC request value for @cpu from the policy stored in its
+ * amd_cpudata and, when the policy changed since the last call, pick the
+ * EPP value to program (0 for the performance policy).
+ */
+static void amd_pstate_epp_init(unsigned int cpu)
+{
+	struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
+	struct amd_cpudata *cpudata;
+	u32 max_perf, min_perf;
+	u64 value;
+	/* Negative means "no valid EPP value to program" */
+	s16 epp = -EINVAL;
+
+	if (!policy)
+		return;
+
+	cpudata = policy->driver_data;
+
+	max_perf = READ_ONCE(cpudata->highest_perf);
+	min_perf = READ_ONCE(cpudata->lowest_perf);
+
+	value = READ_ONCE(cpudata->cppc_req_cached);
+
+	if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE)
+		min_perf = max_perf;
+
+	/* Initial min/max values for CPPC Performance Controls Register */
+	value &= ~AMD_CPPC_MIN_PERF(~0L);
+	value |= AMD_CPPC_MIN_PERF(min_perf);
+
+	value &= ~AMD_CPPC_MAX_PERF(~0L);
+	value |= AMD_CPPC_MAX_PERF(max_perf);
+
+	/* The CPPC EPP feature requires the desired perf field to be zero */
+	value &= ~AMD_CPPC_DES_PERF(~0L);
+	value |= AMD_CPPC_DES_PERF(0);
+
+	if (cpudata->epp_policy == cpudata->policy)
+		goto skip_epp;
+
+	cpudata->epp_policy = cpudata->policy;
+
+	if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE) {
+		epp = amd_pstate_get_epp(cpudata, value);
+		if (epp < 0)
+			goto skip_epp;
+		/* force the epp value to be zero for performance policy */
+		epp = 0;
+	} else {
+		/* Get BIOS pre-defined epp value */
+		epp = amd_pstate_get_epp(cpudata, value);
+		if (epp)
+			goto skip_epp;
+	}
+	/* Set initial EPP value */
+	if (boot_cpu_has(X86_FEATURE_CPPC)) {
+		value &= ~GENMASK_ULL(31, 24);
+		value |= (u64)epp << 24;
+	}
+
+skip_epp:
+	WRITE_ONCE(cpudata->cppc_req_cached, value);
+	/*
+	 * epp is still negative when the policy did not change or reading the
+	 * current EPP failed; do not program it in either case.
+	 */
+	if (epp >= 0)
+		amd_pstate_set_epp(cpudata, epp);
+	cpufreq_cpu_put(policy);
+}
+
+/*
+ * cpufreq ->setpolicy() callback of the EPP driver: records the requested
+ * policy in the per-CPU data and calls amd_pstate_epp_init() to apply it.
+ */
+static int amd_pstate_epp_set_policy(struct cpufreq_policy *policy)
+{
+	struct amd_cpudata *cpudata = policy->driver_data;
+
+	if (!policy->cpuinfo.max_freq)
+		return -ENODEV;
+
+	pr_debug("set_policy: cpuinfo.max %u policy->max %u\n",
+				policy->cpuinfo.max_freq, policy->max);
+
+	cpudata->policy = policy->policy;
+
+	amd_pstate_epp_init(policy->cpu);
+
+	return 0;
+}
+
+/* cpufreq ->verify() callback: clamp the requested limits to cpuinfo limits */
+static int amd_pstate_epp_verify_policy(struct cpufreq_policy_data *policy)
+{
+	cpufreq_verify_within_cpu_limits(policy);
+	pr_debug("policy_max =%d, policy_min=%d\n", policy->max, policy->min);
+	return 0;
+}
+
 static struct cpufreq_driver amd_pstate_driver = {
 	.flags		= CPUFREQ_CONST_LOOPS | CPUFREQ_NEED_UPDATE_LIMITS,
 	.verify		= amd_pstate_verify,
@@ -627,6 +1021,16 @@ static struct cpufreq_driver amd_pstate_driver = {
 	.attr		= amd_pstate_attr,
 };
 
+static struct cpufreq_driver amd_pstate_epp_driver = {
+	.flags		= CPUFREQ_CONST_LOOPS,
+	.verify		= amd_pstate_epp_verify_policy,
+	.setpolicy	= amd_pstate_epp_set_policy,
+	.init		= amd_pstate_epp_cpu_init,
+	.exit		= amd_pstate_epp_cpu_exit,
+	.name		= "amd_pstate_epp",
+	.attr		= amd_pstate_epp_attr,
+};
+
 static int __init amd_pstate_init(void)
 {
 	int ret;
@@ -655,7 +1059,8 @@ static int __init amd_pstate_init(void)
 	/* capability check */
 	if (boot_cpu_has(X86_FEATURE_CPPC)) {
 		pr_debug("AMD CPPC MSR based functionality is supported\n");
-		amd_pstate_driver.adjust_perf = amd_pstate_adjust_perf;
+		if (cppc_state == AMD_PSTATE_PASSIVE)
+			current_pstate_driver->adjust_perf = amd_pstate_adjust_perf;
 	} else {
 		pr_debug("AMD CPPC shared memory based functionality is supported\n");
 		static_call_update(amd_pstate_enable, cppc_enable);
@@ -666,14 +1071,13 @@ static int __init amd_pstate_init(void)
 	/* enable amd pstate feature */
 	ret = amd_pstate_enable(true);
 	if (ret) {
-		pr_err("failed to enable amd-pstate with return %d\n", ret);
+		pr_err("failed to enable with return %d\n", ret);
 		return ret;
 	}
 
-	ret = cpufreq_register_driver(&amd_pstate_driver);
+	ret = cpufreq_register_driver(current_pstate_driver);
 	if (ret)
-		pr_err("failed to register amd_pstate_driver with return %d\n",
-		       ret);
+		pr_err("failed to register with return %d\n", ret);
 
 	return ret;
 }
@@ -695,6 +1099,12 @@ static int __init amd_pstate_param(char *str)
 	if (cppc_state == AMD_PSTATE_DISABLE)
 		pr_info("driver is explicitly disabled\n");
 
+	if (cppc_state == AMD_PSTATE_ACTIVE)
+		current_pstate_driver = &amd_pstate_epp_driver;
+
+	if (cppc_state == AMD_PSTATE_PASSIVE)
+		current_pstate_driver = &amd_pstate_driver;
+
 	return 0;
 }
 
diff --git a/include/linux/amd-pstate.h b/include/linux/amd-pstate.h
index dae2ce0f6735..8341a2a2948a 100644
--- a/include/linux/amd-pstate.h
+++ b/include/linux/amd-pstate.h
@@ -47,6 +47,10 @@ struct amd_aperf_mperf {
  * @prev: Last Aperf/Mperf/tsc count value read from register
  * @freq: current cpu frequency value
  * @boost_supported: check whether the Processor or SBIOS supports boost mode
+ * @epp_policy: Last saved policy used to set energy-performance preference
+ * @epp_cached: Cached CPPC energy-performance preference value
+ * @policy: Cpufreq policy value
+ * @cppc_cap1_cached: Cached MSR_AMD_CPPC_CAP1 register value
  *
  * The amd_cpudata is key private data for each CPU thread in AMD P-State, and
  * represents all the attributes and goals that AMD P-State requests at runtime.
@@ -72,6 +76,12 @@ struct amd_cpudata {
 	u64	freq;
 	bool	boost_supported;
+
+	/* EPP feature related attributes */
+	s16	epp_policy;
+	s16	epp_cached;
+	u32	policy;
+	u64	cppc_cap1_cached;
 };
 /*