diff mbox series

[v2,2/3] rt-tests: cyclictest: Support idle state disabling via libcpupower

Message ID 20241113114509.1058593-3-tglozar@redhat.com
State New
Headers show
Series rt-tests: cyclictest: Support idle state disabling via libcpupower | expand

Commit Message

Tomas Glozar Nov. 13, 2024, 11:45 a.m. UTC
From: Tomas Glozar <tglozar@redhat.com>

cyclictest allows reducing latency on wake up from idle by setting
/dev/cpu_dma_latency during the measurement. This has an effect on
the idle states of all CPUs, including those which are not included
in the measurement.

Add option --deepest-idle-state that allows limiting the idle state
only on cpus where the measurement is running.

libcpupower is used to do the disabling of idle states via
the corresponding sysfs interface.

Note: The feature was first implemented for rtla-timerlat, this
implementation is based on the rtla one.

Signed-off-by: Tomas Glozar <tglozar@redhat.com>
---
 src/cyclictest/cyclictest.c | 205 +++++++++++++++++++++++++++++++++++-
 1 file changed, 204 insertions(+), 1 deletion(-)

Comments

John Kacur Nov. 26, 2024, 10:29 p.m. UTC | #1
On Wed, 13 Nov 2024, tglozar@redhat.com wrote:

> From: Tomas Glozar <tglozar@redhat.com>
> 
> cyclictest allows reducing latency on wake up from idle by setting
> /dev/cpu_dma_latency during the measurement. This has an effect on
> the idle states of all CPUs, including those which are not included
> in the measurement.
> 
> Add option --deepest-idle-state that allows limiting the idle state
> only on cpus where the measurement is running.
> 
> libcpupower is used to do the disabling of idle states via
> the corresponding sysfs interface.
> 
> Note: The feature was first implemented for rtla-timerlat, this
> implementation is based on the rtla one.
> 
> Signed-off-by: Tomas Glozar <tglozar@redhat.com>
> ---
>  src/cyclictest/cyclictest.c | 205 +++++++++++++++++++++++++++++++++++-
>  1 file changed, 204 insertions(+), 1 deletion(-)
> 
> diff --git a/src/cyclictest/cyclictest.c b/src/cyclictest/cyclictest.c
> index 1ce62cf..b1f8420 100644
> --- a/src/cyclictest/cyclictest.c
> +++ b/src/cyclictest/cyclictest.c
> @@ -8,6 +8,9 @@
>   * (C) 2005-2007 Thomas Gleixner <tglx@linutronix.de>
>   *
>   */
> +#ifdef HAVE_LIBCPUPOWER_SUPPORT
> +#include <cpuidle.h>
> +#endif /* HAVE_LIBCPUPOWER_SUPPORT */
>  #include <stdio.h>
>  #include <stdlib.h>
>  #include <stdint.h>
> @@ -223,6 +226,8 @@ static void rstat_setup(void);
>  static int latency_target_fd = -1;
>  static int32_t latency_target_value = 0;
>  
> +static int deepest_idle_state = -2;
> +
>  static int rstat_ftruncate(int fd, off_t len);
>  static int rstat_fd = -1;
>  /* strlen("/cyclictest") + digits in max pid len + '\0' */
> @@ -254,6 +259,11 @@ static void set_latency_target(void)
>  		return;
>  	}
>  
> +	if (deepest_idle_state >= -1) {
> +		warn("not setting cpu_dma_latency, --deepest-idle-state is set instead\n");

I don't think we want to have a warning when the software is doing what we 
request of it.
Can we either just move the logic out of this function into main and
call either set_latency_target or the deepest latency state logic as 
appropriate, or move all the power management logic into a new function?


> +		return;
> +	}
> +
>  	errno = 0;
>  	err = stat("/dev/cpu_dma_latency", &s);
>  	if (err == -1) {
> @@ -278,6 +288,161 @@ static void set_latency_target(void)
>  	printf("# /dev/cpu_dma_latency set to %dus\n", latency_target_value);
>  }
>  
> +#ifdef HAVE_LIBCPUPOWER_SUPPORT
> +static unsigned int **saved_cpu_idle_disable_state;
> +static size_t saved_cpu_idle_disable_state_alloc_ctr;
> +
> +/*
> + * save_cpu_idle_disable_state - save disable for all idle states of a cpu
> + *
> + * Saves the current disable state of all idle states of a cpu, to be
> + * subsequently restored via restore_cpu_idle_disable_state.
> + *
> + * Return: idle state count on success, negative on error
> + */
> +static int save_cpu_idle_disable_state(unsigned int cpu)
> +{
> +	unsigned int nr_states;
> +	unsigned int state;
> +	int disabled;
> +	int nr_cpus;
> +
> +	nr_states = cpuidle_state_count(cpu);
> +
> +	if (nr_states == 0)
> +		return 0;
> +
> +	if (saved_cpu_idle_disable_state == NULL) {
> +		nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
> +		saved_cpu_idle_disable_state = calloc(nr_cpus, sizeof(unsigned int *));
> +		if (!saved_cpu_idle_disable_state)
> +			return -1;
> +	}
> +
> +	saved_cpu_idle_disable_state[cpu] = calloc(nr_states, sizeof(unsigned int));
> +	if (!saved_cpu_idle_disable_state[cpu])
> +		return -1;
> +	saved_cpu_idle_disable_state_alloc_ctr++;
> +
> +	for (state = 0; state < nr_states; state++) {
> +		disabled = cpuidle_is_state_disabled(cpu, state);
> +		if (disabled < 0)
> +			return disabled;
> +		saved_cpu_idle_disable_state[cpu][state] = disabled;
> +	}
> +
> +	return nr_states;
> +}
> +
> +/*
> + * restore_cpu_idle_disable_state - restore disable for all idle states of a cpu
> + *
> + * Restores the current disable state of all idle states of a cpu that was
> + * previously saved by save_cpu_idle_disable_state.
> + *
> + * Return: idle state count on success, negative on error
> + */
> +static int restore_cpu_idle_disable_state(unsigned int cpu)
> +{
> +	unsigned int nr_states;
> +	unsigned int state;
> +	int disabled;
> +	int result;
> +
> +	nr_states = cpuidle_state_count(cpu);
> +
> +	if (nr_states == 0)
> +		return 0;
> +
> +	if (!saved_cpu_idle_disable_state)
> +		return -1;
> +
> +	for (state = 0; state < nr_states; state++) {
> +		if (!saved_cpu_idle_disable_state[cpu])
> +			return -1;
> +		disabled = saved_cpu_idle_disable_state[cpu][state];
> +		result = cpuidle_state_disable(cpu, state, disabled);
> +		if (result < 0)
> +			return result;
> +	}
> +
> +	free(saved_cpu_idle_disable_state[cpu]);
> +	saved_cpu_idle_disable_state[cpu] = NULL;
> +	saved_cpu_idle_disable_state_alloc_ctr--;
> +	if (saved_cpu_idle_disable_state_alloc_ctr == 0) {
> +		free(saved_cpu_idle_disable_state);
> +		saved_cpu_idle_disable_state = NULL;
> +	}
> +
> +	return nr_states;
> +}
> +
> +/*
> + * free_cpu_idle_disable_states - free saved idle state disable for all cpus
> + *
> + * Frees the memory used for storing cpu idle state disable for all cpus
> + * and states.
> + *
> + * Normally, the memory is freed automatically in
> + * restore_cpu_idle_disable_state; this is mostly for cleaning up after an
> + * error.
> + */
> +static void free_cpu_idle_disable_states(void)
> +{
> +	int cpu;
> +	int nr_cpus;
> +
> +	if (!saved_cpu_idle_disable_state)
> +		return;
> +
> +	nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
> +
> +	for (cpu = 0; cpu < nr_cpus; cpu++) {
> +		free(saved_cpu_idle_disable_state[cpu]);
> +		saved_cpu_idle_disable_state[cpu] = NULL;
> +	}
> +
> +	free(saved_cpu_idle_disable_state);
> +	saved_cpu_idle_disable_state = NULL;
> +}
> +
> +/*
> + * set_deepest_cpu_idle_state - limit idle state of cpu
> + *
> + * Disables all idle states deeper than the one given in
> + * deepest_state (assuming states with higher number are deeper).
> + *
> + * This is used to reduce the exit from idle latency. Unlike
> + * set_latency_target, it can disable idle states per cpu.
> + *
> + * Return: idle state count on success, negative on error
> + */
> +static int set_deepest_cpu_idle_state(unsigned int cpu, unsigned int deepest_state)
> +{
> +	unsigned int nr_states;
> +	unsigned int state;
> +	int result;
> +
> +	nr_states = cpuidle_state_count(cpu);
> +
> +	for (state = deepest_state + 1; state < nr_states; state++) {
> +		result = cpuidle_state_disable(cpu, state, 1);
> +		if (result < 0)
> +			return result;
> +	}
> +
> +	return nr_states;
> +}
> +
> +static inline int have_libcpupower_support(void) { return 1; }
> +#else
> +static inline int save_cpu_idle_disable_state(__attribute__((unused)) unsigned int cpu) { return -1; }
> +static inline int restore_cpu_idle_disable_state(__attribute__((unused)) unsigned int cpu) { return -1; }
> +static inline void free_cpu_idle_disable_states(void) { }
> +static inline int set_deepest_cpu_idle_state(__attribute__((unused)) unsigned int cpu,
> +											 __attribute__((unused)) unsigned int state) { return -1; }
> +static inline int have_libcpupower_support(void) { return 0; }
> +#endif /* HAVE_LIBCPUPOWER_SUPPORT */
>  
>  enum {
>  	ERROR_GENERAL	= -1,
> @@ -779,6 +944,10 @@ static void display_help(int error)
>  	       "-c CLOCK --clock=CLOCK     select clock\n"
>  	       "                           0 = CLOCK_MONOTONIC (default)\n"
>  	       "                           1 = CLOCK_REALTIME\n"
> +	       "         --deepest-idle-state=n\n"
> +	       "                           Reduce exit from idle latency by limiting idle state\n"
> +	       "                           up to n on used cpus (-1 disables all idle states).\n"
> +	       "                           Power management is not suppressed on other cpus.\n"
>  	       "         --default-system  Don't attempt to tune the system from cyclictest.\n"
>  	       "                           Power management is not suppressed.\n"
>  	       "                           This might give poorer results, but will allow you\n"
> @@ -919,7 +1088,7 @@ enum option_values {
>  	OPT_TRIGGER_NODES, OPT_UNBUFFERED, OPT_NUMA, OPT_VERBOSE,
>  	OPT_DBGCYCLIC, OPT_POLICY, OPT_HELP, OPT_NUMOPTS,
>  	OPT_ALIGNED, OPT_SECALIGNED, OPT_LAPTOP, OPT_SMI,
> -	OPT_TRACEMARK, OPT_POSIX_TIMERS,
> +	OPT_TRACEMARK, OPT_POSIX_TIMERS, OPT_DEEPEST_IDLE_STATE,
>  };
>  
>  /* Process commandline options */
> @@ -975,6 +1144,7 @@ static void process_options(int argc, char *argv[], int max_cpus)
>  			{"policy",           required_argument, NULL, OPT_POLICY },
>  			{"help",             no_argument,       NULL, OPT_HELP },
>  			{"posix_timers",     no_argument,	NULL, OPT_POSIX_TIMERS },
> +			{"deepest-idle-state", required_argument,	NULL, OPT_DEEPEST_IDLE_STATE },
>  			{NULL, 0, NULL, 0 },
>  		};
>  		int c = getopt_long(argc, argv, "a::A::b:c:d:D:F:h:H:i:l:MNo:p:mqrRsSt::uvD:x",
> @@ -1175,6 +1345,9 @@ static void process_options(int argc, char *argv[], int max_cpus)
>  			break;
>  		case OPT_TRACEMARK:
>  			trace_marker = 1; break;
> +		case OPT_DEEPEST_IDLE_STATE:
> +			deepest_idle_state = atoi(optarg);
> +			break;
>  		}
>  	}
>  
> @@ -1782,6 +1955,26 @@ int main(int argc, char **argv)
>  	/* use the /dev/cpu_dma_latency trick if it's there */
>  	set_latency_target();
>  
> +	if (deepest_idle_state >= -1) {
> +		if (!have_libcpupower_support()) {
> +			fprintf(stderr, "cyclictest built without libcpupower, --deepest-idle-state is not supported\n");
> +			goto out;
> +		}
> +
> +		for (i = 0; i < max_cpus; i++) {
> +			if (affinity_mask && !numa_bitmask_isbitset(affinity_mask, i))
> +				continue;
> +			if (save_cpu_idle_disable_state(i) < 0) {
> +				fprintf(stderr, "Could not save cpu idle state.\n");
> +				goto out;
> +			}
> +			if (set_deepest_cpu_idle_state(i, deepest_idle_state) < 0) {
> +				fprintf(stderr, "Could not set deepest cpu idle state.\n");
> +				goto out;
> +			}
> +		}
> +	}
> +
>  	if (tracelimit && trace_marker)
>  		enable_trace_mark();
>  
> @@ -2147,6 +2340,16 @@ int main(int argc, char **argv)
>  	if (latency_target_fd >= 0)
>  		close(latency_target_fd);
>  
> +	/* restore and free cpu idle disable states */
> +	if (deepest_idle_state >= -1) {
> +		for (i = 0; i < max_cpus; i++) {
> +			if (affinity_mask && !numa_bitmask_isbitset(affinity_mask, i))
> +				continue;
> +			restore_cpu_idle_disable_state(i);
> +		}
> +	}
> +	free_cpu_idle_disable_states();
> +
>  	if (affinity_mask)
>  		rt_bitmask_free(affinity_mask);
>  
> -- 
> 2.47.0
> 
> 
>
Crystal Wood Nov. 27, 2024, 12:08 a.m. UTC | #2
On Tue, 2024-11-26 at 17:29 -0500, John Kacur wrote:
> 
> On Wed, 13 Nov 2024, tglozar@redhat.com wrote:
> 
> 
> > @@ -254,6 +259,11 @@ static void set_latency_target(void)
> >  		return;
> >  	}
> >  
> > +	if (deepest_idle_state >= -1) {
> > +		warn("not setting cpu_dma_latency, --deepest-idle-state is set instead\n");
> 
> I don't think we want to have a warning when the software is doing what we 
> request of it.
> Can we either just move the logic out of this function into main and
> call either set_latency_target or the deepest latency state logic as 
> appropriate, or move all the power management logic into a new function?

This could be said about the laptop and power_management checks too...
I'd go with verbose info prints rather than warnings for all three, if
anything.

I'm not sure how cluttering up main() with more logic would help, but
turning set_latency_target() into something like
setup_power()/cleanup_power() sounds good.

-Crystal
Tomas Glozar Nov. 27, 2024, 9:45 a.m. UTC | #3
st 27. 11. 2024 v 1:09 odesílatel Crystal Wood <crwood@redhat.com> napsal:
>
> This could be said about the laptop and power_management checks too...
> I'd go with verbose info prints rather than warnings for all three, if
> anything.
>

I agree. I believe my code is consistent with what we already have.
When you do cyclictest --default-system, you also get a warning about
not setting cpu_dma_latency, despite it being what you have explicitly
requested. My patch just does the same thing for --deepest-idle-state.

I suggest deferring this to a future patch that would remove the
warnings and another one to refactor the code.

> I'm not sure how cluttering up main() with more logic would help, but
> turning set_latency_target() into something like
> setup_power()/cleanup_power() sounds good.
>
> -Crystal
>

Yeah, main() is already long enough, I'd prefer to avoid cluttering it up more.

Tomas
John Kacur Nov. 27, 2024, 3:47 p.m. UTC | #4
On Tue, 26 Nov 2024, Crystal Wood wrote:

> On Tue, 2024-11-26 at 17:29 -0500, John Kacur wrote:
> > 
> > On Wed, 13 Nov 2024, tglozar@redhat.com wrote:
> > 
> > 
> > > @@ -254,6 +259,11 @@ static void set_latency_target(void)
> > >  		return;
> > >  	}
> > >  
> > > +	if (deepest_idle_state >= -1) {
> > > +		warn("not setting cpu_dma_latency, --deepest-idle-state is set instead\n");
> > 
> > I don't think we want to have a warning when the software is doing what we 
> > request of it.
> > Can we either just move the logic out of this function into main and
> > call either set_latency_target or the deepest latency state logic as 
> > appropriate, or move all the power management logic into a new function?
> 
> This could be said about the laptop and power_management checks too...

true

> I'd go with verbose info prints rather than warnings for all three, if
> anything.

Note that verbose on cyclictest doesn't mean print extra warnings, it 
means output values on stdout for statistics.

> 
> I'm not sure how cluttering up main() with more logic would help, but
> turning set_latency_target() into something like
> setup_power()/cleanup_power() sounds good.
>
 

Sure, that's why I gave two options, however, there is already 
--deepest-idle-state logic in main(), how much more clutter does it add to
not call set_latency_target() if we're using --deepest-idle-state?

John
John Kacur Nov. 27, 2024, 3:51 p.m. UTC | #5
On Wed, 27 Nov 2024, Tomas Glozar wrote:

> st 27. 11. 2024 v 1:09 odesílatel Crystal Wood <crwood@redhat.com> napsal:
> >
> > This could be said about the laptop and power_management checks too...
> > I'd go with verbose info prints rather than warnings for all three, if
> > anything.
> >
> 
> I agree. I believe my code is consistent with what we already have.
> When you do cyclictest --default-system, you also get a warning about
> not setting cpu_dma_latency, despite it being what you have explicitly
> requested. My patch just does the same thing for --deepest-idle-state.
> 
> I suggest deferring this to a future patch that would remove the
> warnings and another one to refactor the code.


> 
> > I'm not sure how cluttering up main() with more logic would help, but
> > turning set_latency_target() into something like
> > setup_power()/cleanup_power() sounds good.
> >
> > -Crystal
> >
> 
> Yeah, main() is already long enough, I'd prefer to avoid cluttering it up more.
> 
> Tomas

Alright, you convinced me.

Signed-off-by: John Kacur <jkacur@redhat.com>
Crystal Wood Dec. 2, 2024, 7:30 p.m. UTC | #6
On Wed, 2024-11-27 at 10:47 -0500, John Kacur wrote:
> 
> On Tue, 26 Nov 2024, Crystal Wood wrote:
> 
> > On Tue, 2024-11-26 at 17:29 -0500, John Kacur wrote:
> > 
> > > I don't think we want to have a warning when the software is doing what we 
> > > request of it.
> > > Can we either just move the logic out of this function into main and
> > > call either set_latency_target or the deepest latency state logic as 
> > > appropriate, or move all the power management logic into a new function?
> > 
> > This could be said about the laptop and power_management checks too...
> 
> true
> 
> > I'd go with verbose info prints rather than warnings for all three, if
> > anything.
> 
> Note that verbose on cyclictest doesn't mean print extra warnings, it 
> means output values on stdout for statistics.

That's what the help says, but it's already used to gate other things
that don't seem to be related, at least at first glance...

> 
> > 
> > I'm not sure how cluttering up main() with more logic would help, but
> > turning set_latency_target() into something like
> > setup_power()/cleanup_power() sounds good.
> > 
>  
> 
> Sure, that's why I gave two options, however, there is already 
> --deepest-idle-state logic in main(), how much more clutter does it add to
> not call set_latency_target() if we're using --deepest-idle-state?

I meant moving the deepest idle stuff out of main() and into
setup_power()/cleanup_power().  In other words, I was agreeing with your
second option.

-Crystal
diff mbox series

Patch

diff --git a/src/cyclictest/cyclictest.c b/src/cyclictest/cyclictest.c
index 1ce62cf..b1f8420 100644
--- a/src/cyclictest/cyclictest.c
+++ b/src/cyclictest/cyclictest.c
@@ -8,6 +8,9 @@ 
  * (C) 2005-2007 Thomas Gleixner <tglx@linutronix.de>
  *
  */
+#ifdef HAVE_LIBCPUPOWER_SUPPORT
+#include <cpuidle.h>
+#endif /* HAVE_LIBCPUPOWER_SUPPORT */
 #include <stdio.h>
 #include <stdlib.h>
 #include <stdint.h>
@@ -223,6 +226,8 @@  static void rstat_setup(void);
 static int latency_target_fd = -1;
 static int32_t latency_target_value = 0;
 
+static int deepest_idle_state = -2;
+
 static int rstat_ftruncate(int fd, off_t len);
 static int rstat_fd = -1;
 /* strlen("/cyclictest") + digits in max pid len + '\0' */
@@ -254,6 +259,11 @@  static void set_latency_target(void)
 		return;
 	}
 
+	if (deepest_idle_state >= -1) {
+		warn("not setting cpu_dma_latency, --deepest-idle-state is set instead\n");
+		return;
+	}
+
 	errno = 0;
 	err = stat("/dev/cpu_dma_latency", &s);
 	if (err == -1) {
@@ -278,6 +288,161 @@  static void set_latency_target(void)
 	printf("# /dev/cpu_dma_latency set to %dus\n", latency_target_value);
 }
 
+#ifdef HAVE_LIBCPUPOWER_SUPPORT
+static unsigned int **saved_cpu_idle_disable_state;
+static size_t saved_cpu_idle_disable_state_alloc_ctr;
+
+/*
+ * save_cpu_idle_disable_state - save disable for all idle states of a cpu
+ *
+ * Saves the current disable state of all idle states of a cpu, to be
+ * subsequently restored via restore_cpu_idle_disable_state.
+ *
+ * Return: idle state count on success, negative on error
+ */
+static int save_cpu_idle_disable_state(unsigned int cpu)
+{
+	unsigned int nr_states;
+	unsigned int state;
+	int disabled;
+	int nr_cpus;
+
+	nr_states = cpuidle_state_count(cpu);
+
+	if (nr_states == 0)
+		return 0;
+
+	if (saved_cpu_idle_disable_state == NULL) {
+		nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
+		saved_cpu_idle_disable_state = calloc(nr_cpus, sizeof(unsigned int *));
+		if (!saved_cpu_idle_disable_state)
+			return -1;
+	}
+
+	saved_cpu_idle_disable_state[cpu] = calloc(nr_states, sizeof(unsigned int));
+	if (!saved_cpu_idle_disable_state[cpu])
+		return -1;
+	saved_cpu_idle_disable_state_alloc_ctr++;
+
+	for (state = 0; state < nr_states; state++) {
+		disabled = cpuidle_is_state_disabled(cpu, state);
+		if (disabled < 0)
+			return disabled;
+		saved_cpu_idle_disable_state[cpu][state] = disabled;
+	}
+
+	return nr_states;
+}
+
+/*
+ * restore_cpu_idle_disable_state - restore disable for all idle states of a cpu
+ *
+ * Restores the current disable state of all idle states of a cpu that was
+ * previously saved by save_cpu_idle_disable_state.
+ *
+ * Return: idle state count on success, negative on error
+ */
+static int restore_cpu_idle_disable_state(unsigned int cpu)
+{
+	unsigned int nr_states;
+	unsigned int state;
+	int disabled;
+	int result;
+
+	nr_states = cpuidle_state_count(cpu);
+
+	if (nr_states == 0)
+		return 0;
+
+	if (!saved_cpu_idle_disable_state)
+		return -1;
+
+	for (state = 0; state < nr_states; state++) {
+		if (!saved_cpu_idle_disable_state[cpu])
+			return -1;
+		disabled = saved_cpu_idle_disable_state[cpu][state];
+		result = cpuidle_state_disable(cpu, state, disabled);
+		if (result < 0)
+			return result;
+	}
+
+	free(saved_cpu_idle_disable_state[cpu]);
+	saved_cpu_idle_disable_state[cpu] = NULL;
+	saved_cpu_idle_disable_state_alloc_ctr--;
+	if (saved_cpu_idle_disable_state_alloc_ctr == 0) {
+		free(saved_cpu_idle_disable_state);
+		saved_cpu_idle_disable_state = NULL;
+	}
+
+	return nr_states;
+}
+
+/*
+ * free_cpu_idle_disable_states - free saved idle state disable for all cpus
+ *
+ * Frees the memory used for storing cpu idle state disable for all cpus
+ * and states.
+ *
+ * Normally, the memory is freed automatically in
+ * restore_cpu_idle_disable_state; this is mostly for cleaning up after an
+ * error.
+ */
+static void free_cpu_idle_disable_states(void)
+{
+	int cpu;
+	int nr_cpus;
+
+	if (!saved_cpu_idle_disable_state)
+		return;
+
+	nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
+
+	for (cpu = 0; cpu < nr_cpus; cpu++) {
+		free(saved_cpu_idle_disable_state[cpu]);
+		saved_cpu_idle_disable_state[cpu] = NULL;
+	}
+
+	free(saved_cpu_idle_disable_state);
+	saved_cpu_idle_disable_state = NULL;
+}
+
+/*
+ * set_deepest_cpu_idle_state - limit idle state of cpu
+ *
+ * Disables all idle states deeper than the one given in
+ * deepest_state (assuming states with higher number are deeper).
+ *
+ * This is used to reduce the exit from idle latency. Unlike
+ * set_latency_target, it can disable idle states per cpu.
+ *
+ * Return: idle state count on success, negative on error
+ */
+static int set_deepest_cpu_idle_state(unsigned int cpu, unsigned int deepest_state)
+{
+	unsigned int nr_states;
+	unsigned int state;
+	int result;
+
+	nr_states = cpuidle_state_count(cpu);
+
+	for (state = deepest_state + 1; state < nr_states; state++) {
+		result = cpuidle_state_disable(cpu, state, 1);
+		if (result < 0)
+			return result;
+	}
+
+	return nr_states;
+}
+
+static inline int have_libcpupower_support(void) { return 1; }
+#else
+static inline int save_cpu_idle_disable_state(__attribute__((unused)) unsigned int cpu) { return -1; }
+static inline int restore_cpu_idle_disable_state(__attribute__((unused)) unsigned int cpu) { return -1; }
+static inline void free_cpu_idle_disable_states(void) { }
+static inline int set_deepest_cpu_idle_state(__attribute__((unused)) unsigned int cpu,
+											 __attribute__((unused)) unsigned int state) { return -1; }
+static inline int have_libcpupower_support(void) { return 0; }
+#endif /* HAVE_LIBCPUPOWER_SUPPORT */
 
 enum {
 	ERROR_GENERAL	= -1,
@@ -779,6 +944,10 @@  static void display_help(int error)
 	       "-c CLOCK --clock=CLOCK     select clock\n"
 	       "                           0 = CLOCK_MONOTONIC (default)\n"
 	       "                           1 = CLOCK_REALTIME\n"
+	       "         --deepest-idle-state=n\n"
+	       "                           Reduce exit from idle latency by limiting idle state\n"
+	       "                           up to n on used cpus (-1 disables all idle states).\n"
+	       "                           Power management is not suppressed on other cpus.\n"
 	       "         --default-system  Don't attempt to tune the system from cyclictest.\n"
 	       "                           Power management is not suppressed.\n"
 	       "                           This might give poorer results, but will allow you\n"
@@ -919,7 +1088,7 @@  enum option_values {
 	OPT_TRIGGER_NODES, OPT_UNBUFFERED, OPT_NUMA, OPT_VERBOSE,
 	OPT_DBGCYCLIC, OPT_POLICY, OPT_HELP, OPT_NUMOPTS,
 	OPT_ALIGNED, OPT_SECALIGNED, OPT_LAPTOP, OPT_SMI,
-	OPT_TRACEMARK, OPT_POSIX_TIMERS,
+	OPT_TRACEMARK, OPT_POSIX_TIMERS, OPT_DEEPEST_IDLE_STATE,
 };
 
 /* Process commandline options */
@@ -975,6 +1144,7 @@  static void process_options(int argc, char *argv[], int max_cpus)
 			{"policy",           required_argument, NULL, OPT_POLICY },
 			{"help",             no_argument,       NULL, OPT_HELP },
 			{"posix_timers",     no_argument,	NULL, OPT_POSIX_TIMERS },
+			{"deepest-idle-state", required_argument,	NULL, OPT_DEEPEST_IDLE_STATE },
 			{NULL, 0, NULL, 0 },
 		};
 		int c = getopt_long(argc, argv, "a::A::b:c:d:D:F:h:H:i:l:MNo:p:mqrRsSt::uvD:x",
@@ -1175,6 +1345,9 @@  static void process_options(int argc, char *argv[], int max_cpus)
 			break;
 		case OPT_TRACEMARK:
 			trace_marker = 1; break;
+		case OPT_DEEPEST_IDLE_STATE:
+			deepest_idle_state = atoi(optarg);
+			break;
 		}
 	}
 
@@ -1782,6 +1955,26 @@  int main(int argc, char **argv)
 	/* use the /dev/cpu_dma_latency trick if it's there */
 	set_latency_target();
 
+	if (deepest_idle_state >= -1) {
+		if (!have_libcpupower_support()) {
+			fprintf(stderr, "cyclictest built without libcpupower, --deepest-idle-state is not supported\n");
+			goto out;
+		}
+
+		for (i = 0; i < max_cpus; i++) {
+			if (affinity_mask && !numa_bitmask_isbitset(affinity_mask, i))
+				continue;
+			if (save_cpu_idle_disable_state(i) < 0) {
+				fprintf(stderr, "Could not save cpu idle state.\n");
+				goto out;
+			}
+			if (set_deepest_cpu_idle_state(i, deepest_idle_state) < 0) {
+				fprintf(stderr, "Could not set deepest cpu idle state.\n");
+				goto out;
+			}
+		}
+	}
+
 	if (tracelimit && trace_marker)
 		enable_trace_mark();
 
@@ -2147,6 +2340,16 @@  int main(int argc, char **argv)
 	if (latency_target_fd >= 0)
 		close(latency_target_fd);
 
+	/* restore and free cpu idle disable states */
+	if (deepest_idle_state >= -1) {
+		for (i = 0; i < max_cpus; i++) {
+			if (affinity_mask && !numa_bitmask_isbitset(affinity_mask, i))
+				continue;
+			restore_cpu_idle_disable_state(i);
+		}
+	}
+	free_cpu_idle_disable_states();
+
 	if (affinity_mask)
 		rt_bitmask_free(affinity_mask);