diff mbox series

[v2,2/3] rt-tests: cyclictest: Support idle state disabling via libcpupower

Message ID 20241113114509.1058593-3-tglozar@redhat.com
State New
Headers show
Series rt-tests: cyclictest: Support idle state disabling via libcpupower | expand

Commit Message

Tomas Glozar Nov. 13, 2024, 11:45 a.m. UTC
From: Tomas Glozar <tglozar@redhat.com>

cyclictest allows reducing latency on wake up from idle by setting
/dev/cpu_dma_latency during the measurement. This has an effect on
the idle states of all CPUs, including those which are not included
in the measurement.

Add option --deepest-idle-state that allows limiting the idle state
only on cpus where the measurement is running.

libcpupower is used to do the disabling of idle states via
the corresponding sysfs interface.

Note: The feature was first implemented for rtla-timerlat, this
implementation is based on the rtla one.

Signed-off-by: Tomas Glozar <tglozar@redhat.com>
---
 src/cyclictest/cyclictest.c | 205 +++++++++++++++++++++++++++++++++++-
 1 file changed, 204 insertions(+), 1 deletion(-)

Comments

John Kacur Nov. 26, 2024, 10:29 p.m. UTC | #1
On Wed, 13 Nov 2024, tglozar@redhat.com wrote:

> From: Tomas Glozar <tglozar@redhat.com>
> 
> cyclictest allows reducing latency on wake up from idle by setting
> /dev/cpu_dma_latency during the measurement. This has an effect on
> the idle states of all CPUs, including those which are not included
> in the measurement.
> 
> Add option --deepest-idle-state that allows limiting the idle state
> only on cpus where the measurement is running.
> 
> libcpupower is used to do the disabling of idle states via
> the corresponding sysfs interface.
> 
> Note: The feature was first implemented for rtla-timerlat, this
> implementation is based on the rtla one.
> 
> Signed-off-by: Tomas Glozar <tglozar@redhat.com>
> ---
>  src/cyclictest/cyclictest.c | 205 +++++++++++++++++++++++++++++++++++-
>  1 file changed, 204 insertions(+), 1 deletion(-)
> 
> diff --git a/src/cyclictest/cyclictest.c b/src/cyclictest/cyclictest.c
> index 1ce62cf..b1f8420 100644
> --- a/src/cyclictest/cyclictest.c
> +++ b/src/cyclictest/cyclictest.c
> @@ -8,6 +8,9 @@
>   * (C) 2005-2007 Thomas Gleixner <tglx@linutronix.de>
>   *
>   */
> +#ifdef HAVE_LIBCPUPOWER_SUPPORT
> +#include <cpuidle.h>
> +#endif /* HAVE_LIBCPUPOWER_SUPPORT */
>  #include <stdio.h>
>  #include <stdlib.h>
>  #include <stdint.h>
> @@ -223,6 +226,8 @@ static void rstat_setup(void);
>  static int latency_target_fd = -1;
>  static int32_t latency_target_value = 0;
>  
> +static int deepest_idle_state = -2;
> +
>  static int rstat_ftruncate(int fd, off_t len);
>  static int rstat_fd = -1;
>  /* strlen("/cyclictest") + digits in max pid len + '\0' */
> @@ -254,6 +259,11 @@ static void set_latency_target(void)
>  		return;
>  	}
>  
> +	if (deepest_idle_state >= -1) {
> +		warn("not setting cpu_dma_latency, --deepest-idle-state is set instead\n");

I don't think we want to have a warning when the software is doing what we 
request of it.
Can we either just move the logic out of this function into main and
call either set_latency_target or the deepest latency state logic as 
appropriate, or move all the power management logic into a new function?


> +		return;
> +	}
> +
>  	errno = 0;
>  	err = stat("/dev/cpu_dma_latency", &s);
>  	if (err == -1) {
> @@ -278,6 +288,161 @@ static void set_latency_target(void)
>  	printf("# /dev/cpu_dma_latency set to %dus\n", latency_target_value);
>  }
>  
> +#ifdef HAVE_LIBCPUPOWER_SUPPORT
> +static unsigned int **saved_cpu_idle_disable_state;
> +static size_t saved_cpu_idle_disable_state_alloc_ctr;
> +
> +/*
> + * save_cpu_idle_disable_state - save disable state for all idle states of a cpu
> + *
> + * Saves the current disable state of all idle states of a cpu, to be
> + * subsequently restored via restore_cpu_idle_disable_state.
> + *
> + * Return: idle state count on success, negative on error
> + */
> +static int save_cpu_idle_disable_state(unsigned int cpu)
> +{
> +	unsigned int nr_states;
> +	unsigned int state;
> +	int disabled;
> +	int nr_cpus;
> +
> +	nr_states = cpuidle_state_count(cpu);
> +
> +	if (nr_states == 0)
> +		return 0;
> +
> +	if (saved_cpu_idle_disable_state == NULL) {
> +		nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
> +		saved_cpu_idle_disable_state = calloc(nr_cpus, sizeof(unsigned int *));
> +		if (!saved_cpu_idle_disable_state)
> +			return -1;
> +	}
> +
> +	saved_cpu_idle_disable_state[cpu] = calloc(nr_states, sizeof(unsigned int));
> +	if (!saved_cpu_idle_disable_state[cpu])
> +		return -1;
> +	saved_cpu_idle_disable_state_alloc_ctr++;
> +
> +	for (state = 0; state < nr_states; state++) {
> +		disabled = cpuidle_is_state_disabled(cpu, state);
> +		if (disabled < 0)
> +			return disabled;
> +		saved_cpu_idle_disable_state[cpu][state] = disabled;
> +	}
> +
> +	return nr_states;
> +}
> +
> +/*
> + * restore_cpu_idle_disable_state - restore disable for all idle states of a cpu
> + *
> + * Restores the current disable state of all idle states of a cpu that was
> + * previously saved by save_cpu_idle_disable_state.
> + *
> + * Return: idle state count on success, negative on error
> + */
> +static int restore_cpu_idle_disable_state(unsigned int cpu)
> +{
> +	unsigned int nr_states;
> +	unsigned int state;
> +	int disabled;
> +	int result;
> +
> +	nr_states = cpuidle_state_count(cpu);
> +
> +	if (nr_states == 0)
> +		return 0;
> +
> +	if (!saved_cpu_idle_disable_state)
> +		return -1;
> +
> +	for (state = 0; state < nr_states; state++) {
> +		if (!saved_cpu_idle_disable_state[cpu])
> +			return -1;
> +		disabled = saved_cpu_idle_disable_state[cpu][state];
> +		result = cpuidle_state_disable(cpu, state, disabled);
> +		if (result < 0)
> +			return result;
> +	}
> +
> +	free(saved_cpu_idle_disable_state[cpu]);
> +	saved_cpu_idle_disable_state[cpu] = NULL;
> +	saved_cpu_idle_disable_state_alloc_ctr--;
> +	if (saved_cpu_idle_disable_state_alloc_ctr == 0) {
> +		free(saved_cpu_idle_disable_state);
> +		saved_cpu_idle_disable_state = NULL;
> +	}
> +
> +	return nr_states;
> +}
> +
> +/*
> + * free_cpu_idle_disable_states - free saved idle state disable for all cpus
> + *
> + * Frees the memory used for storing cpu idle state disable for all cpus
> + * and states.
> + *
> + * Normally, the memory is freed automatically in
> + * restore_cpu_idle_disable_state; this is mostly for cleaning up after an
> + * error.
> + */
> +static void free_cpu_idle_disable_states(void)
> +{
> +	int cpu;
> +	int nr_cpus;
> +
> +	if (!saved_cpu_idle_disable_state)
> +		return;
> +
> +	nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
> +
> +	for (cpu = 0; cpu < nr_cpus; cpu++) {
> +		free(saved_cpu_idle_disable_state[cpu]);
> +		saved_cpu_idle_disable_state[cpu] = NULL;
> +	}
> +
> +	free(saved_cpu_idle_disable_state);
> +	saved_cpu_idle_disable_state = NULL;
> +}
> +
> +/*
> + * set_deepest_cpu_idle_state - limit idle state of cpu
> + *
> + * Disables all idle states deeper than the one given in
> + * deepest_state (assuming states with higher number are deeper).
> + *
> + * This is used to reduce the exit from idle latency. Unlike
> + * set_cpu_dma_latency, it can disable idle states per cpu.
> + *
> + * Return: idle state count on success, negative on error
> + */
> +static int set_deepest_cpu_idle_state(unsigned int cpu, unsigned int deepest_state)
> +{
> +	unsigned int nr_states;
> +	unsigned int state;
> +	int result;
> +
> +	nr_states = cpuidle_state_count(cpu);
> +
> +	for (state = deepest_state + 1; state < nr_states; state++) {
> +		result = cpuidle_state_disable(cpu, state, 1);
> +		if (result < 0)
> +			return result;
> +	}
> +
> +	return nr_states;
> +}
> +
> +static inline int have_libcpupower_support(void) { return 1; }
> +#else
> +static inline int save_cpu_idle_disable_state(__attribute__((unused)) unsigned int cpu) { return -1; }
> +static inline int restore_cpu_idle_disable_state(__attribute__((unused)) unsigned int cpu) { return -1; }
> +static inline void free_cpu_idle_disable_states(void) { }
> +static inline int set_deepest_cpu_idle_state(__attribute__((unused)) unsigned int cpu,
> +											 __attribute__((unused)) unsigned int state) { return -1; }
> +static inline int have_libcpupower_support(void) { return 0; }
> +#endif /* HAVE_LIBCPUPOWER_SUPPORT */
>  
>  enum {
>  	ERROR_GENERAL	= -1,
> @@ -779,6 +944,10 @@ static void display_help(int error)
>  	       "-c CLOCK --clock=CLOCK     select clock\n"
>  	       "                           0 = CLOCK_MONOTONIC (default)\n"
>  	       "                           1 = CLOCK_REALTIME\n"
> +	       "         --deepest-idle-state=n\n"
> +	       "                           Reduce exit from idle latency by limiting idle state\n"
> +	       "                           up to n on used cpus (-1 disables all idle states).\n"
> +	       "                           Power management is not suppresed on other cpus.\n"
>  	       "         --default-system  Don't attempt to tune the system from cyclictest.\n"
>  	       "                           Power management is not suppressed.\n"
>  	       "                           This might give poorer results, but will allow you\n"
> @@ -919,7 +1088,7 @@ enum option_values {
>  	OPT_TRIGGER_NODES, OPT_UNBUFFERED, OPT_NUMA, OPT_VERBOSE,
>  	OPT_DBGCYCLIC, OPT_POLICY, OPT_HELP, OPT_NUMOPTS,
>  	OPT_ALIGNED, OPT_SECALIGNED, OPT_LAPTOP, OPT_SMI,
> -	OPT_TRACEMARK, OPT_POSIX_TIMERS,
> +	OPT_TRACEMARK, OPT_POSIX_TIMERS, OPT_DEEPEST_IDLE_STATE,
>  };
>  
>  /* Process commandline options */
> @@ -975,6 +1144,7 @@ static void process_options(int argc, char *argv[], int max_cpus)
>  			{"policy",           required_argument, NULL, OPT_POLICY },
>  			{"help",             no_argument,       NULL, OPT_HELP },
>  			{"posix_timers",     no_argument,	NULL, OPT_POSIX_TIMERS },
> +			{"deepest-idle-state", required_argument,	NULL, OPT_DEEPEST_IDLE_STATE },
>  			{NULL, 0, NULL, 0 },
>  		};
>  		int c = getopt_long(argc, argv, "a::A::b:c:d:D:F:h:H:i:l:MNo:p:mqrRsSt::uvD:x",
> @@ -1175,6 +1345,9 @@ static void process_options(int argc, char *argv[], int max_cpus)
>  			break;
>  		case OPT_TRACEMARK:
>  			trace_marker = 1; break;
> +		case OPT_DEEPEST_IDLE_STATE:
> +			deepest_idle_state = atoi(optarg);
> +			break;
>  		}
>  	}
>  
> @@ -1782,6 +1955,26 @@ int main(int argc, char **argv)
>  	/* use the /dev/cpu_dma_latency trick if it's there */
>  	set_latency_target();
>  
> +	if (deepest_idle_state >= -1) {
> +		if (!have_libcpupower_support()) {
> +			fprintf(stderr, "cyclictest built without libcpupower, --deepest-idle-state is not supported\n");
> +			goto out;
> +		}
> +
> +		for (i = 0; i < max_cpus; i++) {
> +			if (affinity_mask && !numa_bitmask_isbitset(affinity_mask, i))
> +				continue;
> +			if (save_cpu_idle_disable_state(i) < 0) {
> +				fprintf(stderr, "Could not save cpu idle state.\n");
> +				goto out;
> +			}
> +			if (set_deepest_cpu_idle_state(i, deepest_idle_state) < 0) {
> +				fprintf(stderr, "Could not set deepest cpu idle state.\n");
> +				goto out;
> +			}
> +		}
> +	}
> +
>  	if (tracelimit && trace_marker)
>  		enable_trace_mark();
>  
> @@ -2147,6 +2340,16 @@ int main(int argc, char **argv)
>  	if (latency_target_fd >= 0)
>  		close(latency_target_fd);
>  
> +	/* restore and free cpu idle disable states */
> +	if (deepest_idle_state >= -1) {
> +		for (i = 0; i < max_cpus; i++) {
> +			if (affinity_mask && !numa_bitmask_isbitset(affinity_mask, i))
> +				continue;
> +			restore_cpu_idle_disable_state(i);
> +		}
> +	}
> +	free_cpu_idle_disable_states();
> +
>  	if (affinity_mask)
>  		rt_bitmask_free(affinity_mask);
>  
> -- 
> 2.47.0
> 
> 
>
Crystal Wood Nov. 27, 2024, 12:08 a.m. UTC | #2
On Tue, 2024-11-26 at 17:29 -0500, John Kacur wrote:
> 
> On Wed, 13 Nov 2024, tglozar@redhat.com wrote:
> 
> 
> > @@ -254,6 +259,11 @@ static void set_latency_target(void)
> >  		return;
> >  	}
> >  
> > +	if (deepest_idle_state >= -1) {
> > +		warn("not setting cpu_dma_latency, --deepest-idle-state is set instead\n");
> 
> I don't think we want to have a warning when the software is doing what we 
> request of it.
> Can we either just move the logic out of this function into main and
> call either set_latency_target or the deepest latency state logic as 
> appropriate, or move all the power management logic into a new function?

This could be said about the laptop and power_management checks too...
I'd go with verbose info prints rather than warnings for all three, if
anything.

I'm not sure how cluttering up main() with more logic would help, but
turning set_latency_target() into something like
setup_power()/cleanup_power() sounds good.

-Crystal
Tomas Glozar Nov. 27, 2024, 9:45 a.m. UTC | #3
st 27. 11. 2024 v 1:09 odesílatel Crystal Wood <crwood@redhat.com> napsal:
>
> This could be said about the laptop and power_management checks too...
> I'd go with verbose info prints rather than warnings for all three, if
> anything.
>

I agree. I believe my code is consistent with what we already have.
When you do cyclictest --default-system, you also get a warning about
not setting cpu_dma_latency, despite it being what you have explicitly
requested. My patch just does the same thing for --deepest-idle-state.

I suggest deferring this to a future patch that would remove the
warnings and another one to refactor the code.

> I'm not sure how cluttering up main() with more logic would help, but
> turning set_latency_target() into something like
> setup_power()/cleanup_power() sounds good.
>
> -Crystal
>

Yeah, main() is already long enough, I'd prefer to avoid cluttering it up more.

Tomas
John Kacur Nov. 27, 2024, 3:47 p.m. UTC | #4
On Tue, 26 Nov 2024, Crystal Wood wrote:

> On Tue, 2024-11-26 at 17:29 -0500, John Kacur wrote:
> > 
> > On Wed, 13 Nov 2024, tglozar@redhat.com wrote:
> > 
> > 
> > > @@ -254,6 +259,11 @@ static void set_latency_target(void)
> > >  		return;
> > >  	}
> > >  
> > > +	if (deepest_idle_state >= -1) {
> > > +		warn("not setting cpu_dma_latency, --deepest-idle-state is set instead\n");
> > 
> > I don't think we want to have a warning when the software is doing what we 
> > request of it.
> > Can we either just move the logic out of this function into main and
> > call either set_latency_target or the deepest latency state logic as 
> > appropriate, or move all the power management logic into a new function?
> 
> This could be said about the laptop and power_management checks too...

true

> I'd go with verbose info prints rather than warnings for all three, if
> anything.

Note that verbose on cyclictest doesn't mean print extra warnings, it 
means output values on stdout for statistics.

> 
> I'm not sure how cluttering up main() with more logic would help, but
> turning set_latency_target() into something like
> setup_power()/cleanup_power() sounds good.
>
 

Sure, that's why I gave two options, however, there is already 
--deepest-idle-state logic in main(), how much more clutter does it add to
not call set_latency_target() if we're using --deepest-idle-state?

John
John Kacur Nov. 27, 2024, 3:51 p.m. UTC | #5
On Wed, 27 Nov 2024, Tomas Glozar wrote:

> st 27. 11. 2024 v 1:09 odesílatel Crystal Wood <crwood@redhat.com> napsal:
> >
> > This could be said about the laptop and power_management checks too...
> > I'd go with verbose info prints rather than warnings for all three, if
> > anything.
> >
> 
> I agree. I believe my code is consistent with what we already have.
> When you do cyclictest --default-system, you also get a warning about
> not setting cpu_dma_latency, despite it being what you have explicitly
> requested. My patch just does the same thing for --deepest-idle-state.
> 
> I suggest deferring this to a future patch that would remove the
> warnings and another one to refactor the code.


> 
> > I'm not sure how cluttering up main() with more logic would help, but
> > turning set_latency_target() into something like
> > setup_power()/cleanup_power() sounds good.
> >
> > -Crystal
> >
> 
> Yeah, main() is already long enough, I'd prefer to avoid cluttering it up more.
> 
> Tomas

Alright, you convinced me.

Signed-off-by: John Kacur <jkacur@redhat.com>
Crystal Wood Dec. 2, 2024, 7:30 p.m. UTC | #6
On Wed, 2024-11-27 at 10:47 -0500, John Kacur wrote:
> 
> On Tue, 26 Nov 2024, Crystal Wood wrote:
> 
> > On Tue, 2024-11-26 at 17:29 -0500, John Kacur wrote:
> > 
> > > I don't think we want to have a warning when the software is doing what we 
> > > request of it.
> > > Can we either just move the logic out of this function into main and
> > > call either set_latency_target or the deepest latency state logic as 
> > > appropriate, or move all the power management logic into a new function?
> > 
> > This could be said about the laptop and power_management checks too...
> 
> true
> 
> > I'd go with verbose info prints rather than warnings for all three, if
> > anything.
> 
> Note that verbose on cyclictest doesn't mean print extra warnings, it 
> means output values on stdout for statistics.

That's what the help says, but it's already used to gate other things
that don't seem to be related, at least at first glance...

> 
> > 
> > I'm not sure how cluttering up main() with more logic would help, but
> > turning set_latency_target() into something like
> > setup_power()/cleanup_power() sounds good.
> > 
>  
> 
> Sure, that's why I gave two options, however, there is already 
> --deepest-idle-state logic in main(), how much more clutter does it add to
> not call set_latency_target() if we're using --deepest-idle-state?

I meant moving the deepest idle stuff out of main() and into
setup_power()/cleanup_power().  In other words, I was agreeing with your
second option.

-Crystal
Sebastian Andrzej Siewior Dec. 6, 2024, 11:52 a.m. UTC | #7
On 2024-11-27 10:45:49 [+0100], Tomas Glozar wrote:
> st 27. 11. 2024 v 1:09 odesílatel Crystal Wood <crwood@redhat.com> napsal:
> >
> > This could be said about the laptop and power_management checks too...
> > I'd go with verbose info prints rather than warnings for all three, if
> > anything.
> >
> 
> I agree. I believe my code is consistent with what we already have.
> When you do cyclictest --default-system, you also get a warning about
> not setting cpu_dma_latency, despite it being what you have explicitly
> requested. My patch just does the same thing for --deepest-idle-state.

What is the default behaviour and what is the intended behaviour?
Couldn't we somehow avoid adding yet another option?

> I suggest deferring this to a future patch that would remove the
> warnings and another one to refactor the code.

Sebastian
Tomas Glozar Dec. 6, 2024, 12:14 p.m. UTC | #8
pá 6. 12. 2024 v 12:52 odesílatel Sebastian Andrzej Siewior
<bigeasy@linutronix.de> napsal:
> What is the default behaviour and what is the intended behaviour?
> Couldn't we somehow avoid adding yet another option?

The default behavior is to hold /dev/cpu_dma_latency at zero, which
disables all idle states on all CPUs, not only those on which
cyclictest measurements are running. This has the disadvantage of a
higher power consumption than needed in most cases. With
--deepest-idle-state, idle states are limited only on CPUs cyclictest
is running on.

There are two reasons why we can't just use the latter and have to
have options for both. Firstly, some hardware does not support
disabling idle states on individual CPUs via the cpuidle sysfs
interface, which is used by --deepest-idle-state. Secondly, latencies
measured with --deepest-idle-state might still be higher in some cases
compared to holding /dev/cpu_dma_latency.

>
> > I suggest deferring this to a future patch that would remove the
> > warnings and another one to refactor the code.
>
> Sebastian
>

Tomas
Sebastian Andrzej Siewior Dec. 6, 2024, 2:41 p.m. UTC | #9
On 2024-12-06 13:14:02 [+0100], Tomas Glozar wrote:
> pá 6. 12. 2024 v 12:52 odesílatel Sebastian Andrzej Siewior
> <bigeasy@linutronix.de> napsal:
> > What is the default behaviour and what is the intended behaviour?
> > Couldn't we somehow avoid adding yet another option?
> 
> The default behavior is to hold /dev/cpu_dma_latency at zero, which
> disables all idle states on all CPUs, not only those on which
> cyclictest measurements are running. This has the disadvantage of a
> higher power consumption than needed in most cases. With
> --deepest-idle-state, idle states are limited only on CPUs cyclictest
> is running on.

This is a real use case? /dev/cpu_dma_latency is a big hammer to disable
everything that might cause latency.
So you have 4 CPUs, CPU0 is getting idle from time to time and CPU1-3 is
doing RT work so it can't take sleep?

> There are two reasons why we can't just use the latter and have to
> have options for both. Firstly, some hardware does not support
> disabling idle states on individual CPUs via the cpuidle sysfs
> interface, which is used by --deepest-idle-state. Secondly, latencies
> measured with --deepest-idle-state might still be higher in some cases
> compared to holding /dev/cpu_dma_latency.

Yes. Especially if you find power management thingy that is covered by
cpu_dma_latency but not by this new switch.
 
> Tomas

Sebastian
John Kacur Dec. 6, 2024, 3:29 p.m. UTC | #10
On Fri, 6 Dec 2024, Sebastian Andrzej Siewior wrote:

> On 2024-11-27 10:45:49 [+0100], Tomas Glozar wrote:
> > st 27. 11. 2024 v 1:09 odesílatel Crystal Wood <crwood@redhat.com> napsal:
> > >
> > > This could be said about the laptop and power_management checks too...
> > > I'd go with verbose info prints rather than warnings for all three, if
> > > anything.
> > >
> > 
> > I agree. I believe my code is consistent with what we already have.
> > When you do cyclictest --default-system, you also get a warning about
> > not setting cpu_dma_latency, despite it being what you have explicitly
> > requested. My patch just does the same thing for --deepest-idle-state.
> 
> What is the default behaviour and what is the intended behaviour?
> Couldn't we somehow avoid adding yet another option?

The default doesn't change, you don't have to use the new option.
We want to be able to measure whether using some level of power saving 
will still give us acceptable soft realtime behaviour.

John

> 
> > I suggest deferring this to a future patch that would remove the
> > warnings and another one to refactor the code.
> 
> Sebastian
> 
>
diff mbox series

Patch

diff --git a/src/cyclictest/cyclictest.c b/src/cyclictest/cyclictest.c
index 1ce62cf..b1f8420 100644
--- a/src/cyclictest/cyclictest.c
+++ b/src/cyclictest/cyclictest.c
@@ -8,6 +8,9 @@ 
  * (C) 2005-2007 Thomas Gleixner <tglx@linutronix.de>
  *
  */
+#ifdef HAVE_LIBCPUPOWER_SUPPORT
+#include <cpuidle.h>
+#endif /* HAVE_LIBCPUPOWER_SUPPORT */
 #include <stdio.h>
 #include <stdlib.h>
 #include <stdint.h>
@@ -223,6 +226,8 @@  static void rstat_setup(void);
 static int latency_target_fd = -1;
 static int32_t latency_target_value = 0;
 
+static int deepest_idle_state = -2;
+
 static int rstat_ftruncate(int fd, off_t len);
 static int rstat_fd = -1;
 /* strlen("/cyclictest") + digits in max pid len + '\0' */
@@ -254,6 +259,11 @@  static void set_latency_target(void)
 		return;
 	}
 
+	if (deepest_idle_state >= -1) {
+		warn("not setting cpu_dma_latency, --deepest-idle-state is set instead\n");
+		return;
+	}
+
 	errno = 0;
 	err = stat("/dev/cpu_dma_latency", &s);
 	if (err == -1) {
@@ -278,6 +288,161 @@  static void set_latency_target(void)
 	printf("# /dev/cpu_dma_latency set to %dus\n", latency_target_value);
 }
 
+#ifdef HAVE_LIBCPUPOWER_SUPPORT
+static unsigned int **saved_cpu_idle_disable_state;
+static size_t saved_cpu_idle_disable_state_alloc_ctr;
+
+/*
+ * save_cpu_idle_disable_state - save disable state for all idle states of a cpu
+ *
+ * Saves the current disable state of all idle states of a cpu, to be
+ * subsequently restored via restore_cpu_idle_disable_state.
+ *
+ * Return: idle state count on success, negative on error
+ */
+static int save_cpu_idle_disable_state(unsigned int cpu)
+{
+	unsigned int nr_states;
+	unsigned int state;
+	int disabled;
+	int nr_cpus;
+
+	nr_states = cpuidle_state_count(cpu);
+
+	if (nr_states == 0)
+		return 0;
+
+	if (saved_cpu_idle_disable_state == NULL) {
+		nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
+		saved_cpu_idle_disable_state = calloc(nr_cpus, sizeof(unsigned int *));
+		if (!saved_cpu_idle_disable_state)
+			return -1;
+	}
+
+	saved_cpu_idle_disable_state[cpu] = calloc(nr_states, sizeof(unsigned int));
+	if (!saved_cpu_idle_disable_state[cpu])
+		return -1;
+	saved_cpu_idle_disable_state_alloc_ctr++;
+
+	for (state = 0; state < nr_states; state++) {
+		disabled = cpuidle_is_state_disabled(cpu, state);
+		if (disabled < 0)
+			return disabled;
+		saved_cpu_idle_disable_state[cpu][state] = disabled;
+	}
+
+	return nr_states;
+}
+
+/*
+ * restore_cpu_idle_disable_state - restore disable for all idle states of a cpu
+ *
+ * Restores the current disable state of all idle states of a cpu that was
+ * previously saved by save_cpu_idle_disable_state.
+ *
+ * Return: idle state count on success, negative on error
+ */
+static int restore_cpu_idle_disable_state(unsigned int cpu)
+{
+	unsigned int nr_states;
+	unsigned int state;
+	int disabled;
+	int result;
+
+	nr_states = cpuidle_state_count(cpu);
+
+	if (nr_states == 0)
+		return 0;
+
+	if (!saved_cpu_idle_disable_state)
+		return -1;
+
+	for (state = 0; state < nr_states; state++) {
+		if (!saved_cpu_idle_disable_state[cpu])
+			return -1;
+		disabled = saved_cpu_idle_disable_state[cpu][state];
+		result = cpuidle_state_disable(cpu, state, disabled);
+		if (result < 0)
+			return result;
+	}
+
+	free(saved_cpu_idle_disable_state[cpu]);
+	saved_cpu_idle_disable_state[cpu] = NULL;
+	saved_cpu_idle_disable_state_alloc_ctr--;
+	if (saved_cpu_idle_disable_state_alloc_ctr == 0) {
+		free(saved_cpu_idle_disable_state);
+		saved_cpu_idle_disable_state = NULL;
+	}
+
+	return nr_states;
+}
+
+/*
+ * free_cpu_idle_disable_states - free saved idle state disable for all cpus
+ *
+ * Frees the memory used for storing cpu idle state disable for all cpus
+ * and states.
+ *
+ * Normally, the memory is freed automatically in
+ * restore_cpu_idle_disable_state; this is mostly for cleaning up after an
+ * error.
+ */
+static void free_cpu_idle_disable_states(void)
+{
+	int cpu;
+	int nr_cpus;
+
+	if (!saved_cpu_idle_disable_state)
+		return;
+
+	nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
+
+	for (cpu = 0; cpu < nr_cpus; cpu++) {
+		free(saved_cpu_idle_disable_state[cpu]);
+		saved_cpu_idle_disable_state[cpu] = NULL;
+	}
+
+	free(saved_cpu_idle_disable_state);
+	saved_cpu_idle_disable_state = NULL;
+}
+
+/*
+ * set_deepest_cpu_idle_state - limit idle state of cpu
+ *
+ * Disables all idle states deeper than the one given in
+ * deepest_state (assuming states with higher number are deeper).
+ *
+ * This is used to reduce the exit from idle latency. Unlike
+ * set_cpu_dma_latency, it can disable idle states per cpu.
+ *
+ * Return: idle state count on success, negative on error
+ */
+static int set_deepest_cpu_idle_state(unsigned int cpu, unsigned int deepest_state)
+{
+	unsigned int nr_states;
+	unsigned int state;
+	int result;
+
+	nr_states = cpuidle_state_count(cpu);
+
+	for (state = deepest_state + 1; state < nr_states; state++) {
+		result = cpuidle_state_disable(cpu, state, 1);
+		if (result < 0)
+			return result;
+	}
+
+	return nr_states;
+}
+
+static inline int have_libcpupower_support(void) { return 1; }
+#else
+static inline int save_cpu_idle_disable_state(__attribute__((unused)) unsigned int cpu) { return -1; }
+static inline int restore_cpu_idle_disable_state(__attribute__((unused)) unsigned int cpu) { return -1; }
+static inline void free_cpu_idle_disable_states(void) { }
+static inline int set_deepest_cpu_idle_state(__attribute__((unused)) unsigned int cpu,
+											 __attribute__((unused)) unsigned int state) { return -1; }
+static inline int have_libcpupower_support(void) { return 0; }
+#endif /* HAVE_LIBCPUPOWER_SUPPORT */
 
 enum {
 	ERROR_GENERAL	= -1,
@@ -779,6 +944,10 @@  static void display_help(int error)
 	       "-c CLOCK --clock=CLOCK     select clock\n"
 	       "                           0 = CLOCK_MONOTONIC (default)\n"
 	       "                           1 = CLOCK_REALTIME\n"
+	       "         --deepest-idle-state=n\n"
+	       "                           Reduce exit from idle latency by limiting idle state\n"
+	       "                           up to n on used cpus (-1 disables all idle states).\n"
+	       "                           Power management is not suppresed on other cpus.\n"
 	       "         --default-system  Don't attempt to tune the system from cyclictest.\n"
 	       "                           Power management is not suppressed.\n"
 	       "                           This might give poorer results, but will allow you\n"
@@ -919,7 +1088,7 @@  enum option_values {
 	OPT_TRIGGER_NODES, OPT_UNBUFFERED, OPT_NUMA, OPT_VERBOSE,
 	OPT_DBGCYCLIC, OPT_POLICY, OPT_HELP, OPT_NUMOPTS,
 	OPT_ALIGNED, OPT_SECALIGNED, OPT_LAPTOP, OPT_SMI,
-	OPT_TRACEMARK, OPT_POSIX_TIMERS,
+	OPT_TRACEMARK, OPT_POSIX_TIMERS, OPT_DEEPEST_IDLE_STATE,
 };
 
 /* Process commandline options */
@@ -975,6 +1144,7 @@  static void process_options(int argc, char *argv[], int max_cpus)
 			{"policy",           required_argument, NULL, OPT_POLICY },
 			{"help",             no_argument,       NULL, OPT_HELP },
 			{"posix_timers",     no_argument,	NULL, OPT_POSIX_TIMERS },
+			{"deepest-idle-state", required_argument,	NULL, OPT_DEEPEST_IDLE_STATE },
 			{NULL, 0, NULL, 0 },
 		};
 		int c = getopt_long(argc, argv, "a::A::b:c:d:D:F:h:H:i:l:MNo:p:mqrRsSt::uvD:x",
@@ -1175,6 +1345,9 @@  static void process_options(int argc, char *argv[], int max_cpus)
 			break;
 		case OPT_TRACEMARK:
 			trace_marker = 1; break;
+		case OPT_DEEPEST_IDLE_STATE:
+			deepest_idle_state = atoi(optarg);
+			break;
 		}
 	}
 
@@ -1782,6 +1955,26 @@  int main(int argc, char **argv)
 	/* use the /dev/cpu_dma_latency trick if it's there */
 	set_latency_target();
 
+	if (deepest_idle_state >= -1) {
+		if (!have_libcpupower_support()) {
+			fprintf(stderr, "cyclictest built without libcpupower, --deepest-idle-state is not supported\n");
+			goto out;
+		}
+
+		for (i = 0; i < max_cpus; i++) {
+			if (affinity_mask && !numa_bitmask_isbitset(affinity_mask, i))
+				continue;
+			if (save_cpu_idle_disable_state(i) < 0) {
+				fprintf(stderr, "Could not save cpu idle state.\n");
+				goto out;
+			}
+			if (set_deepest_cpu_idle_state(i, deepest_idle_state) < 0) {
+				fprintf(stderr, "Could not set deepest cpu idle state.\n");
+				goto out;
+			}
+		}
+	}
+
 	if (tracelimit && trace_marker)
 		enable_trace_mark();
 
@@ -2147,6 +2340,16 @@  int main(int argc, char **argv)
 	if (latency_target_fd >= 0)
 		close(latency_target_fd);
 
+	/* restore and free cpu idle disable states */
+	if (deepest_idle_state >= -1) {
+		for (i = 0; i < max_cpus; i++) {
+			if (affinity_mask && !numa_bitmask_isbitset(affinity_mask, i))
+				continue;
+			restore_cpu_idle_disable_state(i);
+		}
+	}
+	free_cpu_idle_disable_states();
+
 	if (affinity_mask)
 		rt_bitmask_free(affinity_mask);