diff mbox series

[v10,01/11] cpuidle/poll_state: poll via smp_cond_load_relaxed_timewait()

Message ID 20250218213337.377987-2-ankur.a.arora@oracle.com
State New
Headers show
Series arm64: support poll_idle() | expand

Commit Message

Ankur Arora Feb. 18, 2025, 9:33 p.m. UTC
The inner loop in poll_idle() polls to see if the thread's
TIF_NEED_RESCHED bit is set. The loop exits once the condition is met,
or if the poll time limit has been exceeded.

To minimize the number of instructions executed in each iteration, the
time check is rate-limited. In addition, each loop iteration executes
cpu_relax() which on certain platforms provides a hint to the pipeline
that the loop is busy-waiting, which allows the processor to reduce
power consumption.

However, cpu_relax() is defined optimally only on x86. On arm64, for
instance, it is implemented as a YIELD, which only serves as a hint
to the CPU that it should prioritize a different hardware thread if one
is available. arm64 does, however, expose a more optimal polling
mechanism via smp_cond_load_relaxed_timewait(), which uses LDXR and WFE
to wait until a store to a specified region occurs, or until a timeout.

These semantics are essentially identical to what we want
from poll_idle(). So, restructure the loop to use
smp_cond_load_relaxed_timewait() instead.

The generated code remains close to the original version.

Suggested-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Ankur Arora <ankur.a.arora@oracle.com>
---
 drivers/cpuidle/poll_state.c | 27 ++++++++-------------------
 1 file changed, 8 insertions(+), 19 deletions(-)

Comments

Ankur Arora May 13, 2025, 5:29 a.m. UTC | #1
Ankur Arora <ankur.a.arora@oracle.com> writes:

> The inner loop in poll_idle() polls to see if the thread's
> TIF_NEED_RESCHED bit is set. The loop exits once the condition is met,
> or if the poll time limit has been exceeded.
>
> To minimize the number of instructions executed in each iteration, the
> time check is rate-limited. In addition, each loop iteration executes
> cpu_relax() which on certain platforms provides a hint to the pipeline
> that the loop is busy-waiting, which allows the processor to reduce
> power consumption.
>
> However, cpu_relax() is defined optimally only on x86. On arm64, for
> instance, it is implemented as a YIELD which only serves as a hint
> to the CPU that it prioritize a different hardware thread if one is
> available. arm64, does expose a more optimal polling mechanism via
> smp_cond_load_relaxed_timewait() which uses LDXR, WFE to wait until a
> store to a specified region, or until a timeout.
>
> These semantics are essentially identical to what we want
> from poll_idle(). So, restructure the loop to use
> smp_cond_load_relaxed_timewait() instead.
>
> The generated code remains close to the original version.
>
> Suggested-by: Catalin Marinas <catalin.marinas@arm.com>
> Signed-off-by: Ankur Arora <ankur.a.arora@oracle.com>
> ---
>  drivers/cpuidle/poll_state.c | 27 ++++++++-------------------
>  1 file changed, 8 insertions(+), 19 deletions(-)
>
> diff --git a/drivers/cpuidle/poll_state.c b/drivers/cpuidle/poll_state.c
> index 9b6d90a72601..5117d3d37036 100644
> --- a/drivers/cpuidle/poll_state.c
> +++ b/drivers/cpuidle/poll_state.c
> @@ -8,35 +8,24 @@
>  #include <linux/sched/clock.h>
>  #include <linux/sched/idle.h>
>
> -#define POLL_IDLE_RELAX_COUNT	200
> -
>  static int __cpuidle poll_idle(struct cpuidle_device *dev,
>  			       struct cpuidle_driver *drv, int index)
>  {
> -	u64 time_start;
> -
> -	time_start = local_clock_noinstr();
>
>  	dev->poll_time_limit = false;
>
>  	raw_local_irq_enable();
>  	if (!current_set_polling_and_test()) {
> -		unsigned int loop_count = 0;
> -		u64 limit;
> +		unsigned long flags;
> +		u64 time_start = local_clock_noinstr();
> +		u64 limit = cpuidle_poll_time(drv, dev);
>
> -		limit = cpuidle_poll_time(drv, dev);
> +		flags = smp_cond_load_relaxed_timewait(&current_thread_info()->flags,
> +						       VAL & _TIF_NEED_RESCHED,
> +						       local_clock_noinstr(),
> +						       time_start + limit);
>
> -		while (!need_resched()) {
> -			cpu_relax();
> -			if (loop_count++ < POLL_IDLE_RELAX_COUNT)
> -				continue;
> -
> -			loop_count = 0;
> -			if (local_clock_noinstr() - time_start > limit) {
> -				dev->poll_time_limit = true;
> -				break;
> -			}
> -		}
> +		dev->poll_time_limit = !(flags & _TIF_NEED_RESCHED);
>  	}
>  	raw_local_irq_disable();

The barrier-v2 [1] interface is slightly different from the one proposed
in v1 (which this series is based on).

[1] https://lore.kernel.org/lkml/20250502085223.1316925-1-ankur.a.arora@oracle.com/

For testing, please use the following patch. It adds a new parameter
(__smp_cond_timewait_coarse) that explicitly specifies the waiting policy.

--

diff --git a/drivers/cpuidle/poll_state.c b/drivers/cpuidle/poll_state.c
index 9b6d90a72601..2970368663c7 100644
--- a/drivers/cpuidle/poll_state.c
+++ b/drivers/cpuidle/poll_state.c
@@ -8,35 +8,25 @@
 #include <linux/sched/clock.h>
 #include <linux/sched/idle.h>

-#define POLL_IDLE_RELAX_COUNT	200
-
 static int __cpuidle poll_idle(struct cpuidle_device *dev,
 			       struct cpuidle_driver *drv, int index)
 {
-	u64 time_start;
-
-	time_start = local_clock_noinstr();

 	dev->poll_time_limit = false;

 	raw_local_irq_enable();
 	if (!current_set_polling_and_test()) {
-		unsigned int loop_count = 0;
-		u64 limit;
+		unsigned long flags;
+		u64 time_start = local_clock_noinstr();
+		u64 limit = cpuidle_poll_time(drv, dev);

-		limit = cpuidle_poll_time(drv, dev);
+		flags = smp_cond_load_relaxed_timewait(&current_thread_info()->flags,
+						       VAL & _TIF_NEED_RESCHED,
+						       __smp_cond_timewait_coarse,
+						       local_clock_noinstr(),
+						       time_start + limit);

-		while (!need_resched()) {
-			cpu_relax();
-			if (loop_count++ < POLL_IDLE_RELAX_COUNT)
-				continue;
-
-			loop_count = 0;
-			if (local_clock_noinstr() - time_start > limit) {
-				dev->poll_time_limit = true;
-				break;
-			}
-		}
+		dev->poll_time_limit = !(flags & _TIF_NEED_RESCHED);
 	}
 	raw_local_irq_disable();

--
ankur
diff mbox series

Patch

diff --git a/drivers/cpuidle/poll_state.c b/drivers/cpuidle/poll_state.c
index 9b6d90a72601..5117d3d37036 100644
--- a/drivers/cpuidle/poll_state.c
+++ b/drivers/cpuidle/poll_state.c
@@ -8,35 +8,24 @@ 
 #include <linux/sched/clock.h>
 #include <linux/sched/idle.h>
 
-#define POLL_IDLE_RELAX_COUNT	200
-
 static int __cpuidle poll_idle(struct cpuidle_device *dev,
 			       struct cpuidle_driver *drv, int index)
 {
-	u64 time_start;
-
-	time_start = local_clock_noinstr();
 
 	dev->poll_time_limit = false;
 
 	raw_local_irq_enable();
 	if (!current_set_polling_and_test()) {
-		unsigned int loop_count = 0;
-		u64 limit;
+		unsigned long flags;
+		u64 time_start = local_clock_noinstr();
+		u64 limit = cpuidle_poll_time(drv, dev);
 
-		limit = cpuidle_poll_time(drv, dev);
+		flags = smp_cond_load_relaxed_timewait(&current_thread_info()->flags,
+						       VAL & _TIF_NEED_RESCHED,
+						       local_clock_noinstr(),
+						       time_start + limit);
 
-		while (!need_resched()) {
-			cpu_relax();
-			if (loop_count++ < POLL_IDLE_RELAX_COUNT)
-				continue;
-
-			loop_count = 0;
-			if (local_clock_noinstr() - time_start > limit) {
-				dev->poll_time_limit = true;
-				break;
-			}
-		}
+		dev->poll_time_limit = !(flags & _TIF_NEED_RESCHED);
 	}
 	raw_local_irq_disable();