@@ -78,6 +78,8 @@ struct tvec_root {
struct tvec_base {
spinlock_t lock;
struct timer_list *running_timer;
+ struct list_head migration_list;
+ struct tvec_base *preferred_target;
unsigned long timer_jiffies;
unsigned long next_timer;
unsigned long active_timers;
@@ -785,10 +787,18 @@ __mod_timer(struct timer_list *timer, unsigned long expires,
* We are trying to schedule the timer on the local CPU.
* However we can't change timer's base while it is running,
* otherwise del_timer_sync() can't detect that the timer's
- * handler yet has not finished. This also guarantees that
- * the timer is serialized wrt itself.
+ * handler yet has not finished.
+ *
+ * Move timer to migration_list, which can be processed after all
+ * timers in the current cycle are serviced. This also guarantees
+ * that the timer is serialized wrt itself.
*/
- if (likely(base->running_timer != timer)) {
+ if (unlikely(base->running_timer == timer)) {
+ timer->expires = expires;
+ base->preferred_target = new_base;
+ list_add_tail(&timer->entry, &base->migration_list);
+ goto out_unlock;
+ } else {
/* See the comment in lock_timer_base() */
timer_set_base(timer, NULL);
spin_unlock(&base->lock);
@@ -1214,6 +1224,33 @@ static inline void __run_timers(struct tvec_base *base)
spin_lock_irq(&base->lock);
}
}
+
+ /*
+ * A timer handler re-armed its timer and we didn't want to add that
+ * on an idle local CPU. Its handler has finished now, so let's
+ * enqueue it again.
+ */
+ if (unlikely(!list_empty(&base->migration_list))) {
+ struct tvec_base *new_base = base->preferred_target;
+
+ do {
+ timer = list_first_entry(&base->migration_list,
+ struct timer_list, entry);
+
+ __list_del(timer->entry.prev, timer->entry.next);
+
+ /* See the comment in lock_timer_base() */
+ timer_set_base(timer, NULL);
+ spin_unlock(&base->lock);
+
+ spin_lock(&new_base->lock);
+ timer_set_base(timer, new_base);
+ internal_add_timer(new_base, timer);
+ spin_unlock(&new_base->lock);
+
+ spin_lock(&base->lock);
+ } while (!list_empty(&base->migration_list));
+ }
}
base->running_timer = NULL;
spin_unlock_irq(&base->lock);
@@ -1583,6 +1620,7 @@ static int init_timers_cpu(int cpu)
for (j = 0; j < TVR_SIZE; j++)
INIT_LIST_HEAD(base->tv1.vec + j);
+ INIT_LIST_HEAD(&base->migration_list);
base->timer_jiffies = jiffies;
base->next_timer = base->timer_jiffies;
base->active_timers = 0;
Does this look any better?
I tested this on Exynos (Dual ARM Cortex-A9) board, with ubuntu over it.
System was fairly idle and I did 'dmesg > /dev/null' on one of the CPUs
in an infinite loop, to get CPUs out of idle.
I have a few more concerns/diffs on top of this to further optimize things,
but kept them separate so that I can drop them if they aren't correct.
1.) Should the above list_empty(migration_list) block be moved out of the
while (time_after_eq(jiffies, base->timer_jiffies))
block, so that we check it only once per timer interrupt?
Also, ->running_timer is set to the last serviced timer, and a
del_timer_sync() might be waiting to remove it. But we process the
migration list first, without clearing ->running_timer. Not sure if this
is important at all...
2.) By the time we finish serving all pending timers, local CPU might not be
idle anymore OR the target CPU may become idle.
@@ -1233,6 +1233,14 @@ static inline void __run_timers(struct tvec_base *base)
if (unlikely(!list_empty(&base->migration_list))) {
struct tvec_base *new_base = base->preferred_target;
+ if (!idle_cpu(base->cpu)) {
+ /* Local CPU not idle anymore */
+ new_base = base;
+ } else if (idle_cpu(new_base->cpu)) {
+ /* Re-evaluate base, target CPU has gone idle */
+ new_base = per_cpu(tvec_bases, get_nohz_timer_target(false));
+ }
+
do {
timer = list_first_entry(&base->migration_list,
struct timer_list, entry);
The first if block is getting hit a lot of times in my tests. But
the second one has never been hit (Probably because of only two CPUs,
not sure).
3.) It might be better to update preferred_target every time we choose a
different base. This may help us avoid calling get_nohz_timer_target()
in the second if block above.
@@ -783,6 +783,8 @@ __mod_timer(struct timer_list *timer, unsigned long expires,
new_base = per_cpu(tvec_bases, cpu);
if (base != new_base) {
+ base->preferred_target = new_base;
+
/*
* We are trying to schedule the timer on the local CPU.
* However we can't change timer's base while it is running,
@@ -795,7 +797,6 @@ __mod_timer(struct timer_list *timer, unsigned long expires,
*/
if (unlikely(base->running_timer == timer)) {
timer->expires = expires;
- base->preferred_target = new_base;
list_add_tail(&timer->entry, &base->migration_list);
goto out_unlock;
} else {