@@ -656,6 +656,31 @@ static int __sched rt_mutex_adjust_prio_chain(struct task_struct *task,
if (next_lock != waiter->lock)
goto out_unlock_pi;
+ /*
+ * There could be 'spurious' loops in the lock graph due to ww_mutex,
+ * consider:
+ *
+ * P1: A, ww_A, ww_B
+ * P2: ww_B, ww_A
+ * P3: A
+ *
+ * P3 should not return -EDEADLK because it gets trapped in the cycle
+ * created by P1 and P2 (which will resolve -- and runs into
+ * max_lock_depth above). Therefore disable detect_deadlock such that
+ * the below termination condition can trigger once all relevant tasks
+ * are boosted.
+ *
+ * Even when we start with ww_mutex we can disable deadlock detection,
+ * since we would supress a ww_mutex induced deadlock at [6] anyway.
+ * Supressing it here however is not sufficient since we might still
+ * hit [6] due to adjustment driven iteration.
+ *
+ * NOTE: if someone were to create a deadlock between 2 ww_classes we'd
+ * utterly fail to report it; lockdep should.
+ */
+ if (IS_ENABLED(CONFIG_PREEMPT_RT) && waiter->ww_ctx && detect_deadlock)
+ detect_deadlock = false;
+
/*
* Drop out, when the task has no waiters. Note,
* top_waiter can be NULL, when we are in the deboosting
@@ -717,8 +742,21 @@ static int __sched rt_mutex_adjust_prio_chain(struct task_struct *task,
* walk, we detected a deadlock.
*/
if (lock == orig_lock || rt_mutex_owner(lock) == top_task) {
- raw_spin_unlock(&lock->wait_lock);
ret = -EDEADLK;
+
+ /*
+ * When the deadlock is due to ww_mutex; also see above. Don't
+ * report the deadlock and instead let the ww_mutex wound/die
+ * logic pick which of the contending threads gets -EDEADLK.
+ *
+ * NOTE: assumes the cycle only contains a single ww_class; any
+ * other configuration and we fail to report; also, see
+ * lockdep.
+ */
+ if (IS_ENABLED(CONFIG_PREEMPT_RT) && orig_waiter->ww_ctx)
+ ret = 0;
+
+ raw_spin_unlock(&lock->wait_lock);
goto out_unlock_pi;
}
@@ -1100,8 +1138,13 @@ static int __sched task_blocks_on_rt_mutex(struct rt_mutex_base *lock,
/* Check whether the waiter should back out immediately */
rtm = container_of(lock, struct rt_mutex, rtmutex);
res = __ww_mutex_add_waiter(waiter, rtm, ww_ctx);
- if (res)
+ if (res) {
+ raw_spin_lock(&task->pi_lock);
+ rt_mutex_dequeue(lock, waiter);
+ task->pi_blocked_on = NULL;
+ raw_spin_unlock(&task->pi_lock);
return res;
+ }
}
if (!owner)
@@ -1347,8 +1390,9 @@ static bool rtmutex_spin_on_owner(struct rt_mutex_base *lock,
* for CONFIG_PREEMPT_RCU=y)
* - the VCPU on which owner runs is preempted
*/
- if (!owner->on_cpu || waiter != rt_mutex_top_waiter(lock) ||
- need_resched() || vcpu_is_preempted(task_cpu(owner))) {
+ if (!owner->on_cpu || need_resched() ||
+ rt_mutex_waiter_is_top_waiter(lock, waiter) ||
+ vcpu_is_preempted(task_cpu(owner))) {
res = false;
break;
}
@@ -95,6 +95,19 @@ static inline int rt_mutex_has_waiters(struct rt_mutex_base *lock)
return !RB_EMPTY_ROOT(&lock->waiters.rb_root);
}
+/*
+ * Lockless speculative check whether @waiter is still the top waiter on
+ * @lock. This is solely comparing pointers and not derefencing the
+ * leftmost entry which might be about to vanish.
+ */
+static inline bool rt_mutex_waiter_is_top_waiter(struct rt_mutex_base *lock,
+ struct rt_mutex_waiter *waiter)
+{
+ struct rb_node *leftmost = rb_first_cached(&lock->waiters);
+
+ return rb_entry(leftmost, struct rt_mutex_waiter, tree_entry) == waiter;
+}
+
static inline struct rt_mutex_waiter *rt_mutex_top_waiter(struct rt_mutex_base *lock)
{
struct rb_node *leftmost = rb_first_cached(&lock->waiters);
@@ -3081,7 +3081,7 @@ unsigned long wait_task_inactive(struct task_struct *p, unsigned int match_state
ktime_t to = NSEC_PER_SEC / HZ;
set_current_state(TASK_UNINTERRUPTIBLE);
- schedule_hrtimeout(&to, HRTIMER_MODE_REL);
+ schedule_hrtimeout(&to, HRTIMER_MODE_REL_HARD);
continue;
}
@@ -8588,7 +8588,6 @@ static void balance_push(struct rq *rq)
struct task_struct *push_task = rq->curr;
lockdep_assert_rq_held(rq);
- SCHED_WARN_ON(rq->cpu != smp_processor_id());
/*
* Ensure the thing is persistent until balance_push_set(.on = false);
@@ -8596,9 +8595,10 @@ static void balance_push(struct rq *rq)
rq->balance_callback = &balance_push_callback;
/*
- * Only active while going offline.
+ * Only active while going offline and when invoked on the outgoing
+ * CPU.
*/
- if (!cpu_dying(rq->cpu))
+ if (!cpu_dying(rq->cpu) && rq == this_rq())
return;
/*
@@ -2255,6 +2255,9 @@ static inline struct task_struct *get_push_task(struct rq *rq)
if (p->nr_cpus_allowed == 1)
return NULL;
+ if (p->migration_disabled)
+ return NULL;
+
rq->push_busy = true;
return get_task_struct(p);
}
@@ -1 +1 @@
--rt13
+-rt14
@@ -82,7 +82,7 @@
struct zsmalloc_handle {
unsigned long addr;
- struct mutex lock;
+ spinlock_t lock;
};
#define ZS_HANDLE_ALLOC_SIZE (sizeof(struct zsmalloc_handle))
@@ -370,7 +370,7 @@ static unsigned long cache_alloc_handle(struct zs_pool *pool, gfp_t gfp)
if (p) {
struct zsmalloc_handle *zh = p;
- mutex_init(&zh->lock);
+ spin_lock_init(&zh->lock);
}
#endif
return (unsigned long)p;
@@ -927,7 +927,7 @@ static inline int testpin_tag(unsigned long handle)
#ifdef CONFIG_PREEMPT_RT
struct zsmalloc_handle *zh = zs_get_pure_handle(handle);
- return mutex_is_locked(&zh->lock);
+ return spin_is_locked(&zh->lock);
#else
return bit_spin_is_locked(HANDLE_PIN_BIT, (unsigned long *)handle);
#endif
@@ -938,7 +938,7 @@ static inline int trypin_tag(unsigned long handle)
#ifdef CONFIG_PREEMPT_RT
struct zsmalloc_handle *zh = zs_get_pure_handle(handle);
- return mutex_trylock(&zh->lock);
+ return spin_trylock(&zh->lock);
#else
return bit_spin_trylock(HANDLE_PIN_BIT, (unsigned long *)handle);
#endif
@@ -949,7 +949,7 @@ static void pin_tag(unsigned long handle) __acquires(bitlock)
#ifdef CONFIG_PREEMPT_RT
struct zsmalloc_handle *zh = zs_get_pure_handle(handle);
- return mutex_lock(&zh->lock);
+ return spin_lock(&zh->lock);
#else
bit_spin_lock(HANDLE_PIN_BIT, (unsigned long *)handle);
#endif
@@ -960,7 +960,7 @@ static void unpin_tag(unsigned long handle) __releases(bitlock)
#ifdef CONFIG_PREEMPT_RT
struct zsmalloc_handle *zh = zs_get_pure_handle(handle);
- return mutex_unlock(&zh->lock);
+ return spin_unlock(&zh->lock);
#else
bit_spin_unlock(HANDLE_PIN_BIT, (unsigned long *)handle);
#endif