Line data Source code
1 : // SPDX-License-Identifier: GPL-2.0-only
2 : /*
3 : * RT-Mutexes: simple blocking mutual exclusion locks with PI support
4 : *
5 : * started by Ingo Molnar and Thomas Gleixner.
6 : *
7 : * Copyright (C) 2004-2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
8 : * Copyright (C) 2005-2006 Timesys Corp., Thomas Gleixner <tglx@timesys.com>
9 : * Copyright (C) 2005 Kihon Technologies Inc., Steven Rostedt
10 : * Copyright (C) 2006 Esben Nielsen
11 : * Adaptive Spinlocks:
12 : * Copyright (C) 2008 Novell, Inc., Gregory Haskins, Sven Dietrich,
13 : * and Peter Morreale,
14 : * Adaptive Spinlocks simplification:
15 : * Copyright (C) 2008 Red Hat, Inc., Steven Rostedt <srostedt@redhat.com>
16 : *
17 : * See Documentation/locking/rt-mutex-design.rst for details.
18 : */
19 : #include <linux/sched.h>
20 : #include <linux/sched/debug.h>
21 : #include <linux/sched/deadline.h>
22 : #include <linux/sched/signal.h>
23 : #include <linux/sched/rt.h>
24 : #include <linux/sched/wake_q.h>
25 : #include <linux/ww_mutex.h>
26 :
27 : #include <trace/events/lock.h>
28 :
29 : #include "rtmutex_common.h"
30 :
31 : #ifndef WW_RT
32 : # define build_ww_mutex() (false)
33 : # define ww_container_of(rtm) NULL
34 :
35 : static inline int __ww_mutex_add_waiter(struct rt_mutex_waiter *waiter,
36 : struct rt_mutex *lock,
37 : struct ww_acquire_ctx *ww_ctx)
38 : {
39 : return 0;
40 : }
41 :
42 : static inline void __ww_mutex_check_waiters(struct rt_mutex *lock,
43 : struct ww_acquire_ctx *ww_ctx)
44 : {
45 : }
46 :
47 : static inline void ww_mutex_lock_acquired(struct ww_mutex *lock,
48 : struct ww_acquire_ctx *ww_ctx)
49 : {
50 : }
51 :
52 : static inline int __ww_mutex_check_kill(struct rt_mutex *lock,
53 : struct rt_mutex_waiter *waiter,
54 : struct ww_acquire_ctx *ww_ctx)
55 : {
56 : return 0;
57 : }
58 :
59 : #else
60 : # define build_ww_mutex() (true)
61 : # define ww_container_of(rtm) container_of(rtm, struct ww_mutex, base)
62 : # include "ww_mutex.h"
63 : #endif
64 :
65 : /*
66 : * lock->owner state tracking:
67 : *
68 : * lock->owner holds the task_struct pointer of the owner. Bit 0
69 : * is used to keep track of the "lock has waiters" state.
70 : *
71 : * owner bit0
72 : * NULL 0 lock is free (fast acquire possible)
73 : * NULL 1 lock is free and has waiters and the top waiter
74 : * is going to take the lock*
75 : * taskpointer 0 lock is held (fast release possible)
76 : * taskpointer 1 lock is held and has waiters**
77 : *
78 : * The fast atomic compare exchange based acquire and release is only
79 : * possible when bit 0 of lock->owner is 0.
80 : *
81 : * (*) It also can be a transitional state when grabbing the lock
82 : * with ->wait_lock is held. To prevent any fast path cmpxchg to the lock,
83 : * we need to set the bit0 before looking at the lock, and the owner may be
84 : * NULL in this small time, hence this can be a transitional state.
85 : *
86 : * (**) There is a small time when bit 0 is set but there are no
87 : * waiters. This can happen when grabbing the lock in the slow path.
88 : * To prevent a cmpxchg of the owner releasing the lock, we need to
89 : * set this bit before looking at the lock.
90 : */
91 :
92 : static __always_inline struct task_struct *
93 : rt_mutex_owner_encode(struct rt_mutex_base *lock, struct task_struct *owner)
94 : {
95 0 : unsigned long val = (unsigned long)owner;
96 :
97 0 : if (rt_mutex_has_waiters(lock))
98 0 : val |= RT_MUTEX_HAS_WAITERS;
99 :
100 0 : return (struct task_struct *)val;
101 : }
102 :
103 : static __always_inline void
104 : rt_mutex_set_owner(struct rt_mutex_base *lock, struct task_struct *owner)
105 : {
106 : /*
107 : * lock->wait_lock is held but explicit acquire semantics are needed
108 : * for a new lock owner so WRITE_ONCE is insufficient.
109 : */
110 0 : xchg_acquire(&lock->owner, rt_mutex_owner_encode(lock, owner));
111 : }
112 :
113 : static __always_inline void rt_mutex_clear_owner(struct rt_mutex_base *lock)
114 : {
115 : /* lock->wait_lock is held so the unlock provides release semantics. */
116 0 : WRITE_ONCE(lock->owner, rt_mutex_owner_encode(lock, NULL));
117 : }
118 :
119 : static __always_inline void clear_rt_mutex_waiters(struct rt_mutex_base *lock)
120 : {
121 0 : lock->owner = (struct task_struct *)
122 0 : ((unsigned long)lock->owner & ~RT_MUTEX_HAS_WAITERS);
123 : }
124 :
125 : static __always_inline void
126 : fixup_rt_mutex_waiters(struct rt_mutex_base *lock, bool acquire_lock)
127 : {
128 0 : unsigned long owner, *p = (unsigned long *) &lock->owner;
129 :
130 0 : if (rt_mutex_has_waiters(lock))
131 : return;
132 :
133 : /*
134 : * The rbtree has no waiters enqueued, now make sure that the
135 : * lock->owner still has the waiters bit set, otherwise the
136 : * following can happen:
137 : *
138 : * CPU 0 CPU 1 CPU2
139 : * l->owner=T1
140 : * rt_mutex_lock(l)
141 : * lock(l->lock)
142 : * l->owner = T1 | HAS_WAITERS;
143 : * enqueue(T2)
144 : * boost()
145 : * unlock(l->lock)
146 : * block()
147 : *
148 : * rt_mutex_lock(l)
149 : * lock(l->lock)
150 : * l->owner = T1 | HAS_WAITERS;
151 : * enqueue(T3)
152 : * boost()
153 : * unlock(l->lock)
154 : * block()
155 : * signal(->T2) signal(->T3)
156 : * lock(l->lock)
157 : * dequeue(T2)
158 : * deboost()
159 : * unlock(l->lock)
160 : * lock(l->lock)
161 : * dequeue(T3)
162 : * ==> wait list is empty
163 : * deboost()
164 : * unlock(l->lock)
165 : * lock(l->lock)
166 : * fixup_rt_mutex_waiters()
167 : * if (wait_list_empty(l) {
168 : * l->owner = owner
169 : * owner = l->owner & ~HAS_WAITERS;
170 : * ==> l->owner = T1
171 : * }
172 : * lock(l->lock)
173 : * rt_mutex_unlock(l) fixup_rt_mutex_waiters()
174 : * if (wait_list_empty(l) {
175 : * owner = l->owner & ~HAS_WAITERS;
176 : * cmpxchg(l->owner, T1, NULL)
177 : * ===> Success (l->owner = NULL)
178 : *
179 : * l->owner = owner
180 : * ==> l->owner = T1
181 : * }
182 : *
183 : * With the check for the waiter bit in place T3 on CPU2 will not
184 : * overwrite. All tasks fiddling with the waiters bit are
185 : * serialized by l->lock, so nothing else can modify the waiters
186 : * bit. If the bit is set then nothing can change l->owner either
187 : * so the simple RMW is safe. The cmpxchg() will simply fail if it
188 : * happens in the middle of the RMW because the waiters bit is
189 : * still set.
190 : */
191 0 : owner = READ_ONCE(*p);
192 0 : if (owner & RT_MUTEX_HAS_WAITERS) {
193 : /*
194 : * See rt_mutex_set_owner() and rt_mutex_clear_owner() on
195 : * why xchg_acquire() is used for updating owner for
196 : * locking and WRITE_ONCE() for unlocking.
197 : *
198 : * WRITE_ONCE() would work for the acquire case too, but
199 : * in case that the lock acquisition failed it might
200 : * force other lockers into the slow path unnecessarily.
201 : */
202 : if (acquire_lock)
203 0 : xchg_acquire(p, owner & ~RT_MUTEX_HAS_WAITERS);
204 : else
205 0 : WRITE_ONCE(*p, owner & ~RT_MUTEX_HAS_WAITERS);
206 : }
207 : }
208 :
209 : /*
210 : * We can speed up the acquire/release, if there's no debugging state to be
211 : * set up.
212 : */
213 : #ifndef CONFIG_DEBUG_RT_MUTEXES
214 : static __always_inline bool rt_mutex_cmpxchg_acquire(struct rt_mutex_base *lock,
215 : struct task_struct *old,
216 : struct task_struct *new)
217 : {
218 0 : return try_cmpxchg_acquire(&lock->owner, &old, new);
219 : }
220 :
221 : static __always_inline bool rt_mutex_cmpxchg_release(struct rt_mutex_base *lock,
222 : struct task_struct *old,
223 : struct task_struct *new)
224 : {
225 0 : return try_cmpxchg_release(&lock->owner, &old, new);
226 : }
227 :
228 : /*
229 : * Callers must hold the ->wait_lock -- which is the whole purpose as we force
230 : * all future threads that attempt to [Rmw] the lock to the slowpath. As such
231 : * relaxed semantics suffice.
232 : */
233 : static __always_inline void mark_rt_mutex_waiters(struct rt_mutex_base *lock)
234 : {
235 0 : unsigned long owner, *p = (unsigned long *) &lock->owner;
236 :
237 : do {
238 0 : owner = *p;
239 0 : } while (cmpxchg_relaxed(p, owner,
240 : owner | RT_MUTEX_HAS_WAITERS) != owner);
241 :
242 : /*
243 : * The cmpxchg loop above is relaxed to avoid back-to-back ACQUIRE
244 : * operations in the event of contention. Ensure the successful
245 : * cmpxchg is visible.
246 : */
247 0 : smp_mb__after_atomic();
248 : }
249 :
250 : /*
251 : * Safe fastpath aware unlock:
252 : * 1) Clear the waiters bit
253 : * 2) Drop lock->wait_lock
254 : * 3) Try to unlock the lock with cmpxchg
255 : */
256 : static __always_inline bool unlock_rt_mutex_safe(struct rt_mutex_base *lock,
257 : unsigned long flags)
258 : __releases(lock->wait_lock)
259 : {
260 0 : struct task_struct *owner = rt_mutex_owner(lock);
261 :
262 0 : clear_rt_mutex_waiters(lock);
263 0 : raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
264 : /*
265 : * If a new waiter comes in between the unlock and the cmpxchg
266 : * we have two situations:
267 : *
268 : * unlock(wait_lock);
269 : * lock(wait_lock);
270 : * cmpxchg(p, owner, 0) == owner
271 : * mark_rt_mutex_waiters(lock);
272 : * acquire(lock);
273 : * or:
274 : *
275 : * unlock(wait_lock);
276 : * lock(wait_lock);
277 : * mark_rt_mutex_waiters(lock);
278 : *
279 : * cmpxchg(p, owner, 0) != owner
280 : * enqueue_waiter();
281 : * unlock(wait_lock);
282 : * lock(wait_lock);
283 : * wake waiter();
284 : * unlock(wait_lock);
285 : * lock(wait_lock);
286 : * acquire(lock);
287 : */
288 0 : return rt_mutex_cmpxchg_release(lock, owner, NULL);
289 : }
290 :
291 : #else
292 : static __always_inline bool rt_mutex_cmpxchg_acquire(struct rt_mutex_base *lock,
293 : struct task_struct *old,
294 : struct task_struct *new)
295 : {
296 : return false;
297 :
298 : }
299 :
300 : static __always_inline bool rt_mutex_cmpxchg_release(struct rt_mutex_base *lock,
301 : struct task_struct *old,
302 : struct task_struct *new)
303 : {
304 : return false;
305 : }
306 :
307 : static __always_inline void mark_rt_mutex_waiters(struct rt_mutex_base *lock)
308 : {
309 : lock->owner = (struct task_struct *)
310 : ((unsigned long)lock->owner | RT_MUTEX_HAS_WAITERS);
311 : }
312 :
313 : /*
314 : * Simple slow path only version: lock->owner is protected by lock->wait_lock.
315 : */
316 : static __always_inline bool unlock_rt_mutex_safe(struct rt_mutex_base *lock,
317 : unsigned long flags)
318 : __releases(lock->wait_lock)
319 : {
320 : lock->owner = NULL;
321 : raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
322 : return true;
323 : }
324 : #endif
325 :
326 : static __always_inline int __waiter_prio(struct task_struct *task)
327 : {
328 0 : int prio = task->prio;
329 :
330 0 : if (!rt_prio(prio))
331 : return DEFAULT_PRIO;
332 :
333 : return prio;
334 : }
335 :
336 : static __always_inline void
337 : waiter_update_prio(struct rt_mutex_waiter *waiter, struct task_struct *task)
338 : {
339 0 : waiter->prio = __waiter_prio(task);
340 0 : waiter->deadline = task->dl.deadline;
341 : }
342 :
343 : /*
344 : * Only use with rt_mutex_waiter_{less,equal}()
345 : */
346 : #define task_to_waiter(p) \
347 : &(struct rt_mutex_waiter){ .prio = __waiter_prio(p), .deadline = (p)->dl.deadline }
348 :
349 : static __always_inline int rt_mutex_waiter_less(struct rt_mutex_waiter *left,
350 : struct rt_mutex_waiter *right)
351 : {
352 0 : if (left->prio < right->prio)
353 : return 1;
354 :
355 : /*
356 : * If both waiters have dl_prio(), we check the deadlines of the
357 : * associated tasks.
358 : * If left waiter has a dl_prio(), and we didn't return 1 above,
359 : * then right waiter has a dl_prio() too.
360 : */
361 0 : if (dl_prio(left->prio))
362 0 : return dl_time_before(left->deadline, right->deadline);
363 :
364 : return 0;
365 : }
366 :
367 : static __always_inline int rt_mutex_waiter_equal(struct rt_mutex_waiter *left,
368 : struct rt_mutex_waiter *right)
369 : {
370 0 : if (left->prio != right->prio)
371 : return 0;
372 :
373 : /*
374 : * If both waiters have dl_prio(), we check the deadlines of the
375 : * associated tasks.
376 : * If left waiter has a dl_prio(), and we didn't return 0 above,
377 : * then right waiter has a dl_prio() too.
378 : */
379 0 : if (dl_prio(left->prio))
380 0 : return left->deadline == right->deadline;
381 :
382 : return 1;
383 : }
384 :
385 : static inline bool rt_mutex_steal(struct rt_mutex_waiter *waiter,
386 : struct rt_mutex_waiter *top_waiter)
387 : {
388 0 : if (rt_mutex_waiter_less(waiter, top_waiter))
389 : return true;
390 :
391 : #ifdef RT_MUTEX_BUILD_SPINLOCKS
392 : /*
393 : * Note that RT tasks are excluded from same priority (lateral)
394 : * steals to prevent the introduction of an unbounded latency.
395 : */
396 : if (rt_prio(waiter->prio) || dl_prio(waiter->prio))
397 : return false;
398 :
399 : return rt_mutex_waiter_equal(waiter, top_waiter);
400 : #else
401 : return false;
402 : #endif
403 : }
404 :
405 : #define __node_2_waiter(node) \
406 : rb_entry((node), struct rt_mutex_waiter, tree_entry)
407 :
408 : static __always_inline bool __waiter_less(struct rb_node *a, const struct rb_node *b)
409 : {
410 0 : struct rt_mutex_waiter *aw = __node_2_waiter(a);
411 0 : struct rt_mutex_waiter *bw = __node_2_waiter(b);
412 :
413 0 : if (rt_mutex_waiter_less(aw, bw))
414 : return 1;
415 :
416 : if (!build_ww_mutex())
417 : return 0;
418 :
419 : if (rt_mutex_waiter_less(bw, aw))
420 : return 0;
421 :
422 : /* NOTE: relies on waiter->ww_ctx being set before insertion */
423 : if (aw->ww_ctx) {
424 : if (!bw->ww_ctx)
425 : return 1;
426 :
427 : return (signed long)(aw->ww_ctx->stamp -
428 : bw->ww_ctx->stamp) < 0;
429 : }
430 :
431 : return 0;
432 : }
433 :
434 : static __always_inline void
435 : rt_mutex_enqueue(struct rt_mutex_base *lock, struct rt_mutex_waiter *waiter)
436 : {
437 0 : rb_add_cached(&waiter->tree_entry, &lock->waiters, __waiter_less);
438 : }
439 :
440 : static __always_inline void
441 : rt_mutex_dequeue(struct rt_mutex_base *lock, struct rt_mutex_waiter *waiter)
442 : {
443 0 : if (RB_EMPTY_NODE(&waiter->tree_entry))
444 : return;
445 :
446 0 : rb_erase_cached(&waiter->tree_entry, &lock->waiters);
447 0 : RB_CLEAR_NODE(&waiter->tree_entry);
448 : }
449 :
450 : #define __node_2_pi_waiter(node) \
451 : rb_entry((node), struct rt_mutex_waiter, pi_tree_entry)
452 :
453 : static __always_inline bool
454 : __pi_waiter_less(struct rb_node *a, const struct rb_node *b)
455 : {
456 0 : return rt_mutex_waiter_less(__node_2_pi_waiter(a), __node_2_pi_waiter(b));
457 : }
458 :
459 : static __always_inline void
460 : rt_mutex_enqueue_pi(struct task_struct *task, struct rt_mutex_waiter *waiter)
461 : {
462 0 : rb_add_cached(&waiter->pi_tree_entry, &task->pi_waiters, __pi_waiter_less);
463 : }
464 :
465 : static __always_inline void
466 : rt_mutex_dequeue_pi(struct task_struct *task, struct rt_mutex_waiter *waiter)
467 : {
468 0 : if (RB_EMPTY_NODE(&waiter->pi_tree_entry))
469 : return;
470 :
471 0 : rb_erase_cached(&waiter->pi_tree_entry, &task->pi_waiters);
472 0 : RB_CLEAR_NODE(&waiter->pi_tree_entry);
473 : }
474 :
475 : static __always_inline void rt_mutex_adjust_prio(struct task_struct *p)
476 : {
477 0 : struct task_struct *pi_task = NULL;
478 :
479 : lockdep_assert_held(&p->pi_lock);
480 :
481 0 : if (task_has_pi_waiters(p))
482 0 : pi_task = task_top_pi_waiter(p)->task;
483 :
484 0 : rt_mutex_setprio(p, pi_task);
485 : }
486 :
487 : /* RT mutex specific wake_q wrappers */
488 : static __always_inline void rt_mutex_wake_q_add_task(struct rt_wake_q_head *wqh,
489 : struct task_struct *task,
490 : unsigned int wake_state)
491 : {
492 : if (IS_ENABLED(CONFIG_PREEMPT_RT) && wake_state == TASK_RTLOCK_WAIT) {
493 : if (IS_ENABLED(CONFIG_PROVE_LOCKING))
494 : WARN_ON_ONCE(wqh->rtlock_task);
495 : get_task_struct(task);
496 : wqh->rtlock_task = task;
497 : } else {
498 0 : wake_q_add(&wqh->head, task);
499 : }
500 : }
501 :
502 : static __always_inline void rt_mutex_wake_q_add(struct rt_wake_q_head *wqh,
503 : struct rt_mutex_waiter *w)
504 : {
505 0 : rt_mutex_wake_q_add_task(wqh, w->task, w->wake_state);
506 : }
507 :
508 : static __always_inline void rt_mutex_wake_up_q(struct rt_wake_q_head *wqh)
509 : {
510 : if (IS_ENABLED(CONFIG_PREEMPT_RT) && wqh->rtlock_task) {
511 : wake_up_state(wqh->rtlock_task, TASK_RTLOCK_WAIT);
512 : put_task_struct(wqh->rtlock_task);
513 : wqh->rtlock_task = NULL;
514 : }
515 :
516 0 : if (!wake_q_empty(&wqh->head))
517 0 : wake_up_q(&wqh->head);
518 :
519 : /* Pairs with preempt_disable() in mark_wakeup_next_waiter() */
520 0 : preempt_enable();
521 : }
522 :
523 : /*
524 : * Deadlock detection is conditional:
525 : *
526 : * If CONFIG_DEBUG_RT_MUTEXES=n, deadlock detection is only conducted
527 : * if the detect argument is == RT_MUTEX_FULL_CHAINWALK.
528 : *
529 : * If CONFIG_DEBUG_RT_MUTEXES=y, deadlock detection is always
530 : * conducted independent of the detect argument.
531 : *
532 : * If the waiter argument is NULL this indicates the deboost path and
533 : * deadlock detection is disabled independent of the detect argument
534 : * and the config settings.
535 : */
536 : static __always_inline bool
537 : rt_mutex_cond_detect_deadlock(struct rt_mutex_waiter *waiter,
538 : enum rtmutex_chainwalk chwalk)
539 : {
540 : if (IS_ENABLED(CONFIG_DEBUG_RT_MUTEXES))
541 : return waiter != NULL;
542 : return chwalk == RT_MUTEX_FULL_CHAINWALK;
543 : }
544 :
545 : static __always_inline struct rt_mutex_base *task_blocked_on_lock(struct task_struct *p)
546 : {
547 0 : return p->pi_blocked_on ? p->pi_blocked_on->lock : NULL;
548 : }
549 :
550 : /*
551 : * Adjust the priority chain. Also used for deadlock detection.
552 : * Decreases task's usage by one - may thus free the task.
553 : *
554 : * @task: the task owning the mutex (owner) for which a chain walk is
555 : * probably needed
556 : * @chwalk: do we have to carry out deadlock detection?
557 : * @orig_lock: the mutex (can be NULL if we are walking the chain to recheck
558 : * things for a task that has just got its priority adjusted, and
559 : * is waiting on a mutex)
560 : * @next_lock: the mutex on which the owner of @orig_lock was blocked before
561 : * we dropped its pi_lock. Is never dereferenced, only used for
562 : * comparison to detect lock chain changes.
563 : * @orig_waiter: rt_mutex_waiter struct for the task that has just donated
564 : * its priority to the mutex owner (can be NULL in the case
565 : * depicted above or if the top waiter is gone away and we are
566 : * actually deboosting the owner)
567 : * @top_task: the current top waiter
568 : *
569 : * Returns 0 or -EDEADLK.
570 : *
571 : * Chain walk basics and protection scope
572 : *
573 : * [R] refcount on task
574 : * [P] task->pi_lock held
575 : * [L] rtmutex->wait_lock held
576 : *
577 : * Step Description Protected by
578 : * function arguments:
579 : * @task [R]
580 : * @orig_lock if != NULL @top_task is blocked on it
581 : * @next_lock Unprotected. Cannot be
582 : * dereferenced. Only used for
583 : * comparison.
584 : * @orig_waiter if != NULL @top_task is blocked on it
585 : * @top_task current, or in case of proxy
586 : * locking protected by calling
587 : * code
588 : * again:
589 : * loop_sanity_check();
590 : * retry:
591 : * [1] lock(task->pi_lock); [R] acquire [P]
592 : * [2] waiter = task->pi_blocked_on; [P]
593 : * [3] check_exit_conditions_1(); [P]
594 : * [4] lock = waiter->lock; [P]
595 : * [5] if (!try_lock(lock->wait_lock)) { [P] try to acquire [L]
596 : * unlock(task->pi_lock); release [P]
597 : * goto retry;
598 : * }
599 : * [6] check_exit_conditions_2(); [P] + [L]
600 : * [7] requeue_lock_waiter(lock, waiter); [P] + [L]
601 : * [8] unlock(task->pi_lock); release [P]
602 : * put_task_struct(task); release [R]
603 : * [9] check_exit_conditions_3(); [L]
604 : * [10] task = owner(lock); [L]
605 : * get_task_struct(task); [L] acquire [R]
606 : * lock(task->pi_lock); [L] acquire [P]
607 : * [11] requeue_pi_waiter(tsk, waiters(lock));[P] + [L]
608 : * [12] check_exit_conditions_4(); [P] + [L]
609 : * [13] unlock(task->pi_lock); release [P]
610 : * unlock(lock->wait_lock); release [L]
611 : * goto again;
612 : */
613 0 : static int __sched rt_mutex_adjust_prio_chain(struct task_struct *task,
614 : enum rtmutex_chainwalk chwalk,
615 : struct rt_mutex_base *orig_lock,
616 : struct rt_mutex_base *next_lock,
617 : struct rt_mutex_waiter *orig_waiter,
618 : struct task_struct *top_task)
619 : {
620 0 : struct rt_mutex_waiter *waiter, *top_waiter = orig_waiter;
621 : struct rt_mutex_waiter *prerequeue_top_waiter;
622 0 : int ret = 0, depth = 0;
623 : struct rt_mutex_base *lock;
624 : bool detect_deadlock;
625 0 : bool requeue = true;
626 :
627 0 : detect_deadlock = rt_mutex_cond_detect_deadlock(orig_waiter, chwalk);
628 :
629 : /*
630 : * The (de)boosting is a step by step approach with a lot of
631 : * pitfalls. We want this to be preemptible and we want hold a
632 : * maximum of two locks per step. So we have to check
633 : * carefully whether things change under us.
634 : */
635 : again:
636 : /*
637 : * We limit the lock chain length for each invocation.
638 : */
639 0 : if (++depth > max_lock_depth) {
640 : static int prev_max;
641 :
642 : /*
643 : * Print this only once. If the admin changes the limit,
644 : * print a new message when reaching the limit again.
645 : */
646 0 : if (prev_max != max_lock_depth) {
647 0 : prev_max = max_lock_depth;
648 0 : printk(KERN_WARNING "Maximum lock depth %d reached "
649 : "task: %s (%d)\n", max_lock_depth,
650 : top_task->comm, task_pid_nr(top_task));
651 : }
652 0 : put_task_struct(task);
653 :
654 0 : return -EDEADLK;
655 : }
656 :
657 : /*
658 : * We are fully preemptible here and only hold the refcount on
659 : * @task. So everything can have changed under us since the
660 : * caller or our own code below (goto retry/again) dropped all
661 : * locks.
662 : */
663 : retry:
664 : /*
665 : * [1] Task cannot go away as we did a get_task() before !
666 : */
667 0 : raw_spin_lock_irq(&task->pi_lock);
668 :
669 : /*
670 : * [2] Get the waiter on which @task is blocked on.
671 : */
672 0 : waiter = task->pi_blocked_on;
673 :
674 : /*
675 : * [3] check_exit_conditions_1() protected by task->pi_lock.
676 : */
677 :
678 : /*
679 : * Check whether the end of the boosting chain has been
680 : * reached or the state of the chain has changed while we
681 : * dropped the locks.
682 : */
683 0 : if (!waiter)
684 : goto out_unlock_pi;
685 :
686 : /*
687 : * Check the orig_waiter state. After we dropped the locks,
688 : * the previous owner of the lock might have released the lock.
689 : */
690 0 : if (orig_waiter && !rt_mutex_owner(orig_lock))
691 : goto out_unlock_pi;
692 :
693 : /*
694 : * We dropped all locks after taking a refcount on @task, so
695 : * the task might have moved on in the lock chain or even left
696 : * the chain completely and blocks now on an unrelated lock or
697 : * on @orig_lock.
698 : *
699 : * We stored the lock on which @task was blocked in @next_lock,
700 : * so we can detect the chain change.
701 : */
702 0 : if (next_lock != waiter->lock)
703 : goto out_unlock_pi;
704 :
705 : /*
706 : * There could be 'spurious' loops in the lock graph due to ww_mutex,
707 : * consider:
708 : *
709 : * P1: A, ww_A, ww_B
710 : * P2: ww_B, ww_A
711 : * P3: A
712 : *
713 : * P3 should not return -EDEADLK because it gets trapped in the cycle
714 : * created by P1 and P2 (which will resolve -- and runs into
715 : * max_lock_depth above). Therefore disable detect_deadlock such that
716 : * the below termination condition can trigger once all relevant tasks
717 : * are boosted.
718 : *
719 : * Even when we start with ww_mutex we can disable deadlock detection,
720 : * since we would supress a ww_mutex induced deadlock at [6] anyway.
721 : * Supressing it here however is not sufficient since we might still
722 : * hit [6] due to adjustment driven iteration.
723 : *
724 : * NOTE: if someone were to create a deadlock between 2 ww_classes we'd
725 : * utterly fail to report it; lockdep should.
726 : */
727 : if (IS_ENABLED(CONFIG_PREEMPT_RT) && waiter->ww_ctx && detect_deadlock)
728 : detect_deadlock = false;
729 :
730 : /*
731 : * Drop out, when the task has no waiters. Note,
732 : * top_waiter can be NULL, when we are in the deboosting
733 : * mode!
734 : */
735 0 : if (top_waiter) {
736 0 : if (!task_has_pi_waiters(task))
737 : goto out_unlock_pi;
738 : /*
739 : * If deadlock detection is off, we stop here if we
740 : * are not the top pi waiter of the task. If deadlock
741 : * detection is enabled we continue, but stop the
742 : * requeueing in the chain walk.
743 : */
744 0 : if (top_waiter != task_top_pi_waiter(task)) {
745 0 : if (!detect_deadlock)
746 : goto out_unlock_pi;
747 : else
748 : requeue = false;
749 : }
750 : }
751 :
752 : /*
753 : * If the waiter priority is the same as the task priority
754 : * then there is no further priority adjustment necessary. If
755 : * deadlock detection is off, we stop the chain walk. If its
756 : * enabled we continue, but stop the requeueing in the chain
757 : * walk.
758 : */
759 0 : if (rt_mutex_waiter_equal(waiter, task_to_waiter(task))) {
760 0 : if (!detect_deadlock)
761 : goto out_unlock_pi;
762 : else
763 : requeue = false;
764 : }
765 :
766 : /*
767 : * [4] Get the next lock
768 : */
769 0 : lock = waiter->lock;
770 : /*
771 : * [5] We need to trylock here as we are holding task->pi_lock,
772 : * which is the reverse lock order versus the other rtmutex
773 : * operations.
774 : */
775 0 : if (!raw_spin_trylock(&lock->wait_lock)) {
776 : raw_spin_unlock_irq(&task->pi_lock);
777 : cpu_relax();
778 : goto retry;
779 : }
780 :
781 : /*
782 : * [6] check_exit_conditions_2() protected by task->pi_lock and
783 : * lock->wait_lock.
784 : *
785 : * Deadlock detection. If the lock is the same as the original
786 : * lock which caused us to walk the lock chain or if the
787 : * current lock is owned by the task which initiated the chain
788 : * walk, we detected a deadlock.
789 : */
790 0 : if (lock == orig_lock || rt_mutex_owner(lock) == top_task) {
791 0 : ret = -EDEADLK;
792 :
793 : /*
794 : * When the deadlock is due to ww_mutex; also see above. Don't
795 : * report the deadlock and instead let the ww_mutex wound/die
796 : * logic pick which of the contending threads gets -EDEADLK.
797 : *
798 : * NOTE: assumes the cycle only contains a single ww_class; any
799 : * other configuration and we fail to report; also, see
800 : * lockdep.
801 : */
802 : if (IS_ENABLED(CONFIG_PREEMPT_RT) && orig_waiter && orig_waiter->ww_ctx)
803 : ret = 0;
804 :
805 0 : raw_spin_unlock(&lock->wait_lock);
806 0 : goto out_unlock_pi;
807 : }
808 :
809 : /*
810 : * If we just follow the lock chain for deadlock detection, no
811 : * need to do all the requeue operations. To avoid a truckload
812 : * of conditionals around the various places below, just do the
813 : * minimum chain walk checks.
814 : */
815 0 : if (!requeue) {
816 : /*
817 : * No requeue[7] here. Just release @task [8]
818 : */
819 0 : raw_spin_unlock(&task->pi_lock);
820 0 : put_task_struct(task);
821 :
822 : /*
823 : * [9] check_exit_conditions_3 protected by lock->wait_lock.
824 : * If there is no owner of the lock, end of chain.
825 : */
826 0 : if (!rt_mutex_owner(lock)) {
827 0 : raw_spin_unlock_irq(&lock->wait_lock);
828 0 : return 0;
829 : }
830 :
831 : /* [10] Grab the next task, i.e. owner of @lock */
832 0 : task = get_task_struct(rt_mutex_owner(lock));
833 0 : raw_spin_lock(&task->pi_lock);
834 :
835 : /*
836 : * No requeue [11] here. We just do deadlock detection.
837 : *
838 : * [12] Store whether owner is blocked
839 : * itself. Decision is made after dropping the locks
840 : */
841 0 : next_lock = task_blocked_on_lock(task);
842 : /*
843 : * Get the top waiter for the next iteration
844 : */
845 0 : top_waiter = rt_mutex_top_waiter(lock);
846 :
847 : /* [13] Drop locks */
848 0 : raw_spin_unlock(&task->pi_lock);
849 0 : raw_spin_unlock_irq(&lock->wait_lock);
850 :
851 : /* If owner is not blocked, end of chain. */
852 0 : if (!next_lock)
853 : goto out_put_task;
854 : goto again;
855 : }
856 :
857 : /*
858 : * Store the current top waiter before doing the requeue
859 : * operation on @lock. We need it for the boost/deboost
860 : * decision below.
861 : */
862 0 : prerequeue_top_waiter = rt_mutex_top_waiter(lock);
863 :
864 : /* [7] Requeue the waiter in the lock waiter tree. */
865 0 : rt_mutex_dequeue(lock, waiter);
866 :
867 : /*
868 : * Update the waiter prio fields now that we're dequeued.
869 : *
870 : * These values can have changed through either:
871 : *
872 : * sys_sched_set_scheduler() / sys_sched_setattr()
873 : *
874 : * or
875 : *
876 : * DL CBS enforcement advancing the effective deadline.
877 : *
878 : * Even though pi_waiters also uses these fields, and that tree is only
879 : * updated in [11], we can do this here, since we hold [L], which
880 : * serializes all pi_waiters access and rb_erase() does not care about
881 : * the values of the node being removed.
882 : */
883 0 : waiter_update_prio(waiter, task);
884 :
885 0 : rt_mutex_enqueue(lock, waiter);
886 :
887 : /* [8] Release the task */
888 0 : raw_spin_unlock(&task->pi_lock);
889 0 : put_task_struct(task);
890 :
891 : /*
892 : * [9] check_exit_conditions_3 protected by lock->wait_lock.
893 : *
894 : * We must abort the chain walk if there is no lock owner even
895 : * in the dead lock detection case, as we have nothing to
896 : * follow here. This is the end of the chain we are walking.
897 : */
898 0 : if (!rt_mutex_owner(lock)) {
899 : /*
900 : * If the requeue [7] above changed the top waiter,
901 : * then we need to wake the new top waiter up to try
902 : * to get the lock.
903 : */
904 0 : top_waiter = rt_mutex_top_waiter(lock);
905 0 : if (prerequeue_top_waiter != top_waiter)
906 0 : wake_up_state(top_waiter->task, top_waiter->wake_state);
907 0 : raw_spin_unlock_irq(&lock->wait_lock);
908 0 : return 0;
909 : }
910 :
911 : /* [10] Grab the next task, i.e. the owner of @lock */
912 0 : task = get_task_struct(rt_mutex_owner(lock));
913 0 : raw_spin_lock(&task->pi_lock);
914 :
915 : /* [11] requeue the pi waiters if necessary */
916 0 : if (waiter == rt_mutex_top_waiter(lock)) {
917 : /*
918 : * The waiter became the new top (highest priority)
919 : * waiter on the lock. Replace the previous top waiter
920 : * in the owner tasks pi waiters tree with this waiter
921 : * and adjust the priority of the owner.
922 : */
923 0 : rt_mutex_dequeue_pi(task, prerequeue_top_waiter);
924 0 : rt_mutex_enqueue_pi(task, waiter);
925 : rt_mutex_adjust_prio(task);
926 :
927 0 : } else if (prerequeue_top_waiter == waiter) {
928 : /*
929 : * The waiter was the top waiter on the lock, but is
930 : * no longer the top priority waiter. Replace waiter in
931 : * the owner tasks pi waiters tree with the new top
932 : * (highest priority) waiter and adjust the priority
933 : * of the owner.
934 : * The new top waiter is stored in @waiter so that
935 : * @waiter == @top_waiter evaluates to true below and
936 : * we continue to deboost the rest of the chain.
937 : */
938 0 : rt_mutex_dequeue_pi(task, waiter);
939 0 : waiter = rt_mutex_top_waiter(lock);
940 0 : rt_mutex_enqueue_pi(task, waiter);
941 : rt_mutex_adjust_prio(task);
942 : } else {
943 : /*
944 : * Nothing changed. No need to do any priority
945 : * adjustment.
946 : */
947 : }
948 :
949 : /*
950 : * [12] check_exit_conditions_4() protected by task->pi_lock
951 : * and lock->wait_lock. The actual decisions are made after we
952 : * dropped the locks.
953 : *
954 : * Check whether the task which owns the current lock is pi
955 : * blocked itself. If yes we store a pointer to the lock for
956 : * the lock chain change detection above. After we dropped
957 : * task->pi_lock next_lock cannot be dereferenced anymore.
958 : */
959 0 : next_lock = task_blocked_on_lock(task);
960 : /*
961 : * Store the top waiter of @lock for the end of chain walk
962 : * decision below.
963 : */
964 0 : top_waiter = rt_mutex_top_waiter(lock);
965 :
966 : /* [13] Drop the locks */
967 0 : raw_spin_unlock(&task->pi_lock);
968 0 : raw_spin_unlock_irq(&lock->wait_lock);
969 :
970 : /*
971 : * Make the actual exit decisions [12], based on the stored
972 : * values.
973 : *
974 : * We reached the end of the lock chain. Stop right here. No
975 : * point to go back just to figure that out.
976 : */
977 0 : if (!next_lock)
978 : goto out_put_task;
979 :
980 : /*
981 : * If the current waiter is not the top waiter on the lock,
982 : * then we can stop the chain walk here if we are not in full
983 : * deadlock detection mode.
984 : */
985 0 : if (!detect_deadlock && waiter != top_waiter)
986 : goto out_put_task;
987 :
988 : goto again;
989 :
990 : out_unlock_pi:
991 0 : raw_spin_unlock_irq(&task->pi_lock);
992 : out_put_task:
993 0 : put_task_struct(task);
994 :
995 0 : return ret;
996 : }
997 :
998 : /*
999 : * Try to take an rt-mutex
1000 : *
1001 : * Must be called with lock->wait_lock held and interrupts disabled
1002 : *
1003 : * @lock: The lock to be acquired.
1004 : * @task: The task which wants to acquire the lock
1005 : * @waiter: The waiter that is queued to the lock's wait tree if the
1006 : * callsite called task_blocked_on_lock(), otherwise NULL
1007 : */
1008 : static int __sched
1009 0 : try_to_take_rt_mutex(struct rt_mutex_base *lock, struct task_struct *task,
1010 : struct rt_mutex_waiter *waiter)
1011 : {
1012 : lockdep_assert_held(&lock->wait_lock);
1013 :
1014 : /*
1015 : * Before testing whether we can acquire @lock, we set the
1016 : * RT_MUTEX_HAS_WAITERS bit in @lock->owner. This forces all
1017 : * other tasks which try to modify @lock into the slow path
1018 : * and they serialize on @lock->wait_lock.
1019 : *
1020 : * The RT_MUTEX_HAS_WAITERS bit can have a transitional state
1021 : * as explained at the top of this file if and only if:
1022 : *
1023 : * - There is a lock owner. The caller must fixup the
1024 : * transient state if it does a trylock or leaves the lock
1025 : * function due to a signal or timeout.
1026 : *
1027 : * - @task acquires the lock and there are no other
1028 : * waiters. This is undone in rt_mutex_set_owner(@task) at
1029 : * the end of this function.
1030 : */
1031 0 : mark_rt_mutex_waiters(lock);
1032 :
1033 : /*
1034 : * If @lock has an owner, give up.
1035 : */
1036 0 : if (rt_mutex_owner(lock))
1037 : return 0;
1038 :
1039 : /*
1040 : * If @waiter != NULL, @task has already enqueued the waiter
1041 : * into @lock waiter tree. If @waiter == NULL then this is a
1042 : * trylock attempt.
1043 : */
1044 0 : if (waiter) {
1045 0 : struct rt_mutex_waiter *top_waiter = rt_mutex_top_waiter(lock);
1046 :
1047 : /*
1048 : * If waiter is the highest priority waiter of @lock,
1049 : * or allowed to steal it, take it over.
1050 : */
1051 0 : if (waiter == top_waiter || rt_mutex_steal(waiter, top_waiter)) {
1052 : /*
1053 : * We can acquire the lock. Remove the waiter from the
1054 : * lock waiters tree.
1055 : */
1056 : rt_mutex_dequeue(lock, waiter);
1057 : } else {
1058 : return 0;
1059 : }
1060 : } else {
1061 : /*
1062 : * If the lock has waiters already we check whether @task is
1063 : * eligible to take over the lock.
1064 : *
1065 : * If there are no other waiters, @task can acquire
1066 : * the lock. @task->pi_blocked_on is NULL, so it does
1067 : * not need to be dequeued.
1068 : */
1069 0 : if (rt_mutex_has_waiters(lock)) {
1070 : /* Check whether the trylock can steal it. */
1071 0 : if (!rt_mutex_steal(task_to_waiter(task),
1072 : rt_mutex_top_waiter(lock)))
1073 : return 0;
1074 :
1075 : /*
1076 : * The current top waiter stays enqueued. We
1077 : * don't have to change anything in the lock
1078 : * waiters order.
1079 : */
1080 : } else {
1081 : /*
1082 : * No waiters. Take the lock without the
1083 : * pi_lock dance.@task->pi_blocked_on is NULL
1084 : * and we have no waiters to enqueue in @task
1085 : * pi waiters tree.
1086 : */
1087 : goto takeit;
1088 : }
1089 : }
1090 :
1091 : /*
1092 : * Clear @task->pi_blocked_on. Requires protection by
1093 : * @task->pi_lock. Redundant operation for the @waiter == NULL
1094 : * case, but conditionals are more expensive than a redundant
1095 : * store.
1096 : */
1097 0 : raw_spin_lock(&task->pi_lock);
1098 0 : task->pi_blocked_on = NULL;
1099 : /*
1100 : * Finish the lock acquisition. @task is the new owner. If
1101 : * other waiters exist we have to insert the highest priority
1102 : * waiter into @task->pi_waiters tree.
1103 : */
1104 0 : if (rt_mutex_has_waiters(lock))
1105 0 : rt_mutex_enqueue_pi(task, rt_mutex_top_waiter(lock));
1106 0 : raw_spin_unlock(&task->pi_lock);
1107 :
1108 : takeit:
1109 : /*
1110 : * This either preserves the RT_MUTEX_HAS_WAITERS bit if there
1111 : * are still waiters or clears it.
1112 : */
1113 0 : rt_mutex_set_owner(lock, task);
1114 :
1115 0 : return 1;
1116 : }
1117 :
1118 : /*
1119 : * Task blocks on lock.
1120 : *
1121 : * Prepare waiter and propagate pi chain
1122 : *
1123 : * This must be called with lock->wait_lock held and interrupts disabled
1124 : */
1125 0 : static int __sched task_blocks_on_rt_mutex(struct rt_mutex_base *lock,
1126 : struct rt_mutex_waiter *waiter,
1127 : struct task_struct *task,
1128 : struct ww_acquire_ctx *ww_ctx,
1129 : enum rtmutex_chainwalk chwalk)
1130 : {
1131 0 : struct task_struct *owner = rt_mutex_owner(lock);
1132 0 : struct rt_mutex_waiter *top_waiter = waiter;
1133 : struct rt_mutex_base *next_lock;
1134 0 : int chain_walk = 0, res;
1135 :
1136 : lockdep_assert_held(&lock->wait_lock);
1137 :
1138 : /*
1139 : * Early deadlock detection. We really don't want the task to
1140 : * enqueue on itself just to untangle the mess later. It's not
1141 : * only an optimization. We drop the locks, so another waiter
1142 : * can come in before the chain walk detects the deadlock. So
1143 : * the other will detect the deadlock and return -EDEADLOCK,
1144 : * which is wrong, as the other waiter is not in a deadlock
1145 : * situation.
1146 : *
1147 : * Except for ww_mutex, in that case the chain walk must already deal
1148 : * with spurious cycles, see the comments at [3] and [6].
1149 : */
1150 0 : if (owner == task && !(build_ww_mutex() && ww_ctx))
1151 : return -EDEADLK;
1152 :
1153 0 : raw_spin_lock(&task->pi_lock);
1154 0 : waiter->task = task;
1155 0 : waiter->lock = lock;
1156 0 : waiter_update_prio(waiter, task);
1157 :
1158 : /* Get the top priority waiter on the lock */
1159 0 : if (rt_mutex_has_waiters(lock))
1160 0 : top_waiter = rt_mutex_top_waiter(lock);
1161 0 : rt_mutex_enqueue(lock, waiter);
1162 :
1163 0 : task->pi_blocked_on = waiter;
1164 :
1165 0 : raw_spin_unlock(&task->pi_lock);
1166 :
1167 : if (build_ww_mutex() && ww_ctx) {
1168 : struct rt_mutex *rtm;
1169 :
1170 : /* Check whether the waiter should back out immediately */
1171 : rtm = container_of(lock, struct rt_mutex, rtmutex);
1172 : res = __ww_mutex_add_waiter(waiter, rtm, ww_ctx);
1173 : if (res) {
1174 : raw_spin_lock(&task->pi_lock);
1175 : rt_mutex_dequeue(lock, waiter);
1176 : task->pi_blocked_on = NULL;
1177 : raw_spin_unlock(&task->pi_lock);
1178 : return res;
1179 : }
1180 : }
1181 :
1182 0 : if (!owner)
1183 : return 0;
1184 :
1185 0 : raw_spin_lock(&owner->pi_lock);
1186 0 : if (waiter == rt_mutex_top_waiter(lock)) {
1187 0 : rt_mutex_dequeue_pi(owner, top_waiter);
1188 0 : rt_mutex_enqueue_pi(owner, waiter);
1189 :
1190 0 : rt_mutex_adjust_prio(owner);
1191 0 : if (owner->pi_blocked_on)
1192 0 : chain_walk = 1;
1193 0 : } else if (rt_mutex_cond_detect_deadlock(waiter, chwalk)) {
1194 0 : chain_walk = 1;
1195 : }
1196 :
1197 : /* Store the lock on which owner is blocked or NULL */
1198 0 : next_lock = task_blocked_on_lock(owner);
1199 :
1200 0 : raw_spin_unlock(&owner->pi_lock);
1201 : /*
1202 : * Even if full deadlock detection is on, if the owner is not
1203 : * blocked itself, we can avoid finding this out in the chain
1204 : * walk.
1205 : */
1206 0 : if (!chain_walk || !next_lock)
1207 : return 0;
1208 :
1209 : /*
1210 : * The owner can't disappear while holding a lock,
1211 : * so the owner struct is protected by wait_lock.
1212 : * Gets dropped in rt_mutex_adjust_prio_chain()!
1213 : */
1214 0 : get_task_struct(owner);
1215 :
1216 0 : raw_spin_unlock_irq(&lock->wait_lock);
1217 :
1218 0 : res = rt_mutex_adjust_prio_chain(owner, chwalk, lock,
1219 : next_lock, waiter, task);
1220 :
1221 0 : raw_spin_lock_irq(&lock->wait_lock);
1222 :
1223 : return res;
1224 : }
1225 :
1226 : /*
1227 : * Remove the top waiter from the current tasks pi waiter tree and
1228 : * queue it up.
1229 : *
1230 : * Called with lock->wait_lock held and interrupts disabled.
1231 : */
1232 0 : static void __sched mark_wakeup_next_waiter(struct rt_wake_q_head *wqh,
1233 : struct rt_mutex_base *lock)
1234 : {
1235 : struct rt_mutex_waiter *waiter;
1236 :
1237 0 : raw_spin_lock(¤t->pi_lock);
1238 :
1239 0 : waiter = rt_mutex_top_waiter(lock);
1240 :
1241 : /*
1242 : * Remove it from current->pi_waiters and deboost.
1243 : *
1244 : * We must in fact deboost here in order to ensure we call
1245 : * rt_mutex_setprio() to update p->pi_top_task before the
1246 : * task unblocks.
1247 : */
1248 0 : rt_mutex_dequeue_pi(current, waiter);
1249 0 : rt_mutex_adjust_prio(current);
1250 :
1251 : /*
1252 : * As we are waking up the top waiter, and the waiter stays
1253 : * queued on the lock until it gets the lock, this lock
1254 : * obviously has waiters. Just set the bit here and this has
1255 : * the added benefit of forcing all new tasks into the
1256 : * slow path making sure no task of lower priority than
1257 : * the top waiter can steal this lock.
1258 : */
1259 0 : lock->owner = (void *) RT_MUTEX_HAS_WAITERS;
1260 :
1261 : /*
1262 : * We deboosted before waking the top waiter task such that we don't
1263 : * run two tasks with the 'same' priority (and ensure the
1264 : * p->pi_top_task pointer points to a blocked task). This however can
1265 : * lead to priority inversion if we would get preempted after the
1266 : * deboost but before waking our donor task, hence the preempt_disable()
1267 : * before unlock.
1268 : *
1269 : * Pairs with preempt_enable() in rt_mutex_wake_up_q();
1270 : */
1271 0 : preempt_disable();
1272 0 : rt_mutex_wake_q_add(wqh, waiter);
1273 0 : raw_spin_unlock(¤t->pi_lock);
1274 0 : }
1275 :
1276 0 : static int __sched __rt_mutex_slowtrylock(struct rt_mutex_base *lock)
1277 : {
1278 0 : int ret = try_to_take_rt_mutex(lock, current, NULL);
1279 :
1280 : /*
1281 : * try_to_take_rt_mutex() sets the lock waiters bit
1282 : * unconditionally. Clean this up.
1283 : */
1284 0 : fixup_rt_mutex_waiters(lock, true);
1285 :
1286 0 : return ret;
1287 : }
1288 :
1289 : /*
1290 : * Slow path try-lock function:
1291 : */
1292 0 : static int __sched rt_mutex_slowtrylock(struct rt_mutex_base *lock)
1293 : {
1294 : unsigned long flags;
1295 : int ret;
1296 :
1297 : /*
1298 : * If the lock already has an owner we fail to get the lock.
1299 : * This can be done without taking the @lock->wait_lock as
1300 : * it is only being read, and this is a trylock anyway.
1301 : */
1302 0 : if (rt_mutex_owner(lock))
1303 : return 0;
1304 :
1305 : /*
1306 : * The mutex has currently no owner. Lock the wait lock and try to
1307 : * acquire the lock. We use irqsave here to support early boot calls.
1308 : */
1309 0 : raw_spin_lock_irqsave(&lock->wait_lock, flags);
1310 :
1311 0 : ret = __rt_mutex_slowtrylock(lock);
1312 :
1313 0 : raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
1314 :
1315 0 : return ret;
1316 : }
1317 :
1318 : static __always_inline int __rt_mutex_trylock(struct rt_mutex_base *lock)
1319 : {
1320 0 : if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current)))
1321 : return 1;
1322 :
1323 0 : return rt_mutex_slowtrylock(lock);
1324 : }
1325 :
1326 : /*
1327 : * Slow path to release a rt-mutex.
1328 : */
1329 0 : static void __sched rt_mutex_slowunlock(struct rt_mutex_base *lock)
1330 : {
1331 0 : DEFINE_RT_WAKE_Q(wqh);
1332 : unsigned long flags;
1333 :
1334 : /* irqsave required to support early boot calls */
1335 0 : raw_spin_lock_irqsave(&lock->wait_lock, flags);
1336 :
1337 0 : debug_rt_mutex_unlock(lock);
1338 :
1339 : /*
1340 : * We must be careful here if the fast path is enabled. If we
1341 : * have no waiters queued we cannot set owner to NULL here
1342 : * because of:
1343 : *
1344 : * foo->lock->owner = NULL;
1345 : * rtmutex_lock(foo->lock); <- fast path
1346 : * free = atomic_dec_and_test(foo->refcnt);
1347 : * rtmutex_unlock(foo->lock); <- fast path
1348 : * if (free)
1349 : * kfree(foo);
1350 : * raw_spin_unlock(foo->lock->wait_lock);
1351 : *
1352 : * So for the fastpath enabled kernel:
1353 : *
1354 : * Nothing can set the waiters bit as long as we hold
1355 : * lock->wait_lock. So we do the following sequence:
1356 : *
1357 : * owner = rt_mutex_owner(lock);
1358 : * clear_rt_mutex_waiters(lock);
1359 : * raw_spin_unlock(&lock->wait_lock);
1360 : * if (cmpxchg(&lock->owner, owner, 0) == owner)
1361 : * return;
1362 : * goto retry;
1363 : *
1364 : * The fastpath disabled variant is simple as all access to
1365 : * lock->owner is serialized by lock->wait_lock:
1366 : *
1367 : * lock->owner = NULL;
1368 : * raw_spin_unlock(&lock->wait_lock);
1369 : */
1370 0 : while (!rt_mutex_has_waiters(lock)) {
1371 : /* Drops lock->wait_lock ! */
1372 0 : if (unlock_rt_mutex_safe(lock, flags) == true)
1373 0 : return;
1374 : /* Relock the rtmutex and try again */
1375 0 : raw_spin_lock_irqsave(&lock->wait_lock, flags);
1376 : }
1377 :
1378 : /*
1379 : * The wakeup next waiter path does not suffer from the above
1380 : * race. See the comments there.
1381 : *
1382 : * Queue the next waiter for wakeup once we release the wait_lock.
1383 : */
1384 0 : mark_wakeup_next_waiter(&wqh, lock);
1385 0 : raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
1386 :
1387 0 : rt_mutex_wake_up_q(&wqh);
1388 : }
1389 :
1390 : static __always_inline void __rt_mutex_unlock(struct rt_mutex_base *lock)
1391 : {
1392 0 : if (likely(rt_mutex_cmpxchg_release(lock, current, NULL)))
1393 : return;
1394 :
1395 0 : rt_mutex_slowunlock(lock);
1396 : }
1397 :
1398 : #ifdef CONFIG_SMP
1399 : static bool rtmutex_spin_on_owner(struct rt_mutex_base *lock,
1400 : struct rt_mutex_waiter *waiter,
1401 : struct task_struct *owner)
1402 : {
1403 : bool res = true;
1404 :
1405 : rcu_read_lock();
1406 : for (;;) {
1407 : /* If owner changed, trylock again. */
1408 : if (owner != rt_mutex_owner(lock))
1409 : break;
1410 : /*
1411 : * Ensure that @owner is dereferenced after checking that
1412 : * the lock owner still matches @owner. If that fails,
1413 : * @owner might point to freed memory. If it still matches,
1414 : * the rcu_read_lock() ensures the memory stays valid.
1415 : */
1416 : barrier();
1417 : /*
1418 : * Stop spinning when:
1419 : * - the lock owner has been scheduled out
1420 : * - current is not longer the top waiter
1421 : * - current is requested to reschedule (redundant
1422 : * for CONFIG_PREEMPT_RCU=y)
1423 : * - the VCPU on which owner runs is preempted
1424 : */
1425 : if (!owner_on_cpu(owner) || need_resched() ||
1426 : !rt_mutex_waiter_is_top_waiter(lock, waiter)) {
1427 : res = false;
1428 : break;
1429 : }
1430 : cpu_relax();
1431 : }
1432 : rcu_read_unlock();
1433 : return res;
1434 : }
1435 : #else
1436 : static bool rtmutex_spin_on_owner(struct rt_mutex_base *lock,
1437 : struct rt_mutex_waiter *waiter,
1438 : struct task_struct *owner)
1439 : {
1440 : return false;
1441 : }
1442 : #endif
1443 :
1444 : #ifdef RT_MUTEX_BUILD_MUTEX
1445 : /*
1446 : * Functions required for:
1447 : * - rtmutex, futex on all kernels
1448 : * - mutex and rwsem substitutions on RT kernels
1449 : */
1450 :
1451 : /*
1452 : * Remove a waiter from a lock and give up
1453 : *
1454 : * Must be called with lock->wait_lock held and interrupts disabled. It must
1455 : * have just failed to try_to_take_rt_mutex().
1456 : */
1457 0 : static void __sched remove_waiter(struct rt_mutex_base *lock,
1458 : struct rt_mutex_waiter *waiter)
1459 : {
1460 0 : bool is_top_waiter = (waiter == rt_mutex_top_waiter(lock));
1461 0 : struct task_struct *owner = rt_mutex_owner(lock);
1462 : struct rt_mutex_base *next_lock;
1463 :
1464 : lockdep_assert_held(&lock->wait_lock);
1465 :
1466 0 : raw_spin_lock(¤t->pi_lock);
1467 0 : rt_mutex_dequeue(lock, waiter);
1468 0 : current->pi_blocked_on = NULL;
1469 0 : raw_spin_unlock(¤t->pi_lock);
1470 :
1471 : /*
1472 : * Only update priority if the waiter was the highest priority
1473 : * waiter of the lock and there is an owner to update.
1474 : */
1475 0 : if (!owner || !is_top_waiter)
1476 : return;
1477 :
1478 0 : raw_spin_lock(&owner->pi_lock);
1479 :
1480 0 : rt_mutex_dequeue_pi(owner, waiter);
1481 :
1482 0 : if (rt_mutex_has_waiters(lock))
1483 0 : rt_mutex_enqueue_pi(owner, rt_mutex_top_waiter(lock));
1484 :
1485 0 : rt_mutex_adjust_prio(owner);
1486 :
1487 : /* Store the lock on which owner is blocked or NULL */
1488 0 : next_lock = task_blocked_on_lock(owner);
1489 :
1490 0 : raw_spin_unlock(&owner->pi_lock);
1491 :
1492 : /*
1493 : * Don't walk the chain, if the owner task is not blocked
1494 : * itself.
1495 : */
1496 0 : if (!next_lock)
1497 : return;
1498 :
1499 : /* gets dropped in rt_mutex_adjust_prio_chain()! */
1500 0 : get_task_struct(owner);
1501 :
1502 0 : raw_spin_unlock_irq(&lock->wait_lock);
1503 :
1504 0 : rt_mutex_adjust_prio_chain(owner, RT_MUTEX_MIN_CHAINWALK, lock,
1505 0 : next_lock, NULL, current);
1506 :
1507 0 : raw_spin_lock_irq(&lock->wait_lock);
1508 : }
1509 :
1510 : /**
1511 : * rt_mutex_slowlock_block() - Perform the wait-wake-try-to-take loop
1512 : * @lock: the rt_mutex to take
1513 : * @ww_ctx: WW mutex context pointer
1514 : * @state: the state the task should block in (TASK_INTERRUPTIBLE
1515 : * or TASK_UNINTERRUPTIBLE)
1516 : * @timeout: the pre-initialized and started timer, or NULL for none
1517 : * @waiter: the pre-initialized rt_mutex_waiter
1518 : *
1519 : * Must be called with lock->wait_lock held and interrupts disabled
1520 : */
1521 0 : static int __sched rt_mutex_slowlock_block(struct rt_mutex_base *lock,
1522 : struct ww_acquire_ctx *ww_ctx,
1523 : unsigned int state,
1524 : struct hrtimer_sleeper *timeout,
1525 : struct rt_mutex_waiter *waiter)
1526 : {
1527 0 : struct rt_mutex *rtm = container_of(lock, struct rt_mutex, rtmutex);
1528 : struct task_struct *owner;
1529 0 : int ret = 0;
1530 :
1531 : for (;;) {
1532 : /* Try to acquire the lock: */
1533 0 : if (try_to_take_rt_mutex(lock, current, waiter))
1534 : break;
1535 :
1536 0 : if (timeout && !timeout->task) {
1537 : ret = -ETIMEDOUT;
1538 : break;
1539 : }
1540 0 : if (signal_pending_state(state, current)) {
1541 : ret = -EINTR;
1542 : break;
1543 : }
1544 :
1545 : if (build_ww_mutex() && ww_ctx) {
1546 : ret = __ww_mutex_check_kill(rtm, waiter, ww_ctx);
1547 : if (ret)
1548 : break;
1549 : }
1550 :
1551 0 : if (waiter == rt_mutex_top_waiter(lock))
1552 0 : owner = rt_mutex_owner(lock);
1553 : else
1554 : owner = NULL;
1555 0 : raw_spin_unlock_irq(&lock->wait_lock);
1556 :
1557 : if (!owner || !rtmutex_spin_on_owner(lock, waiter, owner))
1558 0 : schedule();
1559 :
1560 0 : raw_spin_lock_irq(&lock->wait_lock);
1561 0 : set_current_state(state);
1562 : }
1563 :
1564 0 : __set_current_state(TASK_RUNNING);
1565 0 : return ret;
1566 : }
1567 :
1568 0 : static void __sched rt_mutex_handle_deadlock(int res, int detect_deadlock,
1569 : struct rt_mutex_waiter *w)
1570 : {
1571 : /*
1572 : * If the result is not -EDEADLOCK or the caller requested
1573 : * deadlock detection, nothing to do here.
1574 : */
1575 0 : if (res != -EDEADLOCK || detect_deadlock)
1576 : return;
1577 :
1578 : if (build_ww_mutex() && w->ww_ctx)
1579 : return;
1580 :
1581 : /*
1582 : * Yell loudly and stop the task right here.
1583 : */
1584 0 : WARN(1, "rtmutex deadlock detected\n");
1585 : while (1) {
1586 0 : set_current_state(TASK_INTERRUPTIBLE);
1587 0 : schedule();
1588 : }
1589 : }
1590 :
1591 : /**
1592 : * __rt_mutex_slowlock - Locking slowpath invoked with lock::wait_lock held
1593 : * @lock: The rtmutex to block lock
1594 : * @ww_ctx: WW mutex context pointer
1595 : * @state: The task state for sleeping
1596 : * @chwalk: Indicator whether full or partial chainwalk is requested
1597 : * @waiter: Initializer waiter for blocking
1598 : */
1599 0 : static int __sched __rt_mutex_slowlock(struct rt_mutex_base *lock,
1600 : struct ww_acquire_ctx *ww_ctx,
1601 : unsigned int state,
1602 : enum rtmutex_chainwalk chwalk,
1603 : struct rt_mutex_waiter *waiter)
1604 : {
1605 0 : struct rt_mutex *rtm = container_of(lock, struct rt_mutex, rtmutex);
1606 0 : struct ww_mutex *ww = ww_container_of(rtm);
1607 : int ret;
1608 :
1609 : lockdep_assert_held(&lock->wait_lock);
1610 :
1611 : /* Try to acquire the lock again: */
1612 0 : if (try_to_take_rt_mutex(lock, current, NULL)) {
1613 : if (build_ww_mutex() && ww_ctx) {
1614 : __ww_mutex_check_waiters(rtm, ww_ctx);
1615 : ww_mutex_lock_acquired(ww, ww_ctx);
1616 : }
1617 : return 0;
1618 : }
1619 :
1620 0 : set_current_state(state);
1621 :
1622 0 : trace_contention_begin(lock, LCB_F_RT);
1623 :
1624 0 : ret = task_blocks_on_rt_mutex(lock, waiter, current, ww_ctx, chwalk);
1625 0 : if (likely(!ret))
1626 0 : ret = rt_mutex_slowlock_block(lock, ww_ctx, state, NULL, waiter);
1627 :
1628 0 : if (likely(!ret)) {
1629 : /* acquired the lock */
1630 : if (build_ww_mutex() && ww_ctx) {
1631 : if (!ww_ctx->is_wait_die)
1632 : __ww_mutex_check_waiters(rtm, ww_ctx);
1633 : ww_mutex_lock_acquired(ww, ww_ctx);
1634 : }
1635 : } else {
1636 0 : __set_current_state(TASK_RUNNING);
1637 0 : remove_waiter(lock, waiter);
1638 0 : rt_mutex_handle_deadlock(ret, chwalk, waiter);
1639 : }
1640 :
1641 : /*
1642 : * try_to_take_rt_mutex() sets the waiter bit
1643 : * unconditionally. We might have to fix that up.
1644 : */
1645 : fixup_rt_mutex_waiters(lock, true);
1646 :
1647 : trace_contention_end(lock, ret);
1648 :
1649 : return ret;
1650 : }
1651 :
1652 : static inline int __rt_mutex_slowlock_locked(struct rt_mutex_base *lock,
1653 : struct ww_acquire_ctx *ww_ctx,
1654 : unsigned int state)
1655 : {
1656 : struct rt_mutex_waiter waiter;
1657 : int ret;
1658 :
1659 0 : rt_mutex_init_waiter(&waiter);
1660 0 : waiter.ww_ctx = ww_ctx;
1661 :
1662 0 : ret = __rt_mutex_slowlock(lock, ww_ctx, state, RT_MUTEX_MIN_CHAINWALK,
1663 : &waiter);
1664 :
1665 0 : debug_rt_mutex_free_waiter(&waiter);
1666 : return ret;
1667 : }
1668 :
1669 : /*
1670 : * rt_mutex_slowlock - Locking slowpath invoked when fast path fails
1671 : * @lock: The rtmutex to block lock
1672 : * @ww_ctx: WW mutex context pointer
1673 : * @state: The task state for sleeping
1674 : */
1675 0 : static int __sched rt_mutex_slowlock(struct rt_mutex_base *lock,
1676 : struct ww_acquire_ctx *ww_ctx,
1677 : unsigned int state)
1678 : {
1679 : unsigned long flags;
1680 : int ret;
1681 :
1682 : /*
1683 : * Technically we could use raw_spin_[un]lock_irq() here, but this can
1684 : * be called in early boot if the cmpxchg() fast path is disabled
1685 : * (debug, no architecture support). In this case we will acquire the
1686 : * rtmutex with lock->wait_lock held. But we cannot unconditionally
1687 : * enable interrupts in that early boot case. So we need to use the
1688 : * irqsave/restore variants.
1689 : */
1690 0 : raw_spin_lock_irqsave(&lock->wait_lock, flags);
1691 0 : ret = __rt_mutex_slowlock_locked(lock, ww_ctx, state);
1692 0 : raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
1693 :
1694 0 : return ret;
1695 : }
1696 :
1697 : static __always_inline int __rt_mutex_lock(struct rt_mutex_base *lock,
1698 : unsigned int state)
1699 : {
1700 0 : if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current)))
1701 : return 0;
1702 :
1703 0 : return rt_mutex_slowlock(lock, NULL, state);
1704 : }
1705 : #endif /* RT_MUTEX_BUILD_MUTEX */
1706 :
1707 : #ifdef RT_MUTEX_BUILD_SPINLOCKS
1708 : /*
1709 : * Functions required for spin/rw_lock substitution on RT kernels
1710 : */
1711 :
1712 : /**
1713 : * rtlock_slowlock_locked - Slow path lock acquisition for RT locks
1714 : * @lock: The underlying RT mutex
1715 : */
1716 : static void __sched rtlock_slowlock_locked(struct rt_mutex_base *lock)
1717 : {
1718 : struct rt_mutex_waiter waiter;
1719 : struct task_struct *owner;
1720 :
1721 : lockdep_assert_held(&lock->wait_lock);
1722 :
1723 : if (try_to_take_rt_mutex(lock, current, NULL))
1724 : return;
1725 :
1726 : rt_mutex_init_rtlock_waiter(&waiter);
1727 :
1728 : /* Save current state and set state to TASK_RTLOCK_WAIT */
1729 : current_save_and_set_rtlock_wait_state();
1730 :
1731 : trace_contention_begin(lock, LCB_F_RT);
1732 :
1733 : task_blocks_on_rt_mutex(lock, &waiter, current, NULL, RT_MUTEX_MIN_CHAINWALK);
1734 :
1735 : for (;;) {
1736 : /* Try to acquire the lock again */
1737 : if (try_to_take_rt_mutex(lock, current, &waiter))
1738 : break;
1739 :
1740 : if (&waiter == rt_mutex_top_waiter(lock))
1741 : owner = rt_mutex_owner(lock);
1742 : else
1743 : owner = NULL;
1744 : raw_spin_unlock_irq(&lock->wait_lock);
1745 :
1746 : if (!owner || !rtmutex_spin_on_owner(lock, &waiter, owner))
1747 : schedule_rtlock();
1748 :
1749 : raw_spin_lock_irq(&lock->wait_lock);
1750 : set_current_state(TASK_RTLOCK_WAIT);
1751 : }
1752 :
1753 : /* Restore the task state */
1754 : current_restore_rtlock_saved_state();
1755 :
1756 : /*
1757 : * try_to_take_rt_mutex() sets the waiter bit unconditionally.
1758 : * We might have to fix that up:
1759 : */
1760 : fixup_rt_mutex_waiters(lock, true);
1761 : debug_rt_mutex_free_waiter(&waiter);
1762 :
1763 : trace_contention_end(lock, 0);
1764 : }
1765 :
1766 : static __always_inline void __sched rtlock_slowlock(struct rt_mutex_base *lock)
1767 : {
1768 : unsigned long flags;
1769 :
1770 : raw_spin_lock_irqsave(&lock->wait_lock, flags);
1771 : rtlock_slowlock_locked(lock);
1772 : raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
1773 : }
1774 :
1775 : #endif /* RT_MUTEX_BUILD_SPINLOCKS */
|