LCOV - code coverage report
Current view: top level - kernel/futex - pi.c (source / functions) Hit Total Coverage
Test: coverage.info Lines: 0 278 0.0 %
Date: 2023-08-24 13:40:31 Functions: 0 15 0.0 %

          Line data    Source code
       1             : // SPDX-License-Identifier: GPL-2.0-or-later
       2             : 
       3             : #include <linux/slab.h>
       4             : #include <linux/sched/task.h>
       5             : 
       6             : #include "futex.h"
       7             : #include "../locking/rtmutex_common.h"
       8             : 
       9             : /*
      10             :  * PI code:
      11             :  */
      12           0 : int refill_pi_state_cache(void)
      13             : {
      14             :         struct futex_pi_state *pi_state;
      15             : 
      16           0 :         if (likely(current->pi_state_cache))
      17             :                 return 0;
      18             : 
      19           0 :         pi_state = kzalloc(sizeof(*pi_state), GFP_KERNEL);
      20             : 
      21           0 :         if (!pi_state)
      22             :                 return -ENOMEM;
      23             : 
      24           0 :         INIT_LIST_HEAD(&pi_state->list);
      25             :         /* pi_mutex gets initialized later */
      26           0 :         pi_state->owner = NULL;
      27           0 :         refcount_set(&pi_state->refcount, 1);
      28           0 :         pi_state->key = FUTEX_KEY_INIT;
      29             : 
      30           0 :         current->pi_state_cache = pi_state;
      31             : 
      32           0 :         return 0;
      33             : }
      34             : 
      35           0 : static struct futex_pi_state *alloc_pi_state(void)
      36             : {
      37           0 :         struct futex_pi_state *pi_state = current->pi_state_cache;
      38             : 
      39           0 :         WARN_ON(!pi_state);
      40           0 :         current->pi_state_cache = NULL;
      41             : 
      42           0 :         return pi_state;
      43             : }
      44             : 
      45           0 : static void pi_state_update_owner(struct futex_pi_state *pi_state,
      46             :                                   struct task_struct *new_owner)
      47             : {
      48           0 :         struct task_struct *old_owner = pi_state->owner;
      49             : 
      50             :         lockdep_assert_held(&pi_state->pi_mutex.wait_lock);
      51             : 
      52           0 :         if (old_owner) {
      53           0 :                 raw_spin_lock(&old_owner->pi_lock);
      54           0 :                 WARN_ON(list_empty(&pi_state->list));
      55           0 :                 list_del_init(&pi_state->list);
      56           0 :                 raw_spin_unlock(&old_owner->pi_lock);
      57             :         }
      58             : 
      59           0 :         if (new_owner) {
      60           0 :                 raw_spin_lock(&new_owner->pi_lock);
      61           0 :                 WARN_ON(!list_empty(&pi_state->list));
      62           0 :                 list_add(&pi_state->list, &new_owner->pi_state_list);
      63           0 :                 pi_state->owner = new_owner;
      64           0 :                 raw_spin_unlock(&new_owner->pi_lock);
      65             :         }
      66           0 : }
      67             : 
      68           0 : void get_pi_state(struct futex_pi_state *pi_state)
      69             : {
      70           0 :         WARN_ON_ONCE(!refcount_inc_not_zero(&pi_state->refcount));
      71           0 : }
      72             : 
      73             : /*
      74             :  * Drops a reference to the pi_state object and frees or caches it
      75             :  * when the last reference is gone.
      76             :  */
      77           0 : void put_pi_state(struct futex_pi_state *pi_state)
      78             : {
      79           0 :         if (!pi_state)
      80             :                 return;
      81             : 
      82           0 :         if (!refcount_dec_and_test(&pi_state->refcount))
      83             :                 return;
      84             : 
      85             :         /*
      86             :          * If pi_state->owner is NULL, the owner is most probably dying
      87             :          * and has cleaned up the pi_state already.
      88             :          */
      89           0 :         if (pi_state->owner) {
      90             :                 unsigned long flags;
      91             : 
      92           0 :                 raw_spin_lock_irqsave(&pi_state->pi_mutex.wait_lock, flags);
      93           0 :                 pi_state_update_owner(pi_state, NULL);
      94           0 :                 rt_mutex_proxy_unlock(&pi_state->pi_mutex);
      95           0 :                 raw_spin_unlock_irqrestore(&pi_state->pi_mutex.wait_lock, flags);
      96             :         }
      97             : 
      98           0 :         if (current->pi_state_cache) {
      99           0 :                 kfree(pi_state);
     100             :         } else {
     101             :                 /*
     102             :                  * pi_state->list is already empty.
     103             :                  * clear pi_state->owner.
     104             :                  * refcount is at 0 - put it back to 1.
     105             :                  */
     106           0 :                 pi_state->owner = NULL;
     107           0 :                 refcount_set(&pi_state->refcount, 1);
     108           0 :                 current->pi_state_cache = pi_state;
     109             :         }
     110             : }
     111             : 
     112             : /*
     113             :  * We need to check the following states:
     114             :  *
     115             :  *      Waiter | pi_state | pi->owner | uTID      | uODIED | ?
     116             :  *
     117             :  * [1]  NULL   | ---      | ---       | 0         | 0/1    | Valid
     118             :  * [2]  NULL   | ---      | ---       | >0        | 0/1    | Valid
     119             :  *
     120             :  * [3]  Found  | NULL     | --        | Any       | 0/1    | Invalid
     121             :  *
     122             :  * [4]  Found  | Found    | NULL      | 0         | 1      | Valid
     123             :  * [5]  Found  | Found    | NULL      | >0        | 1      | Invalid
     124             :  *
     125             :  * [6]  Found  | Found    | task      | 0         | 1      | Valid
     126             :  *
     127             :  * [7]  Found  | Found    | NULL      | Any       | 0      | Invalid
     128             :  *
     129             :  * [8]  Found  | Found    | task      | ==taskTID | 0/1    | Valid
     130             :  * [9]  Found  | Found    | task      | 0         | 0      | Invalid
     131             :  * [10] Found  | Found    | task      | !=taskTID | 0/1    | Invalid
     132             :  *
     133             :  * [1]  Indicates that the kernel can acquire the futex atomically. We
     134             :  *      came here due to a stale FUTEX_WAITERS/FUTEX_OWNER_DIED bit.
     135             :  *
     136             :  * [2]  Valid, if TID does not belong to a kernel thread. If no matching
     137             :  *      thread is found then it indicates that the owner TID has died.
     138             :  *
     139             :  * [3]  Invalid. The waiter is queued on a non-PI futex.
     140             :  *
     141             :  * [4]  Valid state after exit_robust_list(), which sets the user space
     142             :  *      value to FUTEX_WAITERS | FUTEX_OWNER_DIED.
     143             :  *
     144             :  * [5]  The user space value got manipulated between exit_robust_list()
     145             :  *      and exit_pi_state_list().
     146             :  *
     147             :  * [6]  Valid state after exit_pi_state_list() which sets the new owner in
     148             :  *      the pi_state but cannot access the user space value.
     149             :  *
     150             :  * [7]  pi_state->owner can only be NULL when the OWNER_DIED bit is set.
     151             :  *
     152             :  * [8]  Owner and user space value match
     153             :  *
     154             :  * [9]  There is no transient state which sets the user space TID to 0
     155             :  *      except exit_robust_list(), but this is indicated by the
     156             :  *      FUTEX_OWNER_DIED bit. See [4]
     157             :  *
     158             :  * [10] There is no transient state which leaves owner and user space
     159             :  *      TID out of sync, except for one error case where the kernel is
     160             :  *      denied write access to the user address; see fixup_pi_state_owner().
     161             :  *
     162             :  *
     163             :  * Serialization and lifetime rules:
     164             :  *
     165             :  * hb->lock:
     166             :  *
     167             :  *      hb -> futex_q, relation
     168             :  *      futex_q -> pi_state, relation
     169             :  *
     170             :  *      (cannot be raw because hb can contain arbitrary amount
     171             :  *       of futex_q's)
     172             :  *
     173             :  * pi_mutex->wait_lock:
     174             :  *
     175             :  *      {uval, pi_state}
     176             :  *
     177             :  *      (and pi_mutex 'obviously')
     178             :  *
     179             :  * p->pi_lock:
     180             :  *
     181             :  *      p->pi_state_list -> pi_state->list, relation
     182             :  *      pi_mutex->owner -> pi_state->owner, relation
     183             :  *
     184             :  * pi_state->refcount:
     185             :  *
     186             :  *      pi_state lifetime
     187             :  *
     188             :  *
     189             :  * Lock order:
     190             :  *
     191             :  *   hb->lock
     192             :  *     pi_mutex->wait_lock
     193             :  *       p->pi_lock
     194             :  *
     195             :  */
     196             : 
     197             : /*
     198             :  * Validate that the existing waiter has a pi_state and sanity check
     199             :  * the pi_state against the user space value. If correct, attach to
     200             :  * it.
     201             :  */
     202           0 : static int attach_to_pi_state(u32 __user *uaddr, u32 uval,
     203             :                               struct futex_pi_state *pi_state,
     204             :                               struct futex_pi_state **ps)
     205             : {
     206           0 :         pid_t pid = uval & FUTEX_TID_MASK;
     207             :         u32 uval2;
     208             :         int ret;
     209             : 
     210             :         /*
     211             :          * Userspace might have messed up non-PI and PI futexes [3]
     212             :          */
     213           0 :         if (unlikely(!pi_state))
     214             :                 return -EINVAL;
     215             : 
     216             :         /*
     217             :          * We get here with hb->lock held, and having found a
     218             :          * futex_top_waiter(). This means that futex_lock_pi() of said futex_q
     219             :          * has dropped the hb->lock in between futex_queue() and futex_unqueue_pi(),
     220             :          * which in turn means that futex_lock_pi() still has a reference on
     221             :          * our pi_state.
     222             :          *
     223             :          * The waiter holding a reference on @pi_state also protects against
     224             :          * the unlocked put_pi_state() in futex_unlock_pi(), futex_lock_pi()
     225             :          * and futex_wait_requeue_pi() as it cannot go to 0 and consequently
     226             :          * free pi_state before we can take a reference ourselves.
     227             :          */
     228           0 :         WARN_ON(!refcount_read(&pi_state->refcount));
     229             : 
     230             :         /*
     231             :          * Now that we have a pi_state, we can acquire wait_lock
     232             :          * and do the state validation.
     233             :          */
     234           0 :         raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
     235             : 
     236             :         /*
     237             :          * Since {uval, pi_state} is serialized by wait_lock, and our current
     238             :          * uval was read without holding it, it can have changed. Verify it
     239             :          * still is what we expect it to be, otherwise retry the entire
     240             :          * operation.
     241             :          */
     242           0 :         if (futex_get_value_locked(&uval2, uaddr))
     243             :                 goto out_efault;
     244             : 
     245           0 :         if (uval != uval2)
     246             :                 goto out_eagain;
     247             : 
     248             :         /*
     249             :          * Handle the owner died case:
     250             :          */
     251           0 :         if (uval & FUTEX_OWNER_DIED) {
     252             :                 /*
     253             :                  * exit_pi_state_list() sets owner to NULL and wakes the
     254             :                  * topmost waiter. The task which acquires
     255             :                  * pi_state->pi_mutex will fix up the owner.
     256             :                  */
     257           0 :                 if (!pi_state->owner) {
     258             :                         /*
     259             :                          * No pi state owner, but the user space TID
     260             :                          * is not 0. Inconsistent state. [5]
     261             :                          */
     262           0 :                         if (pid)
     263             :                                 goto out_einval;
     264             :                         /*
     265             :                          * Take a ref on the state and return success. [4]
     266             :                          */
     267             :                         goto out_attach;
     268             :                 }
     269             : 
     270             :                 /*
     271             :                  * If TID is 0, then either the dying owner has not
     272             :                  * yet executed exit_pi_state_list() or some waiter
     273             :                  * acquired the rtmutex in the pi state, but did not
     274             :                  * yet fixup the TID in user space.
     275             :                  *
     276             :                  * Take a ref on the state and return success. [6]
     277             :                  */
     278           0 :                 if (!pid)
     279             :                         goto out_attach;
     280             :         } else {
     281             :                 /*
     282             :                  * If the owner died bit is not set, then the pi_state
     283             :                  * must have an owner. [7]
     284             :                  */
     285           0 :                 if (!pi_state->owner)
     286             :                         goto out_einval;
     287             :         }
     288             : 
     289             :         /*
     290             :          * Bail out if user space manipulated the futex value. If pi
     291             :          * state exists then the owner TID must be the same as the
     292             :          * user space TID. [9/10]
     293             :          */
     294           0 :         if (pid != task_pid_vnr(pi_state->owner))
     295             :                 goto out_einval;
     296             : 
     297             : out_attach:
     298           0 :         get_pi_state(pi_state);
     299           0 :         raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
     300           0 :         *ps = pi_state;
     301           0 :         return 0;
     302             : 
     303             : out_einval:
     304             :         ret = -EINVAL;
     305             :         goto out_error;
     306             : 
     307             : out_eagain:
     308             :         ret = -EAGAIN;
     309             :         goto out_error;
     310             : 
     311             : out_efault:
     312             :         ret = -EFAULT;
     313             :         goto out_error;
     314             : 
     315             : out_error:
     316           0 :         raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
     317           0 :         return ret;
     318             : }
     319             : 
     320           0 : static int handle_exit_race(u32 __user *uaddr, u32 uval,
     321             :                             struct task_struct *tsk)
     322             : {
     323             :         u32 uval2;
     324             : 
     325             :         /*
     326             :          * If the futex exit state is not yet FUTEX_STATE_DEAD, tell the
     327             :          * caller that the alleged owner is busy.
     328             :          */
     329           0 :         if (tsk && tsk->futex_state != FUTEX_STATE_DEAD)
     330             :                 return -EBUSY;
     331             : 
     332             :         /*
     333             :          * Reread the user space value to handle the following situation:
     334             :          *
     335             :          * CPU0                         CPU1
     336             :          *
     337             :          * sys_exit()                   sys_futex()
     338             :          *  do_exit()                    futex_lock_pi()
     339             :          *                                futex_lock_pi_atomic()
     340             :          *   exit_signals(tsk)              No waiters:
     341             :          *    tsk->flags |= PF_EXITING;          *uaddr == 0x00000PID
     342             :          *  mm_release(tsk)                 Set waiter bit
     343             :          *   exit_robust_list(tsk) {        *uaddr = 0x80000PID;
     344             :          *      Set owner died              attach_to_pi_owner() {
     345             :          *    *uaddr = 0xC0000000;           tsk = get_task(PID);
     346             :          *   }                               if (!(tsk->flags & PF_EXITING)) {
     347             :          *  ...                                attach();
     348             :          *  tsk->futex_state =               } else {
     349             :          *      FUTEX_STATE_DEAD;              if (tsk->futex_state !=
     350             :          *                                        FUTEX_STATE_DEAD)
     351             :          *                                       return -EAGAIN;
     352             :          *                                     return -ESRCH; <--- FAIL
     353             :          *                                   }
     354             :          *
     355             :          * Returning ESRCH unconditionally is wrong here because the
     356             :          * user space value has been changed by the exiting task.
     357             :          *
     358             :          * The same logic applies to the case where the exiting task is
     359             :          * already gone.
     360             :          */
     361           0 :         if (futex_get_value_locked(&uval2, uaddr))
     362             :                 return -EFAULT;
     363             : 
     364             :         /* If the user space value has changed, try again. */
     365           0 :         if (uval2 != uval)
     366             :                 return -EAGAIN;
     367             : 
     368             :         /*
     369             :          * The exiting task did not have a robust list, the robust list was
     370             :          * corrupted or the user space value in *uaddr is simply bogus.
     371             :          * Give up and tell user space.
     372             :          */
     373           0 :         return -ESRCH;
     374             : }
     375             : 
     376           0 : static void __attach_to_pi_owner(struct task_struct *p, union futex_key *key,
     377             :                                  struct futex_pi_state **ps)
     378             : {
     379             :         /*
     380             :          * No existing pi state. First waiter. [2]
     381             :          *
     382             :          * This creates pi_state. We hold hb->lock, which means nothing can
     383             :          * observe this state, so wait_lock is irrelevant.
     384             :          */
     385           0 :         struct futex_pi_state *pi_state = alloc_pi_state();
     386             : 
     387             :         /*
     388             :          * Initialize the pi_mutex in locked state and make @p
     389             :          * the owner of it:
     390             :          */
     391           0 :         rt_mutex_init_proxy_locked(&pi_state->pi_mutex, p);
     392             : 
     393             :         /* Store the key for possible exit cleanups: */
     394           0 :         pi_state->key = *key;
     395             : 
     396           0 :         WARN_ON(!list_empty(&pi_state->list));
     397           0 :         list_add(&pi_state->list, &p->pi_state_list);
     398             :         /*
     399             :          * Assignment without holding pi_state->pi_mutex.wait_lock is safe
     400             :          * because there is no concurrency as the object is not published yet.
     401             :          */
     402           0 :         pi_state->owner = p;
     403             : 
     404           0 :         *ps = pi_state;
     405           0 : }
     406             : /*
     407             :  * Lookup the task for the TID provided from user space and attach to
     408             :  * it after doing proper sanity checks.
     409             :  */
     410           0 : static int attach_to_pi_owner(u32 __user *uaddr, u32 uval, union futex_key *key,
     411             :                               struct futex_pi_state **ps,
     412             :                               struct task_struct **exiting)
     413             : {
     414           0 :         pid_t pid = uval & FUTEX_TID_MASK;
     415             :         struct task_struct *p;
     416             : 
     417             :         /*
     418             :          * We are the first waiter - try to look up the real owner and attach
     419             :          * the new pi_state to it, but bail out when TID = 0 [1]
     420             :          *
     421             :          * The !pid check is paranoid. None of the call sites should end up
     422             :          * with pid == 0, but better safe than sorry. Let the caller retry
     423             :          */
     424           0 :         if (!pid)
     425             :                 return -EAGAIN;
     426           0 :         p = find_get_task_by_vpid(pid);
     427           0 :         if (!p)
     428           0 :                 return handle_exit_race(uaddr, uval, NULL);
     429             : 
     430           0 :         if (unlikely(p->flags & PF_KTHREAD)) {
     431           0 :                 put_task_struct(p);
     432           0 :                 return -EPERM;
     433             :         }
     434             : 
     435             :         /*
     436             :          * We need to look at the task state to figure out whether the
     437             :          * task is exiting. To protect against the change of the task state
     438             :          * in futex_exit_release(), we do this protected by p->pi_lock:
     439             :          */
     440           0 :         raw_spin_lock_irq(&p->pi_lock);
     441           0 :         if (unlikely(p->futex_state != FUTEX_STATE_OK)) {
     442             :                 /*
     443             :                  * The task is on the way out. When the futex state is
     444             :                  * FUTEX_STATE_DEAD, we know that the task has finished
     445             :                  * the cleanup:
     446             :                  */
     447           0 :                 int ret = handle_exit_race(uaddr, uval, p);
     448             : 
     449           0 :                 raw_spin_unlock_irq(&p->pi_lock);
     450             :                 /*
     451             :                  * If the owner task is between FUTEX_STATE_EXITING and
     452             :                  * FUTEX_STATE_DEAD then store the task pointer and keep
     453             :                  * the reference on the task struct. The calling code will
     454             :                  * drop all locks, wait for the task to reach
     455             :                  * FUTEX_STATE_DEAD and then drop the refcount. This is
     456             :                  * required to prevent a live lock when the current task
     457             :                  * preempted the exiting task between the two states.
     458             :                  */
     459           0 :                 if (ret == -EBUSY)
     460           0 :                         *exiting = p;
     461             :                 else
     462           0 :                         put_task_struct(p);
     463             :                 return ret;
     464             :         }
     465             : 
     466           0 :         __attach_to_pi_owner(p, key, ps);
     467           0 :         raw_spin_unlock_irq(&p->pi_lock);
     468             : 
     469           0 :         put_task_struct(p);
     470             : 
     471           0 :         return 0;
     472             : }
     473             : 
     474             : static int lock_pi_update_atomic(u32 __user *uaddr, u32 uval, u32 newval)
     475             : {
     476             :         int err;
     477             :         u32 curval;
     478             : 
     479           0 :         if (unlikely(should_fail_futex(true)))
     480             :                 return -EFAULT;
     481             : 
     482           0 :         err = futex_cmpxchg_value_locked(&curval, uaddr, uval, newval);
     483           0 :         if (unlikely(err))
     484             :                 return err;
     485             : 
     486             :         /* If user space value changed, let the caller retry */
     487           0 :         return curval != uval ? -EAGAIN : 0;
     488             : }
     489             : 
     490             : /**
     491             :  * futex_lock_pi_atomic() - Atomic work required to acquire a pi aware futex
     492             :  * @uaddr:              the pi futex user address
     493             :  * @hb:                 the pi futex hash bucket
     494             :  * @key:                the futex key associated with uaddr and hb
     495             :  * @ps:                 the pi_state pointer where we store the result of the
     496             :  *                      lookup
     497             :  * @task:               the task to perform the atomic lock work for.  This will
     498             :  *                      be "current" except in the case of requeue pi.
     499             :  * @exiting:            Pointer to store the task pointer of the owner task
     500             :  *                      which is in the middle of exiting
     501             :  * @set_waiters:        force setting the FUTEX_WAITERS bit (1) or not (0)
     502             :  *
     503             :  * Return:
     504             :  *  -  0 - ready to wait;
     505             :  *  -  1 - acquired the lock;
     506             :  *  - <0 - error
     507             :  *
     508             :  * The hb->lock must be held by the caller.
     509             :  *
     510             :  * @exiting is only set when the return value is -EBUSY. If so, this holds
     511             :  * a refcount on the exiting task on return and the caller needs to drop it
     512             :  * after waiting for the exit to complete.
     513             :  */
     514           0 : int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb,
     515             :                          union futex_key *key,
     516             :                          struct futex_pi_state **ps,
     517             :                          struct task_struct *task,
     518             :                          struct task_struct **exiting,
     519             :                          int set_waiters)
     520             : {
     521           0 :         u32 uval, newval, vpid = task_pid_vnr(task);
     522             :         struct futex_q *top_waiter;
     523             :         int ret;
     524             : 
     525             :         /*
     526             :          * Read the user space value first so we can validate a few
     527             :          * things before proceeding further.
     528             :          */
     529           0 :         if (futex_get_value_locked(&uval, uaddr))
     530             :                 return -EFAULT;
     531             : 
     532           0 :         if (unlikely(should_fail_futex(true)))
     533             :                 return -EFAULT;
     534             : 
     535             :         /*
     536             :          * Detect deadlocks.
     537             :          */
     538           0 :         if ((unlikely((uval & FUTEX_TID_MASK) == vpid)))
     539             :                 return -EDEADLK;
     540             : 
     541           0 :         if ((unlikely(should_fail_futex(true))))
     542             :                 return -EDEADLK;
     543             : 
     544             :         /*
     545             :          * Lookup existing state first. If it exists, try to attach to
     546             :          * its pi_state.
     547             :          */
     548           0 :         top_waiter = futex_top_waiter(hb, key);
     549           0 :         if (top_waiter)
     550           0 :                 return attach_to_pi_state(uaddr, uval, top_waiter->pi_state, ps);
     551             : 
     552             :         /*
     553             :          * No waiter and user TID is 0. We are here because the
     554             :          * waiters or the owner died bit is set, because we were called
     555             :          * from the requeue_pi code, or because user space took the
     556             :          * syscall for some other reason.
     557             :          */
     558           0 :         if (!(uval & FUTEX_TID_MASK)) {
     559             :                 /*
     560             :                  * We take over the futex. No other waiters and the user space
     561             :                  * TID is 0. We preserve the owner died bit.
     562             :                  */
     563           0 :                 newval = uval & FUTEX_OWNER_DIED;
     564           0 :                 newval |= vpid;
     565             : 
     566             :                 /* The futex requeue_pi code can enforce the waiters bit */
     567           0 :                 if (set_waiters)
     568           0 :                         newval |= FUTEX_WAITERS;
     569             : 
     570           0 :                 ret = lock_pi_update_atomic(uaddr, uval, newval);
     571           0 :                 if (ret)
     572             :                         return ret;
     573             : 
     574             :                 /*
     575             :                  * If the waiter bit was requested the caller also needs PI
     576             :                  * state attached to the new owner of the user space futex.
     577             :                  *
     578             :                  * @task is guaranteed to be alive and it cannot be exiting
     579             :                  * because it is either sleeping or waiting in
     580             :                  * futex_requeue_pi_wakeup_sync().
     581             :                  *
     582             :                  * No need to do the full attach_to_pi_owner() exercise
     583             :                  * because @task is known and valid.
     584             :                  */
     585           0 :                 if (set_waiters) {
     586           0 :                         raw_spin_lock_irq(&task->pi_lock);
     587           0 :                         __attach_to_pi_owner(task, key, ps);
     588           0 :                         raw_spin_unlock_irq(&task->pi_lock);
     589             :                 }
     590             :                 return 1;
     591             :         }
     592             : 
     593             :         /*
     594             :          * First waiter. Set the waiters bit before attaching ourself to
     595             :          * the owner. If owner tries to unlock, it will be forced into
     596             :          * the kernel and blocked on hb->lock.
     597             :          */
     598           0 :         newval = uval | FUTEX_WAITERS;
     599           0 :         ret = lock_pi_update_atomic(uaddr, uval, newval);
     600           0 :         if (ret)
     601             :                 return ret;
     602             :         /*
     603             :          * If the update of the user space value succeeded, we try to
     604             :          * attach to the owner. If that fails, no harm done, we only
     605             :          * set the FUTEX_WAITERS bit in the user space variable.
     606             :          */
     607           0 :         return attach_to_pi_owner(uaddr, newval, key, ps, exiting);
     608             : }
     609             : 
     610             : /*
     611             :  * Caller must hold a reference on @pi_state and pi_mutex.wait_lock (dropped here).
     612             :  */
     613           0 : static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_pi_state *pi_state)
     614             : {
     615             :         struct rt_mutex_waiter *top_waiter;
     616             :         struct task_struct *new_owner;
     617           0 :         bool postunlock = false;
     618           0 :         DEFINE_RT_WAKE_Q(wqh);
     619             :         u32 curval, newval;
     620           0 :         int ret = 0;
     621             :         /* The futex is handed over to the top waiter of the rtmutex. */
     622           0 :         top_waiter = rt_mutex_top_waiter(&pi_state->pi_mutex);
     623           0 :         if (WARN_ON_ONCE(!top_waiter)) {
     624             :                 /*
     625             :                  * As per the comment in futex_unlock_pi() this should not happen.
     626             :                  *
     627             :                  * When this happens, give up our locks and try again, giving
     628             :                  * the futex_lock_pi() instance time to complete, either by
     629             :                  * waiting on the rtmutex or removing itself from the futex
     630             :                  * queue.
     631             :                  */
     632             :                 ret = -EAGAIN;
     633             :                 goto out_unlock;
     634             :         }
     635             : 
     636           0 :         new_owner = top_waiter->task;
     637             : 
     638             :         /*
     639             :          * We pass it to the next owner. The WAITERS bit is always kept
     640             :          * enabled while there is PI state around. We clear the owner
     641             :          * died bit, because we are the owner.
     642             :          */
     643           0 :         newval = FUTEX_WAITERS | task_pid_vnr(new_owner);
     644             : 
     645           0 :         if (unlikely(should_fail_futex(true))) {
     646             :                 ret = -EFAULT;
     647             :                 goto out_unlock;
     648             :         }
     649             : 
     650           0 :         ret = futex_cmpxchg_value_locked(&curval, uaddr, uval, newval);
     651           0 :         if (!ret && (curval != uval)) {
     652             :                 /*
     653             :                  * If an unconditional UNLOCK_PI operation (user space did not
     654             :                  * try the TID->0 transition) raced with a waiter setting the
     655             :                  * FUTEX_WAITERS flag between get_user() and locking the hash
     656             :                  * bucket lock, retry the operation.
     657             :                  */
     658           0 :                 if ((FUTEX_TID_MASK & curval) == uval)
     659             :                         ret = -EAGAIN;
     660             :                 else
     661           0 :                         ret = -EINVAL;
     662             :         }
     663             : 
     664           0 :         if (!ret) {
     665             :                 /*
     666             :                  * This is a point of no return; once we modified the uval
     667             :                  * there is no going back and subsequent operations must
     668             :                  * not fail.
     669             :                  */
     670           0 :                 pi_state_update_owner(pi_state, new_owner);
     671           0 :                 postunlock = __rt_mutex_futex_unlock(&pi_state->pi_mutex, &wqh);
     672             :         }
     673             : 
     674             : out_unlock:
     675           0 :         raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
     676             : 
     677           0 :         if (postunlock)
     678           0 :                 rt_mutex_postunlock(&wqh);
     679             :         /* 0 on success, -EAGAIN to make the caller retry, else a negative errno. */
     680           0 :         return ret;
     681             : }
     682             : 
     683           0 : static int __fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
     684             :                                   struct task_struct *argowner)
     685             : {
     686           0 :         struct futex_pi_state *pi_state = q->pi_state;
     687             :         struct task_struct *oldowner, *newowner;
     688             :         u32 uval, curval, newval, newtid;
     689           0 :         int err = 0;
     690             :         /* Snapshot the owner; it is rechecked after the locks were dropped in handle_err. */
     691           0 :         oldowner = pi_state->owner;
     692             : 
     693             :         /*
     694             :          * We are here because either:
     695             :          *
     696             :          *  - we stole the lock and pi_state->owner needs updating to reflect
     697             :          *    that (@argowner == current),
     698             :          *
     699             :          * or:
     700             :          *
     701             :          *  - someone stole our lock and we need to fix things to point to the
     702             :          *    new owner (@argowner == NULL).
     703             :          *
     704             :          * Either way, we have to replace the TID in the user space variable.
     705             :          * This must be atomic as we have to preserve the owner died bit here.
     706             :          *
     707             :          * Note: We write the user space value _before_ changing the pi_state
     708             :          * because we can fault here. Imagine swapped out pages or a fork
     709             :          * that marked all the anonymous memory readonly for cow.
     710             :          *
     711             :          * Modifying pi_state _before_ the user space value would leave the
     712             :          * pi_state in an inconsistent state when we fault here, because we
     713             :          * need to drop the locks to handle the fault. This might be observed
     714             :          * in the PID checks when attaching to PI state.
     715             :          */
     716             : retry:
     717           0 :         if (!argowner) {
     718           0 :                 if (oldowner != current) {
     719             :                         /*
     720             :                          * We raced against a concurrent self; things are
     721             :                          * already fixed up. Nothing to do.
     722             :                          */
     723             :                         return 0;
     724             :                 }
     725             : 
     726           0 :                 if (__rt_mutex_futex_trylock(&pi_state->pi_mutex)) {
     727             :                         /* We got the lock. pi_state is correct. Tell caller. */
     728             :                         return 1;
     729             :                 }
     730             : 
     731             :                 /*
     732             :                  * The trylock just failed, so either there is an owner or
     733             :                  * there is a higher priority waiter than this one.
     734             :                  */
     735           0 :                 newowner = rt_mutex_owner(&pi_state->pi_mutex);
     736             :                 /*
     737             :                  * If the higher priority waiter has not yet taken over the
     738             :                  * rtmutex then newowner is NULL. We can't return here with
     739             :                  * that state because it's inconsistent vs. the user space
     740             :                  * state. So drop the locks and try again. It's a valid
     741             :                  * situation and not any different from the other retry
     742             :                  * conditions.
     743             :                  */
     744           0 :                 if (unlikely(!newowner)) {
     745             :                         err = -EAGAIN;
     746             :                         goto handle_err;
     747             :                 }
     748             :         } else {
     749           0 :                 WARN_ON_ONCE(argowner != current);
     750           0 :                 if (oldowner == current) {
     751             :                         /*
     752             :                          * We raced against a concurrent self; things are
     753             :                          * already fixed up. Nothing to do.
     754             :                          */
     755             :                         return 1;
     756             :                 }
     757             :                 newowner = argowner;
     758             :         }
     759             : 
     760           0 :         newtid = task_pid_vnr(newowner) | FUTEX_WAITERS;
     761             :         /* No pi_state owner, i.e. the owner died? Then flag it in the user value. */
     762           0 :         if (!pi_state->owner)
     763           0 :                 newtid |= FUTEX_OWNER_DIED;
     764             : 
     765           0 :         err = futex_get_value_locked(&uval, uaddr);
     766           0 :         if (err)
     767             :                 goto handle_err;
     768             : 
     769             :         for (;;) { /* redo the cmpxchg with the value just read until it did not change */
     770           0 :                 newval = (uval & FUTEX_OWNER_DIED) | newtid;
     771             : 
     772           0 :                 err = futex_cmpxchg_value_locked(&curval, uaddr, uval, newval);
     773           0 :                 if (err)
     774             :                         goto handle_err;
     775             : 
     776           0 :                 if (curval == uval)
     777             :                         break;
     778           0 :                 uval = curval;
     779             :         }
     780             : 
     781             :         /*
     782             :          * We fixed up user space. Now we need to fix the pi_state
     783             :          * itself.
     784             :          */
     785           0 :         pi_state_update_owner(pi_state, newowner);
     786             : 
     787           0 :         return argowner == current; /* 1 if current is the new owner, else 0 */
     788             : 
     789             :         /*
     790             :          * In order to reschedule or handle a page fault, we need to drop the
     791             :          * locks here. In the case of a fault, this gives the other task
     792             :          * (either the highest priority waiter itself or the task which stole
     793             :          * the rtmutex) the chance to try the fixup of the pi_state. So once we
     794             :          * are back from handling the fault we need to check the pi_state after
     795             :          * reacquiring the locks and before trying to do another fixup. When
     796             :          * the fixup has been done already we simply return.
     797             :          *
     798             :          * Note: we hold both hb->lock and pi_mutex->wait_lock. We can safely
     799             :          * drop hb->lock since the caller owns the hb -> futex_q relation.
     800             :          * Dropping the pi_mutex->wait_lock requires the state revalidate.
     801             :          */
     802             : handle_err:
     803           0 :         raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
     804           0 :         spin_unlock(q->lock_ptr);
     805             : 
     806           0 :         switch (err) {
     807             :         case -EFAULT:
     808           0 :                 err = fault_in_user_writeable(uaddr);
     809             :                 break;
     810             : 
     811             :         case -EAGAIN:
     812           0 :                 cond_resched();
     813           0 :                 err = 0;
     814             :                 break;
     815             : 
     816             :         default:
     817           0 :                 WARN_ON_ONCE(1);
     818             :                 break;
     819             :         }
     820             : 
     821           0 :         spin_lock(q->lock_ptr);
     822           0 :         raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
     823             : 
     824             :         /*
     825             :          * Check if someone else fixed it for us:
     826             :          */
     827           0 :         if (pi_state->owner != oldowner)
     828           0 :                 return argowner == current;
     829             : 
     830             :         /* Retry if err was -EAGAIN or the fault-in succeeded */
     831           0 :         if (!err)
     832             :                 goto retry;
     833             : 
     834             :         /*
     835             :          * fault_in_user_writeable() failed so user state is immutable. At
     836             :          * best we can make the kernel state consistent but user state will
     837             :          * be most likely hosed and any subsequent unlock operation will be
     838             :          * rejected due to PI futex rule [10].
     839             :          *
     840             :          * Ensure that the rtmutex owner is also the pi_state owner despite
     841             :          * the user space value claiming something different. There is no
     842             :          * point in unlocking the rtmutex if current is the owner as it
     843             :          * would need to wait until the next waiter has taken the rtmutex
     844             :          * to guarantee consistent state. Keep it simple. Userspace asked
     845             :          * for this wrecked state.
     846             :          *
     847             :          * The rtmutex has an owner - either current or some other
     848             :          * task. See the EAGAIN loop above.
     849             :          */
     850           0 :         pi_state_update_owner(pi_state, rt_mutex_owner(&pi_state->pi_mutex));
     851             :         /* Non-zero error from fault_in_user_writeable() (or an unexpected errno). */
     852             :         return err;
     853             : }
     854             : 
     855             : static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
     856             :                                 struct task_struct *argowner)
     857             : {
     858           0 :         struct futex_pi_state *pi_state = q->pi_state;
     859             :         int ret;
     860             :         /* Locked wrapper around __fixup_pi_state_owner(); q->lock_ptr must already be held. */
     861             :         lockdep_assert_held(q->lock_ptr);
     862             :         /* The fixup runs under pi_mutex.wait_lock; its result is passed through unchanged. */
     863           0 :         raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
     864           0 :         ret = __fixup_pi_state_owner(uaddr, q, argowner);
     865           0 :         raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
     866             :         return ret;
     867             : }
     868             : 
     869             : /**
     870             :  * fixup_pi_owner() - Post lock pi_state and corner case management
     871             :  * @uaddr:      user address of the futex
     872             :  * @q:          futex_q (contains pi_state and access to the rt_mutex)
     873             :  * @locked:     if the attempt to take the rt_mutex succeeded (1) or not (0)
     874             :  *
     875             :  * After attempting to lock an rt_mutex, this function is called to clean up
     876             :  * the pi_state owner as well as handle race conditions that may allow us to
     877             :  * acquire the lock. Must be called with the hb lock held.
     878             :  *
     879             :  * Return:
     880             :  *  -  1 - success, lock taken;
     881             :  *  -  0 - success, lock not taken;
     882             :  *  - <0 - on error (-EFAULT)
     883             :  */
     884           0 : int fixup_pi_owner(u32 __user *uaddr, struct futex_q *q, int locked)
     885             : {
     886           0 :         if (locked) {
     887             :                 /*
     888             :                  * Got the lock. We might not be the anticipated owner if we
     889             :                  * did a lock-steal - fix up the PI-state in that case:
     890             :                  *
     891             :                  * Speculative pi_state->owner read (we don't hold wait_lock);
     892             :                  * since we own the lock pi_state->owner == current is the
     893             :                  * stable state, anything else needs more attention.
     894             :                  */
     895           0 :                 if (q->pi_state->owner != current)
     896           0 :                         return fixup_pi_state_owner(uaddr, q, current);
     897             :                 return 1; /* pi_state->owner is already current */
     898             :         }
     899             : 
     900             :         /*
     901             :          * If we didn't get the lock; check if anybody stole it from us. In
     902             :          * that case, we need to fix up the uval to point to them instead of
     903             :          * us, otherwise bad things happen (PI futex rule [10]).
     904             :          *
     905             :          * Another speculative read; pi_state->owner == current is unstable
     906             :          * but needs our attention.
     907             :          */
     908           0 :         if (q->pi_state->owner == current)
     909           0 :                 return fixup_pi_state_owner(uaddr, q, NULL);
     910             : 
     911             :         /*
     912             :          * Paranoia check. If we did not take the lock, then we should not be
     913             :          * the owner of the rt_mutex. Warn and establish consistent state.
     914             :          */
     915           0 :         if (WARN_ON_ONCE(rt_mutex_owner(&q->pi_state->pi_mutex) == current))
     916           0 :                 return fixup_pi_state_owner(uaddr, q, current);
     917             : 
     918             :         return 0; /* lock not taken and nothing to fix up */
     919             : }
     920             : 
     921             : /*
     922             :  * Userspace tried a 0 -> TID atomic transition of the futex value
     923             :  * and failed. The kernel side here does the whole locking operation:
     924             :  * if there are waiters then it will block as a consequence of relying
     925             :  * on rt-mutexes, it does PI, etc. (Due to races the kernel might see
     926             :  * a 0 value of the futex too.)
     927             :  *
     928             :  * Also serves as futex trylock_pi(): with @trylock set the rtmutex is only trylocked.
     929             :  */
     930           0 : int futex_lock_pi(u32 __user *uaddr, unsigned int flags, ktime_t *time, int trylock)
     931             : {
     932             :         struct hrtimer_sleeper timeout, *to;
     933           0 :         struct task_struct *exiting = NULL;
     934             :         struct rt_mutex_waiter rt_waiter;
     935             :         struct futex_hash_bucket *hb;
     936           0 :         struct futex_q q = futex_q_init;
     937             :         int res, ret;
     938             : 
     939             :         if (!IS_ENABLED(CONFIG_FUTEX_PI))
     940             :                 return -ENOSYS;
     941             :         /* Make sure current->pi_state_cache is populated before any lock is taken. */
     942           0 :         if (refill_pi_state_cache())
     943             :                 return -ENOMEM;
     944             : 
     945           0 :         to = futex_setup_timer(time, &timeout, flags, 0);
     946             : 
     947             : retry:
     948           0 :         ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q.key, FUTEX_WRITE);
     949           0 :         if (unlikely(ret != 0))
     950             :                 goto out;
     951             : 
     952             : retry_private:
     953           0 :         hb = futex_q_lock(&q);
     954             : 
     955           0 :         ret = futex_lock_pi_atomic(uaddr, hb, &q.key, &q.pi_state, current,
     956             :                                    &exiting, 0);
     957           0 :         if (unlikely(ret)) {
     958             :                 /*
     959             :                  * Atomic work succeeded and we got the lock,
     960             :                  * or failed. Either way, we do _not_ block.
     961             :                  */
     962           0 :                 switch (ret) {
     963             :                 case 1:
     964             :                         /* We got the lock. */
     965           0 :                         ret = 0;
     966           0 :                         goto out_unlock_put_key;
     967             :                 case -EFAULT:
     968             :                         goto uaddr_faulted;
     969             :                 case -EBUSY:
     970             :                 case -EAGAIN:
     971             :                         /*
     972             :                          * Two reasons for this:
     973             :                          * - EBUSY: Task is exiting and we just wait for the
     974             :                          *   exit to complete.
     975             :                          * - EAGAIN: The user space value changed.
     976             :                          */
     977           0 :                         futex_q_unlock(hb);
     978             :                         /*
     979             :                          * Handle the case where the owner is in the middle of
     980             :                          * exiting. Wait for the exit to complete otherwise
     981             :                          * this task might loop forever, aka. live lock.
     982             :                          */
     983           0 :                         wait_for_owner_exiting(ret, exiting);
     984           0 :                         cond_resched();
     985           0 :                         goto retry;
     986             :                 default:
     987             :                         goto out_unlock_put_key;
     988             :                 }
     989             :         }
     990             : 
     991           0 :         WARN_ON(!q.pi_state);
     992             : 
     993             :         /*
     994             :          * Only actually queue now that the atomic ops are done:
     995             :          */
     996           0 :         __futex_queue(&q, hb);
     997             : 
     998           0 :         if (trylock) {
     999           0 :                 ret = rt_mutex_futex_trylock(&q.pi_state->pi_mutex);
    1000             :                 /* Fixup the trylock return value: */
    1001           0 :                 ret = ret ? 0 : -EWOULDBLOCK;
    1002             :                 goto no_block;
    1003             :         }
    1004             : 
    1005           0 :         rt_mutex_init_waiter(&rt_waiter);
    1006             : 
    1007             :         /*
    1008             :          * On PREEMPT_RT, when hb->lock becomes an rt_mutex, we must not
    1009             :          * hold it while doing rt_mutex_start_proxy(), because then it will
    1010             :          * include hb->lock in the blocking chain, even though we'll not in
    1011             :          * fact hold it while blocking. This will lead it to report -EDEADLK
    1012             :          * and BUG when futex_unlock_pi() interleaves with this.
    1013             :          *
    1014             :          * Therefore acquire wait_lock while holding hb->lock, but drop the
    1015             :          * latter before calling __rt_mutex_start_proxy_lock(). This
    1016             :          * interleaves with futex_unlock_pi() -- which does a similar lock
    1017             :          * handoff -- such that the latter can observe the futex_q::pi_state
    1018             :          * before __rt_mutex_start_proxy_lock() is done.
    1019             :          */
    1020           0 :         raw_spin_lock_irq(&q.pi_state->pi_mutex.wait_lock);
    1021           0 :         spin_unlock(q.lock_ptr);
    1022             :         /*
    1023             :          * __rt_mutex_start_proxy_lock() unconditionally enqueues the @rt_waiter
    1024             :          * such that futex_unlock_pi() is guaranteed to observe the waiter when
    1025             :          * it sees the futex_q::pi_state.
    1026             :          */
    1027           0 :         ret = __rt_mutex_start_proxy_lock(&q.pi_state->pi_mutex, &rt_waiter, current);
    1028           0 :         raw_spin_unlock_irq(&q.pi_state->pi_mutex.wait_lock);
    1029             : 
    1030           0 :         if (ret) {
    1031           0 :                 if (ret == 1) /* NOTE(review): 1 presumably means "acquired without blocking" - confirm */
    1032           0 :                         ret = 0;
    1033             :                 goto cleanup;
    1034             :         }
    1035             : 
    1036           0 :         if (unlikely(to))
    1037           0 :                 hrtimer_sleeper_start_expires(to, HRTIMER_MODE_ABS);
    1038             : 
    1039           0 :         ret = rt_mutex_wait_proxy_lock(&q.pi_state->pi_mutex, to, &rt_waiter);
    1040             : 
    1041             : cleanup:
    1042           0 :         spin_lock(q.lock_ptr);
    1043             :         /*
    1044             :          * If we failed to acquire the lock (deadlock/signal/timeout), we must
    1045             :          * first acquire the hb->lock before removing the lock from the
    1046             :          * rt_mutex waitqueue, such that we can keep the hb and rt_mutex wait
    1047             :          * lists consistent.
    1048             :          *
    1049             :          * In particular; it is important that futex_unlock_pi() can not
    1050             :          * observe this inconsistency.
    1051             :          */
    1052           0 :         if (ret && !rt_mutex_cleanup_proxy_lock(&q.pi_state->pi_mutex, &rt_waiter))
    1053           0 :                 ret = 0;
    1054             : 
    1055             : no_block:
    1056             :         /*
    1057             :          * Fixup the pi_state owner and possibly acquire the lock if we
    1058             :          * haven't already.
    1059             :          */
    1060           0 :         res = fixup_pi_owner(uaddr, &q, !ret);
    1061             :         /*
    1062             :          * If fixup_pi_owner() returned an error, propagate that.  If it acquired
    1063             :          * the lock, clear our -ETIMEDOUT or -EINTR.
    1064             :          */
    1065           0 :         if (res)
    1066           0 :                 ret = (res < 0) ? res : 0;
    1067             : 
    1068           0 :         futex_unqueue_pi(&q);
    1069           0 :         spin_unlock(q.lock_ptr);
    1070             :         goto out;
    1071             : 
    1072             : out_unlock_put_key:
    1073           0 :         futex_q_unlock(hb);
    1074             : 
    1075             : out:
    1076           0 :         if (to) {
    1077           0 :                 hrtimer_cancel(&to->timer);
    1078           0 :                 destroy_hrtimer_on_stack(&to->timer);
    1079             :         }
    1080           0 :         return ret != -EINTR ? ret : -ERESTARTNOINTR; /* -EINTR is handed back as -ERESTARTNOINTR */
    1081             : 
    1082             : uaddr_faulted:
    1083           0 :         futex_q_unlock(hb);
    1084             : 
    1085           0 :         ret = fault_in_user_writeable(uaddr);
    1086           0 :         if (ret)
    1087             :                 goto out;
    1088             :         /* Private futexes reuse the key already looked up; shared ones look it up again. */
    1089           0 :         if (!(flags & FLAGS_SHARED))
    1090             :                 goto retry_private;
    1091             : 
    1092             :         goto retry;
    1093             : }
    1094             : 
    1095             : /*
    1096             :  * Userspace attempted a TID -> 0 atomic transition, and failed.
    1097             :  * This is the in-kernel slowpath: we look up the PI state (if any),
    1098             :  * and do the rt-mutex unlock.
    1099             :  */
    1100           0 : int futex_unlock_pi(u32 __user *uaddr, unsigned int flags)
    1101             : {
    1102           0 :         u32 curval, uval, vpid = task_pid_vnr(current);
    1103           0 :         union futex_key key = FUTEX_KEY_INIT;
    1104             :         struct futex_hash_bucket *hb;
    1105             :         struct futex_q *top_waiter;
    1106             :         int ret;
    1107             : 
    1108             :         if (!IS_ENABLED(CONFIG_FUTEX_PI))
    1109             :                 return -ENOSYS;
    1110             : 
    1111             : retry:
    1112           0 :         if (get_user(uval, uaddr))
    1113             :                 return -EFAULT;
    1114             :         /*
    1115             :          * We release only a lock we actually own:
    1116             :          */
    1117           0 :         if ((uval & FUTEX_TID_MASK) != vpid)
    1118             :                 return -EPERM;
    1119             : 
    1120           0 :         ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key, FUTEX_WRITE);
    1121           0 :         if (ret)
    1122             :                 return ret;
    1123             : 
    1124           0 :         hb = futex_hash(&key);
    1125           0 :         spin_lock(&hb->lock);
    1126             : 
    1127             :         /*
    1128             :          * Check waiters first. We do not trust user space values at
    1129             :          * all and we at least want to know if user space fiddled
    1130             :          * with the futex value instead of blindly unlocking.
    1131             :          */
    1132           0 :         top_waiter = futex_top_waiter(hb, &key);
    1133           0 :         if (top_waiter) {
    1134           0 :                 struct futex_pi_state *pi_state = top_waiter->pi_state;
    1135             : 
    1136           0 :                 ret = -EINVAL;
    1137           0 :                 if (!pi_state)
    1138             :                         goto out_unlock;
    1139             : 
    1140             :                 /*
    1141             :                  * If current does not own the pi_state then the futex is
    1142             :                  * inconsistent and user space fiddled with the futex value.
    1143             :                  */
    1144           0 :                 if (pi_state->owner != current)
    1145             :                         goto out_unlock;
    1146             : 
    1147           0 :                 get_pi_state(pi_state);
    1148             :                 /*
    1149             :                  * By taking wait_lock while still holding hb->lock, we ensure
    1150             :                  * there is no point where we hold neither; and therefore
    1151             :                  * wake_futex_pi() must observe a state consistent with what we
    1152             :                  * observed.
    1153             :                  *
    1154             :                  * In particular; this forces __rt_mutex_start_proxy() to
    1155             :                  * complete such that we're guaranteed to observe the
    1156             :                  * rt_waiter. Also see the WARN in wake_futex_pi().
    1157             :                  */
    1158           0 :                 raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
    1159           0 :                 spin_unlock(&hb->lock);
    1160             : 
    1161             :                 /* drops pi_state->pi_mutex.wait_lock */
    1162           0 :                 ret = wake_futex_pi(uaddr, uval, pi_state);
    1163             : 
    1164           0 :                 put_pi_state(pi_state);
    1165             : 
    1166             :                 /*
    1167             :                  * Success, we're done! No tricky corner cases.
    1168             :                  */
    1169           0 :                 if (!ret)
    1170             :                         return ret;
    1171             :                 /*
    1172             :                  * The atomic access to the futex value generated a
    1173             :                  * pagefault, so retry the user-access and the wakeup:
    1174             :                  */
    1175           0 :                 if (ret == -EFAULT)
    1176             :                         goto pi_faulted;
    1177             :                 /*
    1178             :                  * An unconditional UNLOCK_PI op raced against a waiter
    1179             :                  * setting the FUTEX_WAITERS bit. Try again.
    1180             :                  */
    1181           0 :                 if (ret == -EAGAIN)
    1182             :                         goto pi_retry;
    1183             :                 /*
    1184             :                  * wake_futex_pi has detected invalid state. Tell user
    1185             :                  * space.
    1186             :                  */
    1187             :                 return ret;
    1188             :         }
    1189             : 
    1190             :         /*
    1191             :          * We have no kernel internal state, i.e. no waiters in the
    1192             :          * kernel. Waiters which are about to queue themselves are stuck
    1193             :          * on hb->lock. So we can safely ignore them. We preserve
    1194             :          * neither the WAITERS bit nor the OWNER_DIED one. We are the
    1195             :          * owner.
    1196             :          */
    1197           0 :         if ((ret = futex_cmpxchg_value_locked(&curval, uaddr, uval, 0))) {
    1198           0 :                 spin_unlock(&hb->lock);
    1199           0 :                 switch (ret) {
    1200             :                 case -EFAULT:
    1201             :                         goto pi_faulted;
    1202             : 
    1203             :                 case -EAGAIN:
    1204             :                         goto pi_retry;
    1205             : 
    1206             :                 default:
    1207           0 :                         WARN_ON_ONCE(1);
    1208             :                         return ret;
    1209             :                 }
    1210             :         }
    1211             : 
    1212             :         /*
    1213             :          * If uval has changed, let user space handle it.
    1214             :          */
    1215           0 :         ret = (curval == uval) ? 0 : -EAGAIN;
    1216             : 
    1217             : out_unlock:
    1218           0 :         spin_unlock(&hb->lock);
    1219           0 :         return ret;
    1220             : 
    1221             : pi_retry:
    1222           0 :         cond_resched();
    1223           0 :         goto retry;
    1224             : 
    1225             : pi_faulted:
    1226             : 
    1227           0 :         ret = fault_in_user_writeable(uaddr);
    1228           0 :         if (!ret)
    1229             :                 goto retry;
    1230             : 
    1231             :         return ret;
    1232             : }
    1233             : 

Generated by: LCOV version 1.14