LCOV - code coverage report
Current view: top level - kernel/futex - waitwake.c (source / functions) Hit Total Coverage
Test: coverage.info Lines: 0 186 0.0 %
Date: 2023-07-19 18:55:55 Functions: 0 11 0.0 %

          Line data    Source code
       1             : // SPDX-License-Identifier: GPL-2.0-or-later
       2             : 
       3             : #include <linux/sched/task.h>
       4             : #include <linux/sched/signal.h>
       5             : #include <linux/freezer.h>
       6             : 
       7             : #include "futex.h"
       8             : 
       9             : /*
      10             :  * READ this before attempting to hack on futexes!
      11             :  *
      12             :  * Basic futex operation and ordering guarantees
      13             :  * =============================================
      14             :  *
      15             :  * The waiter reads the futex value in user space and calls
      16             :  * futex_wait(). This function computes the hash bucket and acquires
      17             :  * the hash bucket lock. After that it reads the futex user space value
      18             :  * again and verifies that the data has not changed. If it has not changed
      19             :  * it enqueues itself into the hash bucket, releases the hash bucket lock
      20             :  * and schedules.
      21             :  *
      22             :  * The waker side modifies the user space value of the futex and calls
      23             :  * futex_wake(). This function computes the hash bucket and acquires the
      24             :  * hash bucket lock. Then it looks for waiters on that futex in the hash
      25             :  * bucket and wakes them.
      26             :  *
      27             :  * In futex wake up scenarios where no tasks are blocked on a futex, taking
      28             :  * the hb spinlock can be avoided by returning early. In order for this
      29             :  * optimization to work, ordering guarantees must exist so that the waiter
      30             :  * being added to the list is acknowledged when the list is concurrently being
      31             :  * checked by the waker, avoiding scenarios like the following:
      32             :  *
      33             :  * CPU 0                               CPU 1
      34             :  * val = *futex;
      35             :  * sys_futex(WAIT, futex, val);
      36             :  *   futex_wait(futex, val);
      37             :  *   uval = *futex;
      38             :  *                                     *futex = newval;
      39             :  *                                     sys_futex(WAKE, futex);
      40             :  *                                       futex_wake(futex);
      41             :  *                                       if (queue_empty())
      42             :  *                                         return;
      43             :  *   if (uval == val)
      44             :  *      lock(hash_bucket(futex));
      45             :  *      queue();
      46             :  *     unlock(hash_bucket(futex));
      47             :  *     schedule();
      48             :  *
      49             :  * This would cause the waiter on CPU 0 to wait forever because it
      50             :  * missed the transition of the user space value from val to newval
      51             :  * and the waker did not find the waiter in the hash bucket queue.
      52             :  *
      53             :  * The correct serialization ensures that a waiter either observes
      54             :  * the changed user space value before blocking or is woken by a
      55             :  * concurrent waker:
      56             :  *
      57             :  * CPU 0                                 CPU 1
      58             :  * val = *futex;
      59             :  * sys_futex(WAIT, futex, val);
      60             :  *   futex_wait(futex, val);
      61             :  *
      62             :  *   waiters++; (a)
      63             :  *   smp_mb(); (A) <-- paired with -.
      64             :  *                                  |
      65             :  *   lock(hash_bucket(futex));      |
      66             :  *                                  |
      67             :  *   uval = *futex;                 |
      68             :  *                                  |        *futex = newval;
      69             :  *                                  |        sys_futex(WAKE, futex);
      70             :  *                                  |          futex_wake(futex);
      71             :  *                                  |
      72             :  *                                  `--------> smp_mb(); (B)
      73             :  *   if (uval == val)
      74             :  *     queue();
      75             :  *     unlock(hash_bucket(futex));
      76             :  *     schedule();                         if (waiters)
      77             :  *                                           lock(hash_bucket(futex));
      78             :  *   else                                    wake_waiters(futex);
      79             :  *     waiters--; (b)                        unlock(hash_bucket(futex));
      80             :  *
      81             :  * Where (A) orders the waiters increment and the futex value read through
      82             :  * atomic operations (see futex_hb_waiters_inc()) and where (B) orders the write
      83             :  * to futex and the waiters read (see futex_hb_waiters_pending()).
      84             :  *
      85             :  * This yields the following case (where X:=waiters, Y:=futex):
      86             :  *
      87             :  *      X = Y = 0
      88             :  *
      89             :  *      w[X]=1          w[Y]=1
      90             :  *      MB              MB
      91             :  *      r[Y]=y          r[X]=x
      92             :  *
      93             :  * Which guarantees that x==0 && y==0 is impossible; which translates back into
      94             :  * the guarantee that we cannot both miss the futex variable change and the
      95             :  * enqueue.
      96             :  *
      97             :  * Note that a new waiter is accounted for in (a) even when it is possible that
      98             :  * the wait call can return error, in which case we backtrack from it in (b).
      99             :  * Refer to the comment in futex_q_lock().
     100             :  *
     101             :  * Similarly, in order to account for waiters being requeued on another
     102             :  * address we always increment the waiters for the destination bucket before
     103             :  * acquiring the lock. We then decrement them again after releasing it -
     104             :  * the code that actually moves the futex(es) between hash buckets (requeue_futex)
     105             :  * will do the additional required waiter count housekeeping. This is done for
     106             :  * double_lock_hb() and double_unlock_hb(), respectively.
     107             :  */
     108             : 
     109             : /*
     110             :  * The hash bucket lock must be held when this is called.
     111             :  * Afterwards, the futex_q must not be accessed. Callers
     112             :  * must ensure to later call wake_up_q() for the actual
     113             :  * wakeups to occur.
     114             :  */
     115           0 : void futex_wake_mark(struct wake_q_head *wake_q, struct futex_q *q)
     116             : {
     117           0 :         struct task_struct *p = q->task;
     118             : 
     119           0 :         if (WARN(q->pi_state || q->rt_waiter, "refusing to wake PI futex\n"))
     120             :                 return;
     121             : 
     122           0 :         get_task_struct(p);
     123           0 :         __futex_unqueue(q);
     124             :         /*
     125             :          * The waiting task can free the futex_q as soon as q->lock_ptr = NULL
     126             :          * is written, without taking any locks. This is possible in the event
     127             :          * of a spurious wakeup, for example. A memory barrier is required here
     128             :          * to prevent the following store to lock_ptr from getting ahead of the
     129             :          * plist_del in __futex_unqueue().
     130             :          */
     131           0 :         smp_store_release(&q->lock_ptr, NULL);
     132             : 
     133             :         /*
     134             :          * Queue the task for later wakeup for after we've released
     135             :          * the hb->lock.
     136             :          */
     137           0 :         wake_q_add_safe(wake_q, p);
     138             : }
     139             : 
     140             : /*
     141             :  * Wake up waiters matching bitset queued on this futex (uaddr).
     142             :  */
     143           0 : int futex_wake(u32 __user *uaddr, unsigned int flags, int nr_wake, u32 bitset)
     144             : {
     145             :         struct futex_hash_bucket *hb;
     146             :         struct futex_q *this, *next;
     147           0 :         union futex_key key = FUTEX_KEY_INIT;
     148             :         int ret;
     149           0 :         DEFINE_WAKE_Q(wake_q);
     150             : 
     151           0 :         if (!bitset)
     152             :                 return -EINVAL;
     153             : 
     154           0 :         ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key, FUTEX_READ);
     155           0 :         if (unlikely(ret != 0))
     156             :                 return ret;
     157             : 
     158           0 :         hb = futex_hash(&key);
     159             : 
     160             :         /* Make sure we really have tasks to wakeup */
     161           0 :         if (!futex_hb_waiters_pending(hb))
     162             :                 return ret;
     163             : 
     164           0 :         spin_lock(&hb->lock);
     165             : 
     166           0 :         plist_for_each_entry_safe(this, next, &hb->chain, list) {
     167           0 :                 if (futex_match (&this->key, &key)) {
     168           0 :                         if (this->pi_state || this->rt_waiter) {
     169             :                                 ret = -EINVAL;
     170             :                                 break;
     171             :                         }
     172             : 
     173             :                         /* Check if one of the bits is set in both bitsets */
     174           0 :                         if (!(this->bitset & bitset))
     175           0 :                                 continue;
     176             : 
     177           0 :                         futex_wake_mark(&wake_q, this);
     178           0 :                         if (++ret >= nr_wake)
     179             :                                 break;
     180             :                 }
     181             :         }
     182             : 
     183           0 :         spin_unlock(&hb->lock);
     184           0 :         wake_up_q(&wake_q);
     185           0 :         return ret;
     186             : }
     187             : 
     188           0 : static int futex_atomic_op_inuser(unsigned int encoded_op, u32 __user *uaddr)
     189             : {
     190           0 :         unsigned int op =         (encoded_op & 0x70000000) >> 28;
     191           0 :         unsigned int cmp =        (encoded_op & 0x0f000000) >> 24;
     192           0 :         int oparg = sign_extend32((encoded_op & 0x00fff000) >> 12, 11);
     193           0 :         int cmparg = sign_extend32(encoded_op & 0x00000fff, 11);
     194             :         int oldval, ret;
     195             : 
     196           0 :         if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) {
     197           0 :                 if (oparg < 0 || oparg > 31) {
     198             :                         char comm[sizeof(current->comm)];
     199             :                         /*
     200             :                          * kill this print and return -EINVAL when userspace
     201             :                          * is sane again
     202             :                          */
     203           0 :                         pr_info_ratelimited("futex_wake_op: %s tries to shift op by %d; fix this program\n",
     204             :                                         get_task_comm(comm, current), oparg);
     205           0 :                         oparg &= 31;
     206             :                 }
     207           0 :                 oparg = 1 << oparg;
     208             :         }
     209             : 
     210           0 :         pagefault_disable();
     211           0 :         ret = arch_futex_atomic_op_inuser(op, oparg, &oldval, uaddr);
     212           0 :         pagefault_enable();
     213           0 :         if (ret)
     214             :                 return ret;
     215             : 
     216           0 :         switch (cmp) {
     217             :         case FUTEX_OP_CMP_EQ:
     218           0 :                 return oldval == cmparg;
     219             :         case FUTEX_OP_CMP_NE:
     220           0 :                 return oldval != cmparg;
     221             :         case FUTEX_OP_CMP_LT:
     222           0 :                 return oldval < cmparg;
     223             :         case FUTEX_OP_CMP_GE:
     224           0 :                 return oldval >= cmparg;
     225             :         case FUTEX_OP_CMP_LE:
     226           0 :                 return oldval <= cmparg;
     227             :         case FUTEX_OP_CMP_GT:
     228           0 :                 return oldval > cmparg;
     229             :         default:
     230             :                 return -ENOSYS;
     231             :         }
     232             : }
     233             : 
     234             : /*
     235             :  * Wake up all waiters hashed on the physical page that is mapped
     236             :  * to this virtual address:
     237             :  */
     238           0 : int futex_wake_op(u32 __user *uaddr1, unsigned int flags, u32 __user *uaddr2,
     239             :                   int nr_wake, int nr_wake2, int op)
     240             : {
     241           0 :         union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT;
     242             :         struct futex_hash_bucket *hb1, *hb2;
     243             :         struct futex_q *this, *next;
     244             :         int ret, op_ret;
     245           0 :         DEFINE_WAKE_Q(wake_q);
     246             : 
     247             : retry:
     248           0 :         ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1, FUTEX_READ);
     249           0 :         if (unlikely(ret != 0))
     250             :                 return ret;
     251           0 :         ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, FUTEX_WRITE);
     252           0 :         if (unlikely(ret != 0))
     253             :                 return ret;
     254             : 
     255           0 :         hb1 = futex_hash(&key1);
     256           0 :         hb2 = futex_hash(&key2);
     257             : 
     258             : retry_private:
     259           0 :         double_lock_hb(hb1, hb2);
     260           0 :         op_ret = futex_atomic_op_inuser(op, uaddr2);
     261           0 :         if (unlikely(op_ret < 0)) {
     262           0 :                 double_unlock_hb(hb1, hb2);
     263             : 
     264           0 :                 if (!IS_ENABLED(CONFIG_MMU) ||
     265           0 :                     unlikely(op_ret != -EFAULT && op_ret != -EAGAIN)) {
     266             :                         /*
     267             :                          * we don't get EFAULT from MMU faults if we don't have
     268             :                          * an MMU, but we might get them from range checking
     269             :                          */
     270             :                         ret = op_ret;
     271             :                         return ret;
     272             :                 }
     273             : 
     274           0 :                 if (op_ret == -EFAULT) {
     275           0 :                         ret = fault_in_user_writeable(uaddr2);
     276           0 :                         if (ret)
     277             :                                 return ret;
     278             :                 }
     279             : 
     280           0 :                 cond_resched();
     281           0 :                 if (!(flags & FLAGS_SHARED))
     282             :                         goto retry_private;
     283             :                 goto retry;
     284             :         }
     285             : 
     286           0 :         plist_for_each_entry_safe(this, next, &hb1->chain, list) {
     287           0 :                 if (futex_match (&this->key, &key1)) {
     288           0 :                         if (this->pi_state || this->rt_waiter) {
     289             :                                 ret = -EINVAL;
     290             :                                 goto out_unlock;
     291             :                         }
     292           0 :                         futex_wake_mark(&wake_q, this);
     293           0 :                         if (++ret >= nr_wake)
     294             :                                 break;
     295             :                 }
     296             :         }
     297             : 
     298           0 :         if (op_ret > 0) {
     299           0 :                 op_ret = 0;
     300           0 :                 plist_for_each_entry_safe(this, next, &hb2->chain, list) {
     301           0 :                         if (futex_match (&this->key, &key2)) {
     302           0 :                                 if (this->pi_state || this->rt_waiter) {
     303             :                                         ret = -EINVAL;
     304             :                                         goto out_unlock;
     305             :                                 }
     306           0 :                                 futex_wake_mark(&wake_q, this);
     307           0 :                                 if (++op_ret >= nr_wake2)
     308             :                                         break;
     309             :                         }
     310             :                 }
     311           0 :                 ret += op_ret;
     312             :         }
     313             : 
     314             : out_unlock:
     315           0 :         double_unlock_hb(hb1, hb2);
     316           0 :         wake_up_q(&wake_q);
     317           0 :         return ret;
     318             : }
     319             : 
     320             : static long futex_wait_restart(struct restart_block *restart);
     321             : 
     322             : /**
     323             :  * futex_wait_queue() - futex_queue() and wait for wakeup, timeout, or signal
     324             :  * @hb:         the futex hash bucket, must be locked by the caller
     325             :  * @q:          the futex_q to queue up on
     326             :  * @timeout:    the prepared hrtimer_sleeper, or null for no timeout
     327             :  */
     328           0 : void futex_wait_queue(struct futex_hash_bucket *hb, struct futex_q *q,
     329             :                             struct hrtimer_sleeper *timeout)
     330             : {
     331             :         /*
     332             :          * The task state is guaranteed to be set before another task can
     333             :          * wake it. set_current_state() is implemented using smp_store_mb() and
     334             :          * futex_queue() calls spin_unlock() upon completion, both serializing
     335             :          * access to the hash list and forcing another memory barrier.
     336             :          */
     337           0 :         set_current_state(TASK_INTERRUPTIBLE|TASK_FREEZABLE);
     338           0 :         futex_queue(q, hb);
     339             : 
     340             :         /* Arm the timer */
     341           0 :         if (timeout)
     342           0 :                 hrtimer_sleeper_start_expires(timeout, HRTIMER_MODE_ABS);
     343             : 
     344             :         /*
     345             :          * If we have been removed from the hash list, then another task
     346             :          * has tried to wake us, and we can skip the call to schedule().
     347             :          */
     348           0 :         if (likely(!plist_node_empty(&q->list))) {
     349             :                 /*
     350             :                  * If the timer has already expired, current will already be
     351             :                  * flagged for rescheduling. Only call schedule if there
     352             :                  * is no timeout, or if it has yet to expire.
     353             :                  */
     354           0 :                 if (!timeout || timeout->task)
     355           0 :                         schedule();
     356             :         }
     357           0 :         __set_current_state(TASK_RUNNING);
     358           0 : }
     359             : 
     360             : /**
     361             :  * unqueue_multiple - Remove various futexes from their hash bucket
     362             :  * @v:     The list of futexes to unqueue
     363             :  * @count: Number of futexes in the list
     364             :  *
     365             :  * Helper to unqueue a list of futexes. This can't fail.
     366             :  *
     367             :  * Return:
     368             :  *  - >=0 - Index of the last futex that was awoken;
     369             :  *  - -1  - No futex was awoken
     370             :  */
     371             : static int unqueue_multiple(struct futex_vector *v, int count)
     372             : {
     373             :         int ret = -1, i;
     374             : 
     375           0 :         for (i = 0; i < count; i++) {
     376           0 :                 if (!futex_unqueue(&v[i].q))
     377           0 :                         ret = i;
     378             :         }
     379             : 
     380             :         return ret;
     381             : }
     382             : 
     383             : /**
     384             :  * futex_wait_multiple_setup - Prepare to wait and enqueue multiple futexes
     385             :  * @vs:         The futex list to wait on
     386             :  * @count:      The size of the list
     387             :  * @woken:      Index of the last woken futex, if any. Used to notify the
     388             :  *              caller that it can return this index to userspace (return parameter)
     389             :  *
     390             :  * Prepare multiple futexes in a single step and enqueue them. This may fail if
     391             :  * the futex list is invalid or if any futex was already awoken. On success the
     392             :  * task is ready to sleep interruptibly.
     393             :  *
     394             :  * Return:
     395             :  *  -  1 - One of the futexes was woken by another thread
     396             :  *  -  0 - Success
     397             :  *  - <0 - -EFAULT, -EWOULDBLOCK or -EINVAL
     398             :  */
     399           0 : static int futex_wait_multiple_setup(struct futex_vector *vs, int count, int *woken)
     400             : {
     401             :         struct futex_hash_bucket *hb;
     402           0 :         bool retry = false;
     403             :         int ret, i;
     404             :         u32 uval;
     405             : 
     406             :         /*
     407             :          * Enqueuing multiple futexes is tricky, because we need to enqueue
     408             :          * each futex on the list before dealing with the next one to avoid
     409             :          * deadlocking on the hash bucket. But, before enqueuing, we need to
     410             :          * make sure that current->state is TASK_INTERRUPTIBLE, so we don't
     411             :          * lose any wake events, which cannot be done before the get_futex_key
     412             :          * of the next key, because it calls get_user_pages, which can sleep.
     413             :          * Thus, we fetch the list of futexes keys in two steps, by first
     414             :          * pinning all the memory keys in the futex key, and only then we read
     415             :          * each key and queue the corresponding futex.
     416             :          *
     417             :          * Private futexes don't need to recalculate the hash on retry, so skip
     418             :          * get_futex_key() when retrying.
     419             :          */
     420             : retry:
     421           0 :         for (i = 0; i < count; i++) {
     422           0 :                 if ((vs[i].w.flags & FUTEX_PRIVATE_FLAG) && retry)
     423           0 :                         continue;
     424             : 
     425           0 :                 ret = get_futex_key(u64_to_user_ptr(vs[i].w.uaddr),
     426             :                                     !(vs[i].w.flags & FUTEX_PRIVATE_FLAG),
     427             :                                     &vs[i].q.key, FUTEX_READ);
     428             : 
     429           0 :                 if (unlikely(ret))
     430             :                         return ret;
     431             :         }
     432             : 
     433           0 :         set_current_state(TASK_INTERRUPTIBLE|TASK_FREEZABLE);
     434             : 
     435           0 :         for (i = 0; i < count; i++) {
     436           0 :                 u32 __user *uaddr = (u32 __user *)(unsigned long)vs[i].w.uaddr;
     437           0 :                 struct futex_q *q = &vs[i].q;
     438           0 :                 u32 val = (u32)vs[i].w.val;
     439             : 
     440           0 :                 hb = futex_q_lock(q);
     441           0 :                 ret = futex_get_value_locked(&uval, uaddr);
     442             : 
     443           0 :                 if (!ret && uval == val) {
     444             :                         /*
     445             :                          * The bucket lock can't be held while dealing with the
     446             :                          * next futex. Queue each futex at this moment so hb can
     447             :                          * be unlocked.
     448             :                          */
     449           0 :                         futex_queue(q, hb);
     450           0 :                         continue;
     451             :                 }
     452             : 
     453           0 :                 futex_q_unlock(hb);
     454           0 :                 __set_current_state(TASK_RUNNING);
     455             : 
     456             :                 /*
     457             :                  * Even if something went wrong, if we find out that a futex
     458             :                  * was woken, we don't return error and return this index to
     459             :                  * userspace
     460             :                  */
     461           0 :                 *woken = unqueue_multiple(vs, i);
     462           0 :                 if (*woken >= 0)
     463             :                         return 1;
     464             : 
     465           0 :                 if (ret) {
     466             :                         /*
     467             :                          * If we need to handle a page fault, we need to do so
     468             :                          * without any lock and any enqueued futex (otherwise
     469             :                          * we could lose some wakeup). So we do it here, after
     470             :                          * undoing all the work done so far. On success, we
     471             :                          * retry all the work.
     472             :                          */
     473           0 :                         if (get_user(uval, uaddr))
     474             :                                 return -EFAULT;
     475             : 
     476             :                         retry = true;
     477             :                         goto retry;
     478             :                 }
     479             : 
     480           0 :                 if (uval != val)
     481             :                         return -EWOULDBLOCK;
     482             :         }
     483             : 
     484             :         return 0;
     485             : }
     486             : 
     487             : /**
     488             :  * futex_sleep_multiple - Check sleeping conditions and sleep
     489             :  * @vs:    List of futexes to wait for
     490             :  * @count: Length of vs
     491             :  * @to:    Timeout
     492             :  *
     493             :  * Sleep if and only if the timeout hasn't expired and no futex on the list has
     494             :  * been woken up.
     495             :  */
     496           0 : static void futex_sleep_multiple(struct futex_vector *vs, unsigned int count,
     497             :                                  struct hrtimer_sleeper *to)
     498             : {
     499           0 :         if (to && !to->task)
     500             :                 return;
     501             : 
     502           0 :         for (; count; count--, vs++) {
     503           0 :                 if (!READ_ONCE(vs->q.lock_ptr))
     504             :                         return;
     505             :         }
     506             : 
     507           0 :         schedule();
     508             : }
     509             : 
     510             : /**
     511             :  * futex_wait_multiple - Prepare to wait on and enqueue several futexes
     512             :  * @vs:         The list of futexes to wait on
     513             :  * @count:      The number of objects
     514             :  * @to:         Timeout before giving up and returning to userspace
     515             :  *
     516             :  * Entry point for the FUTEX_WAIT_MULTIPLE futex operation, this function
     517             :  * sleeps on a group of futexes and returns on the first futex that is
     518             :  * woken, or after the timeout has elapsed.
     519             :  *
     520             :  * Return:
     521             :  *  - >=0 - Hint to the futex that was awoken
     522             :  *  - <0  - On error
     523             :  */
     524           0 : int futex_wait_multiple(struct futex_vector *vs, unsigned int count,
     525             :                         struct hrtimer_sleeper *to)
     526             : {
     527           0 :         int ret, hint = 0;
     528             : 
     529           0 :         if (to)
     530           0 :                 hrtimer_sleeper_start_expires(to, HRTIMER_MODE_ABS);
     531             : 
     532             :         while (1) {
     533           0 :                 ret = futex_wait_multiple_setup(vs, count, &hint);
     534           0 :                 if (ret) {
     535           0 :                         if (ret > 0) {
     536             :                                 /* A futex was woken during setup */
     537           0 :                                 ret = hint;
     538             :                         }
     539             :                         return ret;
     540             :                 }
     541             : 
     542           0 :                 futex_sleep_multiple(vs, count, to);
     543             : 
     544           0 :                 __set_current_state(TASK_RUNNING);
     545             : 
     546           0 :                 ret = unqueue_multiple(vs, count);
     547           0 :                 if (ret >= 0)
     548             :                         return ret;
     549             : 
     550           0 :                 if (to && !to->task)
     551             :                         return -ETIMEDOUT;
     552           0 :                 else if (signal_pending(current))
     553             :                         return -ERESTARTSYS;
     554             :                 /*
     555             :                  * The final case is a spurious wakeup, for
     556             :                  * which just retry.
     557             :                  */
     558             :         }
     559             : }
     560             : 
     561             : /**
     562             :  * futex_wait_setup() - Prepare to wait on a futex
     563             :  * @uaddr:      the futex userspace address
     564             :  * @val:        the expected value
     565             :  * @flags:      futex flags (FLAGS_SHARED, etc.)
     566             :  * @q:          the associated futex_q
     567             :  * @hb:         storage for hash_bucket pointer to be returned to caller
     568             :  *
     569             :  * Setup the futex_q and locate the hash_bucket.  Get the futex value and
     570             :  * compare it with the expected value.  Handle atomic faults internally.
     571             :  * Return with the hb lock held on success, and unlocked on failure.
     572             :  *
     573             :  * Return:
     574             :  *  -  0 - uaddr contains val and hb has been locked;
     575             :  *  - <0 - -EFAULT or -EWOULDBLOCK (uaddr does not contain val) and hb is unlocked
     576             :  */
     577           0 : int futex_wait_setup(u32 __user *uaddr, u32 val, unsigned int flags,
     578             :                      struct futex_q *q, struct futex_hash_bucket **hb)
     579             : {
     580             :         u32 uval;
     581             :         int ret;
     582             : 
     583             :         /*
     584             :          * Access the page AFTER the hash-bucket is locked.
     585             :          * Order is important:
     586             :          *
     587             :          *   Userspace waiter: val = var; if (cond(val)) futex_wait(&var, val);
     588             :          *   Userspace waker:  if (cond(var)) { var = new; futex_wake(&var); }
     589             :          *
     590             :          * The basic logical guarantee of a futex is that it blocks ONLY
     591             :          * if cond(var) is known to be true at the time of blocking, for
     592             :          * any cond.  If we locked the hash-bucket after testing *uaddr, that
     593             :          * would open a race condition where we could block indefinitely with
     594             :          * cond(var) false, which would violate the guarantee.
     595             :          *
     596             :          * On the other hand, we insert q and release the hash-bucket only
     597             :          * after testing *uaddr.  This guarantees that futex_wait() will NOT
     598             :          * absorb a wakeup if *uaddr does not match the desired values
     599             :          * while the syscall executes.
     600             :          */
     601             : retry:
     602           0 :         ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q->key, FUTEX_READ);
     603           0 :         if (unlikely(ret != 0))
     604             :                 return ret;
     605             : 
     606             : retry_private:
     607           0 :         *hb = futex_q_lock(q);
     608             : 
     609           0 :         ret = futex_get_value_locked(&uval, uaddr);
     610             :         /* If the locked read failed, drop the hb lock, re-read with get_user() and retry. */
     611           0 :         if (ret) {
     612           0 :                 futex_q_unlock(*hb);
     613             : 
     614           0 :                 ret = get_user(uval, uaddr);
     615           0 :                 if (ret)
     616             :                         return ret;
     617             :                 /* Private futexes reuse the key computed above; shared ones look it up again. */
     618           0 :                 if (!(flags & FLAGS_SHARED))
     619             :                         goto retry_private;
     620             : 
     621             :                 goto retry;
     622             :         }
     623             : 
     624           0 :         if (uval != val) {
     625           0 :                 futex_q_unlock(*hb);
     626           0 :                 ret = -EWOULDBLOCK;
     627             :         }
     628             : 
     629             :         return ret;
     630             : }
     631             : 
     632           0 : int futex_wait(u32 __user *uaddr, unsigned int flags, u32 val, ktime_t *abs_time, u32 bitset)
     633             : {
     634             :         struct hrtimer_sleeper timeout, *to;
     635             :         struct restart_block *restart;
     636             :         struct futex_hash_bucket *hb;
     637           0 :         struct futex_q q = futex_q_init;
     638             :         int ret;
     639             :         /* Block on uaddr while it still holds val; a zero bitset is rejected. */
     640           0 :         if (!bitset)
     641             :                 return -EINVAL;
     642           0 :         q.bitset = bitset;
     643             : 
     644           0 :         to = futex_setup_timer(abs_time, &timeout, flags,
     645           0 :                                current->timer_slack_ns);
     646             : retry:
     647             :         /*
     648             :          * Prepare to wait on uaddr. On success, it holds hb->lock and q
     649             :          * is initialized.
     650             :          */
     651           0 :         ret = futex_wait_setup(uaddr, val, flags, &q, &hb);
     652           0 :         if (ret)
     653             :                 goto out;
     654             : 
     655             :         /* futex_queue and wait for wakeup, timeout, or a signal. */
     656           0 :         futex_wait_queue(hb, &q, to);
     657             : 
     658             :         /* futex_unqueue() returning 0 means we were already woken and unqueued: success. */
     659           0 :         ret = 0;
     660           0 :         if (!futex_unqueue(&q))
     661             :                 goto out;
     662           0 :         ret = -ETIMEDOUT;
     663           0 :         if (to && !to->task)
     664             :                 goto out;
     665             : 
     666             :         /*
     667             :          * We expect signal_pending(current), but we might be the
     668             :          * victim of a spurious wakeup as well.
     669             :          */
     670           0 :         if (!signal_pending(current))
     671             :                 goto retry;
     672             :         /* Signal pending: without a timeout, plain -ERESTARTSYS is returned. */
     673           0 :         ret = -ERESTARTSYS;
     674           0 :         if (!abs_time)
     675             :                 goto out;
     676             :         /* With a timeout, save the arguments so futex_wait_restart() can re-issue the wait. */
     677           0 :         restart = &current->restart_block;
     678           0 :         restart->futex.uaddr = uaddr;
     679           0 :         restart->futex.val = val;
     680           0 :         restart->futex.time = *abs_time;
     681           0 :         restart->futex.bitset = bitset;
     682           0 :         restart->futex.flags = flags | FLAGS_HAS_TIMEOUT;
     683             : 
     684           0 :         ret = set_restart_fn(restart, futex_wait_restart);
     685             : 
     686             : out:
     687           0 :         if (to) {
     688           0 :                 hrtimer_cancel(&to->timer);
     689           0 :                 destroy_hrtimer_on_stack(&to->timer);
     690             :         }
     691             :         return ret;
     692             : }
     693             : 
     694           0 : static long futex_wait_restart(struct restart_block *restart)
     695             : {
     696           0 :         u32 __user *uaddr = restart->futex.uaddr;
     697           0 :         ktime_t t, *tp = NULL;
     698             :         /* Restart handler: re-issue futex_wait() from the saved restart block. */
     699           0 :         if (restart->futex.flags & FLAGS_HAS_TIMEOUT) {
     700           0 :                 t = restart->futex.time;
     701           0 :                 tp = &t;
     702             :         }
     703           0 :         restart->fn = do_no_restart_syscall;
     704             :         /* fn was reset above; futex_wait() sets it again via set_restart_fn() if needed. */
     705           0 :         return (long)futex_wait(uaddr, restart->futex.flags,
     706             :                                 restart->futex.val, tp, restart->futex.bitset);
     707             : }
     708             : 

Generated by: LCOV version 1.14