LCOV - code coverage report
Current view: top level - kernel/sched - membarrier.c (source / functions)
Test: coverage.info
Date: 2023-07-19 18:55:55

Coverage totals:        Hit    Total    Rate
  Lines:                  0       73    0.0 %
  Functions:              0        8    0.0 %

          Line data    Source code
       1             : // SPDX-License-Identifier: GPL-2.0-or-later
       2             : /*
       3             :  * Copyright (C) 2010-2017 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
       4             :  *
       5             :  * membarrier system call
       6             :  */
       7             : 
       8             : /*
       9             :  * For documentation purposes, here are some membarrier ordering
      10             :  * scenarios to keep in mind:
      11             :  *
      12             :  * A) Userspace thread execution after IPI vs membarrier's memory
      13             :  *    barrier before sending the IPI
      14             :  *
      15             :  * Userspace variables:
      16             :  *
      17             :  * int x = 0, y = 0;
      18             :  *
      19             :  * The memory barrier at the start of membarrier() on CPU0 is necessary in
      20             :  * order to enforce the guarantee that any writes occurring on CPU0 before
      21             :  * the membarrier() is executed will be visible to any code executing on
      22             :  * CPU1 after the IPI-induced memory barrier:
      23             :  *
      24             :  *         CPU0                              CPU1
      25             :  *
      26             :  *         x = 1
      27             :  *         membarrier():
      28             :  *           a: smp_mb()
      29             :  *           b: send IPI                       IPI-induced mb
      30             :  *           c: smp_mb()
      31             :  *         r2 = y
      32             :  *                                           y = 1
      33             :  *                                           barrier()
      34             :  *                                           r1 = x
      35             :  *
      36             :  *                     BUG_ON(r1 == 0 && r2 == 0)
      37             :  *
      38             :  * The write to y and load from x by CPU1 are unordered by the hardware,
      39             :  * so it's possible to have "r1 = x" reordered before "y = 1" at any
      40             :  * point after (b).  If the memory barrier at (a) is omitted, then "x = 1"
      41             :  * can be reordered after (a) (although not after (c)), so we get r1 == 0
      42             :  * and r2 == 0.  This violates the guarantee that membarrier() is
      43             :  * supposed to provide.
      44             :  *
      45             :  * The timing of the memory barrier at (a) has to ensure that it executes
      46             :  * before the IPI-induced memory barrier on CPU1.
      47             :  *
      48             :  * B) Userspace thread execution before IPI vs membarrier's memory
      49             :  *    barrier after completing the IPI
      50             :  *
      51             :  * Userspace variables:
      52             :  *
      53             :  * int x = 0, y = 0;
      54             :  *
      55             :  * The memory barrier at the end of membarrier() on CPU0 is necessary in
      56             :  * order to enforce the guarantee that any writes occurring on CPU1 before
      57             :  * the membarrier() is executed will be visible to any code executing on
      58             :  * CPU0 after the membarrier():
      59             :  *
      60             :  *         CPU0                              CPU1
      61             :  *
      62             :  *                                           x = 1
      63             :  *                                           barrier()
      64             :  *                                           y = 1
      65             :  *         r2 = y
      66             :  *         membarrier():
      67             :  *           a: smp_mb()
      68             :  *           b: send IPI                       IPI-induced mb
      69             :  *           c: smp_mb()
      70             :  *         r1 = x
      71             :  *         BUG_ON(r1 == 0 && r2 == 1)
      72             :  *
      73             :  * The writes to x and y are unordered by the hardware, so it's possible to
      74             :  * have "r2 = 1" even though the write to x doesn't execute until (b).  If
      75             :  * the memory barrier at (c) is omitted then "r1 = x" can be reordered
      76             :  * before (b) (although not before (a)), so we get "r1 = 0".  This violates
      77             :  * the guarantee that membarrier() is supposed to provide.
      78             :  *
      79             :  * The timing of the memory barrier at (c) has to ensure that it executes
      80             :  * after the IPI-induced memory barrier on CPU1.
      81             :  *
      82             :  * C) Scheduling userspace thread -> kthread -> userspace thread vs membarrier
      83             :  *
      84             :  *           CPU0                            CPU1
      85             :  *
      86             :  *           membarrier():
      87             :  *           a: smp_mb()
      88             :  *                                           d: switch to kthread (includes mb)
      89             :  *           b: read rq->curr->mm == NULL
      90             :  *                                           e: switch to user (includes mb)
      91             :  *           c: smp_mb()
      92             :  *
      93             :  * Using the scenario from (A), we can show that (a) needs to be paired
      94             :  * with (e). Using the scenario from (B), we can show that (c) needs to
      95             :  * be paired with (d).
      96             :  *
      97             :  * D) exit_mm vs membarrier
      98             :  *
      99             :  * Two thread groups are created, A and B.  Thread group B is created by
     100             :  * issuing clone from group A with flag CLONE_VM set, but not CLONE_THREAD.
     101             :  * Let's assume we have a single thread within each thread group (Thread A
     102             :  * and Thread B).  Thread A runs on CPU0, Thread B runs on CPU1.
     103             :  *
     104             :  *           CPU0                            CPU1
     105             :  *
     106             :  *           membarrier():
     107             :  *             a: smp_mb()
     108             :  *                                           exit_mm():
     109             :  *                                             d: smp_mb()
     110             :  *                                             e: current->mm = NULL
     111             :  *             b: read rq->curr->mm == NULL
     112             :  *             c: smp_mb()
     113             :  *
     114             :  * Using scenario (B), we can show that (c) needs to be paired with (d).
     115             :  *
     116             :  * E) kthread_{use,unuse}_mm vs membarrier
     117             :  *
     118             :  *           CPU0                            CPU1
     119             :  *
     120             :  *           membarrier():
     121             :  *           a: smp_mb()
     122             :  *                                           kthread_unuse_mm()
     123             :  *                                             d: smp_mb()
     124             :  *                                             e: current->mm = NULL
     125             :  *           b: read rq->curr->mm == NULL
     126             :  *                                           kthread_use_mm()
     127             :  *                                             f: current->mm = mm
     128             :  *                                             g: smp_mb()
     129             :  *           c: smp_mb()
     130             :  *
     131             :  * Using the scenario from (A), we can show that (a) needs to be paired
     132             :  * with (g). Using the scenario from (B), we can show that (c) needs to
     133             :  * be paired with (d).
     134             :  */
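
The scenarios above have a direct userspace counterpart. The sketch below (not part of this source file) mirrors scenario (A) with the private expedited flavour: the caller's membarrier() stands in for the smp_mb()/IPI sequence at (a)-(c), while the other thread only needs a compiler barrier. The membarrier() wrapper, the thread function and the variable names are assumptions for illustration; only the MEMBARRIER_CMD_* constants and the three-argument syscall come from the kernel ABI.

#include <linux/membarrier.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <pthread.h>
#include <assert.h>

static volatile int x, y;       /* a real litmus test would use atomics */
static int r1, r2;

/* Hypothetical wrapper; glibc does not provide one. */
static int membarrier(int cmd, unsigned int flags, int cpu_id)
{
        return syscall(__NR_membarrier, cmd, flags, cpu_id);
}

static void *cpu1_thread(void *arg)
{
        y = 1;
        __asm__ __volatile__("" ::: "memory");  /* barrier() */
        r1 = x;
        return NULL;
}

int main(void)
{
        pthread_t t;

        /* One-time registration for the private expedited command. */
        membarrier(MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED, 0, 0);

        pthread_create(&t, NULL, cpu1_thread, NULL);

        x = 1;
        /* Stands in for a/b/c: full barrier plus an IPI to every other
         * thread of this mm, as in scenario (A). */
        membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED, 0, 0);
        r2 = y;

        pthread_join(t, NULL);
        assert(!(r1 == 0 && r2 == 0));  /* userspace analogue of the BUG_ON */
        return 0;
}

Build with -pthread; the final assertion is the userspace analogue of the BUG_ON() in the diagram above.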
     135             : 
     136             : /*
      137             :  * Bitmask made from an OR of all commands within enum membarrier_cmd,
     138             :  * except MEMBARRIER_CMD_QUERY.
     139             :  */
     140             : #ifdef CONFIG_ARCH_HAS_MEMBARRIER_SYNC_CORE
     141             : #define MEMBARRIER_PRIVATE_EXPEDITED_SYNC_CORE_BITMASK                  \
     142             :         (MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE                     \
     143             :         | MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE)
     144             : #else
     145             : #define MEMBARRIER_PRIVATE_EXPEDITED_SYNC_CORE_BITMASK  0
     146             : #endif
     147             : 
     148             : #ifdef CONFIG_RSEQ
     149             : #define MEMBARRIER_PRIVATE_EXPEDITED_RSEQ_BITMASK               \
     150             :         (MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ                  \
     151             :         | MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ)
     152             : #else
     153             : #define MEMBARRIER_PRIVATE_EXPEDITED_RSEQ_BITMASK       0
     154             : #endif
     155             : 
     156             : #define MEMBARRIER_CMD_BITMASK                                          \
     157             :         (MEMBARRIER_CMD_GLOBAL | MEMBARRIER_CMD_GLOBAL_EXPEDITED        \
     158             :         | MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED                      \
     159             :         | MEMBARRIER_CMD_PRIVATE_EXPEDITED                              \
     160             :         | MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED                     \
     161             :         | MEMBARRIER_PRIVATE_EXPEDITED_SYNC_CORE_BITMASK                \
     162             :         | MEMBARRIER_PRIVATE_EXPEDITED_RSEQ_BITMASK                     \
     163             :         | MEMBARRIER_CMD_GET_REGISTRATIONS)
     164             : 
     165             : static void ipi_mb(void *info)
     166             : {
     167             :         smp_mb();       /* IPIs should be serializing but paranoid. */
     168             : }
     169             : 
     170             : static void ipi_sync_core(void *info)
     171             : {
     172             :         /*
     173             :          * The smp_mb() in membarrier after all the IPIs is supposed to
      174             :          * ensure that memory accesses on remote CPUs that occur before the IPI
     175             :          * become visible to membarrier()'s caller -- see scenario B in
     176             :          * the big comment at the top of this file.
     177             :          *
     178             :          * A sync_core() would provide this guarantee, but
     179             :          * sync_core_before_usermode() might end up being deferred until
     180             :          * after membarrier()'s smp_mb().
     181             :          */
     182             :         smp_mb();       /* IPIs should be serializing but paranoid. */
     183             : 
     184             :         sync_core_before_usermode();
     185             : }
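
The comment above hints at the intended consumer of the sync_core flavour: code that rewrites instructions other threads may execute, such as a JIT. A hedged sketch of that pattern follows; publish_jitted_code() and the entry-pointer convention are illustrative assumptions, not an API defined by this file.

#include <linux/membarrier.h>
#include <sys/syscall.h>
#include <unistd.h>

/* Assumes a prior, successful
 * MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE registration. */
static void publish_jitted_code(void (**entry)(void), void (*new_code)(void))
{
        /* The caller has already written new_code's bytes into an
         * executable mapping.  Force every thread of this process through
         * a core-serializing instruction (plus a full memory barrier)
         * before the new code becomes reachable via *entry. */
        syscall(__NR_membarrier,
                MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE, 0, 0);

        __atomic_store_n(entry, new_code, __ATOMIC_RELEASE);
}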
     186             : 
     187             : static void ipi_rseq(void *info)
     188             : {
     189             :         /*
     190             :          * Ensure that all stores done by the calling thread are visible
     191             :          * to the current task before the current task resumes.  We could
     192             :          * probably optimize this away on most architectures, but by the
     193             :          * time we've already sent an IPI, the cost of the extra smp_mb()
     194             :          * is negligible.
     195             :          */
     196             :         smp_mb();
     197             :         rseq_preempt(current);
     198             : }
     199             : 
     200             : static void ipi_sync_rq_state(void *info)
     201             : {
     202             :         struct mm_struct *mm = (struct mm_struct *) info;
     203             : 
     204             :         if (current->mm != mm)
     205             :                 return;
     206             :         this_cpu_write(runqueues.membarrier_state,
     207             :                        atomic_read(&mm->membarrier_state));
     208             :         /*
     209             :          * Issue a memory barrier after setting
     210             :          * MEMBARRIER_STATE_GLOBAL_EXPEDITED in the current runqueue to
     211             :          * guarantee that no memory access following registration is reordered
     212             :          * before registration.
     213             :          */
     214             :         smp_mb();
     215             : }
     216             : 
     217           0 : void membarrier_exec_mmap(struct mm_struct *mm)
     218             : {
     219             :         /*
     220             :          * Issue a memory barrier before clearing membarrier_state to
     221             :          * guarantee that no memory access prior to exec is reordered after
     222             :          * clearing this state.
     223             :          */
     224           0 :         smp_mb();
     225           0 :         atomic_set(&mm->membarrier_state, 0);
     226             :         /*
     227             :          * Keep the runqueue membarrier_state in sync with this mm
     228             :          * membarrier_state.
     229             :          */
     230           0 :         this_cpu_write(runqueues.membarrier_state, 0);
     231           0 : }
     232             : 
     233           0 : void membarrier_update_current_mm(struct mm_struct *next_mm)
     234             : {
     235           0 :         struct rq *rq = this_rq();
     236           0 :         int membarrier_state = 0;
     237             : 
     238           0 :         if (next_mm)
     239           0 :                 membarrier_state = atomic_read(&next_mm->membarrier_state);
     240           0 :         if (READ_ONCE(rq->membarrier_state) == membarrier_state)
     241             :                 return;
     242           0 :         WRITE_ONCE(rq->membarrier_state, membarrier_state);
     243             : }
     244             : 
     245             : static int membarrier_global_expedited(void)
     246             : {
     247             :         int cpu;
     248             :         cpumask_var_t tmpmask;
     249             : 
     250             :         if (num_online_cpus() == 1)
     251             :                 return 0;
     252             : 
     253             :         /*
     254             :          * Matches memory barriers around rq->curr modification in
     255             :          * scheduler.
     256             :          */
     257             :         smp_mb();       /* system call entry is not a mb. */
     258             : 
     259             :         if (!zalloc_cpumask_var(&tmpmask, GFP_KERNEL))
     260             :                 return -ENOMEM;
     261             : 
     262             :         cpus_read_lock();
     263             :         rcu_read_lock();
     264             :         for_each_online_cpu(cpu) {
     265             :                 struct task_struct *p;
     266             : 
     267             :                 /*
      268             :                  * Skipping the current CPU is OK even though we can be
     269             :                  * migrated at any point. The current CPU, at the point
     270             :                  * where we read raw_smp_processor_id(), is ensured to
     271             :                  * be in program order with respect to the caller
     272             :                  * thread. Therefore, we can skip this CPU from the
     273             :                  * iteration.
     274             :                  */
     275             :                 if (cpu == raw_smp_processor_id())
     276             :                         continue;
     277             : 
     278             :                 if (!(READ_ONCE(cpu_rq(cpu)->membarrier_state) &
     279             :                     MEMBARRIER_STATE_GLOBAL_EXPEDITED))
     280             :                         continue;
     281             : 
     282             :                 /*
     283             :                  * Skip the CPU if it runs a kernel thread which is not using
     284             :                  * a task mm.
     285             :                  */
     286             :                 p = rcu_dereference(cpu_rq(cpu)->curr);
     287             :                 if (!p->mm)
     288             :                         continue;
     289             : 
     290             :                 __cpumask_set_cpu(cpu, tmpmask);
     291             :         }
     292             :         rcu_read_unlock();
     293             : 
     294             :         preempt_disable();
     295             :         smp_call_function_many(tmpmask, ipi_mb, NULL, 1);
     296             :         preempt_enable();
     297             : 
     298             :         free_cpumask_var(tmpmask);
     299             :         cpus_read_unlock();
     300             : 
     301             :         /*
     302             :          * Memory barrier on the caller thread _after_ we finished
     303             :          * waiting for the last IPI. Matches memory barriers around
     304             :          * rq->curr modification in scheduler.
     305             :          */
     306             :         smp_mb();       /* exit from system call is not a mb */
     307             :         return 0;
     308             : }
     309             : 
     310           0 : static int membarrier_private_expedited(int flags, int cpu_id)
     311             : {
     312             :         cpumask_var_t tmpmask;
     313           0 :         struct mm_struct *mm = current->mm;
     314           0 :         smp_call_func_t ipi_func = ipi_mb;
     315             : 
     316           0 :         if (flags == MEMBARRIER_FLAG_SYNC_CORE) {
     317             :                 if (!IS_ENABLED(CONFIG_ARCH_HAS_MEMBARRIER_SYNC_CORE))
     318             :                         return -EINVAL;
     319             :                 if (!(atomic_read(&mm->membarrier_state) &
     320             :                       MEMBARRIER_STATE_PRIVATE_EXPEDITED_SYNC_CORE_READY))
     321             :                         return -EPERM;
     322             :                 ipi_func = ipi_sync_core;
     323           0 :         } else if (flags == MEMBARRIER_FLAG_RSEQ) {
     324             :                 if (!IS_ENABLED(CONFIG_RSEQ))
     325             :                         return -EINVAL;
     326             :                 if (!(atomic_read(&mm->membarrier_state) &
     327             :                       MEMBARRIER_STATE_PRIVATE_EXPEDITED_RSEQ_READY))
     328             :                         return -EPERM;
     329             :                 ipi_func = ipi_rseq;
     330             :         } else {
     331           0 :                 WARN_ON_ONCE(flags);
     332           0 :                 if (!(atomic_read(&mm->membarrier_state) &
     333             :                       MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY))
     334             :                         return -EPERM;
     335             :         }
     336             : 
     337             :         if (flags != MEMBARRIER_FLAG_SYNC_CORE &&
     338           0 :             (atomic_read(&mm->mm_users) == 1 || num_online_cpus() == 1))
     339             :                 return 0;
     340             : 
     341             :         /*
     342             :          * Matches memory barriers around rq->curr modification in
     343             :          * scheduler.
     344             :          */
     345             :         smp_mb();       /* system call entry is not a mb. */
     346             : 
     347             :         if (cpu_id < 0 && !zalloc_cpumask_var(&tmpmask, GFP_KERNEL))
     348             :                 return -ENOMEM;
     349             : 
     350             :         cpus_read_lock();
     351             : 
     352             :         if (cpu_id >= 0) {
     353             :                 struct task_struct *p;
     354             : 
     355             :                 if (cpu_id >= nr_cpu_ids || !cpu_online(cpu_id))
     356             :                         goto out;
     357             :                 rcu_read_lock();
     358             :                 p = rcu_dereference(cpu_rq(cpu_id)->curr);
     359             :                 if (!p || p->mm != mm) {
     360             :                         rcu_read_unlock();
     361             :                         goto out;
     362             :                 }
     363             :                 rcu_read_unlock();
     364             :         } else {
     365             :                 int cpu;
     366             : 
     367             :                 rcu_read_lock();
     368             :                 for_each_online_cpu(cpu) {
     369             :                         struct task_struct *p;
     370             : 
     371             :                         p = rcu_dereference(cpu_rq(cpu)->curr);
     372             :                         if (p && p->mm == mm)
     373             :                                 __cpumask_set_cpu(cpu, tmpmask);
     374             :                 }
     375             :                 rcu_read_unlock();
     376             :         }
     377             : 
     378             :         if (cpu_id >= 0) {
     379             :                 /*
     380             :                  * smp_call_function_single() will call ipi_func() if cpu_id
     381             :                  * is the calling CPU.
     382             :                  */
     383             :                 smp_call_function_single(cpu_id, ipi_func, NULL, 1);
     384             :         } else {
     385             :                 /*
     386             :                  * For regular membarrier, we can save a few cycles by
     387             :                  * skipping the current cpu -- we're about to do smp_mb()
     388             :                  * below, and if we migrate to a different cpu, this cpu
     389             :                  * and the new cpu will execute a full barrier in the
     390             :                  * scheduler.
     391             :                  *
     392             :                  * For SYNC_CORE, we do need a barrier on the current cpu --
     393             :                  * otherwise, if we are migrated and replaced by a different
     394             :                  * task in the same mm just before, during, or after
     395             :                  * membarrier, we will end up with some thread in the mm
     396             :                  * running without a core sync.
     397             :                  *
     398             :                  * For RSEQ, don't rseq_preempt() the caller.  User code
     399             :                  * is not supposed to issue syscalls at all from inside an
     400             :                  * rseq critical section.
     401             :                  */
     402             :                 if (flags != MEMBARRIER_FLAG_SYNC_CORE) {
     403             :                         preempt_disable();
     404             :                         smp_call_function_many(tmpmask, ipi_func, NULL, true);
     405             :                         preempt_enable();
     406             :                 } else {
     407             :                         on_each_cpu_mask(tmpmask, ipi_func, NULL, true);
     408             :                 }
     409             :         }
     410             : 
     411             : out:
     412             :         if (cpu_id < 0)
     413             :                 free_cpumask_var(tmpmask);
     414             :         cpus_read_unlock();
     415             : 
     416             :         /*
     417             :          * Memory barrier on the caller thread _after_ we finished
     418             :          * waiting for the last IPI. Matches memory barriers around
     419             :          * rq->curr modification in scheduler.
     420             :          */
     421             :         smp_mb();       /* exit from system call is not a mb */
     422             : 
     423             :         return 0;
     424             : }
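
The cpu_id argument handled above backs MEMBARRIER_CMD_FLAG_CPU: instead of interrupting every CPU running the mm, the caller can target the rseq critical section of whatever thread of its process currently runs on one CPU. A minimal userspace sketch, assuming the RSEQ flavour has already been registered; rseq_fence_cpu() is an illustrative name.

#include <linux/membarrier.h>
#include <sys/syscall.h>
#include <unistd.h>

/* Restart the rseq critical section of whichever thread of this process
 * currently runs on @cpu.  Requires a prior successful
 * MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ. */
static int rseq_fence_cpu(int cpu)
{
        return syscall(__NR_membarrier,
                       MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ,
                       MEMBARRIER_CMD_FLAG_CPU, cpu);
}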
     425             : 
     426             : static int sync_runqueues_membarrier_state(struct mm_struct *mm)
     427             : {
     428           0 :         int membarrier_state = atomic_read(&mm->membarrier_state);
     429             :         cpumask_var_t tmpmask;
     430             :         int cpu;
     431             : 
     432           0 :         if (atomic_read(&mm->mm_users) == 1 || num_online_cpus() == 1) {
     433           0 :                 this_cpu_write(runqueues.membarrier_state, membarrier_state);
     434             : 
     435             :                 /*
     436             :                  * For single mm user, we can simply issue a memory barrier
     437             :                  * after setting MEMBARRIER_STATE_GLOBAL_EXPEDITED in the
     438             :                  * mm and in the current runqueue to guarantee that no memory
     439             :                  * access following registration is reordered before
     440             :                  * registration.
     441             :                  */
     442           0 :                 smp_mb();
     443             :                 return 0;
     444             :         }
     445             : 
     446             :         if (!zalloc_cpumask_var(&tmpmask, GFP_KERNEL))
     447             :                 return -ENOMEM;
     448             : 
     449             :         /*
     450             :          * For mm with multiple users, we need to ensure all future
     451             :          * scheduler executions will observe @mm's new membarrier
     452             :          * state.
     453             :          */
     454             :         synchronize_rcu();
     455             : 
     456             :         /*
      457             :          * For each cpu runqueue, if the task's mm matches @mm, ensure that all
     458             :          * @mm's membarrier state set bits are also set in the runqueue's
     459             :          * membarrier state. This ensures that a runqueue scheduling
     460             :          * between threads which are users of @mm has its membarrier state
     461             :          * updated.
     462             :          */
     463             :         cpus_read_lock();
     464             :         rcu_read_lock();
     465             :         for_each_online_cpu(cpu) {
     466             :                 struct rq *rq = cpu_rq(cpu);
     467             :                 struct task_struct *p;
     468             : 
     469             :                 p = rcu_dereference(rq->curr);
     470             :                 if (p && p->mm == mm)
     471             :                         __cpumask_set_cpu(cpu, tmpmask);
     472             :         }
     473             :         rcu_read_unlock();
     474             : 
     475             :         on_each_cpu_mask(tmpmask, ipi_sync_rq_state, mm, true);
     476             : 
     477             :         free_cpumask_var(tmpmask);
     478             :         cpus_read_unlock();
     479             : 
     480             :         return 0;
     481             : }
     482             : 
     483           0 : static int membarrier_register_global_expedited(void)
     484             : {
     485           0 :         struct task_struct *p = current;
     486           0 :         struct mm_struct *mm = p->mm;
     487             :         int ret;
     488             : 
     489           0 :         if (atomic_read(&mm->membarrier_state) &
     490             :             MEMBARRIER_STATE_GLOBAL_EXPEDITED_READY)
     491             :                 return 0;
     492           0 :         atomic_or(MEMBARRIER_STATE_GLOBAL_EXPEDITED, &mm->membarrier_state);
     493           0 :         ret = sync_runqueues_membarrier_state(mm);
     494             :         if (ret)
     495             :                 return ret;
     496           0 :         atomic_or(MEMBARRIER_STATE_GLOBAL_EXPEDITED_READY,
     497             :                   &mm->membarrier_state);
     498             : 
     499           0 :         return 0;
     500             : }
     501             : 
     502           0 : static int membarrier_register_private_expedited(int flags)
     503             : {
     504           0 :         struct task_struct *p = current;
     505           0 :         struct mm_struct *mm = p->mm;
     506           0 :         int ready_state = MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY,
     507           0 :             set_state = MEMBARRIER_STATE_PRIVATE_EXPEDITED,
     508             :             ret;
     509             : 
     510           0 :         if (flags == MEMBARRIER_FLAG_SYNC_CORE) {
     511             :                 if (!IS_ENABLED(CONFIG_ARCH_HAS_MEMBARRIER_SYNC_CORE))
     512             :                         return -EINVAL;
     513             :                 ready_state =
     514             :                         MEMBARRIER_STATE_PRIVATE_EXPEDITED_SYNC_CORE_READY;
     515           0 :         } else if (flags == MEMBARRIER_FLAG_RSEQ) {
     516             :                 if (!IS_ENABLED(CONFIG_RSEQ))
     517             :                         return -EINVAL;
     518             :                 ready_state =
     519             :                         MEMBARRIER_STATE_PRIVATE_EXPEDITED_RSEQ_READY;
     520             :         } else {
     521           0 :                 WARN_ON_ONCE(flags);
     522             :         }
     523             : 
     524             :         /*
     525             :          * We need to consider threads belonging to different thread
     526             :          * groups, which use the same mm. (CLONE_VM but not
     527             :          * CLONE_THREAD).
     528             :          */
     529           0 :         if ((atomic_read(&mm->membarrier_state) & ready_state) == ready_state)
     530             :                 return 0;
     531           0 :         if (flags & MEMBARRIER_FLAG_SYNC_CORE)
     532           0 :                 set_state |= MEMBARRIER_STATE_PRIVATE_EXPEDITED_SYNC_CORE;
     533           0 :         if (flags & MEMBARRIER_FLAG_RSEQ)
     534           0 :                 set_state |= MEMBARRIER_STATE_PRIVATE_EXPEDITED_RSEQ;
     535           0 :         atomic_or(set_state, &mm->membarrier_state);
     536           0 :         ret = sync_runqueues_membarrier_state(mm);
     537             :         if (ret)
     538             :                 return ret;
     539           0 :         atomic_or(ready_state, &mm->membarrier_state);
     540             : 
     541           0 :         return 0;
     542             : }
     543             : 
     544           0 : static int membarrier_get_registrations(void)
     545             : {
     546           0 :         struct task_struct *p = current;
     547           0 :         struct mm_struct *mm = p->mm;
     548           0 :         int registrations_mask = 0, membarrier_state, i;
     549             :         static const int states[] = {
     550             :                 MEMBARRIER_STATE_GLOBAL_EXPEDITED |
     551             :                         MEMBARRIER_STATE_GLOBAL_EXPEDITED_READY,
     552             :                 MEMBARRIER_STATE_PRIVATE_EXPEDITED |
     553             :                         MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY,
     554             :                 MEMBARRIER_STATE_PRIVATE_EXPEDITED_SYNC_CORE |
     555             :                         MEMBARRIER_STATE_PRIVATE_EXPEDITED_SYNC_CORE_READY,
     556             :                 MEMBARRIER_STATE_PRIVATE_EXPEDITED_RSEQ |
     557             :                         MEMBARRIER_STATE_PRIVATE_EXPEDITED_RSEQ_READY
     558             :         };
     559             :         static const int registration_cmds[] = {
     560             :                 MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED,
     561             :                 MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED,
     562             :                 MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE,
     563             :                 MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ
     564             :         };
     565             :         BUILD_BUG_ON(ARRAY_SIZE(states) != ARRAY_SIZE(registration_cmds));
     566             : 
     567           0 :         membarrier_state = atomic_read(&mm->membarrier_state);
     568           0 :         for (i = 0; i < ARRAY_SIZE(states); ++i) {
     569           0 :                 if (membarrier_state & states[i]) {
     570           0 :                         registrations_mask |= registration_cmds[i];
     571           0 :                         membarrier_state &= ~states[i];
     572             :                 }
     573             :         }
     574           0 :         WARN_ON_ONCE(membarrier_state != 0);
     575           0 :         return registrations_mask;
     576             : }
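
From userspace, the mask computed above lets a library discover registrations already made elsewhere in the process and avoid redundant ones. A hedged sketch, assuming a kernel that implements MEMBARRIER_CMD_GET_REGISTRATIONS; ensure_private_expedited() is an illustrative helper name.

#include <linux/membarrier.h>
#include <sys/syscall.h>
#include <unistd.h>

/* Register for the private expedited command only if no other component
 * of this process has done so already. */
static int ensure_private_expedited(void)
{
        long regs = syscall(__NR_membarrier,
                            MEMBARRIER_CMD_GET_REGISTRATIONS, 0, 0);

        if (regs < 0)
                return -1;      /* errno set, e.g. on kernels lacking the command */
        if (regs & MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED)
                return 0;       /* already registered */
        return syscall(__NR_membarrier,
                       MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED, 0, 0);
}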
     577             : 
     578             : /**
     579             :  * sys_membarrier - issue memory barriers on a set of threads
     580             :  * @cmd:    Takes command values defined in enum membarrier_cmd.
     581             :  * @flags:  Currently needs to be 0 for all commands other than
     582             :  *          MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ: in the latter
     583             :  *          case it can be MEMBARRIER_CMD_FLAG_CPU, indicating that @cpu_id
     584             :  *          contains the CPU on which to interrupt (= restart)
     585             :  *          the RSEQ critical section.
     586             :  * @cpu_id: if @flags == MEMBARRIER_CMD_FLAG_CPU, indicates the cpu on which
     587             :  *          RSEQ CS should be interrupted (@cmd must be
     588             :  *          MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ).
     589             :  *
     590             :  * If this system call is not implemented, -ENOSYS is returned. If the
     591             :  * command specified does not exist, not available on the running
     592             :  * kernel, or if the command argument is invalid, this system call
     593             :  * returns -EINVAL. For a given command, with flags argument set to 0,
     594             :  * if this system call returns -ENOSYS or -EINVAL, it is guaranteed to
     595             :  * always return the same value until reboot. In addition, it can return
     596             :  * -ENOMEM if there is not enough memory available to perform the system
     597             :  * call.
     598             :  *
     599             :  * All memory accesses performed in program order from each targeted thread
      600             :  * are guaranteed to be ordered with respect to sys_membarrier(). If we use
     601             :  * the semantic "barrier()" to represent a compiler barrier forcing memory
     602             :  * accesses to be performed in program order across the barrier, and
     603             :  * smp_mb() to represent explicit memory barriers forcing full memory
     604             :  * ordering across the barrier, we have the following ordering table for
     605             :  * each pair of barrier(), sys_membarrier() and smp_mb():
     606             :  *
     607             :  * The pair ordering is detailed as (O: ordered, X: not ordered):
     608             :  *
     609             :  *                        barrier()   smp_mb() sys_membarrier()
     610             :  *        barrier()          X           X            O
     611             :  *        smp_mb()           X           O            O
     612             :  *        sys_membarrier()   O           O            O
     613             :  */
     614           0 : SYSCALL_DEFINE3(membarrier, int, cmd, unsigned int, flags, int, cpu_id)
     615             : {
     616           0 :         switch (cmd) {
     617             :         case MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ:
     618           0 :                 if (unlikely(flags && flags != MEMBARRIER_CMD_FLAG_CPU))
     619             :                         return -EINVAL;
     620             :                 break;
     621             :         default:
     622           0 :                 if (unlikely(flags))
     623             :                         return -EINVAL;
     624             :         }
     625             : 
     626             :         if (!(flags & MEMBARRIER_CMD_FLAG_CPU))
     627             :                 cpu_id = -1;
     628             : 
     629           0 :         switch (cmd) {
     630             :         case MEMBARRIER_CMD_QUERY:
     631             :         {
     632             :                 int cmd_mask = MEMBARRIER_CMD_BITMASK;
     633             : 
     634             :                 if (tick_nohz_full_enabled())
     635             :                         cmd_mask &= ~MEMBARRIER_CMD_GLOBAL;
     636             :                 return cmd_mask;
     637             :         }
     638             :         case MEMBARRIER_CMD_GLOBAL:
     639             :                 /* MEMBARRIER_CMD_GLOBAL is not compatible with nohz_full. */
     640             :                 if (tick_nohz_full_enabled())
     641             :                         return -EINVAL;
     642             :                 if (num_online_cpus() > 1)
     643             :                         synchronize_rcu();
     644             :                 return 0;
     645             :         case MEMBARRIER_CMD_GLOBAL_EXPEDITED:
     646             :                 return membarrier_global_expedited();
     647             :         case MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED:
     648           0 :                 return membarrier_register_global_expedited();
     649             :         case MEMBARRIER_CMD_PRIVATE_EXPEDITED:
     650           0 :                 return membarrier_private_expedited(0, cpu_id);
     651             :         case MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED:
     652           0 :                 return membarrier_register_private_expedited(0);
     653             :         case MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE:
     654           0 :                 return membarrier_private_expedited(MEMBARRIER_FLAG_SYNC_CORE, cpu_id);
     655             :         case MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE:
     656           0 :                 return membarrier_register_private_expedited(MEMBARRIER_FLAG_SYNC_CORE);
     657             :         case MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ:
     658           0 :                 return membarrier_private_expedited(MEMBARRIER_FLAG_RSEQ, cpu_id);
     659             :         case MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ:
     660           0 :                 return membarrier_register_private_expedited(MEMBARRIER_FLAG_RSEQ);
     661             :         case MEMBARRIER_CMD_GET_REGISTRATIONS:
     662           0 :                 return membarrier_get_registrations();
     663             :         default:
     664             :                 return -EINVAL;
     665             :         }
     666             : }
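
Putting the kernel-doc and the dispatch above together, here is a hedged sketch of the usual probe-then-issue flow from userspace; global_barrier() is an illustrative helper and the error handling is intentionally minimal.

#include <linux/membarrier.h>
#include <sys/syscall.h>
#include <unistd.h>

/* Issue a barrier across processes: the expedited form only targets
 * processes registered with REGISTER_GLOBAL_EXPEDITED, while
 * MEMBARRIER_CMD_GLOBAL (a synchronize_rcu()) covers everything but is
 * slower and is masked out of the query result under nohz_full. */
static int global_barrier(void)
{
        long mask = syscall(__NR_membarrier, MEMBARRIER_CMD_QUERY, 0, 0);

        if (mask < 0)
                return -1;              /* membarrier() not implemented */

        if (mask & MEMBARRIER_CMD_GLOBAL_EXPEDITED) {
                /* Registration also makes this process a target of other
                 * processes' GLOBAL_EXPEDITED calls; it is idempotent. */
                if (syscall(__NR_membarrier,
                            MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED, 0, 0))
                        return -1;
                return syscall(__NR_membarrier,
                               MEMBARRIER_CMD_GLOBAL_EXPEDITED, 0, 0);
        }

        if (mask & MEMBARRIER_CMD_GLOBAL)
                return syscall(__NR_membarrier, MEMBARRIER_CMD_GLOBAL, 0, 0);

        return -1;
}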

Generated by: LCOV version 1.14