// SPDX-License-Identifier: GPL-2.0
/*
 * Contains the core associated with submission side polling of the SQ
 * ring, offloading submissions from the application to a kernel thread.
 */
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/file.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/audit.h>
#include <linux/security.h>
#include <linux/io_uring.h>

#include <uapi/linux/io_uring.h>

#include "io_uring.h"
#include "sqpoll.h"

#define IORING_SQPOLL_CAP_ENTRIES_VALUE 8

enum {
	IO_SQ_THREAD_SHOULD_STOP = 0,
	IO_SQ_THREAD_SHOULD_PARK,
};

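/*
 * Background, not part of the original source: userspace opts into this
 * offload by passing IORING_SETUP_SQPOLL to io_uring_setup(2). A minimal
 * sketch of the raw syscall usage (error handling omitted):
 *
 *	struct io_uring_params p = { };
 *
 *	p.flags = IORING_SETUP_SQPOLL;
 *	p.sq_thread_idle = 2000;	// idle after 2000 ms, see below
 *	int ring_fd = syscall(__NR_io_uring_setup, 128, &p);
 *
 * Once set up, the kernel thread created by this file picks SQEs off the
 * SQ ring on the application's behalf.
 */
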
void io_sq_thread_unpark(struct io_sq_data *sqd)
	__releases(&sqd->lock)
{
	WARN_ON_ONCE(sqd->thread == current);

	/*
	 * Do the clear/recheck dance rather than a conditional clear_bit(),
	 * because the latter would race with other threads incrementing
	 * park_pending and setting the bit.
	 */
	clear_bit(IO_SQ_THREAD_SHOULD_PARK, &sqd->state);
	if (atomic_dec_return(&sqd->park_pending))
		set_bit(IO_SQ_THREAD_SHOULD_PARK, &sqd->state);
	mutex_unlock(&sqd->lock);
}

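/*
 * Parking is a handshake with the SQPOLL thread: the parker bumps
 * park_pending, sets IO_SQ_THREAD_SHOULD_PARK and grabs sqd->lock; the
 * thread sees the bit in io_sqd_handle_event() and blocks on the lock
 * until io_sq_thread_unpark() above releases it.
 */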
void io_sq_thread_park(struct io_sq_data *sqd)
	__acquires(&sqd->lock)
{
	WARN_ON_ONCE(sqd->thread == current);

	atomic_inc(&sqd->park_pending);
	set_bit(IO_SQ_THREAD_SHOULD_PARK, &sqd->state);
	mutex_lock(&sqd->lock);
	if (sqd->thread)
		wake_up_process(sqd->thread);
}

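/*
 * Signal the SQPOLL thread to exit and wait until it has: unlike parking,
 * stopping is one-way; the thread leaves its loop and completes
 * sqd->exited.
 */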
void io_sq_thread_stop(struct io_sq_data *sqd)
{
	WARN_ON_ONCE(sqd->thread == current);
	WARN_ON_ONCE(test_bit(IO_SQ_THREAD_SHOULD_STOP, &sqd->state));

	set_bit(IO_SQ_THREAD_SHOULD_STOP, &sqd->state);
	mutex_lock(&sqd->lock);
	if (sqd->thread)
		wake_up_process(sqd->thread);
	mutex_unlock(&sqd->lock);
	wait_for_completion(&sqd->exited);
}

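/* Drop a reference; the final put stops the thread and frees the sqd. */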
void io_put_sq_data(struct io_sq_data *sqd)
{
	if (refcount_dec_and_test(&sqd->refs)) {
		WARN_ON_ONCE(atomic_read(&sqd->park_pending));

		io_sq_thread_stop(sqd);
		kfree(sqd);
	}
}

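/* The sqd idle period is the longest idle setting of any attached ring. */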
static __cold void io_sqd_update_thread_idle(struct io_sq_data *sqd)
{
	struct io_ring_ctx *ctx;
	unsigned sq_thread_idle = 0;

	list_for_each_entry(ctx, &sqd->ctx_list, sqd_list)
		sq_thread_idle = max(sq_thread_idle, ctx->sq_thread_idle);
	sqd->sq_thread_idle = sq_thread_idle;
}

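/*
 * Detach a ring from its sqd. The thread is parked while the ctx is
 * unlinked so io_sq_thread() never iterates a half-removed list entry.
 */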
void io_sq_thread_finish(struct io_ring_ctx *ctx)
{
	struct io_sq_data *sqd = ctx->sq_data;

	if (sqd) {
		io_sq_thread_park(sqd);
		list_del_init(&ctx->sqd_list);
		io_sqd_update_thread_idle(sqd);
		io_sq_thread_unpark(sqd);

		io_put_sq_data(sqd);
		ctx->sq_data = NULL;
	}
}

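/*
 * With IORING_SETUP_ATTACH_WQ, a new ring shares the SQPOLL thread of an
 * existing ring instead of spawning its own. A hedged userspace sketch
 * (ring_fd being an earlier SQPOLL ring's fd):
 *
 *	struct io_uring_params p2 = { };
 *
 *	p2.flags = IORING_SETUP_SQPOLL | IORING_SETUP_ATTACH_WQ;
 *	p2.wq_fd = ring_fd;
 *	int ring2_fd = syscall(__NR_io_uring_setup, 128, &p2);
 *
 * Attaching is restricted to the creator's thread group, enforced by the
 * task_tgid check below.
 */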
static struct io_sq_data *io_attach_sq_data(struct io_uring_params *p)
{
	struct io_ring_ctx *ctx_attach;
	struct io_sq_data *sqd;
	struct fd f;

	f = fdget(p->wq_fd);
	if (!f.file)
		return ERR_PTR(-ENXIO);
	if (!io_is_uring_fops(f.file)) {
		fdput(f);
		return ERR_PTR(-EINVAL);
	}

	ctx_attach = f.file->private_data;
	sqd = ctx_attach->sq_data;
	if (!sqd) {
		fdput(f);
		return ERR_PTR(-EINVAL);
	}
	if (sqd->task_tgid != current->tgid) {
		fdput(f);
		return ERR_PTR(-EPERM);
	}

	refcount_inc(&sqd->refs);
	fdput(f);
	return sqd;
}

static struct io_sq_data *io_get_sq_data(struct io_uring_params *p,
					 bool *attached)
{
	struct io_sq_data *sqd;

	*attached = false;
	if (p->flags & IORING_SETUP_ATTACH_WQ) {
		sqd = io_attach_sq_data(p);
		if (!IS_ERR(sqd)) {
			*attached = true;
			return sqd;
		}
		/* fall through for the EPERM case and set up a new sqd/task */
		if (PTR_ERR(sqd) != -EPERM)
			return sqd;
	}

	sqd = kzalloc(sizeof(*sqd), GFP_KERNEL);
	if (!sqd)
		return ERR_PTR(-ENOMEM);

	atomic_set(&sqd->park_pending, 0);
	refcount_set(&sqd->refs, 1);
	INIT_LIST_HEAD(&sqd->ctx_list);
	mutex_init(&sqd->lock);
	init_waitqueue_head(&sqd->wait);
	init_completion(&sqd->exited);
	return sqd;
}

static inline bool io_sqd_events_pending(struct io_sq_data *sqd)
{
	return READ_ONCE(sqd->state);
}

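/*
 * One submission pass over a single ring. When several rings share this
 * sqd, cap_entries limits each pass to IORING_SQPOLL_CAP_ENTRIES_VALUE
 * SQEs so that one busy ring cannot starve the others.
 */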
static int __io_sq_thread(struct io_ring_ctx *ctx, bool cap_entries)
{
	unsigned int to_submit;
	int ret = 0;

	to_submit = io_sqring_entries(ctx);
	/* if we're handling multiple rings, cap submit size for fairness */
	if (cap_entries && to_submit > IORING_SQPOLL_CAP_ENTRIES_VALUE)
		to_submit = IORING_SQPOLL_CAP_ENTRIES_VALUE;

	if (!wq_list_empty(&ctx->iopoll_list) || to_submit) {
		const struct cred *creds = NULL;

		if (ctx->sq_creds != current_cred())
			creds = override_creds(ctx->sq_creds);

		mutex_lock(&ctx->uring_lock);
		if (!wq_list_empty(&ctx->iopoll_list))
			io_do_iopoll(ctx, true);

		/*
		 * Don't submit if refs are dying: that's good for
		 * io_uring_register(), and io_ring_exit_work() relies
		 * on it as well.
		 */
		if (to_submit && likely(!percpu_ref_is_dying(&ctx->refs)) &&
		    !(ctx->flags & IORING_SETUP_R_DISABLED))
			ret = io_submit_sqes(ctx, to_submit);
		mutex_unlock(&ctx->uring_lock);

		if (to_submit && wq_has_sleeper(&ctx->sqo_sq_wait))
			wake_up(&ctx->sqo_sq_wait);
		if (creds)
			revert_creds(creds);
	}

	return ret;
}

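/*
 * Handle park/stop/signal events for the SQPOLL thread; returns true if
 * the thread should exit. Parking happens implicitly: dropping and
 * re-taking sqd->lock blocks here for as long as a parker holds it.
 */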
static bool io_sqd_handle_event(struct io_sq_data *sqd)
{
	bool did_sig = false;
	struct ksignal ksig;

	if (test_bit(IO_SQ_THREAD_SHOULD_PARK, &sqd->state) ||
	    signal_pending(current)) {
		mutex_unlock(&sqd->lock);
		if (signal_pending(current))
			did_sig = get_signal(&ksig);
		cond_resched();
		mutex_lock(&sqd->lock);
	}
	return did_sig || test_bit(IO_SQ_THREAD_SHOULD_STOP, &sqd->state);
}

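/*
 * Main loop of the SQPOLL thread. Once it has spun idle for longer than
 * sq_thread_idle, it sets IORING_SQ_NEED_WAKEUP in the SQ ring flags and
 * sleeps, and userspace must kick it via io_uring_enter(2). A hedged
 * sketch of the userspace side, with sq_flags pointing at the mmap'ed
 * flags word (a real implementation needs an acquire load here):
 *
 *	if (*sq_flags & IORING_SQ_NEED_WAKEUP)
 *		syscall(__NR_io_uring_enter, ring_fd, to_submit, 0,
 *			IORING_ENTER_SQ_WAKEUP, NULL, 0);
 *
 * The smp_mb__after_atomic() below pairs with the full barrier userspace
 * must place between writing the SQ tail and reading these flags.
 */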
static int io_sq_thread(void *data)
{
	struct io_sq_data *sqd = data;
	struct io_ring_ctx *ctx;
	unsigned long timeout = 0;
	char buf[TASK_COMM_LEN];
	DEFINE_WAIT(wait);

	snprintf(buf, sizeof(buf), "iou-sqp-%d", sqd->task_pid);
	set_task_comm(current, buf);

	if (sqd->sq_cpu != -1)
		set_cpus_allowed_ptr(current, cpumask_of(sqd->sq_cpu));
	else
		set_cpus_allowed_ptr(current, cpu_online_mask);

	mutex_lock(&sqd->lock);
	while (1) {
		bool cap_entries, sqt_spin = false;

		if (io_sqd_events_pending(sqd) || signal_pending(current)) {
			if (io_sqd_handle_event(sqd))
				break;
			timeout = jiffies + sqd->sq_thread_idle;
		}

		cap_entries = !list_is_singular(&sqd->ctx_list);
		list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) {
			int ret = __io_sq_thread(ctx, cap_entries);

			if (!sqt_spin && (ret > 0 || !wq_list_empty(&ctx->iopoll_list)))
				sqt_spin = true;
		}
		if (io_run_task_work())
			sqt_spin = true;

		if (sqt_spin || !time_after(jiffies, timeout)) {
			if (sqt_spin)
				timeout = jiffies + sqd->sq_thread_idle;
			if (unlikely(need_resched())) {
				mutex_unlock(&sqd->lock);
				cond_resched();
				mutex_lock(&sqd->lock);
			}
			continue;
		}

		prepare_to_wait(&sqd->wait, &wait, TASK_INTERRUPTIBLE);
		if (!io_sqd_events_pending(sqd) && !task_work_pending(current)) {
			bool needs_sched = true;

			list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) {
				atomic_or(IORING_SQ_NEED_WAKEUP,
						&ctx->rings->sq_flags);
				if ((ctx->flags & IORING_SETUP_IOPOLL) &&
				    !wq_list_empty(&ctx->iopoll_list)) {
					needs_sched = false;
					break;
				}

				/*
				 * Ensure the store of the wakeup flag is not
				 * reordered with the load of the SQ tail
				 */
				smp_mb__after_atomic();

				if (io_sqring_entries(ctx)) {
					needs_sched = false;
					break;
				}
			}

			if (needs_sched) {
				mutex_unlock(&sqd->lock);
				schedule();
				mutex_lock(&sqd->lock);
			}
			list_for_each_entry(ctx, &sqd->ctx_list, sqd_list)
				atomic_andnot(IORING_SQ_NEED_WAKEUP,
						&ctx->rings->sq_flags);
		}

		finish_wait(&sqd->wait, &wait);
		timeout = jiffies + sqd->sq_thread_idle;
	}

	io_uring_cancel_generic(true, sqd);
	sqd->thread = NULL;
	list_for_each_entry(ctx, &sqd->ctx_list, sqd_list)
		atomic_or(IORING_SQ_NEED_WAKEUP, &ctx->rings->sq_flags);
	io_run_task_work();
	mutex_unlock(&sqd->lock);

	complete(&sqd->exited);
	do_exit(0);
}

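/*
 * Backpressure for submitters: reached from the io_uring_enter(2) path
 * with IORING_ENTER_SQ_WAIT when the SQ ring is full, sleeping until the
 * SQPOLL thread has freed up space.
 */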
void io_sqpoll_wait_sq(struct io_ring_ctx *ctx)
{
	DEFINE_WAIT(wait);

	do {
		if (!io_sqring_full(ctx))
			break;
		prepare_to_wait(&ctx->sqo_sq_wait, &wait, TASK_INTERRUPTIBLE);

		if (!io_sqring_full(ctx))
			break;
		schedule();
	} while (!signal_pending(current));

	finish_wait(&ctx->sqo_sq_wait, &wait);
}

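/*
 * Set up SQPOLL at io_uring_setup(2) time: attach to an existing sqd when
 * IORING_SETUP_ATTACH_WQ allows it, otherwise allocate a fresh one and
 * spawn the "iou-sqp-<pid>" kernel thread, optionally pinned to a CPU
 * with IORING_SETUP_SQ_AFF.
 */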
__cold int io_sq_offload_create(struct io_ring_ctx *ctx,
				struct io_uring_params *p)
{
	int ret;

	/* Retain compatibility with failing for an invalid attach attempt */
	if ((ctx->flags & (IORING_SETUP_ATTACH_WQ | IORING_SETUP_SQPOLL)) ==
				IORING_SETUP_ATTACH_WQ) {
		struct fd f;

		f = fdget(p->wq_fd);
		if (!f.file)
			return -ENXIO;
		if (!io_is_uring_fops(f.file)) {
			fdput(f);
			return -EINVAL;
		}
		fdput(f);
	}
	if (ctx->flags & IORING_SETUP_SQPOLL) {
		struct task_struct *tsk;
		struct io_sq_data *sqd;
		bool attached;

		ret = security_uring_sqpoll();
		if (ret)
			return ret;

		sqd = io_get_sq_data(p, &attached);
		if (IS_ERR(sqd)) {
			ret = PTR_ERR(sqd);
			goto err;
		}

		ctx->sq_creds = get_current_cred();
		ctx->sq_data = sqd;
		ctx->sq_thread_idle = msecs_to_jiffies(p->sq_thread_idle);
		if (!ctx->sq_thread_idle)
			ctx->sq_thread_idle = HZ;

		io_sq_thread_park(sqd);
		list_add(&ctx->sqd_list, &sqd->ctx_list);
		io_sqd_update_thread_idle(sqd);
		/* don't attach to a dying SQPOLL thread, would be racy */
		ret = (attached && !sqd->thread) ? -ENXIO : 0;
		io_sq_thread_unpark(sqd);

		if (ret < 0)
			goto err;
		if (attached)
			return 0;

		if (p->flags & IORING_SETUP_SQ_AFF) {
			int cpu = p->sq_thread_cpu;

			ret = -EINVAL;
			if (cpu >= nr_cpu_ids || !cpu_online(cpu))
				goto err_sqpoll;
			sqd->sq_cpu = cpu;
		} else {
			sqd->sq_cpu = -1;
		}

		sqd->task_pid = current->pid;
		sqd->task_tgid = current->tgid;
		tsk = create_io_thread(io_sq_thread, sqd, NUMA_NO_NODE);
		if (IS_ERR(tsk)) {
			ret = PTR_ERR(tsk);
			goto err_sqpoll;
		}

		sqd->thread = tsk;
		ret = io_uring_alloc_task_context(tsk, ctx);
		wake_up_new_task(tsk);
		if (ret)
			goto err;
	} else if (p->flags & IORING_SETUP_SQ_AFF) {
		/* Can't have SQ_AFF without SQPOLL */
		ret = -EINVAL;
		goto err;
	}

	return 0;
err_sqpoll:
	complete(&ctx->sq_data->exited);
err:
	io_sq_thread_finish(ctx);
	return ret;
}
