// SPDX-License-Identifier: GPL-2.0
/*
 * Contains the core associated with submission side polling of the SQ
 * ring, offloading submissions from the application to a kernel thread.
 */
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/file.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/audit.h>
#include <linux/security.h>
#include <linux/io_uring.h>

#include <uapi/linux/io_uring.h>

#include "io_uring.h"
#include "sqpoll.h"

#define IORING_SQPOLL_CAP_ENTRIES_VALUE 8

enum {
        IO_SQ_THREAD_SHOULD_STOP = 0,
        IO_SQ_THREAD_SHOULD_PARK,
};
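
/*
 * Release a park request and sqd->lock. The SHOULD_PARK bit is cleared
 * unconditionally; if other parkers are still pending after the
 * atomic_dec_return(), the bit is set again so the thread stays parked
 * for them.
 */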
void io_sq_thread_unpark(struct io_sq_data *sqd)
        __releases(&sqd->lock)
{
        WARN_ON_ONCE(sqd->thread == current);

        /*
         * Do the clear-then-reset dance instead of a conditional
         * clear_bit(), because a conditional clear would race with other
         * threads incrementing park_pending and setting the bit.
         */
        clear_bit(IO_SQ_THREAD_SHOULD_PARK, &sqd->state);
        if (atomic_dec_return(&sqd->park_pending))
                set_bit(IO_SQ_THREAD_SHOULD_PARK, &sqd->state);
        mutex_unlock(&sqd->lock);
}
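
/*
 * Ask the SQPOLL thread to park and acquire sqd->lock, waking the
 * thread so it notices the request. Callers bracket updates to shared
 * sqd state with a park/unpark pair, as io_sq_thread_finish() below
 * does:
 *
 *      io_sq_thread_park(sqd);
 *      list_del_init(&ctx->sqd_list);
 *      ...
 *      io_sq_thread_unpark(sqd);
 */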
void io_sq_thread_park(struct io_sq_data *sqd)
        __acquires(&sqd->lock)
{
        WARN_ON_ONCE(sqd->thread == current);

        atomic_inc(&sqd->park_pending);
        set_bit(IO_SQ_THREAD_SHOULD_PARK, &sqd->state);
        mutex_lock(&sqd->lock);
        if (sqd->thread)
                wake_up_process(sqd->thread);
}
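
/*
 * Ask the SQPOLL thread to exit and wait for it to complete
 * sqd->exited. The wakeup ensures a sleeping thread notices the
 * SHOULD_STOP bit.
 */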
void io_sq_thread_stop(struct io_sq_data *sqd)
{
        WARN_ON_ONCE(sqd->thread == current);
        WARN_ON_ONCE(test_bit(IO_SQ_THREAD_SHOULD_STOP, &sqd->state));

        set_bit(IO_SQ_THREAD_SHOULD_STOP, &sqd->state);
        mutex_lock(&sqd->lock);
        if (sqd->thread)
                wake_up_process(sqd->thread);
        mutex_unlock(&sqd->lock);
        wait_for_completion(&sqd->exited);
}
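
/*
 * Drop a reference to the shared SQPOLL data; the final put stops the
 * thread and frees the structure.
 */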
void io_put_sq_data(struct io_sq_data *sqd)
{
        if (refcount_dec_and_test(&sqd->refs)) {
                WARN_ON_ONCE(atomic_read(&sqd->park_pending));

                io_sq_thread_stop(sqd);
                kfree(sqd);
        }
}
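
/*
 * Recompute the idle timeout as the maximum sq_thread_idle across all
 * rings sharing this SQPOLL thread, so the thread never sleeps earlier
 * than any attached ring asked for.
 */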
static __cold void io_sqd_update_thread_idle(struct io_sq_data *sqd)
{
        struct io_ring_ctx *ctx;
        unsigned sq_thread_idle = 0;

        list_for_each_entry(ctx, &sqd->ctx_list, sqd_list)
                sq_thread_idle = max(sq_thread_idle, ctx->sq_thread_idle);
        sqd->sq_thread_idle = sq_thread_idle;
}
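
/*
 * Detach a ring from its SQPOLL thread: unlink it from the ctx list
 * under park, refresh the idle timeout, and drop the ring's reference
 * to the shared data.
 */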
void io_sq_thread_finish(struct io_ring_ctx *ctx)
{
        struct io_sq_data *sqd = ctx->sq_data;

        if (sqd) {
                io_sq_thread_park(sqd);
                list_del_init(&ctx->sqd_list);
                io_sqd_update_thread_idle(sqd);
                io_sq_thread_unpark(sqd);

                io_put_sq_data(sqd);
                ctx->sq_data = NULL;
        }
}
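
/*
 * Grab a reference to the io_sq_data behind the existing ring referred
 * to by p->wq_fd. Attaching across thread groups is refused with
 * -EPERM.
 */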
static struct io_sq_data *io_attach_sq_data(struct io_uring_params *p)
{
        struct io_ring_ctx *ctx_attach;
        struct io_sq_data *sqd;
        struct fd f;

        f = fdget(p->wq_fd);
        if (!f.file)
                return ERR_PTR(-ENXIO);
        if (!io_is_uring_fops(f.file)) {
                fdput(f);
                return ERR_PTR(-EINVAL);
        }

        ctx_attach = f.file->private_data;
        sqd = ctx_attach->sq_data;
        if (!sqd) {
                fdput(f);
                return ERR_PTR(-EINVAL);
        }
        if (sqd->task_tgid != current->tgid) {
                fdput(f);
                return ERR_PTR(-EPERM);
        }

        refcount_inc(&sqd->refs);
        fdput(f);
        return sqd;
}
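
/*
 * Find or allocate the io_sq_data for a new ring. A successful
 * IORING_SETUP_ATTACH_WQ attach returns the existing sqd with
 * *attached set; an -EPERM attach failure deliberately falls through
 * to setting up a fresh sqd and thread.
 */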
static struct io_sq_data *io_get_sq_data(struct io_uring_params *p,
                                         bool *attached)
{
        struct io_sq_data *sqd;

        *attached = false;
        if (p->flags & IORING_SETUP_ATTACH_WQ) {
                sqd = io_attach_sq_data(p);
                if (!IS_ERR(sqd)) {
                        *attached = true;
                        return sqd;
                }
                /* fall through for EPERM case, setup new sqd/task */
                if (PTR_ERR(sqd) != -EPERM)
                        return sqd;
        }

        sqd = kzalloc(sizeof(*sqd), GFP_KERNEL);
        if (!sqd)
                return ERR_PTR(-ENOMEM);

        atomic_set(&sqd->park_pending, 0);
        refcount_set(&sqd->refs, 1);
        INIT_LIST_HEAD(&sqd->ctx_list);
        mutex_init(&sqd->lock);
        init_waitqueue_head(&sqd->wait);
        init_completion(&sqd->exited);
        return sqd;
}
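
/* The state bits double as the event mask: non-zero means stop/park. */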
static inline bool io_sqd_events_pending(struct io_sq_data *sqd)
{
        return READ_ONCE(sqd->state);
}
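
/*
 * One submission pass over a single ring: reap IOPOLL completions if
 * any are queued, then submit pending SQEs under the ring's saved
 * credentials. Returns the number of SQEs submitted.
 */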
static int __io_sq_thread(struct io_ring_ctx *ctx, bool cap_entries)
{
        unsigned int to_submit;
        int ret = 0;

        to_submit = io_sqring_entries(ctx);
        /* if we're handling multiple rings, cap submit size for fairness */
        if (cap_entries && to_submit > IORING_SQPOLL_CAP_ENTRIES_VALUE)
                to_submit = IORING_SQPOLL_CAP_ENTRIES_VALUE;

        if (!wq_list_empty(&ctx->iopoll_list) || to_submit) {
                const struct cred *creds = NULL;

                if (ctx->sq_creds != current_cred())
                        creds = override_creds(ctx->sq_creds);

                mutex_lock(&ctx->uring_lock);
                if (!wq_list_empty(&ctx->iopoll_list))
                        io_do_iopoll(ctx, true);

                /*
                 * Don't submit if refs are dying. This is good for
                 * io_uring_register(), and io_ring_exit_work() relies
                 * on it as well.
                 */
                if (to_submit && likely(!percpu_ref_is_dying(&ctx->refs)) &&
                    !(ctx->flags & IORING_SETUP_R_DISABLED))
                        ret = io_submit_sqes(ctx, to_submit);
                mutex_unlock(&ctx->uring_lock);

                if (to_submit && wq_has_sleeper(&ctx->sqo_sq_wait))
                        wake_up(&ctx->sqo_sq_wait);
                if (creds)
                        revert_creds(creds);
        }

        return ret;
}
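
/*
 * Handle a pending park request or signal. sqd->lock is dropped while
 * the thread is parked and re-taken afterwards. Returns true if the
 * thread should exit.
 */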
static bool io_sqd_handle_event(struct io_sq_data *sqd)
{
        bool did_sig = false;
        struct ksignal ksig;

        if (test_bit(IO_SQ_THREAD_SHOULD_PARK, &sqd->state) ||
            signal_pending(current)) {
                mutex_unlock(&sqd->lock);
                if (signal_pending(current))
                        did_sig = get_signal(&ksig);
                cond_resched();
                mutex_lock(&sqd->lock);
        }
        return did_sig || test_bit(IO_SQ_THREAD_SHOULD_STOP, &sqd->state);
}
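
/*
 * Main loop of the SQPOLL kernel thread ("iou-sqp-<pid>"). It submits
 * on behalf of all attached rings for as long as there is progress;
 * once sq_thread_idle jiffies pass without progress, it sets
 * IORING_SQ_NEED_WAKEUP and sleeps until userspace wakes it up again
 * via io_uring_enter(2).
 */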
static int io_sq_thread(void *data)
{
        struct io_sq_data *sqd = data;
        struct io_ring_ctx *ctx;
        unsigned long timeout = 0;
        char buf[TASK_COMM_LEN];
        DEFINE_WAIT(wait);

        snprintf(buf, sizeof(buf), "iou-sqp-%d", sqd->task_pid);
        set_task_comm(current, buf);

        if (sqd->sq_cpu != -1)
                set_cpus_allowed_ptr(current, cpumask_of(sqd->sq_cpu));
        else
                set_cpus_allowed_ptr(current, cpu_online_mask);
        current->flags |= PF_NO_SETAFFINITY;

        mutex_lock(&sqd->lock);
        while (1) {
                bool cap_entries, sqt_spin = false;

                if (io_sqd_events_pending(sqd) || signal_pending(current)) {
                        if (io_sqd_handle_event(sqd))
                                break;
                        timeout = jiffies + sqd->sq_thread_idle;
                }

                cap_entries = !list_is_singular(&sqd->ctx_list);
                list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) {
                        int ret = __io_sq_thread(ctx, cap_entries);

                        if (!sqt_spin && (ret > 0 || !wq_list_empty(&ctx->iopoll_list)))
                                sqt_spin = true;
                }
                if (io_run_task_work())
                        sqt_spin = true;

                if (sqt_spin || !time_after(jiffies, timeout)) {
                        cond_resched();
                        if (sqt_spin)
                                timeout = jiffies + sqd->sq_thread_idle;
                        continue;
                }

                prepare_to_wait(&sqd->wait, &wait, TASK_INTERRUPTIBLE);
                if (!io_sqd_events_pending(sqd) && !task_work_pending(current)) {
                        bool needs_sched = true;

                        list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) {
                                atomic_or(IORING_SQ_NEED_WAKEUP,
                                                &ctx->rings->sq_flags);
                                if ((ctx->flags & IORING_SETUP_IOPOLL) &&
                                    !wq_list_empty(&ctx->iopoll_list)) {
                                        needs_sched = false;
                                        break;
                                }

                                /*
                                 * Ensure the store of the wakeup flag is not
                                 * reordered with the load of the SQ tail
                                 */
                                smp_mb__after_atomic();

                                if (io_sqring_entries(ctx)) {
                                        needs_sched = false;
                                        break;
                                }
                        }

                        if (needs_sched) {
                                mutex_unlock(&sqd->lock);
                                schedule();
                                mutex_lock(&sqd->lock);
                        }
                        list_for_each_entry(ctx, &sqd->ctx_list, sqd_list)
                                atomic_andnot(IORING_SQ_NEED_WAKEUP,
                                                &ctx->rings->sq_flags);
                }

                finish_wait(&sqd->wait, &wait);
                timeout = jiffies + sqd->sq_thread_idle;
        }

        io_uring_cancel_generic(true, sqd);
        sqd->thread = NULL;
        list_for_each_entry(ctx, &sqd->ctx_list, sqd_list)
                atomic_or(IORING_SQ_NEED_WAKEUP, &ctx->rings->sq_flags);
        io_run_task_work();
        mutex_unlock(&sqd->lock);

        complete(&sqd->exited);
        do_exit(0);
}
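
/*
 * Called when the SQ ring is full: wait until the SQPOLL thread has
 * made room in the ring, or until a signal arrives.
 */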
void io_sqpoll_wait_sq(struct io_ring_ctx *ctx)
{
        DEFINE_WAIT(wait);

        do {
                if (!io_sqring_full(ctx))
                        break;
                prepare_to_wait(&ctx->sqo_sq_wait, &wait, TASK_INTERRUPTIBLE);

                if (!io_sqring_full(ctx))
                        break;
                schedule();
        } while (!signal_pending(current));

        finish_wait(&ctx->sqo_sq_wait, &wait);
}
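
/*
 * Set up SQPOLL offload for a new ring: validate any attach request,
 * find or allocate the shared io_sq_data, apply the idle timeout and
 * optional CPU affinity, and spawn the kernel thread. From userspace
 * this path is reached with io_uring_setup(2) parameters along these
 * lines (illustrative sketch only, not part of this file):
 *
 *      struct io_uring_params p = {
 *              .flags          = IORING_SETUP_SQPOLL | IORING_SETUP_SQ_AFF,
 *              .sq_thread_cpu  = 1,     // pin the poll thread to CPU 1
 *              .sq_thread_idle = 2000,  // sleep after ~2000ms of idling
 *      };
 *      int ring_fd = io_uring_setup(entries, &p);
 */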
__cold int io_sq_offload_create(struct io_ring_ctx *ctx,
                                struct io_uring_params *p)
{
        int ret;

        /* Retain compatibility with failing for an invalid attach attempt */
        if ((ctx->flags & (IORING_SETUP_ATTACH_WQ | IORING_SETUP_SQPOLL)) ==
                                IORING_SETUP_ATTACH_WQ) {
                struct fd f;

                f = fdget(p->wq_fd);
                if (!f.file)
                        return -ENXIO;
                if (!io_is_uring_fops(f.file)) {
                        fdput(f);
                        return -EINVAL;
                }
                fdput(f);
        }
        if (ctx->flags & IORING_SETUP_SQPOLL) {
                struct task_struct *tsk;
                struct io_sq_data *sqd;
                bool attached;

                ret = security_uring_sqpoll();
                if (ret)
                        return ret;

                sqd = io_get_sq_data(p, &attached);
                if (IS_ERR(sqd)) {
                        ret = PTR_ERR(sqd);
                        goto err;
                }

                ctx->sq_creds = get_current_cred();
                ctx->sq_data = sqd;
                ctx->sq_thread_idle = msecs_to_jiffies(p->sq_thread_idle);
                if (!ctx->sq_thread_idle)
                        ctx->sq_thread_idle = HZ;

                io_sq_thread_park(sqd);
                list_add(&ctx->sqd_list, &sqd->ctx_list);
                io_sqd_update_thread_idle(sqd);
                /* don't attach to a dying SQPOLL thread, would be racy */
                ret = (attached && !sqd->thread) ? -ENXIO : 0;
                io_sq_thread_unpark(sqd);

                if (ret < 0)
                        goto err;
                if (attached)
                        return 0;

                if (p->flags & IORING_SETUP_SQ_AFF) {
                        int cpu = p->sq_thread_cpu;

                        ret = -EINVAL;
                        if (cpu >= nr_cpu_ids || !cpu_online(cpu))
                                goto err_sqpoll;
                        sqd->sq_cpu = cpu;
                } else {
                        sqd->sq_cpu = -1;
                }

                sqd->task_pid = current->pid;
                sqd->task_tgid = current->tgid;
                tsk = create_io_thread(io_sq_thread, sqd, NUMA_NO_NODE);
                if (IS_ERR(tsk)) {
                        ret = PTR_ERR(tsk);
                        goto err_sqpoll;
                }

                sqd->thread = tsk;
                ret = io_uring_alloc_task_context(tsk, ctx);
                wake_up_new_task(tsk);
                if (ret)
                        goto err;
        } else if (p->flags & IORING_SETUP_SQ_AFF) {
                /* Can't have SQ_AFF without SQPOLL */
                ret = -EINVAL;
                goto err;
        }

        return 0;
err_sqpoll:
        complete(&ctx->sq_data->exited);
err:
        io_sq_thread_finish(ctx);
        return ret;
}
