LCOV - code coverage report
Current view: top level - io_uring - io_uring.h (source / functions)
Test: coverage.info
Date: 2023-08-24 13:40:31

                 Hit   Total   Coverage
Lines:             0      85      0.0 %
Functions:         0       5      0.0 %

          Line data    Source code
       1             : #ifndef IOU_CORE_H
       2             : #define IOU_CORE_H
       3             : 
       4             : #include <linux/errno.h>
       5             : #include <linux/lockdep.h>
       6             : #include <linux/resume_user_mode.h>
       7             : #include <linux/kasan.h>
       8             : #include <linux/io_uring_types.h>
       9             : #include <uapi/linux/eventpoll.h>
      10             : #include "io-wq.h"
      11             : #include "slist.h"
      12             : #include "filetable.h"
      13             : 
      14             : #ifndef CREATE_TRACE_POINTS
      15             : #include <trace/events/io_uring.h>
      16             : #endif
      17             : 
      18             : enum {
      19             :         /*
       20             :          * A hint not to wake right away, but to delay until enough task_work
       21             :          * items are queued to match the number of CQEs the task is waiting for.
      22             :          *
       23             :          * Must not be used with requests generating more than one CQE.
      24             :          * It's also ignored unless IORING_SETUP_DEFER_TASKRUN is set.
      25             :          */
      26             :         IOU_F_TWQ_LAZY_WAKE                     = 1,
      27             : };
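
/*
 * Editor's sketch (not part of the original header): the hint is the flags
 * argument of __io_req_task_work_add(), declared further below.  A request
 * that generates exactly one CQE and targets a DEFER_TASKRUN ring could
 * queue its task_work as:
 *
 *      __io_req_task_work_add(req, IOU_F_TWQ_LAZY_WAKE);
 */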
      28             : 
      29             : enum {
      30             :         IOU_OK                  = 0,
      31             :         IOU_ISSUE_SKIP_COMPLETE = -EIOCBQUEUED,
      32             : 
      33             :         /*
       34             :          * Intended only when IO_URING_F_MULTISHOT is passed: it tells the
       35             :          * poll runner that the multishot request should be removed, with
       36             :          * the result set in req->cqe.res.
      37             :          */
      38             :         IOU_STOP_MULTISHOT      = -ECANCELED,
      39             : };
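
/*
 * Editor's sketch (assumed calling convention, not taken from this file):
 * an opcode's issue handler stores its result first and then returns one
 * of the codes above, roughly:
 *
 *      io_req_set_res(req, res, 0);
 *      return IOU_OK;                  (normal completion)
 *
 * or, from a multishot handler driven by poll that wants to stop:
 *
 *      return IOU_STOP_MULTISHOT;
 */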
      40             : 
      41             : struct io_uring_cqe *__io_get_cqe(struct io_ring_ctx *ctx, bool overflow);
      42             : bool io_req_cqe_overflow(struct io_kiocb *req);
      43             : int io_run_task_work_sig(struct io_ring_ctx *ctx);
      44             : void io_req_defer_failed(struct io_kiocb *req, s32 res);
      45             : void io_req_complete_post(struct io_kiocb *req, unsigned issue_flags);
      46             : bool io_post_aux_cqe(struct io_ring_ctx *ctx, u64 user_data, s32 res, u32 cflags);
      47             : bool io_aux_cqe(const struct io_kiocb *req, bool defer, s32 res, u32 cflags,
      48             :                 bool allow_overflow);
      49             : void __io_commit_cqring_flush(struct io_ring_ctx *ctx);
      50             : 
      51             : struct page **io_pin_pages(unsigned long ubuf, unsigned long len, int *npages);
      52             : 
      53             : struct file *io_file_get_normal(struct io_kiocb *req, int fd);
      54             : struct file *io_file_get_fixed(struct io_kiocb *req, int fd,
      55             :                                unsigned issue_flags);
      56             : 
      57             : void __io_req_task_work_add(struct io_kiocb *req, unsigned flags);
      58             : bool io_is_uring_fops(struct file *file);
      59             : bool io_alloc_async_data(struct io_kiocb *req);
      60             : void io_req_task_queue(struct io_kiocb *req);
      61             : void io_queue_iowq(struct io_kiocb *req, struct io_tw_state *ts_dont_use);
      62             : void io_req_task_complete(struct io_kiocb *req, struct io_tw_state *ts);
      63             : void io_req_task_queue_fail(struct io_kiocb *req, int ret);
      64             : void io_req_task_submit(struct io_kiocb *req, struct io_tw_state *ts);
      65             : void tctx_task_work(struct callback_head *cb);
      66             : __cold void io_uring_cancel_generic(bool cancel_all, struct io_sq_data *sqd);
      67             : int io_uring_alloc_task_context(struct task_struct *task,
      68             :                                 struct io_ring_ctx *ctx);
      69             : 
      70             : int io_ring_add_registered_file(struct io_uring_task *tctx, struct file *file,
      71             :                                      int start, int end);
      72             : 
      73             : int io_poll_issue(struct io_kiocb *req, struct io_tw_state *ts);
      74             : int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr);
      75             : int io_do_iopoll(struct io_ring_ctx *ctx, bool force_nonspin);
      76             : void io_free_batch_list(struct io_ring_ctx *ctx, struct io_wq_work_node *node);
      77             : int io_req_prep_async(struct io_kiocb *req);
      78             : 
      79             : struct io_wq_work *io_wq_free_work(struct io_wq_work *work);
      80             : void io_wq_submit_work(struct io_wq_work *work);
      81             : 
      82             : void io_free_req(struct io_kiocb *req);
      83             : void io_queue_next(struct io_kiocb *req);
      84             : void io_task_refs_refill(struct io_uring_task *tctx);
      85             : bool __io_alloc_req_refill(struct io_ring_ctx *ctx);
      86             : 
      87             : bool io_match_task_safe(struct io_kiocb *head, struct task_struct *task,
      88             :                         bool cancel_all);
      89             : 
      90             : #define io_lockdep_assert_cq_locked(ctx)                                \
      91             :         do {                                                            \
      92             :                 lockdep_assert(in_task());                              \
      93             :                                                                         \
      94             :                 if (ctx->flags & IORING_SETUP_IOPOLL) {                  \
      95             :                         lockdep_assert_held(&ctx->uring_lock);           \
      96             :                 } else if (!ctx->task_complete) {                    \
      97             :                         lockdep_assert_held(&ctx->completion_lock);      \
      98             :                 } else if (ctx->submitter_task->flags & PF_EXITING) { \
      99             :                         lockdep_assert(current_work());                 \
     100             :                 } else {                                                \
     101             :                         lockdep_assert(current == ctx->submitter_task);      \
     102             :                 }                                                       \
     103             :         } while (0)
     104             : 
     105             : static inline void io_req_task_work_add(struct io_kiocb *req)
     106             : {
     107           0 :         __io_req_task_work_add(req, 0);
     108             : }
     109             : 
     110             : #define io_for_each_link(pos, head) \
     111             :         for (pos = (head); pos; pos = pos->link)
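
/*
 * Editor's sketch: io_for_each_link() walks a request and everything linked
 * behind it via ->link from struct io_kiocb.  The helper below is
 * hypothetical and only illustrates the iteration.
 */
static inline unsigned int io_example_link_count(struct io_kiocb *head)
{
        struct io_kiocb *pos;
        unsigned int nr = 0;

        io_for_each_link(pos, head)
                nr++;
        return nr;
}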
     112             : 
     113           0 : static inline struct io_uring_cqe *io_get_cqe_overflow(struct io_ring_ctx *ctx,
     114             :                                                        bool overflow)
     115             : {
     116           0 :         io_lockdep_assert_cq_locked(ctx);
     117             : 
     118           0 :         if (likely(ctx->cqe_cached < ctx->cqe_sentinel)) {
     119           0 :                 struct io_uring_cqe *cqe = ctx->cqe_cached;
     120             : 
     121           0 :                 ctx->cached_cq_tail++;
     122           0 :                 ctx->cqe_cached++;
     123           0 :                 if (ctx->flags & IORING_SETUP_CQE32)
     124           0 :                         ctx->cqe_cached++;
     125             :                 return cqe;
     126             :         }
     127             : 
     128           0 :         return __io_get_cqe(ctx, overflow);
     129             : }
     130             : 
     131             : static inline struct io_uring_cqe *io_get_cqe(struct io_ring_ctx *ctx)
     132             : {
     133           0 :         return io_get_cqe_overflow(ctx, false);
     134             : }
     135             : 
     136           0 : static inline bool __io_fill_cqe_req(struct io_ring_ctx *ctx,
     137             :                                      struct io_kiocb *req)
     138             : {
     139             :         struct io_uring_cqe *cqe;
     140             : 
     141             :         /*
     142             :          * If we can't get a cq entry, userspace overflowed the
     143             :          * submission (by quite a lot). Increment the overflow count in
     144             :          * the ring.
     145             :          */
     146           0 :         cqe = io_get_cqe(ctx);
     147           0 :         if (unlikely(!cqe))
     148             :                 return false;
     149             : 
     150           0 :         trace_io_uring_complete(req->ctx, req, req->cqe.user_data,
     151             :                                 req->cqe.res, req->cqe.flags,
     152             :                                 (req->flags & REQ_F_CQE32_INIT) ? req->extra1 : 0,
     153             :                                 (req->flags & REQ_F_CQE32_INIT) ? req->extra2 : 0);
     154             : 
     155           0 :         memcpy(cqe, &req->cqe, sizeof(*cqe));
     156             : 
     157           0 :         if (ctx->flags & IORING_SETUP_CQE32) {
     158           0 :                 u64 extra1 = 0, extra2 = 0;
     159             : 
     160           0 :                 if (req->flags & REQ_F_CQE32_INIT) {
     161           0 :                         extra1 = req->extra1;
     162           0 :                         extra2 = req->extra2;
     163             :                 }
     164             : 
     165           0 :                 WRITE_ONCE(cqe->big_cqe[0], extra1);
     166           0 :                 WRITE_ONCE(cqe->big_cqe[1], extra2);
     167             :         }
     168             :         return true;
     169             : }
     170             : 
     171           0 : static inline bool io_fill_cqe_req(struct io_ring_ctx *ctx,
     172             :                                    struct io_kiocb *req)
     173             : {
     174           0 :         if (likely(__io_fill_cqe_req(ctx, req)))
     175             :                 return true;
     176           0 :         return io_req_cqe_overflow(req);
     177             : }
     178             : 
     179             : static inline void req_set_fail(struct io_kiocb *req)
     180             : {
     181           0 :         req->flags |= REQ_F_FAIL;
     182           0 :         if (req->flags & REQ_F_CQE_SKIP) {
     183             :                 req->flags &= ~REQ_F_CQE_SKIP;
     184           0 :                 req->flags |= REQ_F_SKIP_LINK_CQES;
     185             :         }
     186             : }
     187             : 
     188             : static inline void io_req_set_res(struct io_kiocb *req, s32 res, u32 cflags)
     189             : {
     190           0 :         req->cqe.res = res;
     191           0 :         req->cqe.flags = cflags;
     192             : }
     193             : 
     194             : static inline bool req_has_async_data(struct io_kiocb *req)
     195             : {
     196           0 :         return req->flags & REQ_F_ASYNC_DATA;
     197             : }
     198             : 
     199             : static inline void io_put_file(struct file *file)
     200             : {
     201           0 :         if (file)
     202           0 :                 fput(file);
     203             : }
     204             : 
     205             : static inline void io_ring_submit_unlock(struct io_ring_ctx *ctx,
     206             :                                          unsigned issue_flags)
     207             : {
     208             :         lockdep_assert_held(&ctx->uring_lock);
     209           0 :         if (issue_flags & IO_URING_F_UNLOCKED)
     210           0 :                 mutex_unlock(&ctx->uring_lock);
     211             : }
     212             : 
     213             : static inline void io_ring_submit_lock(struct io_ring_ctx *ctx,
     214             :                                        unsigned issue_flags)
     215             : {
     216             :         /*
     217             :          * "Normal" inline submissions always hold the uring_lock, since we
     218             :          * grab it from the system call. Same is true for the SQPOLL offload.
     219             :          * The only exception is when we've detached the request and issue it
      220             :          * from an async worker thread, in which case we grab the lock here.
     221             :          */
     222           0 :         if (issue_flags & IO_URING_F_UNLOCKED)
     223           0 :                 mutex_lock(&ctx->uring_lock);
     224             :         lockdep_assert_held(&ctx->uring_lock);
     225             : }
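
/*
 * Editor's sketch (hypothetical helper): the conditional pair above lets
 * the same code run both inline, where ->uring_lock is already held, and
 * from io-wq, where IO_URING_F_UNLOCKED is set:
 */
static inline void io_example_update_locked(struct io_ring_ctx *ctx,
                                            unsigned issue_flags)
{
        io_ring_submit_lock(ctx, issue_flags);
        /* ... touch state protected by ctx->uring_lock ... */
        io_ring_submit_unlock(ctx, issue_flags);
}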
     226             : 
     227             : static inline void io_commit_cqring(struct io_ring_ctx *ctx)
     228             : {
     229             :         /* order cqe stores with ring update */
     230           0 :         smp_store_release(&ctx->rings->cq.tail, ctx->cached_cq_tail);
     231             : }
     232             : 
     233             : static inline void io_poll_wq_wake(struct io_ring_ctx *ctx)
     234             : {
     235           0 :         if (wq_has_sleeper(&ctx->poll_wq))
     236           0 :                 __wake_up(&ctx->poll_wq, TASK_NORMAL, 0,
     237             :                                 poll_to_key(EPOLL_URING_WAKE | EPOLLIN));
     238             : }
     239             : 
     240             : static inline void io_cqring_wake(struct io_ring_ctx *ctx)
     241             : {
     242             :         /*
     243             :          * Trigger waitqueue handler on all waiters on our waitqueue. This
      244             :          * won't necessarily wake up all the tasks; io_should_wake() will make
     245             :          * that decision.
     246             :          *
     247             :          * Pass in EPOLLIN|EPOLL_URING_WAKE as the poll wakeup key. The latter
      248             :          * is set in the mask so that if we recurse back into our own poll
      249             :          * waitqueue handlers, we know we have a dependency on eventfd or
      250             :          * epoll and should terminate the multishot poll at that point.
     251             :          */
     252           0 :         if (wq_has_sleeper(&ctx->cq_wait))
     253           0 :                 __wake_up(&ctx->cq_wait, TASK_NORMAL, 0,
     254             :                                 poll_to_key(EPOLL_URING_WAKE | EPOLLIN));
     255             : }
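
/*
 * Editor's sketch (hypothetical helper): outside the batched completion
 * path, posting a CQE roughly follows fill -> commit tail -> wake, with
 * io_fill_cqe_req() falling back to the overflow list when the ring is
 * full.  This assumes the CQ is locked as io_lockdep_assert_cq_locked()
 * requires; the real code in io_uring.c also handles eventfd signalling.
 */
static inline void io_example_post_cqe(struct io_ring_ctx *ctx,
                                       struct io_kiocb *req)
{
        io_fill_cqe_req(ctx, req);
        io_commit_cqring(ctx);
        io_cqring_wake(ctx);
}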
     256             : 
     257             : static inline bool io_sqring_full(struct io_ring_ctx *ctx)
     258             : {
     259           0 :         struct io_rings *r = ctx->rings;
     260             : 
     261           0 :         return READ_ONCE(r->sq.tail) - ctx->cached_sq_head == ctx->sq_entries;
     262             : }
     263             : 
     264             : static inline unsigned int io_sqring_entries(struct io_ring_ctx *ctx)
     265             : {
     266           0 :         struct io_rings *rings = ctx->rings;
     267             :         unsigned int entries;
     268             : 
     269             :         /* make sure SQ entry isn't read before tail */
     270           0 :         entries = smp_load_acquire(&rings->sq.tail) - ctx->cached_sq_head;
     271           0 :         return min(entries, ctx->sq_entries);
     272             : }
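
/*
 * Editor's sketch (hypothetical helper): a submission pump sizes its batch
 * with io_sqring_entries() before calling io_submit_sqes(), declared above.
 * This is roughly what the SQPOLL thread does; locking is omitted here.
 */
static inline int io_example_submit_pending(struct io_ring_ctx *ctx)
{
        unsigned int to_submit = io_sqring_entries(ctx);

        if (!to_submit)
                return 0;
        return io_submit_sqes(ctx, to_submit);
}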
     273             : 
     274           0 : static inline int io_run_task_work(void)
     275             : {
     276             :         /*
     277             :          * Always check-and-clear the task_work notification signal. With how
     278             :          * signaling works for task_work, we can find it set with nothing to
     279             :          * run. We need to clear it for that case, like get_signal() does.
     280             :          */
     281           0 :         if (test_thread_flag(TIF_NOTIFY_SIGNAL))
     282             :                 clear_notify_signal();
     283             :         /*
     284             :          * PF_IO_WORKER never returns to userspace, so check here if we have
     285             :          * notify work that needs processing.
     286             :          */
     287           0 :         if (current->flags & PF_IO_WORKER &&
     288           0 :             test_thread_flag(TIF_NOTIFY_RESUME)) {
     289           0 :                 __set_current_state(TASK_RUNNING);
     290           0 :                 resume_user_mode_work(NULL);
     291             :         }
     292           0 :         if (task_work_pending(current)) {
     293           0 :                 __set_current_state(TASK_RUNNING);
     294           0 :                 task_work_run();
     295           0 :                 return 1;
     296             :         }
     297             : 
     298             :         return 0;
     299             : }
     300             : 
     301             : static inline bool io_task_work_pending(struct io_ring_ctx *ctx)
     302             : {
     303           0 :         return task_work_pending(current) || !wq_list_empty(&ctx->work_llist);
     304             : }
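
/*
 * Editor's sketch (hypothetical helper): a wait path would drain pending
 * task_work with the helpers above before deciding to sleep.  Deferred
 * per-ring work (IORING_SETUP_DEFER_TASKRUN) is run separately in
 * io_uring.c and is not covered by io_run_task_work().
 */
static inline bool io_example_flush_tw(struct io_ring_ctx *ctx)
{
        if (!io_task_work_pending(ctx))
                return false;
        return io_run_task_work() > 0;
}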
     305             : 
     306             : static inline void io_tw_lock(struct io_ring_ctx *ctx, struct io_tw_state *ts)
     307             : {
     308           0 :         if (!ts->locked) {
     309           0 :                 mutex_lock(&ctx->uring_lock);
     310           0 :                 ts->locked = true;
     311             :         }
     312             : }
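
/*
 * Editor's sketch (hypothetical callback): a task_work handler that needs
 * ->uring_lock takes it through io_tw_lock(); ts->locked then tells the
 * task_work runner that it must drop the mutex once the batch is done.
 */
static inline void io_example_tw_handler(struct io_kiocb *req,
                                         struct io_tw_state *ts)
{
        io_tw_lock(req->ctx, ts);
        /* ... work on ctx state that requires ->uring_lock ... */
}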
     313             : 
     314             : /*
      315             :  * Don't complete immediately but use the deferred completion infrastructure.
     316             :  * Protected by ->uring_lock and can only be used either with
     317             :  * IO_URING_F_COMPLETE_DEFER or inside a tw handler holding the mutex.
     318             :  */
     319             : static inline void io_req_complete_defer(struct io_kiocb *req)
     320             :         __must_hold(&req->ctx->uring_lock)
     321             : {
     322           0 :         struct io_submit_state *state = &req->ctx->submit_state;
     323             : 
     324             :         lockdep_assert_held(&req->ctx->uring_lock);
     325             : 
     326           0 :         wq_list_add_tail(&req->comp_list, &state->compl_reqs);
     327             : }
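
/*
 * Editor's sketch (hypothetical helper): a completion site defers only when
 * the issue path advertises it via IO_URING_F_COMPLETE_DEFER; otherwise the
 * CQE is posted immediately.
 */
static inline void io_example_complete(struct io_kiocb *req,
                                       unsigned issue_flags)
{
        if (issue_flags & IO_URING_F_COMPLETE_DEFER)
                io_req_complete_defer(req);
        else
                io_req_complete_post(req, issue_flags);
}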
     328             : 
     329             : static inline void io_commit_cqring_flush(struct io_ring_ctx *ctx)
     330             : {
     331           0 :         if (unlikely(ctx->off_timeout_used || ctx->drain_active ||
     332             :                      ctx->has_evfd || ctx->poll_activated))
     333           0 :                 __io_commit_cqring_flush(ctx);
     334             : }
     335             : 
     336           0 : static inline void io_get_task_refs(int nr)
     337             : {
     338           0 :         struct io_uring_task *tctx = current->io_uring;
     339             : 
     340           0 :         tctx->cached_refs -= nr;
     341           0 :         if (unlikely(tctx->cached_refs < 0))
     342           0 :                 io_task_refs_refill(tctx);
     343           0 : }
     344             : 
     345             : static inline bool io_req_cache_empty(struct io_ring_ctx *ctx)
     346             : {
     347           0 :         return !ctx->submit_state.free_list.next;
     348             : }
     349             : 
     350             : extern struct kmem_cache *req_cachep;
     351             : 
     352             : static inline struct io_kiocb *io_extract_req(struct io_ring_ctx *ctx)
     353             : {
     354             :         struct io_kiocb *req;
     355             : 
     356           0 :         req = container_of(ctx->submit_state.free_list.next, struct io_kiocb, comp_list);
     357           0 :         kasan_unpoison_object_data(req_cachep, req);
     358           0 :         wq_stack_extract(&ctx->submit_state.free_list);
     359             :         return req;
     360             : }
     361             : 
     362             : static inline bool io_alloc_req(struct io_ring_ctx *ctx, struct io_kiocb **req)
     363             : {
     364           0 :         if (unlikely(io_req_cache_empty(ctx))) {
     365           0 :                 if (!__io_alloc_req_refill(ctx))
     366             :                         return false;
     367             :         }
     368           0 :         *req = io_extract_req(ctx);
     369             :         return true;
     370             : }
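
/*
 * Editor's sketch (hypothetical helper): callers obtain cached requests
 * like this; io_alloc_req() refills the cache from the slab when it runs
 * dry and only fails if that refill fails.
 */
static inline struct io_kiocb *io_example_get_req(struct io_ring_ctx *ctx)
{
        struct io_kiocb *req;

        if (unlikely(!io_alloc_req(ctx, &req)))
                return NULL;
        return req;
}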
     371             : 
     372             : static inline bool io_allowed_defer_tw_run(struct io_ring_ctx *ctx)
     373             : {
     374           0 :         return likely(ctx->submitter_task == current);
     375             : }
     376             : 
     377             : static inline bool io_allowed_run_tw(struct io_ring_ctx *ctx)
     378             : {
     379           0 :         return likely(!(ctx->flags & IORING_SETUP_DEFER_TASKRUN) ||
     380             :                       ctx->submitter_task == current);
     381             : }
     382             : 
     383             : static inline void io_req_queue_tw_complete(struct io_kiocb *req, s32 res)
     384             : {
     385           0 :         io_req_set_res(req, res, 0);
     386           0 :         req->io_task_work.func = io_req_task_complete;
     387           0 :         io_req_task_work_add(req);
     388             : }
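
/*
 * Editor's note (assumed usage): this is the helper for completing a
 * request from a context that may not own the ring, e.g. a cancellation
 * path doing:
 *
 *      io_req_queue_tw_complete(req, -ECANCELED);
 */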
     389             : 
     390             : /*
     391             :  * IORING_SETUP_SQE128 contexts allocate twice the normal SQE size for each
     392             :  * slot.
     393             :  */
     394             : static inline size_t uring_sqe_size(struct io_ring_ctx *ctx)
     395             : {
     396           0 :         if (ctx->flags & IORING_SETUP_SQE128)
     397             :                 return 2 * sizeof(struct io_uring_sqe);
     398             :         return sizeof(struct io_uring_sqe);
     399             : }
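
/*
 * Editor's sketch (hypothetical helper): anything indexing the SQE array
 * must scale by uring_sqe_size() rather than sizeof(struct io_uring_sqe),
 * since SQE128 rings allocate double-sized slots.
 */
static inline struct io_uring_sqe *io_example_sqe_at(struct io_ring_ctx *ctx,
                                                     unsigned int idx)
{
        return (struct io_uring_sqe *)((char *)ctx->sq_sqes +
                                       (size_t)idx * uring_sqe_size(ctx));
}
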
     400             : #endif

Generated by: LCOV version 1.14