// SPDX-License-Identifier: GPL-2.0
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/file.h>
#include <linux/slab.h>
#include <linux/nospec.h>
#include <linux/io_uring.h>

#include <uapi/linux/io_uring.h>

#include "io_uring.h"
#include "rsrc.h"
#include "filetable.h"
#include "msg_ring.h"


/* All valid masks for MSG_RING */
#define IORING_MSG_RING_MASK		(IORING_MSG_RING_CQE_SKIP | \
					 IORING_MSG_RING_FLAGS_PASS)

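/*
 * Per-request state for IORING_OP_MSG_RING, overlaid on the request's
 * command area via io_kiocb_to_cmd().
 */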
struct io_msg {
        struct file *file;
        struct file *src_file;
        struct callback_head tw;
        u64 user_data;
        u32 len;
        u32 cmd;
        u32 src_fd;
        union {
                u32 dst_fd;
                u32 cqe_flags;
        };
        u32 flags;
};

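/* Drop the target ring's uring_lock taken via io_double_lock_ctx() */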
static void io_double_unlock_ctx(struct io_ring_ctx *octx)
{
        mutex_unlock(&octx->uring_lock);
}

static int io_double_lock_ctx(struct io_ring_ctx *octx,
                              unsigned int issue_flags)
{
        /*
         * To ensure proper ordering between the two ctxs, we can only
         * attempt a trylock on the target. If that fails and we already have
         * the source ctx lock, punt to io-wq.
         */
        if (!(issue_flags & IO_URING_F_UNLOCKED)) {
                if (!mutex_trylock(&octx->uring_lock))
                        return -EAGAIN;
                return 0;
        }
        mutex_lock(&octx->uring_lock);
        return 0;
}

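/*
 * Request cleanup: drop the reference on a source file that was grabbed via
 * io_msg_grab_file() but never handed over to the target ring.
 */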
void io_msg_ring_cleanup(struct io_kiocb *req)
{
        struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);

        if (WARN_ON_ONCE(!msg->src_file))
                return;

        fput(msg->src_file);
        msg->src_file = NULL;
}

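/*
 * If the target ring only allows completions to be posted by its submitter
 * task (->task_complete is set), the CQE must be posted from that task.
 * Return true if we are not that task and must punt via task_work.
 */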
static inline bool io_msg_need_remote(struct io_ring_ctx *target_ctx)
{
        if (!target_ctx->task_complete)
                return false;
        return current != target_ctx->submitter_task;
}

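/*
 * Queue @func as task_work on the target ring's submitter task. The request
 * is completed from that callback, so IOU_ISSUE_SKIP_COMPLETE is returned on
 * success; -EOWNERDEAD if the submitter task is already gone.
 */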
static int io_msg_exec_remote(struct io_kiocb *req, task_work_func_t func)
{
        struct io_ring_ctx *ctx = req->file->private_data;
        struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);
        struct task_struct *task = READ_ONCE(ctx->submitter_task);

        if (unlikely(!task))
                return -EOWNERDEAD;

        init_task_work(&msg->tw, func);
        if (task_work_add(ctx->submitter_task, &msg->tw, TWA_SIGNAL))
                return -EOWNERDEAD;

        return IOU_ISSUE_SKIP_COMPLETE;
}

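/*
 * Task_work callback, run by the target ring's submitter task: post the data
 * CQE to the target ring, then complete the MSG_RING request itself.
 */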
static void io_msg_tw_complete(struct callback_head *head)
{
        struct io_msg *msg = container_of(head, struct io_msg, tw);
        struct io_kiocb *req = cmd_to_io_kiocb(msg);
        struct io_ring_ctx *target_ctx = req->file->private_data;
        int ret = 0;

        if (current->flags & PF_EXITING) {
                ret = -EOWNERDEAD;
        } else {
                u32 flags = 0;

                if (msg->flags & IORING_MSG_RING_FLAGS_PASS)
                        flags = msg->cqe_flags;

                /*
                 * If the target ring is using IOPOLL mode, then we need to be
                 * holding the uring_lock for posting completions. Other ring
                 * types rely on the regular completion locking, which is
                 * handled while posting.
                 */
                if (target_ctx->flags & IORING_SETUP_IOPOLL)
                        mutex_lock(&target_ctx->uring_lock);
                if (!io_post_aux_cqe(target_ctx, msg->user_data, msg->len, flags))
                        ret = -EOVERFLOW;
                if (target_ctx->flags & IORING_SETUP_IOPOLL)
                        mutex_unlock(&target_ctx->uring_lock);
        }

        if (ret < 0)
                req_set_fail(req);
        io_req_queue_tw_complete(req, ret);
}

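/*
 * IORING_MSG_DATA: post a CQE carrying ->user_data and ->len (plus caller
 * supplied CQE flags when IORING_MSG_RING_FLAGS_PASS is set) to the target
 * ring.
 */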
static int io_msg_ring_data(struct io_kiocb *req, unsigned int issue_flags)
{
        struct io_ring_ctx *target_ctx = req->file->private_data;
        struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);
        u32 flags = 0;
        int ret;

        if (msg->src_fd || msg->flags & ~IORING_MSG_RING_FLAGS_PASS)
                return -EINVAL;
        if (!(msg->flags & IORING_MSG_RING_FLAGS_PASS) && msg->dst_fd)
                return -EINVAL;
        if (target_ctx->flags & IORING_SETUP_R_DISABLED)
                return -EBADFD;

        if (io_msg_need_remote(target_ctx))
                return io_msg_exec_remote(req, io_msg_tw_complete);

        if (msg->flags & IORING_MSG_RING_FLAGS_PASS)
                flags = msg->cqe_flags;

        ret = -EOVERFLOW;
        if (target_ctx->flags & IORING_SETUP_IOPOLL) {
                if (unlikely(io_double_lock_ctx(target_ctx, issue_flags)))
                        return -EAGAIN;
                if (io_post_aux_cqe(target_ctx, msg->user_data, msg->len, flags))
                        ret = 0;
                io_double_unlock_ctx(target_ctx);
        } else {
                if (io_post_aux_cqe(target_ctx, msg->user_data, msg->len, flags))
                        ret = 0;
        }
        return ret;
}

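/*
 * Look up ->src_fd in the source ring's fixed file table and take a
 * reference on the file. Returns NULL if the index is out of range or the
 * slot is empty.
 */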
static struct file *io_msg_grab_file(struct io_kiocb *req, unsigned int issue_flags)
{
        struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);
        struct io_ring_ctx *ctx = req->ctx;
        struct file *file = NULL;
        int idx = msg->src_fd;

        io_ring_submit_lock(ctx, issue_flags);
        if (likely(idx < ctx->nr_user_files)) {
                idx = array_index_nospec(idx, ctx->nr_user_files);
                file = io_file_from_index(&ctx->file_table, idx);
                if (file)
                        get_file(file);
        }
        io_ring_submit_unlock(ctx, issue_flags);
        return file;
}

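/*
 * Install the grabbed source file into the target ring's fixed file table at
 * the requested slot and, unless IORING_MSG_RING_CQE_SKIP was asked for,
 * post a CQE on the target ring to announce the new file.
 */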
static int io_msg_install_complete(struct io_kiocb *req, unsigned int issue_flags)
{
        struct io_ring_ctx *target_ctx = req->file->private_data;
        struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);
        struct file *src_file = msg->src_file;
        int ret;

        if (unlikely(io_double_lock_ctx(target_ctx, issue_flags)))
                return -EAGAIN;

        ret = __io_fixed_fd_install(target_ctx, src_file, msg->dst_fd);
        if (ret < 0)
                goto out_unlock;

        msg->src_file = NULL;
        req->flags &= ~REQ_F_NEED_CLEANUP;

        if (msg->flags & IORING_MSG_RING_CQE_SKIP)
                goto out_unlock;
        /*
         * If this fails, the target still received the file descriptor but
         * wasn't notified of the fact. This means that if this request
         * completes with -EOVERFLOW, then the sender must ensure that a
         * later IORING_OP_MSG_RING delivers the message.
         */
        if (!io_post_aux_cqe(target_ctx, msg->user_data, ret, 0))
                ret = -EOVERFLOW;
out_unlock:
        io_double_unlock_ctx(target_ctx);
        return ret;
}

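/*
 * Task_work callback for the file passing path, run by the target ring's
 * submitter task when the sender cannot install the file directly.
 */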
static void io_msg_tw_fd_complete(struct callback_head *head)
{
        struct io_msg *msg = container_of(head, struct io_msg, tw);
        struct io_kiocb *req = cmd_to_io_kiocb(msg);
        int ret = -EOWNERDEAD;

        if (!(current->flags & PF_EXITING))
                ret = io_msg_install_complete(req, IO_URING_F_UNLOCKED);
        if (ret < 0)
                req_set_fail(req);
        io_req_queue_tw_complete(req, ret);
}

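/*
 * IORING_MSG_SEND_FD: pass a file from the source ring's fixed file table to
 * the target ring's fixed file table.
 */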
static int io_msg_send_fd(struct io_kiocb *req, unsigned int issue_flags)
{
        struct io_ring_ctx *target_ctx = req->file->private_data;
        struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);
        struct io_ring_ctx *ctx = req->ctx;
        struct file *src_file = msg->src_file;

        if (msg->len)
                return -EINVAL;
        if (target_ctx == ctx)
                return -EINVAL;
        if (target_ctx->flags & IORING_SETUP_R_DISABLED)
                return -EBADFD;
        if (!src_file) {
                src_file = io_msg_grab_file(req, issue_flags);
                if (!src_file)
                        return -EBADF;
                msg->src_file = src_file;
                req->flags |= REQ_F_NEED_CLEANUP;
        }

        if (io_msg_need_remote(target_ctx))
                return io_msg_exec_remote(req, io_msg_tw_fd_complete);
        return io_msg_install_complete(req, issue_flags);
}

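/*
 * Prep handler: pull the MSG_RING parameters out of the SQE and reject any
 * flags we do not know about.
 */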
int io_msg_ring_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
        struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);

        if (unlikely(sqe->buf_index || sqe->personality))
                return -EINVAL;

        msg->src_file = NULL;
        msg->user_data = READ_ONCE(sqe->off);
        msg->len = READ_ONCE(sqe->len);
        msg->cmd = READ_ONCE(sqe->addr);
        msg->src_fd = READ_ONCE(sqe->addr3);
        msg->dst_fd = READ_ONCE(sqe->file_index);
        msg->flags = READ_ONCE(sqe->msg_ring_flags);
        if (msg->flags & ~IORING_MSG_RING_MASK)
                return -EINVAL;

        return 0;
}

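/*
 * Issue handler: verify the target fd refers to an io_uring instance, then
 * dispatch on the requested MSG_RING sub-command.
 */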
int io_msg_ring(struct io_kiocb *req, unsigned int issue_flags)
{
        struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);
        int ret;

        ret = -EBADFD;
        if (!io_is_uring_fops(req->file))
                goto done;

        switch (msg->cmd) {
        case IORING_MSG_DATA:
                ret = io_msg_ring_data(req, issue_flags);
                break;
        case IORING_MSG_SEND_FD:
                ret = io_msg_send_fd(req, issue_flags);
                break;
        default:
                ret = -EINVAL;
                break;
        }

done:
        if (ret < 0) {
                if (ret == -EAGAIN || ret == IOU_ISSUE_SKIP_COMPLETE)
                        return ret;
                req_set_fail(req);
        }
        io_req_set_res(req, ret, 0);
        return IOU_OK;
}