// SPDX-License-Identifier: GPL-2.0
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/file.h>
#include <linux/slab.h>
#include <linux/nospec.h>
#include <linux/io_uring.h>

#include <uapi/linux/io_uring.h>

#include "io_uring.h"
#include "rsrc.h"
#include "filetable.h"
#include "msg_ring.h"

/* All valid masks for MSG_RING */
#define IORING_MSG_RING_MASK		(IORING_MSG_RING_CQE_SKIP | \
					 IORING_MSG_RING_FLAGS_PASS)

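/*
 * Request state for IORING_OP_MSG_RING. dst_fd and cqe_flags share
 * storage: IORING_MSG_SEND_FD interprets the field as the target fixed
 * file slot, while IORING_MSG_DATA with IORING_MSG_RING_FLAGS_PASS
 * interprets it as the flags to set in the posted CQE.
 */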
struct io_msg {
	struct file *file;
	struct file *src_file;
	struct callback_head tw;
	u64 user_data;
	u32 len;
	u32 cmd;
	u32 src_fd;
	union {
		u32 dst_fd;
		u32 cqe_flags;
	};
	u32 flags;
};

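/* Drop the target ring's uring_lock taken via io_double_lock_ctx() */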
static void io_double_unlock_ctx(struct io_ring_ctx *octx)
{
	mutex_unlock(&octx->uring_lock);
}

static int io_double_lock_ctx(struct io_ring_ctx *octx,
			      unsigned int issue_flags)
{
	/*
	 * To ensure proper ordering between the two ctxs, we can only
	 * attempt a trylock on the target. If that fails and we already have
	 * the source ctx lock, punt to io-wq.
	 */
	if (!(issue_flags & IO_URING_F_UNLOCKED)) {
		if (!mutex_trylock(&octx->uring_lock))
			return -EAGAIN;
		return 0;
	}
	mutex_lock(&octx->uring_lock);
	return 0;
}

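/*
 * Cleanup for a SEND_FD request that never installed its file: drop the
 * reference that io_msg_grab_file() took on the source file.
 */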
void io_msg_ring_cleanup(struct io_kiocb *req)
{
	struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);

	if (WARN_ON_ONCE(!msg->src_file))
		return;

	fput(msg->src_file);
	msg->src_file = NULL;
}

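/*
 * Rings where only the submitter task may post completions need the
 * message punted to that task's task_work when issued by anyone else.
 */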
static inline bool io_msg_need_remote(struct io_ring_ctx *target_ctx)
{
	if (!target_ctx->task_complete)
		return false;
	return current != target_ctx->submitter_task;
}

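/*
 * Queue @func as task_work on the target ring's submitter task and tell
 * the core to skip posting a completion; the callback finishes the
 * request from the remote task's context.
 */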
static int io_msg_exec_remote(struct io_kiocb *req, task_work_func_t func)
{
	struct io_ring_ctx *ctx = req->file->private_data;
	struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);
	struct task_struct *task = READ_ONCE(ctx->submitter_task);

	if (unlikely(!task))
		return -EOWNERDEAD;

	init_task_work(&msg->tw, func);
	if (task_work_add(ctx->submitter_task, &msg->tw, TWA_SIGNAL))
		return -EOWNERDEAD;

	return IOU_ISSUE_SKIP_COMPLETE;
}

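/*
 * task_work callback for IORING_MSG_DATA, run from the target ring's
 * submitter task: post the CQE on the target ring from there.
 */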
static void io_msg_tw_complete(struct callback_head *head)
{
	struct io_msg *msg = container_of(head, struct io_msg, tw);
	struct io_kiocb *req = cmd_to_io_kiocb(msg);
	struct io_ring_ctx *target_ctx = req->file->private_data;
	int ret = 0;

	if (current->flags & PF_EXITING) {
		ret = -EOWNERDEAD;
	} else {
		u32 flags = 0;

		if (msg->flags & IORING_MSG_RING_FLAGS_PASS)
			flags = msg->cqe_flags;

		/*
		 * If the target ring is using IOPOLL mode, then we need to be
		 * holding the uring_lock for posting completions. Other ring
		 * types rely on the regular completion locking, which is
		 * handled while posting.
		 */
		if (target_ctx->flags & IORING_SETUP_IOPOLL)
			mutex_lock(&target_ctx->uring_lock);
		if (!io_post_aux_cqe(target_ctx, msg->user_data, msg->len, flags))
			ret = -EOVERFLOW;
		if (target_ctx->flags & IORING_SETUP_IOPOLL)
			mutex_unlock(&target_ctx->uring_lock);
	}

	if (ret < 0)
		req_set_fail(req);
	io_req_queue_tw_complete(req, ret);
}

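/*
 * IORING_MSG_DATA: post a CQE carrying user_data, len and, if
 * IORING_MSG_RING_FLAGS_PASS is set, cqe_flags to the target ring.
 */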
static int io_msg_ring_data(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_ring_ctx *target_ctx = req->file->private_data;
	struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);
	u32 flags = 0;
	int ret;

	if (msg->src_fd || msg->flags & ~IORING_MSG_RING_FLAGS_PASS)
		return -EINVAL;
	if (!(msg->flags & IORING_MSG_RING_FLAGS_PASS) && msg->dst_fd)
		return -EINVAL;
	if (target_ctx->flags & IORING_SETUP_R_DISABLED)
		return -EBADFD;

	if (io_msg_need_remote(target_ctx))
		return io_msg_exec_remote(req, io_msg_tw_complete);

	if (msg->flags & IORING_MSG_RING_FLAGS_PASS)
		flags = msg->cqe_flags;

	ret = -EOVERFLOW;
	if (target_ctx->flags & IORING_SETUP_IOPOLL) {
		if (unlikely(io_double_lock_ctx(target_ctx, issue_flags)))
			return -EAGAIN;
		if (io_post_aux_cqe(target_ctx, msg->user_data, msg->len, flags))
			ret = 0;
		io_double_unlock_ctx(target_ctx);
	} else {
		if (io_post_aux_cqe(target_ctx, msg->user_data, msg->len, flags))
			ret = 0;
	}
	return ret;
}

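/*
 * Look up src_fd in the source ring's fixed file table and take a
 * reference on it. Returns NULL if the slot is out of range or empty.
 */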
static struct file *io_msg_grab_file(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);
	struct io_ring_ctx *ctx = req->ctx;
	struct file *file = NULL;
	unsigned long file_ptr;
	int idx = msg->src_fd;

	io_ring_submit_lock(ctx, issue_flags);
	if (likely(idx < ctx->nr_user_files)) {
		idx = array_index_nospec(idx, ctx->nr_user_files);
		file_ptr = io_fixed_file_slot(&ctx->file_table, idx)->file_ptr;
		file = (struct file *) (file_ptr & FFS_MASK);
		if (file)
			get_file(file);
	}
	io_ring_submit_unlock(ctx, issue_flags);
	return file;
}

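/*
 * Install the grabbed source file into the target ring's fixed file
 * table, then notify the target with a CQE unless
 * IORING_MSG_RING_CQE_SKIP was requested. Needs the target uring_lock,
 * so this may fail with -EAGAIN if the trylock on the target fails.
 */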
static int io_msg_install_complete(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_ring_ctx *target_ctx = req->file->private_data;
	struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);
	struct file *src_file = msg->src_file;
	int ret;

	if (unlikely(io_double_lock_ctx(target_ctx, issue_flags)))
		return -EAGAIN;

	ret = __io_fixed_fd_install(target_ctx, src_file, msg->dst_fd);
	if (ret < 0)
		goto out_unlock;

	msg->src_file = NULL;
	req->flags &= ~REQ_F_NEED_CLEANUP;

	if (msg->flags & IORING_MSG_RING_CQE_SKIP)
		goto out_unlock;
	/*
	 * If this fails, the target still received the file descriptor but
	 * wasn't notified of the fact. This means that if this request
	 * completes with -EOVERFLOW, then the sender must ensure that a
	 * later IORING_OP_MSG_RING delivers the message.
	 */
	if (!io_post_aux_cqe(target_ctx, msg->user_data, ret, 0))
		ret = -EOVERFLOW;
out_unlock:
	io_double_unlock_ctx(target_ctx);
	return ret;
}

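/*
 * task_work callback for IORING_MSG_SEND_FD, run from the target ring's
 * submitter task: install the file into the target ring from there.
 */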
static void io_msg_tw_fd_complete(struct callback_head *head)
{
	struct io_msg *msg = container_of(head, struct io_msg, tw);
	struct io_kiocb *req = cmd_to_io_kiocb(msg);
	int ret = -EOWNERDEAD;

	if (!(current->flags & PF_EXITING))
		ret = io_msg_install_complete(req, IO_URING_F_UNLOCKED);
	if (ret < 0)
		req_set_fail(req);
	io_req_queue_tw_complete(req, ret);
}

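/*
 * IORING_MSG_SEND_FD: pass a file from this ring's fixed file table to
 * a slot in the target ring's fixed file table.
 */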
static int io_msg_send_fd(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_ring_ctx *target_ctx = req->file->private_data;
	struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);
	struct io_ring_ctx *ctx = req->ctx;
	struct file *src_file = msg->src_file;

	if (msg->len)
		return -EINVAL;
	if (target_ctx == ctx)
		return -EINVAL;
	if (target_ctx->flags & IORING_SETUP_R_DISABLED)
		return -EBADFD;
	if (!src_file) {
		src_file = io_msg_grab_file(req, issue_flags);
		if (!src_file)
			return -EBADF;
		msg->src_file = src_file;
		req->flags |= REQ_F_NEED_CLEANUP;
	}

	if (io_msg_need_remote(target_ctx))
		return io_msg_exec_remote(req, io_msg_tw_fd_complete);
	return io_msg_install_complete(req, issue_flags);
}

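/* Read the IORING_OP_MSG_RING fields from the SQE and validate flags */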
int io_msg_ring_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);

	if (unlikely(sqe->buf_index || sqe->personality))
		return -EINVAL;

	msg->src_file = NULL;
	msg->user_data = READ_ONCE(sqe->off);
	msg->len = READ_ONCE(sqe->len);
	msg->cmd = READ_ONCE(sqe->addr);
	msg->src_fd = READ_ONCE(sqe->addr3);
	msg->dst_fd = READ_ONCE(sqe->file_index);
	msg->flags = READ_ONCE(sqe->msg_ring_flags);
	if (msg->flags & ~IORING_MSG_RING_MASK)
		return -EINVAL;

	return 0;
}

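/*
 * Issue entry point for IORING_OP_MSG_RING: the request's file must be
 * another io_uring instance, which becomes the message target.
 */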
int io_msg_ring(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);
	int ret;

	ret = -EBADFD;
	if (!io_is_uring_fops(req->file))
		goto done;

	switch (msg->cmd) {
	case IORING_MSG_DATA:
		ret = io_msg_ring_data(req, issue_flags);
		break;
	case IORING_MSG_SEND_FD:
		ret = io_msg_send_fd(req, issue_flags);
		break;
	default:
		ret = -EINVAL;
		break;
	}

done:
	if (ret < 0) {
		if (ret == -EAGAIN || ret == IOU_ISSUE_SKIP_COMPLETE)
			return ret;
		req_set_fail(req);
	}
	io_req_set_res(req, ret, 0);
	return IOU_OK;
}