Line data Source code
1 : // SPDX-License-Identifier: GPL-2.0-only
2 : /*
3 : * "splice": joining two ropes together by interweaving their strands.
4 : *
5 : * This is the "extended pipe" functionality, where a pipe is used as
6 : * an arbitrary in-memory buffer. Think of a pipe as a small kernel
7 : * buffer that you can use to transfer data from one end to the other.
8 : *
9 : * The traditional unix read/write is extended with a "splice()" operation
10 : * that transfers data buffers to or from a pipe buffer.
11 : *
12 : * Named by Larry McVoy, original implementation from Linus, extended by
13 : * Jens to support splicing to files, network, direct splicing, etc and
14 : * fixing lots of bugs.
15 : *
16 : * Copyright (C) 2005-2006 Jens Axboe <axboe@kernel.dk>
17 : * Copyright (C) 2005-2006 Linus Torvalds <torvalds@osdl.org>
18 : * Copyright (C) 2006 Ingo Molnar <mingo@elte.hu>
19 : *
20 : */
21 : #include <linux/bvec.h>
22 : #include <linux/fs.h>
23 : #include <linux/file.h>
24 : #include <linux/pagemap.h>
25 : #include <linux/splice.h>
26 : #include <linux/memcontrol.h>
27 : #include <linux/mm_inline.h>
28 : #include <linux/swap.h>
29 : #include <linux/writeback.h>
30 : #include <linux/export.h>
31 : #include <linux/syscalls.h>
32 : #include <linux/uio.h>
33 : #include <linux/fsnotify.h>
34 : #include <linux/security.h>
35 : #include <linux/gfp.h>
36 : #include <linux/socket.h>
37 : #include <linux/sched/signal.h>
38 :
39 : #include "internal.h"
40 :
41 : /*
42 : * Splice doesn't support FMODE_NOWAIT. Since pipes may set this flag to
43 : * indicate they support non-blocking reads or writes, we must clear it
44 : * here if set to avoid blocking other users of this pipe if splice is
45 : * being done on it.
46 : */
47 0 : static noinline void noinline pipe_clear_nowait(struct file *file)
48 : {
49 0 : fmode_t fmode = READ_ONCE(file->f_mode);
50 :
51 : do {
52 0 : if (!(fmode & FMODE_NOWAIT))
53 : break;
54 0 : } while (!try_cmpxchg(&file->f_mode, &fmode, fmode & ~FMODE_NOWAIT));
55 0 : }
56 :
57 : /*
58 : * Attempt to steal a page from a pipe buffer. This should perhaps go into
59 : * a vm helper function, it's already simplified quite a bit by the
60 : * addition of remove_mapping(). If success is returned, the caller may
61 : * attempt to reuse this page for another destination.
62 : */
63 0 : static bool page_cache_pipe_buf_try_steal(struct pipe_inode_info *pipe,
64 : struct pipe_buffer *buf)
65 : {
66 0 : struct folio *folio = page_folio(buf->page);
67 : struct address_space *mapping;
68 :
69 0 : folio_lock(folio);
70 :
71 0 : mapping = folio_mapping(folio);
72 0 : if (mapping) {
73 0 : WARN_ON(!folio_test_uptodate(folio));
74 :
75 : /*
76 : * At least for ext2 with nobh option, we need to wait on
77 : * writeback completing on this folio, since we'll remove it
78 : * from the pagecache. Otherwise truncate wont wait on the
79 : * folio, allowing the disk blocks to be reused by someone else
80 : * before we actually wrote our data to them. fs corruption
81 : * ensues.
82 : */
83 0 : folio_wait_writeback(folio);
84 :
85 0 : if (folio_has_private(folio) &&
86 0 : !filemap_release_folio(folio, GFP_KERNEL))
87 : goto out_unlock;
88 :
89 : /*
90 : * If we succeeded in removing the mapping, set LRU flag
91 : * and return good.
92 : */
93 0 : if (remove_mapping(mapping, folio)) {
94 0 : buf->flags |= PIPE_BUF_FLAG_LRU;
95 0 : return true;
96 : }
97 : }
98 :
99 : /*
100 : * Raced with truncate or failed to remove folio from current
101 : * address space, unlock and return failure.
102 : */
103 : out_unlock:
104 0 : folio_unlock(folio);
105 0 : return false;
106 : }
107 :
108 0 : static void page_cache_pipe_buf_release(struct pipe_inode_info *pipe,
109 : struct pipe_buffer *buf)
110 : {
111 0 : put_page(buf->page);
112 0 : buf->flags &= ~PIPE_BUF_FLAG_LRU;
113 0 : }
114 :
115 : /*
116 : * Check whether the contents of buf is OK to access. Since the content
117 : * is a page cache page, IO may be in flight.
118 : */
119 0 : static int page_cache_pipe_buf_confirm(struct pipe_inode_info *pipe,
120 : struct pipe_buffer *buf)
121 : {
122 0 : struct page *page = buf->page;
123 : int err;
124 :
125 0 : if (!PageUptodate(page)) {
126 0 : lock_page(page);
127 :
128 : /*
129 : * Page got truncated/unhashed. This will cause a 0-byte
130 : * splice, if this is the first page.
131 : */
132 0 : if (!page->mapping) {
133 : err = -ENODATA;
134 : goto error;
135 : }
136 :
137 : /*
138 : * Uh oh, read-error from disk.
139 : */
140 0 : if (!PageUptodate(page)) {
141 : err = -EIO;
142 : goto error;
143 : }
144 :
145 : /*
146 : * Page is ok afterall, we are done.
147 : */
148 0 : unlock_page(page);
149 : }
150 :
151 : return 0;
152 : error:
153 0 : unlock_page(page);
154 0 : return err;
155 : }
156 :
157 : const struct pipe_buf_operations page_cache_pipe_buf_ops = {
158 : .confirm = page_cache_pipe_buf_confirm,
159 : .release = page_cache_pipe_buf_release,
160 : .try_steal = page_cache_pipe_buf_try_steal,
161 : .get = generic_pipe_buf_get,
162 : };
163 :
164 0 : static bool user_page_pipe_buf_try_steal(struct pipe_inode_info *pipe,
165 : struct pipe_buffer *buf)
166 : {
167 0 : if (!(buf->flags & PIPE_BUF_FLAG_GIFT))
168 : return false;
169 :
170 0 : buf->flags |= PIPE_BUF_FLAG_LRU;
171 0 : return generic_pipe_buf_try_steal(pipe, buf);
172 : }
173 :
174 : static const struct pipe_buf_operations user_page_pipe_buf_ops = {
175 : .release = page_cache_pipe_buf_release,
176 : .try_steal = user_page_pipe_buf_try_steal,
177 : .get = generic_pipe_buf_get,
178 : };
179 :
180 0 : static void wakeup_pipe_readers(struct pipe_inode_info *pipe)
181 : {
182 0 : smp_mb();
183 0 : if (waitqueue_active(&pipe->rd_wait))
184 0 : wake_up_interruptible(&pipe->rd_wait);
185 0 : kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
186 0 : }
187 :
188 : /**
189 : * splice_to_pipe - fill passed data into a pipe
190 : * @pipe: pipe to fill
191 : * @spd: data to fill
192 : *
193 : * Description:
194 : * @spd contains a map of pages and len/offset tuples, along with
195 : * the struct pipe_buf_operations associated with these pages. This
196 : * function will link that data to the pipe.
197 : *
198 : */
199 0 : ssize_t splice_to_pipe(struct pipe_inode_info *pipe,
200 : struct splice_pipe_desc *spd)
201 : {
202 0 : unsigned int spd_pages = spd->nr_pages;
203 0 : unsigned int tail = pipe->tail;
204 0 : unsigned int head = pipe->head;
205 0 : unsigned int mask = pipe->ring_size - 1;
206 0 : int ret = 0, page_nr = 0;
207 :
208 0 : if (!spd_pages)
209 : return 0;
210 :
211 0 : if (unlikely(!pipe->readers)) {
212 0 : send_sig(SIGPIPE, current, 0);
213 0 : ret = -EPIPE;
214 0 : goto out;
215 : }
216 :
217 0 : while (!pipe_full(head, tail, pipe->max_usage)) {
218 0 : struct pipe_buffer *buf = &pipe->bufs[head & mask];
219 :
220 0 : buf->page = spd->pages[page_nr];
221 0 : buf->offset = spd->partial[page_nr].offset;
222 0 : buf->len = spd->partial[page_nr].len;
223 0 : buf->private = spd->partial[page_nr].private;
224 0 : buf->ops = spd->ops;
225 0 : buf->flags = 0;
226 :
227 0 : head++;
228 0 : pipe->head = head;
229 0 : page_nr++;
230 0 : ret += buf->len;
231 :
232 0 : if (!--spd->nr_pages)
233 : break;
234 : }
235 :
236 0 : if (!ret)
237 0 : ret = -EAGAIN;
238 :
239 : out:
240 0 : while (page_nr < spd_pages)
241 0 : spd->spd_release(spd, page_nr++);
242 :
243 0 : return ret;
244 : }
245 : EXPORT_SYMBOL_GPL(splice_to_pipe);
246 :
247 0 : ssize_t add_to_pipe(struct pipe_inode_info *pipe, struct pipe_buffer *buf)
248 : {
249 0 : unsigned int head = pipe->head;
250 0 : unsigned int tail = pipe->tail;
251 0 : unsigned int mask = pipe->ring_size - 1;
252 : int ret;
253 :
254 0 : if (unlikely(!pipe->readers)) {
255 0 : send_sig(SIGPIPE, current, 0);
256 0 : ret = -EPIPE;
257 0 : } else if (pipe_full(head, tail, pipe->max_usage)) {
258 : ret = -EAGAIN;
259 : } else {
260 0 : pipe->bufs[head & mask] = *buf;
261 0 : pipe->head = head + 1;
262 0 : return buf->len;
263 : }
264 0 : pipe_buf_release(pipe, buf);
265 0 : return ret;
266 : }
267 : EXPORT_SYMBOL(add_to_pipe);
268 :
269 : /*
270 : * Check if we need to grow the arrays holding pages and partial page
271 : * descriptions.
272 : */
273 0 : int splice_grow_spd(const struct pipe_inode_info *pipe, struct splice_pipe_desc *spd)
274 : {
275 0 : unsigned int max_usage = READ_ONCE(pipe->max_usage);
276 :
277 0 : spd->nr_pages_max = max_usage;
278 0 : if (max_usage <= PIPE_DEF_BUFFERS)
279 : return 0;
280 :
281 0 : spd->pages = kmalloc_array(max_usage, sizeof(struct page *), GFP_KERNEL);
282 0 : spd->partial = kmalloc_array(max_usage, sizeof(struct partial_page),
283 : GFP_KERNEL);
284 :
285 0 : if (spd->pages && spd->partial)
286 : return 0;
287 :
288 0 : kfree(spd->pages);
289 0 : kfree(spd->partial);
290 0 : return -ENOMEM;
291 : }
292 :
293 0 : void splice_shrink_spd(struct splice_pipe_desc *spd)
294 : {
295 0 : if (spd->nr_pages_max <= PIPE_DEF_BUFFERS)
296 : return;
297 :
298 0 : kfree(spd->pages);
299 0 : kfree(spd->partial);
300 : }
301 :
302 : /*
303 : * Splice data from an O_DIRECT file into pages and then add them to the output
304 : * pipe.
305 : */
/*
 * Reads up to @len bytes from @in at *@ppos into freshly allocated pages via
 * the file's read_iter path, then appends the pages that received data to
 * @pipe.  Returns the result of the read, with -EFAULT translated to
 * -EAGAIN; *@ppos is advanced only when data was read.  @flags is not used
 * in this function.
 */
306 0 : ssize_t direct_splice_read(struct file *in, loff_t *ppos,
307 : struct pipe_inode_info *pipe,
308 : size_t len, unsigned int flags)
309 : {
310 : struct iov_iter to;
311 : struct bio_vec *bv;
312 : struct kiocb kiocb;
313 : struct page **pages;
314 : ssize_t ret;
315 : size_t used, npages, chunk, remain, reclaim;
316 : int i;
317 :
318 : /* Work out how much data we can actually add into the pipe */
319 0 : used = pipe_occupancy(pipe->head, pipe->tail);
320 0 : npages = max_t(ssize_t, pipe->max_usage - used, 0);
321 0 : len = min_t(size_t, len, npages * PAGE_SIZE);
322 0 : npages = DIV_ROUND_UP(len, PAGE_SIZE);
323 :
/*
 * One allocation holds both arrays: npages bio_vecs followed by npages page
 * pointers ('pages' is set to point just past the bio_vec array).
 */
324 0 : bv = kzalloc(array_size(npages, sizeof(bv[0])) +
325 0 : array_size(npages, sizeof(struct page *)), GFP_KERNEL);
326 0 : if (!bv)
327 : return -ENOMEM;
328 :
329 0 : pages = (void *)(bv + npages);
/*
 * npages is replaced by the bulk allocator's return value, and len is
 * clamped to it right below, so a shortfall simply shortens the read.
 */
330 0 : npages = alloc_pages_bulk_array(GFP_USER, npages, pages);
331 0 : if (!npages) {
332 0 : kfree(bv);
333 0 : return -ENOMEM;
334 : }
335 :
336 0 : remain = len = min_t(size_t, len, npages * PAGE_SIZE);
337 :
/* Describe each page as one bio_vec; only the last one may be partial. */
338 0 : for (i = 0; i < npages; i++) {
339 0 : chunk = min_t(size_t, PAGE_SIZE, remain);
340 0 : bv[i].bv_page = pages[i];
341 0 : bv[i].bv_offset = 0;
342 0 : bv[i].bv_len = chunk;
343 0 : remain -= chunk;
344 : }
345 :
346 : /* Do the I/O */
347 0 : iov_iter_bvec(&to, ITER_DEST, bv, npages, len);
348 0 : init_sync_kiocb(&kiocb, in);
349 0 : kiocb.ki_pos = *ppos;
350 0 : ret = call_read_iter(in, &kiocb, &to);
351 :
/*
 * 'reclaim' starts as the full byte capacity of the allocated pages and is
 * reduced by the bytes actually read; 'remain' becomes the byte count still
 * to be distributed over pipe buffers.  With ret <= 0 every page is
 * reclaimed and nothing is pushed.
 */
352 0 : reclaim = npages * PAGE_SIZE;
353 0 : remain = 0;
354 0 : if (ret > 0) {
355 0 : reclaim -= ret;
356 0 : remain = ret;
357 0 : *ppos = kiocb.ki_pos;
358 : file_accessed(in);
359 0 : } else if (ret < 0) {
360 : /*
361 : * callers of ->splice_read() expect -EAGAIN on
362 : * "can't put anything in there", rather than -EFAULT.
363 : */
364 0 : if (ret == -EFAULT)
365 0 : ret = -EAGAIN;
366 : }
367 :
368 : /* Free any pages that didn't get touched at all. */
/* The division rounds down, so a partially filled page is kept. */
369 0 : reclaim /= PAGE_SIZE;
370 0 : if (reclaim) {
371 0 : npages -= reclaim;
372 0 : release_pages(pages + npages, reclaim);
373 : }
374 :
375 : /* Push the remaining pages into the pipe. */
/*
 * Each kept page becomes one buffer at the pipe head using
 * default_pipe_buf_ops; the final buffer carries whatever is left of 'remain'.
 */
376 0 : for (i = 0; i < npages; i++) {
377 0 : struct pipe_buffer *buf = pipe_head_buf(pipe);
378 :
379 0 : chunk = min_t(size_t, remain, PAGE_SIZE);
380 0 : *buf = (struct pipe_buffer) {
381 : .ops = &default_pipe_buf_ops,
382 0 : .page = bv[i].bv_page,
383 : .offset = 0,
384 : .len = chunk,
385 : };
386 0 : pipe->head++;
387 0 : remain -= chunk;
388 : }
389 :
390 0 : kfree(bv);
391 0 : return ret;
392 : }
393 : EXPORT_SYMBOL(direct_splice_read);
394 :
395 : /**
396 : * generic_file_splice_read - splice data from file to a pipe
397 : * @in: file to splice from
398 : * @ppos: position in @in
399 : * @pipe: pipe to splice to
400 : * @len: number of bytes to splice
401 : * @flags: splice modifier flags
402 : *
403 : * Description:
404 : * Will read pages from given file and fill them into a pipe. Can be
405 : * used as long as it has more or less sane ->read_iter().
406 : *
407 : */
408 0 : ssize_t generic_file_splice_read(struct file *in, loff_t *ppos,
409 : struct pipe_inode_info *pipe, size_t len,
410 : unsigned int flags)
411 : {
412 : struct iov_iter to;
413 : struct kiocb kiocb;
414 : int ret;
415 :
416 0 : iov_iter_pipe(&to, ITER_DEST, pipe, len);
417 0 : init_sync_kiocb(&kiocb, in);
418 0 : kiocb.ki_pos = *ppos;
419 0 : ret = call_read_iter(in, &kiocb, &to);
420 0 : if (ret > 0) {
421 0 : *ppos = kiocb.ki_pos;
422 : file_accessed(in);
423 0 : } else if (ret < 0) {
424 : /* free what was emitted */
425 0 : pipe_discard_from(pipe, to.start_head);
426 : /*
427 : * callers of ->splice_read() expect -EAGAIN on
428 : * "can't put anything in there", rather than -EFAULT.
429 : */
430 0 : if (ret == -EFAULT)
431 0 : ret = -EAGAIN;
432 : }
433 :
434 0 : return ret;
435 : }
436 : EXPORT_SYMBOL(generic_file_splice_read);
437 :
438 : const struct pipe_buf_operations default_pipe_buf_ops = {
439 : .release = generic_pipe_buf_release,
440 : .try_steal = generic_pipe_buf_try_steal,
441 : .get = generic_pipe_buf_get,
442 : };
443 :
444 : /* Pipe buffer operations for a socket and similar. */
445 : const struct pipe_buf_operations nosteal_pipe_buf_ops = {
446 : .release = generic_pipe_buf_release,
447 : .get = generic_pipe_buf_get,
448 : };
449 : EXPORT_SYMBOL(nosteal_pipe_buf_ops);
450 :
451 : /*
452 : * Send 'sd->len' bytes to socket from 'sd->file' at position 'sd->pos'
453 : * using sendpage(). Return the number of bytes sent.
454 : */
455 0 : static int pipe_to_sendpage(struct pipe_inode_info *pipe,
456 : struct pipe_buffer *buf, struct splice_desc *sd)
457 : {
458 0 : struct file *file = sd->u.file;
459 0 : loff_t pos = sd->pos;
460 : int more;
461 :
462 0 : if (!likely(file->f_op->sendpage))
463 : return -EINVAL;
464 :
465 0 : more = (sd->flags & SPLICE_F_MORE) ? MSG_MORE : 0;
466 :
467 0 : if (sd->len < sd->total_len &&
468 0 : pipe_occupancy(pipe->head, pipe->tail) > 1)
469 0 : more |= MSG_SENDPAGE_NOTLAST;
470 :
471 0 : return file->f_op->sendpage(file, buf->page, buf->offset,
472 : sd->len, &pos, more);
473 : }
474 :
475 0 : static void wakeup_pipe_writers(struct pipe_inode_info *pipe)
476 : {
477 0 : smp_mb();
478 0 : if (waitqueue_active(&pipe->wr_wait))
479 0 : wake_up_interruptible(&pipe->wr_wait);
480 0 : kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
481 0 : }
482 :
483 : /**
484 : * splice_from_pipe_feed - feed available data from a pipe to a file
485 : * @pipe: pipe to splice from
486 : * @sd: information to @actor
487 : * @actor: handler that splices the data
488 : *
489 : * Description:
490 : * This function loops over the pipe and calls @actor to do the
491 : * actual moving of a single struct pipe_buffer to the desired
492 : * destination. It returns when there are no more buffers left in
493 : * the pipe or if the requested number of bytes (@sd->total_len)
494 : * have been copied. It returns a positive number (one) if the
495 : * pipe needs to be filled with more data, zero if the required
496 : * number of bytes have been copied and -errno on error.
497 : *
498 : * This, together with splice_from_pipe_{begin,end,next}, may be
499 : * used to implement the functionality of __splice_from_pipe() when
500 : * locking is required around copying the pipe buffers to the
501 : * destination.
502 : */
503 0 : static int splice_from_pipe_feed(struct pipe_inode_info *pipe, struct splice_desc *sd,
504 : splice_actor *actor)
505 : {
506 0 : unsigned int head = pipe->head;
507 0 : unsigned int tail = pipe->tail;
508 0 : unsigned int mask = pipe->ring_size - 1;
509 : int ret;
510 :
511 0 : while (!pipe_empty(head, tail)) {
512 0 : struct pipe_buffer *buf = &pipe->bufs[tail & mask];
513 :
514 0 : sd->len = buf->len;
515 0 : if (sd->len > sd->total_len)
516 0 : sd->len = sd->total_len;
517 :
518 0 : ret = pipe_buf_confirm(pipe, buf);
519 0 : if (unlikely(ret)) {
520 0 : if (ret == -ENODATA)
521 0 : ret = 0;
522 : return ret;
523 : }
524 :
525 0 : ret = actor(pipe, buf, sd);
526 0 : if (ret <= 0)
527 : return ret;
528 :
529 0 : buf->offset += ret;
530 0 : buf->len -= ret;
531 :
532 0 : sd->num_spliced += ret;
533 0 : sd->len -= ret;
534 0 : sd->pos += ret;
535 0 : sd->total_len -= ret;
536 :
537 0 : if (!buf->len) {
538 0 : pipe_buf_release(pipe, buf);
539 0 : tail++;
540 0 : pipe->tail = tail;
541 0 : if (pipe->files)
542 0 : sd->need_wakeup = true;
543 : }
544 :
545 0 : if (!sd->total_len)
546 : return 0;
547 : }
548 :
549 : return 1;
550 : }
551 :
552 : /* We know we have a pipe buffer, but maybe it's empty? */
553 0 : static inline bool eat_empty_buffer(struct pipe_inode_info *pipe)
554 : {
555 0 : unsigned int tail = pipe->tail;
556 0 : unsigned int mask = pipe->ring_size - 1;
557 0 : struct pipe_buffer *buf = &pipe->bufs[tail & mask];
558 :
559 0 : if (unlikely(!buf->len)) {
560 0 : pipe_buf_release(pipe, buf);
561 0 : pipe->tail = tail+1;
562 0 : return true;
563 : }
564 :
565 : return false;
566 : }
567 :
568 : /**
569 : * splice_from_pipe_next - wait for some data to splice from
570 : * @pipe: pipe to splice from
571 : * @sd: information about the splice operation
572 : *
573 : * Description:
574 : * This function will wait for some data and return a positive
575 : * value (one) if pipe buffers are available. It will return zero
576 : * or -errno if no more data needs to be spliced.
577 : */
578 0 : static int splice_from_pipe_next(struct pipe_inode_info *pipe, struct splice_desc *sd)
579 : {
580 : /*
581 : * Check for signal early to make process killable when there are
582 : * always buffers available
583 : */
584 0 : if (signal_pending(current))
585 : return -ERESTARTSYS;
586 :
587 : repeat:
588 0 : while (pipe_empty(pipe->head, pipe->tail)) {
589 0 : if (!pipe->writers)
590 : return 0;
591 :
592 0 : if (sd->num_spliced)
593 : return 0;
594 :
595 0 : if (sd->flags & SPLICE_F_NONBLOCK)
596 : return -EAGAIN;
597 :
598 0 : if (signal_pending(current))
599 : return -ERESTARTSYS;
600 :
601 0 : if (sd->need_wakeup) {
602 0 : wakeup_pipe_writers(pipe);
603 0 : sd->need_wakeup = false;
604 : }
605 :
606 0 : pipe_wait_readable(pipe);
607 : }
608 :
609 0 : if (eat_empty_buffer(pipe))
610 : goto repeat;
611 :
612 : return 1;
613 : }
614 :
615 : /**
616 : * splice_from_pipe_begin - start splicing from pipe
617 : * @sd: information about the splice operation
618 : *
619 : * Description:
620 : * This function should be called before a loop containing
621 : * splice_from_pipe_next() and splice_from_pipe_feed() to
622 : * initialize the necessary fields of @sd.
623 : */
624 : static void splice_from_pipe_begin(struct splice_desc *sd)
625 : {
626 0 : sd->num_spliced = 0;
627 0 : sd->need_wakeup = false;
628 : }
629 :
630 : /**
631 : * splice_from_pipe_end - finish splicing from pipe
632 : * @pipe: pipe to splice from
633 : * @sd: information about the splice operation
634 : *
635 : * Description:
636 : * This function will wake up pipe writers if necessary. It should
637 : * be called after a loop containing splice_from_pipe_next() and
638 : * splice_from_pipe_feed().
639 : */
640 : static void splice_from_pipe_end(struct pipe_inode_info *pipe, struct splice_desc *sd)
641 : {
642 0 : if (sd->need_wakeup)
643 0 : wakeup_pipe_writers(pipe);
644 : }
645 :
646 : /**
647 : * __splice_from_pipe - splice data from a pipe to given actor
648 : * @pipe: pipe to splice from
649 : * @sd: information to @actor
650 : * @actor: handler that splices the data
651 : *
652 : * Description:
653 : * This function does little more than loop over the pipe and call
654 : * @actor to do the actual moving of a single struct pipe_buffer to
655 : * the desired destination. See pipe_to_file, pipe_to_sendpage, or
656 : * pipe_to_user.
657 : *
658 : */
659 0 : ssize_t __splice_from_pipe(struct pipe_inode_info *pipe, struct splice_desc *sd,
660 : splice_actor *actor)
661 : {
662 : int ret;
663 :
664 0 : splice_from_pipe_begin(sd);
665 : do {
666 0 : cond_resched();
667 0 : ret = splice_from_pipe_next(pipe, sd);
668 0 : if (ret > 0)
669 0 : ret = splice_from_pipe_feed(pipe, sd, actor);
670 0 : } while (ret > 0);
671 0 : splice_from_pipe_end(pipe, sd);
672 :
673 0 : return sd->num_spliced ? sd->num_spliced : ret;
674 : }
675 : EXPORT_SYMBOL(__splice_from_pipe);
676 :
677 : /**
678 : * splice_from_pipe - splice data from a pipe to a file
679 : * @pipe: pipe to splice from
680 : * @out: file to splice to
681 : * @ppos: position in @out
682 : * @len: how many bytes to splice
683 : * @flags: splice modifier flags
684 : * @actor: handler that splices the data
685 : *
686 : * Description:
687 : * See __splice_from_pipe. This function locks the pipe inode,
688 : * otherwise it's identical to __splice_from_pipe().
689 : *
690 : */
691 0 : ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out,
692 : loff_t *ppos, size_t len, unsigned int flags,
693 : splice_actor *actor)
694 : {
695 : ssize_t ret;
696 0 : struct splice_desc sd = {
697 : .total_len = len,
698 : .flags = flags,
699 0 : .pos = *ppos,
700 : .u.file = out,
701 : };
702 :
703 0 : pipe_lock(pipe);
704 0 : ret = __splice_from_pipe(pipe, &sd, actor);
705 0 : pipe_unlock(pipe);
706 :
707 0 : return ret;
708 : }
709 :
710 : /**
711 : * iter_file_splice_write - splice data from a pipe to a file
712 : * @pipe: pipe info
713 : * @out: file to write to
714 : * @ppos: position in @out
715 : * @len: number of bytes to splice
716 : * @flags: splice modifier flags
717 : *
718 : * Description:
719 : * Will either move or copy pages (determined by @flags options) from
720 : * the given pipe inode to the given file.
721 : * This one is ->write_iter-based.
722 : *
723 : */
724 : ssize_t
725 0 : iter_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
726 : loff_t *ppos, size_t len, unsigned int flags)
727 : {
728 0 : struct splice_desc sd = {
729 : .total_len = len,
730 : .flags = flags,
731 0 : .pos = *ppos,
732 : .u.file = out,
733 : };
734 0 : int nbufs = pipe->max_usage;
735 0 : struct bio_vec *array = kcalloc(nbufs, sizeof(struct bio_vec),
736 : GFP_KERNEL);
737 : ssize_t ret;
738 :
739 0 : if (unlikely(!array))
740 : return -ENOMEM;
741 :
742 0 : pipe_lock(pipe);
743 :
744 0 : splice_from_pipe_begin(&sd);
745 0 : while (sd.total_len) {
746 : struct iov_iter from;
747 : unsigned int head, tail, mask;
748 : size_t left;
749 : int n;
750 :
751 0 : ret = splice_from_pipe_next(pipe, &sd);
752 0 : if (ret <= 0)
753 : break;
754 :
755 0 : if (unlikely(nbufs < pipe->max_usage)) {
756 0 : kfree(array);
757 0 : nbufs = pipe->max_usage;
758 0 : array = kcalloc(nbufs, sizeof(struct bio_vec),
759 : GFP_KERNEL);
760 0 : if (!array) {
761 : ret = -ENOMEM;
762 : break;
763 : }
764 : }
765 :
766 0 : head = pipe->head;
767 0 : tail = pipe->tail;
768 0 : mask = pipe->ring_size - 1;
769 :
770 : /* build the vector */
771 0 : left = sd.total_len;
772 0 : for (n = 0; !pipe_empty(head, tail) && left && n < nbufs; tail++) {
773 0 : struct pipe_buffer *buf = &pipe->bufs[tail & mask];
774 0 : size_t this_len = buf->len;
775 :
776 : /* zero-length bvecs are not supported, skip them */
777 0 : if (!this_len)
778 0 : continue;
779 0 : this_len = min(this_len, left);
780 :
781 0 : ret = pipe_buf_confirm(pipe, buf);
782 0 : if (unlikely(ret)) {
783 0 : if (ret == -ENODATA)
784 0 : ret = 0;
785 0 : goto done;
786 : }
787 :
788 0 : bvec_set_page(&array[n], buf->page, this_len,
789 : buf->offset);
790 0 : left -= this_len;
791 0 : n++;
792 : }
793 :
794 0 : iov_iter_bvec(&from, ITER_SOURCE, array, n, sd.total_len - left);
795 0 : ret = vfs_iter_write(out, &from, &sd.pos, 0);
796 0 : if (ret <= 0)
797 : break;
798 :
799 0 : sd.num_spliced += ret;
800 0 : sd.total_len -= ret;
801 0 : *ppos = sd.pos;
802 :
803 : /* dismiss the fully eaten buffers, adjust the partial one */
804 0 : tail = pipe->tail;
805 0 : while (ret) {
806 0 : struct pipe_buffer *buf = &pipe->bufs[tail & mask];
807 0 : if (ret >= buf->len) {
808 0 : ret -= buf->len;
809 0 : buf->len = 0;
810 0 : pipe_buf_release(pipe, buf);
811 0 : tail++;
812 0 : pipe->tail = tail;
813 0 : if (pipe->files)
814 0 : sd.need_wakeup = true;
815 : } else {
816 0 : buf->offset += ret;
817 0 : buf->len -= ret;
818 0 : ret = 0;
819 : }
820 : }
821 : }
822 : done:
823 0 : kfree(array);
824 0 : splice_from_pipe_end(pipe, &sd);
825 :
826 0 : pipe_unlock(pipe);
827 :
828 0 : if (sd.num_spliced)
829 0 : ret = sd.num_spliced;
830 :
831 : return ret;
832 : }
833 :
834 : EXPORT_SYMBOL(iter_file_splice_write);
835 :
836 : /**
837 : * generic_splice_sendpage - splice data from a pipe to a socket
838 : * @pipe: pipe to splice from
839 : * @out: socket to write to
840 : * @ppos: position in @out
841 : * @len: number of bytes to splice
842 : * @flags: splice modifier flags
843 : *
844 : * Description:
845 : * Will send @len bytes from the pipe to a network socket. No data copying
846 : * is involved.
847 : *
848 : */
849 0 : ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe, struct file *out,
850 : loff_t *ppos, size_t len, unsigned int flags)
851 : {
852 0 : return splice_from_pipe(pipe, out, ppos, len, flags, pipe_to_sendpage);
853 : }
854 :
855 : EXPORT_SYMBOL(generic_splice_sendpage);
856 :
857 : static int warn_unsupported(struct file *file, const char *op)
858 : {
859 : pr_debug_ratelimited(
860 : "splice %s not supported for file %pD4 (pid: %d comm: %.20s)\n",
861 : op, file, current->pid, current->comm);
862 : return -EINVAL;
863 : }
864 :
865 : /*
866 : * Attempt to initiate a splice from pipe to file.
867 : */
868 : static long do_splice_from(struct pipe_inode_info *pipe, struct file *out,
869 : loff_t *ppos, size_t len, unsigned int flags)
870 : {
871 0 : if (unlikely(!out->f_op->splice_write))
872 : return warn_unsupported(out, "write");
873 0 : return out->f_op->splice_write(pipe, out, ppos, len, flags);
874 : }
875 :
876 : /*
877 : * Attempt to initiate a splice from a file to a pipe.
878 : */
879 0 : static long do_splice_to(struct file *in, loff_t *ppos,
880 : struct pipe_inode_info *pipe, size_t len,
881 : unsigned int flags)
882 : {
883 : unsigned int p_space;
884 : int ret;
885 :
886 0 : if (unlikely(!(in->f_mode & FMODE_READ)))
887 : return -EBADF;
888 :
889 : /* Don't try to read more the pipe has space for. */
890 0 : p_space = pipe->max_usage - pipe_occupancy(pipe->head, pipe->tail);
891 0 : len = min_t(size_t, len, p_space << PAGE_SHIFT);
892 :
893 0 : ret = rw_verify_area(READ, in, ppos, len);
894 0 : if (unlikely(ret < 0))
895 0 : return ret;
896 :
897 0 : if (unlikely(len > MAX_RW_COUNT))
898 0 : len = MAX_RW_COUNT;
899 :
900 0 : if (unlikely(!in->f_op->splice_read))
901 : return warn_unsupported(in, "read");
902 0 : return in->f_op->splice_read(in, ppos, pipe, len, flags);
903 : }
904 :
905 : /**
906 : * splice_direct_to_actor - splices data directly between two non-pipes
907 : * @in: file to splice from
908 : * @sd: actor information on where to splice to
909 : * @actor: handles the data splicing
910 : *
911 : * Description:
912 : * This is a special case helper to splice directly between two
913 : * points, without requiring an explicit pipe. Internally an allocated
914 : * pipe is cached in the process, and reused during the lifetime of
915 : * that process.
916 : *
917 : */
918 0 : ssize_t splice_direct_to_actor(struct file *in, struct splice_desc *sd,
919 : splice_direct_actor *actor)
920 : {
921 : struct pipe_inode_info *pipe;
922 : long ret, bytes;
923 : size_t len;
924 : int i, flags, more;
925 :
926 : /*
927 : * We require the input to be seekable, as we don't want to randomly
928 : * drop data for eg socket -> socket splicing. Use the piped splicing
929 : * for that!
930 : */
931 0 : if (unlikely(!(in->f_mode & FMODE_LSEEK)))
932 : return -EINVAL;
933 :
934 : /*
935 : * neither in nor out is a pipe, setup an internal pipe attached to
936 : * 'out' and transfer the wanted data from 'in' to 'out' through that
937 : */
938 0 : pipe = current->splice_pipe;
939 0 : if (unlikely(!pipe)) {
940 0 : pipe = alloc_pipe_info();
941 0 : if (!pipe)
942 : return -ENOMEM;
943 :
944 : /*
945 : * We don't have an immediate reader, but we'll read the stuff
946 : * out of the pipe right after the splice_to_pipe(). So set
947 : * PIPE_READERS appropriately.
948 : */
949 0 : pipe->readers = 1;
950 :
951 0 : current->splice_pipe = pipe;
952 : }
953 :
954 : /*
955 : * Do the splice.
956 : */
957 0 : bytes = 0;
958 0 : len = sd->total_len;
/*
 * 'flags' keeps the caller's original flags for the input side
 * (do_splice_to() below); sd->flags itself is modified from here on.
 */
959 0 : flags = sd->flags;
960 :
961 : /*
962 : * Don't block on output, we have to drain the direct pipe.
963 : */
964 0 : sd->flags &= ~SPLICE_F_NONBLOCK;
/* Remember whether the caller asked for SPLICE_F_MORE in the first place. */
965 0 : more = sd->flags & SPLICE_F_MORE;
966 :
967 0 : WARN_ON_ONCE(!pipe_empty(pipe->head, pipe->tail));
968 :
969 0 : while (len) {
970 : size_t read_len;
/*
 * 'pos' is a working copy advanced by the read; 'prev_pos' records where
 * this iteration started so sd->pos can be rewound if the output fails or
 * comes up short.
 */
971 0 : loff_t pos = sd->pos, prev_pos = pos;
972 :
973 0 : ret = do_splice_to(in, &pos, pipe, len, flags);
974 0 : if (unlikely(ret <= 0))
975 : goto out_release;
976 :
/* Tell the actor how much was just read into the internal pipe. */
977 0 : read_len = ret;
978 0 : sd->total_len = read_len;
979 :
980 : /*
981 : * If more data is pending, set SPLICE_F_MORE
982 : * If this is the last data and SPLICE_F_MORE was not set
983 : * initially, clears it.
984 : */
985 0 : if (read_len < len)
986 0 : sd->flags |= SPLICE_F_MORE;
987 0 : else if (!more)
988 0 : sd->flags &= ~SPLICE_F_MORE;
989 : /*
990 : * NOTE: nonblocking mode only applies to the input. We
991 : * must not do the output in nonblocking mode as then we
992 : * could get stuck data in the internal pipe:
993 : */
994 0 : ret = actor(pipe, sd);
995 0 : if (unlikely(ret <= 0)) {
996 0 : sd->pos = prev_pos;
997 0 : goto out_release;
998 : }
999 :
1000 0 : bytes += ret;
1001 0 : len -= ret;
1002 0 : sd->pos = pos;
1003 :
/*
 * Short output: advance the position only by what the actor consumed and
 * stop; the leftover pipe buffers are released at out_release.
 */
1004 0 : if (ret < read_len) {
1005 0 : sd->pos = prev_pos + ret;
1006 0 : goto out_release;
1007 : }
1008 : }
1009 :
1010 : done:
/* Reset the internal pipe's ring indices to zero before returning. */
1011 0 : pipe->tail = pipe->head = 0;
1012 : file_accessed(in);
1013 : return bytes;
1014 :
1015 : out_release:
1016 : /*
1017 : * If we did an incomplete transfer we must release
1018 : * the pipe buffers in question:
1019 : */
1020 0 : for (i = 0; i < pipe->ring_size; i++) {
1021 0 : struct pipe_buffer *buf = &pipe->bufs[i];
1022 :
1023 0 : if (buf->ops)
1024 : pipe_buf_release(pipe, buf);
1025 : }
1026 :
/* The last status is reported only when nothing at all was transferred. */
1027 0 : if (!bytes)
1028 0 : bytes = ret;
1029 :
1030 : goto done;
1031 : }
1032 : EXPORT_SYMBOL(splice_direct_to_actor);
1033 :
1034 0 : static int direct_splice_actor(struct pipe_inode_info *pipe,
1035 : struct splice_desc *sd)
1036 : {
1037 0 : struct file *file = sd->u.file;
1038 :
1039 0 : return do_splice_from(pipe, file, sd->opos, sd->total_len,
1040 : sd->flags);
1041 : }
1042 :
1043 : /**
1044 : * do_splice_direct - splices data directly between two files
1045 : * @in: file to splice from
1046 : * @ppos: input file offset
1047 : * @out: file to splice to
1048 : * @opos: output file offset
1049 : * @len: number of bytes to splice
1050 : * @flags: splice modifier flags
1051 : *
1052 : * Description:
1053 : * For use by do_sendfile(). splice can easily emulate sendfile, but
1054 : * doing it in the application would incur an extra system call
1055 : * (splice in + splice out, as compared to just sendfile()). So this helper
1056 : * can splice directly through a process-private pipe.
1057 : *
1058 : */
1059 0 : long do_splice_direct(struct file *in, loff_t *ppos, struct file *out,
1060 : loff_t *opos, size_t len, unsigned int flags)
1061 : {
1062 0 : struct splice_desc sd = {
1063 : .len = len,
1064 : .total_len = len,
1065 : .flags = flags,
1066 0 : .pos = *ppos,
1067 : .u.file = out,
1068 : .opos = opos,
1069 : };
1070 : long ret;
1071 :
1072 0 : if (unlikely(!(out->f_mode & FMODE_WRITE)))
1073 : return -EBADF;
1074 :
1075 0 : if (unlikely(out->f_flags & O_APPEND))
1076 : return -EINVAL;
1077 :
1078 0 : ret = rw_verify_area(WRITE, out, opos, len);
1079 0 : if (unlikely(ret < 0))
1080 : return ret;
1081 :
1082 0 : ret = splice_direct_to_actor(in, &sd, direct_splice_actor);
1083 0 : if (ret > 0)
1084 0 : *ppos = sd.pos;
1085 :
1086 : return ret;
1087 : }
1088 : EXPORT_SYMBOL(do_splice_direct);
1089 :
1090 0 : static int wait_for_space(struct pipe_inode_info *pipe, unsigned flags)
1091 : {
1092 : for (;;) {
1093 0 : if (unlikely(!pipe->readers)) {
1094 0 : send_sig(SIGPIPE, current, 0);
1095 0 : return -EPIPE;
1096 : }
1097 0 : if (!pipe_full(pipe->head, pipe->tail, pipe->max_usage))
1098 : return 0;
1099 0 : if (flags & SPLICE_F_NONBLOCK)
1100 : return -EAGAIN;
1101 0 : if (signal_pending(current))
1102 : return -ERESTARTSYS;
1103 0 : pipe_wait_writable(pipe);
1104 : }
1105 : }
1106 :
1107 : static int splice_pipe_to_pipe(struct pipe_inode_info *ipipe,
1108 : struct pipe_inode_info *opipe,
1109 : size_t len, unsigned int flags);
1110 :
1111 0 : long splice_file_to_pipe(struct file *in,
1112 : struct pipe_inode_info *opipe,
1113 : loff_t *offset,
1114 : size_t len, unsigned int flags)
1115 : {
1116 : long ret;
1117 :
1118 0 : pipe_lock(opipe);
1119 0 : ret = wait_for_space(opipe, flags);
1120 0 : if (!ret)
1121 0 : ret = do_splice_to(in, offset, opipe, len, flags);
1122 0 : pipe_unlock(opipe);
1123 0 : if (ret > 0)
1124 0 : wakeup_pipe_readers(opipe);
1125 0 : return ret;
1126 : }
1127 :
1128 : /*
1129 : * Determine where to splice to/from.
1130 : */
1131 0 : long do_splice(struct file *in, loff_t *off_in, struct file *out,
1132 : loff_t *off_out, size_t len, unsigned int flags)
1133 : {
1134 : struct pipe_inode_info *ipipe;
1135 : struct pipe_inode_info *opipe;
1136 : loff_t offset;
1137 : long ret;
1138 :
1139 0 : if (unlikely(!(in->f_mode & FMODE_READ) ||
1140 : !(out->f_mode & FMODE_WRITE)))
1141 : return -EBADF;
1142 :
1143 0 : ipipe = get_pipe_info(in, true);
1144 0 : opipe = get_pipe_info(out, true);
1145 :
1146 0 : if (ipipe && opipe) {
1147 0 : if (off_in || off_out)
1148 : return -ESPIPE;
1149 :
1150 : /* Splicing to self would be fun, but... */
1151 0 : if (ipipe == opipe)
1152 : return -EINVAL;
1153 :
1154 0 : if ((in->f_flags | out->f_flags) & O_NONBLOCK)
1155 0 : flags |= SPLICE_F_NONBLOCK;
1156 :
1157 0 : return splice_pipe_to_pipe(ipipe, opipe, len, flags);
1158 : }
1159 :
1160 0 : if (ipipe) {
1161 0 : if (off_in)
1162 : return -ESPIPE;
1163 0 : if (off_out) {
1164 0 : if (!(out->f_mode & FMODE_PWRITE))
1165 : return -EINVAL;
1166 0 : offset = *off_out;
1167 : } else {
1168 0 : offset = out->f_pos;
1169 : }
1170 :
1171 0 : if (unlikely(out->f_flags & O_APPEND))
1172 : return -EINVAL;
1173 :
1174 0 : ret = rw_verify_area(WRITE, out, &offset, len);
1175 0 : if (unlikely(ret < 0))
1176 : return ret;
1177 :
1178 0 : if (in->f_flags & O_NONBLOCK)
1179 0 : flags |= SPLICE_F_NONBLOCK;
1180 :
1181 0 : file_start_write(out);
1182 0 : ret = do_splice_from(ipipe, out, &offset, len, flags);
1183 0 : file_end_write(out);
1184 :
1185 0 : if (ret > 0)
1186 : fsnotify_modify(out);
1187 :
1188 0 : if (!off_out)
1189 0 : out->f_pos = offset;
1190 : else
1191 0 : *off_out = offset;
1192 :
1193 : return ret;
1194 : }
1195 :
1196 0 : if (opipe) {
1197 0 : if (off_out)
1198 : return -ESPIPE;
1199 0 : if (off_in) {
1200 0 : if (!(in->f_mode & FMODE_PREAD))
1201 : return -EINVAL;
1202 0 : offset = *off_in;
1203 : } else {
1204 0 : offset = in->f_pos;
1205 : }
1206 :
1207 0 : if (out->f_flags & O_NONBLOCK)
1208 0 : flags |= SPLICE_F_NONBLOCK;
1209 :
1210 0 : ret = splice_file_to_pipe(in, opipe, &offset, len, flags);
1211 :
1212 0 : if (ret > 0)
1213 : fsnotify_access(in);
1214 :
1215 0 : if (!off_in)
1216 0 : in->f_pos = offset;
1217 : else
1218 0 : *off_in = offset;
1219 :
1220 : return ret;
1221 : }
1222 :
1223 : return -EINVAL;
1224 : }
1225 :
1226 0 : static long __do_splice(struct file *in, loff_t __user *off_in,
1227 : struct file *out, loff_t __user *off_out,
1228 : size_t len, unsigned int flags)
1229 : {
1230 : struct pipe_inode_info *ipipe;
1231 : struct pipe_inode_info *opipe;
1232 0 : loff_t offset, *__off_in = NULL, *__off_out = NULL;
1233 : long ret;
1234 :
1235 0 : ipipe = get_pipe_info(in, true);
1236 0 : opipe = get_pipe_info(out, true);
1237 :
1238 0 : if (ipipe) {
1239 0 : if (off_in)
1240 : return -ESPIPE;
1241 0 : pipe_clear_nowait(in);
1242 : }
1243 0 : if (opipe) {
1244 0 : if (off_out)
1245 : return -ESPIPE;
1246 0 : pipe_clear_nowait(out);
1247 : }
1248 :
1249 0 : if (off_out) {
1250 0 : if (copy_from_user(&offset, off_out, sizeof(loff_t)))
1251 : return -EFAULT;
1252 : __off_out = &offset;
1253 : }
1254 0 : if (off_in) {
1255 0 : if (copy_from_user(&offset, off_in, sizeof(loff_t)))
1256 : return -EFAULT;
1257 : __off_in = &offset;
1258 : }
1259 :
1260 0 : ret = do_splice(in, __off_in, out, __off_out, len, flags);
1261 0 : if (ret < 0)
1262 : return ret;
1263 :
1264 0 : if (__off_out && copy_to_user(off_out, __off_out, sizeof(loff_t)))
1265 : return -EFAULT;
1266 0 : if (__off_in && copy_to_user(off_in, __off_in, sizeof(loff_t)))
1267 : return -EFAULT;
1268 :
1269 : return ret;
1270 : }
1271 :
1272 0 : static int iter_to_pipe(struct iov_iter *from,
1273 : struct pipe_inode_info *pipe,
1274 : unsigned flags)
1275 : {
1276 0 : struct pipe_buffer buf = {
1277 : .ops = &user_page_pipe_buf_ops,
1278 : .flags = flags
1279 : };
1280 0 : size_t total = 0;
1281 0 : int ret = 0;
1282 :
1283 0 : while (iov_iter_count(from)) {
1284 : struct page *pages[16];
1285 : ssize_t left;
1286 : size_t start;
1287 : int i, n;
1288 :
1289 0 : left = iov_iter_get_pages2(from, pages, ~0UL, 16, &start);
1290 0 : if (left <= 0) {
1291 0 : ret = left;
1292 0 : break;
1293 : }
1294 :
1295 0 : n = DIV_ROUND_UP(left + start, PAGE_SIZE);
1296 0 : for (i = 0; i < n; i++) {
1297 0 : int size = min_t(int, left, PAGE_SIZE - start);
1298 :
1299 0 : buf.page = pages[i];
1300 0 : buf.offset = start;
1301 0 : buf.len = size;
1302 0 : ret = add_to_pipe(pipe, &buf);
1303 0 : if (unlikely(ret < 0)) {
1304 0 : iov_iter_revert(from, left);
1305 : // this one got dropped by add_to_pipe()
1306 0 : while (++i < n)
1307 0 : put_page(pages[i]);
1308 0 : goto out;
1309 : }
1310 0 : total += ret;
1311 0 : left -= size;
1312 0 : start = 0;
1313 : }
1314 : }
1315 : out:
1316 0 : return total ? total : ret;
1317 : }
1318 :
1319 0 : static int pipe_to_user(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
1320 : struct splice_desc *sd)
1321 : {
1322 0 : int n = copy_page_to_iter(buf->page, buf->offset, sd->len, sd->u.data);
1323 0 : return n == sd->len ? n : -EFAULT;
1324 : }
1325 :
1326 : /*
1327 : * For lack of a better implementation, implement vmsplice() to userspace
1328 : * as a simple copy of the pipes pages to the user iov.
1329 : */
1330 0 : static long vmsplice_to_user(struct file *file, struct iov_iter *iter,
1331 : unsigned int flags)
1332 : {
1333 0 : struct pipe_inode_info *pipe = get_pipe_info(file, true);
1334 0 : struct splice_desc sd = {
1335 0 : .total_len = iov_iter_count(iter),
1336 : .flags = flags,
1337 : .u.data = iter
1338 : };
1339 0 : long ret = 0;
1340 :
1341 0 : if (!pipe)
1342 : return -EBADF;
1343 :
1344 0 : pipe_clear_nowait(file);
1345 :
1346 0 : if (sd.total_len) {
1347 0 : pipe_lock(pipe);
1348 0 : ret = __splice_from_pipe(pipe, &sd, pipe_to_user);
1349 0 : pipe_unlock(pipe);
1350 : }
1351 :
1352 : return ret;
1353 : }
1354 :
1355 : /*
1356 : * vmsplice splices a user address range into a pipe. It can be thought of
1357 : * as splice-from-memory, where the regular splice is splice-from-file (or
1358 : * to file). In both cases the output is a pipe, naturally.
1359 : */
1360 0 : static long vmsplice_to_pipe(struct file *file, struct iov_iter *iter,
1361 : unsigned int flags)
1362 : {
1363 : struct pipe_inode_info *pipe;
1364 0 : long ret = 0;
1365 0 : unsigned buf_flag = 0;
1366 :
1367 0 : if (flags & SPLICE_F_GIFT)
1368 0 : buf_flag = PIPE_BUF_FLAG_GIFT;
1369 :
1370 0 : pipe = get_pipe_info(file, true);
1371 0 : if (!pipe)
1372 : return -EBADF;
1373 :
1374 0 : pipe_clear_nowait(file);
1375 :
1376 0 : pipe_lock(pipe);
1377 0 : ret = wait_for_space(pipe, flags);
1378 0 : if (!ret)
1379 0 : ret = iter_to_pipe(iter, pipe, buf_flag);
1380 0 : pipe_unlock(pipe);
1381 0 : if (ret > 0)
1382 0 : wakeup_pipe_readers(pipe);
1383 : return ret;
1384 : }
1385 :
1386 0 : static int vmsplice_type(struct fd f, int *type)
1387 : {
1388 0 : if (!f.file)
1389 : return -EBADF;
1390 0 : if (f.file->f_mode & FMODE_WRITE) {
1391 0 : *type = ITER_SOURCE;
1392 0 : } else if (f.file->f_mode & FMODE_READ) {
1393 0 : *type = ITER_DEST;
1394 : } else {
1395 0 : fdput(f);
1396 : return -EBADF;
1397 : }
1398 : return 0;
1399 : }
1400 :
1401 : /*
1402 : * Note that vmsplice only really supports true splicing _from_ user memory
1403 : * to a pipe, not the other way around. Splicing from user memory is a simple
1404 : * operation that can be supported without any funky alignment restrictions
1405 : * or nasty vm tricks. We simply map in the user memory and fill them into
1406 : * a pipe. The reverse isn't quite as easy, though. There are two possible
1407 : * solutions for that:
1408 : *
1409 : * - memcpy() the data internally, at which point we might as well just
1410 : * do a regular read() on the buffer anyway.
1411 : * - Lots of nasty vm tricks, that are neither fast nor flexible (it
1412 : * has restriction limitations on both ends of the pipe).
1413 : *
1414 : * Currently we punt and implement it as a normal copy, see pipe_to_user().
1415 : *
1416 : */
1417 0 : SYSCALL_DEFINE4(vmsplice, int, fd, const struct iovec __user *, uiov,
1418 : unsigned long, nr_segs, unsigned int, flags)
1419 : {
1420 : struct iovec iovstack[UIO_FASTIOV];
1421 0 : struct iovec *iov = iovstack;
1422 : struct iov_iter iter;
1423 : ssize_t error;
1424 : struct fd f;
1425 : int type;
1426 :
1427 0 : if (unlikely(flags & ~SPLICE_F_ALL))
1428 : return -EINVAL;
1429 :
1430 0 : f = fdget(fd);
1431 0 : error = vmsplice_type(f, &type);
1432 0 : if (error)
1433 : return error;
1434 :
1435 0 : error = import_iovec(type, uiov, nr_segs,
1436 : ARRAY_SIZE(iovstack), &iov, &iter);
1437 0 : if (error < 0)
1438 : goto out_fdput;
1439 :
1440 0 : if (!iov_iter_count(&iter))
1441 : error = 0;
1442 0 : else if (type == ITER_SOURCE)
1443 0 : error = vmsplice_to_pipe(f.file, &iter, flags);
1444 : else
1445 0 : error = vmsplice_to_user(f.file, &iter, flags);
1446 :
1447 0 : kfree(iov);
1448 : out_fdput:
1449 0 : fdput(f);
1450 : return error;
1451 : }
1452 :
1453 0 : SYSCALL_DEFINE6(splice, int, fd_in, loff_t __user *, off_in,
1454 : int, fd_out, loff_t __user *, off_out,
1455 : size_t, len, unsigned int, flags)
1456 : {
1457 : struct fd in, out;
1458 : long error;
1459 :
1460 0 : if (unlikely(!len))
1461 : return 0;
1462 :
1463 0 : if (unlikely(flags & ~SPLICE_F_ALL))
1464 : return -EINVAL;
1465 :
1466 0 : error = -EBADF;
1467 0 : in = fdget(fd_in);
1468 0 : if (in.file) {
1469 0 : out = fdget(fd_out);
1470 0 : if (out.file) {
1471 0 : error = __do_splice(in.file, off_in, out.file, off_out,
1472 : len, flags);
1473 0 : fdput(out);
1474 : }
1475 0 : fdput(in);
1476 : }
1477 : return error;
1478 : }
1479 :
1480 : /*
1481 : * Make sure there's data to read. Wait for input if we can, otherwise
1482 : * return an appropriate error.
1483 : */
1484 0 : static int ipipe_prep(struct pipe_inode_info *pipe, unsigned int flags)
1485 : {
1486 : int ret;
1487 :
1488 : /*
1489 : * Check the pipe occupancy without the inode lock first. This function
1490 : * is speculative anyways, so missing one is ok.
1491 : */
1492 0 : if (!pipe_empty(pipe->head, pipe->tail))
1493 : return 0;
1494 :
1495 0 : ret = 0;
1496 0 : pipe_lock(pipe);
1497 :
1498 0 : while (pipe_empty(pipe->head, pipe->tail)) {
1499 0 : if (signal_pending(current)) {
1500 : ret = -ERESTARTSYS;
1501 : break;
1502 : }
1503 0 : if (!pipe->writers)
1504 : break;
1505 0 : if (flags & SPLICE_F_NONBLOCK) {
1506 : ret = -EAGAIN;
1507 : break;
1508 : }
1509 0 : pipe_wait_readable(pipe);
1510 : }
1511 :
1512 0 : pipe_unlock(pipe);
1513 0 : return ret;
1514 : }
1515 :
1516 : /*
1517 : * Make sure there's writeable room. Wait for room if we can, otherwise
1518 : * return an appropriate error.
1519 : */
1520 0 : static int opipe_prep(struct pipe_inode_info *pipe, unsigned int flags)
1521 : {
1522 : int ret;
1523 :
1524 : /*
1525 : * Check pipe occupancy without the inode lock first. This function
1526 : * is speculative anyways, so missing one is ok.
1527 : */
1528 0 : if (!pipe_full(pipe->head, pipe->tail, pipe->max_usage))
1529 : return 0;
1530 :
1531 0 : ret = 0;
1532 0 : pipe_lock(pipe);
1533 :
1534 0 : while (pipe_full(pipe->head, pipe->tail, pipe->max_usage)) {
1535 0 : if (!pipe->readers) {
1536 0 : send_sig(SIGPIPE, current, 0);
1537 0 : ret = -EPIPE;
1538 0 : break;
1539 : }
1540 0 : if (flags & SPLICE_F_NONBLOCK) {
1541 : ret = -EAGAIN;
1542 : break;
1543 : }
1544 0 : if (signal_pending(current)) {
1545 : ret = -ERESTARTSYS;
1546 : break;
1547 : }
1548 0 : pipe_wait_writable(pipe);
1549 : }
1550 :
1551 0 : pipe_unlock(pipe);
1552 0 : return ret;
1553 : }
1554 :
1555 : /*
1556 : * Splice contents of ipipe to opipe.
1557 : */
1558 0 : static int splice_pipe_to_pipe(struct pipe_inode_info *ipipe,
1559 : struct pipe_inode_info *opipe,
1560 : size_t len, unsigned int flags)
1561 : {
1562 : struct pipe_buffer *ibuf, *obuf;
1563 : unsigned int i_head, o_head;
1564 : unsigned int i_tail, o_tail;
1565 : unsigned int i_mask, o_mask;
1566 0 : int ret = 0;
1567 0 : bool input_wakeup = false;
1568 :
1569 :
1570 : retry:
1571 0 : ret = ipipe_prep(ipipe, flags);
1572 0 : if (ret)
1573 : return ret;
1574 :
1575 0 : ret = opipe_prep(opipe, flags);
1576 0 : if (ret)
1577 : return ret;
1578 :
1579 : /*
1580 : * Potential ABBA deadlock, work around it by ordering lock
1581 : * grabbing by pipe info address. Otherwise two different processes
1582 : * could deadlock (one doing tee from A -> B, the other from B -> A).
1583 : */
1584 0 : pipe_double_lock(ipipe, opipe);
1585 :
1586 0 : i_tail = ipipe->tail;
1587 0 : i_mask = ipipe->ring_size - 1;
1588 0 : o_head = opipe->head;
1589 0 : o_mask = opipe->ring_size - 1;
1590 :
1591 : do {
1592 : size_t o_len;
1593 :
1594 0 : if (!opipe->readers) {
1595 0 : send_sig(SIGPIPE, current, 0);
1596 0 : if (!ret)
1597 0 : ret = -EPIPE;
1598 : break;
1599 : }
1600 :
1601 0 : i_head = ipipe->head;
1602 0 : o_tail = opipe->tail;
1603 :
1604 0 : if (pipe_empty(i_head, i_tail) && !ipipe->writers)
1605 : break;
1606 :
1607 : /*
1608 : * Cannot make any progress, because either the input
1609 : * pipe is empty or the output pipe is full.
1610 : */
1611 0 : if (pipe_empty(i_head, i_tail) ||
1612 0 : pipe_full(o_head, o_tail, opipe->max_usage)) {
1613 : /* Already processed some buffers, break */
1614 0 : if (ret)
1615 : break;
1616 :
1617 0 : if (flags & SPLICE_F_NONBLOCK) {
1618 : ret = -EAGAIN;
1619 : break;
1620 : }
1621 :
1622 : /*
1623 : * We raced with another reader/writer and haven't
1624 : * managed to process any buffers. A zero return
1625 : * value means EOF, so retry instead.
1626 : */
1627 0 : pipe_unlock(ipipe);
1628 0 : pipe_unlock(opipe);
1629 0 : goto retry;
1630 : }
1631 :
1632 0 : ibuf = &ipipe->bufs[i_tail & i_mask];
1633 0 : obuf = &opipe->bufs[o_head & o_mask];
1634 :
1635 0 : if (len >= ibuf->len) {
1636 : /*
1637 : * Simply move the whole buffer from ipipe to opipe
1638 : */
1639 0 : *obuf = *ibuf;
1640 0 : ibuf->ops = NULL;
1641 0 : i_tail++;
1642 0 : ipipe->tail = i_tail;
1643 0 : input_wakeup = true;
1644 0 : o_len = obuf->len;
1645 0 : o_head++;
1646 0 : opipe->head = o_head;
1647 : } else {
1648 : /*
1649 : * Get a reference to this pipe buffer,
1650 : * so we can copy the contents over.
1651 : */
1652 0 : if (!pipe_buf_get(ipipe, ibuf)) {
1653 0 : if (ret == 0)
1654 0 : ret = -EFAULT;
1655 : break;
1656 : }
1657 0 : *obuf = *ibuf;
1658 :
1659 : /*
1660 : * Don't inherit the gift and merge flags, we need to
1661 : * prevent multiple steals of this page.
1662 : */
1663 0 : obuf->flags &= ~PIPE_BUF_FLAG_GIFT;
1664 0 : obuf->flags &= ~PIPE_BUF_FLAG_CAN_MERGE;
1665 :
1666 0 : obuf->len = len;
1667 0 : ibuf->offset += len;
1668 0 : ibuf->len -= len;
1669 0 : o_len = len;
1670 0 : o_head++;
1671 0 : opipe->head = o_head;
1672 : }
1673 0 : ret += o_len;
1674 0 : len -= o_len;
1675 0 : } while (len);
1676 :
1677 0 : pipe_unlock(ipipe);
1678 0 : pipe_unlock(opipe);
1679 :
1680 : /*
1681 : * If we put data in the output pipe, wakeup any potential readers.
1682 : */
1683 0 : if (ret > 0)
1684 0 : wakeup_pipe_readers(opipe);
1685 :
1686 0 : if (input_wakeup)
1687 0 : wakeup_pipe_writers(ipipe);
1688 :
1689 : return ret;
1690 : }
1691 :
1692 : /*
1693 : * Link contents of ipipe to opipe.
1694 : */
1695 0 : static int link_pipe(struct pipe_inode_info *ipipe,
1696 : struct pipe_inode_info *opipe,
1697 : size_t len, unsigned int flags)
1698 : {
1699 : struct pipe_buffer *ibuf, *obuf;
1700 : unsigned int i_head, o_head;
1701 : unsigned int i_tail, o_tail;
1702 : unsigned int i_mask, o_mask;
1703 0 : int ret = 0;
1704 :
1705 : /*
1706 : * Potential ABBA deadlock, work around it by ordering lock
1707 : * grabbing by pipe info address. Otherwise two different processes
1708 : * could deadlock (one doing tee from A -> B, the other from B -> A).
1709 : */
1710 0 : pipe_double_lock(ipipe, opipe);
1711 :
1712 0 : i_tail = ipipe->tail;
1713 0 : i_mask = ipipe->ring_size - 1;
1714 0 : o_head = opipe->head;
1715 0 : o_mask = opipe->ring_size - 1;
1716 :
1717 : do {
1718 0 : if (!opipe->readers) {
1719 0 : send_sig(SIGPIPE, current, 0);
1720 0 : if (!ret)
1721 0 : ret = -EPIPE;
1722 : break;
1723 : }
1724 :
1725 0 : i_head = ipipe->head;
1726 0 : o_tail = opipe->tail;
1727 :
1728 : /*
1729 : * If we have iterated all input buffers or run out of
1730 : * output room, break.
1731 : */
1732 0 : if (pipe_empty(i_head, i_tail) ||
1733 0 : pipe_full(o_head, o_tail, opipe->max_usage))
1734 : break;
1735 :
1736 0 : ibuf = &ipipe->bufs[i_tail & i_mask];
1737 0 : obuf = &opipe->bufs[o_head & o_mask];
1738 :
1739 : /*
1740 : * Get a reference to this pipe buffer,
1741 : * so we can copy the contents over.
1742 : */
1743 0 : if (!pipe_buf_get(ipipe, ibuf)) {
1744 0 : if (ret == 0)
1745 0 : ret = -EFAULT;
1746 : break;
1747 : }
1748 :
1749 0 : *obuf = *ibuf;
1750 :
1751 : /*
1752 : * Don't inherit the gift and merge flag, we need to prevent
1753 : * multiple steals of this page.
1754 : */
1755 0 : obuf->flags &= ~PIPE_BUF_FLAG_GIFT;
1756 0 : obuf->flags &= ~PIPE_BUF_FLAG_CAN_MERGE;
1757 :
1758 0 : if (obuf->len > len)
1759 0 : obuf->len = len;
1760 0 : ret += obuf->len;
1761 0 : len -= obuf->len;
1762 :
1763 0 : o_head++;
1764 0 : opipe->head = o_head;
1765 0 : i_tail++;
1766 0 : } while (len);
1767 :
1768 0 : pipe_unlock(ipipe);
1769 0 : pipe_unlock(opipe);
1770 :
1771 : /*
1772 : * If we put data in the output pipe, wakeup any potential readers.
1773 : */
1774 0 : if (ret > 0)
1775 0 : wakeup_pipe_readers(opipe);
1776 :
1777 0 : return ret;
1778 : }
1779 :
1780 : /*
1781 : * This is a tee(1) implementation that works on pipes. It doesn't copy
1782 : * any data, it simply references the 'in' pages on the 'out' pipe.
1783 : * The 'flags' used are the SPLICE_F_* variants, currently the only
1784 : * applicable one is SPLICE_F_NONBLOCK.
1785 : */
1786 0 : long do_tee(struct file *in, struct file *out, size_t len, unsigned int flags)
1787 : {
1788 0 : struct pipe_inode_info *ipipe = get_pipe_info(in, true);
1789 0 : struct pipe_inode_info *opipe = get_pipe_info(out, true);
1790 0 : int ret = -EINVAL;
1791 :
1792 0 : if (unlikely(!(in->f_mode & FMODE_READ) ||
1793 : !(out->f_mode & FMODE_WRITE)))
1794 : return -EBADF;
1795 :
1796 : /*
1797 : * Duplicate the contents of ipipe to opipe without actually
1798 : * copying the data.
1799 : */
1800 0 : if (ipipe && opipe && ipipe != opipe) {
1801 0 : if ((in->f_flags | out->f_flags) & O_NONBLOCK)
1802 0 : flags |= SPLICE_F_NONBLOCK;
1803 :
1804 : /*
1805 : * Keep going, unless we encounter an error. The ipipe/opipe
1806 : * ordering doesn't really matter.
1807 : */
1808 0 : ret = ipipe_prep(ipipe, flags);
1809 0 : if (!ret) {
1810 0 : ret = opipe_prep(opipe, flags);
1811 0 : if (!ret)
1812 0 : ret = link_pipe(ipipe, opipe, len, flags);
1813 : }
1814 : }
1815 :
1816 0 : return ret;
1817 : }
1818 :
1819 0 : SYSCALL_DEFINE4(tee, int, fdin, int, fdout, size_t, len, unsigned int, flags)
1820 : {
1821 : struct fd in, out;
1822 : int error;
1823 :
1824 0 : if (unlikely(flags & ~SPLICE_F_ALL))
1825 : return -EINVAL;
1826 :
1827 0 : if (unlikely(!len))
1828 : return 0;
1829 :
1830 0 : error = -EBADF;
1831 0 : in = fdget(fdin);
1832 0 : if (in.file) {
1833 0 : out = fdget(fdout);
1834 0 : if (out.file) {
1835 0 : error = do_tee(in.file, out.file, len, flags);
1836 0 : fdput(out);
1837 : }
1838 0 : fdput(in);
1839 : }
1840 :
1841 0 : return error;
1842 : }
|