LCOV - code coverage report
Current view: top level - fs - pipe.c (source / functions)
Test:         coverage.info
Date:         2023-04-06 08:38:28
Coverage:     Lines:     13 / 578   (2.2 %)
              Functions:  2 /  41   (4.9 %)

          Line data    Source code
       1             : // SPDX-License-Identifier: GPL-2.0
       2             : /*
       3             :  *  linux/fs/pipe.c
       4             :  *
       5             :  *  Copyright (C) 1991, 1992, 1999  Linus Torvalds
       6             :  */
       7             : 
       8             : #include <linux/mm.h>
       9             : #include <linux/file.h>
      10             : #include <linux/poll.h>
      11             : #include <linux/slab.h>
      12             : #include <linux/module.h>
      13             : #include <linux/init.h>
      14             : #include <linux/fs.h>
      15             : #include <linux/log2.h>
      16             : #include <linux/mount.h>
      17             : #include <linux/pseudo_fs.h>
      18             : #include <linux/magic.h>
      19             : #include <linux/pipe_fs_i.h>
      20             : #include <linux/uio.h>
      21             : #include <linux/highmem.h>
      22             : #include <linux/pagemap.h>
      23             : #include <linux/audit.h>
      24             : #include <linux/syscalls.h>
      25             : #include <linux/fcntl.h>
      26             : #include <linux/memcontrol.h>
      27             : #include <linux/watch_queue.h>
      28             : #include <linux/sysctl.h>
      29             : 
      30             : #include <linux/uaccess.h>
      31             : #include <asm/ioctls.h>
      32             : 
      33             : #include "internal.h"
      34             : 
      35             : /*
      36             :  * New pipe buffers will be restricted to this size while the user is exceeding
      37             :  * their pipe buffer quota. The general pipe use case needs at least two
      38             :  * buffers: one for data yet to be read, and one for new data. If this is less
      39             :  * than two, then a write to a non-empty pipe may block even if the pipe is not
      40             :  * full. This can occur with GNU make jobserver or similar uses of pipes as
      41             :  * semaphores: multiple processes may be waiting to write tokens back to the
      42             :  * pipe before reading tokens: https://lore.kernel.org/lkml/1628086770.5rn8p04n6j.none@localhost/.
      43             :  *
      44             :  * Users can reduce their pipe buffers with F_SETPIPE_SZ below this at their
      45             :  * own risk, namely: pipe writes to non-full pipes may block until the pipe is
      46             :  * emptied.
      47             :  */
      48             : #define PIPE_MIN_DEF_BUFFERS 2
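Editor's note: a minimal userspace sketch of the risk the comment above describes, assuming Linux's F_SETPIPE_SZ/F_GETPIPE_SZ fcntls (unprivileged growth is capped by /proc/sys/fs/pipe-max-size, see below). Shrinking a pipe to a single page means a write to a non-empty pipe may block even though the pipe is not full.

        #define _GNU_SOURCE
        #include <fcntl.h>
        #include <stdio.h>
        #include <unistd.h>

        int main(void)
        {
                int fds[2];

                if (pipe(fds) < 0)
                        return 1;
                /* Shrink the pipe to one page, accepting the risk above. */
                if (fcntl(fds[1], F_SETPIPE_SZ, 4096) < 0)
                        perror("F_SETPIPE_SZ");
                printf("pipe capacity: %d bytes\n", fcntl(fds[1], F_GETPIPE_SZ));
                close(fds[0]);
                close(fds[1]);
                return 0;
        }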
      49             : 
      50             : /*
      51             :  * The max size that a non-root user is allowed to grow the pipe. Can
      52             :  * be set by root in /proc/sys/fs/pipe-max-size
      53             :  */
      54             : static unsigned int pipe_max_size = 1048576;
      55             : 
      56             : /* Maximum allocatable pages per user. Hard limit is unset by default, soft
      57             :  * matches default values.
      58             :  */
      59             : static unsigned long pipe_user_pages_hard;
      60             : static unsigned long pipe_user_pages_soft = PIPE_DEF_BUFFERS * INR_OPEN_CUR;
      61             : 
      62             : /*
      63             :  * We use head and tail indices that aren't masked off, except at the point of
      64             :  * dereference, but rather they're allowed to wrap naturally.  This means there
      65             :  * isn't a dead spot in the buffer, but the ring has to be a power of two and
      66             :  * <= 2^31.
      67             :  * -- David Howells 2019-09-23.
      68             :  *
      69             :  * Reads with count = 0 should always return 0.
      70             :  * -- Julian Bradfield 1999-06-07.
      71             :  *
      72             :  * FIFOs and Pipes now generate SIGIO for both readers and writers.
      73             :  * -- Jeremy Elson <jelson@circlemud.org> 2001-08-16
      74             :  *
      75             :  * pipe_read & write cleanup
      76             :  * -- Manfred Spraul <manfred@colorfullife.com> 2002-05-09
      77             :  */
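Editor's note: a simplified sketch of the arithmetic that follows from the unmasked head/tail scheme described above. The real helpers (pipe_empty(), pipe_occupancy(), pipe_full()) live in include/linux/pipe_fs_i.h; the names below are illustrative only.

        /* Illustrative sketch: indices wrap naturally modulo 2^32 and are
         * masked only when they are used to index pipe->bufs[]. */
        static inline int ring_empty(unsigned int head, unsigned int tail)
        {
                return head == tail;
        }

        static inline unsigned int ring_occupancy(unsigned int head, unsigned int tail)
        {
                return head - tail;     /* still correct after head wraps past 0 */
        }

        static inline int ring_full(unsigned int head, unsigned int tail,
                                    unsigned int limit)
        {
                return ring_occupancy(head, tail) >= limit;
        }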
      78             : 
      79             : static void pipe_lock_nested(struct pipe_inode_info *pipe, int subclass)
      80             : {
      81           0 :         if (pipe->files)
      82           0 :                 mutex_lock_nested(&pipe->mutex, subclass);
      83             : }
      84             : 
      85           0 : void pipe_lock(struct pipe_inode_info *pipe)
      86             : {
      87             :         /*
      88             :          * pipe_lock() nests non-pipe inode locks (for writing to a file)
      89             :          */
      90           0 :         pipe_lock_nested(pipe, I_MUTEX_PARENT);
      91           0 : }
      92             : EXPORT_SYMBOL(pipe_lock);
      93             : 
      94           0 : void pipe_unlock(struct pipe_inode_info *pipe)
      95             : {
      96           0 :         if (pipe->files)
      97           0 :                 mutex_unlock(&pipe->mutex);
      98           0 : }
      99             : EXPORT_SYMBOL(pipe_unlock);
     100             : 
     101             : static inline void __pipe_lock(struct pipe_inode_info *pipe)
     102             : {
     103           0 :         mutex_lock_nested(&pipe->mutex, I_MUTEX_PARENT);
     104             : }
     105             : 
     106             : static inline void __pipe_unlock(struct pipe_inode_info *pipe)
     107             : {
     108           0 :         mutex_unlock(&pipe->mutex);
     109             : }
     110             : 
     111           0 : void pipe_double_lock(struct pipe_inode_info *pipe1,
     112             :                       struct pipe_inode_info *pipe2)
     113             : {
     114           0 :         BUG_ON(pipe1 == pipe2);
     115             : 
     116           0 :         if (pipe1 < pipe2) {
     117           0 :                 pipe_lock_nested(pipe1, I_MUTEX_PARENT);
     118           0 :                 pipe_lock_nested(pipe2, I_MUTEX_CHILD);
     119             :         } else {
     120           0 :                 pipe_lock_nested(pipe2, I_MUTEX_PARENT);
     121           0 :                 pipe_lock_nested(pipe1, I_MUTEX_CHILD);
     122             :         }
     123           0 : }
     124             : 
     125           0 : static void anon_pipe_buf_release(struct pipe_inode_info *pipe,
     126             :                                   struct pipe_buffer *buf)
     127             : {
     128           0 :         struct page *page = buf->page;
     129             : 
     130             :         /*
     131             :          * If nobody else uses this page, and we don't already have a
     132             :          * temporary page, let's keep track of it as a one-deep
     133             :          * allocation cache. (Otherwise just release our reference to it)
     134             :          */
     135           0 :         if (page_count(page) == 1 && !pipe->tmp_page)
     136           0 :                 pipe->tmp_page = page;
     137             :         else
     138           0 :                 put_page(page);
     139           0 : }
     140             : 
     141           0 : static bool anon_pipe_buf_try_steal(struct pipe_inode_info *pipe,
     142             :                 struct pipe_buffer *buf)
     143             : {
     144           0 :         struct page *page = buf->page;
     145             : 
     146           0 :         if (page_count(page) != 1)
     147             :                 return false;
     148           0 :         memcg_kmem_uncharge_page(page, 0);
     149           0 :         __SetPageLocked(page);
     150           0 :         return true;
     151             : }
     152             : 
     153             : /**
     154             :  * generic_pipe_buf_try_steal - attempt to take ownership of a &pipe_buffer
     155             :  * @pipe:       the pipe that the buffer belongs to
     156             :  * @buf:        the buffer to attempt to steal
     157             :  *
     158             :  * Description:
     159             :  *      This function attempts to steal the &struct page attached to
     160             :  *      @buf. If successful, this function returns 0 and returns with
     161             :  *      the page locked. The caller may then reuse the page for whatever
     162             :  *      he wishes; the typical use is insertion into a different file
     163             :  *      page cache.
     164             :  */
     165           0 : bool generic_pipe_buf_try_steal(struct pipe_inode_info *pipe,
     166             :                 struct pipe_buffer *buf)
     167             : {
     168           0 :         struct page *page = buf->page;
     169             : 
     170             :         /*
     171             :          * A reference of one is golden, that means that the owner of this
     172             :          * page is the only one holding a reference to it. lock the page
     173             :          * and return OK.
     174             :          */
     175           0 :         if (page_count(page) == 1) {
     176           0 :                 lock_page(page);
     177           0 :                 return true;
     178             :         }
     179             :         return false;
     180             : }
     181             : EXPORT_SYMBOL(generic_pipe_buf_try_steal);
     182             : 
     183             : /**
     184             :  * generic_pipe_buf_get - get a reference to a &struct pipe_buffer
     185             :  * @pipe:       the pipe that the buffer belongs to
     186             :  * @buf:        the buffer to get a reference to
     187             :  *
     188             :  * Description:
     189             :  *      This function grabs an extra reference to @buf. It's used in
     190             :  *      the tee() system call, when we duplicate the buffers in one
     191             :  *      pipe into another.
     192             :  */
     193           0 : bool generic_pipe_buf_get(struct pipe_inode_info *pipe, struct pipe_buffer *buf)
     194             : {
     195           0 :         return try_get_page(buf->page);
     196             : }
     197             : EXPORT_SYMBOL(generic_pipe_buf_get);
     198             : 
     199             : /**
     200             :  * generic_pipe_buf_release - put a reference to a &struct pipe_buffer
     201             :  * @pipe:       the pipe that the buffer belongs to
     202             :  * @buf:        the buffer to put a reference to
     203             :  *
     204             :  * Description:
     205             :  *      This function releases a reference to @buf.
     206             :  */
     207           0 : void generic_pipe_buf_release(struct pipe_inode_info *pipe,
     208             :                               struct pipe_buffer *buf)
     209             : {
     210           0 :         put_page(buf->page);
     211           0 : }
     212             : EXPORT_SYMBOL(generic_pipe_buf_release);
     213             : 
     214             : static const struct pipe_buf_operations anon_pipe_buf_ops = {
     215             :         .release        = anon_pipe_buf_release,
     216             :         .try_steal      = anon_pipe_buf_try_steal,
     217             :         .get            = generic_pipe_buf_get,
     218             : };
     219             : 
     220             : /* Done while waiting without holding the pipe lock - thus the READ_ONCE() */
     221             : static inline bool pipe_readable(const struct pipe_inode_info *pipe)
     222             : {
     223           0 :         unsigned int head = READ_ONCE(pipe->head);
     224           0 :         unsigned int tail = READ_ONCE(pipe->tail);
     225           0 :         unsigned int writers = READ_ONCE(pipe->writers);
     226             : 
     227           0 :         return !pipe_empty(head, tail) || !writers;
     228             : }
     229             : 
     230             : static ssize_t
     231           0 : pipe_read(struct kiocb *iocb, struct iov_iter *to)
     232             : {
     233           0 :         size_t total_len = iov_iter_count(to);
     234           0 :         struct file *filp = iocb->ki_filp;
     235           0 :         struct pipe_inode_info *pipe = filp->private_data;
     236           0 :         bool was_full, wake_next_reader = false;
     237             :         ssize_t ret;
     238             : 
     239             :         /* Null read succeeds. */
     240           0 :         if (unlikely(total_len == 0))
     241             :                 return 0;
     242             : 
     243           0 :         ret = 0;
     244           0 :         __pipe_lock(pipe);
     245             : 
     246             :         /*
     247             :          * We only wake up writers if the pipe was full when we started
     248             :          * reading in order to avoid unnecessary wakeups.
     249             :          *
     250             :          * But when we do wake up writers, we do so using a sync wakeup
     251             :          * (WF_SYNC), because we want them to get going and generate more
     252             :          * data for us.
     253             :          */
     254           0 :         was_full = pipe_full(pipe->head, pipe->tail, pipe->max_usage);
     255             :         for (;;) {
     256             :                 /* Read ->head with a barrier vs post_one_notification() */
     257           0 :                 unsigned int head = smp_load_acquire(&pipe->head);
     258           0 :                 unsigned int tail = pipe->tail;
     259           0 :                 unsigned int mask = pipe->ring_size - 1;
     260             : 
     261             : #ifdef CONFIG_WATCH_QUEUE
     262             :                 if (pipe->note_loss) {
     263             :                         struct watch_notification n;
     264             : 
     265             :                         if (total_len < 8) {
     266             :                                 if (ret == 0)
     267             :                                         ret = -ENOBUFS;
     268             :                                 break;
     269             :                         }
     270             : 
     271             :                         n.type = WATCH_TYPE_META;
     272             :                         n.subtype = WATCH_META_LOSS_NOTIFICATION;
     273             :                         n.info = watch_sizeof(n);
     274             :                         if (copy_to_iter(&n, sizeof(n), to) != sizeof(n)) {
     275             :                                 if (ret == 0)
     276             :                                         ret = -EFAULT;
     277             :                                 break;
     278             :                         }
     279             :                         ret += sizeof(n);
     280             :                         total_len -= sizeof(n);
     281             :                         pipe->note_loss = false;
     282             :                 }
     283             : #endif
     284             : 
     285           0 :                 if (!pipe_empty(head, tail)) {
     286           0 :                         struct pipe_buffer *buf = &pipe->bufs[tail & mask];
     287           0 :                         size_t chars = buf->len;
     288             :                         size_t written;
     289             :                         int error;
     290             : 
     291           0 :                         if (chars > total_len) {
     292           0 :                                 if (buf->flags & PIPE_BUF_FLAG_WHOLE) {
     293           0 :                                         if (ret == 0)
     294           0 :                                                 ret = -ENOBUFS;
     295             :                                         break;
     296             :                                 }
     297             :                                 chars = total_len;
     298             :                         }
     299             : 
     300           0 :                         error = pipe_buf_confirm(pipe, buf);
     301           0 :                         if (error) {
     302           0 :                                 if (!ret)
     303           0 :                                         ret = error;
     304             :                                 break;
     305             :                         }
     306             : 
     307           0 :                         written = copy_page_to_iter(buf->page, buf->offset, chars, to);
     308           0 :                         if (unlikely(written < chars)) {
     309           0 :                                 if (!ret)
     310           0 :                                         ret = -EFAULT;
     311             :                                 break;
     312             :                         }
     313           0 :                         ret += chars;
     314           0 :                         buf->offset += chars;
     315           0 :                         buf->len -= chars;
     316             : 
     317             :                         /* Was it a packet buffer? Clean up and exit */
     318           0 :                         if (buf->flags & PIPE_BUF_FLAG_PACKET) {
     319           0 :                                 total_len = chars;
     320           0 :                                 buf->len = 0;
     321             :                         }
     322             : 
     323           0 :                         if (!buf->len) {
     324           0 :                                 pipe_buf_release(pipe, buf);
     325           0 :                                 spin_lock_irq(&pipe->rd_wait.lock);
     326             : #ifdef CONFIG_WATCH_QUEUE
     327             :                                 if (buf->flags & PIPE_BUF_FLAG_LOSS)
     328             :                                         pipe->note_loss = true;
     329             : #endif
     330           0 :                                 tail++;
     331           0 :                                 pipe->tail = tail;
     332           0 :                                 spin_unlock_irq(&pipe->rd_wait.lock);
     333             :                         }
     334           0 :                         total_len -= chars;
     335           0 :                         if (!total_len)
     336             :                                 break;  /* common path: read succeeded */
     337           0 :                         if (!pipe_empty(head, tail))    /* More to do? */
     338           0 :                                 continue;
     339             :                 }
     340             : 
     341           0 :                 if (!pipe->writers)
     342             :                         break;
     343           0 :                 if (ret)
     344             :                         break;
     345           0 :                 if (filp->f_flags & O_NONBLOCK) {
     346             :                         ret = -EAGAIN;
     347             :                         break;
     348             :                 }
     349           0 :                 __pipe_unlock(pipe);
     350             : 
     351             :                 /*
     352             :                  * We only get here if we didn't actually read anything.
     353             :                  *
     354             :                  * However, we could have seen (and removed) a zero-sized
     355             :                  * pipe buffer, and might have made space in the buffers
     356             :                  * that way.
     357             :                  *
     358             :                  * You can't make zero-sized pipe buffers by doing an empty
     359             :                  * write (not even in packet mode), but they can happen if
     360             :                  * the writer gets an EFAULT when trying to fill a buffer
     361             :                  * that already got allocated and inserted in the buffer
     362             :                  * array.
     363             :                  *
     364             :                  * So we still need to wake up any pending writers in the
     365             :                  * _very_ unlikely case that the pipe was full, but we got
     366             :                  * no data.
     367             :                  */
     368           0 :                 if (unlikely(was_full))
     369           0 :                         wake_up_interruptible_sync_poll(&pipe->wr_wait, EPOLLOUT | EPOLLWRNORM);
     370           0 :                 kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
     371             : 
     372             :                 /*
     373             :                  * But because we didn't read anything, at this point we can
     374             :                  * just return directly with -ERESTARTSYS if we're interrupted,
     375             :                  * since we've done any required wakeups and there's no need
     376             :                  * to mark anything accessed. And we've dropped the lock.
     377             :                  */
     378           0 :                 if (wait_event_interruptible_exclusive(pipe->rd_wait, pipe_readable(pipe)) < 0)
     379             :                         return -ERESTARTSYS;
     380             : 
     381           0 :                 __pipe_lock(pipe);
     382           0 :                 was_full = pipe_full(pipe->head, pipe->tail, pipe->max_usage);
     383           0 :                 wake_next_reader = true;
     384             :         }
     385           0 :         if (pipe_empty(pipe->head, pipe->tail))
     386           0 :                 wake_next_reader = false;
     387           0 :         __pipe_unlock(pipe);
     388             : 
     389           0 :         if (was_full)
     390           0 :                 wake_up_interruptible_sync_poll(&pipe->wr_wait, EPOLLOUT | EPOLLWRNORM);
     391           0 :         if (wake_next_reader)
     392           0 :                 wake_up_interruptible_sync_poll(&pipe->rd_wait, EPOLLIN | EPOLLRDNORM);
     393           0 :         kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
     394           0 :         if (ret > 0)
     395             :                 file_accessed(filp);
     396             :         return ret;
     397             : }
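Editor's note: a hedged userspace sketch of the read-side behaviour implemented above: a non-blocking read of an empty pipe with a live writer fails with EAGAIN, and once the last writer is gone read() reports EOF by returning 0.

        #define _GNU_SOURCE
        #include <assert.h>
        #include <errno.h>
        #include <fcntl.h>
        #include <unistd.h>

        int main(void)
        {
                int fds[2];
                char c;

                if (pipe2(fds, O_NONBLOCK) < 0)
                        return 1;
                /* Empty pipe, writer still open: non-blocking read -> EAGAIN. */
                assert(read(fds[0], &c, 1) == -1 && errno == EAGAIN);
                close(fds[1]);
                /* No writers left and no data: read() returns 0 (EOF). */
                assert(read(fds[0], &c, 1) == 0);
                close(fds[0]);
                return 0;
        }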
     398             : 
     399             : static inline int is_packetized(struct file *file)
     400             : {
     401           0 :         return (file->f_flags & O_DIRECT) != 0;
     402             : }
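Editor's note: a hedged userspace sketch of the packet mode selected above via O_DIRECT (Linux packet-mode pipes). Each write() becomes one packet, and a read() returns at most one packet even when the buffer could hold more.

        #define _GNU_SOURCE
        #include <fcntl.h>
        #include <stdio.h>
        #include <unistd.h>

        int main(void)
        {
                int fds[2];
                char buf[64];
                ssize_t n;

                if (pipe2(fds, O_DIRECT) < 0)
                        return 1;
                write(fds[1], "first", 5);
                write(fds[1], "second", 6);
                n = read(fds[0], buf, sizeof(buf));     /* returns 5: "first" only */
                printf("first read: %zd bytes\n", n);
                n = read(fds[0], buf, sizeof(buf));     /* returns 6: "second" */
                printf("second read: %zd bytes\n", n);
                close(fds[0]);
                close(fds[1]);
                return 0;
        }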
     403             : 
     404             : /* Done while waiting without holding the pipe lock - thus the READ_ONCE() */
     405             : static inline bool pipe_writable(const struct pipe_inode_info *pipe)
     406             : {
     407           0 :         unsigned int head = READ_ONCE(pipe->head);
     408           0 :         unsigned int tail = READ_ONCE(pipe->tail);
     409           0 :         unsigned int max_usage = READ_ONCE(pipe->max_usage);
     410             : 
     411           0 :         return !pipe_full(head, tail, max_usage) ||
     412           0 :                 !READ_ONCE(pipe->readers);
     413             : }
     414             : 
     415             : static ssize_t
     416           0 : pipe_write(struct kiocb *iocb, struct iov_iter *from)
     417             : {
     418           0 :         struct file *filp = iocb->ki_filp;
     419           0 :         struct pipe_inode_info *pipe = filp->private_data;
     420             :         unsigned int head;
     421           0 :         ssize_t ret = 0;
     422           0 :         size_t total_len = iov_iter_count(from);
     423             :         ssize_t chars;
     424           0 :         bool was_empty = false;
     425           0 :         bool wake_next_writer = false;
     426             : 
     427             :         /* Null write succeeds. */
     428           0 :         if (unlikely(total_len == 0))
     429             :                 return 0;
     430             : 
     431           0 :         __pipe_lock(pipe);
     432             : 
     433           0 :         if (!pipe->readers) {
     434           0 :                 send_sig(SIGPIPE, current, 0);
     435           0 :                 ret = -EPIPE;
     436           0 :                 goto out;
     437             :         }
     438             : 
     439             : #ifdef CONFIG_WATCH_QUEUE
     440             :         if (pipe->watch_queue) {
     441             :                 ret = -EXDEV;
     442             :                 goto out;
     443             :         }
     444             : #endif
     445             : 
     446             :         /*
     447             :          * If it wasn't empty we try to merge new data into
     448             :          * the last buffer.
     449             :          *
     450             :          * That naturally merges small writes, but it also
     451             :          * page-aligns the rest of the writes for large writes
     452             :          * spanning multiple pages.
     453             :          */
     454           0 :         head = pipe->head;
     455           0 :         was_empty = pipe_empty(head, pipe->tail);
     456           0 :         chars = total_len & (PAGE_SIZE-1);
     457           0 :         if (chars && !was_empty) {
     458           0 :                 unsigned int mask = pipe->ring_size - 1;
     459           0 :                 struct pipe_buffer *buf = &pipe->bufs[(head - 1) & mask];
     460           0 :                 int offset = buf->offset + buf->len;
     461             : 
     462           0 :                 if ((buf->flags & PIPE_BUF_FLAG_CAN_MERGE) &&
     463           0 :                     offset + chars <= PAGE_SIZE) {
     464           0 :                         ret = pipe_buf_confirm(pipe, buf);
     465           0 :                         if (ret)
     466             :                                 goto out;
     467             : 
     468           0 :                         ret = copy_page_from_iter(buf->page, offset, chars, from);
     469           0 :                         if (unlikely(ret < chars)) {
     470             :                                 ret = -EFAULT;
     471             :                                 goto out;
     472             :                         }
     473             : 
     474           0 :                         buf->len += ret;
     475           0 :                         if (!iov_iter_count(from))
     476             :                                 goto out;
     477             :                 }
     478             :         }
     479             : 
     480             :         for (;;) {
     481           0 :                 if (!pipe->readers) {
     482           0 :                         send_sig(SIGPIPE, current, 0);
     483           0 :                         if (!ret)
     484           0 :                                 ret = -EPIPE;
     485             :                         break;
     486             :                 }
     487             : 
     488           0 :                 head = pipe->head;
     489           0 :                 if (!pipe_full(head, pipe->tail, pipe->max_usage)) {
     490           0 :                         unsigned int mask = pipe->ring_size - 1;
     491           0 :                         struct pipe_buffer *buf = &pipe->bufs[head & mask];
     492           0 :                         struct page *page = pipe->tmp_page;
     493             :                         int copied;
     494             : 
     495           0 :                         if (!page) {
     496           0 :                                 page = alloc_page(GFP_HIGHUSER | __GFP_ACCOUNT);
     497           0 :                                 if (unlikely(!page)) {
     498           0 :                                         ret = ret ? : -ENOMEM;
     499             :                                         break;
     500             :                                 }
     501           0 :                                 pipe->tmp_page = page;
     502             :                         }
     503             : 
     504             :                         /* Allocate a slot in the ring in advance and attach an
     505             :                          * empty buffer.  If we fault or otherwise fail to use
     506             :                          * it, either the reader will consume it or it'll still
     507             :                          * be there for the next write.
     508             :                          */
     509           0 :                         spin_lock_irq(&pipe->rd_wait.lock);
     510             : 
     511           0 :                         head = pipe->head;
     512           0 :                         if (pipe_full(head, pipe->tail, pipe->max_usage)) {
     513           0 :                                 spin_unlock_irq(&pipe->rd_wait.lock);
     514           0 :                                 continue;
     515             :                         }
     516             : 
     517           0 :                         pipe->head = head + 1;
     518           0 :                         spin_unlock_irq(&pipe->rd_wait.lock);
     519             : 
     520             :                         /* Insert it into the buffer array */
     521           0 :                         buf = &pipe->bufs[head & mask];
     522           0 :                         buf->page = page;
     523           0 :                         buf->ops = &anon_pipe_buf_ops;
     524           0 :                         buf->offset = 0;
     525           0 :                         buf->len = 0;
     526           0 :                         if (is_packetized(filp))
     527           0 :                                 buf->flags = PIPE_BUF_FLAG_PACKET;
     528             :                         else
     529           0 :                                 buf->flags = PIPE_BUF_FLAG_CAN_MERGE;
     530           0 :                         pipe->tmp_page = NULL;
     531             : 
     532           0 :                         copied = copy_page_from_iter(page, 0, PAGE_SIZE, from);
     533           0 :                         if (unlikely(copied < PAGE_SIZE && iov_iter_count(from))) {
     534           0 :                                 if (!ret)
     535           0 :                                         ret = -EFAULT;
     536             :                                 break;
     537             :                         }
     538           0 :                         ret += copied;
     539           0 :                         buf->offset = 0;
     540           0 :                         buf->len = copied;
     541             : 
     542           0 :                         if (!iov_iter_count(from))
     543             :                                 break;
     544             :                 }
     545             : 
     546           0 :                 if (!pipe_full(head, pipe->tail, pipe->max_usage))
     547           0 :                         continue;
     548             : 
     549             :                 /* Wait for buffer space to become available. */
     550           0 :                 if (filp->f_flags & O_NONBLOCK) {
     551           0 :                         if (!ret)
     552           0 :                                 ret = -EAGAIN;
     553             :                         break;
     554             :                 }
     555           0 :                 if (signal_pending(current)) {
     556           0 :                         if (!ret)
     557           0 :                                 ret = -ERESTARTSYS;
     558             :                         break;
     559             :                 }
     560             : 
     561             :                 /*
     562             :                  * We're going to release the pipe lock and wait for more
     563             :                  * space. We wake up any readers if necessary, and then
     564             :                  * after waiting we need to re-check whether the pipe
     565             :                  * become empty while we dropped the lock.
     566             :                  */
     567           0 :                 __pipe_unlock(pipe);
     568           0 :                 if (was_empty)
     569           0 :                         wake_up_interruptible_sync_poll(&pipe->rd_wait, EPOLLIN | EPOLLRDNORM);
     570           0 :                 kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
     571           0 :                 wait_event_interruptible_exclusive(pipe->wr_wait, pipe_writable(pipe));
     572           0 :                 __pipe_lock(pipe);
     573           0 :                 was_empty = pipe_empty(pipe->head, pipe->tail);
     574           0 :                 wake_next_writer = true;
     575             :         }
     576             : out:
     577           0 :         if (pipe_full(pipe->head, pipe->tail, pipe->max_usage))
     578           0 :                 wake_next_writer = false;
     579           0 :         __pipe_unlock(pipe);
     580             : 
     581             :         /*
     582             :          * If we do do a wakeup event, we do a 'sync' wakeup, because we
     583             :          * want the reader to start processing things asap, rather than
     584             :          * leave the data pending.
     585             :          *
     586             :          * This is particularly important for small writes, because of
     587             :          * how (for example) the GNU make jobserver uses small writes to
     588             :          * wake up pending jobs
     589             :          *
     590             :          * Epoll nonsensically wants a wakeup whether the pipe
     591             :          * was already empty or not.
     592             :          */
     593           0 :         if (was_empty || pipe->poll_usage)
     594           0 :                 wake_up_interruptible_sync_poll(&pipe->rd_wait, EPOLLIN | EPOLLRDNORM);
     595           0 :         kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
     596           0 :         if (wake_next_writer)
     597           0 :                 wake_up_interruptible_sync_poll(&pipe->wr_wait, EPOLLOUT | EPOLLWRNORM);
     598           0 :         if (ret > 0 && sb_start_write_trylock(file_inode(filp)->i_sb)) {
     599           0 :                 int err = file_update_time(filp);
     600           0 :                 if (err)
     601           0 :                         ret = err;
     602           0 :                 sb_end_write(file_inode(filp)->i_sb);
     603             :         }
     604             :         return ret;
     605             : }
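Editor's note: the write path above sends SIGPIPE and fails with -EPIPE when no readers remain; a minimal userspace sketch of observing that (ignoring SIGPIPE so the process sees the errno instead of being killed).

        #include <errno.h>
        #include <signal.h>
        #include <stdio.h>
        #include <unistd.h>

        int main(void)
        {
                int fds[2];

                if (pipe(fds) < 0)
                        return 1;
                signal(SIGPIPE, SIG_IGN);       /* take EPIPE instead of the signal */
                close(fds[0]);                  /* drop the only reader */
                if (write(fds[1], "x", 1) < 0 && errno == EPIPE)
                        fprintf(stderr, "write failed with EPIPE as expected\n");
                close(fds[1]);
                return 0;
        }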
     606             : 
     607           0 : static long pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
     608             : {
     609           0 :         struct pipe_inode_info *pipe = filp->private_data;
     610             :         unsigned int count, head, tail, mask;
     611             : 
     612           0 :         switch (cmd) {
     613             :         case FIONREAD:
     614           0 :                 __pipe_lock(pipe);
     615           0 :                 count = 0;
     616           0 :                 head = pipe->head;
     617           0 :                 tail = pipe->tail;
     618           0 :                 mask = pipe->ring_size - 1;
     619             : 
     620           0 :                 while (tail != head) {
     621           0 :                         count += pipe->bufs[tail & mask].len;
     622           0 :                         tail++;
     623             :                 }
     624           0 :                 __pipe_unlock(pipe);
     625             : 
     626           0 :                 return put_user(count, (int __user *)arg);
     627             : 
     628             : #ifdef CONFIG_WATCH_QUEUE
     629             :         case IOC_WATCH_QUEUE_SET_SIZE: {
     630             :                 int ret;
     631             :                 __pipe_lock(pipe);
     632             :                 ret = watch_queue_set_size(pipe, arg);
     633             :                 __pipe_unlock(pipe);
     634             :                 return ret;
     635             :         }
     636             : 
     637             :         case IOC_WATCH_QUEUE_SET_FILTER:
     638             :                 return watch_queue_set_filter(
     639             :                         pipe, (struct watch_notification_filter __user *)arg);
     640             : #endif
     641             : 
     642             :         default:
     643             :                 return -ENOIOCTLCMD;
     644             :         }
     645             : }
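Editor's note: FIONREAD, as handled above, reports the number of unread bytes queued in the pipe; a minimal userspace sketch.

        #include <stdio.h>
        #include <sys/ioctl.h>
        #include <unistd.h>

        int main(void)
        {
                int fds[2], avail = 0;

                if (pipe(fds) < 0)
                        return 1;
                write(fds[1], "hello", 5);
                if (ioctl(fds[0], FIONREAD, &avail) == 0)
                        printf("%d bytes queued\n", avail);     /* prints 5 */
                close(fds[0]);
                close(fds[1]);
                return 0;
        }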
     646             : 
     647             : /* No kernel lock held - fine */
     648             : static __poll_t
     649           0 : pipe_poll(struct file *filp, poll_table *wait)
     650             : {
     651             :         __poll_t mask;
     652           0 :         struct pipe_inode_info *pipe = filp->private_data;
     653             :         unsigned int head, tail;
     654             : 
     655             :         /* Epoll has some historical nasty semantics, this enables them */
     656           0 :         WRITE_ONCE(pipe->poll_usage, true);
     657             : 
     658             :         /*
     659             :          * Reading pipe state only -- no need for acquiring the semaphore.
     660             :          *
     661             :          * But because this is racy, the code has to add the
     662             :          * entry to the poll table _first_ ..
     663             :          */
     664           0 :         if (filp->f_mode & FMODE_READ)
     665           0 :                 poll_wait(filp, &pipe->rd_wait, wait);
     666           0 :         if (filp->f_mode & FMODE_WRITE)
     667           0 :                 poll_wait(filp, &pipe->wr_wait, wait);
     668             : 
     669             :         /*
     670             :          * .. and only then can you do the racy tests. That way,
     671             :          * if something changes and you got it wrong, the poll
     672             :          * table entry will wake you up and fix it.
     673             :          */
     674           0 :         head = READ_ONCE(pipe->head);
     675           0 :         tail = READ_ONCE(pipe->tail);
     676             : 
     677           0 :         mask = 0;
     678           0 :         if (filp->f_mode & FMODE_READ) {
     679           0 :                 if (!pipe_empty(head, tail))
     680           0 :                         mask |= EPOLLIN | EPOLLRDNORM;
     681           0 :                 if (!pipe->writers && filp->f_version != pipe->w_counter)
     682           0 :                         mask |= EPOLLHUP;
     683             :         }
     684             : 
     685           0 :         if (filp->f_mode & FMODE_WRITE) {
     686           0 :                 if (!pipe_full(head, tail, pipe->max_usage))
     687           0 :                         mask |= EPOLLOUT | EPOLLWRNORM;
     688             :                 /*
     689             :                  * Most Unices do not set EPOLLERR for FIFOs but on Linux they
     690             :                  * behave exactly like pipes for poll().
     691             :                  */
     692           0 :                 if (!pipe->readers)
     693           0 :                         mask |= EPOLLERR;
     694             :         }
     695             : 
     696           0 :         return mask;
     697             : }
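Editor's note: a hedged userspace sketch of the poll() semantics implemented above: POLLIN on the read end once data is queued, and POLLERR on the write end after all readers are gone (POLLERR is always reported in revents, whether or not it was requested).

        #include <poll.h>
        #include <stdio.h>
        #include <unistd.h>

        int main(void)
        {
                int fds[2];
                struct pollfd pfd;

                if (pipe(fds) < 0)
                        return 1;
                write(fds[1], "x", 1);

                pfd.fd = fds[0];
                pfd.events = POLLIN;
                poll(&pfd, 1, 0);
                printf("read end: POLLIN %s\n", (pfd.revents & POLLIN) ? "set" : "clear");

                close(fds[0]);                  /* no readers left */
                pfd.fd = fds[1];
                pfd.events = POLLOUT;
                poll(&pfd, 1, 0);
                printf("write end: POLLERR %s\n", (pfd.revents & POLLERR) ? "set" : "clear");

                close(fds[1]);
                return 0;
        }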
     698             : 
     699             : static void put_pipe_info(struct inode *inode, struct pipe_inode_info *pipe)
     700             : {
     701           0 :         int kill = 0;
     702             : 
     703           0 :         spin_lock(&inode->i_lock);
     704           0 :         if (!--pipe->files) {
     705           0 :                 inode->i_pipe = NULL;
     706           0 :                 kill = 1;
     707             :         }
     708           0 :         spin_unlock(&inode->i_lock);
     709             : 
     710           0 :         if (kill)
     711           0 :                 free_pipe_info(pipe);
     712             : }
     713             : 
     714             : static int
     715           0 : pipe_release(struct inode *inode, struct file *file)
     716             : {
     717           0 :         struct pipe_inode_info *pipe = file->private_data;
     718             : 
     719           0 :         __pipe_lock(pipe);
     720           0 :         if (file->f_mode & FMODE_READ)
     721           0 :                 pipe->readers--;
     722           0 :         if (file->f_mode & FMODE_WRITE)
     723           0 :                 pipe->writers--;
     724             : 
     725             :         /* Was that the last reader or writer, but not the other side? */
     726           0 :         if (!pipe->readers != !pipe->writers) {
     727           0 :                 wake_up_interruptible_all(&pipe->rd_wait);
     728           0 :                 wake_up_interruptible_all(&pipe->wr_wait);
     729           0 :                 kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
     730           0 :                 kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
     731             :         }
     732           0 :         __pipe_unlock(pipe);
     733             : 
     734           0 :         put_pipe_info(inode, pipe);
     735           0 :         return 0;
     736             : }
     737             : 
     738             : static int
     739           0 : pipe_fasync(int fd, struct file *filp, int on)
     740             : {
     741           0 :         struct pipe_inode_info *pipe = filp->private_data;
     742           0 :         int retval = 0;
     743             : 
     744           0 :         __pipe_lock(pipe);
     745           0 :         if (filp->f_mode & FMODE_READ)
     746           0 :                 retval = fasync_helper(fd, filp, on, &pipe->fasync_readers);
     747           0 :         if ((filp->f_mode & FMODE_WRITE) && retval >= 0) {
     748           0 :                 retval = fasync_helper(fd, filp, on, &pipe->fasync_writers);
     749           0 :                 if (retval < 0 && (filp->f_mode & FMODE_READ))
     750             :                         /* this can happen only if on == T */
     751           0 :                         fasync_helper(-1, filp, 0, &pipe->fasync_readers);
     752             :         }
     753           0 :         __pipe_unlock(pipe);
     754           0 :         return retval;
     755             : }
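Editor's note: pipe_fasync() backs O_ASYNC (SIGIO) notification; a hedged userspace sketch of arming it on the read end. F_SETOWN selects the signal recipient and F_SETFL adds O_ASYNC; a subsequent write to the pipe would then raise SIGIO for the owner.

        #define _GNU_SOURCE
        #include <fcntl.h>
        #include <signal.h>
        #include <unistd.h>

        static void on_sigio(int sig)
        {
                (void)sig;      /* data (or EOF) is now available on the pipe */
        }

        int main(void)
        {
                int fds[2];

                if (pipe(fds) < 0)
                        return 1;
                signal(SIGIO, on_sigio);
                fcntl(fds[0], F_SETOWN, getpid());
                fcntl(fds[0], F_SETFL, fcntl(fds[0], F_GETFL) | O_ASYNC);
                /* A write to fds[1] would now deliver SIGIO to this process. */
                close(fds[0]);
                close(fds[1]);
                return 0;
        }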
     756             : 
     757           0 : unsigned long account_pipe_buffers(struct user_struct *user,
     758             :                                    unsigned long old, unsigned long new)
     759             : {
     760           0 :         return atomic_long_add_return(new - old, &user->pipe_bufs);
     761             : }
     762             : 
     763           0 : bool too_many_pipe_buffers_soft(unsigned long user_bufs)
     764             : {
     765           0 :         unsigned long soft_limit = READ_ONCE(pipe_user_pages_soft);
     766             : 
     767           0 :         return soft_limit && user_bufs > soft_limit;
     768             : }
     769             : 
     770           0 : bool too_many_pipe_buffers_hard(unsigned long user_bufs)
     771             : {
     772           0 :         unsigned long hard_limit = READ_ONCE(pipe_user_pages_hard);
     773             : 
     774           0 :         return hard_limit && user_bufs > hard_limit;
     775             : }
     776             : 
     777           0 : bool pipe_is_unprivileged_user(void)
     778             : {
     779           0 :         return !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN);
     780             : }
     781             : 
     782           0 : struct pipe_inode_info *alloc_pipe_info(void)
     783             : {
     784             :         struct pipe_inode_info *pipe;
     785           0 :         unsigned long pipe_bufs = PIPE_DEF_BUFFERS;
     786           0 :         struct user_struct *user = get_current_user();
     787             :         unsigned long user_bufs;
     788           0 :         unsigned int max_size = READ_ONCE(pipe_max_size);
     789             : 
     790           0 :         pipe = kzalloc(sizeof(struct pipe_inode_info), GFP_KERNEL_ACCOUNT);
     791           0 :         if (pipe == NULL)
     792             :                 goto out_free_uid;
     793             : 
     794           0 :         if (pipe_bufs * PAGE_SIZE > max_size && !capable(CAP_SYS_RESOURCE))
     795           0 :                 pipe_bufs = max_size >> PAGE_SHIFT;
     796             : 
     797           0 :         user_bufs = account_pipe_buffers(user, 0, pipe_bufs);
     798             : 
     799           0 :         if (too_many_pipe_buffers_soft(user_bufs) && pipe_is_unprivileged_user()) {
     800           0 :                 user_bufs = account_pipe_buffers(user, pipe_bufs, PIPE_MIN_DEF_BUFFERS);
     801           0 :                 pipe_bufs = PIPE_MIN_DEF_BUFFERS;
     802             :         }
     803             : 
     804           0 :         if (too_many_pipe_buffers_hard(user_bufs) && pipe_is_unprivileged_user())
     805             :                 goto out_revert_acct;
     806             : 
     807           0 :         pipe->bufs = kcalloc(pipe_bufs, sizeof(struct pipe_buffer),
     808             :                              GFP_KERNEL_ACCOUNT);
     809             : 
     810           0 :         if (pipe->bufs) {
     811           0 :                 init_waitqueue_head(&pipe->rd_wait);
     812           0 :                 init_waitqueue_head(&pipe->wr_wait);
     813           0 :                 pipe->r_counter = pipe->w_counter = 1;
     814           0 :                 pipe->max_usage = pipe_bufs;
     815           0 :                 pipe->ring_size = pipe_bufs;
     816           0 :                 pipe->nr_accounted = pipe_bufs;
     817           0 :                 pipe->user = user;
     818           0 :                 mutex_init(&pipe->mutex);
     819           0 :                 return pipe;
     820             :         }
     821             : 
     822             : out_revert_acct:
     823           0 :         (void) account_pipe_buffers(user, pipe_bufs, 0);
     824           0 :         kfree(pipe);
     825             : out_free_uid:
     826           0 :         free_uid(user);
     827           0 :         return NULL;
     828             : }
     829             : 
     830           0 : void free_pipe_info(struct pipe_inode_info *pipe)
     831             : {
     832             :         unsigned int i;
     833             : 
     834             : #ifdef CONFIG_WATCH_QUEUE
     835             :         if (pipe->watch_queue)
     836             :                 watch_queue_clear(pipe->watch_queue);
     837             : #endif
     838             : 
     839           0 :         (void) account_pipe_buffers(pipe->user, pipe->nr_accounted, 0);
     840           0 :         free_uid(pipe->user);
     841           0 :         for (i = 0; i < pipe->ring_size; i++) {
     842           0 :                 struct pipe_buffer *buf = pipe->bufs + i;
     843           0 :                 if (buf->ops)
     844             :                         pipe_buf_release(pipe, buf);
     845             :         }
     846             : #ifdef CONFIG_WATCH_QUEUE
     847             :         if (pipe->watch_queue)
     848             :                 put_watch_queue(pipe->watch_queue);
     849             : #endif
     850           0 :         if (pipe->tmp_page)
     851           0 :                 __free_page(pipe->tmp_page);
     852           0 :         kfree(pipe->bufs);
     853           0 :         kfree(pipe);
     854           0 : }
     855             : 
     856             : static struct vfsmount *pipe_mnt __read_mostly;
     857             : 
     858             : /*
     859             :  * pipefs_dname() is called from d_path().
     860             :  */
     861           0 : static char *pipefs_dname(struct dentry *dentry, char *buffer, int buflen)
     862             : {
     863           0 :         return dynamic_dname(buffer, buflen, "pipe:[%lu]",
     864           0 :                                 d_inode(dentry)->i_ino);
     865             : }
     866             : 
     867             : static const struct dentry_operations pipefs_dentry_operations = {
     868             :         .d_dname        = pipefs_dname,
     869             : };
     870             : 
     871           0 : static struct inode * get_pipe_inode(void)
     872             : {
     873           0 :         struct inode *inode = new_inode_pseudo(pipe_mnt->mnt_sb);
     874             :         struct pipe_inode_info *pipe;
     875             : 
     876           0 :         if (!inode)
     877             :                 goto fail_inode;
     878             : 
     879           0 :         inode->i_ino = get_next_ino();
     880             : 
     881           0 :         pipe = alloc_pipe_info();
     882           0 :         if (!pipe)
     883             :                 goto fail_iput;
     884             : 
     885           0 :         inode->i_pipe = pipe;
     886           0 :         pipe->files = 2;
     887           0 :         pipe->readers = pipe->writers = 1;
     888           0 :         inode->i_fop = &pipefifo_fops;
     889             : 
     890             :         /*
     891             :          * Mark the inode dirty from the very beginning,
     892             :          * that way it will never be moved to the dirty
     893             :          * list because "mark_inode_dirty()" will think
     894             :          * that it already _is_ on the dirty list.
     895             :          */
     896           0 :         inode->i_state = I_DIRTY;
     897           0 :         inode->i_mode = S_IFIFO | S_IRUSR | S_IWUSR;
     898           0 :         inode->i_uid = current_fsuid();
     899           0 :         inode->i_gid = current_fsgid();
     900           0 :         inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
     901             : 
     902           0 :         return inode;
     903             : 
     904             : fail_iput:
     905           0 :         iput(inode);
     906             : 
     907             : fail_inode:
     908             :         return NULL;
     909             : }
     910             : 
     911           0 : int create_pipe_files(struct file **res, int flags)
     912             : {
     913           0 :         struct inode *inode = get_pipe_inode();
     914             :         struct file *f;
     915             :         int error;
     916             : 
     917           0 :         if (!inode)
     918             :                 return -ENFILE;
     919             : 
     920           0 :         if (flags & O_NOTIFICATION_PIPE) {
     921           0 :                 error = watch_queue_init(inode->i_pipe);
     922             :                 if (error) {
     923           0 :                         free_pipe_info(inode->i_pipe);
     924           0 :                         iput(inode);
     925           0 :                         return error;
     926             :                 }
     927             :         }
     928             : 
     929           0 :         f = alloc_file_pseudo(inode, pipe_mnt, "",
     930           0 :                                 O_WRONLY | (flags & (O_NONBLOCK | O_DIRECT)),
     931             :                                 &pipefifo_fops);
     932           0 :         if (IS_ERR(f)) {
     933           0 :                 free_pipe_info(inode->i_pipe);
     934           0 :                 iput(inode);
     935           0 :                 return PTR_ERR(f);
     936             :         }
     937             : 
     938           0 :         f->private_data = inode->i_pipe;
     939             : 
     940           0 :         res[0] = alloc_file_clone(f, O_RDONLY | (flags & O_NONBLOCK),
     941             :                                   &pipefifo_fops);
     942           0 :         if (IS_ERR(res[0])) {
     943           0 :                 put_pipe_info(inode, inode->i_pipe);
     944           0 :                 fput(f);
     945           0 :                 return PTR_ERR(res[0]);
     946             :         }
     947           0 :         res[0]->private_data = inode->i_pipe;
     948           0 :         res[1] = f;
     949           0 :         stream_open(inode, res[0]);
     950           0 :         stream_open(inode, res[1]);
     951           0 :         return 0;
     952             : }
     953             : 
     954           0 : static int __do_pipe_flags(int *fd, struct file **files, int flags)
     955             : {
     956             :         int error;
     957             :         int fdw, fdr;
     958             : 
     959           0 :         if (flags & ~(O_CLOEXEC | O_NONBLOCK | O_DIRECT | O_NOTIFICATION_PIPE))
     960             :                 return -EINVAL;
     961             : 
     962           0 :         error = create_pipe_files(files, flags);
     963           0 :         if (error)
     964             :                 return error;
     965             : 
     966           0 :         error = get_unused_fd_flags(flags);
     967           0 :         if (error < 0)
     968             :                 goto err_read_pipe;
     969           0 :         fdr = error;
     970             : 
     971           0 :         error = get_unused_fd_flags(flags);
     972           0 :         if (error < 0)
     973             :                 goto err_fdr;
     974           0 :         fdw = error;
     975             : 
     976           0 :         audit_fd_pair(fdr, fdw);
     977           0 :         fd[0] = fdr;
     978           0 :         fd[1] = fdw;
     979           0 :         return 0;
     980             : 
     981             :  err_fdr:
     982           0 :         put_unused_fd(fdr);
     983             :  err_read_pipe:
     984           0 :         fput(files[0]);
     985           0 :         fput(files[1]);
     986           0 :         return error;
     987             : }
     988             : 
     989           0 : int do_pipe_flags(int *fd, int flags)
     990             : {
     991             :         struct file *files[2];
     992           0 :         int error = __do_pipe_flags(fd, files, flags);
     993           0 :         if (!error) {
     994           0 :                 fd_install(fd[0], files[0]);
     995           0 :                 fd_install(fd[1], files[1]);
     996             :         }
     997           0 :         return error;
     998             : }
     999             : 
    1000             : /*
    1001             :  * sys_pipe() is the normal C calling convention for creating
    1002             :  * a pipe. It's not the way Unix traditionally does this, though.
    1003             :  */
    1004           0 : static int do_pipe2(int __user *fildes, int flags)
    1005             : {
    1006             :         struct file *files[2];
    1007             :         int fd[2];
    1008             :         int error;
    1009             : 
    1010           0 :         error = __do_pipe_flags(fd, files, flags);
    1011           0 :         if (!error) {
    1012           0 :                 if (unlikely(copy_to_user(fildes, fd, sizeof(fd)))) {
    1013           0 :                         fput(files[0]);
    1014           0 :                         fput(files[1]);
    1015           0 :                         put_unused_fd(fd[0]);
    1016           0 :                         put_unused_fd(fd[1]);
    1017           0 :                         error = -EFAULT;
    1018             :                 } else {
    1019           0 :                         fd_install(fd[0], files[0]);
    1020           0 :                         fd_install(fd[1], files[1]);
    1021             :                 }
    1022             :         }
    1023           0 :         return error;
    1024             : }
    1025             : 
    1026           0 : SYSCALL_DEFINE2(pipe2, int __user *, fildes, int, flags)
    1027             : {
    1028           0 :         return do_pipe2(fildes, flags);
    1029             : }
    1030             : 
    1031           0 : SYSCALL_DEFINE1(pipe, int __user *, fildes)
    1032             : {
    1033           0 :         return do_pipe2(fildes, 0);
    1034             : }
    1035             : 
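/*
 * A minimal userspace sketch of the interface these two syscalls implement
 * (illustrative only, not part of pipe.c): pipe2() ends up in do_pipe2()
 * above; fd[0] is the read end and fd[1] the write end.
 */
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
        int fds[2];
        char buf[32];
        ssize_t n;

        if (pipe2(fds, O_CLOEXEC) < 0) {        /* dispatches to do_pipe2() */
                perror("pipe2");
                return 1;
        }
        write(fds[1], "hello", 5);              /* fds[1] is the write end */
        n = read(fds[0], buf, sizeof(buf));     /* fds[0] is the read end */
        printf("read %zd bytes: %.*s\n", n, (int)n, buf);
        close(fds[0]);
        close(fds[1]);
        return 0;
}
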
    1036             : /*
    1037             :  * This is the stupid "wait for pipe to be readable or writable"
    1038             :  * model.
    1039             :  *
    1040             :  * See pipe_read/write() for the proper kind of exclusive wait,
    1041             :  * but that requires that we wake up any other readers/writers
    1042             :  * if we then do not end up reading everything (i.e. the whole
    1043             :  * "wake_next_reader/writer" logic in pipe_read/write()).
    1044             :  */
    1045           0 : void pipe_wait_readable(struct pipe_inode_info *pipe)
    1046             : {
    1047           0 :         pipe_unlock(pipe);
    1048           0 :         wait_event_interruptible(pipe->rd_wait, pipe_readable(pipe));
    1049           0 :         pipe_lock(pipe);
    1050           0 : }
    1051             : 
    1052           0 : void pipe_wait_writable(struct pipe_inode_info *pipe)
    1053             : {
    1054           0 :         pipe_unlock(pipe);
    1055           0 :         wait_event_interruptible(pipe->wr_wait, pipe_writable(pipe));
    1056           0 :         pipe_lock(pipe);
    1057           0 : }
    1058             : 
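/*
 * Hypothetical caller sketch (not taken from this file): a writer that
 * cannot make progress re-checks its condition under the pipe lock and
 * either fails fast for non-blocking I/O or sleeps via the helper above.
 * pipe_full() and pipe_wait_writable() are the real helpers; the function
 * name and the nonblock parameter are made up for illustration.
 */
static int wait_for_room(struct pipe_inode_info *pipe, bool nonblock)
{
        for (;;) {
                if (!pipe->readers)
                        return -EPIPE;          /* no reader will ever drain it */
                if (!pipe_full(pipe->head, pipe->tail, pipe->max_usage))
                        return 0;               /* room available, go write */
                if (nonblock)
                        return -EAGAIN;
                if (signal_pending(current))
                        return -ERESTARTSYS;
                pipe_wait_writable(pipe);       /* drops and re-takes the pipe lock */
        }
}
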
    1059             : /*
    1060             :  * This depends on both the wait (here) and the wakeup (wake_up_partner)
    1061             :  * holding the pipe lock, so "*cnt" is stable and we know a wakeup cannot
    1062             :  * race with the count check and waitqueue prep.
    1063             :  *
    1064             :  * Normally in order to avoid races, you'd do the prepare_to_wait() first,
    1065             :  * then check the condition you're waiting for, and only then sleep. But
    1066             :  * because of the pipe lock, we can check the condition before being on
    1067             :  * the wait queue.
    1068             :  *
    1069             :  * We use the 'rd_wait' waitqueue for pipe partner waiting.
    1070             :  */
    1071           0 : static int wait_for_partner(struct pipe_inode_info *pipe, unsigned int *cnt)
    1072             : {
    1073           0 :         DEFINE_WAIT(rdwait);
    1074           0 :         int cur = *cnt;
    1075             : 
    1076           0 :         while (cur == *cnt) {
    1077           0 :                 prepare_to_wait(&pipe->rd_wait, &rdwait, TASK_INTERRUPTIBLE);
    1078           0 :                 pipe_unlock(pipe);
    1079           0 :                 schedule();
    1080           0 :                 finish_wait(&pipe->rd_wait, &rdwait);
    1081           0 :                 pipe_lock(pipe);
    1082           0 :                 if (signal_pending(current))
    1083             :                         break;
    1084             :         }
    1085           0 :         return cur == *cnt ? -ERESTARTSYS : 0;
    1086             : }
    1087             : 
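/*
 * Generic sketch of the usual ordering the comment above contrasts with
 * (illustrative idiom only, not code from this file): go onto the wait
 * queue first, then test the condition, then sleep, so that a wakeup
 * arriving between the test and the sleep is not lost.
 */
static void sleep_until(wait_queue_head_t *wq, bool (*cond)(void *), void *arg)
{
        DEFINE_WAIT(wait);

        for (;;) {
                prepare_to_wait(wq, &wait, TASK_INTERRUPTIBLE);
                if (cond(arg) || signal_pending(current))
                        break;
                schedule();
        }
        finish_wait(wq, &wait);
}
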
    1088             : static void wake_up_partner(struct pipe_inode_info *pipe)
    1089             : {
    1090           0 :         wake_up_interruptible_all(&pipe->rd_wait);
    1091             : }
    1092             : 
    1093           0 : static int fifo_open(struct inode *inode, struct file *filp)
    1094             : {
    1095             :         struct pipe_inode_info *pipe;
    1096           0 :         bool is_pipe = inode->i_sb->s_magic == PIPEFS_MAGIC;
    1097             :         int ret;
    1098             : 
    1099           0 :         filp->f_version = 0;
    1100             : 
    1101           0 :         spin_lock(&inode->i_lock);
    1102           0 :         if (inode->i_pipe) {
    1103           0 :                 pipe = inode->i_pipe;
    1104           0 :                 pipe->files++;
    1105           0 :                 spin_unlock(&inode->i_lock);
    1106             :         } else {
    1107           0 :                 spin_unlock(&inode->i_lock);
    1108           0 :                 pipe = alloc_pipe_info();
    1109           0 :                 if (!pipe)
    1110             :                         return -ENOMEM;
    1111           0 :                 pipe->files = 1;
    1112           0 :                 spin_lock(&inode->i_lock);
    1113           0 :                 if (unlikely(inode->i_pipe)) {
    1114           0 :                         inode->i_pipe->files++;
    1115           0 :                         spin_unlock(&inode->i_lock);
    1116           0 :                         free_pipe_info(pipe);
    1117           0 :                         pipe = inode->i_pipe;
    1118             :                 } else {
    1119           0 :                         inode->i_pipe = pipe;
    1120           0 :                         spin_unlock(&inode->i_lock);
    1121             :                 }
    1122             :         }
    1123           0 :         filp->private_data = pipe;
    1124             :         /* OK, we have a pipe and it's pinned down */
    1125             : 
    1126           0 :         __pipe_lock(pipe);
    1127             : 
    1128             :         /* We can only do regular read/write on fifos */
    1129           0 :         stream_open(inode, filp);
    1130             : 
    1131           0 :         switch (filp->f_mode & (FMODE_READ | FMODE_WRITE)) {
    1132             :         case FMODE_READ:
    1133             :         /*
    1134             :          *  O_RDONLY
    1135             :          *  POSIX.1 says that O_NONBLOCK means return with the FIFO
    1136             :          *  opened, even when there is no process writing the FIFO.
    1137             :          */
    1138           0 :                 pipe->r_counter++;
    1139           0 :                 if (pipe->readers++ == 0)
    1140             :                         wake_up_partner(pipe);
    1141             : 
    1142           0 :                 if (!is_pipe && !pipe->writers) {
    1143           0 :                         if ((filp->f_flags & O_NONBLOCK)) {
    1144             :                                 /* suppress EPOLLHUP until we have
    1145             :                                  * seen a writer */
    1146           0 :                                 filp->f_version = pipe->w_counter;
    1147             :                         } else {
    1148           0 :                                 if (wait_for_partner(pipe, &pipe->w_counter))
    1149             :                                         goto err_rd;
    1150             :                         }
    1151             :                 }
    1152             :                 break;
    1153             : 
    1154             :         case FMODE_WRITE:
    1155             :         /*
    1156             :          *  O_WRONLY
    1157             :          *  POSIX.1 says that O_NONBLOCK means return -1 with
    1158             :          *  errno=ENXIO when there is no process reading the FIFO.
    1159             :          */
    1160           0 :                 ret = -ENXIO;
    1161           0 :                 if (!is_pipe && (filp->f_flags & O_NONBLOCK) && !pipe->readers)
    1162             :                         goto err;
    1163             : 
    1164           0 :                 pipe->w_counter++;
    1165           0 :                 if (!pipe->writers++)
    1166             :                         wake_up_partner(pipe);
    1167             : 
    1168           0 :                 if (!is_pipe && !pipe->readers) {
    1169           0 :                         if (wait_for_partner(pipe, &pipe->r_counter))
    1170             :                                 goto err_wr;
    1171             :                 }
    1172             :                 break;
    1173             : 
    1174             :         case FMODE_READ | FMODE_WRITE:
    1175             :         /*
    1176             :          *  O_RDWR
    1177             :          *  POSIX.1 leaves this case "undefined" when O_NONBLOCK is set.
    1178             :          *  This implementation will NEVER block on an O_RDWR open, since
    1179             :          *  the process can at least talk to itself.
    1180             :          */
    1181             : 
    1182           0 :                 pipe->readers++;
    1183           0 :                 pipe->writers++;
    1184           0 :                 pipe->r_counter++;
    1185           0 :                 pipe->w_counter++;
    1186           0 :                 if (pipe->readers == 1 || pipe->writers == 1)
    1187             :                         wake_up_partner(pipe);
    1188             :                 break;
    1189             : 
    1190             :         default:
    1191             :                 ret = -EINVAL;
    1192             :                 goto err;
    1193             :         }
    1194             : 
    1195             :         /* Ok! */
    1196           0 :         __pipe_unlock(pipe);
    1197           0 :         return 0;
    1198             : 
    1199             : err_rd:
    1200           0 :         if (!--pipe->readers)
    1201           0 :                 wake_up_interruptible(&pipe->wr_wait);
    1202             :         ret = -ERESTARTSYS;
    1203             :         goto err;
    1204             : 
    1205             : err_wr:
    1206           0 :         if (!--pipe->writers)
    1207           0 :                 wake_up_interruptible_all(&pipe->rd_wait);
    1208             :         ret = -ERESTARTSYS;
    1209             :         goto err;
    1210             : 
    1211             : err:
    1212           0 :         __pipe_unlock(pipe);
    1213             : 
    1214           0 :         put_pipe_info(inode, pipe);
    1215             :         return ret;
    1216             : }
    1217             : 
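/*
 * Userspace sketch of the FIFO semantics fifo_open() implements
 * (illustrative only, not part of pipe.c): with O_NONBLOCK, a read-only
 * open succeeds even without a writer, while a write-only open fails
 * with ENXIO until a reader exists. The path below is hypothetical.
 */
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <sys/stat.h>
#include <unistd.h>

int main(void)
{
        const char *path = "/tmp/demo_fifo";            /* hypothetical path */

        mkfifo(path, 0600);

        int wfd = open(path, O_WRONLY | O_NONBLOCK);    /* no reader yet */
        if (wfd < 0 && errno == ENXIO)
                printf("write-only non-blocking open failed with ENXIO\n");

        int rfd = open(path, O_RDONLY | O_NONBLOCK);    /* fine without a writer */
        if (rfd >= 0) {
                printf("read-only non-blocking open succeeded\n");
                close(rfd);
        }

        unlink(path);
        return 0;
}
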
    1218             : const struct file_operations pipefifo_fops = {
    1219             :         .open           = fifo_open,
    1220             :         .llseek         = no_llseek,
    1221             :         .read_iter      = pipe_read,
    1222             :         .write_iter     = pipe_write,
    1223             :         .poll           = pipe_poll,
    1224             :         .unlocked_ioctl = pipe_ioctl,
    1225             :         .release        = pipe_release,
    1226             :         .fasync         = pipe_fasync,
    1227             :         .splice_write   = iter_file_splice_write,
    1228             : };
    1229             : 
    1230             : /*
    1231             :  * Currently we rely on the pipe array holding a power-of-2 number
    1232             :  * of pages. Returns 0 on error.
    1233             :  */
    1234           0 : unsigned int round_pipe_size(unsigned long size)
    1235             : {
    1236           0 :         if (size > (1U << 31))
    1237             :                 return 0;
    1238             : 
    1239             :         /* Minimum pipe size, as required by POSIX */
    1240           0 :         if (size < PAGE_SIZE)
    1241             :                 return PAGE_SIZE;
    1242             : 
    1243           0 :         return roundup_pow_of_two(size);
    1244             : }
    1245             : 
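/*
 * Worked examples for round_pipe_size(), assuming a 4 KiB PAGE_SIZE:
 *
 *   round_pipe_size(1)          -> 4096     (clamped up to one page)
 *   round_pipe_size(5000)       -> 8192     (rounded to the next power of two)
 *   round_pipe_size(100000)     -> 131072
 *   round_pipe_size(3UL << 30)  -> 0        (rejected: above the 2^31 cap)
 */
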
    1246             : /*
    1247             :  * Resize the pipe ring to a number of slots.
    1248             :  *
    1249             :  * Note the pipe can be reduced in capacity, but only if the current
    1250             :  * occupancy doesn't exceed nr_slots; if it does, EBUSY will be
    1251             :  * returned instead.
    1252             :  */
    1253           0 : int pipe_resize_ring(struct pipe_inode_info *pipe, unsigned int nr_slots)
    1254             : {
    1255             :         struct pipe_buffer *bufs;
    1256             :         unsigned int head, tail, mask, n;
    1257             : 
    1258           0 :         bufs = kcalloc(nr_slots, sizeof(*bufs),
    1259             :                        GFP_KERNEL_ACCOUNT | __GFP_NOWARN);
    1260           0 :         if (unlikely(!bufs))
    1261             :                 return -ENOMEM;
    1262             : 
    1263           0 :         spin_lock_irq(&pipe->rd_wait.lock);
    1264           0 :         mask = pipe->ring_size - 1;
    1265           0 :         head = pipe->head;
    1266           0 :         tail = pipe->tail;
    1267             : 
    1268           0 :         n = pipe_occupancy(head, tail);
    1269           0 :         if (nr_slots < n) {
    1270           0 :                 spin_unlock_irq(&pipe->rd_wait.lock);
    1271           0 :                 kfree(bufs);
    1272           0 :                 return -EBUSY;
    1273             :         }
    1274             : 
    1275             :         /*
    1276             :          * The pipe array wraps around, so just start the new one at zero
    1277             :          * and adjust the indices.
    1278             :          */
    1279           0 :         if (n > 0) {
    1280           0 :                 unsigned int h = head & mask;
    1281           0 :                 unsigned int t = tail & mask;
    1282           0 :                 if (h > t) {
    1283           0 :                         memcpy(bufs, pipe->bufs + t,
    1284             :                                n * sizeof(struct pipe_buffer));
    1285             :                 } else {
    1286           0 :                         unsigned int tsize = pipe->ring_size - t;
    1287           0 :                         if (h > 0)
    1288           0 :                                 memcpy(bufs + tsize, pipe->bufs,
    1289             :                                        h * sizeof(struct pipe_buffer));
    1290           0 :                         memcpy(bufs, pipe->bufs + t,
    1291             :                                tsize * sizeof(struct pipe_buffer));
    1292             :                 }
    1293             :         }
    1294             : 
    1295           0 :         head = n;
    1296           0 :         tail = 0;
    1297             : 
    1298           0 :         kfree(pipe->bufs);
    1299           0 :         pipe->bufs = bufs;
    1300           0 :         pipe->ring_size = nr_slots;
    1301           0 :         if (pipe->max_usage > nr_slots)
    1302           0 :                 pipe->max_usage = nr_slots;
    1303           0 :         pipe->tail = tail;
    1304           0 :         pipe->head = head;
    1305             : 
    1306           0 :         spin_unlock_irq(&pipe->rd_wait.lock);
    1307             : 
    1308             :         /* This might have made more room for writers */
    1309           0 :         wake_up_interruptible(&pipe->wr_wait);
    1310           0 :         return 0;
    1311             : }
    1312             : 
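/*
 * Self-contained sketch of the wrap-around copy above (illustrative only,
 * not kernel code): linearize a ring of 'ring_size' slots, where ring_size
 * is a power of two and head/tail are free-running indices, into a fresh
 * array starting at slot 0. 'struct slot' and the function name are made-up
 * stand-ins for struct pipe_buffer and the copy done in pipe_resize_ring().
 */
#include <string.h>

struct slot { void *page; unsigned int offset, len; };

static void linearize_ring(struct slot *dst, const struct slot *ring,
                           unsigned int ring_size, unsigned int head,
                           unsigned int tail)
{
        unsigned int mask = ring_size - 1;
        unsigned int n = head - tail;           /* occupancy */
        unsigned int h = head & mask;
        unsigned int t = tail & mask;

        if (n == 0)
                return;
        if (h > t) {
                /* occupied slots are contiguous: one copy */
                memcpy(dst, ring + t, n * sizeof(*ring));
        } else {
                /* occupied slots wrap: copy the tail run, then the head run */
                unsigned int tsize = ring_size - t;

                memcpy(dst, ring + t, tsize * sizeof(*ring));
                if (h > 0)
                        memcpy(dst + tsize, ring, h * sizeof(*ring));
        }
}
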
    1313             : /*
    1314             :  * Allocate a new array of pipe buffers and copy the info over. Returns the
    1315             :  * new pipe size if successful, or a negative error code on failure.
    1316             :  */
    1317           0 : static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long arg)
    1318             : {
    1319             :         unsigned long user_bufs;
    1320             :         unsigned int nr_slots, size;
    1321           0 :         long ret = 0;
    1322             : 
    1323             : #ifdef CONFIG_WATCH_QUEUE
    1324             :         if (pipe->watch_queue)
    1325             :                 return -EBUSY;
    1326             : #endif
    1327             : 
    1328           0 :         size = round_pipe_size(arg);
    1329           0 :         nr_slots = size >> PAGE_SHIFT;
    1330             : 
    1331           0 :         if (!nr_slots)
    1332             :                 return -EINVAL;
    1333             : 
    1334             :         /*
    1335             :          * If trying to increase the pipe capacity, check that an
    1336             :          * unprivileged user is not trying to exceed various limits
    1337             :          * (soft limit check here, hard limit check just below).
    1338             :          * Decreasing the pipe capacity is always permitted, even
    1339             :          * if the user is currently over a limit.
    1340             :          */
    1341           0 :         if (nr_slots > pipe->max_usage &&
    1342           0 :                         size > pipe_max_size && !capable(CAP_SYS_RESOURCE))
    1343             :                 return -EPERM;
    1344             : 
    1345           0 :         user_bufs = account_pipe_buffers(pipe->user, pipe->nr_accounted, nr_slots);
    1346             : 
    1347           0 :         if (nr_slots > pipe->max_usage &&
    1348           0 :                         (too_many_pipe_buffers_hard(user_bufs) ||
    1349           0 :                          too_many_pipe_buffers_soft(user_bufs)) &&
    1350           0 :                         pipe_is_unprivileged_user()) {
    1351             :                 ret = -EPERM;
    1352             :                 goto out_revert_acct;
    1353             :         }
    1354             : 
    1355           0 :         ret = pipe_resize_ring(pipe, nr_slots);
    1356           0 :         if (ret < 0)
    1357             :                 goto out_revert_acct;
    1358             : 
    1359           0 :         pipe->max_usage = nr_slots;
    1360           0 :         pipe->nr_accounted = nr_slots;
    1361           0 :         return pipe->max_usage * PAGE_SIZE;
    1362             : 
    1363             : out_revert_acct:
    1364           0 :         (void) account_pipe_buffers(pipe->user, nr_slots, pipe->nr_accounted);
    1365           0 :         return ret;
    1366             : }
    1367             : 
    1368             : /*
    1369             :  * Note that i_pipe and i_cdev share the same location, so checking ->i_pipe is
    1370             :  * not enough to verify that this is a pipe.
    1371             :  */
    1372           0 : struct pipe_inode_info *get_pipe_info(struct file *file, bool for_splice)
    1373             : {
    1374           0 :         struct pipe_inode_info *pipe = file->private_data;
    1375             : 
    1376           0 :         if (file->f_op != &pipefifo_fops || !pipe)
    1377             :                 return NULL;
    1378             : #ifdef CONFIG_WATCH_QUEUE
    1379             :         if (for_splice && pipe->watch_queue)
    1380             :                 return NULL;
    1381             : #endif
    1382           0 :         return pipe;
    1383             : }
    1384             : 
    1385           0 : long pipe_fcntl(struct file *file, unsigned int cmd, unsigned long arg)
    1386             : {
    1387             :         struct pipe_inode_info *pipe;
    1388             :         long ret;
    1389             : 
    1390           0 :         pipe = get_pipe_info(file, false);
    1391           0 :         if (!pipe)
    1392             :                 return -EBADF;
    1393             : 
    1394           0 :         __pipe_lock(pipe);
    1395             : 
    1396           0 :         switch (cmd) {
    1397             :         case F_SETPIPE_SZ:
    1398           0 :                 ret = pipe_set_size(pipe, arg);
    1399           0 :                 break;
    1400             :         case F_GETPIPE_SZ:
    1401           0 :                 ret = pipe->max_usage * PAGE_SIZE;
    1402           0 :                 break;
    1403             :         default:
    1404             :                 ret = -EINVAL;
    1405             :                 break;
    1406             :         }
    1407             : 
    1408           0 :         __pipe_unlock(pipe);
    1409           0 :         return ret;
    1410             : }
    1411             : 
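/*
 * Userspace sketch of the two fcntl() commands dispatched above
 * (illustrative only, not part of pipe.c): grow a pipe with F_SETPIPE_SZ
 * and read the resulting capacity back with F_GETPIPE_SZ. The kernel
 * rounds the request via round_pipe_size(), so the value reported may be
 * larger than the one asked for.
 */
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
        int fds[2];

        if (pipe(fds) < 0)
                return 1;

        int set = fcntl(fds[1], F_SETPIPE_SZ, 1024 * 1024);    /* request 1 MiB */
        int got = fcntl(fds[1], F_GETPIPE_SZ);

        printf("F_SETPIPE_SZ returned %d, F_GETPIPE_SZ reports %d bytes\n", set, got);
        close(fds[0]);
        close(fds[1]);
        return 0;
}
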
    1412             : static const struct super_operations pipefs_ops = {
    1413             :         .destroy_inode = free_inode_nonrcu,
    1414             :         .statfs = simple_statfs,
    1415             : };
    1416             : 
    1417             : /*
    1418             :  * pipefs should _never_ be mounted by userland - too much of a security hassle,
    1419             :  * no real gain from having the whole whorehouse mounted. So we don't need
    1420             :  * any operations on the root directory. However, we need a non-trivial
    1421             :  * d_name - "pipe:" works nicely and kills the special-casing in procfs.
    1422             :  */
    1423             : 
    1424           1 : static int pipefs_init_fs_context(struct fs_context *fc)
    1425             : {
    1426           1 :         struct pseudo_fs_context *ctx = init_pseudo(fc, PIPEFS_MAGIC);
    1427           1 :         if (!ctx)
    1428             :                 return -ENOMEM;
    1429           1 :         ctx->ops = &pipefs_ops;
    1430           1 :         ctx->dops = &pipefs_dentry_operations;
    1431           1 :         return 0;
    1432             : }
    1433             : 
    1434             : static struct file_system_type pipe_fs_type = {
    1435             :         .name           = "pipefs",
    1436             :         .init_fs_context = pipefs_init_fs_context,
    1437             :         .kill_sb        = kill_anon_super,
    1438             : };
    1439             : 
    1440             : #ifdef CONFIG_SYSCTL
    1441           0 : static int do_proc_dopipe_max_size_conv(unsigned long *lvalp,
    1442             :                                         unsigned int *valp,
    1443             :                                         int write, void *data)
    1444             : {
    1445           0 :         if (write) {
    1446             :                 unsigned int val;
    1447             : 
    1448           0 :                 val = round_pipe_size(*lvalp);
    1449           0 :                 if (val == 0)
    1450             :                         return -EINVAL;
    1451             : 
    1452           0 :                 *valp = val;
    1453             :         } else {
    1454           0 :                 unsigned int val = *valp;
    1455           0 :                 *lvalp = (unsigned long) val;
    1456             :         }
    1457             : 
    1458             :         return 0;
    1459             : }
    1460             : 
    1461           0 : static int proc_dopipe_max_size(struct ctl_table *table, int write,
    1462             :                                 void *buffer, size_t *lenp, loff_t *ppos)
    1463             : {
    1464           0 :         return do_proc_douintvec(table, write, buffer, lenp, ppos,
    1465             :                                  do_proc_dopipe_max_size_conv, NULL);
    1466             : }
    1467             : 
    1468             : static struct ctl_table fs_pipe_sysctls[] = {
    1469             :         {
    1470             :                 .procname       = "pipe-max-size",
    1471             :                 .data           = &pipe_max_size,
    1472             :                 .maxlen         = sizeof(pipe_max_size),
    1473             :                 .mode           = 0644,
    1474             :                 .proc_handler   = proc_dopipe_max_size,
    1475             :         },
    1476             :         {
    1477             :                 .procname       = "pipe-user-pages-hard",
    1478             :                 .data           = &pipe_user_pages_hard,
    1479             :                 .maxlen         = sizeof(pipe_user_pages_hard),
    1480             :                 .mode           = 0644,
    1481             :                 .proc_handler   = proc_doulongvec_minmax,
    1482             :         },
    1483             :         {
    1484             :                 .procname       = "pipe-user-pages-soft",
    1485             :                 .data           = &pipe_user_pages_soft,
    1486             :                 .maxlen         = sizeof(pipe_user_pages_soft),
    1487             :                 .mode           = 0644,
    1488             :                 .proc_handler   = proc_doulongvec_minmax,
    1489             :         },
    1490             :         { }
    1491             : };
    1492             : #endif
    1493             : 
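/*
 * Userspace sketch (illustrative only, not part of pipe.c): the table above
 * exposes the limits as /proc/sys/fs/pipe-max-size,
 * /proc/sys/fs/pipe-user-pages-hard and /proc/sys/fs/pipe-user-pages-soft.
 * Reading pipe-max-size shows the ceiling applied to unprivileged
 * F_SETPIPE_SZ requests.
 */
#include <stdio.h>

int main(void)
{
        unsigned long max = 0;
        FILE *f = fopen("/proc/sys/fs/pipe-max-size", "r");

        if (!f)
                return 1;
        if (fscanf(f, "%lu", &max) == 1)
                printf("unprivileged F_SETPIPE_SZ ceiling: %lu bytes\n", max);
        fclose(f);
        return 0;
}
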
    1494           1 : static int __init init_pipe_fs(void)
    1495             : {
    1496           1 :         int err = register_filesystem(&pipe_fs_type);
    1497             : 
    1498           1 :         if (!err) {
    1499           1 :                 pipe_mnt = kern_mount(&pipe_fs_type);
    1500           2 :                 if (IS_ERR(pipe_mnt)) {
    1501           0 :                         err = PTR_ERR(pipe_mnt);
    1502           0 :                         unregister_filesystem(&pipe_fs_type);
    1503             :                 }
    1504             :         }
    1505             : #ifdef CONFIG_SYSCTL
    1506           1 :         register_sysctl_init("fs", fs_pipe_sysctls);
    1507             : #endif
    1508           1 :         return err;
    1509             : }
    1510             : 
    1511             : fs_initcall(init_pipe_fs);

Generated by: LCOV version 1.14