LCOV - code coverage report
Current view: top level - fs/kernfs - file.c (source / functions) Hit Total Coverage
Test: coverage.info Lines: 43 355 12.1 %
Date: 2023-07-19 18:55:55 Functions: 4 24 16.7 %

          Line data    Source code
       1             : // SPDX-License-Identifier: GPL-2.0-only
       2             : /*
       3             :  * fs/kernfs/file.c - kernfs file implementation
       4             :  *
       5             :  * Copyright (c) 2001-3 Patrick Mochel
       6             :  * Copyright (c) 2007 SUSE Linux Products GmbH
       7             :  * Copyright (c) 2007, 2013 Tejun Heo <tj@kernel.org>
       8             :  */
       9             : 
      10             : #include <linux/fs.h>
      11             : #include <linux/seq_file.h>
      12             : #include <linux/slab.h>
      13             : #include <linux/poll.h>
      14             : #include <linux/pagemap.h>
      15             : #include <linux/sched/mm.h>
      16             : #include <linux/fsnotify.h>
      17             : #include <linux/uio.h>
      18             : 
      19             : #include "kernfs-internal.h"
      20             : 
      21             : struct kernfs_open_node {
      22             :         struct rcu_head         rcu_head;
      23             :         atomic_t                event;
      24             :         wait_queue_head_t       poll;
      25             :         struct list_head        files; /* goes through kernfs_open_file.list */
      26             :         unsigned int            nr_mmapped;
      27             :         unsigned int            nr_to_release;
      28             : };
      29             : 
      30             : /*
      31             :  * kernfs_notify() may be called from any context and bounces notifications
      32             :  * through a work item.  To minimize space overhead in kernfs_node, the
      33             :  * pending queue is implemented as a singly linked list of kernfs_nodes.
      34             :  * The list is terminated with the self pointer so that whether a
      35             :  * kernfs_node is on the list or not can be determined by testing the next
      36             :  * pointer for %NULL.
      37             :  */
      38             : #define KERNFS_NOTIFY_EOL                       ((void *)&kernfs_notify_list)
      39             : 
      40             : static DEFINE_SPINLOCK(kernfs_notify_lock);
      41             : static struct kernfs_node *kernfs_notify_list = KERNFS_NOTIFY_EOL;
      42             : 
      43             : static inline struct mutex *kernfs_open_file_mutex_ptr(struct kernfs_node *kn)
      44             : {
      45           0 :         int idx = hash_ptr(kn, NR_KERNFS_LOCK_BITS);
      46             : 
      47           0 :         return &kernfs_locks->open_file_mutex[idx];
      48             : }
      49             : 
      50             : static inline struct mutex *kernfs_open_file_mutex_lock(struct kernfs_node *kn)
      51             : {
      52             :         struct mutex *lock;
      53             : 
      54           0 :         lock = kernfs_open_file_mutex_ptr(kn);
      55             : 
      56           0 :         mutex_lock(lock);
      57             : 
      58             :         return lock;
      59             : }
      60             : 
      61             : /**
      62             :  * of_on - Get the kernfs_open_node of the specified kernfs_open_file
      63             :  * @of: target kernfs_open_file
      64             :  *
      65             :  * Return: the kernfs_open_node of the kernfs_open_file
      66             :  */
      67             : static struct kernfs_open_node *of_on(struct kernfs_open_file *of)
      68             : {
      69           0 :         return rcu_dereference_protected(of->kn->attr.open,
      70             :                                          !list_empty(&of->list));
      71             : }
      72             : 
      73             : /**
      74             :  * kernfs_deref_open_node_locked - Get kernfs_open_node corresponding to @kn
      75             :  *
      76             :  * @kn: target kernfs_node.
      77             :  *
      78             :  * Fetch and return ->attr.open of @kn when caller holds the
      79             :  * kernfs_open_file_mutex_ptr(kn).
      80             :  *
      81             :  * Update of ->attr.open happens under kernfs_open_file_mutex_ptr(kn). So when
      82             :  * the caller guarantees that this mutex is being held, other updaters can't
      83             :  * change ->attr.open and this means that we can safely deref ->attr.open
      84             :  * outside RCU read-side critical section.
      85             :  *
      86             :  * The caller needs to make sure that kernfs_open_file_mutex is held.
      87             :  *
      88             :  * Return: @kn->attr.open when kernfs_open_file_mutex is held.
      89             :  */
      90             : static struct kernfs_open_node *
      91             : kernfs_deref_open_node_locked(struct kernfs_node *kn)
      92             : {
      93             :         return rcu_dereference_protected(kn->attr.open,
      94             :                                 lockdep_is_held(kernfs_open_file_mutex_ptr(kn)));
      95             : }
      96             : 
      97             : static struct kernfs_open_file *kernfs_of(struct file *file)
      98             : {
      99           0 :         return ((struct seq_file *)file->private_data)->private;
     100             : }
     101             : 
     102             : /*
     103             :  * Determine the kernfs_ops for the given kernfs_node.  This function must
     104             :  * be called while holding an active reference.
     105             :  */
     106             : static const struct kernfs_ops *kernfs_ops(struct kernfs_node *kn)
     107             : {
     108             :         if (kn->flags & KERNFS_LOCKDEP)
     109             :                 lockdep_assert_held(kn);
     110             :         return kn->attr.ops;
     111             : }
     112             : 
     113             : /*
     114             :  * As kernfs_seq_stop() is also called after kernfs_seq_start() or
     115             :  * kernfs_seq_next() failure, it needs to distinguish whether it's stopping
     116             :  * a seq_file iteration which is fully initialized with an active reference
     117             :  * or an aborted kernfs_seq_start() due to get_active failure.  The
     118             :  * position pointer is the only context for each seq_file iteration and
     119             :  * thus the stop condition should be encoded in it.  As the return value is
     120             :  * directly visible to userland, ERR_PTR(-ENODEV) is the only acceptable
     121             :  * choice to indicate get_active failure.
     122             :  *
     123             :  * Unfortunately, this is complicated due to the optional custom seq_file
     124             :  * operations which may return ERR_PTR(-ENODEV) too.  kernfs_seq_stop()
     125             :  * can't distinguish whether ERR_PTR(-ENODEV) is from get_active failure or
     126             :  * custom seq_file operations and thus can't decide whether put_active
     127             :  * should be performed or not only on ERR_PTR(-ENODEV).
     128             :  *
     129             :  * This is worked around by factoring out the custom seq_stop() and
     130             :  * put_active part into kernfs_seq_stop_active(), skipping it from
     131             :  * kernfs_seq_stop() if ERR_PTR(-ENODEV) while invoking it directly after
     132             :  * custom seq_file operations fail with ERR_PTR(-ENODEV) - this ensures
     133             :  * that kernfs_seq_stop_active() is skipped only after get_active failure.
     134             :  */
     135           0 : static void kernfs_seq_stop_active(struct seq_file *sf, void *v)
     136             : {
     137           0 :         struct kernfs_open_file *of = sf->private;
     138           0 :         const struct kernfs_ops *ops = kernfs_ops(of->kn);
     139             : 
     140           0 :         if (ops->seq_stop)
     141           0 :                 ops->seq_stop(sf, v);
     142           0 :         kernfs_put_active(of->kn);
     143           0 : }
     144             : 
     145           0 : static void *kernfs_seq_start(struct seq_file *sf, loff_t *ppos)
     146             : {
     147           0 :         struct kernfs_open_file *of = sf->private;
     148             :         const struct kernfs_ops *ops;
     149             : 
     150             :         /*
     151             :          * @of->mutex nests outside active ref and is primarily to ensure that
     152             :          * the ops aren't called concurrently for the same open file.
     153             :          */
     154           0 :         mutex_lock(&of->mutex);
     155           0 :         if (!kernfs_get_active(of->kn))
     156             :                 return ERR_PTR(-ENODEV);
     157             : 
     158           0 :         ops = kernfs_ops(of->kn);
     159           0 :         if (ops->seq_start) {
     160           0 :                 void *next = ops->seq_start(sf, ppos);
     161             :                 /* see the comment above kernfs_seq_stop_active() */
     162           0 :                 if (next == ERR_PTR(-ENODEV))
     163           0 :                         kernfs_seq_stop_active(sf, next);
     164             :                 return next;
     165             :         }
     166           0 :         return single_start(sf, ppos);
     167             : }
     168             : 
     169           0 : static void *kernfs_seq_next(struct seq_file *sf, void *v, loff_t *ppos)
     170             : {
     171           0 :         struct kernfs_open_file *of = sf->private;
     172           0 :         const struct kernfs_ops *ops = kernfs_ops(of->kn);
     173             : 
     174           0 :         if (ops->seq_next) {
     175           0 :                 void *next = ops->seq_next(sf, v, ppos);
     176             :                 /* see the comment above kernfs_seq_stop_active() */
     177           0 :                 if (next == ERR_PTR(-ENODEV))
     178           0 :                         kernfs_seq_stop_active(sf, next);
     179             :                 return next;
     180             :         } else {
     181             :                 /*
     182             :                  * This mirrors the ->next behavior of single_open() files:
     183             :                  * always terminate after the initial read.
     184             :                  */
     185           0 :                 ++*ppos;
     186           0 :                 return NULL;
     187             :         }
     188             : }
     189             : 
     190           0 : static void kernfs_seq_stop(struct seq_file *sf, void *v)
     191             : {
     192           0 :         struct kernfs_open_file *of = sf->private;
     193             : 
     194           0 :         if (v != ERR_PTR(-ENODEV))
     195           0 :                 kernfs_seq_stop_active(sf, v);
     196           0 :         mutex_unlock(&of->mutex);
     197           0 : }
     198             : 
     199           0 : static int kernfs_seq_show(struct seq_file *sf, void *v)
     200             : {
     201           0 :         struct kernfs_open_file *of = sf->private;
     202             : 
     203           0 :         of->event = atomic_read(&of_on(of)->event);
     204             : 
     205           0 :         return of->kn->attr.ops->seq_show(sf, v);
     206             : }
     207             : 
     208             : static const struct seq_operations kernfs_seq_ops = {
     209             :         .start = kernfs_seq_start,
     210             :         .next = kernfs_seq_next,
     211             :         .stop = kernfs_seq_stop,
     212             :         .show = kernfs_seq_show,
     213             : };
     214             : 
     215             : /*
     216             :  * As reading a bin file can have side effects, the exact offset and byte
     217             :  * count specified in the read(2) call should be passed to the read
     218             :  * callback, which makes it difficult to use seq_file.  Implement simplistic
     219             :  * custom buffering for bin files.
     220             :  */
     221           0 : static ssize_t kernfs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
     222             : {
     223           0 :         struct kernfs_open_file *of = kernfs_of(iocb->ki_filp);
     224           0 :         ssize_t len = min_t(size_t, iov_iter_count(iter), PAGE_SIZE);
     225             :         const struct kernfs_ops *ops;
     226             :         char *buf;
     227             : 
     228           0 :         buf = of->prealloc_buf;
     229           0 :         if (buf)
     230           0 :                 mutex_lock(&of->prealloc_mutex);
     231             :         else
     232           0 :                 buf = kmalloc(len, GFP_KERNEL);
     233           0 :         if (!buf)
     234             :                 return -ENOMEM;
     235             : 
     236             :         /*
     237             :          * @of->mutex nests outside active ref and is used to ensure that
     238             :          * the ops aren't called concurrently for the same open file.
     239             :          */
     240           0 :         mutex_lock(&of->mutex);
     241           0 :         if (!kernfs_get_active(of->kn)) {
     242           0 :                 len = -ENODEV;
     243           0 :                 mutex_unlock(&of->mutex);
     244             :                 goto out_free;
     245             :         }
     246             : 
     247           0 :         of->event = atomic_read(&of_on(of)->event);
     248             : 
     249           0 :         ops = kernfs_ops(of->kn);
     250           0 :         if (ops->read)
     251           0 :                 len = ops->read(of, buf, len, iocb->ki_pos);
     252             :         else
     253             :                 len = -EINVAL;
     254             : 
     255           0 :         kernfs_put_active(of->kn);
     256           0 :         mutex_unlock(&of->mutex);
     257             : 
     258           0 :         if (len < 0)
     259             :                 goto out_free;
     260             : 
     261           0 :         if (copy_to_iter(buf, len, iter) != len) {
     262             :                 len = -EFAULT;
     263             :                 goto out_free;
     264             :         }
     265             : 
     266           0 :         iocb->ki_pos += len;
     267             : 
     268             :  out_free:
     269           0 :         if (buf == of->prealloc_buf)
     270           0 :                 mutex_unlock(&of->prealloc_mutex);
     271             :         else
     272           0 :                 kfree(buf);
     273             :         return len;
     274             : }
     275             : 
     276           0 : static ssize_t kernfs_fop_read_iter(struct kiocb *iocb, struct iov_iter *iter)
     277             : {
     278           0 :         if (kernfs_of(iocb->ki_filp)->kn->flags & KERNFS_HAS_SEQ_SHOW)
     279           0 :                 return seq_read_iter(iocb, iter);
     280           0 :         return kernfs_file_read_iter(iocb, iter);
     281             : }
     282             : 
     283             : /*
     284             :  * Copy data in from userland and pass it to the matching kernfs write
     285             :  * operation.
     286             :  *
     287             :  * There is no easy way for us to know if userspace is only doing a partial
     288             :  * write, so partial writes are not supported.  We expect the entire buffer
     289             :  * to come on the first write.  Hint: if you're writing a value, first read
     290             :  * the file, modify only the value you're changing, then write the entire
     291             :  * buffer back.
     292             :  */
     293           0 : static ssize_t kernfs_fop_write_iter(struct kiocb *iocb, struct iov_iter *iter)
     294             : {
     295           0 :         struct kernfs_open_file *of = kernfs_of(iocb->ki_filp);
     296           0 :         ssize_t len = iov_iter_count(iter);
     297             :         const struct kernfs_ops *ops;
     298             :         char *buf;
     299             : 
     300           0 :         if (of->atomic_write_len) {
     301           0 :                 if (len > of->atomic_write_len)
     302             :                         return -E2BIG;
     303             :         } else {
     304           0 :                 len = min_t(size_t, len, PAGE_SIZE);
     305             :         }
     306             : 
     307           0 :         buf = of->prealloc_buf;
     308           0 :         if (buf)
     309           0 :                 mutex_lock(&of->prealloc_mutex);
     310             :         else
     311           0 :                 buf = kmalloc(len + 1, GFP_KERNEL);
     312           0 :         if (!buf)
     313             :                 return -ENOMEM;
     314             : 
     315           0 :         if (copy_from_iter(buf, len, iter) != len) {
     316             :                 len = -EFAULT;
     317             :                 goto out_free;
     318             :         }
     319           0 :         buf[len] = '\0';        /* guarantee string termination */
     320             : 
     321             :         /*
     322             :          * @of->mutex nests outside active ref and is used to ensure that
     323             :          * the ops aren't called concurrently for the same open file.
     324             :          */
     325           0 :         mutex_lock(&of->mutex);
     326           0 :         if (!kernfs_get_active(of->kn)) {
     327           0 :                 mutex_unlock(&of->mutex);
     328           0 :                 len = -ENODEV;
     329           0 :                 goto out_free;
     330             :         }
     331             : 
     332           0 :         ops = kernfs_ops(of->kn);
     333           0 :         if (ops->write)
     334           0 :                 len = ops->write(of, buf, len, iocb->ki_pos);
     335             :         else
     336             :                 len = -EINVAL;
     337             : 
     338           0 :         kernfs_put_active(of->kn);
     339           0 :         mutex_unlock(&of->mutex);
     340             : 
     341           0 :         if (len > 0)
     342           0 :                 iocb->ki_pos += len;
     343             : 
     344             : out_free:
     345           0 :         if (buf == of->prealloc_buf)
     346           0 :                 mutex_unlock(&of->prealloc_mutex);
     347             :         else
     348           0 :                 kfree(buf);
     349             :         return len;
     350             : }
     351             : 
     352           0 : static void kernfs_vma_open(struct vm_area_struct *vma)
     353             : {
     354           0 :         struct file *file = vma->vm_file;
     355           0 :         struct kernfs_open_file *of = kernfs_of(file);
     356             : 
     357           0 :         if (!of->vm_ops)
     358             :                 return;
     359             : 
     360           0 :         if (!kernfs_get_active(of->kn))
     361             :                 return;
     362             : 
     363           0 :         if (of->vm_ops->open)
     364           0 :                 of->vm_ops->open(vma);
     365             : 
     366           0 :         kernfs_put_active(of->kn);
     367             : }
     368             : 
     369           0 : static vm_fault_t kernfs_vma_fault(struct vm_fault *vmf)
     370             : {
     371           0 :         struct file *file = vmf->vma->vm_file;
     372           0 :         struct kernfs_open_file *of = kernfs_of(file);
     373             :         vm_fault_t ret;
     374             : 
     375           0 :         if (!of->vm_ops)
     376             :                 return VM_FAULT_SIGBUS;
     377             : 
     378           0 :         if (!kernfs_get_active(of->kn))
     379             :                 return VM_FAULT_SIGBUS;
     380             : 
     381           0 :         ret = VM_FAULT_SIGBUS;
     382           0 :         if (of->vm_ops->fault)
     383           0 :                 ret = of->vm_ops->fault(vmf);
     384             : 
     385           0 :         kernfs_put_active(of->kn);
     386           0 :         return ret;
     387             : }
     388             : 
     389           0 : static vm_fault_t kernfs_vma_page_mkwrite(struct vm_fault *vmf)
     390             : {
     391           0 :         struct file *file = vmf->vma->vm_file;
     392           0 :         struct kernfs_open_file *of = kernfs_of(file);
     393             :         vm_fault_t ret;
     394             : 
     395           0 :         if (!of->vm_ops)
     396             :                 return VM_FAULT_SIGBUS;
     397             : 
     398           0 :         if (!kernfs_get_active(of->kn))
     399             :                 return VM_FAULT_SIGBUS;
     400             : 
     401           0 :         ret = 0;
     402           0 :         if (of->vm_ops->page_mkwrite)
     403           0 :                 ret = of->vm_ops->page_mkwrite(vmf);
     404             :         else
     405           0 :                 file_update_time(file);
     406             : 
     407           0 :         kernfs_put_active(of->kn);
     408           0 :         return ret;
     409             : }
     410             : 
     411           0 : static int kernfs_vma_access(struct vm_area_struct *vma, unsigned long addr,
     412             :                              void *buf, int len, int write)
     413             : {
     414           0 :         struct file *file = vma->vm_file;
     415           0 :         struct kernfs_open_file *of = kernfs_of(file);
     416             :         int ret;
     417             : 
     418           0 :         if (!of->vm_ops)
     419             :                 return -EINVAL;
     420             : 
     421           0 :         if (!kernfs_get_active(of->kn))
     422             :                 return -EINVAL;
     423             : 
     424           0 :         ret = -EINVAL;
     425           0 :         if (of->vm_ops->access)
     426           0 :                 ret = of->vm_ops->access(vma, addr, buf, len, write);
     427             : 
     428           0 :         kernfs_put_active(of->kn);
     429           0 :         return ret;
     430             : }
     431             : 
     432             : #ifdef CONFIG_NUMA
     433             : static int kernfs_vma_set_policy(struct vm_area_struct *vma,
     434             :                                  struct mempolicy *new)
     435             : {
     436             :         struct file *file = vma->vm_file;
     437             :         struct kernfs_open_file *of = kernfs_of(file);
     438             :         int ret;
     439             : 
     440             :         if (!of->vm_ops)
     441             :                 return 0;
     442             : 
     443             :         if (!kernfs_get_active(of->kn))
     444             :                 return -EINVAL;
     445             : 
     446             :         ret = 0;
     447             :         if (of->vm_ops->set_policy)
     448             :                 ret = of->vm_ops->set_policy(vma, new);
     449             : 
     450             :         kernfs_put_active(of->kn);
     451             :         return ret;
     452             : }
     453             : 
     454             : static struct mempolicy *kernfs_vma_get_policy(struct vm_area_struct *vma,
     455             :                                                unsigned long addr)
     456             : {
     457             :         struct file *file = vma->vm_file;
     458             :         struct kernfs_open_file *of = kernfs_of(file);
     459             :         struct mempolicy *pol;
     460             : 
     461             :         if (!of->vm_ops)
     462             :                 return vma->vm_policy;
     463             : 
     464             :         if (!kernfs_get_active(of->kn))
     465             :                 return vma->vm_policy;
     466             : 
     467             :         pol = vma->vm_policy;
     468             :         if (of->vm_ops->get_policy)
     469             :                 pol = of->vm_ops->get_policy(vma, addr);
     470             : 
     471             :         kernfs_put_active(of->kn);
     472             :         return pol;
     473             : }
     474             : 
     475             : #endif
     476             : 
     477             : static const struct vm_operations_struct kernfs_vm_ops = {
     478             :         .open           = kernfs_vma_open,
     479             :         .fault          = kernfs_vma_fault,
     480             :         .page_mkwrite   = kernfs_vma_page_mkwrite,
     481             :         .access         = kernfs_vma_access,
     482             : #ifdef CONFIG_NUMA
     483             :         .set_policy     = kernfs_vma_set_policy,
     484             :         .get_policy     = kernfs_vma_get_policy,
     485             : #endif
     486             : };
     487             : 
     488           0 : static int kernfs_fop_mmap(struct file *file, struct vm_area_struct *vma)
     489             : {
     490           0 :         struct kernfs_open_file *of = kernfs_of(file);
     491             :         const struct kernfs_ops *ops;
     492             :         int rc;
     493             : 
     494             :         /*
     495             :          * mmap path and of->mutex are prone to triggering spurious lockdep
     496             :          * warnings and we don't want to add spurious locking dependency
     497             :          * between the two.  Check whether mmap is actually implemented
     498             :          * without grabbing @of->mutex by testing the KERNFS_HAS_MMAP flag.
     499             :          * See the comment in kernfs_fop_open() for more details.
     500             :          */
     501           0 :         if (!(of->kn->flags & KERNFS_HAS_MMAP))
     502             :                 return -ENODEV;
     503             : 
     504           0 :         mutex_lock(&of->mutex);
     505             : 
     506           0 :         rc = -ENODEV;
     507           0 :         if (!kernfs_get_active(of->kn))
     508             :                 goto out_unlock;
     509             : 
     510           0 :         ops = kernfs_ops(of->kn);
     511           0 :         rc = ops->mmap(of, vma);
     512           0 :         if (rc)
     513             :                 goto out_put;
     514             : 
     515             :         /*
     516             :          * PowerPC's pci_mmap of legacy_mem uses shmem_zero_setup()
     517             :          * to satisfy versions of X which crash if the mmap fails: that
     518             :          * substitutes a new vm_file, and we don't then want kernfs_vm_ops.
     519             :          */
     520           0 :         if (vma->vm_file != file)
     521             :                 goto out_put;
     522             : 
     523           0 :         rc = -EINVAL;
     524           0 :         if (of->mmapped && of->vm_ops != vma->vm_ops)
     525             :                 goto out_put;
     526             : 
     527             :         /*
     528             :          * It is not possible to successfully wrap close.
     529             :          * So error if someone is trying to use close.
     530             :          */
     531           0 :         if (vma->vm_ops && vma->vm_ops->close)
     532             :                 goto out_put;
     533             : 
     534           0 :         rc = 0;
     535           0 :         of->mmapped = true;
     536           0 :         of_on(of)->nr_mmapped++;
     537           0 :         of->vm_ops = vma->vm_ops;
     538           0 :         vma->vm_ops = &kernfs_vm_ops;
     539             : out_put:
     540           0 :         kernfs_put_active(of->kn);
     541             : out_unlock:
     542           0 :         mutex_unlock(&of->mutex);
     543             : 
     544           0 :         return rc;
     545             : }
     546             : 
     547             : /**
     548             :  *      kernfs_get_open_node - get or create kernfs_open_node
     549             :  *      @kn: target kernfs_node
     550             :  *      @of: kernfs_open_file for this instance of open
     551             :  *
     552             :  *      If @kn->attr.open exists, reuse it; otherwise, allocate and install
     553             :  *      a new one.  @of is chained to its files list.
     554             :  *
     555             :  *      Locking:
     556             :  *      Kernel thread context (may sleep).
     557             :  *
     558             :  *      Return:
     559             :  *      %0 on success, -errno on failure.
     560             :  */
     561           0 : static int kernfs_get_open_node(struct kernfs_node *kn,
     562             :                                 struct kernfs_open_file *of)
     563             : {
     564             :         struct kernfs_open_node *on;
     565             :         struct mutex *mutex;
     566             : 
     567           0 :         mutex = kernfs_open_file_mutex_lock(kn);
     568           0 :         on = kernfs_deref_open_node_locked(kn);
     569             : 
     570           0 :         if (!on) {
     571             :                 /* not there, initialize a new one */
     572           0 :                 on = kzalloc(sizeof(*on), GFP_KERNEL);
     573           0 :                 if (!on) {
     574           0 :                         mutex_unlock(mutex);
     575           0 :                         return -ENOMEM;
     576             :                 }
     577           0 :                 atomic_set(&on->event, 1);
     578           0 :                 init_waitqueue_head(&on->poll);
     579           0 :                 INIT_LIST_HEAD(&on->files);
     580           0 :                 rcu_assign_pointer(kn->attr.open, on);
     581             :         }
     582             : 
     583           0 :         list_add_tail(&of->list, &on->files);
     584           0 :         if (kn->flags & KERNFS_HAS_RELEASE)
     585           0 :                 on->nr_to_release++;
     586             : 
     587           0 :         mutex_unlock(mutex);
     588           0 :         return 0;
     589             : }
     590             : 
     591             : /**
     592             :  *      kernfs_unlink_open_file - Unlink @of from @kn.
     593             :  *
     594             :  *      @kn: target kernfs_node
     595             :  *      @of: associated kernfs_open_file
     596             :  *      @open_failed: ->open() failed, cancel ->release()
     597             :  *
     598             :  *      Unlink @of from list of @kn's associated open files. If list of
     599             :  *      associated open files becomes empty, disassociate and free
     600             :  *      kernfs_open_node.
     601             :  *
     602             :  *      LOCKING:
     603             :  *      None.
     604             :  */
     605           0 : static void kernfs_unlink_open_file(struct kernfs_node *kn,
     606             :                                     struct kernfs_open_file *of,
     607             :                                     bool open_failed)
     608             : {
     609             :         struct kernfs_open_node *on;
     610             :         struct mutex *mutex;
     611             : 
     612           0 :         mutex = kernfs_open_file_mutex_lock(kn);
     613             : 
     614           0 :         on = kernfs_deref_open_node_locked(kn);
     615           0 :         if (!on) {
     616           0 :                 mutex_unlock(mutex);
     617           0 :                 return;
     618             :         }
     619             : 
     620           0 :         if (of) {
     621           0 :                 if (kn->flags & KERNFS_HAS_RELEASE) {
     622           0 :                         WARN_ON_ONCE(of->released == open_failed);
     623           0 :                         if (open_failed)
     624           0 :                                 on->nr_to_release--;
     625             :                 }
     626           0 :                 if (of->mmapped)
     627           0 :                         on->nr_mmapped--;
     628           0 :                 list_del(&of->list);
     629             :         }
     630             : 
     631           0 :         if (list_empty(&on->files)) {
     632           0 :                 rcu_assign_pointer(kn->attr.open, NULL);
     633           0 :                 kfree_rcu(on, rcu_head);
     634             :         }
     635             : 
     636           0 :         mutex_unlock(mutex);
     637             : }
     638             : 
     639           0 : static int kernfs_fop_open(struct inode *inode, struct file *file)
     640             : {
     641           0 :         struct kernfs_node *kn = inode->i_private;
     642           0 :         struct kernfs_root *root = kernfs_root(kn);
     643             :         const struct kernfs_ops *ops;
     644             :         struct kernfs_open_file *of;
     645             :         bool has_read, has_write, has_mmap;
     646           0 :         int error = -EACCES;
     647             : 
     648           0 :         if (!kernfs_get_active(kn))
     649             :                 return -ENODEV;
     650             : 
     651           0 :         ops = kernfs_ops(kn);
     652             : 
     653           0 :         has_read = ops->seq_show || ops->read || ops->mmap;
     654           0 :         has_write = ops->write || ops->mmap;
     655           0 :         has_mmap = ops->mmap;
     656             : 
     657             :         /* see the flag definition for details */
     658           0 :         if (root->flags & KERNFS_ROOT_EXTRA_OPEN_PERM_CHECK) {
     659           0 :                 if ((file->f_mode & FMODE_WRITE) &&
     660           0 :                     (!(inode->i_mode & S_IWUGO) || !has_write))
     661             :                         goto err_out;
     662             : 
     663           0 :                 if ((file->f_mode & FMODE_READ) &&
     664           0 :                     (!(inode->i_mode & S_IRUGO) || !has_read))
     665             :                         goto err_out;
     666             :         }
     667             : 
     668             :         /* allocate a kernfs_open_file for the file */
     669           0 :         error = -ENOMEM;
     670           0 :         of = kzalloc(sizeof(struct kernfs_open_file), GFP_KERNEL);
     671           0 :         if (!of)
     672             :                 goto err_out;
     673             : 
     674             :         /*
     675             :          * The following is done to give a different lockdep key to
     676             :          * @of->mutex for files which implement mmap.  This is a rather
     677             :          * crude way to avoid false positive lockdep warning around
     678             :          * mm->mmap_lock - mmap nests @of->mutex under mm->mmap_lock and
     679             :          * reading /sys/block/sda/trace/act_mask grabs sr_mutex, under
     680             :          * which mm->mmap_lock nests, while holding @of->mutex.  As each
     681             :          * open file has a separate mutex, it's okay as long as those don't
     682             :          * happen on the same file.  At this point, we can't easily give
     683             :          * each file a separate locking class.  Let's differentiate on
     684             :          * whether the file has mmap or not for now.
     685             :          *
     686             :          * Both paths of the branch look the same.  They're supposed to
     687             :          * look that way and give @of->mutex different static lockdep keys.
     688             :          */
     689           0 :         if (has_mmap)
     690           0 :                 mutex_init(&of->mutex);
     691             :         else
     692           0 :                 mutex_init(&of->mutex);
     693             : 
     694           0 :         of->kn = kn;
     695           0 :         of->file = file;
     696             : 
     697             :         /*
     698             :          * Write path needs to access atomic_write_len outside active reference.
     699             :          * Cache it in open_file.  See kernfs_fop_write_iter() for details.
     700             :          */
     701           0 :         of->atomic_write_len = ops->atomic_write_len;
     702             : 
     703           0 :         error = -EINVAL;
     704             :         /*
     705             :          * ->seq_show is incompatible with ->prealloc,
     706             :          * as seq_read does its own allocation.
     707             :          * ->read must be used instead.
     708             :          */
     709           0 :         if (ops->prealloc && ops->seq_show)
     710             :                 goto err_free;
     711           0 :         if (ops->prealloc) {
     712           0 :                 int len = of->atomic_write_len ?: PAGE_SIZE;
     713           0 :                 of->prealloc_buf = kmalloc(len + 1, GFP_KERNEL);
     714           0 :                 error = -ENOMEM;
     715           0 :                 if (!of->prealloc_buf)
     716             :                         goto err_free;
     717           0 :                 mutex_init(&of->prealloc_mutex);
     718             :         }
     719             : 
     720             :         /*
     721             :          * Always instantiate seq_file even if read access doesn't use
     722             :          * seq_file or is not requested.  This unifies private data access
     723             :          * and readable regular files are the vast majority anyway.
     724             :          */
     725           0 :         if (ops->seq_show)
     726           0 :                 error = seq_open(file, &kernfs_seq_ops);
     727             :         else
     728           0 :                 error = seq_open(file, NULL);
     729           0 :         if (error)
     730             :                 goto err_free;
     731             : 
     732           0 :         of->seq_file = file->private_data;
     733           0 :         of->seq_file->private = of;
     734             : 
     735             :         /* seq_file clears PWRITE unconditionally, restore it if WRITE */
     736           0 :         if (file->f_mode & FMODE_WRITE)
     737           0 :                 file->f_mode |= FMODE_PWRITE;
     738             : 
     739             :         /* make sure we have open node struct */
     740           0 :         error = kernfs_get_open_node(kn, of);
     741           0 :         if (error)
     742             :                 goto err_seq_release;
     743             : 
     744           0 :         if (ops->open) {
     745             :                 /* nobody has access to @of yet, skip @of->mutex */
     746           0 :                 error = ops->open(of);
     747           0 :                 if (error)
     748             :                         goto err_put_node;
     749             :         }
     750             : 
     751             :         /* open succeeded, put active references */
     752           0 :         kernfs_put_active(kn);
     753           0 :         return 0;
     754             : 
     755             : err_put_node:
     756           0 :         kernfs_unlink_open_file(kn, of, true);
     757             : err_seq_release:
     758           0 :         seq_release(inode, file);
     759             : err_free:
     760           0 :         kfree(of->prealloc_buf);
     761           0 :         kfree(of);
     762             : err_out:
     763           0 :         kernfs_put_active(kn);
     764           0 :         return error;
     765             : }
     766             : 
     767             : /* used from release/drain to ensure that ->release() is called exactly once */
     768             : static void kernfs_release_file(struct kernfs_node *kn,
     769             :                                 struct kernfs_open_file *of)
     770             : {
     771             :         /*
     772             :          * @of is guaranteed to have no other file operations in flight and
     773             :          * we just want to synchronize release and drain paths.
     774             :          * @kernfs_open_file_mutex_ptr(kn) is enough. @of->mutex can't be used
     775             :          * here because drain path may be called from places which can
     776             :          * cause circular dependency.
     777             :          */
     778           0 :         lockdep_assert_held(kernfs_open_file_mutex_ptr(kn));
     779             : 
     780           0 :         if (!of->released) {
     781             :                 /*
     782             :                  * A file is never detached without being released and we
     783             :                  * need to be able to release files which are deactivated
     784             :                  * and being drained.  Don't use kernfs_ops().
     785             :                  */
     786           0 :                 kn->attr.ops->release(of);
     787           0 :                 of->released = true;
     788           0 :                 of_on(of)->nr_to_release--;
     789             :         }
     790             : }
     791             : 
     792           0 : static int kernfs_fop_release(struct inode *inode, struct file *filp)
     793             : {
     794           0 :         struct kernfs_node *kn = inode->i_private;
     795           0 :         struct kernfs_open_file *of = kernfs_of(filp);
     796             : 
     797           0 :         if (kn->flags & KERNFS_HAS_RELEASE) {
     798             :                 struct mutex *mutex;
     799             : 
     800           0 :                 mutex = kernfs_open_file_mutex_lock(kn);
     801           0 :                 kernfs_release_file(kn, of);
     802           0 :                 mutex_unlock(mutex);
     803             :         }
     804             : 
     805           0 :         kernfs_unlink_open_file(kn, of, false);
     806           0 :         seq_release(inode, filp);
     807           0 :         kfree(of->prealloc_buf);
     808           0 :         kfree(of);
     809             : 
     810           0 :         return 0;
     811             : }
     812             : 
     813         524 : bool kernfs_should_drain_open_files(struct kernfs_node *kn)
     814             : {
     815             :         struct kernfs_open_node *on;
     816             :         bool ret;
     817             : 
     818             :         /*
     819             :          * @kn being deactivated guarantees that @kn->attr.open can't change
     820             :          * beneath us making the lockless test below safe.
     821             :          */
     822        1048 :         WARN_ON_ONCE(atomic_read(&kn->active) != KN_DEACTIVATED_BIAS);
     823             : 
     824             :         rcu_read_lock();
     825         524 :         on = rcu_dereference(kn->attr.open);
     826         524 :         ret = on && (on->nr_mmapped || on->nr_to_release);
     827             :         rcu_read_unlock();
     828             : 
     829         524 :         return ret;
     830             : }
     831             : 
     832           0 : void kernfs_drain_open_files(struct kernfs_node *kn)
     833             : {
     834             :         struct kernfs_open_node *on;
     835             :         struct kernfs_open_file *of;
     836             :         struct mutex *mutex;
     837             : 
     838           0 :         mutex = kernfs_open_file_mutex_lock(kn);
     839           0 :         on = kernfs_deref_open_node_locked(kn);
     840           0 :         if (!on) {
     841           0 :                 mutex_unlock(mutex);
     842           0 :                 return;
     843             :         }
     844             : 
     845           0 :         list_for_each_entry(of, &on->files, list) {
     846           0 :                 struct inode *inode = file_inode(of->file);
     847             : 
     848           0 :                 if (of->mmapped) {
     849           0 :                         unmap_mapping_range(inode->i_mapping, 0, 0, 1);
     850           0 :                         of->mmapped = false;
     851           0 :                         on->nr_mmapped--;
     852             :                 }
     853             : 
     854           0 :                 if (kn->flags & KERNFS_HAS_RELEASE)
     855           0 :                         kernfs_release_file(kn, of);
     856             :         }
     857             : 
     858           0 :         WARN_ON_ONCE(on->nr_mmapped || on->nr_to_release);
     859           0 :         mutex_unlock(mutex);
     860             : }
     861             : 
     862             : /*
     863             :  * Kernfs attribute files are pollable.  The idea is that you read
     864             :  * the content and then you use 'poll' or 'select' to wait for
     865             :  * the content to change.  When the content changes (assuming the
     866             :  * manager for the kobject supports notification), poll will
     867             :  * return EPOLLERR|EPOLLPRI, and select will return the fd whether
     868             :  * it is waiting for read, write, or exceptions.
     869             :  * Once poll/select indicates that the value has changed, you
     870             :  * need to close and re-open the file, or seek to 0 and read again.
     871             :  * Reminder: this only works for attributes which actively support
     872             :  * it, and it is not possible to test an attribute from userspace
     873             :  * to see if it supports poll (Neither 'poll' nor 'select' return
     874             :  * an appropriate error code).  When in doubt, set a suitable timeout value.
     875             :  */
     876           0 : __poll_t kernfs_generic_poll(struct kernfs_open_file *of, poll_table *wait)
     877             : {
     878           0 :         struct kernfs_open_node *on = of_on(of);
     879             : 
     880           0 :         poll_wait(of->file, &on->poll, wait);
     881             : 
     882           0 :         if (of->event != atomic_read(&on->event))
     883             :                 return DEFAULT_POLLMASK|EPOLLERR|EPOLLPRI;
     884             : 
     885           0 :         return DEFAULT_POLLMASK;
     886             : }
     887             : 
     888           0 : static __poll_t kernfs_fop_poll(struct file *filp, poll_table *wait)
     889             : {
     890           0 :         struct kernfs_open_file *of = kernfs_of(filp);
     891           0 :         struct kernfs_node *kn = kernfs_dentry_node(filp->f_path.dentry);
     892             :         __poll_t ret;
     893             : 
     894           0 :         if (!kernfs_get_active(kn))
     895             :                 return DEFAULT_POLLMASK|EPOLLERR|EPOLLPRI;
     896             : 
     897           0 :         if (kn->attr.ops->poll)
     898           0 :                 ret = kn->attr.ops->poll(of, wait);
     899             :         else
     900             :                 ret = kernfs_generic_poll(of, wait);
     901             : 
     902           0 :         kernfs_put_active(kn);
     903           0 :         return ret;
     904             : }
     905             : 
     906           1 : static void kernfs_notify_workfn(struct work_struct *work)
     907             : {
     908             :         struct kernfs_node *kn;
     909             :         struct kernfs_super_info *info;
     910             :         struct kernfs_root *root;
     911             : repeat:
     912             :         /* pop one off the notify_list */
     913           2 :         spin_lock_irq(&kernfs_notify_lock);
     914           2 :         kn = kernfs_notify_list;
     915           2 :         if (kn == KERNFS_NOTIFY_EOL) {
     916           1 :                 spin_unlock_irq(&kernfs_notify_lock);
     917           1 :                 return;
     918             :         }
     919           1 :         kernfs_notify_list = kn->attr.notify_next;
     920           1 :         kn->attr.notify_next = NULL;
     921           1 :         spin_unlock_irq(&kernfs_notify_lock);
     922             : 
     923           1 :         root = kernfs_root(kn);
     924             :         /* kick fsnotify */
     925             : 
     926           1 :         down_read(&root->kernfs_supers_rwsem);
     927           2 :         list_for_each_entry(info, &kernfs_root(kn)->supers, node) {
     928             :                 struct kernfs_node *parent;
     929           0 :                 struct inode *p_inode = NULL;
     930             :                 struct inode *inode;
     931             :                 struct qstr name;
     932             : 
     933             :                 /*
     934             :                  * We want fsnotify_modify() on @kn but as the
     935             :                  * modifications aren't originating from userland don't
     936             :                  * have the matching @file available.  Look up the inodes
     937             :                  * and generate the events manually.
     938             :                  */
     939           0 :                 inode = ilookup(info->sb, kernfs_ino(kn));
     940           0 :                 if (!inode)
     941           0 :                         continue;
     942             : 
     943           0 :                 name = (struct qstr)QSTR_INIT(kn->name, strlen(kn->name));
     944           0 :                 parent = kernfs_get_parent(kn);
     945           0 :                 if (parent) {
     946           0 :                         p_inode = ilookup(info->sb, kernfs_ino(parent));
     947           0 :                         if (p_inode) {
     948           0 :                                 fsnotify(FS_MODIFY | FS_EVENT_ON_CHILD,
     949             :                                          inode, FSNOTIFY_EVENT_INODE,
     950             :                                          p_inode, &name, inode, 0);
     951           0 :                                 iput(p_inode);
     952             :                         }
     953             : 
     954           0 :                         kernfs_put(parent);
     955             :                 }
     956             : 
     957           0 :                 if (!p_inode)
     958           0 :                         fsnotify_inode(inode, FS_MODIFY);
     959             : 
     960           0 :                 iput(inode);
     961             :         }
     962             : 
     963           1 :         up_read(&root->kernfs_supers_rwsem);
     964           1 :         kernfs_put(kn);
     965           1 :         goto repeat;
     966             : }
     967             : 
     968             : /**
     969             :  * kernfs_notify - notify a kernfs file
     970             :  * @kn: file to notify
     971             :  *
     972             :  * Notify @kn such that poll(2) on @kn wakes up.  May be called from any
     973             :  * context.
     974             :  */
     975           2 : void kernfs_notify(struct kernfs_node *kn)
     976             : {
     977             :         static DECLARE_WORK(kernfs_notify_work, kernfs_notify_workfn);
     978             :         unsigned long flags;
     979             :         struct kernfs_open_node *on;
     980             : 
     981           4 :         if (WARN_ON(kernfs_type(kn) != KERNFS_FILE))
     982             :                 return;
     983             : 
     984             :         /* kick poll immediately */
     985             :         rcu_read_lock();
     986           2 :         on = rcu_dereference(kn->attr.open);
     987           2 :         if (on) {
     988           0 :                 atomic_inc(&on->event);
     989           0 :                 wake_up_interruptible(&on->poll);
     990             :         }
     991             :         rcu_read_unlock();
     992             : 
     993             :         /* schedule work to kick fsnotify */
     994           2 :         spin_lock_irqsave(&kernfs_notify_lock, flags);
     995           2 :         if (!kn->attr.notify_next) {
     996           1 :                 kernfs_get(kn);
     997           1 :                 kn->attr.notify_next = kernfs_notify_list;
     998           1 :                 kernfs_notify_list = kn;
     999             :                 schedule_work(&kernfs_notify_work);
    1000             :         }
    1001             :         spin_unlock_irqrestore(&kernfs_notify_lock, flags);
    1002             : }
    1003             : EXPORT_SYMBOL_GPL(kernfs_notify);
    1004             : 
    1005             : const struct file_operations kernfs_file_fops = {
    1006             :         .read_iter      = kernfs_fop_read_iter,
    1007             :         .write_iter     = kernfs_fop_write_iter,
    1008             :         .llseek         = generic_file_llseek,
    1009             :         .mmap           = kernfs_fop_mmap,
    1010             :         .open           = kernfs_fop_open,
    1011             :         .release        = kernfs_fop_release,
    1012             :         .poll           = kernfs_fop_poll,
    1013             :         .fsync          = noop_fsync,
    1014             :         .splice_read    = generic_file_splice_read,
    1015             :         .splice_write   = iter_file_splice_write,
    1016             : };
    1017             : 
    1018             : /**
    1019             :  * __kernfs_create_file - kernfs internal function to create a file
    1020             :  * @parent: directory to create the file in
    1021             :  * @name: name of the file
    1022             :  * @mode: mode of the file
    1023             :  * @uid: uid of the file
    1024             :  * @gid: gid of the file
    1025             :  * @size: size of the file
    1026             :  * @ops: kernfs operations for the file
    1027             :  * @priv: private data for the file
    1028             :  * @ns: optional namespace tag of the file
    1029             :  * @key: lockdep key for the file's active_ref, %NULL to disable lockdep
    1030             :  *
    1031             :  * Return: the created node on success, ERR_PTR() value on error.
    1032             :  */
    1033        5659 : struct kernfs_node *__kernfs_create_file(struct kernfs_node *parent,
    1034             :                                          const char *name,
    1035             :                                          umode_t mode, kuid_t uid, kgid_t gid,
    1036             :                                          loff_t size,
    1037             :                                          const struct kernfs_ops *ops,
    1038             :                                          void *priv, const void *ns,
    1039             :                                          struct lock_class_key *key)
    1040             : {
    1041             :         struct kernfs_node *kn;
    1042             :         unsigned flags;
    1043             :         int rc;
    1044             : 
    1045        5659 :         flags = KERNFS_FILE;
    1046             : 
    1047        5659 :         kn = kernfs_new_node(parent, name, (mode & S_IALLUGO) | S_IFREG,
    1048             :                              uid, gid, flags);
    1049        5659 :         if (!kn)
    1050             :                 return ERR_PTR(-ENOMEM);
    1051             : 
    1052        5659 :         kn->attr.ops = ops;
    1053        5659 :         kn->attr.size = size;
    1054        5659 :         kn->ns = ns;
    1055        5659 :         kn->priv = priv;
    1056             : 
    1057             : #ifdef CONFIG_DEBUG_LOCK_ALLOC
    1058             :         if (key) {
    1059             :                 lockdep_init_map(&kn->dep_map, "kn->active", key, 0);
    1060             :                 kn->flags |= KERNFS_LOCKDEP;
    1061             :         }
    1062             : #endif
    1063             : 
    1064             :         /*
    1065             :          * kn->attr.ops is accessible only while holding active ref.  We
    1066             :          * need to know whether some ops are implemented outside active
    1067             :          * ref.  Cache their existence in flags.
    1068             :          */
    1069        5659 :         if (ops->seq_show)
    1070        5650 :                 kn->flags |= KERNFS_HAS_SEQ_SHOW;
    1071        5659 :         if (ops->mmap)
    1072           0 :                 kn->flags |= KERNFS_HAS_MMAP;
    1073        5659 :         if (ops->release)
    1074           0 :                 kn->flags |= KERNFS_HAS_RELEASE;
    1075             : 
    1076        5659 :         rc = kernfs_add_one(kn);
    1077        5659 :         if (rc) {
    1078           0 :                 kernfs_put(kn);
    1079           0 :                 return ERR_PTR(rc);
    1080             :         }
    1081             :         return kn;
    1082             : }

Generated by: LCOV version 1.14