LCOV - code coverage report
Current view: top level - fs - open.c (source / functions) Hit Total Coverage
Test: coverage.info Lines: 0 559 0.0 %
Date: 2023-08-24 13:40:31 Functions: 0 65 0.0 %

          Line data    Source code
       1             : // SPDX-License-Identifier: GPL-2.0-only
       2             : /*
       3             :  *  linux/fs/open.c
       4             :  *
       5             :  *  Copyright (C) 1991, 1992  Linus Torvalds
       6             :  */
       7             : 
       8             : #include <linux/string.h>
       9             : #include <linux/mm.h>
      10             : #include <linux/file.h>
      11             : #include <linux/fdtable.h>
      12             : #include <linux/fsnotify.h>
      13             : #include <linux/module.h>
      14             : #include <linux/tty.h>
      15             : #include <linux/namei.h>
      16             : #include <linux/backing-dev.h>
      17             : #include <linux/capability.h>
      18             : #include <linux/securebits.h>
      19             : #include <linux/security.h>
      20             : #include <linux/mount.h>
      21             : #include <linux/fcntl.h>
      22             : #include <linux/slab.h>
      23             : #include <linux/uaccess.h>
      24             : #include <linux/fs.h>
      25             : #include <linux/personality.h>
      26             : #include <linux/pagemap.h>
      27             : #include <linux/syscalls.h>
      28             : #include <linux/rcupdate.h>
      29             : #include <linux/audit.h>
      30             : #include <linux/falloc.h>
      31             : #include <linux/fs_struct.h>
      32             : #include <linux/ima.h>
      33             : #include <linux/dnotify.h>
      34             : #include <linux/compat.h>
      35             : #include <linux/mnt_idmapping.h>
      36             : #include <linux/filelock.h>
      37             : 
      38             : #include "internal.h"
      39             : 
      40           0 : int do_truncate(struct mnt_idmap *idmap, struct dentry *dentry,
      41             :                 loff_t length, unsigned int time_attrs, struct file *filp)
      42             : {
      43             :         int ret;
      44             :         struct iattr newattrs;
      45             : 
      46             :         /* Not pretty: "inode->i_size" shouldn't really be signed. But it is. */
      47           0 :         if (length < 0)
      48             :                 return -EINVAL;
      49             : 
      50           0 :         newattrs.ia_size = length;
      51           0 :         newattrs.ia_valid = ATTR_SIZE | time_attrs;
      52           0 :         if (filp) {
      53           0 :                 newattrs.ia_file = filp;
      54           0 :                 newattrs.ia_valid |= ATTR_FILE;
      55             :         }
      56             : 
      57             :         /* Remove suid, sgid, and file capabilities on truncate too */
      58           0 :         ret = dentry_needs_remove_privs(idmap, dentry);
      59           0 :         if (ret < 0)
      60             :                 return ret;
      61           0 :         if (ret)
      62           0 :                 newattrs.ia_valid |= ret | ATTR_FORCE;
      63             : 
      64           0 :         inode_lock(dentry->d_inode);
      65             :         /* Note any delegations or leases have already been broken: */
      66           0 :         ret = notify_change(idmap, dentry, &newattrs, NULL);
      67           0 :         inode_unlock(dentry->d_inode);
      68           0 :         return ret;
      69             : }
      70             : 
      71           0 : long vfs_truncate(const struct path *path, loff_t length)
      72             : {
      73             :         struct mnt_idmap *idmap;
      74             :         struct inode *inode;
      75             :         long error;
      76             : 
      77           0 :         inode = path->dentry->d_inode;
      78             : 
      79             :         /* Directories get -EISDIR; any other non-regular file gets -EINVAL */
      80           0 :         if (S_ISDIR(inode->i_mode))
      81             :                 return -EISDIR;
      82           0 :         if (!S_ISREG(inode->i_mode))
      83             :                 return -EINVAL;
      84             : 
      85           0 :         error = mnt_want_write(path->mnt);
      86           0 :         if (error)
      87             :                 goto out;
      88             : 
      89           0 :         idmap = mnt_idmap(path->mnt);
      90           0 :         error = inode_permission(idmap, inode, MAY_WRITE);
      91           0 :         if (error)
      92             :                 goto mnt_drop_write_and_out;
      93             : 
      94           0 :         error = -EPERM;
      95           0 :         if (IS_APPEND(inode))
      96             :                 goto mnt_drop_write_and_out;
      97             : 
      98           0 :         error = get_write_access(inode);
      99           0 :         if (error)
     100             :                 goto mnt_drop_write_and_out;
     101             : 
     102             :         /*
     103             :          * Make sure that there are no leases.  get_write_access() protects
     104             :          * against the truncate racing with a lease-granting setlease().
     105             :          */
     106           0 :         error = break_lease(inode, O_WRONLY);
     107           0 :         if (error)
     108             :                 goto put_write_and_out;
     109             : 
     110           0 :         error = security_path_truncate(path);
     111             :         if (!error)
     112           0 :                 error = do_truncate(idmap, path->dentry, length, 0, NULL);
     113             : 
     114             : put_write_and_out:
     115             :         put_write_access(inode);
     116             : mnt_drop_write_and_out:
     117           0 :         mnt_drop_write(path->mnt);
     118             : out:
     119             :         return error;
     120             : }
     121             : EXPORT_SYMBOL_GPL(vfs_truncate);
     122             : 
     123           0 : long do_sys_truncate(const char __user *pathname, loff_t length)
     124             : {
     125           0 :         unsigned int lookup_flags = LOOKUP_FOLLOW;
     126             :         struct path path;
     127             :         int error;
     128             : 
     129           0 :         if (length < 0)      /* sorry, but loff_t says... */
     130             :                 return -EINVAL;
     131             : 
     132             : retry:
     133           0 :         error = user_path_at(AT_FDCWD, pathname, lookup_flags, &path);
     134           0 :         if (!error) {
     135           0 :                 error = vfs_truncate(&path, length);
     136           0 :                 path_put(&path);
     137             :         }
     138           0 :         if (retry_estale(error, lookup_flags)) {
     139             :                 lookup_flags |= LOOKUP_REVAL;
     140             :                 goto retry;
     141             :         }
     142             :         return error;
     143             : }
     144             : 
     145           0 : SYSCALL_DEFINE2(truncate, const char __user *, path, long, length)
     146             : {
     147           0 :         return do_sys_truncate(path, length);
     148             : }
     149             : 
     150             : #ifdef CONFIG_COMPAT
     151             : COMPAT_SYSCALL_DEFINE2(truncate, const char __user *, path, compat_off_t, length)
     152             : {
     153             :         return do_sys_truncate(path, length);
     154             : }
     155             : #endif
     156             : 
     157           0 : long do_sys_ftruncate(unsigned int fd, loff_t length, int small)
     158             : {
     159             :         struct inode *inode;
     160             :         struct dentry *dentry;
     161             :         struct fd f;
     162             :         int error;
     163             : 
     164           0 :         error = -EINVAL;
     165           0 :         if (length < 0)
     166             :                 goto out;
     167           0 :         error = -EBADF;
     168           0 :         f = fdget(fd);
     169           0 :         if (!f.file)
     170             :                 goto out;
     171             : 
     172             :         /* explicitly opened as large or we are on 64-bit box */
     173           0 :         if (f.file->f_flags & O_LARGEFILE)
     174           0 :                 small = 0;
     175             : 
     176           0 :         dentry = f.file->f_path.dentry;
     177           0 :         inode = dentry->d_inode;
     178           0 :         error = -EINVAL;
     179           0 :         if (!S_ISREG(inode->i_mode) || !(f.file->f_mode & FMODE_WRITE))
     180             :                 goto out_putf;
     181             : 
     182           0 :         error = -EINVAL;
     183             :         /* Cannot ftruncate over 2^31 bytes without large file support */
     184           0 :         if (small && length > MAX_NON_LFS)
     185             :                 goto out_putf;
     186             : 
     187           0 :         error = -EPERM;
     188             :         /* Check IS_APPEND on real upper inode */
     189           0 :         if (IS_APPEND(file_inode(f.file)))
     190             :                 goto out_putf;
     191           0 :         sb_start_write(inode->i_sb);
     192           0 :         error = security_file_truncate(f.file);
     193             :         if (!error)
     194           0 :                 error = do_truncate(file_mnt_idmap(f.file), dentry, length,
     195             :                                     ATTR_MTIME | ATTR_CTIME, f.file);
     196           0 :         sb_end_write(inode->i_sb);
     197             : out_putf:
     198           0 :         fdput(f);
     199             : out:
     200           0 :         return error;
     201             : }
     202             : 
     203           0 : SYSCALL_DEFINE2(ftruncate, unsigned int, fd, unsigned long, length)
     204             : {
     205           0 :         return do_sys_ftruncate(fd, length, 1);
     206             : }
     207             : 
     208             : #ifdef CONFIG_COMPAT
     209             : COMPAT_SYSCALL_DEFINE2(ftruncate, unsigned int, fd, compat_ulong_t, length)
     210             : {
     211             :         return do_sys_ftruncate(fd, length, 1);
     212             : }
     213             : #endif
     214             : 
     215             : /* LFS versions of truncate are only needed on 32 bit machines */
     216             : #if BITS_PER_LONG == 32
     217             : SYSCALL_DEFINE2(truncate64, const char __user *, path, loff_t, length)
     218             : {
     219             :         return do_sys_truncate(path, length);
     220             : }
     221             : 
     222             : SYSCALL_DEFINE2(ftruncate64, unsigned int, fd, loff_t, length)
     223             : {
     224             :         return do_sys_ftruncate(fd, length, 0);
     225             : }
     226             : #endif /* BITS_PER_LONG == 32 */
     227             : 
     228             : #if defined(CONFIG_COMPAT) && defined(__ARCH_WANT_COMPAT_TRUNCATE64)
     229             : COMPAT_SYSCALL_DEFINE3(truncate64, const char __user *, pathname,
     230             :                        compat_arg_u64_dual(length))
     231             : {
     232             :         return ksys_truncate(pathname, compat_arg_u64_glue(length));
     233             : }
     234             : #endif
     235             : 
     236             : #if defined(CONFIG_COMPAT) && defined(__ARCH_WANT_COMPAT_FTRUNCATE64)
     237             : COMPAT_SYSCALL_DEFINE3(ftruncate64, unsigned int, fd,
     238             :                        compat_arg_u64_dual(length))
     239             : {
     240             :         return ksys_ftruncate(fd, compat_arg_u64_glue(length));
     241             : }
     242             : #endif
     243             : 
     244           0 : int vfs_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
     245             : {
     246           0 :         struct inode *inode = file_inode(file);
     247             :         long ret;
     248             : 
     249           0 :         if (offset < 0 || len <= 0)
     250             :                 return -EINVAL;
     251             : 
     252             :         /* Return error if mode is not supported */
     253           0 :         if (mode & ~FALLOC_FL_SUPPORTED_MASK)
     254             :                 return -EOPNOTSUPP;
     255             : 
     256             :         /* Punch hole and zero range are mutually exclusive */
     257           0 :         if ((mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_ZERO_RANGE)) ==
     258             :             (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_ZERO_RANGE))
     259             :                 return -EOPNOTSUPP;
     260             : 
     261             :         /* Punch hole must have keep size set */
     262           0 :         if ((mode & FALLOC_FL_PUNCH_HOLE) &&
     263             :             !(mode & FALLOC_FL_KEEP_SIZE))
     264             :                 return -EOPNOTSUPP;
     265             : 
     266             :         /* Collapse range should only be used exclusively. */
     267           0 :         if ((mode & FALLOC_FL_COLLAPSE_RANGE) &&
     268           0 :             (mode & ~FALLOC_FL_COLLAPSE_RANGE))
     269             :                 return -EINVAL;
     270             : 
     271             :         /* Insert range should only be used exclusively. */
     272           0 :         if ((mode & FALLOC_FL_INSERT_RANGE) &&
     273           0 :             (mode & ~FALLOC_FL_INSERT_RANGE))
     274             :                 return -EINVAL;
     275             : 
     276             :         /* Unshare range should only be used with allocate mode. */
     277           0 :         if ((mode & FALLOC_FL_UNSHARE_RANGE) &&
     278           0 :             (mode & ~(FALLOC_FL_UNSHARE_RANGE | FALLOC_FL_KEEP_SIZE)))
     279             :                 return -EINVAL;
     280             : 
     281           0 :         if (!(file->f_mode & FMODE_WRITE))
     282             :                 return -EBADF;
     283             : 
     284             :         /*
     285             :          * We can only allow pure fallocate on append only files
     286             :          */
     287           0 :         if ((mode & ~FALLOC_FL_KEEP_SIZE) && IS_APPEND(inode))
     288             :                 return -EPERM;
     289             : 
     290           0 :         if (IS_IMMUTABLE(inode))
     291             :                 return -EPERM;
     292             : 
     293             :         /*
     294             :          * We cannot allow any fallocate operation on an active swapfile
     295             :          */
     296           0 :         if (IS_SWAPFILE(inode))
     297             :                 return -ETXTBSY;
     298             : 
     299             :         /*
     300             :          * Revalidate the write permissions, in case security policy has
     301             :          * changed since the files were opened.
     302             :          */
     303           0 :         ret = security_file_permission(file, MAY_WRITE);
     304             :         if (ret)
     305             :                 return ret;
     306             : 
     307           0 :         if (S_ISFIFO(inode->i_mode))
     308             :                 return -ESPIPE;
     309             : 
     310           0 :         if (S_ISDIR(inode->i_mode))
     311             :                 return -EISDIR;
     312             : 
     313           0 :         if (!S_ISREG(inode->i_mode) && !S_ISBLK(inode->i_mode))
     314             :                 return -ENODEV;
     315             : 
     316             :         /* Check for wrap through zero too */
     317           0 :         if (((offset + len) > inode->i_sb->s_maxbytes) || ((offset + len) < 0))
     318             :                 return -EFBIG;
     319             : 
     320           0 :         if (!file->f_op->fallocate)
     321             :                 return -EOPNOTSUPP;
     322             : 
     323           0 :         file_start_write(file);
     324           0 :         ret = file->f_op->fallocate(file, mode, offset, len);
     325             : 
     326             :         /*
     327             :          * Create inotify and fanotify events.
     328             :          *
     329             :          * To keep the logic simple always create events if fallocate succeeds.
     330             :          * This implies that events are even created if the file size remains
     331             :          * unchanged, e.g. when using flag FALLOC_FL_KEEP_SIZE.
     332             :          */
     333           0 :         if (ret == 0)
     334             :                 fsnotify_modify(file);
     335             : 
     336           0 :         file_end_write(file);
     337           0 :         return ret;
     338             : }
     339             : EXPORT_SYMBOL_GPL(vfs_fallocate);
     340             : 
     341           0 : int ksys_fallocate(int fd, int mode, loff_t offset, loff_t len)
     342             : {
     343           0 :         struct fd f = fdget(fd);
     344           0 :         int error = -EBADF;
     345             : 
     346           0 :         if (f.file) {
     347           0 :                 error = vfs_fallocate(f.file, mode, offset, len);
     348           0 :                 fdput(f);
     349             :         }
     350           0 :         return error;
     351             : }
     352             : 
     353           0 : SYSCALL_DEFINE4(fallocate, int, fd, int, mode, loff_t, offset, loff_t, len)
     354             : {
     355           0 :         return ksys_fallocate(fd, mode, offset, len);
     356             : }
     357             : 
     358             : #if defined(CONFIG_COMPAT) && defined(__ARCH_WANT_COMPAT_FALLOCATE)
     359             : COMPAT_SYSCALL_DEFINE6(fallocate, int, fd, int, mode, compat_arg_u64_dual(offset),
     360             :                        compat_arg_u64_dual(len))
     361             : {
     362             :         return ksys_fallocate(fd, mode, compat_arg_u64_glue(offset),
     363             :                               compat_arg_u64_glue(len));
     364             : }
     365             : #endif
     366             : 
     367             : /*
     368             :  * access() needs to use the real uid/gid, not the effective uid/gid.
     369             :  * We do this by temporarily clearing all FS-related capabilities and
     370             :  * switching the fsuid/fsgid around to the real ones.
     371             :  *
     372             :  * Creating new credentials is expensive, so we try to skip doing it,
     373             :  * which we can if the result would match what we already got.
     374             :  */
     375           0 : static bool access_need_override_creds(int flags)
     376             : {
     377             :         const struct cred *cred;
     378             : 
     379           0 :         if (flags & AT_EACCESS)
     380             :                 return false;
     381             : 
     382           0 :         cred = current_cred();
     383           0 :         if (!uid_eq(cred->fsuid, cred->uid) ||
     384           0 :             !gid_eq(cred->fsgid, cred->gid))
     385             :                 return true;
     386             : 
     387           0 :         if (!issecure(SECURE_NO_SETUID_FIXUP)) {
     388           0 :                 kuid_t root_uid = make_kuid(cred->user_ns, 0);
     389           0 :                 if (!uid_eq(cred->uid, root_uid)) {
     390           0 :                         if (!cap_isclear(cred->cap_effective))
     391             :                                 return true;
     392             :                 } else {
     393           0 :                         if (!cap_isidentical(cred->cap_effective,
     394             :                             cred->cap_permitted))
     395             :                                 return true;
     396             :                 }
     397             :         }
     398             : 
     399             :         return false;
     400             : }
     401             : 
     402           0 : static const struct cred *access_override_creds(void)
     403             : {
     404             :         const struct cred *old_cred;
     405             :         struct cred *override_cred;
     406             : 
     407           0 :         override_cred = prepare_creds();
     408           0 :         if (!override_cred)
     409             :                 return NULL;
     410             : 
     411             :         /*
     412             :          * XXX access_need_override_creds performs checks in hopes of skipping
     413             :          * this work. Make sure it stays in sync if making any changes in this
     414             :          * routine.
     415             :          */
     416             : 
     417           0 :         override_cred->fsuid = override_cred->uid;
     418           0 :         override_cred->fsgid = override_cred->gid;
     419             : 
     420           0 :         if (!issecure(SECURE_NO_SETUID_FIXUP)) {
     421             :                 /* Clear the capabilities if we switch to a non-root user */
     422           0 :                 kuid_t root_uid = make_kuid(override_cred->user_ns, 0);
     423           0 :                 if (!uid_eq(override_cred->uid, root_uid))
     424           0 :                         cap_clear(override_cred->cap_effective);
     425             :                 else
     426           0 :                         override_cred->cap_effective =
     427             :                                 override_cred->cap_permitted;
     428             :         }
     429             : 
     430             :         /*
     431             :          * The new set of credentials can *only* be used in
     432             :          * task-synchronous circumstances, and does not need
     433             :          * RCU freeing, unless somebody then takes a separate
     434             :          * reference to it.
     435             :          *
     436             :          * NOTE! This is _only_ true because this credential
     437             :          * is used purely for override_creds() that installs
     438             :          * it as the subjective cred. Other threads will be
     439             :          * accessing ->real_cred, not the subjective cred.
     440             :          *
     441             :          * If somebody _does_ make a copy of this (using the
     442             :          * 'get_current_cred()' function), that will clear the
     443             :          * non_rcu field, because now that other user may be
     444             :          * expecting RCU freeing. But normal thread-synchronous
     445             :          * cred accesses will keep things non-RCU.
     446             :          */
     447           0 :         override_cred->non_rcu = 1;
     448             : 
     449           0 :         old_cred = override_creds(override_cred);
     450             : 
     451             :         /* override_creds() gets its own ref */
     452             :         put_cred(override_cred);
     453             : 
     454             :         return old_cred;
     455             : }
     456             : 
     457           0 : static long do_faccessat(int dfd, const char __user *filename, int mode, int flags)
     458             : {
     459             :         struct path path;
     460             :         struct inode *inode;
     461             :         int res;
     462           0 :         unsigned int lookup_flags = LOOKUP_FOLLOW;
     463           0 :         const struct cred *old_cred = NULL;
     464             : 
     465           0 :         if (mode & ~S_IRWXO)        /* where's F_OK, X_OK, W_OK, R_OK? */
     466             :                 return -EINVAL;
     467             : 
     468           0 :         if (flags & ~(AT_EACCESS | AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH))
     469             :                 return -EINVAL;
     470             : 
     471           0 :         if (flags & AT_SYMLINK_NOFOLLOW)
     472           0 :                 lookup_flags &= ~LOOKUP_FOLLOW;
     473           0 :         if (flags & AT_EMPTY_PATH)
     474           0 :                 lookup_flags |= LOOKUP_EMPTY;
     475             : 
     476           0 :         if (access_need_override_creds(flags)) {
     477           0 :                 old_cred = access_override_creds();
     478           0 :                 if (!old_cred)
     479             :                         return -ENOMEM;
     480             :         }
     481             : 
     482             : retry:
     483           0 :         res = user_path_at(dfd, filename, lookup_flags, &path);
     484           0 :         if (res)
     485             :                 goto out;
     486             : 
     487           0 :         inode = d_backing_inode(path.dentry);
     488             : 
     489           0 :         if ((mode & MAY_EXEC) && S_ISREG(inode->i_mode)) {
     490             :                 /*
     491             :                  * MAY_EXEC on regular files is denied if the fs is mounted
     492             :                  * with the "noexec" flag.
     493             :                  */
     494           0 :                 res = -EACCES;
     495           0 :                 if (path_noexec(&path))
     496             :                         goto out_path_release;
     497             :         }
     498             : 
     499           0 :         res = inode_permission(mnt_idmap(path.mnt), inode, mode | MAY_ACCESS);
     500             :         /* SuS v2 requires we report a read only fs too */
     501           0 :         if (res || !(mode & S_IWOTH) || special_file(inode->i_mode))
     502             :                 goto out_path_release;
     503             :         /*
     504             :          * This is a rare case where using __mnt_is_readonly()
     505             :          * is OK without a mnt_want/drop_write() pair.  Since
     506             :          * no actual write to the fs is performed here, we do
     507             :          * not need to telegraph that to anyone.
     508             :          *
     509             :          * By doing this, we accept that this access is
     510             :          * inherently racy and know that the fs may change
     511             :          * state before we even see this result.
     512             :          */
     513           0 :         if (__mnt_is_readonly(path.mnt))
     514           0 :                 res = -EROFS;
     515             : 
     516             : out_path_release:
     517           0 :         path_put(&path);
     518           0 :         if (retry_estale(res, lookup_flags)) {
     519           0 :                 lookup_flags |= LOOKUP_REVAL;
     520           0 :                 goto retry;
     521             :         }
     522             : out:
     523           0 :         if (old_cred)
     524           0 :                 revert_creds(old_cred);
     525             : 
     526           0 :         return res;
     527             : }
     528             : 
     529           0 : SYSCALL_DEFINE3(faccessat, int, dfd, const char __user *, filename, int, mode)
     530             : {
     531           0 :         return do_faccessat(dfd, filename, mode, 0);
     532             : }
     533             : 
     534           0 : SYSCALL_DEFINE4(faccessat2, int, dfd, const char __user *, filename, int, mode,
     535             :                 int, flags)
     536             : {
     537           0 :         return do_faccessat(dfd, filename, mode, flags);
     538             : }
     539             : 
     540           0 : SYSCALL_DEFINE2(access, const char __user *, filename, int, mode)
     541             : {
     542           0 :         return do_faccessat(AT_FDCWD, filename, mode, 0);
     543             : }
     544             : 
     545           0 : SYSCALL_DEFINE1(chdir, const char __user *, filename)
     546             : {
     547             :         struct path path;
     548             :         int error;
     549           0 :         unsigned int lookup_flags = LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
     550             : retry:
     551           0 :         error = user_path_at(AT_FDCWD, filename, lookup_flags, &path);
     552           0 :         if (error)
     553             :                 goto out;
     554             : 
     555           0 :         error = path_permission(&path, MAY_EXEC | MAY_CHDIR);
     556           0 :         if (error)
     557             :                 goto dput_and_out;
     558             : 
     559           0 :         set_fs_pwd(current->fs, &path);
     560             : 
     561             : dput_and_out:
     562           0 :         path_put(&path);
     563           0 :         if (retry_estale(error, lookup_flags)) {
     564             :                 lookup_flags |= LOOKUP_REVAL;
     565             :                 goto retry;
     566             :         }
     567             : out:
     568           0 :         return error;
     569             : }
     570             : 
     571           0 : SYSCALL_DEFINE1(fchdir, unsigned int, fd)
     572             : {
     573           0 :         struct fd f = fdget_raw(fd);
     574             :         int error;
     575             : 
     576           0 :         error = -EBADF;
     577           0 :         if (!f.file)
     578             :                 goto out;
     579             : 
     580           0 :         error = -ENOTDIR;
     581           0 :         if (!d_can_lookup(f.file->f_path.dentry))
     582             :                 goto out_putf;
     583             : 
     584           0 :         error = file_permission(f.file, MAY_EXEC | MAY_CHDIR);
     585           0 :         if (!error)
     586           0 :                 set_fs_pwd(current->fs, &f.file->f_path);
     587             : out_putf:
     588           0 :         fdput(f);
     589             : out:
     590           0 :         return error;
     591             : }
     592             : 
     593           0 : SYSCALL_DEFINE1(chroot, const char __user *, filename)
     594             : {
                               /*
                                * Make the directory named by @filename the root of current->fs.
                                *
                                * The lookup follows symlinks and insists on a directory.  The
                                * change is only made when path_permission() grants
                                * MAY_EXEC | MAY_CHDIR, the caller has CAP_SYS_CHROOT in its own
                                * user namespace (-EPERM otherwise) and security_path_chroot()
                                * does not object.  Returns 0 on success or a negative errno.
                                */
     595             :         struct path path;
     596             :         int error;
     597           0 :         unsigned int lookup_flags = LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
     598             : retry:
     599           0 :         error = user_path_at(AT_FDCWD, filename, lookup_flags, &path);
     600           0 :         if (error)
     601             :                 goto out;
     602             : 
     603           0 :         error = path_permission(&path, MAY_EXEC | MAY_CHDIR);
     604           0 :         if (error)
     605             :                 goto dput_and_out;
     606             : 
     607           0 :         error = -EPERM;
     608           0 :         if (!ns_capable(current_user_ns(), CAP_SYS_CHROOT))
     609             :                 goto dput_and_out;
     610           0 :         error = security_path_chroot(&path);
     611             :         if (error)
     612             :                 goto dput_and_out;
     613             : 
     614           0 :         set_fs_root(current->fs, &path);
     615           0 :         error = 0;
     616             : dput_and_out:
     617           0 :         path_put(&path);
                               /*
                                * When retry_estale() asks for it, redo the whole lookup once more
                                * with LOOKUP_REVAL added to the flags.
                                */
     618           0 :         if (retry_estale(error, lookup_flags)) {
     619             :                 lookup_flags |= LOOKUP_REVAL;
     620             :                 goto retry;
     621             :         }
     622             : out:
     623           0 :         return error;
     624             : }
     625             : 
     626           0 : int chmod_common(const struct path *path, umode_t mode)
     627             : {
                               /*
                                * Change the permission bits of the inode behind @path.
                                *
                                * Write access to the mount is held for the whole operation and
                                * the attribute change itself runs under the inode lock, after
                                * security_path_chmod() has agreed.  Returns 0 or a negative errno.
                                */
     628           0 :         struct inode *inode = path->dentry->d_inode;
     629           0 :         struct inode *delegated_inode = NULL;
     630             :         struct iattr newattrs;
     631             :         int error;
     632             : 
     633           0 :         error = mnt_want_write(path->mnt);
     634           0 :         if (error)
     635             :                 return error;
     636             : retry_deleg:
     637           0 :         inode_lock(inode);
     638           0 :         error = security_path_chmod(path, mode);
     639             :         if (error)
     640             :                 goto out_unlock;
                               /* Take the S_IALLUGO bits from @mode, keep every other i_mode bit. */
     641           0 :         newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO);
     642           0 :         newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
     643           0 :         error = notify_change(mnt_idmap(path->mnt), path->dentry,
     644             :                               &newattrs, &delegated_inode);
     645             : out_unlock:
     646           0 :         inode_unlock(inode);
                               /*
                                * notify_change() handed back a delegated inode: wait for the
                                * delegation to be broken and, if that succeeded, start over.
                                */
     647           0 :         if (delegated_inode) {
     648           0 :                 error = break_deleg_wait(&delegated_inode);
     649           0 :                 if (!error)
     650             :                         goto retry_deleg;
     651             :         }
     652           0 :         mnt_drop_write(path->mnt);
     653           0 :         return error;
     654             : }
     655             : 
     656           0 : int vfs_fchmod(struct file *file, umode_t mode)
     657             : {
                               /* Record @file with audit, then chmod the path it was opened on. */
     658           0 :         audit_file(file);
     659           0 :         return chmod_common(&file->f_path, mode);
     660             : }
     661             : 
     662           0 : SYSCALL_DEFINE2(fchmod, unsigned int, fd, umode_t, mode)
     663             : {
                               /*
                                * Change the mode of the file open on @fd via vfs_fchmod().
                                * Returns -EBADF when fdget() finds no file for @fd.
                                */
     664           0 :         struct fd f = fdget(fd);
     665           0 :         int err = -EBADF;
     666             : 
     667           0 :         if (f.file) {
     668           0 :                 err = vfs_fchmod(f.file, mode);
     669           0 :                 fdput(f);
     670             :         }
     671           0 :         return err;
     672             : }
     673             : 
     674           0 : static int do_fchmodat(int dfd, const char __user *filename, umode_t mode)
     675             : {
                               /*
                                * Resolve @filename relative to @dfd, following symlinks, and
                                * apply @mode to the result with chmod_common().  Returns 0 or a
                                * negative errno from the lookup or from chmod_common().
                                */
     676             :         struct path path;
     677             :         int error;
     678           0 :         unsigned int lookup_flags = LOOKUP_FOLLOW;
     679             : retry:
     680           0 :         error = user_path_at(dfd, filename, lookup_flags, &path);
     681           0 :         if (!error) {
     682           0 :                 error = chmod_common(&path, mode);
     683           0 :                 path_put(&path);
                                       /* Redo lookup and chmod with LOOKUP_REVAL if asked to. */
     684           0 :                 if (retry_estale(error, lookup_flags)) {
     685             :                         lookup_flags |= LOOKUP_REVAL;
     686             :                         goto retry;
     687             :                 }
     688             :         }
     689           0 :         return error;
     690             : }
     691             : 
     692           0 : SYSCALL_DEFINE3(fchmodat, int, dfd, const char __user *, filename,
     693             :                 umode_t, mode)
     694             : {
                               /* Syscall entry point; all the work is done by do_fchmodat(). */
     695           0 :         return do_fchmodat(dfd, filename, mode);
     696             : }
     697             : 
     698           0 : SYSCALL_DEFINE2(chmod, const char __user *, filename, umode_t, mode)
     699             : {
                               /* Same as fchmodat() with AT_FDCWD as the directory fd. */
     700           0 :         return do_fchmodat(AT_FDCWD, filename, mode);
     701             : }
     702             : 
     703             : /*
     704             :  * Check whether @kuid is valid and if so generate and set vfsuid_t in
     705             :  * ia_vfsuid.
     706             :  *
     707             :  * Return: true if @kuid is valid, false if not.
     708             :  */
     709             : static inline bool setattr_vfsuid(struct iattr *attr, kuid_t kuid)
     710             : {
                               /* An invalid @kuid leaves @attr completely untouched. */
     711           0 :         if (!uid_valid(kuid))
     712             :                 return false;
                               /* Flag the uid as part of the request and store it as a vfsuid. */
     713           0 :         attr->ia_valid |= ATTR_UID;
     714           0 :         attr->ia_vfsuid = VFSUIDT_INIT(kuid);
     715             :         return true;
     716             : }
     717             : 
     718             : /*
     719             :  * Check whether @kgid is valid and if so generate and set vfsgid_t in
     720             :  * ia_vfsgid.
     721             :  *
     722             :  * Return: true if @kgid is valid, false if not.
     723             :  */
     724             : static inline bool setattr_vfsgid(struct iattr *attr, kgid_t kgid)
     725             : {
                               /* An invalid @kgid leaves @attr completely untouched. */
     726           0 :         if (!gid_valid(kgid))
     727             :                 return false;
                               /* Flag the gid as part of the request and store it as a vfsgid. */
     728           0 :         attr->ia_valid |= ATTR_GID;
     729           0 :         attr->ia_vfsgid = VFSGIDT_INIT(kgid);
     730             :         return true;
     731             : }
     732             : 
     733           0 : int chown_common(const struct path *path, uid_t user, gid_t group)
     734             : {
                               /*
                                * Change the owner and/or group of the inode behind @path.
                                *
                                * @user and @group are converted with the caller's user namespace.
                                * A value of -1 leaves that ID out of the request; any other value
                                * whose converted kuid/kgid is not valid fails with -EINVAL.
                                *
                                * This function does not take write access to the mount itself;
                                * the callers visible in this file do that around the call.
                                * Returns 0 or a negative errno.
                                */
     735             :         struct mnt_idmap *idmap;
     736             :         struct user_namespace *fs_userns;
     737           0 :         struct inode *inode = path->dentry->d_inode;
     738           0 :         struct inode *delegated_inode = NULL;
     739             :         int error;
     740             :         struct iattr newattrs;
     741             :         kuid_t uid;
     742             :         kgid_t gid;
     743             : 
     744           0 :         uid = make_kuid(current_user_ns(), user);
     745           0 :         gid = make_kgid(current_user_ns(), group);
     746             : 
     747           0 :         idmap = mnt_idmap(path->mnt);
     748           0 :         fs_userns = i_user_ns(inode);
     749             : 
     750             : retry_deleg:
                               /* The request is rebuilt from scratch on every pass. */
     751           0 :         newattrs.ia_vfsuid = INVALID_VFSUID;
     752           0 :         newattrs.ia_vfsgid = INVALID_VFSGID;
     753           0 :         newattrs.ia_valid =  ATTR_CTIME;
     754           0 :         if ((user != (uid_t)-1) && !setattr_vfsuid(&newattrs, uid))
     755             :                 return -EINVAL;
     756           0 :         if ((group != (gid_t)-1) && !setattr_vfsgid(&newattrs, gid))
     757             :                 return -EINVAL;
     758           0 :         inode_lock(inode);
                               /*
                                * For anything but a directory also request ATTR_KILL_SUID and
                                * ATTR_KILL_PRIV, plus whatever setattr_should_drop_sgid() returns.
                                */
     759           0 :         if (!S_ISDIR(inode->i_mode))
     760           0 :                 newattrs.ia_valid |= ATTR_KILL_SUID | ATTR_KILL_PRIV |
     761           0 :                                      setattr_should_drop_sgid(idmap, inode);
     762             :         /* Continue to send actual fs values, not the mount values. */
     763           0 :         error = security_path_chown(
     764             :                 path,
     765             :                 from_vfsuid(idmap, fs_userns, newattrs.ia_vfsuid),
     766             :                 from_vfsgid(idmap, fs_userns, newattrs.ia_vfsgid));
     767             :         if (!error)
     768           0 :                 error = notify_change(idmap, path->dentry, &newattrs,
     769             :                                       &delegated_inode);
     770           0 :         inode_unlock(inode);
                               /*
                                * notify_change() handed back a delegated inode: wait for the
                                * delegation to be broken and, if that succeeded, start over.
                                */
     771           0 :         if (delegated_inode) {
     772           0 :                 error = break_deleg_wait(&delegated_inode);
     773           0 :                 if (!error)
     774             :                         goto retry_deleg;
     775             :         }
     776             :         return error;
     777             : }
     778             : 
     779           0 : int do_fchownat(int dfd, const char __user *filename, uid_t user, gid_t group,
     780             :                 int flag)
     781             : {
                               /*
                                * Resolve @filename relative to @dfd and change its owner/group
                                * with chown_common() while holding write access to the mount.
                                *
                                * @flag may only contain AT_SYMLINK_NOFOLLOW and AT_EMPTY_PATH;
                                * any other bit yields -EINVAL.  Symlinks are followed unless
                                * AT_SYMLINK_NOFOLLOW is given, and AT_EMPTY_PATH adds
                                * LOOKUP_EMPTY to the lookup.  Returns 0 or a negative errno.
                                */
     782             :         struct path path;
     783           0 :         int error = -EINVAL;
     784             :         int lookup_flags;
     785             : 
     786           0 :         if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH)) != 0)
     787             :                 goto out;
     788             : 
     789           0 :         lookup_flags = (flag & AT_SYMLINK_NOFOLLOW) ? 0 : LOOKUP_FOLLOW;
     790           0 :         if (flag & AT_EMPTY_PATH)
     791           0 :                 lookup_flags |= LOOKUP_EMPTY;
     792             : retry:
     793           0 :         error = user_path_at(dfd, filename, lookup_flags, &path);
     794           0 :         if (error)
     795             :                 goto out;
     796           0 :         error = mnt_want_write(path.mnt);
     797           0 :         if (error)
     798             :                 goto out_release;
     799           0 :         error = chown_common(&path, user, group);
     800           0 :         mnt_drop_write(path.mnt);
     801             : out_release:
     802           0 :         path_put(&path);
                               /* Redo the lookup and the chown with LOOKUP_REVAL if asked to. */
     803           0 :         if (retry_estale(error, lookup_flags)) {
     804           0 :                 lookup_flags |= LOOKUP_REVAL;
     805           0 :                 goto retry;
     806             :         }
     807             : out:
     808           0 :         return error;
     809             : }
     810             : 
     811           0 : SYSCALL_DEFINE5(fchownat, int, dfd, const char __user *, filename, uid_t, user,
     812             :                 gid_t, group, int, flag)
     813             : {
                               /* Syscall entry point; all the work is done by do_fchownat(). */
     814           0 :         return do_fchownat(dfd, filename, user, group, flag);
     815             : }
     816             : 
     817           0 : SYSCALL_DEFINE3(chown, const char __user *, filename, uid_t, user, gid_t, group)
     818             : {
                               /* do_fchownat() relative to AT_FDCWD with no flags: symlinks are followed. */
     819           0 :         return do_fchownat(AT_FDCWD, filename, user, group, 0);
     820             : }
     821             : 
     822           0 : SYSCALL_DEFINE3(lchown, const char __user *, filename, uid_t, user, gid_t, group)
     823             : {
                               /* Like chown(), but AT_SYMLINK_NOFOLLOW keeps the lookup from following symlinks. */
     824           0 :         return do_fchownat(AT_FDCWD, filename, user, group,
     825             :                            AT_SYMLINK_NOFOLLOW);
     826             : }
     827             : 
     828           0 : int vfs_fchown(struct file *file, uid_t user, gid_t group)
     829             : {
                               /*
                                * Change owner/group of the path @file was opened on.  Write
                                * access is taken through the file for the duration of the
                                * chown_common() call.  Returns 0 or a negative errno.
                                */
     830             :         int error;
     831             : 
     832           0 :         error = mnt_want_write_file(file);
     833           0 :         if (error)
     834             :                 return error;
     835           0 :         audit_file(file);
     836           0 :         error = chown_common(&file->f_path, user, group);
     837           0 :         mnt_drop_write_file(file);
     838           0 :         return error;
     839             : }
     840             : 
     841           0 : int ksys_fchown(unsigned int fd, uid_t user, gid_t group)
     842             : {
                               /*
                                * Look up @fd and hand the file to vfs_fchown().
                                * Returns -EBADF when fdget() finds no file for @fd.
                                */
     843           0 :         struct fd f = fdget(fd);
     844           0 :         int error = -EBADF;
     845             : 
     846           0 :         if (f.file) {
     847           0 :                 error = vfs_fchown(f.file, user, group);
     848           0 :                 fdput(f);
     849             :         }
     850           0 :         return error;
     851             : }
     852             : 
     853           0 : SYSCALL_DEFINE3(fchown, unsigned int, fd, uid_t, user, gid_t, group)
     854             : {
                               /* Syscall entry point; all the work is done by ksys_fchown(). */
     855           0 :         return ksys_fchown(fd, user, group);
     856             : }
     857             : 
     858           0 : static int do_dentry_open(struct file *f,
     859             :                           struct inode *inode,
     860             :                           int (*open)(struct inode *, struct file *))
     861             : {
                               /*
                                * Attach @f to @inode and run the open method.
                                *
                                * @f:     file whose f_path, f_flags and f_mode were set up by the
                                *         caller; a reference on f_path is taken here.
                                * @inode: inode to bind to @f.
                                * @open:  open method to call; when NULL, f->f_op->open is used
                                *         instead (and nothing is called if that is NULL too).
                                *
                                * Returns 0 on success or a negative errno.  On the cleanup_all /
                                * cleanup_file paths the f_path reference is dropped again and
                                * f_path and f_inode are cleared.
                                */
     862             :         static const struct file_operations empty_fops = {};
     863             :         int error;
     864             : 
     865           0 :         path_get(&f->f_path);
     866           0 :         f->f_inode = inode;
     867           0 :         f->f_mapping = inode->i_mapping;
     868           0 :         f->f_wb_err = filemap_sample_wb_err(f->f_mapping);
     869           0 :         f->f_sb_err = file_sample_sb_err(f);
     870             : 
                               /*
                                * O_PATH opens stop here: f_mode is replaced wholesale, f_op is
                                * the empty table, and no access accounting or ->open() happens.
                                */
     871           0 :         if (unlikely(f->f_flags & O_PATH)) {
     872           0 :                 f->f_mode = FMODE_PATH | FMODE_OPENED;
     873           0 :                 f->f_op = &empty_fops;
     874           0 :                 return 0;
     875             :         }
     876             : 
                               /*
                                * Read-only opens bump the inode's read count.  Writable opens of
                                * non-special files take write access on the inode and on the
                                * mount, and are marked FMODE_WRITER once both succeeded.
                                */
     877           0 :         if ((f->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ) {
     878             :                 i_readcount_inc(inode);
     879           0 :         } else if (f->f_mode & FMODE_WRITE && !special_file(inode->i_mode)) {
     880           0 :                 error = get_write_access(inode);
     881           0 :                 if (unlikely(error))
     882             :                         goto cleanup_file;
     883           0 :                 error = __mnt_want_write(f->f_path.mnt);
     884           0 :                 if (unlikely(error)) {
     885             :                         put_write_access(inode);
     886             :                         goto cleanup_file;
     887             :                 }
     888           0 :                 f->f_mode |= FMODE_WRITER;
     889             :         }
     890             : 
     891             :         /* POSIX.1-2008/SUSv4 Section XSI 2.9.7 */
     892           0 :         if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode))
     893           0 :                 f->f_mode |= FMODE_ATOMIC_POS;
     894             : 
     895           0 :         f->f_op = fops_get(inode->i_fop);
     896           0 :         if (WARN_ON(!f->f_op)) {
     897             :                 error = -ENODEV;
     898             :                 goto cleanup_all;
     899             :         }
     900             : 
     901           0 :         error = security_file_open(f);
     902             :         if (error)
     903             :                 goto cleanup_all;
     904             : 
     905           0 :         error = break_lease(file_inode(f), f->f_flags);
     906           0 :         if (error)
     907             :                 goto cleanup_all;
     908             : 
     909             :         /* normally all 3 are set; ->open() can clear them if needed */
     910           0 :         f->f_mode |= FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE;
     911           0 :         if (!open)
     912           0 :                 open = f->f_op->open;
     913           0 :         if (open) {
     914           0 :                 error = open(inode, f);
     915           0 :                 if (error)
     916             :                         goto cleanup_all;
     917             :         }
     918           0 :         f->f_mode |= FMODE_OPENED;
                               /*
                                * Derive the capability bits from what f_op and the mapping's
                                * a_ops actually implement.
                                */
     919           0 :         if ((f->f_mode & FMODE_READ) &&
     920           0 :              likely(f->f_op->read || f->f_op->read_iter))
     921           0 :                 f->f_mode |= FMODE_CAN_READ;
     922           0 :         if ((f->f_mode & FMODE_WRITE) &&
     923           0 :              likely(f->f_op->write || f->f_op->write_iter))
     924           0 :                 f->f_mode |= FMODE_CAN_WRITE;
     925           0 :         if ((f->f_mode & FMODE_LSEEK) && !f->f_op->llseek)
     926           0 :                 f->f_mode &= ~FMODE_LSEEK;
     927           0 :         if (f->f_mapping->a_ops && f->f_mapping->a_ops->direct_IO)
     928           0 :                 f->f_mode |= FMODE_CAN_ODIRECT;
     929             : 
                               /* These flags are cleared from f_flags once the open has gone through. */
     930           0 :         f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
     931           0 :         f->f_iocb_flags = iocb_flags(f);
     932             : 
     933           0 :         file_ra_state_init(&f->f_ra, f->f_mapping->host->i_mapping);
     934             : 
                               /*
                                * NOTE: this returns directly, after FMODE_OPENED has been set,
                                * and does not go through the cleanup labels below.
                                */
     935           0 :         if ((f->f_flags & O_DIRECT) && !(f->f_mode & FMODE_CAN_ODIRECT))
     936             :                 return -EINVAL;
     937             : 
     938             :         /*
     939             :          * XXX: Huge page cache doesn't support writing yet. Drop all page
     940             :          * cache for this file before processing writes.
     941             :          */
     942           0 :         if (f->f_mode & FMODE_WRITE) {
     943             :                 /*
     944             :                  * Paired with smp_mb() in collapse_file() to ensure nr_thps
     945             :                  * is up to date and the update to i_writecount by
     946             :                  * get_write_access() is visible. Ensures subsequent insertion
     947             :                  * of THPs into the page cache will fail.
     948             :                  */
     949           0 :                 smp_mb();
     950           0 :                 if (filemap_nr_thps(inode->i_mapping)) {
     951             :                         struct address_space *mapping = inode->i_mapping;
     952             : 
     953             :                         filemap_invalidate_lock(inode->i_mapping);
     954             :                         /*
     955             :                          * unmap_mapping_range() only needs to be called once
     956             :                          * here, because private pages do not need to be
     957             :                          * unmapped (e.g. the data segment of dynamic
     958             :                          * shared libraries).
     959             :                          */
     960             :                         unmap_mapping_range(mapping, 0, 0, 0);
     961             :                         truncate_inode_pages(mapping, 0);
     962             :                         filemap_invalidate_unlock(inode->i_mapping);
     963             :                 }
     964             :         }
     965             : 
     966             :         /*
     967             :          * Once we return a file with FMODE_OPENED, __fput() will call
     968             :          * fsnotify_close(), so we need fsnotify_open() here for symmetry.
     969             :          */
     970           0 :         fsnotify_open(f);
     971           0 :         return 0;
     972             : 
     973             : cleanup_all:
                               /*
                                * A positive value is not a valid error code here; warn once and
                                * substitute -EINVAL.
                                */
     974           0 :         if (WARN_ON_ONCE(error > 0))
     975           0 :                 error = -EINVAL;
     976           0 :         fops_put(f->f_op);
     977           0 :         put_file_access(f);
     978             : cleanup_file:
                               /* Undo the path_get() from the top and detach @f from path and inode. */
     979           0 :         path_put(&f->f_path);
     980           0 :         f->f_path.mnt = NULL;
     981           0 :         f->f_path.dentry = NULL;
     982           0 :         f->f_inode = NULL;
     983           0 :         return error;
     984             : }
     985             : 
     986             : /**
     987             :  * finish_open - finish opening a file
     988             :  * @file: file pointer
     989             :  * @dentry: pointer to dentry
     990             :  * @open: open callback
     991             :  *
     992             :  * This can be used to finish opening a file passed to i_op->atomic_open().
     993             :  *
     994             :  * If the open callback is set to NULL, then the standard f_op->open()
     995             :  * filesystem callback is substituted.
     996             :  *
     997             :  * NB: the dentry reference is _not_ consumed.  If, for example, the dentry is
     998             :  * the return value of d_splice_alias(), then the caller needs to perform dput()
     999             :  * on it after finish_open().
    1000             :  *
    1001             :  * Returns zero on success or -errno if the open failed.
    1002             :  */
    1003           0 : int finish_open(struct file *file, struct dentry *dentry,
    1004             :                 int (*open)(struct inode *, struct file *))
    1005             : {
    1006           0 :         BUG_ON(file->f_mode & FMODE_OPENED); /* once it's opened, it's opened */
    1007             : 
                               /*
                                * Only the dentry is filled in here; file->f_path.mnt is not
                                * touched.  do_dentry_open() takes its own reference on f_path,
                                * which is why the caller's dentry reference is not consumed.
                                */
    1008           0 :         file->f_path.dentry = dentry;
    1009           0 :         return do_dentry_open(file, d_backing_inode(dentry), open);
    1010             : }
    1011             : EXPORT_SYMBOL(finish_open);
    1012             : 
    1013             : /**
    1014             :  * finish_no_open - finish ->atomic_open() without opening the file
    1015             :  *
    1016             :  * @file: file pointer
    1017             :  * @dentry: dentry or NULL (as returned from ->lookup())
    1018             :  *
    1019             :  * This can be used to set the result of a successful lookup in ->atomic_open().
    1020             :  *
    1021             :  * NB: unlike finish_open() this function does consume the dentry reference and
    1022             :  * the caller need not dput() it.
    1023             :  *
    1024             :  * Returns "0" which must be the return value of ->atomic_open() after having
    1025             :  * called this function.
    1026             :  */
    1027           0 : int finish_no_open(struct file *file, struct dentry *dentry)
    1028             : {
                               /* Just record the dentry in @file; nothing is opened here. */
    1029           0 :         file->f_path.dentry = dentry;
    1030           0 :         return 0;
    1031             : }
    1032             : EXPORT_SYMBOL(finish_no_open);
    1033             : 
    1034           0 : char *file_path(struct file *filp, char *buf, int buflen)
    1035             : {
                               /*
                                * Thin wrapper: run d_path() on the path @filp was opened on,
                                * using the caller's @buf of @buflen bytes, and return its result.
                                */
    1036           0 :         return d_path(&filp->f_path, buf, buflen);
    1037             : }
    1038             : EXPORT_SYMBOL(file_path);
    1039             : 
    1040             : /**
    1041             :  * vfs_open - open the file at the given path
    1042             :  * @path: path to open
    1043             :  * @file: newly allocated file with f_flags initialized
    1044             :  */
    1045           0 : int vfs_open(const struct path *path, struct file *file)
    1046             : {
                               /*
                                * Plain struct copy of @path; the reference on it is taken inside
                                * do_dentry_open().  With a NULL open callback the inode's own
                                * f_op->open is used.
                                */
    1047           0 :         file->f_path = *path;
    1048           0 :         return do_dentry_open(file, d_backing_inode(path->dentry), NULL);
    1049             : }
    1050             : 
    1051           0 : struct file *dentry_open(const struct path *path, int flags,
    1052             :                          const struct cred *cred)
    1053             : {
                               /*
                                * Allocate a file with @flags and @cred and open @path on it.
                                *
                                * Returns the opened file, or an ERR_PTR() carrying either the
                                * allocation failure or the error from vfs_open().
                                */
    1054             :         int error;
    1055             :         struct file *f;
    1056             : 
    1057           0 :         validate_creds(cred);
    1058             : 
    1059             :         /* We must always pass in a valid mount pointer. */
    1060           0 :         BUG_ON(!path->mnt);
    1061             : 
    1062           0 :         f = alloc_empty_file(flags, cred);
    1063           0 :         if (!IS_ERR(f)) {
    1064           0 :                 error = vfs_open(path, f);
                                       /* Open failed: drop the new file and report the error instead. */
    1065           0 :                 if (error) {
    1066           0 :                         fput(f);
    1067           0 :                         f = ERR_PTR(error);
    1068             :                 }
    1069             :         }
    1070           0 :         return f;
    1071             : }
    1072             : EXPORT_SYMBOL(dentry_open);
    1073             : 
    1074             : /**
    1075             :  * dentry_create - Create and open a file
    1076             :  * @path: path to create
    1077             :  * @flags: O_ flags
    1078             :  * @mode: mode bits for new file
    1079             :  * @cred: credentials to use
    1080             :  *
    1081             :  * Caller must hold the parent directory's lock, and have prepared
    1082             :  * a negative dentry, placed in @path->dentry, for the new file.
    1083             :  *
    1084             :  * Caller sets @path->mnt to the vfsmount of the filesystem where
    1085             :  * the new file is to be created. The parent directory and the
    1086             :  * negative dentry must reside on the same filesystem instance.
    1087             :  *
    1088             :  * On success, returns a "struct file *". Otherwise an ERR_PTR()
    1089             :  * is returned.
    1090             :  */
    1091           0 : struct file *dentry_create(const struct path *path, int flags, umode_t mode,
    1092             :                            const struct cred *cred)
    1093             : {
    1094             :         struct file *f;
    1095             :         int error;
    1096             : 
    1097           0 :         validate_creds(cred);
                               /* The file is allocated first, so an allocation failure creates nothing. */
    1098           0 :         f = alloc_empty_file(flags, cred);
    1099           0 :         if (IS_ERR(f))
    1100             :                 return f;
    1101             : 
                               /* Create in the directory that is the parent of @path->dentry ... */
    1102           0 :         error = vfs_create(mnt_idmap(path->mnt),
    1103           0 :                            d_inode(path->dentry->d_parent),
    1104             :                            path->dentry, mode, true);
                               /* ... and open the result only if the create succeeded. */
    1105           0 :         if (!error)
    1106           0 :                 error = vfs_open(path, f);
    1107             : 
    1108           0 :         if (unlikely(error)) {
    1109           0 :                 fput(f);
    1110           0 :                 return ERR_PTR(error);
    1111             :         }
    1112             :         return f;
    1113             : }
    1114             : EXPORT_SYMBOL(dentry_create);
    1115             : 
    1116             : /**
    1117             :  * kernel_file_open - open a file for kernel internal use
    1118             :  * @path:       path of the file to open
    1119             :  * @flags:      open flags
    1120             :  * @inode:      the inode
    1121             :  * @cred:       credentials for open
    1122             :  *
    1123             :  * Open a file for use by in-kernel consumers. The file is not accounted
    1124             :  * against nr_files and must not be installed into the file descriptor
    1125             :  * table.
    1126             :  *
    1127             :  * Return: Opened file on success, an error pointer on failure.
    1128             :  */
    1129           0 : struct file *kernel_file_open(const struct path *path, int flags,
    1130             :                                 struct inode *inode, const struct cred *cred)
    1131             : {
    1132             :         struct file *f;
    1133             :         int error;
    1134             : 
    1135           0 :         f = alloc_empty_file_noaccount(flags, cred);
    1136           0 :         if (IS_ERR(f))
    1137             :                 return f;
    1138             : 
                               /*
                                * Unlike vfs_open(), the inode is supplied by the caller rather
                                * than derived from @path->dentry.
                                */
    1139           0 :         f->f_path = *path;
    1140           0 :         error = do_dentry_open(f, inode, NULL);
    1141           0 :         if (error) {
    1142           0 :                 fput(f);
    1143           0 :                 f = ERR_PTR(error);
    1144             :         }
    1145             :         return f;
    1146             : }
    1147             : EXPORT_SYMBOL_GPL(kernel_file_open);
    1148             : 
    1149             : /**
    1150             :  * backing_file_open - open a backing file for kernel internal use
    1151             :  * @path:       path of the file to open
    1152             :  * @flags:      open flags
    1153             :  * @real_path:  path of the backing file
    1154             :  * @cred:       credentials for open
    1155             :  *
    1156             :  * Open a backing file for a stackable filesystem (e.g., overlayfs).
    1157             :  * @path may be on the stackable filesystem and backing inode on the
    1158             :  * underlying filesystem. In this case, we want to be able to return
    1159             :  * the @real_path of the backing inode. This is done by embedding the
    1160             :  * returned file into a container structure that also stores the path of
    1161             :  * the backing inode on the underlying filesystem, which can be
    1162             :  * retrieved using backing_file_real_path().
    1163             :  */
    1164           0 : struct file *backing_file_open(const struct path *path, int flags,
    1165             :                                const struct path *real_path,
    1166             :                                const struct cred *cred)
    1167             : {
    1168             :         struct file *f;
    1169             :         int error;
    1170             : 
    1171           0 :         f = alloc_empty_backing_file(flags, cred);
    1172           0 :         if (IS_ERR(f))
    1173             :                 return f;
    1174             : 
    1175           0 :         f->f_path = *path;
                               /*
                                * Take a reference for the copy of @real_path stored alongside
                                * the file.  Presumably released when the backing file is freed;
                                * that code is not in this function.
                                */
    1176           0 :         path_get(real_path);
    1177           0 :         *backing_file_real_path(f) = *real_path;
                               /* The inode comes from @real_path, while f_path above is @path. */
    1178           0 :         error = do_dentry_open(f, d_inode(real_path->dentry), NULL);
    1179           0 :         if (error) {
    1180           0 :                 fput(f);
    1181           0 :                 f = ERR_PTR(error);
    1182             :         }
    1183             : 
    1184             :         return f;
    1185             : }
    1186             : EXPORT_SYMBOL_GPL(backing_file_open);
    1187             : 
    1188             : #define WILL_CREATE(flags)      (flags & (O_CREAT | __O_TMPFILE))
                       /*
                        * WILL_CREATE(): non-zero when @flags has O_CREAT or __O_TMPFILE set.
                        * NOTE(review): @flags is not parenthesised in the expansion, so only
                        * pass simple expressions to it.
                        *
                        * O_PATH_FLAGS: the only flags build_open_how() keeps when O_PATH is set.
                        */
    1189             : #define O_PATH_FLAGS            (O_DIRECTORY | O_NOFOLLOW | O_PATH | O_CLOEXEC)
    1190             : 
    1191           0 : inline struct open_how build_open_how(int flags, umode_t mode)
    1192             : {
                               /*
                                * Build a struct open_how from open()-style @flags and @mode.
                                * Bits outside VALID_OPEN_FLAGS and S_IALLUGO are dropped here
                                * rather than reported as errors.
                                */
    1193           0 :         struct open_how how = {
    1194           0 :                 .flags = flags & VALID_OPEN_FLAGS,
    1195           0 :                 .mode = mode & S_IALLUGO,
    1196             :         };
    1197             : 
    1198             :         /* O_PATH beats everything else. */
    1199           0 :         if (how.flags & O_PATH)
    1200           0 :                 how.flags &= O_PATH_FLAGS;
    1201             :         /* Modes should only be set for create-like flags. */
    1202           0 :         if (!WILL_CREATE(how.flags))
    1203           0 :                 how.mode = 0;
    1204           0 :         return how;
    1205             : }
    1206             : 
    1207           0 : inline int build_open_flags(const struct open_how *how, struct open_flags *op)
    1208             : {
    1209           0 :         u64 flags = how->flags;
    1210           0 :         u64 strip = __FMODE_NONOTIFY | O_CLOEXEC;
    1211           0 :         int lookup_flags = 0;
    1212           0 :         int acc_mode = ACC_MODE(flags);
    1213             : 
    1214             :         BUILD_BUG_ON_MSG(upper_32_bits(VALID_OPEN_FLAGS),
    1215             :                          "struct open_flags doesn't yet handle flags > 32 bits");
    1216             : 
    1217             :         /*
    1218             :          * Strip flags that either shouldn't be set by userspace like
    1219             :          * FMODE_NONOTIFY or that aren't relevant in determining struct
    1220             :          * open_flags like O_CLOEXEC.
    1221             :          */
    1222           0 :         flags &= ~strip;
    1223             : 
    1224             :         /*
    1225             :          * Older syscalls implicitly clear all of the invalid flags or argument
    1226             :          * values before calling build_open_flags(), but openat2(2) checks all
    1227             :          * of its arguments.
    1228             :          */
    1229           0 :         if (flags & ~VALID_OPEN_FLAGS)
    1230             :                 return -EINVAL;
    1231           0 :         if (how->resolve & ~VALID_RESOLVE_FLAGS)
    1232             :                 return -EINVAL;
    1233             : 
    1234             :         /* Scoping flags are mutually exclusive. */
    1235           0 :         if ((how->resolve & RESOLVE_BENEATH) && (how->resolve & RESOLVE_IN_ROOT))
    1236             :                 return -EINVAL;
    1237             : 
    1238             :         /* Deal with the mode. */
    1239           0 :         if (WILL_CREATE(flags)) {
    1240           0 :                 if (how->mode & ~S_IALLUGO)
    1241             :                         return -EINVAL;
    1242           0 :                 op->mode = how->mode | S_IFREG;
    1243             :         } else {
    1244           0 :                 if (how->mode != 0)
    1245             :                         return -EINVAL;
    1246           0 :                 op->mode = 0;
    1247             :         }
    1248             : 
    1249             :         /*
    1250             :          * Block bugs where O_DIRECTORY | O_CREAT created regular files.
    1251             :          * Note that blocking O_DIRECTORY | O_CREAT here also protects
    1252             :          * O_TMPFILE below which requires O_DIRECTORY being raised.
    1253             :          */
    1254           0 :         if ((flags & (O_DIRECTORY | O_CREAT)) == (O_DIRECTORY | O_CREAT))
    1255             :                 return -EINVAL;
    1256             : 
    1257             :         /* Now handle the creative implementation of O_TMPFILE. */
    1258           0 :         if (flags & __O_TMPFILE) {
    1259             :                 /*
    1260             :                  * In order to ensure programs get explicit errors when trying
    1261             :                  * to use O_TMPFILE on old kernels we enforce that O_DIRECTORY
    1262             :                  * is raised alongside __O_TMPFILE.
    1263             :                  */
    1264           0 :                 if (!(flags & O_DIRECTORY))
    1265             :                         return -EINVAL;
    1266           0 :                 if (!(acc_mode & MAY_WRITE))
    1267             :                         return -EINVAL;
    1268             :         }
    1269           0 :         if (flags & O_PATH) {
    1270             :                 /* O_PATH only permits certain other flags to be set. */
    1271           0 :                 if (flags & ~O_PATH_FLAGS)
    1272             :                         return -EINVAL;
    1273             :                 acc_mode = 0;
    1274             :         }
    1275             : 
    1276             :         /*
    1277             :          * O_SYNC is implemented as __O_SYNC|O_DSYNC.  As many places only
    1278             :          * check for O_DSYNC if they need any syncing at all we enforce it's
    1279             :          * always set instead of having to deal with possibly weird behaviour
    1280             :          * for malicious applications setting only __O_SYNC.
    1281             :          */
    1282           0 :         if (flags & __O_SYNC)
    1283           0 :                 flags |= O_DSYNC;
    1284             : 
    1285           0 :         op->open_flag = flags;
    1286             : 
    1287             :         /* O_TRUNC implies we need access checks for write permissions */
    1288           0 :         if (flags & O_TRUNC)
    1289           0 :                 acc_mode |= MAY_WRITE;
    1290             : 
    1291             :         /* Allow the LSM permission hook to distinguish append
    1292             :            access from general write access. */
    1293           0 :         if (flags & O_APPEND)
    1294           0 :                 acc_mode |= MAY_APPEND;
    1295             : 
    1296           0 :         op->acc_mode = acc_mode;
    1297             : 
    1298           0 :         op->intent = flags & O_PATH ? 0 : LOOKUP_OPEN;
    1299             : 
    1300           0 :         if (flags & O_CREAT) {
    1301           0 :                 op->intent |= LOOKUP_CREATE;
    1302           0 :                 if (flags & O_EXCL) {
    1303           0 :                         op->intent |= LOOKUP_EXCL;
    1304           0 :                         flags |= O_NOFOLLOW;
    1305             :                 }
    1306             :         }
    1307             : 
    1308           0 :         if (flags & O_DIRECTORY)
    1309           0 :                 lookup_flags |= LOOKUP_DIRECTORY;
    1310           0 :         if (!(flags & O_NOFOLLOW))
    1311           0 :                 lookup_flags |= LOOKUP_FOLLOW;
    1312             : 
    1313           0 :         if (how->resolve & RESOLVE_NO_XDEV)
    1314           0 :                 lookup_flags |= LOOKUP_NO_XDEV;
    1315           0 :         if (how->resolve & RESOLVE_NO_MAGICLINKS)
    1316           0 :                 lookup_flags |= LOOKUP_NO_MAGICLINKS;
    1317           0 :         if (how->resolve & RESOLVE_NO_SYMLINKS)
    1318           0 :                 lookup_flags |= LOOKUP_NO_SYMLINKS;
    1319           0 :         if (how->resolve & RESOLVE_BENEATH)
    1320           0 :                 lookup_flags |= LOOKUP_BENEATH;
    1321           0 :         if (how->resolve & RESOLVE_IN_ROOT)
    1322           0 :                 lookup_flags |= LOOKUP_IN_ROOT;
    1323           0 :         if (how->resolve & RESOLVE_CACHED) {
    1324             :                 /* Don't bother even trying for create/truncate/tmpfile open */
    1325           0 :                 if (flags & (O_TRUNC | O_CREAT | O_TMPFILE))
    1326             :                         return -EAGAIN;
    1327           0 :                 lookup_flags |= LOOKUP_CACHED;
    1328             :         }
    1329             : 
    1330           0 :         op->lookup_flags = lookup_flags;
    1331           0 :         return 0;
    1332             : }
    1333             : 
    1334             : /**
    1335             :  * file_open_name - open file and return file pointer
    1336             :  *
    1337             :  * @name:       struct filename containing path to open
    1338             :  * @flags:      open flags as per the open(2) second argument
    1339             :  * @mode:       mode for the new file if O_CREAT is set, else ignored
    1340             :  *
    1341             :  * This is the helper to open a file from kernelspace if you really
    1342             :  * have to.  But in general you should not do this, so please move
    1343             :  * along, nothing to see here..
    1344             :  */
    1345           0 : struct file *file_open_name(struct filename *name, int flags, umode_t mode)
    1346             : {
    1347             :         struct open_flags op;
    1348           0 :         struct open_how how = build_open_how(flags, mode);
    1349           0 :         int err = build_open_flags(&how, &op);
    1350           0 :         if (err)
    1351           0 :                 return ERR_PTR(err);
    1352           0 :         return do_filp_open(AT_FDCWD, name, &op);
    1353             : }
    1354             : 
    1355             : /**
    1356             :  * filp_open - open file and return file pointer
    1357             :  *
    1358             :  * @filename:   path to open
    1359             :  * @flags:      open flags as per the open(2) second argument
    1360             :  * @mode:       mode for the new file if O_CREAT is set, else ignored
    1361             :  *
    1362             :  * This is the helper to open a file from kernelspace if you really
    1363             :  * have to.  But in general you should not do this, so please move
    1364             :  * along, nothing to see here..
    1365             :  */
    1366           0 : struct file *filp_open(const char *filename, int flags, umode_t mode)
    1367             : {
    1368           0 :         struct filename *name = getname_kernel(filename);
    1369           0 :         struct file *file = ERR_CAST(name);
    1370             :         
    1371           0 :         if (!IS_ERR(name)) {
    1372           0 :                 file = file_open_name(name, flags, mode);
    1373           0 :                 putname(name);
    1374             :         }
    1375           0 :         return file;
    1376             : }
    1377             : EXPORT_SYMBOL(filp_open);
    1378             : 
    1379           0 : struct file *file_open_root(const struct path *root,
    1380             :                             const char *filename, int flags, umode_t mode)
    1381             : {
    1382             :         struct open_flags op;
    1383           0 :         struct open_how how = build_open_how(flags, mode);
    1384           0 :         int err = build_open_flags(&how, &op);
    1385           0 :         if (err)
    1386           0 :                 return ERR_PTR(err);
    1387           0 :         return do_file_open_root(root, filename, &op);
    1388             : }
    1389             : EXPORT_SYMBOL(file_open_root);
    1390             : 
    1391           0 : static long do_sys_openat2(int dfd, const char __user *filename,
    1392             :                            struct open_how *how)
    1393             : {
    1394             :         struct open_flags op;
    1395           0 :         int fd = build_open_flags(how, &op);
    1396             :         struct filename *tmp;
    1397             : 
    1398           0 :         if (fd)
    1399           0 :                 return fd;
    1400             : 
    1401           0 :         tmp = getname(filename);
    1402           0 :         if (IS_ERR(tmp))
    1403           0 :                 return PTR_ERR(tmp);
    1404             : 
    1405           0 :         fd = get_unused_fd_flags(how->flags);
    1406           0 :         if (fd >= 0) {
    1407           0 :                 struct file *f = do_filp_open(dfd, tmp, &op);
    1408           0 :                 if (IS_ERR(f)) {
    1409           0 :                         put_unused_fd(fd);
    1410           0 :                         fd = PTR_ERR(f);
    1411             :                 } else {
    1412           0 :                         fd_install(fd, f);
    1413             :                 }
    1414             :         }
    1415           0 :         putname(tmp);
    1416           0 :         return fd;
    1417             : }
    1418             : 
    1419           0 : long do_sys_open(int dfd, const char __user *filename, int flags, umode_t mode)
    1420             : {
    1421           0 :         struct open_how how = build_open_how(flags, mode);
    1422           0 :         return do_sys_openat2(dfd, filename, &how);
    1423             : }
    1424             : 
    1425             : 
    1426           0 : SYSCALL_DEFINE3(open, const char __user *, filename, int, flags, umode_t, mode)
    1427             : {
    1428             :         if (force_o_largefile())
    1429           0 :                 flags |= O_LARGEFILE;
    1430           0 :         return do_sys_open(AT_FDCWD, filename, flags, mode);
    1431             : }
    1432             : 
    1433           0 : SYSCALL_DEFINE4(openat, int, dfd, const char __user *, filename, int, flags,
    1434             :                 umode_t, mode)
    1435             : {
    1436             :         if (force_o_largefile())
    1437           0 :                 flags |= O_LARGEFILE;
    1438           0 :         return do_sys_open(dfd, filename, flags, mode);
    1439             : }
    1440             : 
    1441           0 : SYSCALL_DEFINE4(openat2, int, dfd, const char __user *, filename,
    1442             :                 struct open_how __user *, how, size_t, usize)
    1443             : {
    1444             :         int err;
    1445             :         struct open_how tmp;
    1446             : 
    1447             :         BUILD_BUG_ON(sizeof(struct open_how) < OPEN_HOW_SIZE_VER0);
    1448             :         BUILD_BUG_ON(sizeof(struct open_how) != OPEN_HOW_SIZE_LATEST);
    1449             : 
    1450           0 :         if (unlikely(usize < OPEN_HOW_SIZE_VER0))
    1451             :                 return -EINVAL;
    1452             : 
    1453           0 :         err = copy_struct_from_user(&tmp, sizeof(tmp), how, usize);
    1454           0 :         if (err)
    1455           0 :                 return err;
    1456             : 
    1457           0 :         audit_openat2_how(&tmp);
    1458             : 
    1459             :         /* O_LARGEFILE is only allowed for non-O_PATH. */
    1460           0 :         if (!(tmp.flags & O_PATH) && force_o_largefile())
    1461           0 :                 tmp.flags |= O_LARGEFILE;
    1462             : 
    1463           0 :         return do_sys_openat2(dfd, filename, &tmp);
    1464             : }
    1465             : 
    1466             : #ifdef CONFIG_COMPAT
    1467             : /*
    1468             :  * Exactly like sys_open(), except that it doesn't set the
    1469             :  * O_LARGEFILE flag.
    1470             :  */
    1471             : COMPAT_SYSCALL_DEFINE3(open, const char __user *, filename, int, flags, umode_t, mode)
    1472             : {
    1473             :         return do_sys_open(AT_FDCWD, filename, flags, mode);
    1474             : }
    1475             : 
    1476             : /*
    1477             :  * Exactly like sys_openat(), except that it doesn't set the
    1478             :  * O_LARGEFILE flag.
    1479             :  */
    1480             : COMPAT_SYSCALL_DEFINE4(openat, int, dfd, const char __user *, filename, int, flags, umode_t, mode)
    1481             : {
    1482             :         return do_sys_open(dfd, filename, flags, mode);
    1483             : }
    1484             : #endif
    1485             : 
    1486             : #ifndef __alpha__
    1487             : 
    1488             : /*
    1489             :  * For backward compatibility?  Maybe this should be moved
    1490             :  * into arch/i386 instead?
    1491             :  */
    1492           0 : SYSCALL_DEFINE2(creat, const char __user *, pathname, umode_t, mode)
    1493             : {
    1494           0 :         int flags = O_CREAT | O_WRONLY | O_TRUNC;
    1495             : 
    1496             :         if (force_o_largefile())
    1497           0 :                 flags |= O_LARGEFILE;
    1498           0 :         return do_sys_open(AT_FDCWD, pathname, flags, mode);
    1499             : }
    1500             : #endif
    1501             : 
    1502             : /*
    1503             :  * "id" is the POSIX thread ID. We use the
    1504             :  * files pointer for this..
    1505             :  */
    1506           0 : int filp_close(struct file *filp, fl_owner_t id)
    1507             : {
    1508           0 :         int retval = 0;
    1509             : 
    1510           0 :         if (CHECK_DATA_CORRUPTION(file_count(filp) == 0,
    1511             :                         "VFS: Close: file count is 0 (f_op=%ps)",
    1512             :                         filp->f_op)) {
    1513             :                 return 0;
    1514             :         }
    1515             : 
    1516           0 :         if (filp->f_op->flush)
    1517           0 :                 retval = filp->f_op->flush(filp, id);
    1518             : 
    1519           0 :         if (likely(!(filp->f_mode & FMODE_PATH))) {
    1520           0 :                 dnotify_flush(filp, id);
    1521           0 :                 locks_remove_posix(filp, id);
    1522             :         }
    1523           0 :         fput(filp);
    1524           0 :         return retval;
    1525             : }
    1526             : 
    1527             : EXPORT_SYMBOL(filp_close);
    1528             : 
    1529             : /*
    1530             :  * Careful here! We test whether the file pointer is NULL before
    1531             :  * releasing the fd. This ensures that one clone task can't release
    1532             :  * an fd while another clone is opening it.
    1533             :  */
    1534           0 : SYSCALL_DEFINE1(close, unsigned int, fd)
    1535             : {
    1536           0 :         int retval = close_fd(fd);
    1537             : 
    1538             :         /* can't restart close syscall because file table entry was cleared */
    1539           0 :         if (unlikely(retval == -ERESTARTSYS ||
    1540             :                      retval == -ERESTARTNOINTR ||
    1541             :                      retval == -ERESTARTNOHAND ||
    1542             :                      retval == -ERESTART_RESTARTBLOCK))
    1543           0 :                 retval = -EINTR;
    1544             : 
    1545           0 :         return retval;
    1546             : }
    1547             : 
    1548             : /**
    1549             :  * close_range() - Close all file descriptors in a given range.
    1550             :  *
    1551             :  * @fd:     starting file descriptor to close
    1552             :  * @max_fd: last file descriptor to close
    1553             :  * @flags:  reserved for future extensions
    1554             :  *
    1555             :  * This closes a range of file descriptors. All file descriptors
    1556             :  * from @fd up to and including @max_fd are closed.
    1557             :  * Currently, errors from closing a given file descriptor are ignored.
    1558             :  */
    1559           0 : SYSCALL_DEFINE3(close_range, unsigned int, fd, unsigned int, max_fd,
    1560             :                 unsigned int, flags)
    1561             : {
    1562           0 :         return __close_range(fd, max_fd, flags);
    1563             : }
    1564             : 
    1565             : /*
    1566             :  * This routine simulates a hangup on the tty, to arrange that users
    1567             :  * are given clean terminals at login time.
    1568             :  */
    1569           0 : SYSCALL_DEFINE0(vhangup)
    1570             : {
    1571           0 :         if (capable(CAP_SYS_TTY_CONFIG)) {
    1572           0 :                 tty_vhangup_self();
    1573           0 :                 return 0;
    1574             :         }
    1575             :         return -EPERM;
    1576             : }
    1577             : 
    1578             : /*
    1579             :  * Called when an inode is about to be opened.
    1580             :  * We use this to disallow opening large files on 32bit systems if
    1581             :  * the caller didn't specify O_LARGEFILE.  On 64bit systems we force
    1582             :  * on this flag in sys_open.
    1583             :  */
    1584           0 : int generic_file_open(struct inode * inode, struct file * filp)
    1585             : {
    1586           0 :         if (!(filp->f_flags & O_LARGEFILE) && i_size_read(inode) > MAX_NON_LFS)
    1587             :                 return -EOVERFLOW;
    1588             :         return 0;
    1589             : }
    1590             : 
    1591             : EXPORT_SYMBOL(generic_file_open);
    1592             : 
    1593             : /*
    1594             :  * This is used by subsystems that don't want seekable
    1595             :  * file descriptors. The function is not supposed to ever fail; the only
    1596             :  * reason it returns an 'int' and not 'void' is so that it can be plugged
    1597             :  * directly into file_operations structure.
    1598             :  */
    1599           0 : int nonseekable_open(struct inode *inode, struct file *filp)
    1600             : {
    1601           0 :         filp->f_mode &= ~(FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE);
    1602           0 :         return 0;
    1603             : }
    1604             : 
    1605             : EXPORT_SYMBOL(nonseekable_open);
    1606             : 
    1607             : /*
    1608             :  * stream_open is used by subsystems that want stream-like file descriptors.
    1609             :  * Such file descriptors are not seekable and don't have a notion of position
    1610             :  * (file.f_pos is always 0 and ppos passed to .read()/.write() is always NULL).
    1611             :  * Contrary to file descriptors of other regular files, .read() and .write()
    1612             :  * can run simultaneously.
    1613             :  *
    1614             :  * stream_open never fails and is marked to return int so that it could be
    1615             :  * directly used as file_operations.open .
    1616             :  */
    1617           0 : int stream_open(struct inode *inode, struct file *filp)
    1618             : {
    1619           0 :         filp->f_mode &= ~(FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE | FMODE_ATOMIC_POS);
    1620           0 :         filp->f_mode |= FMODE_STREAM;
    1621           0 :         return 0;
    1622             : }
    1623             : 
    1624             : EXPORT_SYMBOL(stream_open);

Generated by: LCOV version 1.14