LCOV - code coverage report
Current view: top level - fs - open.c (source / functions) Hit Total Coverage
Test: coverage.info Lines: 0 548 0.0 %
Date: 2023-03-27 20:00:47 Functions: 0 64 0.0 %

          Line data    Source code
       1             : // SPDX-License-Identifier: GPL-2.0-only
       2             : /*
       3             :  *  linux/fs/open.c
       4             :  *
       5             :  *  Copyright (C) 1991, 1992  Linus Torvalds
       6             :  */
       7             : 
       8             : #include <linux/string.h>
       9             : #include <linux/mm.h>
      10             : #include <linux/file.h>
      11             : #include <linux/fdtable.h>
      12             : #include <linux/fsnotify.h>
      13             : #include <linux/module.h>
      14             : #include <linux/tty.h>
      15             : #include <linux/namei.h>
      16             : #include <linux/backing-dev.h>
      17             : #include <linux/capability.h>
      18             : #include <linux/securebits.h>
      19             : #include <linux/security.h>
      20             : #include <linux/mount.h>
      21             : #include <linux/fcntl.h>
      22             : #include <linux/slab.h>
      23             : #include <linux/uaccess.h>
      24             : #include <linux/fs.h>
      25             : #include <linux/personality.h>
      26             : #include <linux/pagemap.h>
      27             : #include <linux/syscalls.h>
      28             : #include <linux/rcupdate.h>
      29             : #include <linux/audit.h>
      30             : #include <linux/falloc.h>
      31             : #include <linux/fs_struct.h>
      32             : #include <linux/ima.h>
      33             : #include <linux/dnotify.h>
      34             : #include <linux/compat.h>
      35             : #include <linux/mnt_idmapping.h>
      36             : #include <linux/filelock.h>
      37             : 
      38             : #include "internal.h"
      39             : 
      40           0 : int do_truncate(struct mnt_idmap *idmap, struct dentry *dentry,
      41             :                 loff_t length, unsigned int time_attrs, struct file *filp)
      42             : {
                               /*
                                * Resize the inode behind @dentry to @length.  An iattr carrying
                                * ATTR_SIZE plus the caller-supplied @time_attrs is built and
                                * handed to notify_change() with the inode lock held.
                                *
                                * Returns -EINVAL for a negative @length, a negative result from
                                * dentry_needs_remove_privs(), or else the notify_change() result.
                                */
      43             :         int ret;
      44             :         struct iattr newattrs;
      45             : 
      46             :         /* Not pretty: "inode->i_size" shouldn't really be signed. But it is. */
      47           0 :         if (length < 0)
      48             :                 return -EINVAL;
      49             : 
                               /* Only the fields assigned below are set; the rest of newattrs stays uninitialised. */
      50           0 :         newattrs.ia_size = length;
      51           0 :         newattrs.ia_valid = ATTR_SIZE | time_attrs;
                               /* @filp is optional: when given it is recorded and flagged via ATTR_FILE. */
      52           0 :         if (filp) {
      53           0 :                 newattrs.ia_file = filp;
      54           0 :                 newattrs.ia_valid |= ATTR_FILE;
      55             :         }
      56             : 
      57             :         /* Remove suid, sgid, and file capabilities on truncate too */
      58           0 :         ret = dentry_needs_remove_privs(idmap, dentry);
      59           0 :         if (ret < 0)
      60             :                 return ret;
                               /* A positive result is a set of attribute bits to add, together with ATTR_FORCE. */
      61           0 :         if (ret)
      62           0 :                 newattrs.ia_valid |= ret | ATTR_FORCE;
      63             : 
      64           0 :         inode_lock(dentry->d_inode);
      65             :         /* Note any delegations or leases have already been broken: */
      66           0 :         ret = notify_change(idmap, dentry, &newattrs, NULL);
      67           0 :         inode_unlock(dentry->d_inode);
      68           0 :         return ret;
      69             : }
      70             : 
      71           0 : long vfs_truncate(const struct path *path, loff_t length)
      72             : {
                               /*
                                * Truncate the regular file at @path to @length.
                                *
                                * In order: reject non-regular files, take mount write access,
                                * check MAY_WRITE permission, refuse append-only inodes, take inode
                                * write access, break leases, ask the security hook, then call
                                * do_truncate().  Whatever was acquired is released through the
                                * labels at the end, in reverse order.
                                *
                                * Returns 0 on success or a negative errno from the failing step.
                                */
      73             :         struct mnt_idmap *idmap;
      74             :         struct inode *inode;
      75             :         long error;
      76             : 
      77           0 :         inode = path->dentry->d_inode;
      78             : 
      79             :         /* For directories it's -EISDIR, for other non-regulars - -EINVAL */
      80           0 :         if (S_ISDIR(inode->i_mode))
      81             :                 return -EISDIR;
      82           0 :         if (!S_ISREG(inode->i_mode))
      83             :                 return -EINVAL;
      84             : 
      85           0 :         error = mnt_want_write(path->mnt);
      86           0 :         if (error)
      87             :                 goto out;
      88             : 
      89           0 :         idmap = mnt_idmap(path->mnt);
      90           0 :         error = inode_permission(idmap, inode, MAY_WRITE);
      91           0 :         if (error)
      92             :                 goto mnt_drop_write_and_out;
      93             : 
                               /* error is preset so the bare goto below reports -EPERM. */
      94           0 :         error = -EPERM;
      95           0 :         if (IS_APPEND(inode))
      96             :                 goto mnt_drop_write_and_out;
      97             : 
      98           0 :         error = get_write_access(inode);
      99           0 :         if (error)
     100             :                 goto mnt_drop_write_and_out;
     101             : 
     102             :         /*
     103             :          * Make sure that there are no leases.  get_write_access() protects
     104             :          * against the truncate racing with a lease-granting setlease().
     105             :          */
     106           0 :         error = break_lease(inode, O_WRONLY);
     107           0 :         if (error)
     108             :                 goto put_write_and_out;
     109             : 
     110           0 :         error = security_path_truncate(path);
     111             :         if (!error)
     112           0 :                 error = do_truncate(idmap, path->dentry, length, 0, NULL);
     113             : 
                       /* Undo get_write_access(). */
     114             : put_write_and_out:
     115             :         put_write_access(inode);
                       /* Undo mnt_want_write(). */
     116             : mnt_drop_write_and_out:
     117           0 :         mnt_drop_write(path->mnt);
     118             : out:
     119             :         return error;
     120             : }
     121             : EXPORT_SYMBOL_GPL(vfs_truncate);
     122             : 
     123           0 : long do_sys_truncate(const char __user *pathname, loff_t length)
     124             : {
                               /*
                                * Path-based truncate helper: look up the user-supplied @pathname
                                * relative to AT_FDCWD with LOOKUP_FOLLOW and pass the result to
                                * vfs_truncate().
                                *
                                * Returns -EINVAL for a negative @length, the lookup error, or the
                                * vfs_truncate() result.
                                */
     125           0 :         unsigned int lookup_flags = LOOKUP_FOLLOW;
     126             :         struct path path;
     127             :         int error;
     128             : 
     129           0 :         if (length < 0)      /* sorry, but loff_t says... */
     130             :                 return -EINVAL;
     131             : 
     132             : retry:
     133           0 :         error = user_path_at(AT_FDCWD, pathname, lookup_flags, &path);
     134           0 :         if (!error) {
     135           0 :                 error = vfs_truncate(&path, length);
     136           0 :                 path_put(&path);
     137             :         }
                               /* When retry_estale() asks for it, redo the lookup with LOOKUP_REVAL added. */
     138           0 :         if (retry_estale(error, lookup_flags)) {
     139             :                 lookup_flags |= LOOKUP_REVAL;
     140             :                 goto retry;
     141             :         }
     142             :         return error;
     143             : }
     144             : 
     145           0 : SYSCALL_DEFINE2(truncate, const char __user *, path, long, length)
     146             : {
                               /* truncate syscall entry: forwards path and length to do_sys_truncate(). */
     147           0 :         return do_sys_truncate(path, length);
     148             : }
     149             : 
     150             : #ifdef CONFIG_COMPAT
     151             : COMPAT_SYSCALL_DEFINE2(truncate, const char __user *, path, compat_off_t, length)
     152             : {
                               /* Compat truncate entry (CONFIG_COMPAT only): same forwarding, length arrives as compat_off_t. */
     153             :         return do_sys_truncate(path, length);
     154             : }
     155             : #endif
     156             : 
     157           0 : long do_sys_ftruncate(unsigned int fd, loff_t length, int small)
     158             : {
                               /*
                                * Descriptor-based truncate helper.
                                *
                                * @fd:     descriptor of the file to resize
                                * @length: new size; negative values yield -EINVAL
                                * @small:  non-zero to enforce the MAX_NON_LFS size limit; it is
                                *          cleared when the file was opened with O_LARGEFILE
                                *
                                * error is preset before each check so that the bare gotos return
                                * the intended code.  Returns -EBADF for an invalid @fd, -EINVAL
                                * for a non-regular file, a file not open for writing or a length
                                * over the limit, -EPERM for an append-only inode, otherwise the
                                * result of security_file_truncate() or do_truncate().
                                */
     159             :         struct inode *inode;
     160             :         struct dentry *dentry;
     161             :         struct fd f;
     162             :         int error;
     163             : 
     164           0 :         error = -EINVAL;
     165           0 :         if (length < 0)
     166             :                 goto out;
     167           0 :         error = -EBADF;
     168           0 :         f = fdget(fd);
     169           0 :         if (!f.file)
     170             :                 goto out;
     171             : 
     172             :         /* explicitly opened as large or we are on 64-bit box */
     173           0 :         if (f.file->f_flags & O_LARGEFILE)
     174           0 :                 small = 0;
     175             : 
     176           0 :         dentry = f.file->f_path.dentry;
     177           0 :         inode = dentry->d_inode;
     178           0 :         error = -EINVAL;
     179           0 :         if (!S_ISREG(inode->i_mode) || !(f.file->f_mode & FMODE_WRITE))
     180             :                 goto out_putf;
     181             : 
                               /* error already holds -EINVAL from above; this assignment is redundant. */
     182           0 :         error = -EINVAL;
     183             :         /* Cannot ftruncate over 2^31 bytes without large file support */
     184           0 :         if (small && length > MAX_NON_LFS)
     185             :                 goto out_putf;
     186             : 
     187           0 :         error = -EPERM;
     188             :         /* Check IS_APPEND on real upper inode */
     189           0 :         if (IS_APPEND(file_inode(f.file)))
     190             :                 goto out_putf;
                               /* The security hook and the truncate both run inside sb_start_write()/sb_end_write(). */
     191           0 :         sb_start_write(inode->i_sb);
     192           0 :         error = security_file_truncate(f.file);
     193             :         if (!error)
     194           0 :                 error = do_truncate(file_mnt_idmap(f.file), dentry, length,
     195             :                                     ATTR_MTIME | ATTR_CTIME, f.file);
     196           0 :         sb_end_write(inode->i_sb);
     197             : out_putf:
     198           0 :         fdput(f);
     199             : out:
     200           0 :         return error;
     201             : }
     202             : 
     203           0 : SYSCALL_DEFINE2(ftruncate, unsigned int, fd, unsigned long, length)
     204             : {
                               /* ftruncate syscall entry: small=1, so the MAX_NON_LFS limit applies unless the file has O_LARGEFILE. */
     205           0 :         return do_sys_ftruncate(fd, length, 1);
     206             : }
     207             : 
     208             : #ifdef CONFIG_COMPAT
     209             : COMPAT_SYSCALL_DEFINE2(ftruncate, unsigned int, fd, compat_ulong_t, length)
     210             : {
                               /*
                                * Compat ftruncate entry (CONFIG_COMPAT only); small=1 as in the
                                * native syscall.
                                *
                                * NOTE(review): length is declared compat_ulong_t here, while
                                * compat truncate takes compat_off_t.  If compat_ulong_t is an
                                * unsigned 32-bit type, a negative length from a 32-bit caller
                                * would widen to a positive loff_t and miss the length < 0 check
                                * in do_sys_ftruncate() - confirm against the type definition.
                                */
     211             :         return do_sys_ftruncate(fd, length, 1);
     212             : }
     213             : #endif
     214             : 
     215             : /* LFS versions of truncate are only needed on 32 bit machines */
     216             : #if BITS_PER_LONG == 32
     217             : SYSCALL_DEFINE2(truncate64, const char __user *, path, loff_t, length)
     218             : {
                               /* truncate64 entry (built only when BITS_PER_LONG == 32): takes a full loff_t length. */
     219             :         return do_sys_truncate(path, length);
     220             : }
     221             : 
     222             : SYSCALL_DEFINE2(ftruncate64, unsigned int, fd, loff_t, length)
     223             : {
                               /* ftruncate64 entry (built only when BITS_PER_LONG == 32): small=0 skips the MAX_NON_LFS limit. */
     224             :         return do_sys_ftruncate(fd, length, 0);
     225             : }
     226             : #endif /* BITS_PER_LONG == 32 */
     227             : 
     228             : #if defined(CONFIG_COMPAT) && defined(__ARCH_WANT_COMPAT_TRUNCATE64)
     229             : COMPAT_SYSCALL_DEFINE3(truncate64, const char __user *, pathname,
     230             :                        compat_arg_u64_dual(length))
     231             : {
                               /*
                                * Compat truncate64 entry; forwards to ksys_truncate(), which is
                                * defined outside this listing.
                                * NOTE(review): compat_arg_u64_dual()/compat_arg_u64_glue()
                                * presumably declare and rejoin a 64-bit length passed as two
                                * 32-bit halves - confirm against the macro definitions.
                                */
     232             :         return ksys_truncate(pathname, compat_arg_u64_glue(length));
     233             : }
     234             : #endif
     235             : 
     236             : #if defined(CONFIG_COMPAT) && defined(__ARCH_WANT_COMPAT_FTRUNCATE64)
     237             : COMPAT_SYSCALL_DEFINE3(ftruncate64, unsigned int, fd,
     238             :                        compat_arg_u64_dual(length))
     239             : {
                               /*
                                * Compat ftruncate64 entry; forwards to ksys_ftruncate(), which is
                                * defined outside this listing.
                                * NOTE(review): the length is presumably delivered as two 32-bit
                                * halves and rejoined by compat_arg_u64_glue() - confirm against
                                * the macro definitions.
                                */
     240             :         return ksys_ftruncate(fd, compat_arg_u64_glue(length));
     241             : }
     242             : #endif
     243             : 
     244           0 : int vfs_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
     245             : {
                               /*
                                * Validate a fallocate request and pass it to the file's
                                * ->fallocate method.
                                *
                                * @file:   open file; must have FMODE_WRITE
                                * @mode:   FALLOC_FL_* flags, checked for supported bits and for
                                *          the allowed combinations below
                                * @offset: start of the range; must not be negative
                                * @len:    length of the range; must be greater than zero
                                *
                                * The method call is bracketed by file_start_write() and
                                * file_end_write(), and fsnotify_modify() is raised when it
                                * returns 0.
                                *
                                * Returns 0 on success, or -EINVAL, -EOPNOTSUPP, -EBADF, -EPERM,
                                * -ETXTBSY, -ESPIPE, -EISDIR, -ENODEV, -EFBIG, or the error from
                                * security_file_permission() or ->fallocate.
                                *
                                * NOTE(review): ret is long while the function returns int, so
                                * the value is narrowed on return - confirm ->fallocate never
                                * returns anything outside the int range.
                                */
     246           0 :         struct inode *inode = file_inode(file);
     247             :         long ret;
     248             : 
     249           0 :         if (offset < 0 || len <= 0)
     250             :                 return -EINVAL;
     251             : 
     252             :         /* Return error if mode is not supported */
     253           0 :         if (mode & ~FALLOC_FL_SUPPORTED_MASK)
     254             :                 return -EOPNOTSUPP;
     255             : 
     256             :         /* Punch hole and zero range are mutually exclusive */
     257           0 :         if ((mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_ZERO_RANGE)) ==
     258             :             (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_ZERO_RANGE))
     259             :                 return -EOPNOTSUPP;
     260             : 
     261             :         /* Punch hole must have keep size set */
     262           0 :         if ((mode & FALLOC_FL_PUNCH_HOLE) &&
     263             :             !(mode & FALLOC_FL_KEEP_SIZE))
     264             :                 return -EOPNOTSUPP;
     265             : 
     266             :         /* Collapse range should only be used exclusively. */
     267           0 :         if ((mode & FALLOC_FL_COLLAPSE_RANGE) &&
     268           0 :             (mode & ~FALLOC_FL_COLLAPSE_RANGE))
     269             :                 return -EINVAL;
     270             : 
     271             :         /* Insert range should only be used exclusively. */
     272           0 :         if ((mode & FALLOC_FL_INSERT_RANGE) &&
     273           0 :             (mode & ~FALLOC_FL_INSERT_RANGE))
     274             :                 return -EINVAL;
     275             : 
     276             :         /* Unshare range should only be used with allocate mode. */
     277           0 :         if ((mode & FALLOC_FL_UNSHARE_RANGE) &&
     278           0 :             (mode & ~(FALLOC_FL_UNSHARE_RANGE | FALLOC_FL_KEEP_SIZE)))
     279             :                 return -EINVAL;
     280             : 
     281           0 :         if (!(file->f_mode & FMODE_WRITE))
     282             :                 return -EBADF;
     283             : 
     284             :         /*
     285             :          * We can only allow pure fallocate on append only files
     286             :          */
     287           0 :         if ((mode & ~FALLOC_FL_KEEP_SIZE) && IS_APPEND(inode))
     288             :                 return -EPERM;
     289             : 
     290           0 :         if (IS_IMMUTABLE(inode))
     291             :                 return -EPERM;
     292             : 
     293             :         /*
     294             :          * We cannot allow any fallocate operation on an active swapfile
     295             :          */
     296           0 :         if (IS_SWAPFILE(inode))
     297             :                 return -ETXTBSY;
     298             : 
     299             :         /*
     300             :          * Revalidate the write permissions, in case security policy has
     301             :          * changed since the files were opened.
     302             :          */
     303           0 :         ret = security_file_permission(file, MAY_WRITE);
     304             :         if (ret)
     305             :                 return ret;
     306             : 
                               /* File-type checks: FIFOs and directories get their own codes, anything else that is not a regular file or block device gets -ENODEV. */
     307           0 :         if (S_ISFIFO(inode->i_mode))
     308             :                 return -ESPIPE;
     309             : 
     310           0 :         if (S_ISDIR(inode->i_mode))
     311             :                 return -EISDIR;
     312             : 
     313           0 :         if (!S_ISREG(inode->i_mode) && !S_ISBLK(inode->i_mode))
     314             :                 return -ENODEV;
     315             : 
     316             :         /* Check for wrap through zero too */
                               /* NOTE(review): offset + len is signed loff_t arithmetic; the < 0 test presumes overflow wraps - confirm the build guarantees that. */
     317           0 :         if (((offset + len) > inode->i_sb->s_maxbytes) || ((offset + len) < 0))
     318             :                 return -EFBIG;
     319             : 
     320           0 :         if (!file->f_op->fallocate)
     321             :                 return -EOPNOTSUPP;
     322             : 
     323           0 :         file_start_write(file);
     324           0 :         ret = file->f_op->fallocate(file, mode, offset, len);
     325             : 
     326             :         /*
     327             :          * Create inotify and fanotify events.
     328             :          *
     329             :          * To keep the logic simple always create events if fallocate succeeds.
     330             :          * This implies that events are even created if the file size remains
     331             :          * unchanged, e.g. when using flag FALLOC_FL_KEEP_SIZE.
     332             :          */
     333           0 :         if (ret == 0)
     334             :                 fsnotify_modify(file);
     335             : 
     336           0 :         file_end_write(file);
     337           0 :         return ret;
     338             : }
     339             : EXPORT_SYMBOL_GPL(vfs_fallocate);
     340             : 
     341           0 : int ksys_fallocate(int fd, int mode, loff_t offset, loff_t len)
     342             : {
                               /*
                                * Descriptor-based wrapper around vfs_fallocate(): resolves @fd
                                * with fdget() and releases it with fdput() afterwards.
                                * Returns -EBADF when @fd has no file, else the vfs_fallocate()
                                * result.
                                */
     343           0 :         struct fd f = fdget(fd);
     344           0 :         int error = -EBADF;
     345             : 
     346           0 :         if (f.file) {
     347           0 :                 error = vfs_fallocate(f.file, mode, offset, len);
     348           0 :                 fdput(f);
     349             :         }
     350           0 :         return error;
     351             : }
     352             : 
     353           0 : SYSCALL_DEFINE4(fallocate, int, fd, int, mode, loff_t, offset, loff_t, len)
     354             : {
                               /* fallocate syscall entry: forwards all four arguments to ksys_fallocate(). */
     355           0 :         return ksys_fallocate(fd, mode, offset, len);
     356             : }
     357             : 
     358             : #if defined(CONFIG_COMPAT) && defined(__ARCH_WANT_COMPAT_FALLOCATE)
     359             : COMPAT_SYSCALL_DEFINE6(fallocate, int, fd, int, mode, compat_arg_u64_dual(offset),
     360             :                        compat_arg_u64_dual(len))
     361             : {
                               /*
                                * Compat fallocate entry; forwards to ksys_fallocate().
                                * NOTE(review): offset and len are presumably each delivered as
                                * two 32-bit halves and rejoined by compat_arg_u64_glue() -
                                * confirm against the macro definitions.
                                */
     362             :         return ksys_fallocate(fd, mode, compat_arg_u64_glue(offset),
     363             :                               compat_arg_u64_glue(len));
     364             : }
     365             : #endif
     366             : 
     367             : /*
     368             :  * access() needs to use the real uid/gid, not the effective uid/gid.
     369             :  * We do this by temporarily clearing all FS-related capabilities and
     370             :  * switching the fsuid/fsgid around to the real ones.
     371             :  *
     372             :  * Creating new credentials is expensive, so we try to skip doing it,
     373             :  * which we can if the result would match what we already got.
     374             :  */
     375           0 : static bool access_need_override_creds(int flags)
     376             : {
                               /*
                                * Decide whether do_faccessat() has to install temporary
                                * credentials.  Returns false when AT_EACCESS is set in @flags or
                                * when the current credentials already have the values that
                                * access_override_creds() would set; returns true otherwise.
                                * The checks below mirror the assignments in that function.
                                */
     377             :         const struct cred *cred;
     378             : 
     379           0 :         if (flags & AT_EACCESS)
     380             :                 return false;
     381             : 
     382           0 :         cred = current_cred();
                               /* fsuid/fsgid would be replaced by uid/gid, so any difference means an override is needed. */
     383           0 :         if (!uid_eq(cred->fsuid, cred->uid) ||
     384           0 :             !gid_eq(cred->fsgid, cred->gid))
     385             :                 return true;
     386             : 
     387           0 :         if (!issecure(SECURE_NO_SETUID_FIXUP)) {
     388           0 :                 kuid_t root_uid = make_kuid(cred->user_ns, 0);
                                       /* Non-root: effective caps would be cleared.  Root: they would be set to the permitted set. */
     389           0 :                 if (!uid_eq(cred->uid, root_uid)) {
     390           0 :                         if (!cap_isclear(cred->cap_effective))
     391             :                                 return true;
     392             :                 } else {
     393           0 :                         if (!cap_isidentical(cred->cap_effective,
     394             :                             cred->cap_permitted))
     395             :                                 return true;
     396             :                 }
     397             :         }
     398             : 
     399             :         return false;
     400             : }
     401             : 
     402           0 : static const struct cred *access_override_creds(void)
     403             : {
                               /*
                                * Build a copy of the current credentials in which fsuid/fsgid
                                * are replaced by uid/gid (and, unless SECURE_NO_SETUID_FIXUP is
                                * set, the effective capabilities are adjusted), then install it
                                * with override_creds().
                                *
                                * Returns the previous credentials as handed back by
                                * override_creds(), or NULL when prepare_creds() fails.
                                */
     404             :         const struct cred *old_cred;
     405             :         struct cred *override_cred;
     406             : 
     407           0 :         override_cred = prepare_creds();
     408           0 :         if (!override_cred)
     409             :                 return NULL;
     410             : 
     411             :         /*
     412             :          * XXX access_need_override_creds performs checks in hopes of skipping
     413             :          * this work. Make sure it stays in sync if making any changes in this
     414             :          * routine.
     415             :          */
     416             : 
     417           0 :         override_cred->fsuid = override_cred->uid;
     418           0 :         override_cred->fsgid = override_cred->gid;
     419             : 
     420           0 :         if (!issecure(SECURE_NO_SETUID_FIXUP)) {
     421             :                 /* Clear the capabilities if we switch to a non-root user */
     422           0 :                 kuid_t root_uid = make_kuid(override_cred->user_ns, 0);
     423           0 :                 if (!uid_eq(override_cred->uid, root_uid))
     424           0 :                         cap_clear(override_cred->cap_effective);
     425             :                 else
     426           0 :                         override_cred->cap_effective =
     427             :                                 override_cred->cap_permitted;
     428             :         }
     429             : 
     430             :         /*
     431             :          * The new set of credentials can *only* be used in
     432             :          * task-synchronous circumstances, and does not need
     433             :          * RCU freeing, unless somebody then takes a separate
     434             :          * reference to it.
     435             :          *
     436             :          * NOTE! This is _only_ true because this credential
     437             :          * is used purely for override_creds() that installs
     438             :          * it as the subjective cred. Other threads will be
     439             :          * accessing ->real_cred, not the subjective cred.
     440             :          *
     441             :          * If somebody _does_ make a copy of this (using the
     442             :          * 'get_current_cred()' function), that will clear the
     443             :          * non_rcu field, because now that other user may be
     444             :          * expecting RCU freeing. But normal thread-synchronous
     445             :          * cred accesses will keep things non-RCU.
     446             :          */
     447           0 :         override_cred->non_rcu = 1;
     448             : 
     449           0 :         old_cred = override_creds(override_cred);
     450             : 
     451             :         /* override_creds() gets its own ref */
     452             :         put_cred(override_cred);
     453             : 
     454             :         return old_cred;
     455             : }
     456             : 
     457           0 : static long do_faccessat(int dfd, const char __user *filename, int mode, int flags)
     458             : {
                               /*
                                * Common implementation behind access, faccessat and faccessat2.
                                *
                                * @dfd:      directory descriptor for the lookup (or AT_FDCWD)
                                * @filename: user-space path to check
                                * @mode:     access bits to test; bits outside S_IRWXO are rejected
                                * @flags:    AT_EACCESS, AT_SYMLINK_NOFOLLOW and/or AT_EMPTY_PATH
                                *
                                * Temporary credentials are installed first when
                                * access_need_override_creds() says so, and reverted at out.
                                *
                                * Returns 0 when access is allowed, -EINVAL for bad @mode or
                                * @flags, -ENOMEM when the credential override fails, the lookup
                                * error, -EACCES for an exec check on a regular file when
                                * path_noexec() is true, the inode_permission() result, or
                                * -EROFS for a write check on a read-only mount.
                                */
     459             :         struct path path;
     460             :         struct inode *inode;
     461             :         int res;
     462           0 :         unsigned int lookup_flags = LOOKUP_FOLLOW;
     463           0 :         const struct cred *old_cred = NULL;
     464             : 
     465           0 :         if (mode & ~S_IRWXO)        /* where's F_OK, X_OK, W_OK, R_OK? */
     466             :                 return -EINVAL;
     467             : 
     468           0 :         if (flags & ~(AT_EACCESS | AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH))
     469             :                 return -EINVAL;
     470             : 
                               /* Translate the AT_* flags into lookup flags. */
     471           0 :         if (flags & AT_SYMLINK_NOFOLLOW)
     472           0 :                 lookup_flags &= ~LOOKUP_FOLLOW;
     473           0 :         if (flags & AT_EMPTY_PATH)
     474           0 :                 lookup_flags |= LOOKUP_EMPTY;
     475             : 
     476           0 :         if (access_need_override_creds(flags)) {
     477           0 :                 old_cred = access_override_creds();
     478           0 :                 if (!old_cred)
     479             :                         return -ENOMEM;
     480             :         }
     481             : 
     482             : retry:
     483           0 :         res = user_path_at(dfd, filename, lookup_flags, &path);
     484           0 :         if (res)
     485             :                 goto out;
     486             : 
     487           0 :         inode = d_backing_inode(path.dentry);
     488             : 
     489           0 :         if ((mode & MAY_EXEC) && S_ISREG(inode->i_mode)) {
     490             :                 /*
     491             :                  * MAY_EXEC on regular files is denied if the fs is mounted
     492             :                  * with the "noexec" flag.
     493             :                  */
     494           0 :                 res = -EACCES;
     495           0 :                 if (path_noexec(&path))
     496             :                         goto out_path_release;
     497             :         }
     498             : 
     499           0 :         res = inode_permission(mnt_idmap(path.mnt), inode, mode | MAY_ACCESS);
     500             :         /* SuS v2 requires we report a read only fs too */
                               /* The read-only test below is reached only for a permitted write check on a non-special file. */
     501           0 :         if (res || !(mode & S_IWOTH) || special_file(inode->i_mode))
     502             :                 goto out_path_release;
     503             :         /*
     504             :          * This is a rare case where using __mnt_is_readonly()
     505             :          * is OK without a mnt_want/drop_write() pair.  Since
     506             :          * no actual write to the fs is performed here, we do
     507             :          * not need to telegraph that to anyone.
     508             :          *
     509             :          * By doing this, we accept that this access is
     510             :          * inherently racy and know that the fs may change
     511             :          * state before we even see this result.
     512             :          */
     513           0 :         if (__mnt_is_readonly(path.mnt))
     514           0 :                 res = -EROFS;
     515             : 
     516             : out_path_release:
     517           0 :         path_put(&path);
                               /* When retry_estale() asks for it, redo the lookup with LOOKUP_REVAL added. */
     518           0 :         if (retry_estale(res, lookup_flags)) {
     519           0 :                 lookup_flags |= LOOKUP_REVAL;
     520           0 :                 goto retry;
     521             :         }
     522             : out:
                               /* old_cred is non-NULL only when the override was installed above. */
     523           0 :         if (old_cred)
     524           0 :                 revert_creds(old_cred);
     525             : 
     526           0 :         return res;
     527             : }
     528             : 
     529           0 : SYSCALL_DEFINE3(faccessat, int, dfd, const char __user *, filename, int, mode)
     530             : {
                               /* faccessat syscall entry: this variant takes no flags, so 0 is passed to do_faccessat(). */
     531           0 :         return do_faccessat(dfd, filename, mode, 0);
     532             : }
     533             : 
     534           0 : SYSCALL_DEFINE4(faccessat2, int, dfd, const char __user *, filename, int, mode,
     535             :                 int, flags)
     536             : {
                               /* faccessat2 syscall entry: like faccessat, but forwards the caller's flags to do_faccessat(). */
     537           0 :         return do_faccessat(dfd, filename, mode, flags);
     538             : }
     539             : 
     540           0 : SYSCALL_DEFINE2(access, const char __user *, filename, int, mode)
     541             : {
                               /* access syscall entry: do_faccessat() relative to AT_FDCWD with no flags. */
     542           0 :         return do_faccessat(AT_FDCWD, filename, mode, 0);
     543             : }
     544             : 
     545           0 : SYSCALL_DEFINE1(chdir, const char __user *, filename)
     546             : {
                               /*
                                * chdir syscall entry: look up @filename relative to AT_FDCWD
                                * with LOOKUP_FOLLOW | LOOKUP_DIRECTORY, check
                                * MAY_EXEC | MAY_CHDIR on it, and on success make it the pwd of
                                * current->fs.
                                * Returns 0, the lookup error, or the path_permission() error.
                                */
     547             :         struct path path;
     548             :         int error;
     549           0 :         unsigned int lookup_flags = LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
     550             : retry:
     551           0 :         error = user_path_at(AT_FDCWD, filename, lookup_flags, &path);
     552           0 :         if (error)
     553             :                 goto out;
     554             : 
     555           0 :         error = path_permission(&path, MAY_EXEC | MAY_CHDIR);
     556           0 :         if (error)
     557             :                 goto dput_and_out;
     558             : 
     559           0 :         set_fs_pwd(current->fs, &path);
     560             : 
     561             : dput_and_out:
                               /* The path reference from the lookup is dropped on both success and failure. */
     562           0 :         path_put(&path);
     563           0 :         if (retry_estale(error, lookup_flags)) {
     564             :                 lookup_flags |= LOOKUP_REVAL;
     565             :                 goto retry;
     566             :         }
     567             : out:
     568           0 :         return error;
     569             : }
     570             : 
     571           0 : SYSCALL_DEFINE1(fchdir, unsigned int, fd)
     572             : {
                               /*
                                * fchdir syscall entry: make the path behind @fd the pwd of
                                * current->fs.
                                * Returns -EBADF when @fd has no file, -ENOTDIR when
                                * d_can_lookup() rejects its dentry, otherwise the
                                * file_permission() result for MAY_EXEC | MAY_CHDIR.
                                */
     573           0 :         struct fd f = fdget_raw(fd);
     574             :         int error;
     575             : 
     576           0 :         error = -EBADF;
     577           0 :         if (!f.file)
     578             :                 goto out;
     579             : 
     580           0 :         error = -ENOTDIR;
     581           0 :         if (!d_can_lookup(f.file->f_path.dentry))
     582             :                 goto out_putf;
     583             : 
     584           0 :         error = file_permission(f.file, MAY_EXEC | MAY_CHDIR);
     585           0 :         if (!error)
     586           0 :                 set_fs_pwd(current->fs, &f.file->f_path);
     587             : out_putf:
     588           0 :         fdput(f);
     589             : out:
     590           0 :         return error;
     591             : }
     592             : 
     593           0 : SYSCALL_DEFINE1(chroot, const char __user *, filename)
     594             : {
                               /*
                                * chroot syscall entry: look up @filename relative to AT_FDCWD
                                * with LOOKUP_FOLLOW | LOOKUP_DIRECTORY and, when all checks
                                * pass, make it the root of current->fs.
                                *
                                * Returns 0 on success, the lookup or path_permission() error,
                                * -EPERM when the caller lacks CAP_SYS_CHROOT in
                                * current_user_ns(), or the security_path_chroot() error.
                                */
     595             :         struct path path;
     596             :         int error;
     597           0 :         unsigned int lookup_flags = LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
     598             : retry:
     599           0 :         error = user_path_at(AT_FDCWD, filename, lookup_flags, &path);
     600           0 :         if (error)
     601             :                 goto out;
     602             : 
     603           0 :         error = path_permission(&path, MAY_EXEC | MAY_CHDIR);
     604           0 :         if (error)
     605             :                 goto dput_and_out;
     606             : 
     607           0 :         error = -EPERM;
     608           0 :         if (!ns_capable(current_user_ns(), CAP_SYS_CHROOT))
     609             :                 goto dput_and_out;
     610           0 :         error = security_path_chroot(&path);
     611             :         if (error)
     612             :                 goto dput_and_out;
     613             : 
     614           0 :         set_fs_root(current->fs, &path);
                               /* error is necessarily 0 here already: the check just above jumped away otherwise. */
     615           0 :         error = 0;
     616             : dput_and_out:
     617           0 :         path_put(&path);
     618           0 :         if (retry_estale(error, lookup_flags)) {
     619             :                 lookup_flags |= LOOKUP_REVAL;
     620             :                 goto retry;
     621             :         }
     622             : out:
     623           0 :         return error;
     624             : }
     625             : /* chmod_common - replace the S_IALLUGO bits of @path's inode mode with those of @mode; 0 or an error. */
     626           0 : int chmod_common(const struct path *path, umode_t mode)
     627             : {
     628           0 :         struct inode *inode = path->dentry->d_inode;
     629           0 :         struct inode *delegated_inode = NULL;
     630             :         struct iattr newattrs;
     631             :         int error;
     632             :         /* Mount write access is held until mnt_drop_write() at the end. */
     633           0 :         error = mnt_want_write(path->mnt);
     634           0 :         if (error)
     635             :                 return error;
     636             : retry_deleg:
     637           0 :         inode_lock(inode);
     638           0 :         error = security_path_chmod(path, mode);
     639             :         if (error)
     640             :                 goto out_unlock;
     641           0 :         newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO);
     642           0 :         newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
     643           0 :         error = notify_change(mnt_idmap(path->mnt), path->dentry,
     644             :                               &newattrs, &delegated_inode);
     645             : out_unlock:
     646           0 :         inode_unlock(inode);
     647           0 :         if (delegated_inode) {  /* non-NULL only if notify_change() set it */
     648           0 :                 error = break_deleg_wait(&delegated_inode);
     649           0 :                 if (!error)
     650             :                         goto retry_deleg;       /* re-lock and attempt the change again */
     651             :         }
     652           0 :         mnt_drop_write(path->mnt);
     653           0 :         return error;
     654             : }
     655             : /* vfs_fchmod - call audit_file() on @file, then chmod_common() on its f_path. */
     656           0 : int vfs_fchmod(struct file *file, umode_t mode)
     657             : {
     658           0 :         audit_file(file);
     659           0 :         return chmod_common(&file->f_path, mode);
     660             : }
     661             : /* fchmod(2): -EBADF if @fd has no file, otherwise the result of vfs_fchmod(). */
     662           0 : SYSCALL_DEFINE2(fchmod, unsigned int, fd, umode_t, mode)
     663             : {
     664           0 :         struct fd f = fdget(fd);
     665           0 :         int err = -EBADF;
     666             : 
     667           0 :         if (f.file) {
     668           0 :                 err = vfs_fchmod(f.file, mode);
     669           0 :                 fdput(f);       /* pairs with fdget() above */
     670             :         }
     671           0 :         return err;
     672             : }
     673             : /* do_fchmodat - look up @filename relative to @dfd with LOOKUP_FOLLOW and chmod_common() the result. */
     674           0 : static int do_fchmodat(int dfd, const char __user *filename, umode_t mode)
     675             : {
     676             :         struct path path;
     677             :         int error;
     678           0 :         unsigned int lookup_flags = LOOKUP_FOLLOW;
     679             : retry:
     680           0 :         error = user_path_at(dfd, filename, lookup_flags, &path);
     681           0 :         if (!error) {
     682           0 :                 error = chmod_common(&path, mode);
     683           0 :                 path_put(&path);
     684           0 :                 if (retry_estale(error, lookup_flags)) {
     685             :                         lookup_flags |= LOOKUP_REVAL;   /* retry the lookup with LOOKUP_REVAL */
     686             :                         goto retry;
     687             :                 }
     688             :         }
     689           0 :         return error;   /* lookup error, or whatever chmod_common() returned */
     690             : }
     691             : /* fchmodat(2): thin syscall wrapper; all the work is done by do_fchmodat(). */
     692           0 : SYSCALL_DEFINE3(fchmodat, int, dfd, const char __user *, filename,
     693             :                 umode_t, mode)
     694             : {
     695           0 :         return do_fchmodat(dfd, filename, mode);
     696             : }
     697             : /* chmod(2): do_fchmodat() with AT_FDCWD as the directory fd. */
     698           0 : SYSCALL_DEFINE2(chmod, const char __user *, filename, umode_t, mode)
     699             : {
     700           0 :         return do_fchmodat(AT_FDCWD, filename, mode);
     701             : }
     702             : 
     703             : /**
     704             :  * setattr_vfsuid - check and set ia_vfsuid attribute
     705             :  * @attr: attributes to update
     706             :  * @kuid: new inode owner
     707             :  *
     708             :  * If @kuid is valid, set ATTR_UID and store @kuid as a vfsuid_t in ia_vfsuid.
     709             :  *
     710             :  * Return: true if @kuid is valid, false if not.
     711             :  */
     712             : static inline bool setattr_vfsuid(struct iattr *attr, kuid_t kuid)
     713             : {
     714           0 :         if (!uid_valid(kuid))
     715             :                 return false;   /* @attr is left untouched */
     716           0 :         attr->ia_valid |= ATTR_UID;
     717           0 :         attr->ia_vfsuid = VFSUIDT_INIT(kuid);
     718             :         return true;
     719             : }
     720             : 
     721             : /**
     722             :  * setattr_vfsgid - check and set ia_vfsgid attribute
     723             :  * @attr: attributes to update
     724             :  * @kgid: new inode group
     725             :  *
     726             :  * If @kgid is valid, set ATTR_GID and store @kgid as a vfsgid_t in ia_vfsgid.
     727             :  *
     728             :  * Return: true if @kgid is valid, false if not.
     729             :  */
     730             : static inline bool setattr_vfsgid(struct iattr *attr, kgid_t kgid)
     731             : {
     732           0 :         if (!gid_valid(kgid))
     733             :                 return false;   /* @attr is left untouched */
     734           0 :         attr->ia_valid |= ATTR_GID;
     735           0 :         attr->ia_vfsgid = VFSGIDT_INIT(kgid);
     736             :         return true;
     737             : }
     738             : /* chown_common - change owner and/or group of @path's inode; (uid_t)-1 / (gid_t)-1 leave that id unchanged. */
     739           0 : int chown_common(const struct path *path, uid_t user, gid_t group)
     740             : {
     741             :         struct mnt_idmap *idmap;
     742             :         struct user_namespace *fs_userns;
     743           0 :         struct inode *inode = path->dentry->d_inode;
     744           0 :         struct inode *delegated_inode = NULL;
     745             :         int error;
     746             :         struct iattr newattrs;
     747             :         kuid_t uid;
     748             :         kgid_t gid;
     749             :         /* Map the raw ids through the caller's user namespace. */
     750           0 :         uid = make_kuid(current_user_ns(), user);
     751           0 :         gid = make_kgid(current_user_ns(), group);
     752             : 
     753           0 :         idmap = mnt_idmap(path->mnt);
     754           0 :         fs_userns = i_user_ns(inode);
     755             :         /* newattrs is rebuilt from scratch on every pass, including retries. */
     756             : retry_deleg:
     757           0 :         newattrs.ia_vfsuid = INVALID_VFSUID;
     758           0 :         newattrs.ia_vfsgid = INVALID_VFSGID;
     759           0 :         newattrs.ia_valid =  ATTR_CTIME;
     760           0 :         if ((user != (uid_t)-1) && !setattr_vfsuid(&newattrs, uid))
     761             :                 return -EINVAL;         /* requested uid did not yield a valid kuid */
     762           0 :         if ((group != (gid_t)-1) && !setattr_vfsgid(&newattrs, gid))
     763             :                 return -EINVAL;         /* requested gid did not yield a valid kgid */
     764           0 :         inode_lock(inode);
     765           0 :         if (!S_ISDIR(inode->i_mode))
     766           0 :                 newattrs.ia_valid |= ATTR_KILL_SUID | ATTR_KILL_PRIV |
     767           0 :                                      setattr_should_drop_sgid(idmap, inode);
     768             :         /* Continue to send actual fs values, not the mount values. */
     769           0 :         error = security_path_chown(
     770             :                 path,
     771             :                 from_vfsuid(idmap, fs_userns, newattrs.ia_vfsuid),
     772             :                 from_vfsgid(idmap, fs_userns, newattrs.ia_vfsgid));
     773             :         if (!error)
     774           0 :                 error = notify_change(idmap, path->dentry, &newattrs,
     775             :                                       &delegated_inode);
     776           0 :         inode_unlock(inode);
     777           0 :         if (delegated_inode) {  /* non-NULL only if notify_change() set it */
     778           0 :                 error = break_deleg_wait(&delegated_inode);
     779           0 :                 if (!error)
     780             :                         goto retry_deleg;
     781             :         }
     782             :         return error;
     783             : }
     784             : /* do_fchownat - look up @filename relative to @dfd as directed by @flag and chown_common() the result. */
     785           0 : int do_fchownat(int dfd, const char __user *filename, uid_t user, gid_t group,
     786             :                 int flag)
     787             : {
     788             :         struct path path;
     789           0 :         int error = -EINVAL;
     790             :         int lookup_flags;
     791             :         /* Any flag other than AT_SYMLINK_NOFOLLOW / AT_EMPTY_PATH is -EINVAL. */
     792           0 :         if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH)) != 0)
     793             :                 goto out;
     794             :         /* LOOKUP_FOLLOW unless AT_SYMLINK_NOFOLLOW; AT_EMPTY_PATH adds LOOKUP_EMPTY. */
     795           0 :         lookup_flags = (flag & AT_SYMLINK_NOFOLLOW) ? 0 : LOOKUP_FOLLOW;
     796           0 :         if (flag & AT_EMPTY_PATH)
     797           0 :                 lookup_flags |= LOOKUP_EMPTY;
     798             : retry:
     799           0 :         error = user_path_at(dfd, filename, lookup_flags, &path);
     800           0 :         if (error)
     801             :                 goto out;
     802           0 :         error = mnt_want_write(path.mnt);
     803           0 :         if (error)
     804             :                 goto out_release;
     805           0 :         error = chown_common(&path, user, group);
     806           0 :         mnt_drop_write(path.mnt);       /* pairs with mnt_want_write() above */
     807             : out_release:
     808           0 :         path_put(&path);
     809           0 :         if (retry_estale(error, lookup_flags)) {
     810           0 :                 lookup_flags |= LOOKUP_REVAL;
     811           0 :                 goto retry;
     812             :         }
     813             : out:
     814           0 :         return error;
     815             : }
     816             : /* fchownat(2): thin syscall wrapper; all the work is done by do_fchownat(). */
     817           0 : SYSCALL_DEFINE5(fchownat, int, dfd, const char __user *, filename, uid_t, user,
     818             :                 gid_t, group, int, flag)
     819             : {
     820           0 :         return do_fchownat(dfd, filename, user, group, flag);
     821             : }
     822             : /* chown(2): do_fchownat() relative to AT_FDCWD with no flags. */
     823           0 : SYSCALL_DEFINE3(chown, const char __user *, filename, uid_t, user, gid_t, group)
     824             : {
     825           0 :         return do_fchownat(AT_FDCWD, filename, user, group, 0);
     826             : }
     827             : /* lchown(2): do_fchownat() relative to AT_FDCWD with AT_SYMLINK_NOFOLLOW. */
     828           0 : SYSCALL_DEFINE3(lchown, const char __user *, filename, uid_t, user, gid_t, group)
     829             : {
     830           0 :         return do_fchownat(AT_FDCWD, filename, user, group,
     831             :                            AT_SYMLINK_NOFOLLOW);
     832             : }
     833             : /* vfs_fchown - chown_common() on @file's f_path, bracketed by mnt_want_write_file()/mnt_drop_write_file(). */
     834           0 : int vfs_fchown(struct file *file, uid_t user, gid_t group)
     835             : {
     836             :         int error;
     837             :         /* Without write access to the mount, fail before touching anything. */
     838           0 :         error = mnt_want_write_file(file);
     839           0 :         if (error)
     840             :                 return error;
     841           0 :         audit_file(file);
     842           0 :         error = chown_common(&file->f_path, user, group);
     843           0 :         mnt_drop_write_file(file);
     844           0 :         return error;
     845             : }
     846             : /* ksys_fchown - -EBADF if @fd has no file, otherwise the result of vfs_fchown(). */
     847           0 : int ksys_fchown(unsigned int fd, uid_t user, gid_t group)
     848             : {
     849           0 :         struct fd f = fdget(fd);
     850           0 :         int error = -EBADF;
     851             : 
     852           0 :         if (f.file) {
     853           0 :                 error = vfs_fchown(f.file, user, group);
     854           0 :                 fdput(f);       /* pairs with fdget() above */
     855             :         }
     856           0 :         return error;
     857             : }
     858             : /* fchown(2): thin syscall wrapper around ksys_fchown(). */
     859           0 : SYSCALL_DEFINE3(fchown, unsigned int, fd, uid_t, user, gid_t, group)
     860             : {
     861           0 :         return ksys_fchown(fd, user, group);
     862             : }
     863             : /* do_dentry_open - set up @f for @inode and call @open (or f_op->open if @open is NULL); 0 or -errno. */
     864           0 : static int do_dentry_open(struct file *f,
     865             :                           struct inode *inode,
     866             :                           int (*open)(struct inode *, struct file *))
     867             : {
     868             :         static const struct file_operations empty_fops = {};
     869             :         int error;
     870             :         /* Pin f->f_path; cleanup_file drops it again on failure. */
     871           0 :         path_get(&f->f_path);
     872           0 :         f->f_inode = inode;
     873           0 :         f->f_mapping = inode->i_mapping;
     874           0 :         f->f_wb_err = filemap_sample_wb_err(f->f_mapping);
     875           0 :         f->f_sb_err = file_sample_sb_err(f);
     876             :         /* O_PATH: stop here with empty f_op; no access counting, no ->open(). */
     877           0 :         if (unlikely(f->f_flags & O_PATH)) {
     878           0 :                 f->f_mode = FMODE_PATH | FMODE_OPENED;
     879           0 :                 f->f_op = &empty_fops;
     880           0 :                 return 0;
     881             :         }
     882             :         /* Read-only opens bump i_readcount; writes to non-special files take write access. */
     883           0 :         if ((f->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ) {
     884             :                 i_readcount_inc(inode);
     885           0 :         } else if (f->f_mode & FMODE_WRITE && !special_file(inode->i_mode)) {
     886           0 :                 error = get_write_access(inode);
     887           0 :                 if (unlikely(error))
     888             :                         goto cleanup_file;
     889           0 :                 error = __mnt_want_write(f->f_path.mnt);
     890           0 :                 if (unlikely(error)) {
     891             :                         put_write_access(inode);
     892             :                         goto cleanup_file;
     893             :                 }
     894           0 :                 f->f_mode |= FMODE_WRITER;
     895             :         }
     896             : 
     897             :         /* POSIX.1-2008/SUSv4 Section XSI 2.9.7 */
     898           0 :         if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode))
     899           0 :                 f->f_mode |= FMODE_ATOMIC_POS;
     900             :         /* A NULL f_op is unexpected: warn and fail with -ENODEV. */
     901           0 :         f->f_op = fops_get(inode->i_fop);
     902           0 :         if (WARN_ON(!f->f_op)) {
     903             :                 error = -ENODEV;
     904             :                 goto cleanup_all;
     905             :         }
     906             : 
     907           0 :         error = security_file_open(f);
     908             :         if (error)
     909             :                 goto cleanup_all;
     910             : 
     911           0 :         error = break_lease(file_inode(f), f->f_flags);
     912           0 :         if (error)
     913             :                 goto cleanup_all;
     914             : 
     915             :         /* normally all 3 are set; ->open() can clear them if needed */
     916           0 :         f->f_mode |= FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE;
     917           0 :         if (!open)      /* NULL @open: fall back to the file's own ->open(), if any */
     918           0 :                 open = f->f_op->open;
     919           0 :         if (open) {
     920           0 :                 error = open(inode, f);
     921           0 :                 if (error)
     922             :                         goto cleanup_all;
     923             :         }
     924           0 :         f->f_mode |= FMODE_OPENED;
     925           0 :         if ((f->f_mode & FMODE_READ) &&
     926           0 :              likely(f->f_op->read || f->f_op->read_iter))
     927           0 :                 f->f_mode |= FMODE_CAN_READ;
     928           0 :         if ((f->f_mode & FMODE_WRITE) &&
     929           0 :              likely(f->f_op->write || f->f_op->write_iter))
     930           0 :                 f->f_mode |= FMODE_CAN_WRITE;
     931           0 :         if ((f->f_mode & FMODE_LSEEK) && !f->f_op->llseek)
     932           0 :                 f->f_mode &= ~FMODE_LSEEK;
     933           0 :         if (f->f_mapping->a_ops && f->f_mapping->a_ops->direct_IO)
     934           0 :                 f->f_mode |= FMODE_CAN_ODIRECT;
     935             :         /* O_CREAT, O_EXCL, O_NOCTTY and O_TRUNC are not kept in f_flags. */
     936           0 :         f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
     937           0 :         f->f_iocb_flags = iocb_flags(f);
     938             : 
     939           0 :         file_ra_state_init(&f->f_ra, f->f_mapping->host->i_mapping);
     940             :         /* NOTE: returns directly with FMODE_OPENED already set; the cleanup labels are not run. */
     941           0 :         if ((f->f_flags & O_DIRECT) && !(f->f_mode & FMODE_CAN_ODIRECT))
     942             :                 return -EINVAL;
     943             : 
     944             :         /*
     945             :          * XXX: Huge page cache doesn't support writing yet. Drop all page
     946             :          * cache for this file before processing writes.
     947             :          */
     948           0 :         if (f->f_mode & FMODE_WRITE) {
     949             :                 /*
     950             :                  * Paired with smp_mb() in collapse_file() to ensure nr_thps
     951             :                  * is up to date and the update to i_writecount by
     952             :                  * get_write_access() is visible. Ensures subsequent insertion
     953             :                  * of THPs into the page cache will fail.
     954             :                  */
     955           0 :                 smp_mb();
     956           0 :                 if (filemap_nr_thps(inode->i_mapping)) {
     957             :                         struct address_space *mapping = inode->i_mapping;
     958             : 
     959             :                         filemap_invalidate_lock(inode->i_mapping);
     960             :                         /*
     961             :                          * unmap_mapping_range() only needs to be called once
     962             :                          * here, because private pages do not need to be
     963             :                          * unmapped from the mapping (e.g. the data segment
     964             :                          * of dynamic shared libraries).
     965             :                          */
     966             :                         unmap_mapping_range(mapping, 0, 0, 0);
     967             :                         truncate_inode_pages(mapping, 0);
     968             :                         filemap_invalidate_unlock(inode->i_mapping);
     969             :                 }
     970             :         }
     971             : 
     972             :         return 0;
     973             :         /* cleanup_all also calls fops_put() and put_file_access(); cleanup_file unpins and clears @f. */
     974             : cleanup_all:
     975           0 :         if (WARN_ON_ONCE(error > 0))    /* positive values are unexpected; map them to -EINVAL */
     976           0 :                 error = -EINVAL;
     977           0 :         fops_put(f->f_op);
     978           0 :         put_file_access(f);
     979             : cleanup_file:
     980           0 :         path_put(&f->f_path);
     981           0 :         f->f_path.mnt = NULL;
     982           0 :         f->f_path.dentry = NULL;
     983           0 :         f->f_inode = NULL;
     984           0 :         return error;
     985             : }
     986             : 
     987             : /**
     988             :  * finish_open - finish opening a file
     989             :  * @file: file pointer
     990             :  * @dentry: pointer to dentry
     991             :  * @open: open callback
     992             :  *
     993             :  * This can be used to finish opening a file passed to i_op->atomic_open().
     994             :  * @file must not already have FMODE_OPENED set (BUG_ON otherwise).
     995             :  *
     996             :  * If the open callback is set to NULL, then the standard f_op->open()
     997             :  * filesystem callback is substituted.
     998             :  *
     999             :  * NB: the dentry reference is _not_ consumed.  If, for example, the dentry is
    1000             :  * the return value of d_splice_alias(), then the caller needs to perform dput()
    1001             :  * on it after finish_open().
    1002             :  *
    1003             :  * Returns zero on success or -errno if the open failed.
    1004             :  */
    1005           0 : int finish_open(struct file *file, struct dentry *dentry,
    1006             :                 int (*open)(struct inode *, struct file *))
    1007             : {
    1008           0 :         BUG_ON(file->f_mode & FMODE_OPENED); /* once it's opened, it's opened */
    1009             : 
    1010           0 :         file->f_path.dentry = dentry;
    1011           0 :         return do_dentry_open(file, d_backing_inode(dentry), open);
    1012             : }
    1013             : EXPORT_SYMBOL(finish_open);
    1014             : 
    1015             : /**
    1016             :  * finish_no_open - finish ->atomic_open() without opening the file
    1017             :  *
    1018             :  * @file: file pointer
    1019             :  * @dentry: dentry or NULL (as returned from ->lookup())
    1020             :  *
    1021             :  * This can be used to set the result of a successful lookup in ->atomic_open().
    1022             :  *
    1023             :  * NB: unlike finish_open() this function does consume the dentry reference and
    1024             :  * the caller need not dput() it.
    1025             :  *
    1026             :  * Returns "0" which must be the return value of ->atomic_open() after having
    1027             :  * called this function.
    1028             :  */
    1029           0 : int finish_no_open(struct file *file, struct dentry *dentry)
    1030             : {
    1031           0 :         file->f_path.dentry = dentry;   /* only records the dentry; nothing is opened here */
    1032           0 :         return 0;
    1033             : }
    1034             : EXPORT_SYMBOL(finish_no_open);
    1035             : /* file_path - d_path() applied to @filp's f_path; @buf, @buflen and the result are passed through as-is. */
    1036           0 : char *file_path(struct file *filp, char *buf, int buflen)
    1037             : {
    1038           0 :         return d_path(&filp->f_path, buf, buflen);
    1039             : }
    1040             : EXPORT_SYMBOL(file_path);
    1041             : 
    1042             : /**
    1043             :  * vfs_open - open the file at the given path
    1044             :  * @path: path to open
    1045             :  * @file: newly allocated file with f_flags initialized; @path is
    1046             :  *        copied into its f_path before the open is attempted
    1047             :  */
    1048           0 : int vfs_open(const struct path *path, struct file *file)
    1049             : {
    1050           0 :         file->f_path = *path;
    1051           0 :         return do_dentry_open(file, d_backing_inode(path->dentry), NULL);
    1052             : }
    1053             : /* dentry_open - allocate a file for @flags/@cred and vfs_open() @path on it; returns the file or an ERR_PTR. */
    1054           0 : struct file *dentry_open(const struct path *path, int flags,
    1055             :                          const struct cred *cred)
    1056             : {
    1057             :         int error;
    1058             :         struct file *f;
    1059             : 
    1060           0 :         validate_creds(cred);
    1061             : 
    1062             :         /* We must always pass in a valid mount pointer. */
    1063           0 :         BUG_ON(!path->mnt);
    1064             :         /* An ERR_PTR from alloc_empty_file() is passed back unchanged. */
    1065           0 :         f = alloc_empty_file(flags, cred);
    1066           0 :         if (!IS_ERR(f)) {
    1067           0 :                 error = vfs_open(path, f);
    1068           0 :                 if (error) {
    1069           0 :                         fput(f);        /* drop the file allocated above */
    1070           0 :                         f = ERR_PTR(error);
    1071             :                 }
    1072             :         }
    1073           0 :         return f;
    1074             : }
    1075             : EXPORT_SYMBOL(dentry_open);
    1076             : 
    1077             : /**
    1078             :  * dentry_create - Create and open a file
    1079             :  * @path: path to create
    1080             :  * @flags: O_ flags
    1081             :  * @mode: mode bits for new file
    1082             :  * @cred: credentials to use
    1083             :  *
    1084             :  * Caller must hold the parent directory's lock, and have prepared
    1085             :  * a negative dentry, placed in @path->dentry, for the new file.
    1086             :  *
    1087             :  * Caller sets @path->mnt to the vfsmount of the filesystem where
    1088             :  * the new file is to be created. The parent directory and the
    1089             :  * negative dentry must reside on the same filesystem instance.
    1090             :  *
    1091             :  * On success, returns a "struct file *". Otherwise an ERR_PTR
    1092             :  * is returned.
    1093             :  */
    1094           0 : struct file *dentry_create(const struct path *path, int flags, umode_t mode,
    1095             :                            const struct cred *cred)
    1096             : {
    1097             :         struct file *f;
    1098             :         int error;
    1099             : 
    1100           0 :         validate_creds(cred);
    1101           0 :         f = alloc_empty_file(flags, cred);
    1102           0 :         if (IS_ERR(f))
    1103             :                 return f;
    1104             :         /* Create in the dentry's parent directory, then open the new file. */
    1105           0 :         error = vfs_create(mnt_idmap(path->mnt),
    1106           0 :                            d_inode(path->dentry->d_parent),
    1107             :                            path->dentry, mode, true);
    1108           0 :         if (!error)
    1109           0 :                 error = vfs_open(path, f);
    1110             :         /* NOTE: nothing here removes the created file if vfs_open() then fails. */
    1111           0 :         if (unlikely(error)) {
    1112           0 :                 fput(f);
    1113           0 :                 return ERR_PTR(error);
    1114             :         }
    1115             :         return f;
    1116             : }
    1117             : EXPORT_SYMBOL(dentry_create);
    1118             : /* open_with_fake_path - open @inode on a file from alloc_empty_file_noaccount() whose f_path is set to @path. */
    1119           0 : struct file *open_with_fake_path(const struct path *path, int flags,
    1120             :                                 struct inode *inode, const struct cred *cred)
    1121             : {
    1122           0 :         struct file *f = alloc_empty_file_noaccount(flags, cred);
    1123           0 :         if (!IS_ERR(f)) {
    1124             :                 int error;
    1125             :                 /* The inode comes from the caller, not from @path's dentry. */
    1126           0 :                 f->f_path = *path;
    1127           0 :                 error = do_dentry_open(f, inode, NULL);
    1128           0 :                 if (error) {
    1129           0 :                         fput(f);
    1130           0 :                         f = ERR_PTR(error);
    1131             :                 }
    1132             :         }
    1133           0 :         return f;       /* the opened file, or an ERR_PTR */
    1134             : }
    1135             : EXPORT_SYMBOL(open_with_fake_path);
    1136             : /* WILL_CREATE(): non-zero if O_CREAT or __O_TMPFILE is set (NOTE: argument is expanded unparenthesized). */
    1137             : #define WILL_CREATE(flags)      (flags & (O_CREAT | __O_TMPFILE))
    1138             : #define O_PATH_FLAGS            (O_DIRECTORY | O_NOFOLLOW | O_PATH | O_CLOEXEC) /* flags allowed with O_PATH */
    1139             : /* build_open_how - build a struct open_how from @flags/@mode, masking bits outside VALID_OPEN_FLAGS / S_IALLUGO. */
    1140           0 : inline struct open_how build_open_how(int flags, umode_t mode)
    1141             : {
    1142           0 :         struct open_how how = {
    1143           0 :                 .flags = flags & VALID_OPEN_FLAGS,
    1144           0 :                 .mode = mode & S_IALLUGO,
    1145             :         };
    1146             : 
    1147             :         /* O_PATH beats everything else. */
    1148           0 :         if (how.flags & O_PATH)
    1149           0 :                 how.flags &= O_PATH_FLAGS;
    1150             :         /* Modes should only be set for create-like flags. */
    1151           0 :         if (!WILL_CREATE(how.flags))
    1152           0 :                 how.mode = 0;
    1153           0 :         return how;     /* returned by value */
    1154             : }
    1155             : 
    1156           0 : inline int build_open_flags(const struct open_how *how, struct open_flags *op)
    1157             : {
    1158           0 :         u64 flags = how->flags;
    1159           0 :         u64 strip = FMODE_NONOTIFY | O_CLOEXEC;
    1160           0 :         int lookup_flags = 0;
    1161           0 :         int acc_mode = ACC_MODE(flags);
    1162             : 
    1163             :         BUILD_BUG_ON_MSG(upper_32_bits(VALID_OPEN_FLAGS),
    1164             :                          "struct open_flags doesn't yet handle flags > 32 bits");
    1165             : 
    1166             :         /*
    1167             :          * Strip flags that either shouldn't be set by userspace like
    1168             :          * FMODE_NONOTIFY or that aren't relevant in determining struct
    1169             :          * open_flags like O_CLOEXEC.
    1170             :          */
    1171           0 :         flags &= ~strip;
    1172             : 
    1173             :         /*
    1174             :          * Older syscalls implicitly clear all of the invalid flags or argument
    1175             :          * values before calling build_open_flags(), but openat2(2) checks all
    1176             :          * of its arguments.
    1177             :          */
    1178           0 :         if (flags & ~VALID_OPEN_FLAGS)
    1179             :                 return -EINVAL;
    1180           0 :         if (how->resolve & ~VALID_RESOLVE_FLAGS)
    1181             :                 return -EINVAL;
    1182             : 
    1183             :         /* Scoping flags are mutually exclusive. */
    1184           0 :         if ((how->resolve & RESOLVE_BENEATH) && (how->resolve & RESOLVE_IN_ROOT))
    1185             :                 return -EINVAL;
    1186             : 
    1187             :         /* Deal with the mode. */
    1188           0 :         if (WILL_CREATE(flags)) {
    1189           0 :                 if (how->mode & ~S_IALLUGO)
    1190             :                         return -EINVAL;
    1191           0 :                 op->mode = how->mode | S_IFREG;
    1192             :         } else {
    1193           0 :                 if (how->mode != 0)
    1194             :                         return -EINVAL;
    1195           0 :                 op->mode = 0;
    1196             :         }
    1197             : 
    1198             :         /*
    1199             :          * In order to ensure programs get explicit errors when trying to use
    1200             :          * O_TMPFILE on old kernels, O_TMPFILE is implemented such that it
    1201             :          * looks like (O_DIRECTORY|O_RDWR & ~O_CREAT) to old kernels. But we
    1202             :          * have to require userspace to explicitly set it.
    1203             :          */
    1204           0 :         if (flags & __O_TMPFILE) {
    1205           0 :                 if ((flags & O_TMPFILE_MASK) != O_TMPFILE)
    1206             :                         return -EINVAL;
    1207           0 :                 if (!(acc_mode & MAY_WRITE))
    1208             :                         return -EINVAL;
    1209             :         }
    1210           0 :         if (flags & O_PATH) {
    1211             :                 /* O_PATH only permits certain other flags to be set. */
    1212           0 :                 if (flags & ~O_PATH_FLAGS)
    1213             :                         return -EINVAL;
    1214             :                 acc_mode = 0;
    1215             :         }
    1216             : 
    1217             :         /*
    1218             :          * O_SYNC is implemented as __O_SYNC|O_DSYNC.  As many places only
    1219             :          * check for O_DSYNC if they need any syncing at all we enforce it's
    1220             :          * always set instead of having to deal with possibly weird behaviour
    1221             :          * for malicious applications setting only __O_SYNC.
    1222             :          */
    1223           0 :         if (flags & __O_SYNC)
    1224           0 :                 flags |= O_DSYNC;
    1225             : 
    1226           0 :         op->open_flag = flags;
    1227             : 
    1228             :         /* O_TRUNC implies we need access checks for write permissions */
    1229           0 :         if (flags & O_TRUNC)
    1230           0 :                 acc_mode |= MAY_WRITE;
    1231             : 
    1232             :         /* Allow the LSM permission hook to distinguish append
    1233             :            access from general write access. */
    1234           0 :         if (flags & O_APPEND)
    1235           0 :                 acc_mode |= MAY_APPEND;
    1236             : 
    1237           0 :         op->acc_mode = acc_mode;
    1238             : 
    1239           0 :         op->intent = flags & O_PATH ? 0 : LOOKUP_OPEN;
    1240             : 
    1241           0 :         if (flags & O_CREAT) {
    1242           0 :                 op->intent |= LOOKUP_CREATE;
    1243           0 :                 if (flags & O_EXCL) {
    1244           0 :                         op->intent |= LOOKUP_EXCL;
    1245           0 :                         flags |= O_NOFOLLOW;
    1246             :                 }
    1247             :         }
    1248             : 
    1249           0 :         if (flags & O_DIRECTORY)
    1250           0 :                 lookup_flags |= LOOKUP_DIRECTORY;
    1251           0 :         if (!(flags & O_NOFOLLOW))
    1252           0 :                 lookup_flags |= LOOKUP_FOLLOW;
    1253             : 
    1254           0 :         if (how->resolve & RESOLVE_NO_XDEV)
    1255           0 :                 lookup_flags |= LOOKUP_NO_XDEV;
    1256           0 :         if (how->resolve & RESOLVE_NO_MAGICLINKS)
    1257           0 :                 lookup_flags |= LOOKUP_NO_MAGICLINKS;
    1258           0 :         if (how->resolve & RESOLVE_NO_SYMLINKS)
    1259           0 :                 lookup_flags |= LOOKUP_NO_SYMLINKS;
    1260           0 :         if (how->resolve & RESOLVE_BENEATH)
    1261           0 :                 lookup_flags |= LOOKUP_BENEATH;
    1262           0 :         if (how->resolve & RESOLVE_IN_ROOT)
    1263           0 :                 lookup_flags |= LOOKUP_IN_ROOT;
    1264           0 :         if (how->resolve & RESOLVE_CACHED) {
    1265             :                 /* Don't bother even trying for create/truncate/tmpfile open */
    1266           0 :                 if (flags & (O_TRUNC | O_CREAT | O_TMPFILE))
    1267             :                         return -EAGAIN;
    1268           0 :                 lookup_flags |= LOOKUP_CACHED;
    1269             :         }
    1270             : 
    1271           0 :         op->lookup_flags = lookup_flags;
    1272           0 :         return 0;
    1273             : }
    1274             : 
    1275             : /**
    1276             :  * file_open_name - open file and return file pointer
    1277             :  *
    1278             :  * @name:       struct filename containing path to open
    1279             :  * @flags:      open flags as per the open(2) second argument
    1280             :  * @mode:       mode for the new file if O_CREAT is set, else ignored
    1281             :  *
    1282             :  * This is the helper to open a file from kernelspace if you really
    1283             :  * have to.  But in general you should not do this, so please move
    1284             :  * along, nothing to see here..
    1285             :  */
    1286           0 : struct file *file_open_name(struct filename *name, int flags, umode_t mode)
    1287             : {
    1288             :         struct open_flags op;
    1289           0 :         struct open_how how = build_open_how(flags, mode);
    1290           0 :         int err = build_open_flags(&how, &op);
    1291           0 :         if (err)
    1292           0 :                 return ERR_PTR(err);
    1293           0 :         return do_filp_open(AT_FDCWD, name, &op);
    1294             : }
    1295             : 
    1296             : /**
    1297             :  * filp_open - open file and return file pointer
    1298             :  *
    1299             :  * @filename:   path to open
    1300             :  * @flags:      open flags as per the open(2) second argument
    1301             :  * @mode:       mode for the new file if O_CREAT is set, else ignored
    1302             :  *
    1303             :  * This is the helper to open a file from kernelspace if you really
    1304             :  * have to.  But in general you should not do this, so please move
    1305             :  * along, nothing to see here..
    1306             :  */
    1307           0 : struct file *filp_open(const char *filename, int flags, umode_t mode)
    1308             : {
    1309           0 :         struct filename *name = getname_kernel(filename);
    1310           0 :         struct file *file = ERR_CAST(name);
    1311             :         
    1312           0 :         if (!IS_ERR(name)) {
    1313           0 :                 file = file_open_name(name, flags, mode);
    1314           0 :                 putname(name);
    1315             :         }
    1316           0 :         return file;
    1317             : }
    1318             : EXPORT_SYMBOL(filp_open);
    1319             : 
    1320           0 : struct file *file_open_root(const struct path *root,
    1321             :                             const char *filename, int flags, umode_t mode)
    1322             : {
    1323             :         struct open_flags op;
    1324           0 :         struct open_how how = build_open_how(flags, mode);
    1325           0 :         int err = build_open_flags(&how, &op);
    1326           0 :         if (err)
    1327           0 :                 return ERR_PTR(err);
    1328           0 :         return do_file_open_root(root, filename, &op);
    1329             : }
    1330             : EXPORT_SYMBOL(file_open_root);
    1331             : 
    1332           0 : static long do_sys_openat2(int dfd, const char __user *filename,
    1333             :                            struct open_how *how)
    1334             : {
    1335             :         struct open_flags op;
    1336           0 :         int fd = build_open_flags(how, &op);
    1337             :         struct filename *tmp;
    1338             : 
    1339           0 :         if (fd)
    1340           0 :                 return fd;
    1341             : 
    1342           0 :         tmp = getname(filename);
    1343           0 :         if (IS_ERR(tmp))
    1344           0 :                 return PTR_ERR(tmp);
    1345             : 
    1346           0 :         fd = get_unused_fd_flags(how->flags);
    1347           0 :         if (fd >= 0) {
    1348           0 :                 struct file *f = do_filp_open(dfd, tmp, &op);
    1349           0 :                 if (IS_ERR(f)) {
    1350           0 :                         put_unused_fd(fd);
    1351           0 :                         fd = PTR_ERR(f);
    1352             :                 } else {
    1353           0 :                         fsnotify_open(f);
    1354           0 :                         fd_install(fd, f);
    1355             :                 }
    1356             :         }
    1357           0 :         putname(tmp);
    1358           0 :         return fd;
    1359             : }
    1360             : 
    1361           0 : long do_sys_open(int dfd, const char __user *filename, int flags, umode_t mode)
    1362             : {
    1363           0 :         struct open_how how = build_open_how(flags, mode);
    1364           0 :         return do_sys_openat2(dfd, filename, &how);
    1365             : }
    1366             : 
    1367             : 
    1368           0 : SYSCALL_DEFINE3(open, const char __user *, filename, int, flags, umode_t, mode)
    1369             : {
    1370             :         if (force_o_largefile())
    1371           0 :                 flags |= O_LARGEFILE;
    1372           0 :         return do_sys_open(AT_FDCWD, filename, flags, mode);
    1373             : }
    1374             : 
    1375           0 : SYSCALL_DEFINE4(openat, int, dfd, const char __user *, filename, int, flags,
    1376             :                 umode_t, mode)
    1377             : {
    1378             :         if (force_o_largefile())
    1379           0 :                 flags |= O_LARGEFILE;
    1380           0 :         return do_sys_open(dfd, filename, flags, mode);
    1381             : }
    1382             : 
    1383           0 : SYSCALL_DEFINE4(openat2, int, dfd, const char __user *, filename,
    1384             :                 struct open_how __user *, how, size_t, usize)
    1385             : {
    1386             :         int err;
    1387             :         struct open_how tmp;
    1388             : 
    1389             :         BUILD_BUG_ON(sizeof(struct open_how) < OPEN_HOW_SIZE_VER0);
    1390             :         BUILD_BUG_ON(sizeof(struct open_how) != OPEN_HOW_SIZE_LATEST);
    1391             : 
    1392           0 :         if (unlikely(usize < OPEN_HOW_SIZE_VER0))
    1393             :                 return -EINVAL;
    1394             : 
    1395           0 :         err = copy_struct_from_user(&tmp, sizeof(tmp), how, usize);
    1396           0 :         if (err)
    1397           0 :                 return err;
    1398             : 
    1399           0 :         audit_openat2_how(&tmp);
    1400             : 
    1401             :         /* O_LARGEFILE is only allowed for non-O_PATH. */
    1402           0 :         if (!(tmp.flags & O_PATH) && force_o_largefile())
    1403           0 :                 tmp.flags |= O_LARGEFILE;
    1404             : 
    1405           0 :         return do_sys_openat2(dfd, filename, &tmp);
    1406             : }
    1407             : 
    1408             : #ifdef CONFIG_COMPAT
    1409             : /*
    1410             :  * Exactly like sys_open(), except that it doesn't set the
    1411             :  * O_LARGEFILE flag.
    1412             :  */
    1413             : COMPAT_SYSCALL_DEFINE3(open, const char __user *, filename, int, flags, umode_t, mode)
    1414             : {
    1415             :         return do_sys_open(AT_FDCWD, filename, flags, mode);
    1416             : }
    1417             : 
    1418             : /*
    1419             :  * Exactly like sys_openat(), except that it doesn't set the
    1420             :  * O_LARGEFILE flag.
    1421             :  */
    1422             : COMPAT_SYSCALL_DEFINE4(openat, int, dfd, const char __user *, filename, int, flags, umode_t, mode)
    1423             : {
    1424             :         return do_sys_open(dfd, filename, flags, mode);
    1425             : }
    1426             : #endif
    1427             : 
    1428             : #ifndef __alpha__
    1429             : 
    1430             : /*
    1431             :  * For backward compatibility?  Maybe this should be moved
    1432             :  * into arch/i386 instead?
    1433             :  */
    1434           0 : SYSCALL_DEFINE2(creat, const char __user *, pathname, umode_t, mode)
    1435             : {
    1436           0 :         int flags = O_CREAT | O_WRONLY | O_TRUNC;
    1437             : 
    1438             :         if (force_o_largefile())
    1439           0 :                 flags |= O_LARGEFILE;
    1440           0 :         return do_sys_open(AT_FDCWD, pathname, flags, mode);
    1441             : }
    1442             : #endif
    1443             : 
    1444             : /*
    1445             :  * "id" is the POSIX thread ID. We use the
    1446             :  * files pointer for this..
    1447             :  */
    1448           0 : int filp_close(struct file *filp, fl_owner_t id)
    1449             : {
    1450           0 :         int retval = 0;
    1451             : 
    1452           0 :         if (CHECK_DATA_CORRUPTION(file_count(filp) == 0,
    1453             :                         "VFS: Close: file count is 0 (f_op=%ps)",
    1454             :                         filp->f_op)) {
    1455             :                 return 0;
    1456             :         }
    1457             : 
    1458           0 :         if (filp->f_op->flush)
    1459           0 :                 retval = filp->f_op->flush(filp, id);
    1460             : 
    1461           0 :         if (likely(!(filp->f_mode & FMODE_PATH))) {
    1462           0 :                 dnotify_flush(filp, id);
    1463           0 :                 locks_remove_posix(filp, id);
    1464             :         }
    1465           0 :         fput(filp);
    1466           0 :         return retval;
    1467             : }
    1468             : 
    1469             : EXPORT_SYMBOL(filp_close);
    1470             : 
    1471             : /*
    1472             :  * Careful here! We test whether the file pointer is NULL before
    1473             :  * releasing the fd. This ensures that one clone task can't release
    1474             :  * an fd while another clone is opening it.
    1475             :  */
    1476           0 : SYSCALL_DEFINE1(close, unsigned int, fd)
    1477             : {
    1478           0 :         int retval = close_fd(fd);
    1479             : 
    1480             :         /* can't restart close syscall because file table entry was cleared */
    1481           0 :         if (unlikely(retval == -ERESTARTSYS ||
    1482             :                      retval == -ERESTARTNOINTR ||
    1483             :                      retval == -ERESTARTNOHAND ||
    1484             :                      retval == -ERESTART_RESTARTBLOCK))
    1485           0 :                 retval = -EINTR;
    1486             : 
    1487           0 :         return retval;
    1488             : }
    1489             : 
    1490             : /**
    1491             :  * close_range() - Close all file descriptors in a given range.
    1492             :  *
    1493             :  * @fd:     starting file descriptor to close
    1494             :  * @max_fd: last file descriptor to close
    1495             :  * @flags:  reserved for future extensions
    1496             :  *
    1497             :  * This closes a range of file descriptors. All file descriptors
    1498             :  * from @fd up to and including @max_fd are closed.
    1499             :  * Currently, errors when closing a given file descriptor are ignored.
    1500             :  */
    1501           0 : SYSCALL_DEFINE3(close_range, unsigned int, fd, unsigned int, max_fd,
    1502             :                 unsigned int, flags)
    1503             : {
    1504           0 :         return __close_range(fd, max_fd, flags);
    1505             : }
    1506             : 
    1507             : /*
    1508             :  * This routine simulates a hangup on the tty, to arrange that users
    1509             :  * are given clean terminals at login time.
    1510             :  */
    1511           0 : SYSCALL_DEFINE0(vhangup)
    1512             : {
    1513           0 :         if (capable(CAP_SYS_TTY_CONFIG)) {
    1514           0 :                 tty_vhangup_self();
    1515           0 :                 return 0;
    1516             :         }
    1517             :         return -EPERM;
    1518             : }
    1519             : 
    1520             : /*
    1521             :  * Called when an inode is about to be opened.
    1522             :  * We use this to disallow opening large files on 32bit systems if
    1523             :  * the caller didn't specify O_LARGEFILE.  On 64bit systems we force
    1524             :  * on this flag in sys_open.
    1525             :  */
    1526           0 : int generic_file_open(struct inode * inode, struct file * filp)
    1527             : {
    1528           0 :         if (!(filp->f_flags & O_LARGEFILE) && i_size_read(inode) > MAX_NON_LFS)
    1529             :                 return -EOVERFLOW;
    1530             :         return 0;
    1531             : }
    1532             : 
    1533             : EXPORT_SYMBOL(generic_file_open);
    1534             : 
    1535             : /*
    1536             :  * This is used by subsystems that don't want seekable
    1537             :  * file descriptors. The function is not supposed to ever fail, the only
    1538             :  * reason it returns an 'int' and not 'void' is so that it can be plugged
    1539             :  * directly into file_operations structure.
    1540             :  */
    1541           0 : int nonseekable_open(struct inode *inode, struct file *filp)
    1542             : {
    1543           0 :         filp->f_mode &= ~(FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE);
    1544           0 :         return 0;
    1545             : }
    1546             : 
    1547             : EXPORT_SYMBOL(nonseekable_open);
    1548             : 
    1549             : /*
    1550             :  * stream_open is used by subsystems that want stream-like file descriptors.
    1551             :  * Such file descriptors are not seekable and don't have a notion of position
    1552             :  * (file.f_pos is always 0 and ppos passed to .read()/.write() is always NULL).
    1553             :  * Contrary to file descriptors of other regular files, .read() and .write()
    1554             :  * can run simultaneously.
    1555             :  *
    1556             :  * stream_open never fails and is marked to return int so that it could be
    1557             :  * directly used as file_operations.open .
    1558             :  */
    1559           0 : int stream_open(struct inode *inode, struct file *filp)
    1560             : {
    1561           0 :         filp->f_mode &= ~(FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE | FMODE_ATOMIC_POS);
    1562           0 :         filp->f_mode |= FMODE_STREAM;
    1563           0 :         return 0;
    1564             : }
    1565             : 
    1566             : EXPORT_SYMBOL(stream_open);

Generated by: LCOV version 1.14