LCOV - code coverage report
Current view: top level - fs - namei.c (source / functions) Hit Total Coverage
Test: coverage.info Lines: 359 1892 19.0 %
Date: 2023-03-27 20:00:47 Functions: 38 152 25.0 %

          Line data    Source code
       1             : // SPDX-License-Identifier: GPL-2.0
       2             : /*
       3             :  *  linux/fs/namei.c
       4             :  *
       5             :  *  Copyright (C) 1991, 1992  Linus Torvalds
       6             :  */
       7             : 
       8             : /*
       9             :  * Some corrections by tytso.
      10             :  */
      11             : 
      12             : /* [Feb 1997 T. Schoebel-Theuer] Complete rewrite of the pathname
      13             :  * lookup logic.
      14             :  */
      15             : /* [Feb-Apr 2000, AV] Rewrite to the new namespace architecture.
      16             :  */
      17             : 
      18             : #include <linux/init.h>
      19             : #include <linux/export.h>
      20             : #include <linux/kernel.h>
      21             : #include <linux/slab.h>
      22             : #include <linux/fs.h>
      23             : #include <linux/filelock.h>
      24             : #include <linux/namei.h>
      25             : #include <linux/pagemap.h>
      26             : #include <linux/sched/mm.h>
      27             : #include <linux/fsnotify.h>
      28             : #include <linux/personality.h>
      29             : #include <linux/security.h>
      30             : #include <linux/ima.h>
      31             : #include <linux/syscalls.h>
      32             : #include <linux/mount.h>
      33             : #include <linux/audit.h>
      34             : #include <linux/capability.h>
      35             : #include <linux/file.h>
      36             : #include <linux/fcntl.h>
      37             : #include <linux/device_cgroup.h>
      38             : #include <linux/fs_struct.h>
      39             : #include <linux/posix_acl.h>
      40             : #include <linux/hash.h>
      41             : #include <linux/bitops.h>
      42             : #include <linux/init_task.h>
      43             : #include <linux/uaccess.h>
      44             : 
      45             : #include "internal.h"
      46             : #include "mount.h"
      47             : 
      48             : /* [Feb-1997 T. Schoebel-Theuer]
      49             :  * Fundamental changes in the pathname lookup mechanisms (namei)
      50             :  * were necessary because of omirr.  The reason is that omirr needs
      51             :  * to know the _real_ pathname, not the user-supplied one, in case
      52             :  * of symlinks (and also when transname replacements occur).
      53             :  *
      54             :  * The new code replaces the old recursive symlink resolution with
      55             :  * an iterative one (in case of non-nested symlink chains).  It does
      56             :  * this with calls to <fs>_follow_link().
      57             :  * As a side effect, dir_namei(), _namei() and follow_link() are now 
      58             :  * replaced with a single function lookup_dentry() that can handle all 
      59             :  * the special cases of the former code.
      60             :  *
      61             :  * With the new dcache, the pathname is stored at each inode, at least as
      62             :  * long as the refcount of the inode is positive.  As a side effect, the
      63             :  * size of the dcache depends on the inode cache and thus is dynamic.
      64             :  *
      65             :  * [29-Apr-1998 C. Scott Ananian] Updated above description of symlink
      66             :  * resolution to correspond with current state of the code.
      67             :  *
      68             :  * Note that the symlink resolution is not *completely* iterative.
      69             :  * There is still a significant amount of tail- and mid- recursion in
      70             :  * the algorithm.  Also, note that <fs>_readlink() is not used in
      71             :  * lookup_dentry(): lookup_dentry() on the result of <fs>_readlink()
      72             :  * may return different results than <fs>_follow_link().  Many virtual
      73             :  * filesystems (including /proc) exhibit this behavior.
      74             :  */
      75             : 
      76             : /* [24-Feb-97 T. Schoebel-Theuer] Side effects caused by new implementation:
      77             :  * New symlink semantics: when open() is called with flags O_CREAT | O_EXCL
      78             :  * and the name already exists in form of a symlink, try to create the new
      79             :  * name indicated by the symlink. The old code always complained that the
      80             :  * name already exists, due to not following the symlink even if its target
      81             :  * is nonexistent.  The new semantics affects also mknod() and link() when
      82             :  * the name is a symlink pointing to a non-existent name.
      83             :  *
      84             :  * I don't know which semantics is the right one, since I have no access
      85             :  * to standards. But I found by trial that HP-UX 9.0 has the full "new"
      86             :  * semantics implemented, while SunOS 4.1.1 and Solaris (SunOS 5.4) have the
      87             :  * "old" one. Personally, I think the new semantics is much more logical.
      88             :  * Note that "ln old new" where "new" is a symlink pointing to a non-existing
      89             :  * file does succeed in both HP-UX and SunOS, but not in Solaris
      90             :  * and in the old Linux semantics.
      91             :  */
      92             : 
      93             : /* [16-Dec-97 Kevin Buhr] For security reasons, we change some symlink
      94             :  * semantics.  See the comments in "open_namei" and "do_link" below.
      95             :  *
      96             :  * [10-Sep-98 Alan Modra] Another symlink change.
      97             :  */
      98             : 
      99             : /* [Feb-Apr 2000 AV] Complete rewrite. Rules for symlinks:
     100             :  *      inside the path - always follow.
     101             :  *      in the last component in creation/removal/renaming - never follow.
     102             :  *      if LOOKUP_FOLLOW passed - follow.
     103             :  *      if the pathname has trailing slashes - follow.
     104             :  *      otherwise - don't follow.
     105             :  * (applied in that order).
     106             :  *
     107             :  * [Jun 2000 AV] Inconsistent behaviour of open() in case if flags==O_CREAT
     108             :  * restored for 2.4. This is the last surviving part of old 4.2BSD bug.
     109             :  * During the 2.4 we need to fix the userland stuff depending on it -
     110             :  * hopefully we will be able to get rid of that wart in 2.5. So far only
     111             :  * XEmacs seems to be relying on it...
     112             :  */
     113             : /*
     114             :  * [Sep 2001 AV] Single-semaphore locking scheme (kudos to David Holland)
     115             :  * implemented.  Let's see if raised priority of ->s_vfs_rename_mutex gives
     116             :  * any extra contention...
     117             :  */
     118             : 
     119             : /* In order to reduce some races, while at the same time doing additional
     120             :  * checking and hopefully speeding things up, we copy filenames to the
     121             :  * kernel data space before using them.
     122             :  *
     123             :  * POSIX.1 2.4: an empty pathname is invalid (ENOENT).
     124             :  * PATH_MAX includes the nul terminator --RR.
     125             :  */
     126             : 
     127             : #define EMBEDDED_NAME_MAX       (PATH_MAX - offsetof(struct filename, iname))
     128             : 
     129             : struct filename *
     130           0 : getname_flags(const char __user *filename, int flags, int *empty)
     131             : {
     132             :         struct filename *result;
     133             :         char *kname;
     134             :         int len;
     135             : 
     136           0 :         result = audit_reusename(filename);
     137             :         if (result)
     138             :                 return result;
     139             : 
     140           0 :         result = __getname();
     141           0 :         if (unlikely(!result))
     142             :                 return ERR_PTR(-ENOMEM);
     143             : 
     144             :         /*
     145             :          * First, try to embed the struct filename inside the names_cache
     146             :          * allocation
     147             :          */
     148           0 :         kname = (char *)result->iname;
     149           0 :         result->name = kname;
     150             : 
     151           0 :         len = strncpy_from_user(kname, filename, EMBEDDED_NAME_MAX);
     152           0 :         if (unlikely(len < 0)) {
     153           0 :                 __putname(result);
     154           0 :                 return ERR_PTR(len);
     155             :         }
     156             : 
     157             :         /*
     158             :          * Uh-oh. We have a name that's approaching PATH_MAX. Allocate a
     159             :          * separate struct filename so we can dedicate the entire
     160             :          * names_cache allocation for the pathname, and re-do the copy from
     161             :          * userland.
     162             :          */
     163           0 :         if (unlikely(len == EMBEDDED_NAME_MAX)) {
     164           0 :                 const size_t size = offsetof(struct filename, iname[1]);
     165           0 :                 kname = (char *)result;
     166             : 
     167             :                 /*
     168             :                  * size is chosen that way to guarantee that
     169             :                  * result->iname[0] is within the same object and that
     170             :                  * kname can't be equal to result->iname, no matter what.
     171             :                  */
     172           0 :                 result = kzalloc(size, GFP_KERNEL);
     173           0 :                 if (unlikely(!result)) {
     174           0 :                         __putname(kname);
     175           0 :                         return ERR_PTR(-ENOMEM);
     176             :                 }
     177           0 :                 result->name = kname;
     178           0 :                 len = strncpy_from_user(kname, filename, PATH_MAX);
     179           0 :                 if (unlikely(len < 0)) {
     180           0 :                         __putname(kname);
     181           0 :                         kfree(result);
     182           0 :                         return ERR_PTR(len);
     183             :                 }
     184           0 :                 if (unlikely(len == PATH_MAX)) {
     185           0 :                         __putname(kname);
     186           0 :                         kfree(result);
     187           0 :                         return ERR_PTR(-ENAMETOOLONG);
     188             :                 }
     189             :         }
     190             : 
     191           0 :         result->refcnt = 1;
     192             :         /* The empty path is special. */
     193           0 :         if (unlikely(!len)) {
     194           0 :                 if (empty)
     195           0 :                         *empty = 1;
     196           0 :                 if (!(flags & LOOKUP_EMPTY)) {
     197           0 :                         putname(result);
     198           0 :                         return ERR_PTR(-ENOENT);
     199             :                 }
     200             :         }
     201             : 
     202           0 :         result->uptr = filename;
     203           0 :         result->aname = NULL;
     204           0 :         audit_getname(result);
     205           0 :         return result;
     206             : }
     207             : 
     208             : struct filename *
     209           0 : getname_uflags(const char __user *filename, int uflags)
     210             : {
     211           0 :         int flags = (uflags & AT_EMPTY_PATH) ? LOOKUP_EMPTY : 0;
     212             : 
     213           0 :         return getname_flags(filename, flags, NULL);
     214             : }
     215             : 
     216             : struct filename *
     217           0 : getname(const char __user * filename)
     218             : {
     219           0 :         return getname_flags(filename, 0, NULL);
     220             : }
     221             : 
     222             : struct filename *
     223           3 : getname_kernel(const char * filename)
     224             : {
     225             :         struct filename *result;
     226           3 :         int len = strlen(filename) + 1;
     227             : 
     228           3 :         result = __getname();
     229           3 :         if (unlikely(!result))
     230             :                 return ERR_PTR(-ENOMEM);
     231             : 
     232           3 :         if (len <= EMBEDDED_NAME_MAX) {
     233           3 :                 result->name = (char *)result->iname;
     234           0 :         } else if (len <= PATH_MAX) {
     235           0 :                 const size_t size = offsetof(struct filename, iname[1]);
     236             :                 struct filename *tmp;
     237             : 
     238           0 :                 tmp = kmalloc(size, GFP_KERNEL);
     239           0 :                 if (unlikely(!tmp)) {
     240           0 :                         __putname(result);
     241           0 :                         return ERR_PTR(-ENOMEM);
     242             :                 }
     243           0 :                 tmp->name = (char *)result;
     244           0 :                 result = tmp;
     245             :         } else {
     246           0 :                 __putname(result);
     247           0 :                 return ERR_PTR(-ENAMETOOLONG);
     248             :         }
     249           3 :         memcpy((char *)result->name, filename, len);
     250           3 :         result->uptr = NULL;
     251           3 :         result->aname = NULL;
     252           3 :         result->refcnt = 1;
     253           3 :         audit_getname(result);
     254             : 
     255           3 :         return result;
     256             : }
     257             : 
     258           3 : void putname(struct filename *name)
     259             : {
     260           3 :         if (IS_ERR(name))
     261             :                 return;
     262             : 
     263           3 :         BUG_ON(name->refcnt <= 0);
     264             : 
     265           3 :         if (--name->refcnt > 0)
     266             :                 return;
     267             : 
     268           3 :         if (name->name != name->iname) {
     269           0 :                 __putname(name->name);
     270           0 :                 kfree(name);
     271             :         } else
     272           3 :                 __putname(name);
     273             : }
     274             : 
     275             : /**
     276             :  * check_acl - perform ACL permission checking
     277             :  * @idmap:      idmap of the mount the inode was found from
     278             :  * @inode:      inode to check permissions on
     279             :  * @mask:       right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC ...)
     280             :  *
     281             :  * This function performs the ACL permission checking. Since this function
     282             :  * retrieves POSIX acls it needs to know whether it is called from a blocking or
     283             :  * non-blocking context and thus cares about the MAY_NOT_BLOCK bit.
     284             :  *
     285             :  * If the inode has been found through an idmapped mount the idmap of
     286             :  * the vfsmount must be passed through @idmap. This function will then take
     287             :  * care to map the inode according to @idmap before checking permissions.
     288             :  * On non-idmapped mounts or if permission checking is to be performed on the
     289             :  * raw inode simply pass @nop_mnt_idmap.
     290             :  */
     291             : static int check_acl(struct mnt_idmap *idmap,
     292             :                      struct inode *inode, int mask)
     293             : {
     294             : #ifdef CONFIG_FS_POSIX_ACL
     295             :         struct posix_acl *acl;
     296             : 
     297             :         if (mask & MAY_NOT_BLOCK) {
     298             :                 acl = get_cached_acl_rcu(inode, ACL_TYPE_ACCESS);
     299             :                 if (!acl)
     300             :                         return -EAGAIN;
     301             :                 /* no ->get_inode_acl() calls in RCU mode... */
     302             :                 if (is_uncached_acl(acl))
     303             :                         return -ECHILD;
     304             :                 return posix_acl_permission(idmap, inode, acl, mask);
     305             :         }
     306             : 
     307             :         acl = get_inode_acl(inode, ACL_TYPE_ACCESS);
     308             :         if (IS_ERR(acl))
     309             :                 return PTR_ERR(acl);
     310             :         if (acl) {
     311             :                 int error = posix_acl_permission(idmap, inode, acl, mask);
     312             :                 posix_acl_release(acl);
     313             :                 return error;
     314             :         }
     315             : #endif
     316             : 
     317             :         return -EAGAIN;
     318             : }
     319             : 
     320             : /**
     321             :  * acl_permission_check - perform basic UNIX permission checking
     322             :  * @idmap:      idmap of the mount the inode was found from
     323             :  * @inode:      inode to check permissions on
     324             :  * @mask:       right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC ...)
     325             :  *
     326             :  * This function performs the basic UNIX permission checking. Since this
     327             :  * function may retrieve POSIX acls it needs to know whether it is called from a
     328             :  * blocking or non-blocking context and thus cares about the MAY_NOT_BLOCK bit.
     329             :  *
     330             :  * If the inode has been found through an idmapped mount the idmap of
     331             :  * the vfsmount must be passed through @idmap. This function will then take
     332             :  * care to map the inode according to @idmap before checking permissions.
     333             :  * On non-idmapped mounts or if permission checking is to be performed on the
     334             :  * raw inode simply pass @nop_mnt_idmap.
     335             :  */
     336           7 : static int acl_permission_check(struct mnt_idmap *idmap,
     337             :                                 struct inode *inode, int mask)
     338             : {
     339           7 :         unsigned int mode = inode->i_mode;
     340             :         vfsuid_t vfsuid;
     341             : 
     342             :         /* Are we the owner? If so, ACL's don't matter */
     343           7 :         vfsuid = i_uid_into_vfsuid(idmap, inode);
     344          14 :         if (likely(vfsuid_eq_kuid(vfsuid, current_fsuid()))) {
     345           7 :                 mask &= 7;
     346           7 :                 mode >>= 6;
     347           7 :                 return (mask & ~mode) ? -EACCES : 0;
     348             :         }
     349             : 
     350             :         /* Do we have ACL's? */
     351           0 :         if (IS_POSIXACL(inode) && (mode & S_IRWXG)) {
     352             :                 int error = check_acl(idmap, inode, mask);
     353             :                 if (error != -EAGAIN)
     354             :                         return error;
     355             :         }
     356             : 
     357             :         /* Only RWX matters for group/other mode bits */
     358           0 :         mask &= 7;
     359             : 
     360             :         /*
     361             :          * Are the group permissions different from
     362             :          * the other permissions in the bits we care
     363             :          * about? Need to check group ownership if so.
     364             :          */
     365           0 :         if (mask & (mode ^ (mode >> 3))) {
     366           0 :                 vfsgid_t vfsgid = i_gid_into_vfsgid(idmap, inode);
     367           0 :                 if (vfsgid_in_group_p(vfsgid))
     368           0 :                         mode >>= 3;
     369             :         }
     370             : 
     371             :         /* Bits in 'mode' clear that we require? */
     372           0 :         return (mask & ~mode) ? -EACCES : 0;
     373             : }
     374             : 
     375             : /**
     376             :  * generic_permission -  check for access rights on a Posix-like filesystem
     377             :  * @idmap:      idmap of the mount the inode was found from
     378             :  * @inode:      inode to check access rights for
     379             :  * @mask:       right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC,
     380             :  *              %MAY_NOT_BLOCK ...)
     381             :  *
     382             :  * Used to check for read/write/execute permissions on a file.
     383             :  * We use "fsuid" for this, letting us set arbitrary permissions
     384             :  * for filesystem access without changing the "normal" uids which
     385             :  * are used for other things.
     386             :  *
     387             :  * generic_permission is rcu-walk aware. It returns -ECHILD in case an rcu-walk
     388             :  * request cannot be satisfied (eg. requires blocking or too much complexity).
     389             :  * It would then be called again in ref-walk mode.
     390             :  *
     391             :  * If the inode has been found through an idmapped mount the idmap of
     392             :  * the vfsmount must be passed through @idmap. This function will then take
     393             :  * care to map the inode according to @idmap before checking permissions.
     394             :  * On non-idmapped mounts or if permission checking is to be performed on the
     395             :  * raw inode simply pass @nop_mnt_idmap.
     396             :  */
     397           7 : int generic_permission(struct mnt_idmap *idmap, struct inode *inode,
     398             :                        int mask)
     399             : {
     400             :         int ret;
     401             : 
     402             :         /*
     403             :          * Do the basic permission checks.
     404             :          */
     405           7 :         ret = acl_permission_check(idmap, inode, mask);
     406           7 :         if (ret != -EACCES)
     407             :                 return ret;
     408             : 
     409           0 :         if (S_ISDIR(inode->i_mode)) {
     410             :                 /* DACs are overridable for directories */
     411           0 :                 if (!(mask & MAY_WRITE))
     412           0 :                         if (capable_wrt_inode_uidgid(idmap, inode,
     413             :                                                      CAP_DAC_READ_SEARCH))
     414             :                                 return 0;
     415           0 :                 if (capable_wrt_inode_uidgid(idmap, inode,
     416             :                                              CAP_DAC_OVERRIDE))
     417             :                         return 0;
     418           0 :                 return -EACCES;
     419             :         }
     420             : 
     421             :         /*
     422             :          * Searching includes executable on directories, else just read.
     423             :          */
     424           0 :         mask &= MAY_READ | MAY_WRITE | MAY_EXEC;
     425           0 :         if (mask == MAY_READ)
     426           0 :                 if (capable_wrt_inode_uidgid(idmap, inode,
     427             :                                              CAP_DAC_READ_SEARCH))
     428             :                         return 0;
     429             :         /*
     430             :          * Read/write DACs are always overridable.
     431             :          * Executable DACs are overridable when there is
     432             :          * at least one exec bit set.
     433             :          */
     434           0 :         if (!(mask & MAY_EXEC) || (inode->i_mode & S_IXUGO))
     435           0 :                 if (capable_wrt_inode_uidgid(idmap, inode,
     436             :                                              CAP_DAC_OVERRIDE))
     437             :                         return 0;
     438             : 
     439             :         return -EACCES;
     440             : }
     441             : EXPORT_SYMBOL(generic_permission);
     442             : 
     443             : /**
     444             :  * do_inode_permission - UNIX permission checking
     445             :  * @idmap:      idmap of the mount the inode was found from
     446             :  * @inode:      inode to check permissions on
     447             :  * @mask:       right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC ...)
     448             :  *
     449             :  * We _really_ want to just do "generic_permission()" without
     450             :  * even looking at the inode->i_op values. So we keep a cache
     451             :  * flag in inode->i_opflags, that says "this has no special
     452             :  * permission function, use the fast case".
     453             :  */
     454           7 : static inline int do_inode_permission(struct mnt_idmap *idmap,
     455             :                                       struct inode *inode, int mask)
     456             : {
     457           7 :         if (unlikely(!(inode->i_opflags & IOP_FASTPERM))) {
     458           2 :                 if (likely(inode->i_op->permission))
     459           0 :                         return inode->i_op->permission(idmap, inode, mask);
     460             : 
     461             :                 /* This gets set once for the inode lifetime */
     462           4 :                 spin_lock(&inode->i_lock);
     463           2 :                 inode->i_opflags |= IOP_FASTPERM;
     464           2 :                 spin_unlock(&inode->i_lock);
     465             :         }
     466           7 :         return generic_permission(idmap, inode, mask);
     467             : }
     468             : 
     469             : /**
     470             :  * sb_permission - Check superblock-level permissions
     471             :  * @sb: Superblock of inode to check permission on
     472             :  * @inode: Inode to check permission on
     473             :  * @mask: Right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC)
     474             :  *
     475             :  * Separate out file-system wide checks from inode-specific permission checks.
     476             :  */
     477             : static int sb_permission(struct super_block *sb, struct inode *inode, int mask)
     478             : {
     479           7 :         if (unlikely(mask & MAY_WRITE)) {
     480           3 :                 umode_t mode = inode->i_mode;
     481             : 
     482             :                 /* Nobody gets write access to a read-only fs. */
     483           6 :                 if (sb_rdonly(sb) && (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)))
     484             :                         return -EROFS;
     485             :         }
     486             :         return 0;
     487             : }
     488             : 
     489             : /**
     490             :  * inode_permission - Check for access rights to a given inode
     491             :  * @idmap:      idmap of the mount the inode was found from
     492             :  * @inode:      Inode to check permission on
     493             :  * @mask:       Right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC)
     494             :  *
     495             :  * Check for read/write/execute permissions on an inode.  We use fs[ug]id for
     496             :  * this, letting us set arbitrary permissions for filesystem access without
     497             :  * changing the "normal" UIDs which are used for other things.
     498             :  *
     499             :  * When checking for MAY_APPEND, MAY_WRITE must also be set in @mask.
     500             :  */
     501           7 : int inode_permission(struct mnt_idmap *idmap,
     502             :                      struct inode *inode, int mask)
     503             : {
     504             :         int retval;
     505             : 
     506          14 :         retval = sb_permission(inode->i_sb, inode, mask);
     507           7 :         if (retval)
     508             :                 return retval;
     509             : 
     510           7 :         if (unlikely(mask & MAY_WRITE)) {
     511             :                 /*
     512             :                  * Nobody gets write access to an immutable file.
     513             :                  */
     514           3 :                 if (IS_IMMUTABLE(inode))
     515             :                         return -EPERM;
     516             : 
     517             :                 /*
     518             :                  * Updating mtime will likely cause i_uid and i_gid to be
     519             :                  * written back improperly if their true value is unknown
     520             :                  * to the vfs.
     521             :                  */
     522           3 :                 if (HAS_UNMAPPED_ID(idmap, inode))
     523             :                         return -EACCES;
     524             :         }
     525             : 
     526           7 :         retval = do_inode_permission(idmap, inode, mask);
     527           7 :         if (retval)
     528             :                 return retval;
     529             : 
     530           7 :         retval = devcgroup_inode_permission(inode, mask);
     531             :         if (retval)
     532             :                 return retval;
     533             : 
     534           7 :         return security_inode_permission(inode, mask);
     535             : }
     536             : EXPORT_SYMBOL(inode_permission);
     537             : 
     538             : /**
     539             :  * path_get - get a reference to a path
     540             :  * @path: path to get the reference to
     541             :  *
     542             :  * Given a path increment the reference count to the dentry and the vfsmount.
     543             :  */
     544           2 : void path_get(const struct path *path)
     545             : {
     546           2 :         mntget(path->mnt);
     547           4 :         dget(path->dentry);
     548           2 : }
     549             : EXPORT_SYMBOL(path_get);
     550             : 
     551             : /**
     552             :  * path_put - put a reference to a path
     553             :  * @path: path to put the reference to
     554             :  *
     555             :  * Given a path, decrement the reference counts of its dentry and vfsmount.
     556             :  */
     557           0 : void path_put(const struct path *path)
     558             : {
     559           6 :         dput(path->dentry);
     560           6 :         mntput(path->mnt);
     561           0 : }
     562             : EXPORT_SYMBOL(path_put);
     563             : 
     564             : #define EMBEDDED_LEVELS 2
     565             : struct nameidata {
     566             :         struct path     path;
     567             :         struct qstr     last;
     568             :         struct path     root;
     569             :         struct inode    *inode; /* path.dentry.d_inode */
     570             :         unsigned int    flags, state;
     571             :         unsigned        seq, next_seq, m_seq, r_seq;
     572             :         int             last_type;
     573             :         unsigned        depth;
     574             :         int             total_link_count;
     575             :         struct saved {
     576             :                 struct path link;
     577             :                 struct delayed_call done;
     578             :                 const char *name;
     579             :                 unsigned seq;
     580             :         } *stack, internal[EMBEDDED_LEVELS];
     581             :         struct filename *name;
     582             :         struct nameidata *saved;
     583             :         unsigned        root_seq;
     584             :         int             dfd;
     585             :         vfsuid_t        dir_vfsuid;
     586             :         umode_t         dir_mode;
     587             : } __randomize_layout;
     588             : 
     589             : #define ND_ROOT_PRESET 1
     590             : #define ND_ROOT_GRABBED 2
     591             : #define ND_JUMPED 4
     592             : 
     593             : static void __set_nameidata(struct nameidata *p, int dfd, struct filename *name)
     594             : {
     595           3 :         struct nameidata *old = current->nameidata;
     596           3 :         p->stack = p->internal;
     597           3 :         p->depth = 0;
     598           3 :         p->dfd = dfd;
     599           3 :         p->name = name;
     600           3 :         p->path.mnt = NULL;
     601           3 :         p->path.dentry = NULL;
     602           3 :         p->total_link_count = old ? old->total_link_count : 0;
     603           3 :         p->saved = old;
     604           3 :         current->nameidata = p;
     605             : }
     606             : 
     607             : static inline void set_nameidata(struct nameidata *p, int dfd, struct filename *name,
     608             :                           const struct path *root)
     609             : {
     610           3 :         __set_nameidata(p, dfd, name);
     611           3 :         p->state = 0;
     612           0 :         if (unlikely(root)) {
     613           0 :                 p->state = ND_ROOT_PRESET;
     614           0 :                 p->root = *root;
     615             :         }
     616             : }
     617             : 
     618           3 : static void restore_nameidata(void)
     619             : {
     620           3 :         struct nameidata *now = current->nameidata, *old = now->saved;
     621             : 
     622           3 :         current->nameidata = old;
     623           3 :         if (old)
     624           0 :                 old->total_link_count = now->total_link_count;
     625           3 :         if (now->stack != now->internal)
     626           0 :                 kfree(now->stack);
     627           3 : }
     628             : 
     629           0 : static bool nd_alloc_stack(struct nameidata *nd)
     630             : {
     631             :         struct saved *p;
     632             : 
     633           0 :         p= kmalloc_array(MAXSYMLINKS, sizeof(struct saved),
     634           0 :                          nd->flags & LOOKUP_RCU ? GFP_ATOMIC : GFP_KERNEL);
     635           0 :         if (unlikely(!p))
     636             :                 return false;
     637           0 :         memcpy(p, nd->internal, sizeof(nd->internal));
     638           0 :         nd->stack = p;
     639           0 :         return true;
     640             : }
     641             : 
     642             : /**
     643             :  * path_connected - Verify that a dentry is below mnt.mnt_root
     644             :  *
     645             :  * Rename can sometimes move a file or directory outside of a bind
     646             :  * mount; path_connected() allows those cases to be detected.
     647             :  */
     648             : static bool path_connected(struct vfsmount *mnt, struct dentry *dentry)
     649             : {
     650           0 :         struct super_block *sb = mnt->mnt_sb;
     651             : 
     652             :         /* Bind mounts can have disconnected paths */
     653           0 :         if (mnt->mnt_root == sb->s_root)
     654             :                 return true;
     655             : 
     656           0 :         return is_subdir(dentry, mnt->mnt_root);
     657             : }
     658             : 
     659             : static void drop_links(struct nameidata *nd)
     660             : {
     661           3 :         int i = nd->depth;
     662           3 :         while (i--) {
     663           0 :                 struct saved *last = nd->stack + i;
     664           0 :                 do_delayed_call(&last->done);
     665           0 :                 clear_delayed_call(&last->done);
     666             :         }
     667             : }
     668             : 
     669             : static void leave_rcu(struct nameidata *nd)
     670             : {
     671           3 :         nd->flags &= ~LOOKUP_RCU;
     672           3 :         nd->seq = nd->next_seq = 0;
     673             :         rcu_read_unlock();
     674             : }
     675             : 
     676           3 : static void terminate_walk(struct nameidata *nd)
     677             : {
     678           6 :         drop_links(nd);
     679           3 :         if (!(nd->flags & LOOKUP_RCU)) {
     680             :                 int i;
     681           6 :                 path_put(&nd->path);
     682           3 :                 for (i = 0; i < nd->depth; i++)
     683           0 :                         path_put(&nd->stack[i].link);
     684           3 :                 if (nd->state & ND_ROOT_GRABBED) {
     685           0 :                         path_put(&nd->root);
     686           0 :                         nd->state &= ~ND_ROOT_GRABBED;
     687             :                 }
     688             :         } else {
     689             :                 leave_rcu(nd);
     690             :         }
     691           3 :         nd->depth = 0;
     692           3 :         nd->path.mnt = NULL;
     693           3 :         nd->path.dentry = NULL;
     694           3 : }
     695             : 
     696             : /* path_put is needed afterwards regardless of success or failure */
     697           3 : static bool __legitimize_path(struct path *path, unsigned seq, unsigned mseq)
     698             : {
     699           3 :         int res = __legitimize_mnt(path->mnt, mseq);
     700           3 :         if (unlikely(res)) {
     701           0 :                 if (res > 0)
     702           0 :                         path->mnt = NULL;
     703           0 :                 path->dentry = NULL;
     704           0 :                 return false;
     705             :         }
     706           3 :         if (unlikely(!lockref_get_not_dead(&path->dentry->d_lockref))) {
     707           0 :                 path->dentry = NULL;
     708           0 :                 return false;
     709             :         }
     710           9 :         return !read_seqcount_retry(&path->dentry->d_seq, seq);
     711             : }
     712             : 
     713             : static inline bool legitimize_path(struct nameidata *nd,
     714             :                             struct path *path, unsigned seq)
     715             : {
     716           3 :         return __legitimize_path(path, seq, nd->m_seq);
     717             : }
     718             : 
     719           3 : static bool legitimize_links(struct nameidata *nd)
     720             : {
     721             :         int i;
     722           3 :         if (unlikely(nd->flags & LOOKUP_CACHED)) {
     723           0 :                 drop_links(nd);
     724           0 :                 nd->depth = 0;
     725           0 :                 return false;
     726             :         }
     727           0 :         for (i = 0; i < nd->depth; i++) {
     728           0 :                 struct saved *last = nd->stack + i;
     729           0 :                 if (unlikely(!legitimize_path(nd, &last->link, last->seq))) {
     730           0 :                         drop_links(nd);
     731           0 :                         nd->depth = i + 1;
     732           0 :                         return false;
     733             :                 }
     734             :         }
     735             :         return true;
     736             : }
     737             : 
     738           3 : static bool legitimize_root(struct nameidata *nd)
     739             : {
     740             :         /* Nothing to do if nd->root is zero or is managed by the VFS user. */
     741           3 :         if (!nd->root.mnt || (nd->state & ND_ROOT_PRESET))
     742             :                 return true;
     743           0 :         nd->state |= ND_ROOT_GRABBED;
     744           0 :         return legitimize_path(nd, &nd->root, nd->root_seq);
     745             : }
     746             : 
     747             : /*
     748             :  * Path walking has 2 modes, rcu-walk and ref-walk (see
     749             :  * Documentation/filesystems/path-lookup.txt).  In situations when we can't
     750             :  * continue in RCU mode, we attempt to drop out of rcu-walk mode and grab
     751             :  * normal reference counts on dentries and vfsmounts to transition to ref-walk
     752             :  * mode.  Refcounts are grabbed at the last known good point before rcu-walk
     753             :  * got stuck, so ref-walk may continue from there. If this is not successful
     754             :  * (eg. a seqcount has changed), then failure is returned and it's up to caller
     755             :  * to restart the path walk from the beginning in ref-walk mode.
     756             :  */
     757             : 
     758             : /**
     759             :  * try_to_unlazy - try to switch to ref-walk mode.
     760             :  * @nd: nameidata pathwalk data
     761             :  * Returns: true on success, false on failure
     762             :  *
     763             :  * try_to_unlazy attempts to legitimize the current nd->path and nd->root
     764             :  * for ref-walk mode.
     765             :  * Must be called from rcu-walk context.
     766             :  * Nothing should touch nameidata between try_to_unlazy() failure and
     767             :  * terminate_walk().
     768             :  */
     769           3 : static bool try_to_unlazy(struct nameidata *nd)
     770             : {
     771           3 :         struct dentry *parent = nd->path.dentry;
     772             : 
     773           3 :         BUG_ON(!(nd->flags & LOOKUP_RCU));
     774             : 
     775           3 :         if (unlikely(!legitimize_links(nd)))
     776             :                 goto out1;
     777           6 :         if (unlikely(!legitimize_path(nd, &nd->path, nd->seq)))
     778             :                 goto out;
     779           3 :         if (unlikely(!legitimize_root(nd)))
     780             :                 goto out;
     781           3 :         leave_rcu(nd);
     782           3 :         BUG_ON(nd->inode != parent->d_inode);
     783             :         return true;
     784             : 
     785             : out1:
     786           0 :         nd->path.mnt = NULL;
     787           0 :         nd->path.dentry = NULL;
     788             : out:
     789           0 :         leave_rcu(nd);
     790           0 :         return false;
     791             : }
     792             : 
     793             : /**
     794             :  * try_to_unlazy_next - try to switch to ref-walk mode.
     795             :  * @nd: nameidata pathwalk data
     796             :  * @dentry: next dentry to step into
     797             :  * Returns: true on success, false on failure
     798             :  *
     799             :  * Similar to try_to_unlazy(), but here we have the next dentry already
     800             :  * picked by rcu-walk and want to legitimize that in addition to the current
     801             :  * nd->path and nd->root for ref-walk mode.  Must be called from rcu-walk context.
     802             :  * Nothing should touch nameidata between try_to_unlazy_next() failure and
     803             :  * terminate_walk().
     804             :  */
     805           0 : static bool try_to_unlazy_next(struct nameidata *nd, struct dentry *dentry)
     806             : {
     807             :         int res;
     808           0 :         BUG_ON(!(nd->flags & LOOKUP_RCU));
     809             : 
     810           0 :         if (unlikely(!legitimize_links(nd)))
     811             :                 goto out2;
     812           0 :         res = __legitimize_mnt(nd->path.mnt, nd->m_seq);
     813           0 :         if (unlikely(res)) {
     814           0 :                 if (res > 0)
     815             :                         goto out2;
     816             :                 goto out1;
     817             :         }
     818           0 :         if (unlikely(!lockref_get_not_dead(&nd->path.dentry->d_lockref)))
     819             :                 goto out1;
     820             : 
     821             :         /*
     822             :          * We need to move both the parent and the dentry from the RCU domain
     823             :          * to be properly refcounted. And the sequence number in the dentry
     824             :          * validates *both* dentry counters, since we checked the sequence
     825             :          * number of the parent after we got the child sequence number. So we
     826             :          * know the parent must still be valid if the child sequence number is
     827             :          */
     828           0 :         if (unlikely(!lockref_get_not_dead(&dentry->d_lockref)))
     829             :                 goto out;
     830           0 :         if (read_seqcount_retry(&dentry->d_seq, nd->next_seq))
     831             :                 goto out_dput;
     832             :         /*
     833             :          * Sequence counts matched. Now make sure that the root is
     834             :          * still valid and get it if required.
     835             :          */
     836           0 :         if (unlikely(!legitimize_root(nd)))
     837             :                 goto out_dput;
     838           0 :         leave_rcu(nd);
     839           0 :         return true;
     840             : 
     841             : out2:
     842           0 :         nd->path.mnt = NULL;
     843             : out1:
     844           0 :         nd->path.dentry = NULL;
     845             : out:
     846           0 :         leave_rcu(nd);
     847           0 :         return false;
     848             : out_dput:
     849           0 :         leave_rcu(nd);
     850           0 :         dput(dentry);
     851           0 :         return false;
     852             : }
     853             : 
     854             : static inline int d_revalidate(struct dentry *dentry, unsigned int flags)
     855             : {
     856           1 :         if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE))
     857           0 :                 return dentry->d_op->d_revalidate(dentry, flags);
     858             :         else
     859             :                 return 1;
     860             : }
     861             : 
     862             : /**
     863             :  * complete_walk - successful completion of path walk
     864             :  * @nd:  pointer to nameidata
     865             :  *
     866             :  * If we had been in RCU mode, drop out of it and legitimize nd->path.
     867             :  * Revalidate the final result, unless we'd already done that during
     868             :  * the path walk or the filesystem doesn't ask for it.  Return 0 on
     869             :  * success, -error on failure.  In case of failure caller does not
     870             :  * need to drop nd->path.
     871             :  */
     872           3 : static int complete_walk(struct nameidata *nd)
     873             : {
     874           3 :         struct dentry *dentry = nd->path.dentry;
     875             :         int status;
     876             : 
     877           3 :         if (nd->flags & LOOKUP_RCU) {
     878             :                 /*
     879             :                  * We don't want to zero nd->root for scoped-lookups or
     880             :                  * externally-managed nd->root.
     881             :                  */
     882           3 :                 if (!(nd->state & ND_ROOT_PRESET))
     883           3 :                         if (!(nd->flags & LOOKUP_IS_SCOPED))
     884           3 :                                 nd->root.mnt = NULL;
     885           3 :                 nd->flags &= ~LOOKUP_CACHED;
     886           3 :                 if (!try_to_unlazy(nd))
     887             :                         return -ECHILD;
     888             :         }
     889             : 
     890           3 :         if (unlikely(nd->flags & LOOKUP_IS_SCOPED)) {
     891             :                 /*
     892             :                  * While the guarantee of LOOKUP_IS_SCOPED is (roughly) "don't
     893             :                  * ever step outside the root during lookup" and should already
     894             :                  * be guaranteed by the rest of namei, we want to avoid a namei
     895             :                  * BUG resulting in userspace being given a path that was not
     896             :                  * scoped within the root at some point during the lookup.
     897             :                  *
     898             :                  * So, do a final sanity-check to make sure that in the
     899             :                  * worst-case scenario (a complete bypass of LOOKUP_IS_SCOPED)
     900             :                  * we won't silently return an fd completely outside of the
     901             :                  * requested root to userspace.
     902             :                  *
     903             :                  * Userspace could move the path outside the root after this
     904             :                  * check, but as discussed elsewhere this is not a concern (the
     905             :                  * resolved file was inside the root at some point).
     906             :                  */
     907           0 :                 if (!path_is_under(&nd->path, &nd->root))
     908             :                         return -EXDEV;
     909             :         }
     910             : 
     911           3 :         if (likely(!(nd->state & ND_JUMPED)))
     912             :                 return 0;
     913             : 
     914           0 :         if (likely(!(dentry->d_flags & DCACHE_OP_WEAK_REVALIDATE)))
     915             :                 return 0;
     916             : 
     917           0 :         status = dentry->d_op->d_weak_revalidate(dentry, nd->flags);
     918           0 :         if (status > 0)
     919             :                 return 0;
     920             : 
     921           0 :         if (!status)
     922           0 :                 status = -ESTALE;
     923             : 
     924             :         return status;
     925             : }
     926             : 
     927           3 : static int set_root(struct nameidata *nd)
     928             : {
     929           3 :         struct fs_struct *fs = current->fs;
     930             : 
     931             :         /*
     932             :          * Jumping to the real root in a scoped-lookup is a BUG in namei, but we
     933             :          * still have to ensure it doesn't happen because it will cause a breakout
     934             :          * from the dirfd.
     935             :          */
     936           3 :         if (WARN_ON(nd->flags & LOOKUP_IS_SCOPED))
     937             :                 return -ENOTRECOVERABLE;
     938             : 
     939           3 :         if (nd->flags & LOOKUP_RCU) {
     940             :                 unsigned seq;
     941             : 
     942             :                 do {
     943           6 :                         seq = read_seqcount_begin(&fs->seq);
     944           3 :                         nd->root = fs->root;
     945           9 :                         nd->root_seq = __read_seqcount_begin(&nd->root.dentry->d_seq);
     946           9 :                 } while (read_seqcount_retry(&fs->seq, seq));
     947             :         } else {
     948           0 :                 get_fs_root(fs, &nd->root);
     949           0 :                 nd->state |= ND_ROOT_GRABBED;
     950             :         }
     951             :         return 0;
     952             : }
     953             : 
     954           3 : static int nd_jump_root(struct nameidata *nd)
     955             : {
     956           3 :         if (unlikely(nd->flags & LOOKUP_BENEATH))
     957             :                 return -EXDEV;
     958           3 :         if (unlikely(nd->flags & LOOKUP_NO_XDEV)) {
     959             :                 /* Absolute path arguments to path_init() are allowed. */
     960           0 :                 if (nd->path.mnt != NULL && nd->path.mnt != nd->root.mnt)
     961             :                         return -EXDEV;
     962             :         }
     963           3 :         if (!nd->root.mnt) {
     964           3 :                 int error = set_root(nd);
     965           3 :                 if (error)
     966             :                         return error;
     967             :         }
     968           3 :         if (nd->flags & LOOKUP_RCU) {
     969             :                 struct dentry *d;
     970           3 :                 nd->path = nd->root;
     971           3 :                 d = nd->path.dentry;
     972           3 :                 nd->inode = d->d_inode;
     973           3 :                 nd->seq = nd->root_seq;
     974           9 :                 if (read_seqcount_retry(&d->d_seq, nd->seq))
     975             :                         return -ECHILD;
     976             :         } else {
     977           0 :                 path_put(&nd->path);
     978           0 :                 nd->path = nd->root;
     979           0 :                 path_get(&nd->path);
     980           0 :                 nd->inode = nd->path.dentry->d_inode;
     981             :         }
     982           3 :         nd->state |= ND_JUMPED;
     983           3 :         return 0;
     984             : }
     985             : 
     986             : /*
     987             :  * Helper to directly jump to a known parsed path from ->get_link,
     988             :  * caller must have taken a reference to path beforehand.
     989             :  */
     990           0 : int nd_jump_link(const struct path *path)
     991             : {
     992           0 :         int error = -ELOOP;
     993           0 :         struct nameidata *nd = current->nameidata;
     994             : 
     995           0 :         if (unlikely(nd->flags & LOOKUP_NO_MAGICLINKS))
     996             :                 goto err;
     997             : 
     998           0 :         error = -EXDEV;
     999           0 :         if (unlikely(nd->flags & LOOKUP_NO_XDEV)) {
    1000           0 :                 if (nd->path.mnt != path->mnt)
    1001             :                         goto err;
    1002             :         }
    1003             :         /* Not currently safe for scoped-lookups. */
    1004           0 :         if (unlikely(nd->flags & LOOKUP_IS_SCOPED))
    1005             :                 goto err;
    1006             : 
    1007           0 :         path_put(&nd->path);
    1008           0 :         nd->path = *path;
    1009           0 :         nd->inode = nd->path.dentry->d_inode;
    1010           0 :         nd->state |= ND_JUMPED;
    1011           0 :         return 0;
    1012             : 
    1013             : err:
    1014           0 :         path_put(path);
    1015           0 :         return error;
    1016             : }
    1017             : 
    1018           0 : static inline void put_link(struct nameidata *nd)
    1019             : {
    1020           0 :         struct saved *last = nd->stack + --nd->depth;
    1021           0 :         do_delayed_call(&last->done);
    1022           0 :         if (!(nd->flags & LOOKUP_RCU))
    1023           0 :                 path_put(&last->link);
    1024           0 : }
    1025             : 
    1026             : static int sysctl_protected_symlinks __read_mostly;
    1027             : static int sysctl_protected_hardlinks __read_mostly;
    1028             : static int sysctl_protected_fifos __read_mostly;
    1029             : static int sysctl_protected_regular __read_mostly;
    1030             : 
    1031             : #ifdef CONFIG_SYSCTL
    1032             : static struct ctl_table namei_sysctls[] = {
    1033             :         {
    1034             :                 .procname       = "protected_symlinks",
    1035             :                 .data           = &sysctl_protected_symlinks,
    1036             :                 .maxlen         = sizeof(int),
    1037             :                 .mode           = 0644,
    1038             :                 .proc_handler   = proc_dointvec_minmax,
    1039             :                 .extra1         = SYSCTL_ZERO,
    1040             :                 .extra2         = SYSCTL_ONE,
    1041             :         },
    1042             :         {
    1043             :                 .procname       = "protected_hardlinks",
    1044             :                 .data           = &sysctl_protected_hardlinks,
    1045             :                 .maxlen         = sizeof(int),
    1046             :                 .mode           = 0644,
    1047             :                 .proc_handler   = proc_dointvec_minmax,
    1048             :                 .extra1         = SYSCTL_ZERO,
    1049             :                 .extra2         = SYSCTL_ONE,
    1050             :         },
    1051             :         {
    1052             :                 .procname       = "protected_fifos",
    1053             :                 .data           = &sysctl_protected_fifos,
    1054             :                 .maxlen         = sizeof(int),
    1055             :                 .mode           = 0644,
    1056             :                 .proc_handler   = proc_dointvec_minmax,
    1057             :                 .extra1         = SYSCTL_ZERO,
    1058             :                 .extra2         = SYSCTL_TWO,
    1059             :         },
    1060             :         {
    1061             :                 .procname       = "protected_regular",
    1062             :                 .data           = &sysctl_protected_regular,
    1063             :                 .maxlen         = sizeof(int),
    1064             :                 .mode           = 0644,
    1065             :                 .proc_handler   = proc_dointvec_minmax,
    1066             :                 .extra1         = SYSCTL_ZERO,
    1067             :                 .extra2         = SYSCTL_TWO,
    1068             :         },
    1069             :         { }
    1070             : };
    1071             : 
    1072           1 : static int __init init_fs_namei_sysctls(void)
    1073             : {
    1074           1 :         register_sysctl_init("fs", namei_sysctls);
    1075           1 :         return 0;
    1076             : }
    1077             : fs_initcall(init_fs_namei_sysctls);
    1078             : 
    1079             : #endif /* CONFIG_SYSCTL */
    1080             : 
    1081             : /**
    1082             :  * may_follow_link - Check symlink following for unsafe situations
    1083             :  * @nd: nameidata pathwalk data
    1084             :  *
    1085             :  * In the case of the sysctl_protected_symlinks sysctl being enabled,
    1086             :  * CAP_DAC_OVERRIDE needs to be specifically ignored if the symlink is
    1087             :  * in a sticky world-writable directory. This is to protect privileged
    1088             :  * processes from failing races against path names that may change out
    1089             :  * from under them by way of other users creating malicious symlinks.
    1090             :  * It will permit symlinks to be followed only when outside a sticky
    1091             :  * world-writable directory, or when the uid of the symlink and follower
    1092             :  * match, or when the directory owner matches the symlink's owner.
    1093             :  *
    1094             :  * Returns 0 if following the symlink is allowed, -ve on error.
    1095             :  */
    1096           0 : static inline int may_follow_link(struct nameidata *nd, const struct inode *inode)
    1097             : {
    1098             :         struct mnt_idmap *idmap;
    1099             :         vfsuid_t vfsuid;
    1100             : 
    1101           0 :         if (!sysctl_protected_symlinks)
    1102             :                 return 0;
    1103             : 
    1104           0 :         idmap = mnt_idmap(nd->path.mnt);
    1105           0 :         vfsuid = i_uid_into_vfsuid(idmap, inode);
    1106             :         /* Allowed if owner and follower match. */
    1107           0 :         if (vfsuid_eq_kuid(vfsuid, current_fsuid()))
    1108             :                 return 0;
    1109             : 
    1110             :         /* Allowed if parent directory not sticky and world-writable. */
    1111           0 :         if ((nd->dir_mode & (S_ISVTX|S_IWOTH)) != (S_ISVTX|S_IWOTH))
    1112             :                 return 0;
    1113             : 
    1114             :         /* Allowed if parent directory and link owner match. */
    1115           0 :         if (vfsuid_valid(nd->dir_vfsuid) && vfsuid_eq(nd->dir_vfsuid, vfsuid))
    1116             :                 return 0;
    1117             : 
    1118           0 :         if (nd->flags & LOOKUP_RCU)
    1119             :                 return -ECHILD;
    1120             : 
    1121           0 :         audit_inode(nd->name, nd->stack[0].link.dentry, 0);
    1122           0 :         audit_log_path_denied(AUDIT_ANOM_LINK, "follow_link");
    1123           0 :         return -EACCES;
    1124             : }
    1125             : 
    1126             : /**
    1127             :  * safe_hardlink_source - Check for safe hardlink conditions
    1128             :  * @idmap: idmap of the mount the inode was found from
    1129             :  * @inode: the source inode to hardlink from
    1130             :  *
    1131             :  * Return false if at least one of the following conditions:
    1132             :  *    - inode is not a regular file
    1133             :  *    - inode is setuid
    1134             :  *    - inode is setgid and group-exec
    1135             :  *    - access failure for read and write
    1136             :  *
    1137             :  * Otherwise returns true.
    1138             :  */
    1139           0 : static bool safe_hardlink_source(struct mnt_idmap *idmap,
    1140             :                                  struct inode *inode)
    1141             : {
    1142           0 :         umode_t mode = inode->i_mode;
    1143             : 
    1144             :         /* Special files should not get pinned to the filesystem. */
    1145           0 :         if (!S_ISREG(mode))
    1146             :                 return false;
    1147             : 
    1148             :         /* Setuid files should not get pinned to the filesystem. */
    1149           0 :         if (mode & S_ISUID)
    1150             :                 return false;
    1151             : 
    1152             :         /* Executable setgid files should not get pinned to the filesystem. */
    1153           0 :         if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP))
    1154             :                 return false;
    1155             : 
    1156             :         /* Hardlinking to unreadable or unwritable sources is dangerous. */
    1157           0 :         if (inode_permission(idmap, inode, MAY_READ | MAY_WRITE))
    1158             :                 return false;
    1159             : 
    1160           0 :         return true;
    1161             : }
    1162             : 
    1163             : /**
    1164             :  * may_linkat - Check permissions for creating a hardlink
    1165             :  * @idmap: idmap of the mount the inode was found from
    1166             :  * @link:  the source to hardlink from
    1167             :  *
    1168             :  * Block hardlink when all of:
    1169             :  *  - sysctl_protected_hardlinks enabled
    1170             :  *  - fsuid does not match inode
    1171             :  *  - hardlink source is unsafe (see safe_hardlink_source() above)
    1172             :  *  - not CAP_FOWNER in a namespace with the inode owner uid mapped
    1173             :  *
    1174             :  * If the inode has been found through an idmapped mount the idmap of
    1175             :  * the vfsmount must be passed through @idmap. This function will then take
    1176             :  * care to map the inode according to @idmap before checking permissions.
    1177             :  * On non-idmapped mounts or if permission checking is to be performed on the
    1178             :  * raw inode simply pass @nop_mnt_idmap.
    1179             :  *
    1180             :  * Returns 0 if successful, -ve on error.
    1181             :  */
    1182           0 : int may_linkat(struct mnt_idmap *idmap, const struct path *link)
    1183             : {
    1184           0 :         struct inode *inode = link->dentry->d_inode;
    1185             : 
    1186             :         /* Inode writeback is not safe when the uid or gid are invalid. */
    1187           0 :         if (!vfsuid_valid(i_uid_into_vfsuid(idmap, inode)) ||
    1188           0 :             !vfsgid_valid(i_gid_into_vfsgid(idmap, inode)))
    1189             :                 return -EOVERFLOW;
    1190             : 
    1191           0 :         if (!sysctl_protected_hardlinks)
    1192             :                 return 0;
    1193             : 
    1194             :         /* Source inode owner (or CAP_FOWNER) can hardlink all they like,
    1195             :          * otherwise, it must be a safe source.
    1196             :          */
    1197           0 :         if (safe_hardlink_source(idmap, inode) ||
    1198           0 :             inode_owner_or_capable(idmap, inode))
    1199             :                 return 0;
    1200             : 
    1201             :         audit_log_path_denied(AUDIT_ANOM_LINK, "linkat");
    1202             :         return -EPERM;
    1203             : }
    1204             : 
    1205             : /**
    1206             :  * may_create_in_sticky - Check whether an O_CREAT open in a sticky directory
    1207             :  *                        should be allowed, or not, on files that already
    1208             :  *                        exist.
    1209             :  * @idmap: idmap of the mount the inode was found from
    1210             :  * @nd: nameidata pathwalk data
    1211             :  * @inode: the inode of the file to open
    1212             :  *
    1213             :  * Block an O_CREAT open of a FIFO (or a regular file) when:
    1214             :  *   - sysctl_protected_fifos (or sysctl_protected_regular) is enabled
    1215             :  *   - the file already exists
    1216             :  *   - we are in a sticky directory
    1217             :  *   - we don't own the file
    1218             :  *   - the owner of the directory doesn't own the file
    1219             :  *   - the directory is world writable
    1220             :  * If the sysctl_protected_fifos (or sysctl_protected_regular) is set to 2
    1221             :  * the directory doesn't have to be world writable: being group writable will
    1222             :  * be enough.
    1223             :  *
    1224             :  * If the inode has been found through an idmapped mount the idmap of
    1225             :  * the vfsmount must be passed through @idmap. This function will then take
    1226             :  * care to map the inode according to @idmap before checking permissions.
    1227             :  * On non-idmapped mounts or if permission checking is to be performed on the
    1228             :  * raw inode simply pass @nop_mnt_idmap.
    1229             :  *
    1230             :  * Returns 0 if the open is allowed, -ve on error.
    1231             :  */
    1232           0 : static int may_create_in_sticky(struct mnt_idmap *idmap,
    1233             :                                 struct nameidata *nd, struct inode *const inode)
    1234             : {
    1235           0 :         umode_t dir_mode = nd->dir_mode;
    1236           0 :         vfsuid_t dir_vfsuid = nd->dir_vfsuid;
    1237             : 
    1238           0 :         if ((!sysctl_protected_fifos && S_ISFIFO(inode->i_mode)) ||
    1239           0 :             (!sysctl_protected_regular && S_ISREG(inode->i_mode)) ||
    1240           0 :             likely(!(dir_mode & S_ISVTX)) ||
    1241           0 :             vfsuid_eq(i_uid_into_vfsuid(idmap, inode), dir_vfsuid) ||
    1242           0 :             vfsuid_eq_kuid(i_uid_into_vfsuid(idmap, inode), current_fsuid()))
    1243             :                 return 0;
    1244             : 
    1245           0 :         if (likely(dir_mode & 0002) ||
    1246           0 :             (dir_mode & 0020 &&
    1247           0 :              ((sysctl_protected_fifos >= 2 && S_ISFIFO(inode->i_mode)) ||
    1248           0 :               (sysctl_protected_regular >= 2 && S_ISREG(inode->i_mode))))) {
    1249           0 :                 const char *operation = S_ISFIFO(inode->i_mode) ?
    1250             :                                         "sticky_create_fifo" :
    1251             :                                         "sticky_create_regular";
    1252             :                 audit_log_path_denied(AUDIT_ANOM_CREAT, operation);
    1253             :                 return -EACCES;
    1254             :         }
    1255             :         return 0;
    1256             : }
    1257             : 
    1258             : /*
    1259             :  * follow_up - Find the mountpoint of path's vfsmount
    1260             :  *
    1261             :  * Given a path, find the mountpoint of its source file system.
    1262             :  * Replace @path with the path of the mountpoint in the parent mount.
    1263             :  * Up is towards /.
    1264             :  *
    1265             :  * Return 1 if we went up a level and 0 if we were already at the
    1266             :  * root.
    1267             :  */
    1268           0 : int follow_up(struct path *path)
    1269             : {
    1270           0 :         struct mount *mnt = real_mount(path->mnt);
    1271             :         struct mount *parent;
    1272             :         struct dentry *mountpoint;
    1273             : 
    1274           0 :         read_seqlock_excl(&mount_lock);
    1275           0 :         parent = mnt->mnt_parent;
    1276           0 :         if (parent == mnt) {
    1277           0 :                 read_sequnlock_excl(&mount_lock);
    1278           0 :                 return 0;
    1279             :         }
    1280           0 :         mntget(&parent->mnt);
    1281           0 :         mountpoint = dget(mnt->mnt_mountpoint);
    1282           0 :         read_sequnlock_excl(&mount_lock);
    1283           0 :         dput(path->dentry);
    1284           0 :         path->dentry = mountpoint;
    1285           0 :         mntput(path->mnt);
    1286           0 :         path->mnt = &parent->mnt;
    1287           0 :         return 1;
    1288             : }
    1289             : EXPORT_SYMBOL(follow_up);
    1290             : 
    1291             : static bool choose_mountpoint_rcu(struct mount *m, const struct path *root,
    1292             :                                   struct path *path, unsigned *seqp)
    1293             : {
    1294           0 :         while (mnt_has_parent(m)) {
    1295           0 :                 struct dentry *mountpoint = m->mnt_mountpoint;
    1296             : 
    1297           0 :                 m = m->mnt_parent;
    1298           0 :                 if (unlikely(root->dentry == mountpoint &&
    1299             :                              root->mnt == &m->mnt))
    1300             :                         break;
    1301           0 :                 if (mountpoint != m->mnt.mnt_root) {
    1302           0 :                         path->mnt = &m->mnt;
    1303           0 :                         path->dentry = mountpoint;
    1304           0 :                         *seqp = read_seqcount_begin(&mountpoint->d_seq);
    1305             :                         return true;
    1306             :                 }
    1307             :         }
    1308             :         return false;
    1309             : }
    1310             : 
    1311           0 : static bool choose_mountpoint(struct mount *m, const struct path *root,
    1312             :                               struct path *path)
    1313             : {
    1314             :         bool found;
    1315             : 
    1316             :         rcu_read_lock();
    1317             :         while (1) {
    1318           0 :                 unsigned seq, mseq = read_seqbegin(&mount_lock);
    1319             : 
    1320           0 :                 found = choose_mountpoint_rcu(m, root, path, &seq);
    1321           0 :                 if (unlikely(!found)) {
    1322           0 :                         if (!read_seqretry(&mount_lock, mseq))
    1323             :                                 break;
    1324             :                 } else {
    1325           0 :                         if (likely(__legitimize_path(path, seq, mseq)))
    1326             :                                 break;
    1327           0 :                         rcu_read_unlock();
    1328           0 :                         path_put(path);
    1329             :                         rcu_read_lock();
    1330             :                 }
    1331             :         }
    1332             :         rcu_read_unlock();
    1333           0 :         return found;
    1334             : }
    1335             : 
    1336             : /*
    1337             :  * Perform an automount
    1338             :  * - return -EISDIR to tell follow_managed() to stop and return the path we
    1339             :  *   were called with.
    1340             :  */
    1341           0 : static int follow_automount(struct path *path, int *count, unsigned lookup_flags)
    1342             : {
    1343           0 :         struct dentry *dentry = path->dentry;
    1344             : 
    1345             :         /* We don't want to mount if someone's just doing a stat -
    1346             :          * unless they're stat'ing a directory and appended a '/' to
    1347             :          * the name.
    1348             :          *
    1349             :          * We do, however, want to mount if someone wants to open or
    1350             :          * create a file of any type under the mountpoint, wants to
    1351             :          * traverse through the mountpoint or wants to open the
    1352             :          * mounted directory.  Also, autofs may mark negative dentries
    1353             :          * as being automount points.  These will need the attentions
    1354             :          * of the daemon to instantiate them before they can be used.
    1355             :          */
    1356           0 :         if (!(lookup_flags & (LOOKUP_PARENT | LOOKUP_DIRECTORY |
    1357           0 :                            LOOKUP_OPEN | LOOKUP_CREATE | LOOKUP_AUTOMOUNT)) &&
    1358           0 :             dentry->d_inode)
    1359             :                 return -EISDIR;
    1360             : 
    1361           0 :         if (count && (*count)++ >= MAXSYMLINKS)
    1362             :                 return -ELOOP;
    1363             : 
    1364           0 :         return finish_automount(dentry->d_op->d_automount(path), path);
    1365             : }
    1366             : 
    1367             : /*
    1368             :  * mount traversal - out-of-line part.  One note on ->d_flags accesses -
    1369             :  * dentries are pinned but not locked here, so negative dentry can go
    1370             :  * positive right under us.  Use of smp_load_acquire() provides a barrier
    1371             :  * sufficient for ->d_inode and ->d_flags consistency.
    1372             :  */
    1373           0 : static int __traverse_mounts(struct path *path, unsigned flags, bool *jumped,
    1374             :                              int *count, unsigned lookup_flags)
    1375             : {
    1376           0 :         struct vfsmount *mnt = path->mnt;
    1377           0 :         bool need_mntput = false;
    1378           0 :         int ret = 0;
    1379             : 
    1380           0 :         while (flags & DCACHE_MANAGED_DENTRY) {
    1381             :                 /* Allow the filesystem to manage the transit without i_mutex
    1382             :                  * being held. */
    1383           0 :                 if (flags & DCACHE_MANAGE_TRANSIT) {
    1384           0 :                         ret = path->dentry->d_op->d_manage(path, false);
    1385           0 :                         flags = smp_load_acquire(&path->dentry->d_flags);
    1386           0 :                         if (ret < 0)
    1387             :                                 break;
    1388             :                 }
    1389             : 
    1390           0 :                 if (flags & DCACHE_MOUNTED) {       // something's mounted on it..
    1391           0 :                         struct vfsmount *mounted = lookup_mnt(path);
    1392           0 :                         if (mounted) {          // ... in our namespace
    1393           0 :                                 dput(path->dentry);
    1394           0 :                                 if (need_mntput)
    1395           0 :                                         mntput(path->mnt);
    1396           0 :                                 path->mnt = mounted;
    1397           0 :                                 path->dentry = dget(mounted->mnt_root);
    1398             :                                 // here we know it's positive
    1399           0 :                                 flags = path->dentry->d_flags;
    1400           0 :                                 need_mntput = true;
    1401           0 :                                 continue;
    1402             :                         }
    1403             :                 }
    1404             : 
    1405           0 :                 if (!(flags & DCACHE_NEED_AUTOMOUNT))
    1406             :                         break;
    1407             : 
    1408             :                 // uncovered automount point
    1409           0 :                 ret = follow_automount(path, count, lookup_flags);
    1410           0 :                 flags = smp_load_acquire(&path->dentry->d_flags);
    1411           0 :                 if (ret < 0)
    1412             :                         break;
    1413             :         }
    1414             : 
    1415           0 :         if (ret == -EISDIR)
    1416           0 :                 ret = 0;
    1417             :         // possible if you race with several mount --move
    1418           0 :         if (need_mntput && path->mnt == mnt)
    1419           0 :                 mntput(path->mnt);
    1420           0 :         if (!ret && unlikely(d_flags_negative(flags)))
    1421           0 :                 ret = -ENOENT;
    1422           0 :         *jumped = need_mntput;
    1423           0 :         return ret;
    1424             : }
    1425             : 
    1426           0 : static inline int traverse_mounts(struct path *path, bool *jumped,
    1427             :                                   int *count, unsigned lookup_flags)
    1428             : {
    1429           0 :         unsigned flags = smp_load_acquire(&path->dentry->d_flags);
    1430             : 
    1431             :         /* fastpath */
    1432           0 :         if (likely(!(flags & DCACHE_MANAGED_DENTRY))) {
    1433           0 :                 *jumped = false;
    1434           0 :                 if (unlikely(d_flags_negative(flags)))
    1435             :                         return -ENOENT;
    1436           0 :                 return 0;
    1437             :         }
    1438           0 :         return __traverse_mounts(path, flags, jumped, count, lookup_flags);
    1439             : }
    1440             : 
    1441           0 : int follow_down_one(struct path *path)
    1442             : {
    1443             :         struct vfsmount *mounted;
    1444             : 
    1445           0 :         mounted = lookup_mnt(path);
    1446           0 :         if (mounted) {
    1447           0 :                 dput(path->dentry);
    1448           0 :                 mntput(path->mnt);
    1449           0 :                 path->mnt = mounted;
    1450           0 :                 path->dentry = dget(mounted->mnt_root);
    1451           0 :                 return 1;
    1452             :         }
    1453             :         return 0;
    1454             : }
    1455             : EXPORT_SYMBOL(follow_down_one);
    1456             : 
    1457             : /*
    1458             :  * Follow down to the covering mount currently visible to userspace.  At each
    1459             :  * point, the filesystem owning that dentry may be queried as to whether the
    1460             :  * caller is permitted to proceed or not.
    1461             :  */
    1462           0 : int follow_down(struct path *path, unsigned int flags)
    1463             : {
    1464           0 :         struct vfsmount *mnt = path->mnt;
    1465             :         bool jumped;
    1466           0 :         int ret = traverse_mounts(path, &jumped, NULL, flags);
    1467             : 
    1468           0 :         if (path->mnt != mnt)
    1469           0 :                 mntput(mnt);
    1470           0 :         return ret;
    1471             : }
    1472             : EXPORT_SYMBOL(follow_down);
    1473             : 
    1474             : /*
    1475             :  * Try to skip to top of mountpoint pile in rcuwalk mode.  Fail if
    1476             :  * we meet a managed dentry that would need blocking.
    1477             :  */
    1478           1 : static bool __follow_mount_rcu(struct nameidata *nd, struct path *path)
    1479             : {
    1480           1 :         struct dentry *dentry = path->dentry;
    1481           1 :         unsigned int flags = dentry->d_flags;
    1482             : 
    1483           1 :         if (likely(!(flags & DCACHE_MANAGED_DENTRY)))
    1484             :                 return true;
    1485             : 
    1486           0 :         if (unlikely(nd->flags & LOOKUP_NO_XDEV))
    1487             :                 return false;
    1488             : 
    1489             :         for (;;) {
    1490             :                 /*
    1491             :                  * Don't forget we might have a non-mountpoint managed dentry
    1492             :                  * that wants to block transit.
    1493             :                  */
    1494           0 :                 if (unlikely(flags & DCACHE_MANAGE_TRANSIT)) {
    1495           0 :                         int res = dentry->d_op->d_manage(path, true);
    1496           0 :                         if (res)
    1497           0 :                                 return res == -EISDIR;
    1498           0 :                         flags = dentry->d_flags;
    1499             :                 }
    1500             : 
    1501           0 :                 if (flags & DCACHE_MOUNTED) {
    1502           0 :                         struct mount *mounted = __lookup_mnt(path->mnt, dentry);
    1503           0 :                         if (mounted) {
    1504           0 :                                 path->mnt = &mounted->mnt;
    1505           0 :                                 dentry = path->dentry = mounted->mnt.mnt_root;
    1506           0 :                                 nd->state |= ND_JUMPED;
    1507           0 :                                 nd->next_seq = read_seqcount_begin(&dentry->d_seq);
    1508           0 :                                 flags = dentry->d_flags;
    1509             :                                 // makes sure that non-RCU pathwalk could reach
    1510             :                                 // this state.
    1511           0 :                                 if (read_seqretry(&mount_lock, nd->m_seq))
    1512             :                                         return false;
    1513           0 :                                 continue;
    1514             :                         }
    1515           0 :                         if (read_seqretry(&mount_lock, nd->m_seq))
    1516             :                                 return false;
    1517             :                 }
    1518           0 :                 return !(flags & DCACHE_NEED_AUTOMOUNT);
    1519             :         }
    1520             : }
    1521             : 
    1522           1 : static inline int handle_mounts(struct nameidata *nd, struct dentry *dentry,
    1523             :                           struct path *path)
    1524             : {
    1525             :         bool jumped;
    1526             :         int ret;
    1527             : 
    1528           1 :         path->mnt = nd->path.mnt;
    1529           1 :         path->dentry = dentry;
    1530           1 :         if (nd->flags & LOOKUP_RCU) {
    1531           1 :                 unsigned int seq = nd->next_seq;
    1532           1 :                 if (likely(__follow_mount_rcu(nd, path)))
    1533             :                         return 0;
    1534             :                 // *path and nd->next_seq might've been clobbered
    1535           0 :                 path->mnt = nd->path.mnt;
    1536           0 :                 path->dentry = dentry;
    1537           0 :                 nd->next_seq = seq;
    1538           0 :                 if (!try_to_unlazy_next(nd, dentry))
    1539             :                         return -ECHILD;
    1540             :         }
    1541           0 :         ret = traverse_mounts(path, &jumped, &nd->total_link_count, nd->flags);
    1542           0 :         if (jumped) {
    1543           0 :                 if (unlikely(nd->flags & LOOKUP_NO_XDEV))
    1544             :                         ret = -EXDEV;
    1545             :                 else
    1546           0 :                         nd->state |= ND_JUMPED;
    1547             :         }
    1548           0 :         if (unlikely(ret)) {
    1549           0 :                 dput(path->dentry);
    1550           0 :                 if (path->mnt != nd->path.mnt)
    1551           0 :                         mntput(path->mnt);
    1552             :         }
    1553             :         return ret;
    1554             : }
    1555             : 
    1556             : /*
    1557             :  * This looks up the name in dcache and possibly revalidates the found dentry.
    1558             :  * NULL is returned if the dentry does not exist in the cache.
    1559             :  */
    1560           3 : static struct dentry *lookup_dcache(const struct qstr *name,
    1561             :                                     struct dentry *dir,
    1562             :                                     unsigned int flags)
    1563             : {
    1564           3 :         struct dentry *dentry = d_lookup(dir, name);
    1565           3 :         if (dentry) {
    1566           0 :                 int error = d_revalidate(dentry, flags);
    1567           0 :                 if (unlikely(error <= 0)) {
    1568           0 :                         if (!error)
    1569           0 :                                 d_invalidate(dentry);
    1570           0 :                         dput(dentry);
    1571           0 :                         return ERR_PTR(error);
    1572             :                 }
    1573             :         }
    1574             :         return dentry;
    1575             : }
    1576             : 
    1577             : /*
    1578             :  * Parent directory has inode locked exclusive.  This is one
    1579             :  * and only case when ->lookup() gets called on non in-lookup
    1580             :  * dentries - as the matter of fact, this only gets called
    1581             :  * when directory is guaranteed to have no in-lookup children
    1582             :  * at all.
    1583             :  */
    1584           3 : static struct dentry *__lookup_hash(const struct qstr *name,
    1585             :                 struct dentry *base, unsigned int flags)
    1586             : {
    1587           3 :         struct dentry *dentry = lookup_dcache(name, base, flags);
    1588             :         struct dentry *old;
    1589           3 :         struct inode *dir = base->d_inode;
    1590             : 
    1591           3 :         if (dentry)
    1592             :                 return dentry;
    1593             : 
    1594             :         /* Don't create child dentry for a dead directory. */
    1595           3 :         if (unlikely(IS_DEADDIR(dir)))
    1596             :                 return ERR_PTR(-ENOENT);
    1597             : 
    1598           3 :         dentry = d_alloc(base, name);
    1599           3 :         if (unlikely(!dentry))
    1600             :                 return ERR_PTR(-ENOMEM);
    1601             : 
    1602           3 :         old = dir->i_op->lookup(dir, dentry, flags);
    1603           3 :         if (unlikely(old)) {
    1604           0 :                 dput(dentry);
    1605           0 :                 dentry = old;
    1606             :         }
    1607             :         return dentry;
    1608             : }
    1609             : 
    1610           1 : static struct dentry *lookup_fast(struct nameidata *nd)
    1611             : {
    1612           1 :         struct dentry *dentry, *parent = nd->path.dentry;
    1613           1 :         int status = 1;
    1614             : 
    1615             :         /*
    1616             :          * Rename seqlock is not required here because in the off chance
    1617             :          * of a false negative due to a concurrent rename, the caller is
    1618             :          * going to fall back to non-racy lookup.
    1619             :          */
    1620           1 :         if (nd->flags & LOOKUP_RCU) {
    1621           1 :                 dentry = __d_lookup_rcu(parent, &nd->last, &nd->next_seq);
    1622           1 :                 if (unlikely(!dentry)) {
    1623           0 :                         if (!try_to_unlazy(nd))
    1624             :                                 return ERR_PTR(-ECHILD);
    1625           0 :                         return NULL;
    1626             :                 }
    1627             : 
    1628             :                 /*
    1629             :                  * This sequence count validates that the parent had no
    1630             :                  * changes while we did the lookup of the dentry above.
    1631             :                  */
    1632           3 :                 if (read_seqcount_retry(&parent->d_seq, nd->seq))
    1633             :                         return ERR_PTR(-ECHILD);
    1634             : 
    1635           2 :                 status = d_revalidate(dentry, nd->flags);
    1636           1 :                 if (likely(status > 0))
    1637             :                         return dentry;
    1638           0 :                 if (!try_to_unlazy_next(nd, dentry))
    1639             :                         return ERR_PTR(-ECHILD);
    1640           0 :                 if (status == -ECHILD)
    1641             :                         /* we'd been told to redo it in non-rcu mode */
    1642           0 :                         status = d_revalidate(dentry, nd->flags);
    1643             :         } else {
    1644           0 :                 dentry = __d_lookup(parent, &nd->last);
    1645           0 :                 if (unlikely(!dentry))
    1646             :                         return NULL;
    1647           0 :                 status = d_revalidate(dentry, nd->flags);
    1648             :         }
    1649           0 :         if (unlikely(status <= 0)) {
    1650           0 :                 if (!status)
    1651           0 :                         d_invalidate(dentry);
    1652           0 :                 dput(dentry);
    1653           0 :                 return ERR_PTR(status);
    1654             :         }
    1655             :         return dentry;
    1656             : }
    1657             : 
    1658             : /* Fast lookup failed, do it the slow way */
    1659           0 : static struct dentry *__lookup_slow(const struct qstr *name,  /* look up name in dir via d_alloc_parallel() and the directory's ->lookup(); returns a dentry or an ERR_PTR */
    1660             :                                     struct dentry *dir,
    1661             :                                     unsigned int flags)
    1662             : {
    1663             :         struct dentry *dentry, *old;
    1664           0 :         struct inode *inode = dir->d_inode;
    1665           0 :         DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);  /* on-stack wait queue handed to d_alloc_parallel() below */
    1666             : 
    1667             :         /* Don't go there if it's already dead */
    1668           0 :         if (unlikely(IS_DEADDIR(inode)))
    1669             :                 return ERR_PTR(-ENOENT);
    1670             : again:
    1671           0 :         dentry = d_alloc_parallel(dir, name, &wq);
    1672           0 :         if (IS_ERR(dentry))
    1673             :                 return dentry;
    1674           0 :         if (unlikely(!d_in_lookup(dentry))) {  /* dentry is not in the in-lookup state: revalidate it instead of calling ->lookup() */
    1675           0 :                 int error = d_revalidate(dentry, flags);
    1676           0 :                 if (unlikely(error <= 0)) {
    1677           0 :                         if (!error) {  /* 0 from d_revalidate(): invalidate, drop our reference and start over */
    1678           0 :                                 d_invalidate(dentry);
    1679           0 :                                 dput(dentry);
    1680           0 :                                 goto again;
    1681             :                         }
    1682           0 :                         dput(dentry);  /* negative result: drop the reference and hand the error back as ERR_PTR */
    1683           0 :                         dentry = ERR_PTR(error);
    1684             :                 }
    1685             :         } else {
    1686           0 :                 old = inode->i_op->lookup(inode, dentry, flags);  /* dentry is in-lookup: ask the directory inode's ->lookup() */
    1687           0 :                 d_lookup_done(dentry);
    1688           0 :                 if (unlikely(old)) {  /* a non-NULL ->lookup() result (dentry or ERR_PTR) replaces the dentry we allocated */
    1689           0 :                         dput(dentry);
    1690           0 :                         dentry = old;
    1691             :                 }
    1692             :         }
    1693             :         return dentry;
    1694             : }
    1695             : 
    1696           0 : static struct dentry *lookup_slow(const struct qstr *name,  /* __lookup_slow() wrapped in a shared lock on the directory inode */
    1697             :                                   struct dentry *dir,
    1698             :                                   unsigned int flags)
    1699             : {
    1700           0 :         struct inode *inode = dir->d_inode;
    1701             :         struct dentry *res;
    1702           0 :         inode_lock_shared(inode);  /* shared inode lock is held across the whole slow lookup */
    1703           0 :         res = __lookup_slow(name, dir, flags);
    1704           0 :         inode_unlock_shared(inode);
    1705           0 :         return res;  /* result of __lookup_slow(), passed through unchanged */
    1706             : }
    1707             : 
    1708           4 : static inline int may_lookup(struct mnt_idmap *idmap,  /* MAY_EXEC permission check on nd->inode; returns the inode_permission() result */
    1709             :                              struct nameidata *nd)
    1710             : {
    1711           4 :         if (nd->flags & LOOKUP_RCU) {
    1712           4 :                 int err = inode_permission(idmap, nd->inode, MAY_EXEC|MAY_NOT_BLOCK);  /* RCU mode: first try with MAY_NOT_BLOCK */
    1713           4 :                 if (err != -ECHILD || !try_to_unlazy(nd))  /* any answer other than -ECHILD is final; -ECHILD is also returned when try_to_unlazy() fails */
    1714             :                         return err;
    1715             :         }
    1716           0 :         return inode_permission(idmap, nd->inode, MAY_EXEC);  /* non-RCU mode, or after a successful try_to_unlazy(): check without MAY_NOT_BLOCK */
    1717             : }
    1718             : 
    1719           0 : static int reserve_stack(struct nameidata *nd, struct path *link)  /* count one more link and make sure nd->stack has room; returns 0, -ELOOP, -ECHILD or -ENOMEM */
    1720             : {
    1721           0 :         if (unlikely(nd->total_link_count++ >= MAXSYMLINKS))  /* counter is incremented on every call, including the failing one */
    1722             :                 return -ELOOP;
    1723             : 
    1724           0 :         if (likely(nd->depth != EMBEDDED_LEVELS))  /* nothing to do unless depth is exactly EMBEDDED_LEVELS */
    1725             :                 return 0;
    1726           0 :         if (likely(nd->stack != nd->internal))  /* stack no longer points at nd->internal, so no allocation is attempted */
    1727             :                 return 0;
    1728           0 :         if (likely(nd_alloc_stack(nd)))  /* non-zero from nd_alloc_stack() is treated as success */
    1729             :                 return 0;
    1730             : 
    1731           0 :         if (nd->flags & LOOKUP_RCU) {
    1732             :                 // we need to grab link before we do unlazy.  And we can't skip
    1733             :                 // unlazy even if we fail to grab the link - cleanup needs it
    1734           0 :                 bool grabbed_link = legitimize_path(nd, link, nd->next_seq);
    1735             : 
    1736           0 :                 if (!try_to_unlazy(nd) || !grabbed_link)  /* try_to_unlazy() is evaluated first, so it always runs */
    1737             :                         return -ECHILD;
    1738             : 
    1739           0 :                 if (nd_alloc_stack(nd))  /* second allocation attempt, now outside RCU mode */
    1740             :                         return 0;
    1741             :         }
    1742             :         return -ENOMEM;
    1743             : }
    1744             : 
    1745             : enum {WALK_TRAILING = 1, WALK_MORE = 2, WALK_NOFOLLOW = 4};  /* bit flags tested in the 'flags' argument of pick_link(), step_into() and walk_component() */
    1746             : 
    1747           0 : static const char *pick_link(struct nameidata *nd, struct path *link,  /* push link on nd->stack and fetch its body; returns the body to walk, NULL if nothing is left to walk, or an ERR_PTR */
    1748             :                      struct inode *inode, int flags)
    1749             : {
    1750             :         struct saved *last;
    1751             :         const char *res;
    1752           0 :         int error = reserve_stack(nd, link);
    1753             : 
    1754           0 :         if (unlikely(error)) {
    1755           0 :                 if (!(nd->flags & LOOKUP_RCU))  /* link is released with path_put() only when not in RCU mode */
    1756             :                         path_put(link);
    1757           0 :                 return ERR_PTR(error);
    1758             :         }
    1759           0 :         last = nd->stack + nd->depth++;  /* take the next stack slot and bump the depth */
    1760           0 :         last->link = *link;
    1761           0 :         clear_delayed_call(&last->done);
    1762           0 :         last->seq = nd->next_seq;
    1763             : 
    1764           0 :         if (flags & WALK_TRAILING) {  /* may_follow_link() is consulted only for WALK_TRAILING walks */
    1765           0 :                 error = may_follow_link(nd, inode);
    1766           0 :                 if (unlikely(error))
    1767           0 :                         return ERR_PTR(error);
    1768             :         }
    1769             : 
    1770           0 :         if (unlikely(nd->flags & LOOKUP_NO_SYMLINKS) ||  /* following is refused with -ELOOP by lookup flag or by mount flag */
    1771           0 :                         unlikely(link->mnt->mnt_flags & MNT_NOSYMFOLLOW))
    1772             :                 return ERR_PTR(-ELOOP);
    1773             : 
    1774           0 :         if (!(nd->flags & LOOKUP_RCU)) {
    1775           0 :                 touch_atime(&last->link);
    1776           0 :                 cond_resched();
    1777           0 :         } else if (atime_needs_update(&last->link, inode)) {  /* RCU mode: leave it via try_to_unlazy() only when atime_needs_update() says so */
    1778           0 :                 if (!try_to_unlazy(nd))
    1779             :                         return ERR_PTR(-ECHILD);
    1780           0 :                 touch_atime(&last->link);
    1781             :         }
    1782             : 
    1783           0 :         error = security_inode_follow_link(link->dentry, inode,
    1784           0 :                                            nd->flags & LOOKUP_RCU);
    1785             :         if (unlikely(error))
    1786             :                 return ERR_PTR(error);
    1787             : 
    1788           0 :         res = READ_ONCE(inode->i_link);  /* use inode->i_link when it is set; otherwise fall back to ->get_link() */
    1789           0 :         if (!res) {
    1790             :                 const char * (*get)(struct dentry *, struct inode *,
    1791             :                                 struct delayed_call *);
    1792           0 :                 get = inode->i_op->get_link;
    1793           0 :                 if (nd->flags & LOOKUP_RCU) {
    1794           0 :                         res = get(NULL, inode, &last->done);  /* RCU mode passes a NULL dentry */
    1795           0 :                         if (res == ERR_PTR(-ECHILD) && try_to_unlazy(nd))  /* on -ECHILD, retry with the real dentry once try_to_unlazy() succeeds */
    1796           0 :                                 res = get(link->dentry, inode, &last->done);
    1797             :                 } else {
    1798           0 :                         res = get(link->dentry, inode, &last->done);
    1799             :                 }
    1800           0 :                 if (!res)
    1801             :                         goto all_done;
    1802           0 :                 if (IS_ERR(res))
    1803             :                         return res;
    1804             :         }
    1805           0 :         if (*res == '/') {  /* body starts with '/': jump to the root, then skip every leading '/' */
    1806           0 :                 error = nd_jump_root(nd);
    1807           0 :                 if (unlikely(error))
    1808           0 :                         return ERR_PTR(error);
    1809           0 :                 while (unlikely(*++res == '/'))
    1810             :                         ;
    1811             :         }
    1812           0 :         if (*res)  /* non-empty remainder: the caller continues walking it */
    1813             :                 return res;
    1814             : all_done: // pure jump
    1815           0 :         put_link(nd);
    1816           0 :         return NULL;
    1817             : }
    1818             : 
    1819             : /*
    1820             :  * Do we need to follow links? We _really_ want to be able
    1821             :  * to do this check without having to look at inode->i_op,
    1822             :  * so we keep a cache of "no, this doesn't need follow_link"
    1823             :  * for the common case.
    1824             :  *
    1825             :  * NOTE: dentry must be what nd->next_seq had been sampled from.
    1826             :  */
    1827           1 : static const char *step_into(struct nameidata *nd, int flags,  /* move nd onto dentry, or hand a symlink to pick_link(); returns NULL once nd has been advanced, otherwise pick_link()'s result or an ERR_PTR */
    1828             :                      struct dentry *dentry)
    1829             : {
    1830             :         struct path path;
    1831             :         struct inode *inode;
    1832           1 :         int err = handle_mounts(nd, dentry, &path);  /* path is used below as the location to step to (helper not shown in this chunk) */
    1833             : 
    1834           1 :         if (err < 0)
    1835           0 :                 return ERR_PTR(err);
    1836           1 :         inode = path.dentry->d_inode;
    1837           2 :         if (likely(!d_is_symlink(path.dentry)) ||
    1838           0 :            ((flags & WALK_TRAILING) && !(nd->flags & LOOKUP_FOLLOW)) ||
    1839           0 :            (flags & WALK_NOFOLLOW)) {
    1840             :                 /* not a symlink or should not follow */
    1841           1 :                 if (nd->flags & LOOKUP_RCU) {
    1842           3 :                         if (read_seqcount_retry(&path.dentry->d_seq, nd->next_seq))  /* dentry changed since next_seq was sampled: give up on RCU mode */
    1843             :                                 return ERR_PTR(-ECHILD);
    1844           1 :                         if (unlikely(!inode))  /* no inode behind the dentry: -ENOENT (this check is made only in RCU mode) */
    1845             :                                 return ERR_PTR(-ENOENT);
    1846             :                 } else {
    1847           0 :                         dput(nd->path.dentry);  /* non-RCU: drop the old dentry, and the old mount too when the mount changes */
    1848           0 :                         if (nd->path.mnt != path.mnt)
    1849           0 :                                 mntput(nd->path.mnt);
    1850             :                 }
    1851           1 :                 nd->path = path;
    1852           1 :                 nd->inode = inode;
    1853           1 :                 nd->seq = nd->next_seq;
    1854           1 :                 return NULL;
    1855             :         }
    1856           0 :         if (nd->flags & LOOKUP_RCU) {
    1857             :                 /* make sure that d_is_symlink above matches inode */
    1858           0 :                 if (read_seqcount_retry(&path.dentry->d_seq, nd->next_seq))
    1859             :                         return ERR_PTR(-ECHILD);
    1860             :         } else {
    1861           0 :                 if (path.mnt == nd->path.mnt)  /* same mount as nd->path: take an extra mount reference before passing path to pick_link() */
    1862           0 :                         mntget(path.mnt);
    1863             :         }
    1864           0 :         return pick_link(nd, &path, inode, flags);
    1865             : }
    1866             : 
    1867           0 : static struct dentry *follow_dotdot_rcu(struct nameidata *nd)  /* RCU-mode "..": returns the parent dentry (or the current one at the root) with nd->next_seq set, or an ERR_PTR */
    1868             : {
    1869             :         struct dentry *parent, *old;
    1870             : 
    1871           0 :         if (path_equal(&nd->path, &nd->root))  /* already at nd->root */
    1872             :                 goto in_root;
    1873           0 :         if (unlikely(nd->path.dentry == nd->path.mnt->mnt_root)) {  /* at the root of the current mount */
    1874             :                 struct path path;
    1875             :                 unsigned seq;
    1876           0 :                 if (!choose_mountpoint_rcu(real_mount(nd->path.mnt),
    1877           0 :                                            &nd->root, &path, &seq))
    1878             :                         goto in_root;
    1879           0 :                 if (unlikely(nd->flags & LOOKUP_NO_XDEV))  /* this RCU variant answers -ECHILD here; follow_dotdot() answers -EXDEV */
    1880             :                         return ERR_PTR(-ECHILD);
    1881           0 :                 nd->path = path;
    1882           0 :                 nd->inode = path.dentry->d_inode;
    1883           0 :                 nd->seq = seq;
    1884             :                 // makes sure that non-RCU pathwalk could reach this state
    1885           0 :                 if (read_seqretry(&mount_lock, nd->m_seq))
    1886             :                         return ERR_PTR(-ECHILD);
    1887             :                 /* we know that mountpoint was pinned */
    1888             :         }
    1889           0 :         old = nd->path.dentry;
    1890           0 :         parent = old->d_parent;
    1891           0 :         nd->next_seq = read_seqcount_begin(&parent->d_seq);  /* sample the parent's d_seq before re-checking the child below */
    1892             :         // makes sure that non-RCU pathwalk could reach this state
    1893           0 :         if (read_seqcount_retry(&old->d_seq, nd->seq))
    1894             :                 return ERR_PTR(-ECHILD);
    1895           0 :         if (unlikely(!path_connected(nd->path.mnt, parent)))  /* this RCU variant answers -ECHILD here; follow_dotdot() answers -ENOENT */
    1896             :                 return ERR_PTR(-ECHILD);
    1897           0 :         return parent;
    1898             : in_root:
    1899           0 :         if (read_seqretry(&mount_lock, nd->m_seq))
    1900             :                 return ERR_PTR(-ECHILD);
    1901           0 :         if (unlikely(nd->flags & LOOKUP_BENEATH))  /* this RCU variant answers -ECHILD here; follow_dotdot() answers -EXDEV */
    1902             :                 return ERR_PTR(-ECHILD);
    1903           0 :         nd->next_seq = nd->seq;  /* staying on the same dentry, so its seq is reused */
    1904           0 :         return nd->path.dentry;
    1905             : }
    1906             : 
    1907           0 : static struct dentry *follow_dotdot(struct nameidata *nd)  /* non-RCU "..": returns a dentry obtained via dget_parent()/dget(), or an ERR_PTR */
    1908             : {
    1909             :         struct dentry *parent;
    1910             : 
    1911           0 :         if (path_equal(&nd->path, &nd->root))  /* already at nd->root */
    1912             :                 goto in_root;
    1913           0 :         if (unlikely(nd->path.dentry == nd->path.mnt->mnt_root)) {  /* at the root of the current mount */
    1914             :                 struct path path;
    1915             : 
    1916           0 :                 if (!choose_mountpoint(real_mount(nd->path.mnt),
    1917           0 :                                        &nd->root, &path))
    1918             :                         goto in_root;
    1919           0 :                 path_put(&nd->path);  /* release the old nd->path before replacing it with the chosen mountpoint */
    1920           0 :                 nd->path = path;
    1921           0 :                 nd->inode = path.dentry->d_inode;
    1922           0 :                 if (unlikely(nd->flags & LOOKUP_NO_XDEV))  /* checked after nd->path has already been switched */
    1923           0 :                         return ERR_PTR(-EXDEV);
    1924             :         }
    1925             :         /* rare case of legitimate dget_parent()... */
    1926           0 :         parent = dget_parent(nd->path.dentry);
    1927           0 :         if (unlikely(!path_connected(nd->path.mnt, parent))) {  /* drop the parent again and fail with -ENOENT */
    1928           0 :                 dput(parent);
    1929           0 :                 return ERR_PTR(-ENOENT);
    1930             :         }
    1931             :         return parent;
    1932             : 
    1933             : in_root:
    1934           0 :         if (unlikely(nd->flags & LOOKUP_BENEATH))
    1935             :                 return ERR_PTR(-EXDEV);
    1936           0 :         return dget(nd->path.dentry);  /* at the root ".." resolves to the current dentry itself */
    1937             : }
    1938             : 
    1939           0 : static const char *handle_dots(struct nameidata *nd, int type)  /* acts only on LAST_DOTDOT; every other type returns NULL without touching nd */
    1940             : {
    1941           0 :         if (type == LAST_DOTDOT) {
    1942           0 :                 const char *error = NULL;
    1943             :                 struct dentry *parent;
    1944             : 
    1945           0 :                 if (!nd->root.mnt) {  /* nd->root not set yet: set_root() is called and its result, wrapped by ERR_PTR(), is returned if non-NULL */
    1946           0 :                         error = ERR_PTR(set_root(nd));
    1947           0 :                         if (error)
    1948             :                                 return error;
    1949             :                 }
    1950           0 :                 if (nd->flags & LOOKUP_RCU)
    1951           0 :                         parent = follow_dotdot_rcu(nd);
    1952             :                 else
    1953           0 :                         parent = follow_dotdot(nd);
    1954           0 :                 if (IS_ERR(parent))
    1955             :                         return ERR_CAST(parent);
    1956           0 :                 error = step_into(nd, WALK_NOFOLLOW, parent);  /* WALK_NOFOLLOW: step_into() takes its no-follow branch for the parent */
    1957           0 :                 if (unlikely(error))
    1958             :                         return error;
    1959             : 
    1960           0 :                 if (unlikely(nd->flags & LOOKUP_IS_SCOPED)) {
    1961             :                         /*
    1962             :                          * If there was a racing rename or mount along our
    1963             :                          * path, then we can't be sure that ".." hasn't jumped
    1964             :                          * above nd->root (and so userspace should retry or use
    1965             :                          * some fallback).
    1966             :                          */
    1967           0 :                         smp_rmb();
    1968           0 :                         if (__read_seqcount_retry(&mount_lock.seqcount, nd->m_seq))  /* mount_lock sequence moved since nd->m_seq */
    1969             :                                 return ERR_PTR(-EAGAIN);
    1970           0 :                         if (__read_seqcount_retry(&rename_lock.seqcount, nd->r_seq))  /* rename_lock sequence moved since nd->r_seq */
    1971             :                                 return ERR_PTR(-EAGAIN);
    1972             :                 }
    1973             :         }
    1974             :         return NULL;
    1975             : }
    1976             : 
    1977           1 : static const char *walk_component(struct nameidata *nd, int flags)  /* resolve the component in nd->last: dots go to handle_dots(), names go through lookup_fast()/lookup_slow() and then step_into() */
    1978             : {
    1979             :         struct dentry *dentry;
    1980             :         /*
    1981             :          * "." and ".." are special - ".." especially so because it has
    1982             :          * to be able to know about the current root directory and
    1983             :          * parent relationships.
    1984             :          */
    1985           1 :         if (unlikely(nd->last_type != LAST_NORM)) {
    1986           0 :                 if (!(flags & WALK_MORE) && nd->depth)  /* without WALK_MORE, a pending link (depth != 0) is released first */
    1987           0 :                         put_link(nd);
    1988           0 :                 return handle_dots(nd, nd->last_type);
    1989             :         }
    1990           1 :         dentry = lookup_fast(nd);
    1991           1 :         if (IS_ERR(dentry))
    1992             :                 return ERR_CAST(dentry);
    1993           1 :         if (unlikely(!dentry)) {  /* NULL from lookup_fast(): fall back to the slow path */
    1994           0 :                 dentry = lookup_slow(&nd->last, nd->path.dentry, nd->flags);
    1995           0 :                 if (IS_ERR(dentry))
    1996             :                         return ERR_CAST(dentry);
    1997             :         }
    1998           1 :         if (!(flags & WALK_MORE) && nd->depth)  /* same put_link() rule as above, applied after the lookup succeeded */
    1999           0 :                 put_link(nd);
    2000           1 :         return step_into(nd, flags, dentry);
    2001             : }
    2002             : 
    2003             : /*
    2004             :  * We can do the critical dentry name comparison and hashing
    2005             :  * operations one word at a time, but we are limited to:
    2006             :  *
    2007             :  * - Architectures with fast unaligned word accesses. We could
    2008             :  *   do a "get_unaligned()" if this helps and is sufficiently
    2009             :  *   fast.
    2010             :  *
    2011             :  * - non-CONFIG_DEBUG_PAGEALLOC configurations (so that we
    2012             :  *   do not trap on the (extremely unlikely) case of a page
    2013             :  *   crossing operation).
    2014             :  *
    2015             :  * - Furthermore, we need an efficient 64-bit compile for the
    2016             :  *   64-bit case in order to generate the "number of bytes in
    2017             :  *   the final mask". Again, that could be replaced with an
    2018             :  *   efficient population count instruction or similar.
    2019             :  */
    2020             : #ifdef CONFIG_DCACHE_WORD_ACCESS
    2021             : 
    2022             : #include <asm/word-at-a-time.h>
    2023             : 
    2024             : #ifdef HASH_MIX
    2025             : 
    2026             : /* Architecture provides HASH_MIX and fold_hash() in <asm/hash.h> */
    2027             : 
    2028             : #elif defined(CONFIG_64BIT)
    2029             : /*
    2030             :  * Register pressure in the mixing function is an issue, particularly
    2031             :  * on 32-bit x86, but almost any function requires one state value and
    2032             :  * one temporary.  Instead, use a function designed for two state values
    2033             :  * and no temporaries.
    2034             :  *
    2035             :  * This function cannot create a collision in only two iterations, so
    2036             :  * we have two iterations to achieve avalanche.  In those two iterations,
    2037             :  * we have six layers of mixing, which is enough to spread one bit's
    2038             :  * influence out to 2^6 = 64 state bits.
    2039             :  *
    2040             :  * Rotate constants are scored by considering either 64 one-bit input
    2041             :  * deltas or 64*63/2 = 2016 two-bit input deltas, and finding the
    2042             :  * probability of that delta causing a change to each of the 128 output
    2043             :  * bits, using a sample of random initial states.
    2044             :  *
    2045             :  * The Shannon entropy of the computed probabilities is then summed
    2046             :  * to produce a score.  Ideally, any input change has a 50% chance of
    2047             :  * toggling any given output bit.
    2048             :  *
    2049             :  * Mixing scores (in bits) for (12,45):
    2050             :  * Input delta: 1-bit      2-bit
    2051             :  * 1 round:     713.3    42542.6
    2052             :  * 2 rounds:   2753.7   140389.8
    2053             :  * 3 rounds:   5954.1   233458.2
    2054             :  * 4 rounds:   7862.6   256672.2
    2055             :  * Perfect:    8192     258048
    2056             :  *            (64*128) (64*63/2 * 128)
    2057             :  */
    2058             : #define HASH_MIX(x, y, a)       /* 64-bit mixing step: updates x and y in place, so both are evaluated more than once; a is XORed into x first */ \
    2059             :         (       x ^= (a),       \
    2060             :         y ^= x, x = rol64(x,12),\
    2061             :         x += y, y = rol64(y,45),\
    2062             :         y *= 9                  )
    2063             : 
    2064             : /*
    2065             :  * Fold two longs into one 32-bit hash value.  This must be fast, but
    2066             :  * latency isn't quite as critical, as there is a fair bit of additional
    2067             :  * work done before the hash value is used.
    2068             :  */
    2069             : static inline unsigned int fold_hash(unsigned long x, unsigned long y)  /* 64-bit variant: combine the two state words into one 32-bit value */
    2070             : {
    2071           6 :         y ^= x * GOLDEN_RATIO_64;  /* multiply x by the constant and XOR it into y */
    2072           6 :         y *= GOLDEN_RATIO_64;
    2073           6 :         return y >> 32;  /* the upper 32 bits of the product are the result */
    2074             : }
    2075             : 
    2076             : #else   /* 32-bit case */
    2077             : 
    2078             : /*
    2079             :  * Mixing scores (in bits) for (7,20):
    2080             :  * Input delta: 1-bit      2-bit
    2081             :  * 1 round:     330.3     9201.6
    2082             :  * 2 rounds:   1246.4    25475.4
    2083             :  * 3 rounds:   1907.1    31295.1
    2084             :  * 4 rounds:   2042.3    31718.6
    2085             :  * Perfect:    2048      31744
    2086             :  *            (32*64)   (32*31/2 * 64)
    2087             :  */
    2088             : #define HASH_MIX(x, y, a)       /* 32-bit mixing step: same shape as the 64-bit variant, using rol32 with rotates 7 and 20 */ \
    2089             :         (       x ^= (a),       \
    2090             :         y ^= x, x = rol32(x, 7),\
    2091             :         x += y, y = rol32(y,20),\
    2092             :         y *= 9                  )
    2093             : 
    2094             : static inline unsigned int fold_hash(unsigned long x, unsigned long y)  /* 32-bit variant: combine the two state words with two __hash_32() applications */
    2095             : {
    2096             :         /* Use arch-optimized multiply if one exists */
    2097             :         return __hash_32(y ^ __hash_32(x));  /* hash x, XOR the result with y, hash again */
    2098             : }
    2099             : 
    2100             : #endif
    2101             : 
    2102             : /*
    2103             :  * Return the hash of a string of known length.  This is carefully
    2104             :  * designed to match hash_name(), which is the more critical function.
    2105             :  * In particular, we must end by hashing a final word containing 0..7
    2106             :  * payload bytes, to match the way that hash_name() iterates until it
    2107             :  * finds the delimiter after the name.
    2108             :  */
    2109           0 : unsigned int full_name_hash(const void *salt, const char *name, unsigned int len)  /* word-at-a-time hash of the len bytes at name; the salt pointer value seeds y */
    2110             : {
    2111           0 :         unsigned long a, x = 0, y = (unsigned long)salt;
    2112             : 
    2113             :         for (;;) {
    2114           0 :                 if (!len)  /* nothing left (or len was 0 to begin with): fold the state as it stands */
    2115             :                         goto done;
    2116           0 :                 a = load_unaligned_zeropad(name);  /* reads one whole word even when fewer than sizeof(long) bytes remain */
    2117           0 :                 if (len < sizeof(unsigned long))  /* partial final word: leave the loop and mix only its first len bytes below */
    2118             :                         break;
    2119           0 :                 HASH_MIX(x, y, a);
    2120           0 :                 name += sizeof(unsigned long);
    2121           0 :                 len -= sizeof(unsigned long);
    2122             :         }
    2123           0 :         x ^= a & bytemask_from_count(len);  /* presumably the mask keeps just the len remaining bytes of a; helper not shown here */
    2124             : done:
    2125           0 :         return fold_hash(x, y);
    2126             : }
    2127             : EXPORT_SYMBOL(full_name_hash);
    2128             : 
    2129             : /* Return the "hash_len" (hash and length) of a null-terminated string */
    2130           2 : u64 hashlen_string(const void *salt, const char *name)  /* word-at-a-time variant: returns hashlen_create(hash, length) for the string at name */
    2131             : {
    2132           2 :         unsigned long a = 0, x = 0, y = (unsigned long)salt;
    2133             :         unsigned long adata, mask, len;
    2134           2 :         const struct word_at_a_time constants = WORD_AT_A_TIME_CONSTANTS;
    2135             : 
    2136           2 :         len = 0;
    2137           2 :         goto inside;  /* enter the loop at the load, so the first word is not mixed before it has been checked */
    2138             : 
    2139             :         do {
    2140           3 :                 HASH_MIX(x, y, a);  /* mix in the previous word, for which has_zero() returned 0 */
    2141           1 :                 len += sizeof(unsigned long);
    2142             : inside:
    2143           6 :                 a = load_unaligned_zeropad(name+len);
    2144           3 :         } while (!has_zero(a, &adata, &constants));  /* stop at the first word for which has_zero() is non-zero */
    2145             : 
    2146           2 :         adata = prep_zero_mask(a, adata, &constants);
    2147           4 :         mask = create_zero_mask(adata);
    2148           2 :         x ^= a & zero_bytemask(mask);  /* presumably keeps only the bytes before the terminator; helper semantics not shown here */
    2149             : 
    2150           4 :         return hashlen_create(fold_hash(x, y), len + find_zero(mask));  /* length = bytes in full words consumed plus find_zero(mask) for the last word */
    2151             : }
    2152             : EXPORT_SYMBOL(hashlen_string);
    2153             : 
    2154             : /*
    2155             :  * Calculate the length and hash of the path component, and
    2156             :  * return the "hash_len" as the result.
    2157             :  */
    2158           4 : static inline u64 hash_name(const void *salt, const char *name)  /* word-at-a-time variant: same scheme as hashlen_string() in this branch, but a '/' byte also ends the component */
    2159             : {
    2160           4 :         unsigned long a = 0, b, x = 0, y = (unsigned long)salt;
    2161             :         unsigned long adata, bdata, mask, len;
    2162           4 :         const struct word_at_a_time constants = WORD_AT_A_TIME_CONSTANTS;
    2163             : 
    2164           4 :         len = 0;
    2165           4 :         goto inside;  /* enter the loop at the load, so the first word is not mixed before it has been checked */
    2166             : 
    2167             :         do {
    2168           0 :                 HASH_MIX(x, y, a);
    2169           0 :                 len += sizeof(unsigned long);
    2170             : inside:
    2171           8 :                 a = load_unaligned_zeropad(name+len);
    2172           4 :                 b = a ^ REPEAT_BYTE('/');  /* XOR with a word of '/' bytes, so a '/' byte in a becomes a zero byte in b */
    2173           8 :         } while (!(has_zero(a, &adata, &constants) | has_zero(b, &bdata, &constants)));  /* bitwise |, so both has_zero() calls always run and fill adata and bdata */
    2174             : 
    2175           4 :         adata = prep_zero_mask(a, adata, &constants);
    2176           4 :         bdata = prep_zero_mask(b, bdata, &constants);
    2177           8 :         mask = create_zero_mask(adata | bdata);  /* combined mask covers whichever delimiter (NUL or '/') was detected */
    2178           4 :         x ^= a & zero_bytemask(mask);
    2179             : 
    2180           8 :         return hashlen_create(fold_hash(x, y), len + find_zero(mask));
    2181             : }
    2182             : 
    2183             : #else   /* !CONFIG_DCACHE_WORD_ACCESS: Slow, byte-at-a-time version */
    2184             : 
    2185             : /* Return the hash of a string of known length */
    2186             : unsigned int full_name_hash(const void *salt, const char *name, unsigned int len)  /* byte-at-a-time variant: hash exactly len bytes starting at name */
    2187             : {
    2188             :         unsigned long hash = init_name_hash(salt);
    2189             :         while (len--)  /* one partial_name_hash() step per byte, each byte taken as unsigned char */
    2190             :                 hash = partial_name_hash((unsigned char)*name++, hash);
    2191             :         return end_name_hash(hash);
    2192             : }
    2193             : EXPORT_SYMBOL(full_name_hash);
    2194             : 
    2195             : /* Return the "hash_len" (hash and length) of a null-terminated string */
    2196             : u64 hashlen_string(const void *salt, const char *name)  /* byte-at-a-time variant: hash up to, but not including, the terminating NUL */
    2197             : {
    2198             :         unsigned long hash = init_name_hash(salt);
    2199             :         unsigned long len = 0, c;
    2200             : 
    2201             :         c = (unsigned char)*name;
    2202             :         while (c) {  /* an empty string skips the loop and yields length 0 */
    2203             :                 len++;
    2204             :                 hash = partial_name_hash(c, hash);
    2205             :                 c = (unsigned char)name[len];
    2206             :         }
    2207             :         return hashlen_create(end_name_hash(hash), len);
    2208             : }
    2209             : EXPORT_SYMBOL(hashlen_string);
    2210             : 
    2211             : /*
    2212             :  * We know there's a real path component here of at least
    2213             :  * one character.
    2214             :  */
    2215             : static inline u64 hash_name(const void *salt, const char *name)  /* byte-at-a-time variant: hash one path component, ending at NUL or '/' */
    2216             : {
    2217             :         unsigned long hash = init_name_hash(salt);
    2218             :         unsigned long len = 0, c;
    2219             : 
    2220             :         c = (unsigned char)*name;
    2221             :         do {  /* do/while: the first byte is hashed unconditionally, so the returned length is at least 1 */
    2222             :                 len++;
    2223             :                 hash = partial_name_hash(c, hash);
    2224             :                 c = (unsigned char)name[len];
    2225             :         } while (c && c != '/');
    2226             :         return hashlen_create(end_name_hash(hash), len);
    2227             : }
    2228             : 
    2229             : #endif
    2230             : 
    2231             : /*
    2232             :  * Name resolution.
    2233             :  * This is the basic name resolution function, turning a pathname into
    2234             :  * the final dentry. We expect 'base' to be positive and a directory.
    2235             :  *
    2236             :  * Returns 0 and nd will have valid dentry and mnt on success.
    2237             :  * Returns error and drops reference to input namei data on failure.
    2238             :  */
    2239           3 : static int link_path_walk(const char *name, struct nameidata *nd)
    2240             : {
    2241           3 :         int depth = 0; // depth <= nd->depth
    2242             :         int err;
    2243             : 
    2244           3 :         nd->last_type = LAST_ROOT;
    2245           3 :         nd->flags |= LOOKUP_PARENT;
    2246           3 :         if (IS_ERR(name))
    2247           0 :                 return PTR_ERR(name);
    2248           6 :         while (*name=='/')
    2249           3 :                 name++;
    2250           3 :         if (!*name) {
    2251           0 :                 nd->dir_mode = 0; // short-circuit the 'hardening' idiocy
    2252           0 :                 return 0;
    2253             :         }
    2254             : 
    2255             :         /* At this point we know we have a real path component. */
    2256             :         for(;;) {
    2257             :                 struct mnt_idmap *idmap;
    2258             :                 const char *link;
    2259             :                 u64 hash_len;
    2260             :                 int type;
    2261             : 
    2262           8 :                 idmap = mnt_idmap(nd->path.mnt);
    2263           4 :                 err = may_lookup(idmap, nd);
    2264           4 :                 if (err)
    2265             :                         return err;
    2266             : 
    2267           4 :                 hash_len = hash_name(nd->path.dentry, name);
    2268             : 
    2269           4 :                 type = LAST_NORM;
    2270           4 :                 if (name[0] == '.') switch (hashlen_len(hash_len)) {
    2271             :                         case 2:
    2272           0 :                                 if (name[1] == '.') {
    2273           0 :                                         type = LAST_DOTDOT;
    2274           0 :                                         nd->state |= ND_JUMPED;
    2275             :                                 }
    2276             :                                 break;
    2277             :                         case 1:
    2278           0 :                                 type = LAST_DOT;
    2279             :                 }
    2280           4 :                 if (likely(type == LAST_NORM)) {
    2281           4 :                         struct dentry *parent = nd->path.dentry;
    2282           4 :                         nd->state &= ~ND_JUMPED;
    2283           4 :                         if (unlikely(parent->d_flags & DCACHE_OP_HASH)) {
    2284           0 :                                 struct qstr this = { { .hash_len = hash_len }, .name = name };
    2285           0 :                                 err = parent->d_op->d_hash(parent, &this);
    2286           0 :                                 if (err < 0)
    2287           0 :                                         return err;
    2288           0 :                                 hash_len = this.hash_len;
    2289           0 :                                 name = this.name;
    2290             :                         }
    2291             :                 }
    2292             : 
    2293           4 :                 nd->last.hash_len = hash_len;
    2294           4 :                 nd->last.name = name;
    2295           4 :                 nd->last_type = type;
    2296             : 
    2297           4 :                 name += hashlen_len(hash_len);
    2298           4 :                 if (!*name)
    2299             :                         goto OK;
    2300             :                 /*
    2301             :                  * If it wasn't NUL, we know it was '/'. Skip that
    2302             :                  * slash, and continue until no more slashes.
    2303             :                  */
    2304             :                 do {
    2305           1 :                         name++;
    2306           1 :                 } while (unlikely(*name == '/'));
    2307           1 :                 if (unlikely(!*name)) {
    2308             : OK:
    2309             :                         /* pathname or trailing symlink, done */
    2310           3 :                         if (!depth) {
    2311           6 :                                 nd->dir_vfsuid = i_uid_into_vfsuid(idmap, nd->inode);
    2312           3 :                                 nd->dir_mode = nd->inode->i_mode;
    2313           3 :                                 nd->flags &= ~LOOKUP_PARENT;
    2314           3 :                                 return 0;
    2315             :                         }
    2316             :                         /* last component of nested symlink */
    2317           0 :                         name = nd->stack[--depth].name;
    2318           0 :                         link = walk_component(nd, 0);
    2319             :                 } else {
    2320             :                         /* not the last component */
    2321           1 :                         link = walk_component(nd, WALK_MORE);
    2322             :                 }
    2323           1 :                 if (unlikely(link)) {
    2324           0 :                         if (IS_ERR(link))
    2325           0 :                                 return PTR_ERR(link);
    2326             :                         /* a symlink to follow */
    2327           0 :                         nd->stack[depth++].name = name;
    2328           0 :                         name = link;
    2329           0 :                         continue;
    2330             :                 }
    2331           2 :                 if (unlikely(!d_can_lookup(nd->path.dentry))) {
    2332           0 :                         if (nd->flags & LOOKUP_RCU) {
    2333           0 :                                 if (!try_to_unlazy(nd))
    2334             :                                         return -ECHILD;
    2335             :                         }
    2336             :                         return -ENOTDIR;
    2337             :                 }
    2338             :         }
    2339             : }
    2340             : 
    2341             : /* must be paired with terminate_walk() */
    2342           3 : static const char *path_init(struct nameidata *nd, unsigned flags)
    2343             : {
    2344             :         int error;
    2345           3 :         const char *s = nd->name->name;
    2346             : 
    2347             :         /* LOOKUP_CACHED requires RCU, ask caller to retry */
    2348           3 :         if ((flags & (LOOKUP_RCU | LOOKUP_CACHED)) == LOOKUP_CACHED)
    2349             :                 return ERR_PTR(-EAGAIN);
    2350             : 
    2351           3 :         if (!*s)
    2352           0 :                 flags &= ~LOOKUP_RCU;
    2353           3 :         if (flags & LOOKUP_RCU)
    2354             :                 rcu_read_lock();
    2355             :         else
    2356           0 :                 nd->seq = nd->next_seq = 0;
    2357             : 
    2358           3 :         nd->flags = flags;
    2359           3 :         nd->state |= ND_JUMPED;
    2360             : 
    2361           6 :         nd->m_seq = __read_seqcount_begin(&mount_lock.seqcount);
    2362           6 :         nd->r_seq = __read_seqcount_begin(&rename_lock.seqcount);
    2363           3 :         smp_rmb();
    2364             : 
    2365           3 :         if (nd->state & ND_ROOT_PRESET) {
    2366           0 :                 struct dentry *root = nd->root.dentry;
    2367           0 :                 struct inode *inode = root->d_inode;
    2368           0 :                 if (*s && unlikely(!d_can_lookup(root)))
    2369             :                         return ERR_PTR(-ENOTDIR);
    2370           0 :                 nd->path = nd->root;
    2371           0 :                 nd->inode = inode;
    2372           0 :                 if (flags & LOOKUP_RCU) {
    2373           0 :                         nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq);
    2374           0 :                         nd->root_seq = nd->seq;
    2375             :                 } else {
    2376           0 :                         path_get(&nd->path);
    2377             :                 }
    2378             :                 return s;
    2379             :         }
    2380             : 
    2381           3 :         nd->root.mnt = NULL;
    2382             : 
    2383             :         /* Absolute pathname -- fetch the root (LOOKUP_IN_ROOT uses nd->dfd). */
    2384           3 :         if (*s == '/' && !(flags & LOOKUP_IN_ROOT)) {
    2385           3 :                 error = nd_jump_root(nd);
    2386           3 :                 if (unlikely(error))
    2387           0 :                         return ERR_PTR(error);
    2388             :                 return s;
    2389             :         }
    2390             : 
    2391             :         /* Relative pathname -- get the starting-point it is relative to. */
    2392           0 :         if (nd->dfd == AT_FDCWD) {
    2393           0 :                 if (flags & LOOKUP_RCU) {
    2394           0 :                         struct fs_struct *fs = current->fs;
    2395             :                         unsigned seq;
    2396             : 
    2397             :                         do {
    2398           0 :                                 seq = read_seqcount_begin(&fs->seq);
    2399           0 :                                 nd->path = fs->pwd;
    2400           0 :                                 nd->inode = nd->path.dentry->d_inode;
    2401           0 :                                 nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
    2402           0 :                         } while (read_seqcount_retry(&fs->seq, seq));
    2403             :                 } else {
    2404           0 :                         get_fs_pwd(current->fs, &nd->path);
    2405           0 :                         nd->inode = nd->path.dentry->d_inode;
    2406             :                 }
    2407             :         } else {
    2408             :                 /* Caller must check execute permissions on the starting path component */
    2409           0 :                 struct fd f = fdget_raw(nd->dfd);
    2410             :                 struct dentry *dentry;
    2411             : 
    2412           0 :                 if (!f.file)
    2413           0 :                         return ERR_PTR(-EBADF);
    2414             : 
    2415           0 :                 dentry = f.file->f_path.dentry;
    2416             : 
    2417           0 :                 if (*s && unlikely(!d_can_lookup(dentry))) {
    2418           0 :                         fdput(f);
    2419             :                         return ERR_PTR(-ENOTDIR);
    2420             :                 }
    2421             : 
    2422           0 :                 nd->path = f.file->f_path;
    2423           0 :                 if (flags & LOOKUP_RCU) {
    2424           0 :                         nd->inode = nd->path.dentry->d_inode;
    2425           0 :                         nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq);
    2426             :                 } else {
    2427           0 :                         path_get(&nd->path);
    2428           0 :                         nd->inode = nd->path.dentry->d_inode;
    2429             :                 }
    2430           0 :                 fdput(f);
    2431             :         }
    2432             : 
    2433             :         /* For scoped-lookups we need to set the root to the dirfd as well. */
    2434           0 :         if (flags & LOOKUP_IS_SCOPED) {
    2435           0 :                 nd->root = nd->path;
    2436           0 :                 if (flags & LOOKUP_RCU) {
    2437           0 :                         nd->root_seq = nd->seq;
    2438             :                 } else {
    2439           0 :                         path_get(&nd->root);
    2440           0 :                         nd->state |= ND_ROOT_GRABBED;
    2441             :                 }
    2442             :         }
    2443             :         return s;
    2444             : }
    2445             : 
    2446           0 : static inline const char *lookup_last(struct nameidata *nd)
    2447             : {
    2448           0 :         if (nd->last_type == LAST_NORM && nd->last.name[nd->last.len])
    2449           0 :                 nd->flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
    2450             : 
    2451           0 :         return walk_component(nd, WALK_TRAILING);
    2452             : }
    2453             : 
    2454           0 : static int handle_lookup_down(struct nameidata *nd)
    2455             : {
    2456           0 :         if (!(nd->flags & LOOKUP_RCU))
    2457           0 :                 dget(nd->path.dentry);
    2458           0 :         nd->next_seq = nd->seq;
    2459           0 :         return PTR_ERR(step_into(nd, WALK_NOFOLLOW, nd->path.dentry));
    2460             : }
    2461             : 
    2462             : /* Returns 0 and nd will be valid on success; Returns error, otherwise. */
    2463           0 : static int path_lookupat(struct nameidata *nd, unsigned flags, struct path *path)
    2464             : {
    2465           0 :         const char *s = path_init(nd, flags);
    2466             :         int err;
    2467             : 
    2468           0 :         if (unlikely(flags & LOOKUP_DOWN) && !IS_ERR(s)) {
    2469           0 :                 err = handle_lookup_down(nd);
    2470           0 :                 if (unlikely(err < 0))
    2471           0 :                         s = ERR_PTR(err);
    2472             :         }
    2473             : 
    2474           0 :         while (!(err = link_path_walk(s, nd)) &&
    2475             :                (s = lookup_last(nd)) != NULL)
    2476             :                 ;
    2477           0 :         if (!err && unlikely(nd->flags & LOOKUP_MOUNTPOINT)) {
    2478           0 :                 err = handle_lookup_down(nd);
    2479           0 :                 nd->state &= ~ND_JUMPED; // no d_weak_revalidate(), please...
    2480             :         }
    2481           0 :         if (!err)
    2482           0 :                 err = complete_walk(nd);
    2483             : 
    2484           0 :         if (!err && nd->flags & LOOKUP_DIRECTORY)
    2485           0 :                 if (!d_can_lookup(nd->path.dentry))
    2486           0 :                         err = -ENOTDIR;
    2487           0 :         if (!err) {
    2488           0 :                 *path = nd->path;
    2489           0 :                 nd->path.mnt = NULL;
    2490           0 :                 nd->path.dentry = NULL;
    2491             :         }
    2492           0 :         terminate_walk(nd);
    2493           0 :         return err;
    2494             : }
    2495             : 
    2496           0 : int filename_lookup(int dfd, struct filename *name, unsigned flags,
    2497             :                     struct path *path, struct path *root)
    2498             : {
    2499             :         int retval;
    2500             :         struct nameidata nd;
    2501           0 :         if (IS_ERR(name))
    2502           0 :                 return PTR_ERR(name);
    2503           0 :         set_nameidata(&nd, dfd, name, root);
    2504           0 :         retval = path_lookupat(&nd, flags | LOOKUP_RCU, path);
    2505           0 :         if (unlikely(retval == -ECHILD))
    2506           0 :                 retval = path_lookupat(&nd, flags, path);
    2507           0 :         if (unlikely(retval == -ESTALE))
    2508           0 :                 retval = path_lookupat(&nd, flags | LOOKUP_REVAL, path);
    2509             : 
    2510             :         if (likely(!retval))
    2511             :                 audit_inode(name, path->dentry,
    2512             :                             flags & LOOKUP_MOUNTPOINT ? AUDIT_INODE_NOEVAL : 0);
    2513           0 :         restore_nameidata();
    2514           0 :         return retval;
    2515             : }
    2516             : 
    2517             : /* Returns 0 and nd will be valid on success; Returns error, otherwise. */
    2518           3 : static int path_parentat(struct nameidata *nd, unsigned flags,
    2519             :                                 struct path *parent)
    2520             : {
    2521           3 :         const char *s = path_init(nd, flags);
    2522           3 :         int err = link_path_walk(s, nd);
    2523           3 :         if (!err)
    2524           3 :                 err = complete_walk(nd);
    2525           3 :         if (!err) {
    2526           3 :                 *parent = nd->path;
    2527           3 :                 nd->path.mnt = NULL;
    2528           3 :                 nd->path.dentry = NULL;
    2529             :         }
    2530           3 :         terminate_walk(nd);
    2531           3 :         return err;
    2532             : }
    2533             : 
    2534             : /* Note: this does not consume "name" */
    2535           3 : static int filename_parentat(int dfd, struct filename *name,
    2536             :                              unsigned int flags, struct path *parent,
    2537             :                              struct qstr *last, int *type)
    2538             : {
    2539             :         int retval;
    2540             :         struct nameidata nd;
    2541             : 
    2542           3 :         if (IS_ERR(name))
    2543           0 :                 return PTR_ERR(name);
    2544           3 :         set_nameidata(&nd, dfd, name, NULL);
    2545           3 :         retval = path_parentat(&nd, flags | LOOKUP_RCU, parent);
    2546           3 :         if (unlikely(retval == -ECHILD))
    2547           0 :                 retval = path_parentat(&nd, flags, parent);
    2548           3 :         if (unlikely(retval == -ESTALE))
    2549           0 :                 retval = path_parentat(&nd, flags | LOOKUP_REVAL, parent);
    2550           3 :         if (likely(!retval)) {
    2551           3 :                 *last = nd.last;
    2552           3 :                 *type = nd.last_type;
    2553           3 :                 audit_inode(name, parent->dentry, AUDIT_INODE_PARENT);
    2554             :         }
    2555           3 :         restore_nameidata();
    2556           3 :         return retval;
    2557             : }
    2558             : 
    2559             : /* does lookup, returns the object with parent locked */
    2560           0 : static struct dentry *__kern_path_locked(struct filename *name, struct path *path)
    2561             : {
    2562             :         struct dentry *d;
    2563             :         struct qstr last;
    2564             :         int type, error;
    2565             : 
    2566           0 :         error = filename_parentat(AT_FDCWD, name, 0, path, &last, &type);
    2567           0 :         if (error)
    2568           0 :                 return ERR_PTR(error);
    2569           0 :         if (unlikely(type != LAST_NORM)) {
    2570           0 :                 path_put(path);
    2571           0 :                 return ERR_PTR(-EINVAL);
    2572             :         }
    2573           0 :         inode_lock_nested(path->dentry->d_inode, I_MUTEX_PARENT);
    2574           0 :         d = __lookup_hash(&last, path->dentry, 0);
    2575           0 :         if (IS_ERR(d)) {
    2576           0 :                 inode_unlock(path->dentry->d_inode);
    2577             :                 path_put(path);
    2578             :         }
    2579             :         return d;
    2580             : }
    2581             : 
    2582           0 : struct dentry *kern_path_locked(const char *name, struct path *path)
    2583             : {
    2584           0 :         struct filename *filename = getname_kernel(name);
    2585           0 :         struct dentry *res = __kern_path_locked(filename, path);
    2586             : 
    2587           0 :         putname(filename);
    2588           0 :         return res;
    2589             : }
    2590             : 
    2591           0 : int kern_path(const char *name, unsigned int flags, struct path *path)
    2592             : {
    2593           0 :         struct filename *filename = getname_kernel(name);
    2594           0 :         int ret = filename_lookup(AT_FDCWD, filename, flags, path, NULL);
    2595             : 
    2596           0 :         putname(filename);
    2597           0 :         return ret;
    2598             : 
    2599             : }
    2600             : EXPORT_SYMBOL(kern_path);
    2601             : 
    2602             : /**
    2603             :  * vfs_path_lookup - lookup a file path relative to a dentry-vfsmount pair
    2604             :  * @dentry:  pointer to dentry of the base directory
    2605             :  * @mnt: pointer to vfs mount of the base directory
    2606             :  * @name: pointer to file name
    2607             :  * @flags: lookup flags
    2608             :  * @path: pointer to struct path to fill
    2609             :  */
    2610           0 : int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt,
    2611             :                     const char *name, unsigned int flags,
    2612             :                     struct path *path)
    2613             : {
    2614             :         struct filename *filename;
    2615           0 :         struct path root = {.mnt = mnt, .dentry = dentry};
    2616             :         int ret;
    2617             : 
    2618           0 :         filename = getname_kernel(name);
    2619             :         /* the first argument of filename_lookup() is ignored with root */
    2620           0 :         ret = filename_lookup(AT_FDCWD, filename, flags, path, &root);
    2621           0 :         putname(filename);
    2622           0 :         return ret;
    2623             : }
    2624             : EXPORT_SYMBOL(vfs_path_lookup);
    2625             : 
    2626           0 : static int lookup_one_common(struct mnt_idmap *idmap,
    2627             :                              const char *name, struct dentry *base, int len,
    2628             :                              struct qstr *this)
    2629             : {
    2630           0 :         this->name = name;
    2631           0 :         this->len = len;
    2632           0 :         this->hash = full_name_hash(base, name, len);
    2633           0 :         if (!len)
    2634             :                 return -EACCES;
    2635             : 
    2636           0 :         if (unlikely(name[0] == '.')) {
    2637           0 :                 if (len < 2 || (len == 2 && name[1] == '.'))
    2638             :                         return -EACCES;
    2639             :         }
    2640             : 
    2641           0 :         while (len--) {
    2642           0 :                 unsigned int c = *(const unsigned char *)name++;
    2643           0 :                 if (c == '/' || c == '\0')
    2644             :                         return -EACCES;
    2645             :         }
    2646             :         /*
    2647             :          * See if the low-level filesystem might want
    2648             :          * to use its own hash..
    2649             :          */
    2650           0 :         if (base->d_flags & DCACHE_OP_HASH) {
    2651           0 :                 int err = base->d_op->d_hash(base, this);
    2652           0 :                 if (err < 0)
    2653             :                         return err;
    2654             :         }
    2655             : 
    2656           0 :         return inode_permission(idmap, base->d_inode, MAY_EXEC);
    2657             : }
    2658             : 
    2659             : /**
    2660             :  * try_lookup_one_len - filesystem helper to lookup single pathname component
    2661             :  * @name:       pathname component to lookup
    2662             :  * @base:       base directory to lookup from
    2663             :  * @len:        number of bytes of @name that form the component
    2664             :  *
    2665             :  * Look up a dentry by name in the dcache, returning NULL if it does not
    2666             :  * currently exist.  The function does not try to create a dentry.
    2667             :  *
    2668             :  * Note that this routine is purely a helper for filesystem usage and should
    2669             :  * not be called by generic code.
    2670             :  *
    2671             :  * The caller must hold base->i_mutex.
    2672             :  */
    2673           0 : struct dentry *try_lookup_one_len(const char *name, struct dentry *base, int len)
    2674             : {
    2675             :         struct qstr this;
    2676             :         int err;
    2677             : 
    2678           0 :         WARN_ON_ONCE(!inode_is_locked(base->d_inode));
    2679             : 
    2680           0 :         err = lookup_one_common(&nop_mnt_idmap, name, base, len, &this);
    2681           0 :         if (err)
    2682           0 :                 return ERR_PTR(err);
    2683             : 
    2684           0 :         return lookup_dcache(&this, base, 0);
    2685             : }
    2686             : EXPORT_SYMBOL(try_lookup_one_len);
    2687             : 
    2688             : /**
    2689             :  * lookup_one_len - filesystem helper to lookup single pathname component
    2690             :  * @name:       pathname component to lookup
    2691             :  * @base:       base directory to lookup from
    2692             :  * @len:        number of bytes of @name that form the component
    2693             :  *
    2694             :  * Note that this routine is purely a helper for filesystem usage and should
    2695             :  * not be called by generic code.
    2696             :  *
    2697             :  * The caller must hold base->i_mutex.
    2698             :  */
    2699           0 : struct dentry *lookup_one_len(const char *name, struct dentry *base, int len)
    2700             : {
    2701             :         struct dentry *dentry;
    2702             :         struct qstr this;
    2703             :         int err;
    2704             : 
    2705           0 :         WARN_ON_ONCE(!inode_is_locked(base->d_inode));
    2706             : 
    2707           0 :         err = lookup_one_common(&nop_mnt_idmap, name, base, len, &this);
    2708           0 :         if (err)
    2709           0 :                 return ERR_PTR(err);
    2710             : 
    2711           0 :         dentry = lookup_dcache(&this, base, 0);
    2712           0 :         return dentry ? dentry : __lookup_slow(&this, base, 0);
    2713             : }
    2714             : EXPORT_SYMBOL(lookup_one_len);
    2715             : 
    2716             : /**
    2717             :  * lookup_one - filesystem helper to lookup single pathname component
    2718             :  * @idmap:      idmap of the mount the lookup is performed from
    2719             :  * @name:       pathname component to lookup
    2720             :  * @base:       base directory to lookup from
    2721             :  * @len:        number of bytes of @name that form the component
    2722             :  *
    2723             :  * Note that this routine is purely a helper for filesystem usage and should
    2724             :  * not be called by generic code.
    2725             :  *
    2726             :  * The caller must hold base->i_mutex.
    2727             :  */
    2728           0 : struct dentry *lookup_one(struct mnt_idmap *idmap, const char *name,
    2729             :                           struct dentry *base, int len)
    2730             : {
    2731             :         struct dentry *dentry;
    2732             :         struct qstr this;
    2733             :         int err;
    2734             : 
    2735           0 :         WARN_ON_ONCE(!inode_is_locked(base->d_inode));
    2736             : 
    2737           0 :         err = lookup_one_common(idmap, name, base, len, &this);
    2738           0 :         if (err)
    2739           0 :                 return ERR_PTR(err);
    2740             : 
    2741           0 :         dentry = lookup_dcache(&this, base, 0);
    2742           0 :         return dentry ? dentry : __lookup_slow(&this, base, 0);
    2743             : }
    2744             : EXPORT_SYMBOL(lookup_one);
    2745             : 
    2746             : /**
    2747             :  * lookup_one_unlocked - filesystem helper to lookup single pathname component
    2748             :  * @idmap:      idmap of the mount the lookup is performed from
    2749             :  * @name:       pathname component to lookup
    2750             :  * @base:       base directory to lookup from
    2751             :  * @len:        number of bytes of @name that form the component
    2752             :  *
    2753             :  * Note that this routine is purely a helper for filesystem usage and should
    2754             :  * not be called by generic code.
    2755             :  *
    2756             :  * Unlike lookup_one_len, it should be called without the parent
    2757             :  * i_mutex held, and will take the i_mutex itself if necessary.
    2758             :  */
    2759           0 : struct dentry *lookup_one_unlocked(struct mnt_idmap *idmap,
    2760             :                                    const char *name, struct dentry *base,
    2761             :                                    int len)
    2762             : {
    2763             :         struct qstr this;
    2764             :         int err;
    2765             :         struct dentry *ret;
    2766             : 
    2767           0 :         err = lookup_one_common(idmap, name, base, len, &this);
    2768           0 :         if (err)
    2769           0 :                 return ERR_PTR(err);
    2770             : 
    2771           0 :         ret = lookup_dcache(&this, base, 0);
    2772           0 :         if (!ret)
    2773           0 :                 ret = lookup_slow(&this, base, 0);
    2774             :         return ret;
    2775             : }
    2776             : EXPORT_SYMBOL(lookup_one_unlocked);
    2777             : 
    2778             : /**
    2779             :  * lookup_one_positive_unlocked - filesystem helper to lookup single
    2780             :  *                                pathname component
    2781             :  * @idmap:      idmap of the mount the lookup is performed from
    2782             :  * @name:       pathname component to lookup
    2783             :  * @base:       base directory to lookup from
    2784             :  * @len:        maximum length @name should be interpreted to
    2785             :  *
    2786             :  * This helper will yield ERR_PTR(-ENOENT) on negatives. The helper returns
    2787             :  * known positive or ERR_PTR(). This is what most of the users want.
    2788             :  *
    2789             :  * Note that pinned negative with unlocked parent _can_ become positive at any
    2790             :  * time, so callers of lookup_one_unlocked() need to be very careful; pinned
    2791             :  * positives have ->d_inode stable, so this one avoids such problems.
    2792             :  *
    2793             :  * Note that this routine is purely a helper for filesystem usage and should
    2794             :  * not be called by generic code.
    2795             :  *
    2796             :  * The helper should be called without i_mutex held.
    2797             :  */
    2798           0 : struct dentry *lookup_one_positive_unlocked(struct mnt_idmap *idmap,
    2799             :                                             const char *name,
    2800             :                                             struct dentry *base, int len)
    2801             : {
    2802           0 :         struct dentry *ret = lookup_one_unlocked(idmap, name, base, len);
    2803             : 
    2804           0 :         if (!IS_ERR(ret) && d_flags_negative(smp_load_acquire(&ret->d_flags))) {
    2805           0 :                 dput(ret);
    2806           0 :                 ret = ERR_PTR(-ENOENT);
    2807             :         }
    2808           0 :         return ret;
    2809             : }
    2810             : EXPORT_SYMBOL(lookup_one_positive_unlocked);
    2811             : 
    2812             : /**
    2813             :  * lookup_one_len_unlocked - filesystem helper to lookup single pathname component
    2814             :  * @name:       pathname component to lookup
    2815             :  * @base:       base directory to lookup from
    2816             :  * @len:        maximum length @name should be interpreted to
    2817             :  *
    2818             :  * Note that this routine is purely a helper for filesystem usage and should
    2819             :  * not be called by generic code.
    2820             :  *
    2821             :  * Unlike lookup_one_len, it should be called without the parent
    2822             :  * i_mutex held, and will take the i_mutex itself if necessary.
    2823             :  */
    2824           0 : struct dentry *lookup_one_len_unlocked(const char *name,
    2825             :                                        struct dentry *base, int len)
    2826             : {
    2827           0 :         return lookup_one_unlocked(&nop_mnt_idmap, name, base, len);
    2828             : }
    2829             : EXPORT_SYMBOL(lookup_one_len_unlocked);
    2830             : 
    2831             : /*
    2832             :  * Like lookup_one_len_unlocked(), except that it yields ERR_PTR(-ENOENT)
    2833             :  * on negatives.  Returns known positive or ERR_PTR(); that's what
    2834             :  * most of the users want.  Note that pinned negative with unlocked parent
    2835             :  * _can_ become positive at any time, so callers of lookup_one_len_unlocked()
    2836             :  * need to be very careful; pinned positives have ->d_inode stable, so
    2837             :  * this one avoids such problems.
    2838             :  */
    2839           0 : struct dentry *lookup_positive_unlocked(const char *name,
    2840             :                                        struct dentry *base, int len)
    2841             : {
    2842           0 :         return lookup_one_positive_unlocked(&nop_mnt_idmap, name, base, len);
    2843             : }
    2844             : EXPORT_SYMBOL(lookup_positive_unlocked);
    2845             : 
    2846             : #ifdef CONFIG_UNIX98_PTYS
    2847           0 : int path_pts(struct path *path)
    2848             : {
    2849             :         /* Find something mounted on "pts" in the same directory as
    2850             :          * the input path.
    2851             :          */
    2852           0 :         struct dentry *parent = dget_parent(path->dentry);
    2853             :         struct dentry *child;
    2854           0 :         struct qstr this = QSTR_INIT("pts", 3);
    2855             : 
    2856           0 :         if (unlikely(!path_connected(path->mnt, parent))) {
    2857           0 :                 dput(parent);
    2858           0 :                 return -ENOENT;
    2859             :         }
    2860           0 :         dput(path->dentry);
    2861           0 :         path->dentry = parent;
    2862           0 :         child = d_hash_and_lookup(parent, &this);
    2863           0 :         if (!child)
    2864             :                 return -ENOENT;
    2865             : 
    2866           0 :         path->dentry = child;
    2867           0 :         dput(parent);
    2868           0 :         follow_down(path, 0);
    2869           0 :         return 0;
    2870             : }
    2871             : #endif
    2872             : 
    2873           0 : int user_path_at_empty(int dfd, const char __user *name, unsigned flags,
    2874             :                  struct path *path, int *empty)
    2875             : {
    2876           0 :         struct filename *filename = getname_flags(name, flags, empty);
    2877           0 :         int ret = filename_lookup(dfd, filename, flags, path, NULL);
    2878             : 
    2879           0 :         putname(filename);
    2880           0 :         return ret;
    2881             : }
    2882             : EXPORT_SYMBOL(user_path_at_empty);
    2883             : 
    2884           0 : int __check_sticky(struct mnt_idmap *idmap, struct inode *dir,
    2885             :                    struct inode *inode)
    2886             : {
    2887           0 :         kuid_t fsuid = current_fsuid();
    2888             : 
    2889           0 :         if (vfsuid_eq_kuid(i_uid_into_vfsuid(idmap, inode), fsuid))
    2890             :                 return 0;
    2891           0 :         if (vfsuid_eq_kuid(i_uid_into_vfsuid(idmap, dir), fsuid))
    2892             :                 return 0;
    2893           0 :         return !capable_wrt_inode_uidgid(idmap, inode, CAP_FOWNER);
    2894             : }
    2895             : EXPORT_SYMBOL(__check_sticky);
    2896             : 
    2897             : /*
    2898             :  *      Check whether we can remove a link victim from directory dir, check
    2899             :  *  whether the type of victim is right.
    2900             :  *  1. We can't do it if dir is read-only (done in permission())
    2901             :  *  2. We should have write and exec permissions on dir
    2902             :  *  3. We can't remove anything from append-only dir
    2903             :  *  4. We can't do anything with immutable dir (done in permission())
    2904             :  *  5. If the sticky bit on dir is set we should either
    2905             :  *      a. be owner of dir, or
    2906             :  *      b. be owner of victim, or
    2907             :  *      c. have CAP_FOWNER capability
    2908             :  *  6. If the victim is append-only or immutable we can't do anything with
    2909             :  *     links pointing to it.
    2910             :  *  7. If the victim has an unknown uid or gid we can't change the inode.
    2911             :  *  8. If we were asked to remove a directory and victim isn't one - ENOTDIR.
    2912             :  *  9. If we were asked to remove a non-directory and victim isn't one - EISDIR.
    2913             :  * 10. We can't remove a root or mountpoint.
    2914             :  * 11. We don't allow removal of NFS sillyrenamed files; it's handled by
    2915             :  *     nfs_async_unlink().
    2916             :  */
    2917           0 : static int may_delete(struct mnt_idmap *idmap, struct inode *dir,
    2918             :                       struct dentry *victim, bool isdir)
    2919             : {
    2920           0 :         struct inode *inode = d_backing_inode(victim);
    2921             :         int error;
    2922             : 
    2923           0 :         if (d_is_negative(victim))
    2924             :                 return -ENOENT;
    2925           0 :         BUG_ON(!inode);
    2926             : 
    2927           0 :         BUG_ON(victim->d_parent->d_inode != dir);
    2928             : 
    2929             :         /* Inode writeback is not safe when the uid or gid are invalid. */
    2930           0 :         if (!vfsuid_valid(i_uid_into_vfsuid(idmap, inode)) ||
    2931           0 :             !vfsgid_valid(i_gid_into_vfsgid(idmap, inode)))
    2932             :                 return -EOVERFLOW;
    2933             : 
    2934           0 :         audit_inode_child(dir, victim, AUDIT_TYPE_CHILD_DELETE);
    2935             : 
    2936           0 :         error = inode_permission(idmap, dir, MAY_WRITE | MAY_EXEC);
    2937           0 :         if (error)
    2938             :                 return error;
    2939           0 :         if (IS_APPEND(dir))
    2940             :                 return -EPERM;
    2941             : 
    2942           0 :         if (check_sticky(idmap, dir, inode) || IS_APPEND(inode) ||
    2943           0 :             IS_IMMUTABLE(inode) || IS_SWAPFILE(inode) ||
    2944           0 :             HAS_UNMAPPED_ID(idmap, inode))
    2945             :                 return -EPERM;
    2946           0 :         if (isdir) {
    2947           0 :                 if (!d_is_dir(victim))
    2948             :                         return -ENOTDIR;
    2949           0 :                 if (IS_ROOT(victim))
    2950             :                         return -EBUSY;
    2951           0 :         } else if (d_is_dir(victim))
    2952             :                 return -EISDIR;
    2953           0 :         if (IS_DEADDIR(dir))
    2954             :                 return -ENOENT;
    2955           0 :         if (victim->d_flags & DCACHE_NFSFS_RENAMED)
    2956             :                 return -EBUSY;
    2957           0 :         return 0;
    2958             : }
    2959             : 
    2960             : /*      Check whether we can create an object with dentry child in directory
    2961             :  *  dir.
    2962             :  *  1. We can't do it if child already exists (open has special treatment for
    2963             :  *     this case, but since we are inlined it's OK)
    2964             :  *  2. We can't do it if dir is read-only (done in permission())
    2965             :  *  3. We can't do it if the fs can't represent the fsuid or fsgid.
    2966             :  *  4. We should have write and exec permissions on dir
    2967             :  *  5. We can't do it if dir is immutable (done in permission())
    2968             :  */
    2969           3 : static inline int may_create(struct mnt_idmap *idmap,
    2970             :                              struct inode *dir, struct dentry *child)
    2971             : {
    2972           3 :         audit_inode_child(dir, child, AUDIT_TYPE_CHILD_CREATE);
    2973           3 :         if (child->d_inode)
    2974             :                 return -EEXIST;
    2975           3 :         if (IS_DEADDIR(dir))
    2976             :                 return -ENOENT;
    2977           3 :         if (!fsuidgid_has_mapping(dir->i_sb, idmap))
    2978             :                 return -EOVERFLOW;
    2979             : 
    2980           3 :         return inode_permission(idmap, dir, MAY_WRITE | MAY_EXEC);
    2981             : }
    2982             : 
    2983             : /*
    2984             :  * p1 and p2 should be directories on the same fs.
    2985             :  */
    2986           0 : struct dentry *lock_rename(struct dentry *p1, struct dentry *p2)
    2987             : {
    2988             :         struct dentry *p;
    2989             : 
    2990           0 :         if (p1 == p2) {
    2991           0 :                 inode_lock_nested(p1->d_inode, I_MUTEX_PARENT);
    2992           0 :                 return NULL;
    2993             :         }
    2994             : 
    2995           0 :         mutex_lock(&p1->d_sb->s_vfs_rename_mutex);
    2996             : 
    2997           0 :         p = d_ancestor(p2, p1);
    2998           0 :         if (p) {
    2999           0 :                 inode_lock_nested(p2->d_inode, I_MUTEX_PARENT);
    3000           0 :                 inode_lock_nested(p1->d_inode, I_MUTEX_CHILD);
    3001           0 :                 return p;
    3002             :         }
    3003             : 
    3004           0 :         p = d_ancestor(p1, p2);
    3005           0 :         if (p) {
    3006           0 :                 inode_lock_nested(p1->d_inode, I_MUTEX_PARENT);
    3007           0 :                 inode_lock_nested(p2->d_inode, I_MUTEX_CHILD);
    3008           0 :                 return p;
    3009             :         }
    3010             : 
    3011           0 :         inode_lock_nested(p1->d_inode, I_MUTEX_PARENT);
    3012           0 :         inode_lock_nested(p2->d_inode, I_MUTEX_PARENT2);
    3013           0 :         return NULL;
    3014             : }
    3015             : EXPORT_SYMBOL(lock_rename);
    3016             : 
    3017           0 : void unlock_rename(struct dentry *p1, struct dentry *p2)
    3018             : {
    3019           0 :         inode_unlock(p1->d_inode);
    3020           0 :         if (p1 != p2) {
    3021           0 :                 inode_unlock(p2->d_inode);
    3022           0 :                 mutex_unlock(&p1->d_sb->s_vfs_rename_mutex);
    3023             :         }
    3024           0 : }
    3025             : EXPORT_SYMBOL(unlock_rename);
    3026             : 
    3027             : /**
    3028             :  * mode_strip_umask - handle vfs umask stripping
    3029             :  * @dir:        parent directory of the new inode
    3030             :  * @mode:       mode of the new inode to be created in @dir
    3031             :  *
    3032             :  * Umask stripping depends on whether or not the filesystem supports POSIX
    3033             :  * ACLs. If the filesystem doesn't support it umask stripping is done directly
    3034             :  * in here. If the filesystem does support POSIX ACLs umask stripping is
    3035             :  * deferred until the filesystem calls posix_acl_create().
    3036             :  *
    3037             :  * Returns: mode
    3038             :  */
    3039             : static inline umode_t mode_strip_umask(const struct inode *dir, umode_t mode)
    3040             : {
    3041           3 :         if (!IS_POSIXACL(dir))
    3042           3 :                 mode &= ~current_umask();
    3043             :         return mode;
    3044             : }
    3045             : 
    3046             : /**
    3047             :  * vfs_prepare_mode - prepare the mode to be used for a new inode
    3048             :  * @idmap:      idmap of the mount the inode was found from
    3049             :  * @dir:        parent directory of the new inode
    3050             :  * @mode:       mode of the new inode
    3051             :  * @mask_perms: allowed permission by the vfs
    3052             :  * @type:       type of file to be created
    3053             :  *
    3054             :  * This helper consolidates and enforces vfs restrictions on the @mode of a new
    3055             :  * object to be created.
    3056             :  *
    3057             :  * Umask stripping depends on whether the filesystem supports POSIX ACLs (see
    3058             :  * the kernel documentation for mode_strip_umask()). Moving umask stripping
    3059             :  * after setgid stripping allows the same ordering for both non-POSIX ACL and
    3060             :  * POSIX ACL supporting filesystems.
    3061             :  *
    3062             :  * Note that it's currently valid for @type to be 0 if a directory is created.
    3063             :  * Filesystems raise that flag individually and we need to check whether each
    3064             :  * filesystem can deal with receiving S_IFDIR from the vfs before we enforce a
    3065             :  * non-zero type.
    3066             :  *
    3067             :  * Returns: mode to be passed to the filesystem
    3068             :  */
    3069           3 : static inline umode_t vfs_prepare_mode(struct mnt_idmap *idmap,
    3070             :                                        const struct inode *dir, umode_t mode,
    3071             :                                        umode_t mask_perms, umode_t type)
    3072             : {
    3073           3 :         mode = mode_strip_sgid(idmap, dir, mode);
    3074           6 :         mode = mode_strip_umask(dir, mode);
    3075             : 
    3076             :         /*
    3077             :          * Apply the vfs mandated allowed permission mask and set the type of
    3078             :          * file to be created before we call into the filesystem.
    3079             :          */
    3080           3 :         mode &= (mask_perms & ~S_IFMT);
    3081           3 :         mode |= (type & S_IFMT);
    3082             : 
    3083           3 :         return mode;
    3084             : }
    3085             : 
    3086             : /**
    3087             :  * vfs_create - create new file
    3088             :  * @idmap:      idmap of the mount the inode was found from
    3089             :  * @dir:        inode of the parent directory
    3090             :  * @dentry:     dentry of the file to be created
    3091             :  * @mode:       mode of the new file
    3092             :  * @want_excl:  whether the file must not yet exist
    3093             :  *
    3094             :  * Create a new file.
    3095             :  *
    3096             :  * If the inode has been found through an idmapped mount the idmap of
    3097             :  * the vfsmount must be passed through @idmap. This function will then take
    3098             :  * care to map the inode according to @idmap before checking permissions.
    3099             :  * On non-idmapped mounts or if permission checking is to be performed on the
    3100             :  * raw inode simply pass @nop_mnt_idmap.
    3101             :  */
    3102           0 : int vfs_create(struct mnt_idmap *idmap, struct inode *dir,
    3103             :                struct dentry *dentry, umode_t mode, bool want_excl)
    3104             : {
    3105             :         int error;
    3106             : 
    3107           0 :         error = may_create(idmap, dir, dentry);
    3108           0 :         if (error)
    3109             :                 return error;
    3110             : 
    3111           0 :         if (!dir->i_op->create)
    3112             :                 return -EACCES; /* shouldn't it be ENOSYS? */
    3113             : 
    3114           0 :         mode = vfs_prepare_mode(idmap, dir, mode, S_IALLUGO, S_IFREG);
    3115           0 :         error = security_inode_create(dir, dentry, mode);
    3116             :         if (error)
    3117             :                 return error;
    3118           0 :         error = dir->i_op->create(idmap, dir, dentry, mode, want_excl);
    3119           0 :         if (!error)
    3120             :                 fsnotify_create(dir, dentry);
    3121             :         return error;
    3122             : }
    3123             : EXPORT_SYMBOL(vfs_create);
    3124             : 
    3125           0 : int vfs_mkobj(struct dentry *dentry, umode_t mode,
    3126             :                 int (*f)(struct dentry *, umode_t, void *),
    3127             :                 void *arg)
    3128             : {
    3129           0 :         struct inode *dir = dentry->d_parent->d_inode;
    3130           0 :         int error = may_create(&nop_mnt_idmap, dir, dentry);
    3131           0 :         if (error)
    3132             :                 return error;
    3133             : 
    3134           0 :         mode &= S_IALLUGO;
    3135           0 :         mode |= S_IFREG;
    3136           0 :         error = security_inode_create(dir, dentry, mode);
    3137             :         if (error)
    3138             :                 return error;
    3139           0 :         error = f(dentry, mode, arg);
    3140           0 :         if (!error)
    3141             :                 fsnotify_create(dir, dentry);
    3142             :         return error;
    3143             : }
    3144             : EXPORT_SYMBOL(vfs_mkobj);
    3145             : 
    3146           0 : bool may_open_dev(const struct path *path)
    3147             : {
    3148           0 :         return !(path->mnt->mnt_flags & MNT_NODEV) &&
    3149           0 :                 !(path->mnt->mnt_sb->s_iflags & SB_I_NODEV);
    3150             : }
    3151             : 
    3152           0 : static int may_open(struct mnt_idmap *idmap, const struct path *path,
    3153             :                     int acc_mode, int flag)
    3154             : {
    3155           0 :         struct dentry *dentry = path->dentry;
    3156           0 :         struct inode *inode = dentry->d_inode;
    3157             :         int error;
    3158             : 
    3159           0 :         if (!inode)
    3160             :                 return -ENOENT;
    3161             : 
    3162           0 :         switch (inode->i_mode & S_IFMT) {
    3163             :         case S_IFLNK:
    3164             :                 return -ELOOP;
    3165             :         case S_IFDIR:
    3166           0 :                 if (acc_mode & MAY_WRITE)
    3167             :                         return -EISDIR;
    3168           0 :                 if (acc_mode & MAY_EXEC)
    3169             :                         return -EACCES;
    3170             :                 break;
    3171             :         case S_IFBLK:
    3172             :         case S_IFCHR:
    3173           0 :                 if (!may_open_dev(path))
    3174             :                         return -EACCES;
    3175             :                 fallthrough;
    3176             :         case S_IFIFO:
    3177             :         case S_IFSOCK:
    3178           0 :                 if (acc_mode & MAY_EXEC)
    3179             :                         return -EACCES;
    3180           0 :                 flag &= ~O_TRUNC;
    3181           0 :                 break;
    3182             :         case S_IFREG:
    3183           0 :                 if ((acc_mode & MAY_EXEC) && path_noexec(path))
    3184             :                         return -EACCES;
    3185             :                 break;
    3186             :         }
    3187             : 
    3188           0 :         error = inode_permission(idmap, inode, MAY_OPEN | acc_mode);
    3189           0 :         if (error)
    3190             :                 return error;
    3191             : 
    3192             :         /*
    3193             :          * An append-only file must be opened in append mode for writing.
    3194             :          */
    3195           0 :         if (IS_APPEND(inode)) {
    3196           0 :                 if  ((flag & O_ACCMODE) != O_RDONLY && !(flag & O_APPEND))
    3197             :                         return -EPERM;
    3198           0 :                 if (flag & O_TRUNC)
    3199             :                         return -EPERM;
    3200             :         }
    3201             : 
    3202             :         /* O_NOATIME can only be set by the owner or superuser */
    3203           0 :         if (flag & O_NOATIME && !inode_owner_or_capable(idmap, inode))
    3204             :                 return -EPERM;
    3205             : 
    3206             :         return 0;
    3207             : }
    3208             : 
    3209           0 : static int handle_truncate(struct mnt_idmap *idmap, struct file *filp)
    3210             : {
    3211           0 :         const struct path *path = &filp->f_path;
    3212           0 :         struct inode *inode = path->dentry->d_inode;
    3213           0 :         int error = get_write_access(inode);
    3214           0 :         if (error)
    3215             :                 return error;
    3216             : 
    3217           0 :         error = security_file_truncate(filp);
    3218             :         if (!error) {
    3219           0 :                 error = do_truncate(idmap, path->dentry, 0,
    3220             :                                     ATTR_MTIME|ATTR_CTIME|ATTR_OPEN,
    3221             :                                     filp);
    3222             :         }
    3223           0 :         put_write_access(inode);
    3224           0 :         return error;
    3225             : }
    3226             : 
    3227             : static inline int open_to_namei_flags(int flag)
    3228             : {
    3229           0 :         if ((flag & O_ACCMODE) == 3)
    3230           0 :                 flag--;
    3231             :         return flag;
    3232             : }
    3233             : 
    3234           0 : static int may_o_create(struct mnt_idmap *idmap,
    3235             :                         const struct path *dir, struct dentry *dentry,
    3236             :                         umode_t mode)
    3237             : {
    3238           0 :         int error = security_path_mknod(dir, dentry, mode, 0);
    3239             :         if (error)
    3240             :                 return error;
    3241             : 
    3242           0 :         if (!fsuidgid_has_mapping(dir->dentry->d_sb, idmap))
    3243             :                 return -EOVERFLOW;
    3244             : 
    3245           0 :         error = inode_permission(idmap, dir->dentry->d_inode,
    3246             :                                  MAY_WRITE | MAY_EXEC);
    3247           0 :         if (error)
    3248             :                 return error;
    3249             : 
    3250           0 :         return security_inode_create(dir->dentry->d_inode, dentry, mode);
    3251             : }
    3252             : 
    3253             : /*
    3254             :  * Attempt to atomically look up, create and open a file from a negative
    3255             :  * dentry.
    3256             :  *
    3257             :  * Returns the dentry if successful.  The file will have been created and
    3258             :  * attached to @file by the filesystem calling finish_open().
    3259             :  *
    3260             :  * If the file was looked up only or didn't need creating, FMODE_OPENED won't
    3261             :  * be set.  The caller will need to perform the open themselves.  @path will
    3262             :  * have been updated to point to the new dentry.  This may be negative.
    3263             :  *
    3264             :  * Returns an ERR_PTR()-encoded error code otherwise.
    3265             :  */
    3266           0 : static struct dentry *atomic_open(struct nameidata *nd, struct dentry *dentry,
    3267             :                                   struct file *file,
    3268             :                                   int open_flag, umode_t mode)
    3269             : {
    3270           0 :         struct dentry *const DENTRY_NOT_SET = (void *) -1UL;
    3271           0 :         struct inode *dir =  nd->path.dentry->d_inode;
    3272             :         int error;
    3273             : 
    3274           0 :         if (nd->flags & LOOKUP_DIRECTORY)
    3275           0 :                 open_flag |= O_DIRECTORY;
    3276             : 
    3277           0 :         file->f_path.dentry = DENTRY_NOT_SET;
    3278           0 :         file->f_path.mnt = nd->path.mnt;
    3279           0 :         error = dir->i_op->atomic_open(dir, dentry, file,
    3280           0 :                                        open_to_namei_flags(open_flag), mode);
    3281           0 :         d_lookup_done(dentry);
    3282           0 :         if (!error) {
    3283           0 :                 if (file->f_mode & FMODE_OPENED) {
    3284           0 :                         if (unlikely(dentry != file->f_path.dentry)) {
    3285           0 :                                 dput(dentry);
    3286           0 :                                 dentry = dget(file->f_path.dentry);
    3287             :                         }
    3288           0 :                 } else if (WARN_ON(file->f_path.dentry == DENTRY_NOT_SET)) {
    3289             :                         error = -EIO;
    3290             :                 } else {
    3291           0 :                         if (file->f_path.dentry) {
    3292           0 :                                 dput(dentry);
    3293           0 :                                 dentry = file->f_path.dentry;
    3294             :                         }
    3295           0 :                         if (unlikely(d_is_negative(dentry)))
    3296           0 :                                 error = -ENOENT;
    3297             :                 }
    3298             :         }
    3299           0 :         if (error) {
    3300           0 :                 dput(dentry);
    3301           0 :                 dentry = ERR_PTR(error);
    3302             :         }
    3303           0 :         return dentry;
    3304             : }
    3305             : 
    3306             : /*
    3307             :  * Look up and maybe create and open the last component.
    3308             :  *
    3309             :  * Must be called with parent locked (exclusive in O_CREAT case).
    3310             :  *
    3311             :  * Returns the dentry on success, that is, if
    3312             :  *  the file was successfully atomically created (if necessary) and opened, or
    3313             :  *  the file was not completely opened at this time, though lookups and
    3314             :  *  creations were performed.
    3315             :  * These cases are distinguished by presence of FMODE_OPENED on file->f_mode.
    3316             :  * In the latter case dentry returned in @path might be negative if O_CREAT
    3317             :  * hadn't been specified.
    3318             :  *
    3319             :  * An ERR_PTR()-encoded error code is returned on failure.
    3320             :  */
    3321           0 : static struct dentry *lookup_open(struct nameidata *nd, struct file *file,
    3322             :                                   const struct open_flags *op,
    3323             :                                   bool got_write)
    3324             : {
    3325             :         struct mnt_idmap *idmap;
    3326           0 :         struct dentry *dir = nd->path.dentry;
    3327           0 :         struct inode *dir_inode = dir->d_inode;
    3328           0 :         int open_flag = op->open_flag;
    3329             :         struct dentry *dentry;
    3330           0 :         int error, create_error = 0;
    3331           0 :         umode_t mode = op->mode;
    3332           0 :         DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
    3333             : 
    3334           0 :         if (unlikely(IS_DEADDIR(dir_inode)))
    3335             :                 return ERR_PTR(-ENOENT);
    3336             : 
    3337           0 :         file->f_mode &= ~FMODE_CREATED;
    3338           0 :         dentry = d_lookup(dir, &nd->last);
    3339             :         for (;;) {
    3340           0 :                 if (!dentry) {
    3341           0 :                         dentry = d_alloc_parallel(dir, &nd->last, &wq);
    3342           0 :                         if (IS_ERR(dentry))
    3343             :                                 return dentry;
    3344             :                 }
    3345           0 :                 if (d_in_lookup(dentry))
    3346             :                         break;
    3347             : 
    3348           0 :                 error = d_revalidate(dentry, nd->flags);
    3349           0 :                 if (likely(error > 0))
    3350             :                         break;
    3351           0 :                 if (error)
    3352             :                         goto out_dput;
    3353           0 :                 d_invalidate(dentry);
    3354           0 :                 dput(dentry);
    3355           0 :                 dentry = NULL;
    3356             :         }
    3357           0 :         if (dentry->d_inode) {
    3358             :                 /* Cached positive dentry: will open in f_op->open */
    3359             :                 return dentry;
    3360             :         }
    3361             : 
    3362             :         /*
    3363             :          * Checking write permission is tricky, because we don't know if we are
    3364             :          * going to actually need it: O_CREAT opens should work as long as the
    3365             :          * file exists.  But checking existence breaks atomicity.  The trick is
    3366             :          * to check access and if not granted clear O_CREAT from the flags.
    3367             :          *
    3368             :          * Another problem is returning the "right" error value (e.g. for an
    3369             :          * O_EXCL open we want to return EEXIST not EROFS).
    3370             :          */
    3371           0 :         if (unlikely(!got_write))
    3372           0 :                 open_flag &= ~O_TRUNC;
    3373           0 :         idmap = mnt_idmap(nd->path.mnt);
    3374           0 :         if (open_flag & O_CREAT) {
    3375           0 :                 if (open_flag & O_EXCL)
    3376           0 :                         open_flag &= ~O_TRUNC;
    3377           0 :                 mode = vfs_prepare_mode(idmap, dir->d_inode, mode, mode, mode);
    3378           0 :                 if (likely(got_write))
    3379           0 :                         create_error = may_o_create(idmap, &nd->path,
    3380             :                                                     dentry, mode);
    3381             :                 else
    3382             :                         create_error = -EROFS;
    3383             :         }
    3384           0 :         if (create_error)
    3385           0 :                 open_flag &= ~O_CREAT;
    3386           0 :         if (dir_inode->i_op->atomic_open) {
    3387           0 :                 dentry = atomic_open(nd, dentry, file, open_flag, mode);
    3388           0 :                 if (unlikely(create_error) && dentry == ERR_PTR(-ENOENT))
    3389           0 :                         dentry = ERR_PTR(create_error);
    3390             :                 return dentry;
    3391             :         }
    3392             : 
    3393           0 :         if (d_in_lookup(dentry)) {
    3394           0 :                 struct dentry *res = dir_inode->i_op->lookup(dir_inode, dentry,
    3395             :                                                              nd->flags);
    3396           0 :                 d_lookup_done(dentry);
    3397           0 :                 if (unlikely(res)) {
    3398           0 :                         if (IS_ERR(res)) {
    3399           0 :                                 error = PTR_ERR(res);
    3400           0 :                                 goto out_dput;
    3401             :                         }
    3402           0 :                         dput(dentry);
    3403           0 :                         dentry = res;
    3404             :                 }
    3405             :         }
    3406             : 
    3407             :         /* Negative dentry, just create the file */
    3408           0 :         if (!dentry->d_inode && (open_flag & O_CREAT)) {
    3409           0 :                 file->f_mode |= FMODE_CREATED;
    3410           0 :                 audit_inode_child(dir_inode, dentry, AUDIT_TYPE_CHILD_CREATE);
    3411           0 :                 if (!dir_inode->i_op->create) {
    3412             :                         error = -EACCES;
    3413             :                         goto out_dput;
    3414             :                 }
    3415             : 
    3416           0 :                 error = dir_inode->i_op->create(idmap, dir_inode, dentry,
    3417           0 :                                                 mode, open_flag & O_EXCL);
    3418           0 :                 if (error)
    3419             :                         goto out_dput;
    3420             :         }
    3421           0 :         if (unlikely(create_error) && !dentry->d_inode) {
    3422             :                 error = create_error;
    3423             :                 goto out_dput;
    3424             :         }
    3425             :         return dentry;
    3426             : 
    3427             : out_dput:
    3428           0 :         dput(dentry);
    3429           0 :         return ERR_PTR(error);
    3430             : }
    3431             : 
    3432           0 : static const char *open_last_lookups(struct nameidata *nd,
    3433             :                    struct file *file, const struct open_flags *op)
    3434             : {
    3435           0 :         struct dentry *dir = nd->path.dentry;
    3436           0 :         int open_flag = op->open_flag;
    3437           0 :         bool got_write = false;
    3438             :         struct dentry *dentry;
    3439             :         const char *res;
    3440             : 
    3441           0 :         nd->flags |= op->intent;
    3442             : 
    3443           0 :         if (nd->last_type != LAST_NORM) {
    3444           0 :                 if (nd->depth)
    3445           0 :                         put_link(nd);
    3446           0 :                 return handle_dots(nd, nd->last_type);
    3447             :         }
    3448             : 
    3449           0 :         if (!(open_flag & O_CREAT)) {
    3450           0 :                 if (nd->last.name[nd->last.len])
    3451           0 :                         nd->flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
    3452             :                 /* we _can_ be in RCU mode here */
    3453           0 :                 dentry = lookup_fast(nd);
    3454           0 :                 if (IS_ERR(dentry))
    3455             :                         return ERR_CAST(dentry);
    3456           0 :                 if (likely(dentry))
    3457             :                         goto finish_lookup;
    3458             : 
    3459           0 :                 BUG_ON(nd->flags & LOOKUP_RCU);
    3460             :         } else {
    3461             :                 /* create side of things */
    3462           0 :                 if (nd->flags & LOOKUP_RCU) {
    3463           0 :                         if (!try_to_unlazy(nd))
    3464             :                                 return ERR_PTR(-ECHILD);
    3465             :                 }
    3466           0 :                 audit_inode(nd->name, dir, AUDIT_INODE_PARENT);
    3467             :                 /* trailing slashes? */
    3468           0 :                 if (unlikely(nd->last.name[nd->last.len]))
    3469             :                         return ERR_PTR(-EISDIR);
    3470             :         }
    3471             : 
    3472           0 :         if (open_flag & (O_CREAT | O_TRUNC | O_WRONLY | O_RDWR)) {
    3473           0 :                 got_write = !mnt_want_write(nd->path.mnt);
    3474             :                 /*
    3475             :                  * do _not_ fail yet - we might not need that or fail with
    3476             :                  * a different error; let lookup_open() decide; we'll be
    3477             :                  * dropping this one anyway.
    3478             :                  */
    3479             :         }
    3480           0 :         if (open_flag & O_CREAT)
    3481           0 :                 inode_lock(dir->d_inode);
    3482             :         else
    3483           0 :                 inode_lock_shared(dir->d_inode);
    3484           0 :         dentry = lookup_open(nd, file, op, got_write);
    3485           0 :         if (!IS_ERR(dentry) && (file->f_mode & FMODE_CREATED))
    3486           0 :                 fsnotify_create(dir->d_inode, dentry);
    3487           0 :         if (open_flag & O_CREAT)
    3488           0 :                 inode_unlock(dir->d_inode);
    3489             :         else
    3490           0 :                 inode_unlock_shared(dir->d_inode);
    3491             : 
    3492           0 :         if (got_write)
    3493           0 :                 mnt_drop_write(nd->path.mnt);
    3494             : 
    3495           0 :         if (IS_ERR(dentry))
    3496             :                 return ERR_CAST(dentry);
    3497             : 
    3498           0 :         if (file->f_mode & (FMODE_OPENED | FMODE_CREATED)) {
    3499           0 :                 dput(nd->path.dentry);
    3500           0 :                 nd->path.dentry = dentry;
    3501           0 :                 return NULL;
    3502             :         }
    3503             : 
    3504             : finish_lookup:
    3505           0 :         if (nd->depth)
    3506           0 :                 put_link(nd);
    3507           0 :         res = step_into(nd, WALK_TRAILING, dentry);
    3508           0 :         if (unlikely(res))
    3509           0 :                 nd->flags &= ~(LOOKUP_OPEN|LOOKUP_CREATE|LOOKUP_EXCL);
    3510             :         return res;
    3511             : }
    3512             : 
    3513             : /*
    3514             :  * Handle the last step of open()
    3515             :  */
    3516           0 : static int do_open(struct nameidata *nd,
    3517             :                    struct file *file, const struct open_flags *op)
    3518             : {
    3519             :         struct mnt_idmap *idmap;
    3520           0 :         int open_flag = op->open_flag;
    3521             :         bool do_truncate;
    3522             :         int acc_mode;
    3523             :         int error;
    3524             : 
    3525           0 :         if (!(file->f_mode & (FMODE_OPENED | FMODE_CREATED))) {
    3526           0 :                 error = complete_walk(nd);
    3527           0 :                 if (error)
    3528             :                         return error;
    3529             :         }
    3530             :         if (!(file->f_mode & FMODE_CREATED))
    3531             :                 audit_inode(nd->name, nd->path.dentry, 0);
    3532           0 :         idmap = mnt_idmap(nd->path.mnt);
    3533           0 :         if (open_flag & O_CREAT) {
    3534           0 :                 if ((open_flag & O_EXCL) && !(file->f_mode & FMODE_CREATED))
    3535             :                         return -EEXIST;
    3536           0 :                 if (d_is_dir(nd->path.dentry))
    3537             :                         return -EISDIR;
    3538           0 :                 error = may_create_in_sticky(idmap, nd,
    3539             :                                              d_backing_inode(nd->path.dentry));
    3540           0 :                 if (unlikely(error))
    3541             :                         return error;
    3542             :         }
    3543           0 :         if ((nd->flags & LOOKUP_DIRECTORY) && !d_can_lookup(nd->path.dentry))
    3544             :                 return -ENOTDIR;
    3545             : 
    3546           0 :         do_truncate = false;
    3547           0 :         acc_mode = op->acc_mode;
    3548           0 :         if (file->f_mode & FMODE_CREATED) {
    3549             :                 /* Don't check for write permission, don't truncate */
    3550           0 :                 open_flag &= ~O_TRUNC;
    3551           0 :                 acc_mode = 0;
    3552           0 :         } else if (d_is_reg(nd->path.dentry) && open_flag & O_TRUNC) {
    3553           0 :                 error = mnt_want_write(nd->path.mnt);
    3554           0 :                 if (error)
    3555             :                         return error;
    3556             :                 do_truncate = true;
    3557             :         }
    3558           0 :         error = may_open(idmap, &nd->path, acc_mode, open_flag);
    3559           0 :         if (!error && !(file->f_mode & FMODE_OPENED))
    3560           0 :                 error = vfs_open(&nd->path, file);
    3561           0 :         if (!error)
    3562           0 :                 error = ima_file_check(file, op->acc_mode);
    3563           0 :         if (!error && do_truncate)
    3564           0 :                 error = handle_truncate(idmap, file);
    3565           0 :         if (unlikely(error > 0)) {
    3566           0 :                 WARN_ON(1);
    3567           0 :                 error = -EINVAL;
    3568             :         }
    3569           0 :         if (do_truncate)
    3570           0 :                 mnt_drop_write(nd->path.mnt);
    3571             :         return error;
    3572             : }
    3573             : 
    3574             : /**
    3575             :  * vfs_tmpfile - create tmpfile
    3576             :  * @idmap:      idmap of the mount the inode was found from
    3577             :  * @parentpath: path of the base directory
    3578             :  * @mode:       mode of the new tmpfile
    3579             :  * @file:       file to back with the new tmpfile; its f_flags are the open flags
    3580             :  *
    3581             :  * Create a temporary file.
    3582             :  *
    3583             :  * If the inode has been found through an idmapped mount the idmap of
    3584             :  * the vfsmount must be passed through @idmap. This function will then take
    3585             :  * care to map the inode according to @idmap before checking permissions.
    3586             :  * On non-idmapped mounts or if permission checking is to be performed on the
    3587             :  * raw inode simply pass @nop_mnt_idmap.
    3588             :  */
    3589           0 : static int vfs_tmpfile(struct mnt_idmap *idmap,
    3590             :                        const struct path *parentpath,
    3591             :                        struct file *file, umode_t mode)
    3592             : {
    3593             :         struct dentry *child;
    3594           0 :         struct inode *dir = d_inode(parentpath->dentry);
    3595             :         struct inode *inode;
    3596             :         int error;
    3597           0 :         int open_flag = file->f_flags;
    3598             : 
    3599             :         /* we want directory to be writable */
    3600           0 :         error = inode_permission(idmap, dir, MAY_WRITE | MAY_EXEC);
    3601           0 :         if (error)
    3602             :                 return error;
    3603           0 :         if (!dir->i_op->tmpfile)
    3604             :                 return -EOPNOTSUPP;
    3605           0 :         child = d_alloc(parentpath->dentry, &slash_name);
    3606           0 :         if (unlikely(!child))
    3607             :                 return -ENOMEM;
    3608           0 :         file->f_path.mnt = parentpath->mnt;
    3609           0 :         file->f_path.dentry = child;
    3610           0 :         mode = vfs_prepare_mode(idmap, dir, mode, mode, mode);
    3611           0 :         error = dir->i_op->tmpfile(idmap, dir, file, mode);
    3612           0 :         dput(child);
    3613           0 :         if (error)
    3614             :                 return error;
    3615             :         /* Don't check for other permissions, the inode was just created */
    3616           0 :         error = may_open(idmap, &file->f_path, 0, file->f_flags);
    3617           0 :         if (error)
    3618             :                 return error;
    3619           0 :         inode = file_inode(file);
    3620           0 :         if (!(open_flag & O_EXCL)) {
    3621           0 :                 spin_lock(&inode->i_lock);
    3622           0 :                 inode->i_state |= I_LINKABLE;
    3623           0 :                 spin_unlock(&inode->i_lock);
    3624             :         }
    3625             :         ima_post_create_tmpfile(idmap, inode);
    3626             :         return 0;
    3627             : }
    3628             : 
    3629             : /**
    3630             :  * vfs_tmpfile_open - open a tmpfile for kernel internal use
    3631             :  * @idmap:      idmap of the mount the inode was found from
    3632             :  * @parentpath: path of the base directory
    3633             :  * @mode:       mode of the new tmpfile
    3634             :  * @open_flag:  flags
    3635             :  * @cred:       credentials for open
    3636             :  *
    3637             :  * Create and open a temporary file.  The file is not accounted in nr_files,
    3638             :  * hence this is only for kernel internal use, and must not be installed into
    3639             :  * file tables or such.
    3640             :  */
    3641           0 : struct file *vfs_tmpfile_open(struct mnt_idmap *idmap,
    3642             :                           const struct path *parentpath,
    3643             :                           umode_t mode, int open_flag, const struct cred *cred)
    3644             : {
    3645             :         struct file *file;
    3646             :         int error;
    3647             : 
    3648           0 :         file = alloc_empty_file_noaccount(open_flag, cred);
    3649           0 :         if (!IS_ERR(file)) {
    3650           0 :                 error = vfs_tmpfile(idmap, parentpath, file, mode);
    3651           0 :                 if (error) {
    3652           0 :                         fput(file);
    3653           0 :                         file = ERR_PTR(error);
    3654             :                 }
    3655             :         }
    3656           0 :         return file;
    3657             : }
    3658             : EXPORT_SYMBOL(vfs_tmpfile_open);
    3659             : 
    3660           0 : static int do_tmpfile(struct nameidata *nd, unsigned flags,
    3661             :                 const struct open_flags *op,
    3662             :                 struct file *file)
    3663             : {
    3664             :         struct path path;
    3665           0 :         int error = path_lookupat(nd, flags | LOOKUP_DIRECTORY, &path);
    3666             : 
    3667           0 :         if (unlikely(error))
    3668             :                 return error;
    3669           0 :         error = mnt_want_write(path.mnt);
    3670           0 :         if (unlikely(error))
    3671             :                 goto out;
    3672           0 :         error = vfs_tmpfile(mnt_idmap(path.mnt), &path, file, op->mode);
    3673             :         if (error)
    3674             :                 goto out2;
    3675             :         audit_inode(nd->name, file->f_path.dentry, 0);
    3676             : out2:
    3677           0 :         mnt_drop_write(path.mnt);
    3678             : out:
    3679           0 :         path_put(&path);
    3680             :         return error;
    3681             : }
    3682             : 
    3683           0 : static int do_o_path(struct nameidata *nd, unsigned flags, struct file *file)
    3684             : {
    3685             :         struct path path;
    3686           0 :         int error = path_lookupat(nd, flags, &path);
    3687           0 :         if (!error) {
    3688           0 :                 audit_inode(nd->name, path.dentry, 0);
    3689           0 :                 error = vfs_open(&path, file);
    3690             :                 path_put(&path);
    3691             :         }
    3692           0 :         return error;
    3693             : }
    3694             : 
    3695           0 : static struct file *path_openat(struct nameidata *nd,
    3696             :                         const struct open_flags *op, unsigned flags)
    3697             : {
    3698             :         struct file *file;
    3699             :         int error;
    3700             : 
    3701           0 :         file = alloc_empty_file(op->open_flag, current_cred());
    3702           0 :         if (IS_ERR(file))
    3703             :                 return file;
    3704             : 
    3705           0 :         if (unlikely(file->f_flags & __O_TMPFILE)) {
    3706           0 :                 error = do_tmpfile(nd, flags, op, file);
    3707           0 :         } else if (unlikely(file->f_flags & O_PATH)) {
    3708           0 :                 error = do_o_path(nd, flags, file);
    3709             :         } else {
    3710           0 :                 const char *s = path_init(nd, flags);
    3711           0 :                 while (!(error = link_path_walk(s, nd)) &&
    3712             :                        (s = open_last_lookups(nd, file, op)) != NULL)
    3713             :                         ;
    3714           0 :                 if (!error)
    3715           0 :                         error = do_open(nd, file, op);
    3716           0 :                 terminate_walk(nd);
    3717             :         }
    3718           0 :         if (likely(!error)) {
    3719           0 :                 if (likely(file->f_mode & FMODE_OPENED))
    3720             :                         return file;
    3721           0 :                 WARN_ON(1);
    3722           0 :                 error = -EINVAL;
    3723             :         }
    3724           0 :         fput(file);
    3725           0 :         if (error == -EOPENSTALE) {
    3726           0 :                 if (flags & LOOKUP_RCU)
    3727             :                         error = -ECHILD;
    3728             :                 else
    3729           0 :                         error = -ESTALE;
    3730             :         }
    3731           0 :         return ERR_PTR(error);
    3732             : }
    3733             : 
    3734           0 : struct file *do_filp_open(int dfd, struct filename *pathname,
    3735             :                 const struct open_flags *op)
    3736             : {
    3737             :         struct nameidata nd;
    3738           0 :         int flags = op->lookup_flags;
    3739             :         struct file *filp;
    3740             : 
    3741           0 :         set_nameidata(&nd, dfd, pathname, NULL);
    3742           0 :         filp = path_openat(&nd, op, flags | LOOKUP_RCU);
    3743           0 :         if (unlikely(filp == ERR_PTR(-ECHILD)))
    3744           0 :                 filp = path_openat(&nd, op, flags);
    3745           0 :         if (unlikely(filp == ERR_PTR(-ESTALE)))
    3746           0 :                 filp = path_openat(&nd, op, flags | LOOKUP_REVAL);
    3747           0 :         restore_nameidata();
    3748           0 :         return filp;
    3749             : }
    3750             : 
    3751           0 : struct file *do_file_open_root(const struct path *root,
    3752             :                 const char *name, const struct open_flags *op)
    3753             : {
    3754             :         struct nameidata nd;
    3755             :         struct file *file;
    3756             :         struct filename *filename;
    3757           0 :         int flags = op->lookup_flags;
    3758             : 
    3759           0 :         if (d_is_symlink(root->dentry) && op->intent & LOOKUP_OPEN)
    3760             :                 return ERR_PTR(-ELOOP);
    3761             : 
    3762           0 :         filename = getname_kernel(name);
    3763           0 :         if (IS_ERR(filename))
    3764             :                 return ERR_CAST(filename);
    3765             : 
    3766           0 :         set_nameidata(&nd, -1, filename, root);
    3767           0 :         file = path_openat(&nd, op, flags | LOOKUP_RCU);
    3768           0 :         if (unlikely(file == ERR_PTR(-ECHILD)))
    3769           0 :                 file = path_openat(&nd, op, flags);
    3770           0 :         if (unlikely(file == ERR_PTR(-ESTALE)))
    3771           0 :                 file = path_openat(&nd, op, flags | LOOKUP_REVAL);
    3772           0 :         restore_nameidata();
    3773           0 :         putname(filename);
    3774           0 :         return file;
    3775             : }
    3776             : 
    3777           3 : static struct dentry *filename_create(int dfd, struct filename *name,
    3778             :                                       struct path *path, unsigned int lookup_flags)
    3779             : {
    3780           3 :         struct dentry *dentry = ERR_PTR(-EEXIST);
    3781             :         struct qstr last;
    3782           3 :         bool want_dir = lookup_flags & LOOKUP_DIRECTORY;
    3783           3 :         unsigned int reval_flag = lookup_flags & LOOKUP_REVAL;
    3784           3 :         unsigned int create_flags = LOOKUP_CREATE | LOOKUP_EXCL;
    3785             :         int type;
    3786             :         int err2;
    3787             :         int error;
    3788             : 
    3789           3 :         error = filename_parentat(dfd, name, reval_flag, path, &last, &type);
    3790           3 :         if (error)
    3791           0 :                 return ERR_PTR(error);
    3792             : 
    3793             :         /*
    3794             :          * Yucky last component or no last component at all?
    3795             :          * (foo/., foo/.., /////)
    3796             :          */
    3797           3 :         if (unlikely(type != LAST_NORM))
    3798             :                 goto out;
    3799             : 
    3800             :         /* don't fail immediately if it's r/o, at least try to report other errors */
    3801           3 :         err2 = mnt_want_write(path->mnt);
    3802             :         /*
    3803             :          * Do the final lookup.  Suppress 'create' if there is a trailing
    3804             :          * '/', and a directory wasn't requested.
    3805             :          */
    3806           3 :         if (last.name[last.len] && !want_dir)
    3807           0 :                 create_flags = 0;
    3808           6 :         inode_lock_nested(path->dentry->d_inode, I_MUTEX_PARENT);
    3809           3 :         dentry = __lookup_hash(&last, path->dentry, reval_flag | create_flags);
    3810           3 :         if (IS_ERR(dentry))
    3811             :                 goto unlock;
    3812             : 
    3813           3 :         error = -EEXIST;
    3814           3 :         if (d_is_positive(dentry))
    3815             :                 goto fail;
    3816             : 
    3817             :         /*
    3818             :          * Special case - lookup gave negative, but... we had foo/bar/
    3819             :          * From the vfs_mknod() POV we just have a negative dentry -
    3820             :          * all is fine. Let's be bastards - you had / on the end, you've
    3821             :          * been asking for (non-existent) directory. -ENOENT for you.
    3822             :          */
    3823           3 :         if (unlikely(!create_flags)) {
    3824             :                 error = -ENOENT;
    3825             :                 goto fail;
    3826             :         }
    3827           3 :         if (unlikely(err2)) {
    3828             :                 error = err2;
    3829             :                 goto fail;
    3830             :         }
    3831             :         return dentry;
    3832             : fail:
    3833           0 :         dput(dentry);
    3834           0 :         dentry = ERR_PTR(error);
    3835             : unlock:
    3836           0 :         inode_unlock(path->dentry->d_inode);
    3837           0 :         if (!err2)
    3838           0 :                 mnt_drop_write(path->mnt);
    3839             : out:
    3840           0 :         path_put(path);
    3841           0 :         return dentry;
    3842             : }
    3843             : 
    3844           3 : struct dentry *kern_path_create(int dfd, const char *pathname,
    3845             :                                 struct path *path, unsigned int lookup_flags)
    3846             : {
    3847           3 :         struct filename *filename = getname_kernel(pathname);
    3848           3 :         struct dentry *res = filename_create(dfd, filename, path, lookup_flags);
    3849             : 
    3850           3 :         putname(filename);
    3851           3 :         return res;
    3852             : }
    3853             : EXPORT_SYMBOL(kern_path_create);
    3854             : 
    3855           3 : void done_path_create(struct path *path, struct dentry *dentry)
    3856             : {
    3857           3 :         dput(dentry);
    3858           6 :         inode_unlock(path->dentry->d_inode);
    3859           3 :         mnt_drop_write(path->mnt);
    3860           3 :         path_put(path);
    3861           3 : }
    3862             : EXPORT_SYMBOL(done_path_create);
    3863             : 
    3864           0 : inline struct dentry *user_path_create(int dfd, const char __user *pathname,
    3865             :                                 struct path *path, unsigned int lookup_flags)
    3866             : {
    3867           0 :         struct filename *filename = getname(pathname);
    3868           0 :         struct dentry *res = filename_create(dfd, filename, path, lookup_flags);
    3869             : 
    3870           0 :         putname(filename);
    3871           0 :         return res;
    3872             : }
    3873             : EXPORT_SYMBOL(user_path_create);
    3874             : 
    3875             : /**
    3876             :  * vfs_mknod - create device node or file
    3877             :  * @idmap:      idmap of the mount the inode was found from
    3878             :  * @dir:        inode of the parent directory
    3879             :  * @dentry:     dentry of the device node or file to create
    3880             :  * @mode:       mode of the new device node or file
    3881             :  * @dev:        device number of device to create
    3882             :  *
    3883             :  * Create a device node or file.
    3884             :  *
    3885             :  * If the inode has been found through an idmapped mount the idmap of
    3886             :  * the vfsmount must be passed through @idmap. This function will then take
    3887             :  * care to map the inode according to @idmap before checking permissions.
    3888             :  * On non-idmapped mounts or if permission checking is to be performed on the
    3889             :  * raw inode simply pass @nop_mnt_idmap.
    3890             :  */
    3891           1 : int vfs_mknod(struct mnt_idmap *idmap, struct inode *dir,
    3892             :               struct dentry *dentry, umode_t mode, dev_t dev)
    3893             : {
    3894           1 :         bool is_whiteout = S_ISCHR(mode) && dev == WHITEOUT_DEV;
    3895           1 :         int error = may_create(idmap, dir, dentry);
    3896             : 
    3897           1 :         if (error)
    3898             :                 return error;
    3899             : 
    3900           2 :         if ((S_ISCHR(mode) || S_ISBLK(mode)) && !is_whiteout &&
    3901           1 :             !capable(CAP_MKNOD))
    3902             :                 return -EPERM;
    3903             : 
    3904           1 :         if (!dir->i_op->mknod)
    3905             :                 return -EPERM;
    3906             : 
    3907           1 :         mode = vfs_prepare_mode(idmap, dir, mode, mode, mode);
    3908           1 :         error = devcgroup_inode_mknod(mode, dev);
    3909             :         if (error)
    3910             :                 return error;
    3911             : 
    3912           1 :         error = security_inode_mknod(dir, dentry, mode, dev);
    3913             :         if (error)
    3914             :                 return error;
    3915             : 
    3916           1 :         error = dir->i_op->mknod(idmap, dir, dentry, mode, dev);
    3917           1 :         if (!error)
    3918             :                 fsnotify_create(dir, dentry);
    3919             :         return error;
    3920             : }
    3921             : EXPORT_SYMBOL(vfs_mknod);
    3922             : 
    3923             : static int may_mknod(umode_t mode)
    3924             : {
    3925           0 :         switch (mode & S_IFMT) {
    3926             :         case S_IFREG:
    3927             :         case S_IFCHR:
    3928             :         case S_IFBLK:
    3929             :         case S_IFIFO:
    3930             :         case S_IFSOCK:
    3931             :         case 0: /* zero mode translates to S_IFREG */
    3932             :                 return 0;
    3933             :         case S_IFDIR:
    3934             :                 return -EPERM;
    3935             :         default:
    3936             :                 return -EINVAL;
    3937             :         }
    3938             : }
    3939             : 
    3940           0 : static int do_mknodat(int dfd, struct filename *name, umode_t mode,
    3941             :                 unsigned int dev)
    3942             : {
    3943             :         struct mnt_idmap *idmap;
    3944             :         struct dentry *dentry;
    3945             :         struct path path;
    3946             :         int error;
    3947           0 :         unsigned int lookup_flags = 0;
    3948             : 
    3949           0 :         error = may_mknod(mode);
    3950           0 :         if (error)
    3951             :                 goto out1;
    3952             : retry:
    3953           0 :         dentry = filename_create(dfd, name, &path, lookup_flags);
    3954           0 :         error = PTR_ERR(dentry);
    3955           0 :         if (IS_ERR(dentry))
    3956             :                 goto out1;
    3957             : 
    3958           0 :         error = security_path_mknod(&path, dentry,
    3959           0 :                         mode_strip_umask(path.dentry->d_inode, mode), dev);
    3960             :         if (error)
    3961             :                 goto out2;
    3962             : 
    3963           0 :         idmap = mnt_idmap(path.mnt);
    3964           0 :         switch (mode & S_IFMT) {
    3965             :                 case 0: case S_IFREG:
    3966           0 :                         error = vfs_create(idmap, path.dentry->d_inode,
    3967             :                                            dentry, mode, true);
    3968             :                         if (!error)
    3969             :                                 ima_post_path_mknod(idmap, dentry);
    3970             :                         break;
    3971             :                 case S_IFCHR: case S_IFBLK:
    3972           0 :                         error = vfs_mknod(idmap, path.dentry->d_inode,
    3973             :                                           dentry, mode, new_decode_dev(dev));
    3974           0 :                         break;
    3975             :                 case S_IFIFO: case S_IFSOCK:
    3976           0 :                         error = vfs_mknod(idmap, path.dentry->d_inode,
    3977             :                                           dentry, mode, 0);
    3978           0 :                         break;
    3979             :         }
    3980             : out2:
    3981           0 :         done_path_create(&path, dentry);
    3982           0 :         if (retry_estale(error, lookup_flags)) {
    3983             :                 lookup_flags |= LOOKUP_REVAL;
    3984             :                 goto retry;
    3985             :         }
    3986             : out1:
    3987           0 :         putname(name);
    3988           0 :         return error;
    3989             : }
    3990             : 
    3991           0 : SYSCALL_DEFINE4(mknodat, int, dfd, const char __user *, filename, umode_t, mode,
    3992             :                 unsigned int, dev)
    3993             : {
    3994           0 :         return do_mknodat(dfd, getname(filename), mode, dev);
    3995             : }
    3996             : 
    3997           0 : SYSCALL_DEFINE3(mknod, const char __user *, filename, umode_t, mode, unsigned, dev)
    3998             : {
    3999           0 :         return do_mknodat(AT_FDCWD, getname(filename), mode, dev);
    4000             : }
    4001             : 
    4002             : /**
    4003             :  * vfs_mkdir - create directory
    4004             :  * @idmap:      idmap of the mount the inode was found from
    4005             :  * @dir:        inode of the parent directory
    4006             :  * @dentry:     dentry of the new directory
    4007             :  * @mode:       mode of the new directory
    4008             :  *
    4009             :  * Create a directory.
    4010             :  *
    4011             :  * If the inode has been found through an idmapped mount the idmap of
    4012             :  * the vfsmount must be passed through @idmap. This function will then take
    4013             :  * care to map the inode according to @idmap before checking permissions.
    4014             :  * On non-idmapped mounts or if permission checking is to be performed on the
    4015             :  * raw inode simply pass @nop_mnt_idmap.
    4016             :  */
    4017           2 : int vfs_mkdir(struct mnt_idmap *idmap, struct inode *dir,
    4018             :               struct dentry *dentry, umode_t mode)
    4019             : {
    4020             :         int error;
    4021           2 :         unsigned max_links = dir->i_sb->s_max_links;
    4022             : 
    4023           2 :         error = may_create(idmap, dir, dentry);
    4024           2 :         if (error)
    4025             :                 return error;
    4026             : 
    4027           2 :         if (!dir->i_op->mkdir)
    4028             :                 return -EPERM;
    4029             : 
    4030           2 :         mode = vfs_prepare_mode(idmap, dir, mode, S_IRWXUGO | S_ISVTX, 0);
    4031           2 :         error = security_inode_mkdir(dir, dentry, mode);
    4032             :         if (error)
    4033             :                 return error;
    4034             : 
    4035           2 :         if (max_links && dir->i_nlink >= max_links)
    4036             :                 return -EMLINK;
    4037             : 
    4038           2 :         error = dir->i_op->mkdir(idmap, dir, dentry, mode);
    4039           2 :         if (!error)
    4040             :                 fsnotify_mkdir(dir, dentry);
    4041             :         return error;
    4042             : }
    4043             : EXPORT_SYMBOL(vfs_mkdir);
    4044             : 
    4045           0 : int do_mkdirat(int dfd, struct filename *name, umode_t mode)
    4046             : {
    4047             :         struct dentry *dentry;
    4048             :         struct path path;
    4049             :         int error;
    4050           0 :         unsigned int lookup_flags = LOOKUP_DIRECTORY;
    4051             : 
    4052             : retry:
    4053           0 :         dentry = filename_create(dfd, name, &path, lookup_flags);
    4054           0 :         error = PTR_ERR(dentry);
    4055           0 :         if (IS_ERR(dentry))
    4056             :                 goto out_putname;
    4057             : 
    4058           0 :         error = security_path_mkdir(&path, dentry,
    4059           0 :                         mode_strip_umask(path.dentry->d_inode, mode));
    4060             :         if (!error) {
    4061           0 :                 error = vfs_mkdir(mnt_idmap(path.mnt), path.dentry->d_inode,
    4062             :                                   dentry, mode);
    4063             :         }
    4064           0 :         done_path_create(&path, dentry);
    4065           0 :         if (retry_estale(error, lookup_flags)) {
    4066             :                 lookup_flags |= LOOKUP_REVAL;
    4067             :                 goto retry;
    4068             :         }
    4069             : out_putname:
    4070           0 :         putname(name);
    4071           0 :         return error;
    4072             : }
    4073             : 
    4074           0 : SYSCALL_DEFINE3(mkdirat, int, dfd, const char __user *, pathname, umode_t, mode)
    4075             : {
    4076           0 :         return do_mkdirat(dfd, getname(pathname), mode);
    4077             : }
    4078             : 
    4079           0 : SYSCALL_DEFINE2(mkdir, const char __user *, pathname, umode_t, mode)
    4080             : {
    4081           0 :         return do_mkdirat(AT_FDCWD, getname(pathname), mode);
    4082             : }
    4083             : 
    4084             : /**
    4085             :  * vfs_rmdir - remove directory
    4086             :  * @idmap:      idmap of the mount the inode was found from
    4087             :  * @dir:        inode of the parent directory
    4088             :  * @dentry:     dentry of the directory to remove
    4089             :  *
    4090             :  * Remove a directory.
    4091             :  *
    4092             :  * If the inode has been found through an idmapped mount the idmap of
    4093             :  * the vfsmount must be passed through @idmap. This function will then take
    4094             :  * care to map the inode according to @idmap before checking permissions.
    4095             :  * On non-idmapped mounts or if permission checking is to be performed on the
    4096             :  * raw inode simply pass @nop_mnt_idmap.
    4097             :  */
    4098           0 : int vfs_rmdir(struct mnt_idmap *idmap, struct inode *dir,
    4099             :                      struct dentry *dentry)
    4100             : {
    4101           0 :         int error = may_delete(idmap, dir, dentry, 1);
    4102             : 
    4103           0 :         if (error)
    4104             :                 return error;
    4105             : 
    4106           0 :         if (!dir->i_op->rmdir)
    4107             :                 return -EPERM;
    4108             : 
    4109           0 :         dget(dentry);
    4110           0 :         inode_lock(dentry->d_inode);
    4111             : 
    4112           0 :         error = -EBUSY;
    4113           0 :         if (is_local_mountpoint(dentry) ||
    4114           0 :             (dentry->d_inode->i_flags & S_KERNEL_FILE))
    4115             :                 goto out;
    4116             : 
    4117           0 :         error = security_inode_rmdir(dir, dentry);
    4118             :         if (error)
    4119             :                 goto out;
    4120             : 
    4121           0 :         error = dir->i_op->rmdir(dir, dentry);
    4122           0 :         if (error)
    4123             :                 goto out;
    4124             : 
    4125           0 :         shrink_dcache_parent(dentry);
    4126           0 :         dentry->d_inode->i_flags |= S_DEAD;
    4127           0 :         dont_mount(dentry);
    4128             :         detach_mounts(dentry);
    4129             : 
    4130             : out:
    4131           0 :         inode_unlock(dentry->d_inode);
    4132           0 :         dput(dentry);
    4133           0 :         if (!error)
    4134           0 :                 d_delete_notify(dir, dentry);
    4135             :         return error;
    4136             : }
    4137             : EXPORT_SYMBOL(vfs_rmdir);
    4138             : 
    4139           0 : int do_rmdir(int dfd, struct filename *name)
    4140             : {
    4141             :         int error;
    4142             :         struct dentry *dentry;
    4143             :         struct path path;
    4144             :         struct qstr last;
    4145             :         int type;
    4146           0 :         unsigned int lookup_flags = 0;
    4147             : retry:
    4148           0 :         error = filename_parentat(dfd, name, lookup_flags, &path, &last, &type);
    4149           0 :         if (error)
    4150             :                 goto exit1;
    4151             : 
    4152           0 :         switch (type) {
    4153             :         case LAST_DOTDOT:
    4154             :                 error = -ENOTEMPTY;
    4155             :                 goto exit2;
    4156             :         case LAST_DOT:
    4157           0 :                 error = -EINVAL;
    4158           0 :                 goto exit2;
    4159             :         case LAST_ROOT:
    4160           0 :                 error = -EBUSY;
    4161           0 :                 goto exit2;
    4162             :         }
    4163             : 
    4164           0 :         error = mnt_want_write(path.mnt);
    4165           0 :         if (error)
    4166             :                 goto exit2;
    4167             : 
    4168           0 :         inode_lock_nested(path.dentry->d_inode, I_MUTEX_PARENT);
    4169           0 :         dentry = __lookup_hash(&last, path.dentry, lookup_flags);
    4170           0 :         error = PTR_ERR(dentry);
    4171           0 :         if (IS_ERR(dentry))
    4172             :                 goto exit3;
    4173           0 :         if (!dentry->d_inode) {
    4174             :                 error = -ENOENT;
    4175             :                 goto exit4;
    4176             :         }
    4177           0 :         error = security_path_rmdir(&path, dentry);
    4178             :         if (error)
    4179             :                 goto exit4;
    4180           0 :         error = vfs_rmdir(mnt_idmap(path.mnt), path.dentry->d_inode, dentry);
    4181             : exit4:
    4182           0 :         dput(dentry);
    4183             : exit3:
    4184           0 :         inode_unlock(path.dentry->d_inode);
    4185           0 :         mnt_drop_write(path.mnt);
    4186             : exit2:
    4187           0 :         path_put(&path);
    4188           0 :         if (retry_estale(error, lookup_flags)) {
    4189             :                 lookup_flags |= LOOKUP_REVAL;
    4190             :                 goto retry;
    4191             :         }
    4192             : exit1:
    4193           0 :         putname(name);
    4194           0 :         return error;
    4195             : }
    4196             : 
    4197           0 : SYSCALL_DEFINE1(rmdir, const char __user *, pathname)
    4198             : {
    4199           0 :         return do_rmdir(AT_FDCWD, getname(pathname));
    4200             : }
    4201             : 
    4202             : /**
    4203             :  * vfs_unlink - unlink a filesystem object
    4204             :  * @idmap:      idmap of the mount the inode was found from
    4205             :  * @dir:        parent directory
    4206             :  * @dentry:     victim
    4207             :  * @delegated_inode: returns victim inode, if the inode is delegated.
    4208             :  *
    4209             :  * The caller must hold dir->i_mutex.
    4210             :  *
    4211             :  * If vfs_unlink discovers a delegation, it will return -EWOULDBLOCK and
    4212             :  * return a reference to the inode in delegated_inode.  The caller
    4213             :  * should then break the delegation on that inode and retry.  Because
    4214             :  * breaking a delegation may take a long time, the caller should drop
    4215             :  * dir->i_mutex before doing so.
    4216             :  *
    4217             :  * Alternatively, a caller may pass NULL for delegated_inode.  This may
    4218             :  * be appropriate for callers that expect the underlying filesystem not
    4219             :  * to be NFS exported.
    4220             :  *
    4221             :  * If the inode has been found through an idmapped mount the idmap of
    4222             :  * the vfsmount must be passed through @idmap. This function will then take
    4223             :  * care to map the inode according to @idmap before checking permissions.
    4224             :  * On non-idmapped mounts or if permission checking is to be performed on the
    4225             :  * raw inode simply pass @nop_mnt_idmap.
    4226             :  */
    4227           0 : int vfs_unlink(struct mnt_idmap *idmap, struct inode *dir,
    4228             :                struct dentry *dentry, struct inode **delegated_inode)
    4229             : {
    4230           0 :         struct inode *target = dentry->d_inode;
    4231           0 :         int error = may_delete(idmap, dir, dentry, 0);
    4232             : 
    4233           0 :         if (error)
    4234             :                 return error;
    4235             : 
    4236           0 :         if (!dir->i_op->unlink)
    4237             :                 return -EPERM;
    4238             : 
    4239           0 :         inode_lock(target);
    4240           0 :         if (IS_SWAPFILE(target))
    4241             :                 error = -EPERM;
    4242           0 :         else if (is_local_mountpoint(dentry))
    4243             :                 error = -EBUSY;
    4244             :         else {
    4245           0 :                 error = security_inode_unlink(dir, dentry);
    4246             :                 if (!error) {
    4247           0 :                         error = try_break_deleg(target, delegated_inode);
    4248           0 :                         if (error)
    4249             :                                 goto out;
    4250           0 :                         error = dir->i_op->unlink(dir, dentry);
    4251           0 :                         if (!error) {
    4252           0 :                                 dont_mount(dentry);
    4253             :                                 detach_mounts(dentry);
    4254             :                         }
    4255             :                 }
    4256             :         }
    4257             : out:
    4258           0 :         inode_unlock(target);
    4259             : 
    4260             :         /* We don't d_delete() NFS sillyrenamed files--they still exist. */
    4261           0 :         if (!error && dentry->d_flags & DCACHE_NFSFS_RENAMED) {
    4262           0 :                 fsnotify_unlink(dir, dentry);
    4263           0 :         } else if (!error) {
    4264           0 :                 fsnotify_link_count(target);
    4265           0 :                 d_delete_notify(dir, dentry);
    4266             :         }
    4267             : 
    4268             :         return error;
    4269             : }
    4270             : EXPORT_SYMBOL(vfs_unlink);
    4271             : 
    4272             : /*
    4273             :  * Make sure that the actual truncation of the file will occur outside its
    4274             :  * directory's i_mutex.  Truncate can take a long time if there is a lot of
    4275             :  * writeout happening, and we don't want to prevent access to the directory
    4276             :  * while waiting on the I/O.
    4277             :  */
    4278           0 : int do_unlinkat(int dfd, struct filename *name)
    4279             : {
    4280             :         int error;
    4281             :         struct dentry *dentry;
    4282             :         struct path path;
    4283             :         struct qstr last;
    4284             :         int type;
    4285           0 :         struct inode *inode = NULL;
    4286           0 :         struct inode *delegated_inode = NULL;
    4287           0 :         unsigned int lookup_flags = 0;
    4288             : retry:
    4289           0 :         error = filename_parentat(dfd, name, lookup_flags, &path, &last, &type);
    4290           0 :         if (error)
    4291             :                 goto exit1;
    4292             : 
    4293           0 :         error = -EISDIR;
    4294           0 :         if (type != LAST_NORM)
    4295             :                 goto exit2;
    4296             : 
    4297           0 :         error = mnt_want_write(path.mnt);
    4298           0 :         if (error)
    4299             :                 goto exit2;
    4300             : retry_deleg:
    4301           0 :         inode_lock_nested(path.dentry->d_inode, I_MUTEX_PARENT);
    4302           0 :         dentry = __lookup_hash(&last, path.dentry, lookup_flags);
    4303           0 :         error = PTR_ERR(dentry);
    4304           0 :         if (!IS_ERR(dentry)) {
    4305             : 
    4306             :                 /* Why not before? Because we want correct error value */
    4307           0 :                 if (last.name[last.len])
    4308             :                         goto slashes;
    4309           0 :                 inode = dentry->d_inode;
    4310           0 :                 if (d_is_negative(dentry))
    4311             :                         goto slashes;
    4312           0 :                 ihold(inode);
    4313           0 :                 error = security_path_unlink(&path, dentry);
    4314             :                 if (error)
    4315             :                         goto exit3;
    4316           0 :                 error = vfs_unlink(mnt_idmap(path.mnt), path.dentry->d_inode,
    4317             :                                    dentry, &delegated_inode);
    4318             : exit3:
    4319           0 :                 dput(dentry);
    4320             :         }
    4321           0 :         inode_unlock(path.dentry->d_inode);
    4322           0 :         if (inode)
    4323           0 :                 iput(inode);    /* truncate the inode here */
    4324           0 :         inode = NULL;
    4325           0 :         if (delegated_inode) {
    4326           0 :                 error = break_deleg_wait(&delegated_inode);
    4327           0 :                 if (!error)
    4328             :                         goto retry_deleg;
    4329             :         }
    4330           0 :         mnt_drop_write(path.mnt);
    4331             : exit2:
    4332           0 :         path_put(&path);
    4333           0 :         if (retry_estale(error, lookup_flags)) {
    4334             :                 lookup_flags |= LOOKUP_REVAL;
    4335             :                 inode = NULL;
    4336             :                 goto retry;
    4337             :         }
    4338             : exit1:
    4339           0 :         putname(name);
    4340           0 :         return error;
    4341             : 
    4342             : slashes:
    4343           0 :         if (d_is_negative(dentry))
    4344             :                 error = -ENOENT;
    4345           0 :         else if (d_is_dir(dentry))
    4346             :                 error = -EISDIR;
    4347             :         else
    4348           0 :                 error = -ENOTDIR;
    4349             :         goto exit3;
    4350             : }
    4351             : 
    4352           0 : SYSCALL_DEFINE3(unlinkat, int, dfd, const char __user *, pathname, int, flag)
    4353             : {
    4354           0 :         if ((flag & ~AT_REMOVEDIR) != 0)
    4355             :                 return -EINVAL;
    4356             : 
    4357           0 :         if (flag & AT_REMOVEDIR)
    4358           0 :                 return do_rmdir(dfd, getname(pathname));
    4359           0 :         return do_unlinkat(dfd, getname(pathname));
    4360             : }
    4361             : 
    4362           0 : SYSCALL_DEFINE1(unlink, const char __user *, pathname)
    4363             : {
    4364           0 :         return do_unlinkat(AT_FDCWD, getname(pathname));
    4365             : }
    4366             : 
    4367             : /**
    4368             :  * vfs_symlink - create symlink
    4369             :  * @idmap:      idmap of the mount the inode was found from
    4370             :  * @dir:        inode of the parent directory
    4371             :  * @dentry:     dentry of the new symlink
    4372             :  * @oldname:    name of the file to link to
    4373             :  *
    4374             :  * Create a symlink.
    4375             :  *
    4376             :  * If the inode has been found through an idmapped mount the idmap of
    4377             :  * the vfsmount must be passed through @idmap. This function will then take
    4378             :  * care to map the inode according to @idmap before checking permissions.
    4379             :  * On non-idmapped mounts or if permission checking is to be performed on the
    4380             :  * raw inode simply pass @nop_mnt_idmap.
    4381             :  */
    4382           0 : int vfs_symlink(struct mnt_idmap *idmap, struct inode *dir,
    4383             :                 struct dentry *dentry, const char *oldname)
    4384             : {
    4385             :         int error;
    4386             : 
    4387           0 :         error = may_create(idmap, dir, dentry);
    4388           0 :         if (error)
    4389             :                 return error;
    4390             : 
    4391           0 :         if (!dir->i_op->symlink)
    4392             :                 return -EPERM;
    4393             : 
    4394           0 :         error = security_inode_symlink(dir, dentry, oldname);
    4395             :         if (error)
    4396             :                 return error;
    4397             : 
    4398           0 :         error = dir->i_op->symlink(idmap, dir, dentry, oldname);
    4399           0 :         if (!error)
    4400             :                 fsnotify_create(dir, dentry);
    4401             :         return error;
    4402             : }
    4403             : EXPORT_SYMBOL(vfs_symlink);
    4404             : 
    4405           0 : int do_symlinkat(struct filename *from, int newdfd, struct filename *to)
    4406             : {
    4407             :         int error;
    4408             :         struct dentry *dentry;
    4409             :         struct path path;
    4410           0 :         unsigned int lookup_flags = 0;
    4411             : 
    4412           0 :         if (IS_ERR(from)) {
    4413           0 :                 error = PTR_ERR(from);
    4414           0 :                 goto out_putnames;
    4415             :         }
    4416             : retry:
    4417           0 :         dentry = filename_create(newdfd, to, &path, lookup_flags);
    4418           0 :         error = PTR_ERR(dentry);
    4419           0 :         if (IS_ERR(dentry))
    4420             :                 goto out_putnames;
    4421             : 
    4422           0 :         error = security_path_symlink(&path, dentry, from->name);
    4423             :         if (!error)
    4424           0 :                 error = vfs_symlink(mnt_idmap(path.mnt), path.dentry->d_inode,
    4425             :                                     dentry, from->name);
    4426           0 :         done_path_create(&path, dentry);
    4427           0 :         if (retry_estale(error, lookup_flags)) {
    4428             :                 lookup_flags |= LOOKUP_REVAL;
    4429             :                 goto retry;
    4430             :         }
    4431             : out_putnames:
    4432           0 :         putname(to);
    4433           0 :         putname(from);
    4434           0 :         return error;
    4435             : }
    4436             : 
    4437           0 : SYSCALL_DEFINE3(symlinkat, const char __user *, oldname,
    4438             :                 int, newdfd, const char __user *, newname)
    4439             : {
    4440           0 :         return do_symlinkat(getname(oldname), newdfd, getname(newname));
    4441             : }
    4442             : 
    4443           0 : SYSCALL_DEFINE2(symlink, const char __user *, oldname, const char __user *, newname)
    4444             : {
    4445           0 :         return do_symlinkat(getname(oldname), AT_FDCWD, getname(newname));
    4446             : }
    4447             : 
    4448             : /**
    4449             :  * vfs_link - create a new link
    4450             :  * @old_dentry: object to be linked
    4451             :  * @idmap:      idmap of the mount
    4452             :  * @dir:        new parent
    4453             :  * @new_dentry: where to create the new link
    4454             :  * @delegated_inode: returns inode needing a delegation break
    4455             :  *
    4456             :  * The caller must hold dir->i_mutex
    4457             :  *
    4458             :  * If vfs_link discovers a delegation on the to-be-linked file in need
    4459             :  * of breaking, it will return -EWOULDBLOCK and return a reference to the
    4460             :  * inode in delegated_inode.  The caller should then break the delegation
    4461             :  * and retry.  Because breaking a delegation may take a long time, the
    4462             :  * caller should drop the i_mutex before doing so.
    4463             :  *
    4464             :  * Alternatively, a caller may pass NULL for delegated_inode.  This may
    4465             :  * be appropriate for callers that expect the underlying filesystem not
    4466             :  * to be NFS exported.
    4467             :  *
    4468             :  * If the inode has been found through an idmapped mount the idmap of
    4469             :  * the vfsmount must be passed through @idmap. This function will then take
    4470             :  * care to map the inode according to @idmap before checking permissions.
    4471             :  * On non-idmapped mounts or if permission checking is to be performed on the
    4472             :  * raw inode simply pass @nop_mnt_idmap.
    4473             :  */
    4474           0 : int vfs_link(struct dentry *old_dentry, struct mnt_idmap *idmap,
    4475             :              struct inode *dir, struct dentry *new_dentry,
    4476             :              struct inode **delegated_inode)
    4477             : {
    4478           0 :         struct inode *inode = old_dentry->d_inode;
    4479           0 :         unsigned max_links = dir->i_sb->s_max_links;
    4480             :         int error;
    4481             : 
    4482           0 :         if (!inode)
    4483             :                 return -ENOENT;
    4484             : 
    4485           0 :         error = may_create(idmap, dir, new_dentry);
    4486           0 :         if (error)
    4487             :                 return error;
    4488             : 
    4489           0 :         if (dir->i_sb != inode->i_sb)
    4490             :                 return -EXDEV;
    4491             : 
    4492             :         /*
    4493             :          * A link to an append-only or immutable file cannot be created.
    4494             :          */
    4495           0 :         if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
    4496             :                 return -EPERM;
    4497             :         /*
    4498             :          * Updating the link count will likely cause i_uid and i_gid to
    4499             :          * be written back improperly if their true value is unknown to
    4500             :          * the vfs.
    4501             :          */
    4502           0 :         if (HAS_UNMAPPED_ID(idmap, inode))
    4503             :                 return -EPERM;
    4504           0 :         if (!dir->i_op->link)
    4505             :                 return -EPERM;
    4506           0 :         if (S_ISDIR(inode->i_mode))
    4507             :                 return -EPERM;
    4508             : 
    4509           0 :         error = security_inode_link(old_dentry, dir, new_dentry);
    4510             :         if (error)
    4511             :                 return error;
    4512             : 
    4513           0 :         inode_lock(inode);
    4514             :         /* Make sure we don't allow creating hardlink to an unlinked file */
    4515           0 :         if (inode->i_nlink == 0 && !(inode->i_state & I_LINKABLE))
    4516             :                 error =  -ENOENT;
    4517           0 :         else if (max_links && inode->i_nlink >= max_links)
    4518             :                 error = -EMLINK;
    4519             :         else {
    4520           0 :                 error = try_break_deleg(inode, delegated_inode);
    4521           0 :                 if (!error)
    4522           0 :                         error = dir->i_op->link(old_dentry, dir, new_dentry);
    4523             :         }
    4524             : 
    4525           0 :         if (!error && (inode->i_state & I_LINKABLE)) {
    4526           0 :                 spin_lock(&inode->i_lock);
    4527           0 :                 inode->i_state &= ~I_LINKABLE;
    4528           0 :                 spin_unlock(&inode->i_lock);
    4529             :         }
    4530           0 :         inode_unlock(inode);
    4531           0 :         if (!error)
    4532           0 :                 fsnotify_link(dir, inode, new_dentry);
    4533             :         return error;
    4534             : }
    4535             : EXPORT_SYMBOL(vfs_link);
    4536             : 
    4537             : /*
    4538             :  * Hardlinks are often used in delicate situations.  We avoid
    4539             :  * security-related surprises by not following symlinks on the
    4540             :  * newname.  --KAB
    4541             :  *
    4542             :  * We don't follow them on the oldname either to be compatible
    4543             :  * with linux 2.0, and to avoid hard-linking to directories
    4544             :  * and other special files.  --ADM
    4545             :  */
    4546           0 : int do_linkat(int olddfd, struct filename *old, int newdfd,
    4547             :               struct filename *new, int flags)
    4548             : {
    4549             :         struct mnt_idmap *idmap;
    4550             :         struct dentry *new_dentry;
    4551             :         struct path old_path, new_path;
    4552           0 :         struct inode *delegated_inode = NULL;
    4553           0 :         int how = 0;
    4554             :         int error;
    4555             : 
    4556           0 :         if ((flags & ~(AT_SYMLINK_FOLLOW | AT_EMPTY_PATH)) != 0) {
    4557             :                 error = -EINVAL;
    4558             :                 goto out_putnames;
    4559             :         }
    4560             :         /*
    4561             :          * To use null names we require CAP_DAC_READ_SEARCH.
    4562             :          * This ensures that not everyone will be able to create
    4563             :          * a hardlink using the passed file descriptor.
    4564             :          */
    4565           0 :         if (flags & AT_EMPTY_PATH && !capable(CAP_DAC_READ_SEARCH)) {
    4566             :                 error = -ENOENT;
    4567             :                 goto out_putnames;
    4568             :         }
    4569             : 
    4570           0 :         if (flags & AT_SYMLINK_FOLLOW)
    4571           0 :                 how |= LOOKUP_FOLLOW;
    4572             : retry:
    4573           0 :         error = filename_lookup(olddfd, old, how, &old_path, NULL);
    4574           0 :         if (error)
    4575             :                 goto out_putnames;
    4576             : 
    4577           0 :         new_dentry = filename_create(newdfd, new, &new_path,
    4578             :                                         (how & LOOKUP_REVAL));
    4579           0 :         error = PTR_ERR(new_dentry);
    4580           0 :         if (IS_ERR(new_dentry))
    4581             :                 goto out_putpath;
    4582             : 
    4583           0 :         error = -EXDEV;
    4584           0 :         if (old_path.mnt != new_path.mnt)
    4585             :                 goto out_dput;
    4586           0 :         idmap = mnt_idmap(new_path.mnt);
    4587           0 :         error = may_linkat(idmap, &old_path);
    4588           0 :         if (unlikely(error))
    4589             :                 goto out_dput;
    4590           0 :         error = security_path_link(old_path.dentry, &new_path, new_dentry);
    4591             :         if (error)
    4592             :                 goto out_dput;
    4593           0 :         error = vfs_link(old_path.dentry, idmap, new_path.dentry->d_inode,
    4594             :                          new_dentry, &delegated_inode);
    4595             : out_dput:
    4596           0 :         done_path_create(&new_path, new_dentry);
    4597           0 :         if (delegated_inode) {
    4598           0 :                 error = break_deleg_wait(&delegated_inode);
    4599           0 :                 if (!error) {
    4600             :                         path_put(&old_path);
    4601             :                         goto retry;
    4602             :                 }
    4603             :         }
    4604           0 :         if (retry_estale(error, how)) {
    4605           0 :                 path_put(&old_path);
    4606           0 :                 how |= LOOKUP_REVAL;
    4607           0 :                 goto retry;
    4608             :         }
    4609             : out_putpath:
    4610             :         path_put(&old_path);
    4611             : out_putnames:
    4612           0 :         putname(old);
    4613           0 :         putname(new);
    4614             : 
    4615           0 :         return error;
    4616             : }
    4617             : 
    4618           0 : SYSCALL_DEFINE5(linkat, int, olddfd, const char __user *, oldname,
    4619             :                 int, newdfd, const char __user *, newname, int, flags)
    4620             : {
    4621           0 :         return do_linkat(olddfd, getname_uflags(oldname, flags),
    4622             :                 newdfd, getname(newname), flags);
    4623             : }
    4624             : 
    4625           0 : SYSCALL_DEFINE2(link, const char __user *, oldname, const char __user *, newname)
    4626             : {
    4627           0 :         return do_linkat(AT_FDCWD, getname(oldname), AT_FDCWD, getname(newname), 0);
    4628             : }
    4629             : 
    4630             : /**
    4631             :  * vfs_rename - rename a filesystem object
    4632             :  * @rd:         pointer to &struct renamedata info
    4633             :  *
    4634             :  * The caller must hold multiple mutexes (see lock_rename()).
    4635             :  *
    4636             :  * If vfs_rename discovers a delegation in need of breaking at either
    4637             :  * the source or destination, it will return -EWOULDBLOCK and return a
    4638             :  * reference to the inode in delegated_inode.  The caller should then
    4639             :  * break the delegation and retry.  Because breaking a delegation may
    4640             :  * take a long time, the caller should drop all locks before doing
    4641             :  * so.
    4642             :  *
    4643             :  * Alternatively, a caller may pass NULL for delegated_inode.  This may
    4644             :  * be appropriate for callers that expect the underlying filesystem not
    4645             :  * to be NFS exported.
    4646             :  *
    4647             :  * The worst of all namespace operations - renaming directory. "Perverted"
    4648             :  * doesn't even start to describe it. Somebody in UCB had a heck of a trip...
    4649             :  * Problems:
    4650             :  *
    4651             :  *      a) we can get into loop creation.
    4652             :  *      b) race potential - two innocent renames can create a loop together.
    4653             :  *         That's where 4.4 screws up. Current fix: serialization on
    4654             :  *         sb->s_vfs_rename_mutex. We might be more accurate, but that's another
    4655             :  *         story.
    4656             :  *      c) we have to lock _four_ objects - parents and victim (if it exists),
    4657             :  *         and source (if it is not a directory).
    4658             :  *         And that - after we got ->i_mutex on parents (until then we don't know
    4659             :  *         whether the target exists).  Solution: try to be smart with locking
    4660             :  *         order for inodes.  We rely on the fact that tree topology may change
    4661             :  *         only under ->s_vfs_rename_mutex _and_ that parent of the object we
    4662             :  *         move will be locked.  Thus we can rank directories by the tree
    4663             :  *         (ancestors first) and rank all non-directories after them.
    4664             :  *         That works since everybody except rename does "lock parent, lookup,
    4665             :  *         lock child" and rename is under ->s_vfs_rename_mutex.
    4666             :  *         HOWEVER, it relies on the assumption that any object with ->lookup()
    4667             :  *         has no more than 1 dentry.  If "hybrid" objects will ever appear,
    4668             :  *         we'd better make sure that there's no link(2) for them.
    4669             :  *      d) conversion from fhandle to dentry may come in the wrong moment - when
    4670             :  *         we are removing the target. Solution: we will have to grab ->i_mutex
    4671             :  *         in the fhandle_to_dentry code. [FIXME - current nfsfh.c relies on
    4672             :  *         ->i_mutex on parents, which works but leads to some truly excessive
    4673             :  *         locking].
    4674             :  */
    4675           0 : int vfs_rename(struct renamedata *rd)
    4676             : {
    4677             :         int error;
    4678           0 :         struct inode *old_dir = rd->old_dir, *new_dir = rd->new_dir;
    4679           0 :         struct dentry *old_dentry = rd->old_dentry;
    4680           0 :         struct dentry *new_dentry = rd->new_dentry;
    4681           0 :         struct inode **delegated_inode = rd->delegated_inode;
    4682           0 :         unsigned int flags = rd->flags;
    4683           0 :         bool is_dir = d_is_dir(old_dentry);
    4684           0 :         struct inode *source = old_dentry->d_inode;
    4685           0 :         struct inode *target = new_dentry->d_inode;
    4686           0 :         bool new_is_dir = false;
    4687           0 :         unsigned max_links = new_dir->i_sb->s_max_links;
    4688             :         struct name_snapshot old_name;
    4689             : 
    4690           0 :         if (source == target)
    4691             :                 return 0;
    4692             : 
    4693           0 :         error = may_delete(rd->old_mnt_idmap, old_dir, old_dentry, is_dir);
    4694           0 :         if (error)
    4695             :                 return error;
    4696             : 
    4697           0 :         if (!target) {
    4698           0 :                 error = may_create(rd->new_mnt_idmap, new_dir, new_dentry);
    4699             :         } else {
    4700           0 :                 new_is_dir = d_is_dir(new_dentry);
    4701             : 
    4702           0 :                 if (!(flags & RENAME_EXCHANGE))
    4703           0 :                         error = may_delete(rd->new_mnt_idmap, new_dir,
    4704             :                                            new_dentry, is_dir);
    4705             :                 else
    4706           0 :                         error = may_delete(rd->new_mnt_idmap, new_dir,
    4707             :                                            new_dentry, new_is_dir);
    4708             :         }
    4709           0 :         if (error)
    4710             :                 return error;
    4711             : 
    4712           0 :         if (!old_dir->i_op->rename)
    4713             :                 return -EPERM;
    4714             : 
    4715             :         /*
    4716             :          * If we are going to change the parent - check write permissions,
    4717             :          * we'll need to flip '..'.
    4718             :          */
    4719           0 :         if (new_dir != old_dir) {
    4720           0 :                 if (is_dir) {
    4721           0 :                         error = inode_permission(rd->old_mnt_idmap, source,
    4722             :                                                  MAY_WRITE);
    4723           0 :                         if (error)
    4724             :                                 return error;
    4725             :                 }
    4726           0 :                 if ((flags & RENAME_EXCHANGE) && new_is_dir) {
    4727           0 :                         error = inode_permission(rd->new_mnt_idmap, target,
    4728             :                                                  MAY_WRITE);
    4729           0 :                         if (error)
    4730             :                                 return error;
    4731             :                 }
    4732             :         }
    4733             : 
    4734           0 :         error = security_inode_rename(old_dir, old_dentry, new_dir, new_dentry,
    4735             :                                       flags);
    4736             :         if (error)
    4737             :                 return error;
    4738             : 
    4739           0 :         take_dentry_name_snapshot(&old_name, old_dentry);
    4740           0 :         dget(new_dentry);
    4741           0 :         if (!is_dir || (flags & RENAME_EXCHANGE))
    4742           0 :                 lock_two_nondirectories(source, target);
    4743           0 :         else if (target)
    4744             :                 inode_lock(target);
    4745             : 
    4746           0 :         error = -EPERM;
    4747           0 :         if (IS_SWAPFILE(source) || (target && IS_SWAPFILE(target)))
    4748             :                 goto out;
    4749             : 
    4750           0 :         error = -EBUSY;
    4751           0 :         if (is_local_mountpoint(old_dentry) || is_local_mountpoint(new_dentry))
    4752             :                 goto out;
    4753             : 
    4754           0 :         if (max_links && new_dir != old_dir) {
    4755           0 :                 error = -EMLINK;
    4756           0 :                 if (is_dir && !new_is_dir && new_dir->i_nlink >= max_links)
    4757             :                         goto out;
    4758           0 :                 if ((flags & RENAME_EXCHANGE) && !is_dir && new_is_dir &&
    4759           0 :                     old_dir->i_nlink >= max_links)
    4760             :                         goto out;
    4761             :         }
    4762           0 :         if (!is_dir) {
    4763           0 :                 error = try_break_deleg(source, delegated_inode);
    4764           0 :                 if (error)
    4765             :                         goto out;
    4766             :         }
    4767           0 :         if (target && !new_is_dir) {
    4768           0 :                 error = try_break_deleg(target, delegated_inode);
    4769           0 :                 if (error)
    4770             :                         goto out;
    4771             :         }
    4772           0 :         error = old_dir->i_op->rename(rd->new_mnt_idmap, old_dir, old_dentry,
    4773             :                                       new_dir, new_dentry, flags);
    4774           0 :         if (error)
    4775             :                 goto out;
    4776             : 
    4777           0 :         if (!(flags & RENAME_EXCHANGE) && target) {
    4778           0 :                 if (is_dir) {
    4779           0 :                         shrink_dcache_parent(new_dentry);
    4780           0 :                         target->i_flags |= S_DEAD;
    4781             :                 }
    4782           0 :                 dont_mount(new_dentry);
    4783             :                 detach_mounts(new_dentry);
    4784             :         }
    4785           0 :         if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE)) {
    4786           0 :                 if (!(flags & RENAME_EXCHANGE))
    4787           0 :                         d_move(old_dentry, new_dentry);
    4788             :                 else
    4789           0 :                         d_exchange(old_dentry, new_dentry);
    4790             :         }
    4791             : out:
    4792           0 :         if (!is_dir || (flags & RENAME_EXCHANGE))
    4793           0 :                 unlock_two_nondirectories(source, target);
    4794           0 :         else if (target)
    4795             :                 inode_unlock(target);
    4796           0 :         dput(new_dentry);
    4797           0 :         if (!error) {
    4798           0 :                 fsnotify_move(old_dir, new_dir, &old_name.name, is_dir,
    4799           0 :                               !(flags & RENAME_EXCHANGE) ? target : NULL, old_dentry);
    4800           0 :                 if (flags & RENAME_EXCHANGE) {
    4801           0 :                         fsnotify_move(new_dir, old_dir, &old_dentry->d_name,
    4802             :                                       new_is_dir, NULL, new_dentry);
    4803             :                 }
    4804             :         }
    4805           0 :         release_dentry_name_snapshot(&old_name);
    4806             : 
    4807           0 :         return error;
    4808             : }
    4809             : EXPORT_SYMBOL(vfs_rename);
    4810             : 
    4811           0 : int do_renameat2(int olddfd, struct filename *from, int newdfd,
    4812             :                  struct filename *to, unsigned int flags)
    4813             : {
    4814             :         struct renamedata rd;
    4815             :         struct dentry *old_dentry, *new_dentry;
    4816             :         struct dentry *trap;
    4817             :         struct path old_path, new_path;
    4818             :         struct qstr old_last, new_last;
    4819             :         int old_type, new_type;
    4820           0 :         struct inode *delegated_inode = NULL;
    4821           0 :         unsigned int lookup_flags = 0, target_flags = LOOKUP_RENAME_TARGET;
    4822           0 :         bool should_retry = false;
    4823           0 :         int error = -EINVAL;
    4824             : 
    4825           0 :         if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT))
    4826             :                 goto put_names;
    4827             : 
    4828           0 :         if ((flags & (RENAME_NOREPLACE | RENAME_WHITEOUT)) &&
    4829           0 :             (flags & RENAME_EXCHANGE))
    4830             :                 goto put_names;
    4831             : 
    4832           0 :         if (flags & RENAME_EXCHANGE)
    4833           0 :                 target_flags = 0;
    4834             : 
    4835             : retry:
    4836           0 :         error = filename_parentat(olddfd, from, lookup_flags, &old_path,
    4837             :                                   &old_last, &old_type);
    4838           0 :         if (error)
    4839             :                 goto put_names;
    4840             : 
    4841           0 :         error = filename_parentat(newdfd, to, lookup_flags, &new_path, &new_last,
    4842             :                                   &new_type);
    4843           0 :         if (error)
    4844             :                 goto exit1;
    4845             : 
    4846           0 :         error = -EXDEV;
    4847           0 :         if (old_path.mnt != new_path.mnt)
    4848             :                 goto exit2;
    4849             : 
    4850           0 :         error = -EBUSY;
    4851           0 :         if (old_type != LAST_NORM)
    4852             :                 goto exit2;
    4853             : 
    4854           0 :         if (flags & RENAME_NOREPLACE)
    4855           0 :                 error = -EEXIST;
    4856           0 :         if (new_type != LAST_NORM)
    4857             :                 goto exit2;
    4858             : 
    4859           0 :         error = mnt_want_write(old_path.mnt);
    4860           0 :         if (error)
    4861             :                 goto exit2;
    4862             : 
    4863             : retry_deleg:
    4864           0 :         trap = lock_rename(new_path.dentry, old_path.dentry);
    4865             : 
    4866           0 :         old_dentry = __lookup_hash(&old_last, old_path.dentry, lookup_flags);
    4867           0 :         error = PTR_ERR(old_dentry);
    4868           0 :         if (IS_ERR(old_dentry))
    4869             :                 goto exit3;
    4870             :         /* source must exist */
    4871           0 :         error = -ENOENT;
    4872           0 :         if (d_is_negative(old_dentry))
    4873             :                 goto exit4;
    4874           0 :         new_dentry = __lookup_hash(&new_last, new_path.dentry, lookup_flags | target_flags);
    4875           0 :         error = PTR_ERR(new_dentry);
    4876           0 :         if (IS_ERR(new_dentry))
    4877             :                 goto exit4;
    4878           0 :         error = -EEXIST;
    4879           0 :         if ((flags & RENAME_NOREPLACE) && d_is_positive(new_dentry))
    4880             :                 goto exit5;
    4881           0 :         if (flags & RENAME_EXCHANGE) {
    4882           0 :                 error = -ENOENT;
    4883           0 :                 if (d_is_negative(new_dentry))
    4884             :                         goto exit5;
    4885             : 
    4886           0 :                 if (!d_is_dir(new_dentry)) {
    4887           0 :                         error = -ENOTDIR;
    4888           0 :                         if (new_last.name[new_last.len])
    4889             :                                 goto exit5;
    4890             :                 }
    4891             :         }
    4892             :         /* unless the source is a directory trailing slashes give -ENOTDIR */
    4893           0 :         if (!d_is_dir(old_dentry)) {
    4894           0 :                 error = -ENOTDIR;
    4895           0 :                 if (old_last.name[old_last.len])
    4896             :                         goto exit5;
    4897           0 :                 if (!(flags & RENAME_EXCHANGE) && new_last.name[new_last.len])
    4898             :                         goto exit5;
    4899             :         }
    4900             :         /* source should not be ancestor of target */
    4901           0 :         error = -EINVAL;
    4902           0 :         if (old_dentry == trap)
    4903             :                 goto exit5;
    4904             :         /* target should not be an ancestor of source */
    4905           0 :         if (!(flags & RENAME_EXCHANGE))
    4906           0 :                 error = -ENOTEMPTY;
    4907           0 :         if (new_dentry == trap)
    4908             :                 goto exit5;
    4909             : 
    4910           0 :         error = security_path_rename(&old_path, old_dentry,
    4911             :                                      &new_path, new_dentry, flags);
    4912             :         if (error)
    4913             :                 goto exit5;
    4914             : 
    4915           0 :         rd.old_dir         = old_path.dentry->d_inode;
    4916           0 :         rd.old_dentry      = old_dentry;
    4917           0 :         rd.old_mnt_idmap   = mnt_idmap(old_path.mnt);
    4918           0 :         rd.new_dir         = new_path.dentry->d_inode;
    4919           0 :         rd.new_dentry      = new_dentry;
    4920           0 :         rd.new_mnt_idmap   = mnt_idmap(new_path.mnt);
    4921           0 :         rd.delegated_inode = &delegated_inode;
    4922           0 :         rd.flags           = flags;
    4923           0 :         error = vfs_rename(&rd);
    4924             : exit5:
    4925           0 :         dput(new_dentry);
    4926             : exit4:
    4927           0 :         dput(old_dentry);
    4928             : exit3:
    4929           0 :         unlock_rename(new_path.dentry, old_path.dentry);
    4930           0 :         if (delegated_inode) {
    4931           0 :                 error = break_deleg_wait(&delegated_inode);
    4932           0 :                 if (!error)
    4933             :                         goto retry_deleg;
    4934             :         }
    4935           0 :         mnt_drop_write(old_path.mnt);
    4936             : exit2:
    4937           0 :         if (retry_estale(error, lookup_flags))
    4938           0 :                 should_retry = true;
    4939             :         path_put(&new_path);
    4940             : exit1:
    4941           0 :         path_put(&old_path);
    4942           0 :         if (should_retry) {
    4943             :                 should_retry = false;
    4944             :                 lookup_flags |= LOOKUP_REVAL;
    4945             :                 goto retry;
    4946             :         }
    4947             : put_names:
    4948           0 :         putname(from);
    4949           0 :         putname(to);
    4950           0 :         return error;
    4951             : }
    4952             : 
    4953           0 : SYSCALL_DEFINE5(renameat2, int, olddfd, const char __user *, oldname,
    4954             :                 int, newdfd, const char __user *, newname, unsigned int, flags)
    4955             : {
    4956           0 :         return do_renameat2(olddfd, getname(oldname), newdfd, getname(newname),
    4957             :                                 flags);
    4958             : }
    4959             : 
    4960           0 : SYSCALL_DEFINE4(renameat, int, olddfd, const char __user *, oldname,
    4961             :                 int, newdfd, const char __user *, newname)
    4962             : {
    4963           0 :         return do_renameat2(olddfd, getname(oldname), newdfd, getname(newname),
    4964             :                                 0);
    4965             : }
    4966             : 
    4967           0 : SYSCALL_DEFINE2(rename, const char __user *, oldname, const char __user *, newname)
    4968             : {
    4969           0 :         return do_renameat2(AT_FDCWD, getname(oldname), AT_FDCWD,
    4970             :                                 getname(newname), 0);
    4971             : }
    4972             : 
    4973           0 : int readlink_copy(char __user *buffer, int buflen, const char *link)
    4974             : {
    4975           0 :         int len = PTR_ERR(link);
    4976           0 :         if (IS_ERR(link))
    4977             :                 goto out;
    4978             : 
    4979           0 :         len = strlen(link);
    4980           0 :         if (len > (unsigned) buflen)
    4981           0 :                 len = buflen;
    4982           0 :         if (copy_to_user(buffer, link, len))
    4983           0 :                 len = -EFAULT;
    4984             : out:
    4985           0 :         return len;
    4986             : }
    4987             : 
    4988             : /**
    4989             :  * vfs_readlink - copy symlink body into userspace buffer
    4990             :  * @dentry: dentry on which to get symbolic link
    4991             :  * @buffer: user memory pointer
    4992             :  * @buflen: size of buffer
    4993             :  *
    4994             :  * Does not touch atime.  That's up to the caller if necessary.
    4995             :  *
    4996             :  * Does not call security hook.
    4997             :  */
    4998           0 : int vfs_readlink(struct dentry *dentry, char __user *buffer, int buflen)
    4999             : {
    5000           0 :         struct inode *inode = d_inode(dentry);
    5001           0 :         DEFINE_DELAYED_CALL(done);
    5002             :         const char *link;
    5003             :         int res;
    5004             : 
    5005           0 :         if (unlikely(!(inode->i_opflags & IOP_DEFAULT_READLINK))) {
    5006           0 :                 if (unlikely(inode->i_op->readlink))
    5007           0 :                         return inode->i_op->readlink(dentry, buffer, buflen);
    5008             : 
    5009           0 :                 if (!d_is_symlink(dentry))
    5010             :                         return -EINVAL;
    5011             : 
    5012           0 :                 spin_lock(&inode->i_lock);
    5013           0 :                 inode->i_opflags |= IOP_DEFAULT_READLINK;
    5014           0 :                 spin_unlock(&inode->i_lock);
    5015             :         }
    5016             : 
    5017           0 :         link = READ_ONCE(inode->i_link);
    5018           0 :         if (!link) {
    5019           0 :                 link = inode->i_op->get_link(dentry, inode, &done);
    5020           0 :                 if (IS_ERR(link))
    5021           0 :                         return PTR_ERR(link);
    5022             :         }
    5023           0 :         res = readlink_copy(buffer, buflen, link);
    5024             :         do_delayed_call(&done);
    5025             :         return res;
    5026             : }
    5027             : EXPORT_SYMBOL(vfs_readlink);
    5028             : 
    5029             : /**
    5030             :  * vfs_get_link - get symlink body
    5031             :  * @dentry: dentry on which to get symbolic link
    5032             :  * @done: caller needs to free returned data with this
    5033             :  *
    5034             :  * Calls security hook and i_op->get_link() on the supplied inode.
    5035             :  *
    5036             :  * It does not touch atime.  That's up to the caller if necessary.
    5037             :  *
    5038             :  * Does not work on "special" symlinks like /proc/$$/fd/N
                     :  *
                     :  * Return: ERR_PTR(-EINVAL) if @dentry is not a symlink; the
                     :  * security_inode_readlink() result wrapped in ERR_PTR() when that wrapped
                     :  * value is non-NULL; otherwise whatever ->get_link() returns.
    5039             :  */
    5040           0 : const char *vfs_get_link(struct dentry *dentry, struct delayed_call *done)
    5041             : {
    5042           0 :         const char *res = ERR_PTR(-EINVAL);
    5043           0 :         struct inode *inode = d_inode(dentry);
    5044             :         /* Non-symlinks skip the branch and return the -EINVAL default. */
    5045           0 :         if (d_is_symlink(dentry)) {
    5046           0 :                 res = ERR_PTR(security_inode_readlink(dentry));
    5047             :                 if (!res)
    5048           0 :                         res = inode->i_op->get_link(dentry, inode, done);
    5049             :         }
    5050           0 :         return res;
    5051             : }
    5052             : EXPORT_SYMBOL(vfs_get_link);
    5053             : 
    5054             : /**
                     :  * page_get_link - get the link contents into pagecache
                     :  * @dentry: when NULL, only a page already found by find_get_page() is used
                     :  * @inode: inode whose i_mapping and i_size are used
                     :  * @callback: delayed call that is set to page_put_link() on the page
                     :  *
                     :  * With a NULL @dentry, page 0 of the mapping is looked up with
                     :  * find_get_page() and ERR_PTR(-ECHILD) is returned if it is missing or
                     :  * not uptodate (the page is put in the latter case).  Otherwise page 0 is
                     :  * obtained with read_mapping_page() and an error pointer from it is
                     :  * returned unchanged.
                     :  * NOTE(review): the NULL-dentry case is presumably the non-blocking
                     :  * lookup path - confirm against callers.
                     :  *
                     :  * Return: page_address() of the page, after nd_terminate_link() has been
                     :  * applied to it with inode->i_size and PAGE_SIZE - 1, or an ERR_PTR.
                     :  */
    5055           0 : const char *page_get_link(struct dentry *dentry, struct inode *inode,
    5056             :                           struct delayed_call *callback)
    5057             : {
    5058             :         char *kaddr;
    5059             :         struct page *page;
    5060           0 :         struct address_space *mapping = inode->i_mapping;
    5061             :         /* The two branches differ only in how page 0 is obtained. */
    5062           0 :         if (!dentry) {
    5063           0 :                 page = find_get_page(mapping, 0);
    5064           0 :                 if (!page)
    5065             :                         return ERR_PTR(-ECHILD);
    5066           0 :                 if (!PageUptodate(page)) {
    5067           0 :                         put_page(page);
    5068           0 :                         return ERR_PTR(-ECHILD);
    5069             :                 }
    5070             :         } else {
    5071           0 :                 page = read_mapping_page(mapping, 0, NULL);
    5072           0 :                 if (IS_ERR(page))
    5073             :                         return (char*)page; /* error pointer passed through */
    5074             :         }
    5075           0 :         set_delayed_call(callback, page_put_link, page);
    5076           0 :         BUG_ON(mapping_gfp_mask(mapping) & __GFP_HIGHMEM);
    5077           0 :         kaddr = page_address(page);
    5078           0 :         nd_terminate_link(kaddr, inode->i_size, PAGE_SIZE - 1);
    5079           0 :         return kaddr;
    5080             : }
    5081             : 
    5082             : EXPORT_SYMBOL(page_get_link);
    5083             : /**
                     :  * page_put_link - drop a page reference via put_page()
                     :  * @arg: passed straight to put_page(); page_get_link() registers this
                     :  *       function with its page as the argument
                     :  */
    5084           0 : void page_put_link(void *arg)
    5085             : {
    5086           0 :         put_page(arg);
    5087           0 : }
    5088             : EXPORT_SYMBOL(page_put_link);
    5089             : /**
                     :  * page_readlink - copy a link body obtained by page_get_link() to userspace
                     :  * @dentry: dentry passed to page_get_link(); its inode supplies the mapping
                     :  * @buffer: user memory pointer handed to readlink_copy()
                     :  * @buflen: size of @buffer
                     :  *
                     :  * The delayed call set up by page_get_link() is run after the copy.
                     :  * NOTE(review): an ERR_PTR from page_get_link() is handed straight to
                     :  * readlink_copy(); how that is handled is outside this view - confirm.
                     :  *
                     :  * Return: the value returned by readlink_copy().
                     :  */
    5090           0 : int page_readlink(struct dentry *dentry, char __user *buffer, int buflen)
    5091             : {
    5092           0 :         DEFINE_DELAYED_CALL(done);
    5093           0 :         int res = readlink_copy(buffer, buflen,
    5094             :                                 page_get_link(dentry, d_inode(dentry),
    5095             :                                               &done));
    5096           0 :         do_delayed_call(&done);
    5097           0 :         return res;
    5098             : }
    5099             : EXPORT_SYMBOL(page_readlink);
    5100             : /**
                     :  * page_symlink - write a symlink body through the mapping's a_ops
                     :  * @inode: inode whose i_mapping receives the data
                     :  * @symname: source buffer copied into the page
                     :  * @len: one more than the number of bytes written; len-1 bytes are
                     :  *       copied (presumably @len counts a trailing NUL - confirm)
                     :  *
                     :  * The data is written at offset 0 with ->write_begin() and ->write_end().
                     :  * When mapping_gfp_constraint(mapping, __GFP_FS) is zero, write_begin()
                     :  * is bracketed by memalloc_nofs_save() and memalloc_nofs_restore().
                     :  *
                     :  * Return: 0 on success (after mark_inode_dirty()), a non-zero
                     :  * write_begin() result, or a negative write_end() result.
                     :  */
    5101           0 : int page_symlink(struct inode *inode, const char *symname, int len)
    5102             : {
    5103           0 :         struct address_space *mapping = inode->i_mapping;
    5104           0 :         const struct address_space_operations *aops = mapping->a_ops;
    5105           0 :         bool nofs = !mapping_gfp_constraint(mapping, __GFP_FS);
    5106             :         struct page *page;
    5107           0 :         void *fsdata = NULL;
    5108             :         int err;
    5109             :         unsigned int flags;
    5110             :         /* Restart point when write_end() accepts fewer than len-1 bytes. */
    5111             : retry:
    5112           0 :         if (nofs)
    5113           0 :                 flags = memalloc_nofs_save();
    5114           0 :         err = aops->write_begin(NULL, mapping, 0, len-1, &page, &fsdata);
    5115           0 :         if (nofs)
    5116             :                 memalloc_nofs_restore(flags);
    5117           0 :         if (err)
    5118             :                 goto fail;
    5119             :         /* Copy the body into the page handed back by write_begin(). */
    5120           0 :         memcpy(page_address(page), symname, len-1);
    5121             : 
    5122           0 :         err = aops->write_end(NULL, mapping, 0, len-1, len-1,
    5123             :                                                         page, fsdata);
    5124           0 :         if (err < 0)
    5125             :                 goto fail;
    5126           0 :         if (err < len-1)
    5127             :                 goto retry;
    5128             :         /* write_end() reported at least len-1 bytes. */
    5129           0 :         mark_inode_dirty(inode);
    5130           0 :         return 0;
    5131             : fail:
    5132             :         return err;
    5133             : }
    5134             : EXPORT_SYMBOL(page_symlink);
    5135             : /* Exported inode_operations whose only method is .get_link = page_get_link. */
    5136             : const struct inode_operations page_symlink_inode_operations = {
    5137             :         .get_link       = page_get_link,
    5138             : };
    5139             : EXPORT_SYMBOL(page_symlink_inode_operations);

Generated by: LCOV version 1.14