LCOV - code coverage report
Current view: top level - fs - exec.c (source / functions) Hit Total Coverage
Test: coverage.info Lines: 16 714 2.2 %
Date: 2023-07-19 18:55:55 Functions: 3 48 6.2 %

          Line data    Source code
       1             : // SPDX-License-Identifier: GPL-2.0-only
       2             : /*
       3             :  *  linux/fs/exec.c
       4             :  *
       5             :  *  Copyright (C) 1991, 1992  Linus Torvalds
       6             :  */
       7             : 
       8             : /*
       9             :  * #!-checking implemented by tytso.
      10             :  */
      11             : /*
      12             :  * Demand-loading implemented 01.12.91 - no need to read anything but
      13             :  * the header into memory. The inode of the executable is put into
      14             :  * "current->executable", and page faults do the actual loading. Clean.
      15             :  *
      16             :  * Once more I can proudly say that linux stood up to being changed: it
      17             :  * was less than 2 hours work to get demand-loading completely implemented.
      18             :  *
      19             :  * Demand loading changed July 1993 by Eric Youngdale.   Use mmap instead,
      20             :  * current->executable is only used by the procfs.  This allows a dispatch
      21             :  * table to check for several different types  of binary formats.  We keep
      22             :  * trying until we recognize the file or we run out of supported binary
      23             :  * formats.
      24             :  */
      25             : 
      26             : #include <linux/kernel_read_file.h>
      27             : #include <linux/slab.h>
      28             : #include <linux/file.h>
      29             : #include <linux/fdtable.h>
      30             : #include <linux/mm.h>
      31             : #include <linux/stat.h>
      32             : #include <linux/fcntl.h>
      33             : #include <linux/swap.h>
      34             : #include <linux/string.h>
      35             : #include <linux/init.h>
      36             : #include <linux/sched/mm.h>
      37             : #include <linux/sched/coredump.h>
      38             : #include <linux/sched/signal.h>
      39             : #include <linux/sched/numa_balancing.h>
      40             : #include <linux/sched/task.h>
      41             : #include <linux/pagemap.h>
      42             : #include <linux/perf_event.h>
      43             : #include <linux/highmem.h>
      44             : #include <linux/spinlock.h>
      45             : #include <linux/key.h>
      46             : #include <linux/personality.h>
      47             : #include <linux/binfmts.h>
      48             : #include <linux/utsname.h>
      49             : #include <linux/pid_namespace.h>
      50             : #include <linux/module.h>
      51             : #include <linux/namei.h>
      52             : #include <linux/mount.h>
      53             : #include <linux/security.h>
      54             : #include <linux/syscalls.h>
      55             : #include <linux/tsacct_kern.h>
      56             : #include <linux/cn_proc.h>
      57             : #include <linux/audit.h>
      58             : #include <linux/kmod.h>
      59             : #include <linux/fsnotify.h>
      60             : #include <linux/fs_struct.h>
      61             : #include <linux/oom.h>
      62             : #include <linux/compat.h>
      63             : #include <linux/vmalloc.h>
      64             : #include <linux/io_uring.h>
      65             : #include <linux/syscall_user_dispatch.h>
      66             : #include <linux/coredump.h>
      67             : #include <linux/time_namespace.h>
      68             : #include <linux/user_events.h>
      69             : 
      70             : #include <linux/uaccess.h>
      71             : #include <asm/mmu_context.h>
      72             : #include <asm/tlb.h>
      73             : 
      74             : #include <trace/events/task.h>
      75             : #include "internal.h"
      76             : 
      77             : #include <trace/events/sched.h>
      78             : 
      79             : static int bprm_creds_from_file(struct linux_binprm *bprm);
      80             : 
      81             : int suid_dumpable = 0;
      82             : 
      83             : static LIST_HEAD(formats);
      84             : static DEFINE_RWLOCK(binfmt_lock);
      85             : 
      86           2 : void __register_binfmt(struct linux_binfmt * fmt, int insert)
      87             : {
      88           2 :         write_lock(&binfmt_lock);
      89           2 :         insert ? list_add(&fmt->lh, &formats) :
      90           2 :                  list_add_tail(&fmt->lh, &formats);
      91           2 :         write_unlock(&binfmt_lock);
      92           2 : }
      93             : 
      94             : EXPORT_SYMBOL(__register_binfmt);
      95             : 
      96           0 : void unregister_binfmt(struct linux_binfmt * fmt)
      97             : {
      98           0 :         write_lock(&binfmt_lock);
      99           0 :         list_del(&fmt->lh);
     100           0 :         write_unlock(&binfmt_lock);
     101           0 : }
     102             : 
     103             : EXPORT_SYMBOL(unregister_binfmt);
     104             : 
     105             : static inline void put_binfmt(struct linux_binfmt * fmt)
     106             : {
     107           0 :         module_put(fmt->module);
     108             : }
     109             : 
     110           0 : bool path_noexec(const struct path *path)
     111             : {
     112           0 :         return (path->mnt->mnt_flags & MNT_NOEXEC) ||
     113           0 :                (path->mnt->mnt_sb->s_iflags & SB_I_NOEXEC);
     114             : }
     115             : 
     116             : #ifdef CONFIG_USELIB
     117             : /*
     118             :  * Note that a shared library must be both readable and executable due to
     119             :  * security reasons.
     120             :  *
     121             :  * Also note that we take the address to load from the file itself.
     122             :  */
     123             : SYSCALL_DEFINE1(uselib, const char __user *, library)
     124             : {
     125             :         struct linux_binfmt *fmt;
     126             :         struct file *file;
     127             :         struct filename *tmp = getname(library);
     128             :         int error = PTR_ERR(tmp);
     129             :         static const struct open_flags uselib_flags = {
     130             :                 .open_flag = O_LARGEFILE | O_RDONLY | __FMODE_EXEC,
     131             :                 .acc_mode = MAY_READ | MAY_EXEC,
     132             :                 .intent = LOOKUP_OPEN,
     133             :                 .lookup_flags = LOOKUP_FOLLOW,
     134             :         };
     135             : 
     136             :         if (IS_ERR(tmp))
     137             :                 goto out;
     138             : 
     139             :         file = do_filp_open(AT_FDCWD, tmp, &uselib_flags);
     140             :         putname(tmp);
     141             :         error = PTR_ERR(file);
     142             :         if (IS_ERR(file))
     143             :                 goto out;
     144             : 
     145             :         /*
     146             :          * may_open() has already checked for this, so it should be
     147             :          * impossible to trip now. But we need to be extra cautious
     148             :          * and check again at the very end too.
     149             :          */
     150             :         error = -EACCES;
     151             :         if (WARN_ON_ONCE(!S_ISREG(file_inode(file)->i_mode) ||
     152             :                          path_noexec(&file->f_path)))
     153             :                 goto exit;
     154             : 
     155             :         fsnotify_open(file);
     156             : 
     157             :         error = -ENOEXEC;
     158             : 
     159             :         read_lock(&binfmt_lock);
     160             :         list_for_each_entry(fmt, &formats, lh) {
     161             :                 if (!fmt->load_shlib)
     162             :                         continue;
     163             :                 if (!try_module_get(fmt->module))
     164             :                         continue;
     165             :                 read_unlock(&binfmt_lock);
     166             :                 error = fmt->load_shlib(file);
     167             :                 read_lock(&binfmt_lock);
     168             :                 put_binfmt(fmt);
     169             :                 if (error != -ENOEXEC)
     170             :                         break;
     171             :         }
     172             :         read_unlock(&binfmt_lock);
     173             : exit:
     174             :         fput(file);
     175             : out:
     176             :         return error;
     177             : }
     178             : #endif /* #ifdef CONFIG_USELIB */
     179             : 
     180             : #ifdef CONFIG_MMU
     181             : /*
     182             :  * The nascent bprm->mm is not visible until exec_mmap() but it can
     183             :  * use a lot of memory, account these pages in current->mm temporarily
     184             :  * for oom_badness()->get_mm_rss(). Once exec succeeds or fails, we
     185             :  * change the counter back via acct_arg_size(0).
     186             :  */
     187           0 : static void acct_arg_size(struct linux_binprm *bprm, unsigned long pages)
     188             : {
     189           0 :         struct mm_struct *mm = current->mm;
     190           0 :         long diff = (long)(pages - bprm->vma_pages);
     191             : 
     192           0 :         if (!mm || !diff)
     193             :                 return;
     194             : 
     195           0 :         bprm->vma_pages = pages;
     196           0 :         add_mm_counter(mm, MM_ANONPAGES, diff);
     197             : }
     198             : 
     199           0 : static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
     200             :                 int write)
     201             : {
     202             :         struct page *page;
     203             :         int ret;
     204           0 :         unsigned int gup_flags = 0;
     205             : 
     206             : #ifdef CONFIG_STACK_GROWSUP
     207             :         if (write) {
     208             :                 ret = expand_downwards(bprm->vma, pos);
     209             :                 if (ret < 0)
     210             :                         return NULL;
     211             :         }
     212             : #endif
     213             : 
     214           0 :         if (write)
     215           0 :                 gup_flags |= FOLL_WRITE;
     216             : 
     217             :         /*
     218             :          * We are doing an exec().  'current' is the process
     219             :          * doing the exec and bprm->mm is the new process's mm.
     220             :          */
     221           0 :         mmap_read_lock(bprm->mm);
     222           0 :         ret = get_user_pages_remote(bprm->mm, pos, 1, gup_flags,
     223             :                         &page, NULL, NULL);
     224           0 :         mmap_read_unlock(bprm->mm);
     225           0 :         if (ret <= 0)
     226             :                 return NULL;
     227             : 
     228           0 :         if (write)
     229           0 :                 acct_arg_size(bprm, vma_pages(bprm->vma));
     230             : 
     231           0 :         return page;
     232             : }
     233             : 
     234             : static void put_arg_page(struct page *page)
     235             : {
     236           0 :         put_page(page);
     237             : }
     238             : 
     239             : static void free_arg_pages(struct linux_binprm *bprm)
     240             : {
     241             : }
     242             : 
     243             : static void flush_arg_page(struct linux_binprm *bprm, unsigned long pos,
     244             :                 struct page *page)
     245             : {
     246           0 :         flush_cache_page(bprm->vma, pos, page_to_pfn(page));
     247             : }
     248             : 
     249           0 : static int __bprm_mm_init(struct linux_binprm *bprm)
     250             : {
     251             :         int err;
     252           0 :         struct vm_area_struct *vma = NULL;
     253           0 :         struct mm_struct *mm = bprm->mm;
     254             : 
     255           0 :         bprm->vma = vma = vm_area_alloc(mm);
     256           0 :         if (!vma)
     257             :                 return -ENOMEM;
     258           0 :         vma_set_anonymous(vma);
     259             : 
     260           0 :         if (mmap_write_lock_killable(mm)) {
     261             :                 err = -EINTR;
     262             :                 goto err_free;
     263             :         }
     264             : 
     265             :         /*
     266             :          * Place the stack at the largest stack address the architecture
     267             :          * supports. Later, we'll move this to an appropriate place. We don't
     268             :          * use STACK_TOP because that can depend on attributes which aren't
     269             :          * configured yet.
     270             :          */
     271             :         BUILD_BUG_ON(VM_STACK_FLAGS & VM_STACK_INCOMPLETE_SETUP);
     272           0 :         vma->vm_end = STACK_TOP_MAX;
     273           0 :         vma->vm_start = vma->vm_end - PAGE_SIZE;
     274           0 :         vm_flags_init(vma, VM_SOFTDIRTY | VM_STACK_FLAGS | VM_STACK_INCOMPLETE_SETUP);
     275           0 :         vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
     276             : 
     277           0 :         err = insert_vm_struct(mm, vma);
     278           0 :         if (err)
     279             :                 goto err;
     280             : 
     281           0 :         mm->stack_vm = mm->total_vm = 1;
     282           0 :         mmap_write_unlock(mm);
     283           0 :         bprm->p = vma->vm_end - sizeof(void *);
     284           0 :         return 0;
     285             : err:
     286             :         mmap_write_unlock(mm);
     287             : err_free:
     288           0 :         bprm->vma = NULL;
     289           0 :         vm_area_free(vma);
     290           0 :         return err;
     291             : }
     292             : 
     293             : static bool valid_arg_len(struct linux_binprm *bprm, long len)
     294             : {
     295           0 :         return len <= MAX_ARG_STRLEN;
     296             : }
     297             : 
     298             : #else
     299             : 
     300             : static inline void acct_arg_size(struct linux_binprm *bprm, unsigned long pages)
     301             : {
     302             : }
     303             : 
     304             : static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
     305             :                 int write)
     306             : {
     307             :         struct page *page;
     308             : 
     309             :         page = bprm->page[pos / PAGE_SIZE];
     310             :         if (!page && write) {
     311             :                 page = alloc_page(GFP_HIGHUSER|__GFP_ZERO);
     312             :                 if (!page)
     313             :                         return NULL;
     314             :                 bprm->page[pos / PAGE_SIZE] = page;
     315             :         }
     316             : 
     317             :         return page;
     318             : }
     319             : 
     320             : static void put_arg_page(struct page *page)
     321             : {
     322             : }
     323             : 
     324             : static void free_arg_page(struct linux_binprm *bprm, int i)
     325             : {
     326             :         if (bprm->page[i]) {
     327             :                 __free_page(bprm->page[i]);
     328             :                 bprm->page[i] = NULL;
     329             :         }
     330             : }
     331             : 
     332             : static void free_arg_pages(struct linux_binprm *bprm)
     333             : {
     334             :         int i;
     335             : 
     336             :         for (i = 0; i < MAX_ARG_PAGES; i++)
     337             :                 free_arg_page(bprm, i);
     338             : }
     339             : 
     340             : static void flush_arg_page(struct linux_binprm *bprm, unsigned long pos,
     341             :                 struct page *page)
     342             : {
     343             : }
     344             : 
     345             : static int __bprm_mm_init(struct linux_binprm *bprm)
     346             : {
     347             :         bprm->p = PAGE_SIZE * MAX_ARG_PAGES - sizeof(void *);
     348             :         return 0;
     349             : }
     350             : 
     351             : static bool valid_arg_len(struct linux_binprm *bprm, long len)
     352             : {
     353             :         return len <= bprm->p;
     354             : }
     355             : 
     356             : #endif /* CONFIG_MMU */
     357             : 
     358             : /*
     359             :  * Create a new mm_struct and populate it with a temporary stack
     360             :  * vm_area_struct.  We don't have enough context at this point to set the stack
     361             :  * flags, permissions, and offset, so we use temporary values.  We'll update
     362             :  * them later in setup_arg_pages().
     363             :  */
     364           0 : static int bprm_mm_init(struct linux_binprm *bprm)
     365             : {
     366             :         int err;
     367           0 :         struct mm_struct *mm = NULL;
     368             : 
     369           0 :         bprm->mm = mm = mm_alloc();
     370           0 :         err = -ENOMEM;
     371           0 :         if (!mm)
     372             :                 goto err;
     373             : 
     374             :         /* Save current stack limit for all calculations made during exec. */
     375           0 :         task_lock(current->group_leader);
     376           0 :         bprm->rlim_stack = current->signal->rlim[RLIMIT_STACK];
     377           0 :         task_unlock(current->group_leader);
     378             : 
     379           0 :         err = __bprm_mm_init(bprm);
     380           0 :         if (err)
     381             :                 goto err;
     382             : 
     383             :         return 0;
     384             : 
     385             : err:
     386           0 :         if (mm) {
     387           0 :                 bprm->mm = NULL;
     388             :                 mmdrop(mm);
     389             :         }
     390             : 
     391             :         return err;
     392             : }
     393             : 
     394             : struct user_arg_ptr {
     395             : #ifdef CONFIG_COMPAT
     396             :         bool is_compat;
     397             : #endif
     398             :         union {
     399             :                 const char __user *const __user *native;
     400             : #ifdef CONFIG_COMPAT
     401             :                 const compat_uptr_t __user *compat;
     402             : #endif
     403             :         } ptr;
     404             : };
     405             : 
     406           0 : static const char __user *get_user_arg_ptr(struct user_arg_ptr argv, int nr)
     407             : {
     408             :         const char __user *native;
     409             : 
     410             : #ifdef CONFIG_COMPAT
     411             :         if (unlikely(argv.is_compat)) {
     412             :                 compat_uptr_t compat;
     413             : 
     414             :                 if (get_user(compat, argv.ptr.compat + nr))
     415             :                         return ERR_PTR(-EFAULT);
     416             : 
     417             :                 return compat_ptr(compat);
     418             :         }
     419             : #endif
     420             : 
     421           0 :         if (get_user(native, argv.ptr.native + nr))
     422             :                 return ERR_PTR(-EFAULT);
     423             : 
     424           0 :         return native;
     425             : }
     426             : 
     427             : /*
     428             :  * count() counts the number of strings in array ARGV.
     429             :  */
     430           0 : static int count(struct user_arg_ptr argv, int max)
     431             : {
     432           0 :         int i = 0;
     433             : 
     434           0 :         if (argv.ptr.native != NULL) {
     435           0 :                 for (;;) {
     436           0 :                         const char __user *p = get_user_arg_ptr(argv, i);
     437             : 
     438           0 :                         if (!p)
     439             :                                 break;
     440             : 
     441           0 :                         if (IS_ERR(p))
     442             :                                 return -EFAULT;
     443             : 
     444           0 :                         if (i >= max)
     445             :                                 return -E2BIG;
     446           0 :                         ++i;
     447             : 
     448           0 :                         if (fatal_signal_pending(current))
     449             :                                 return -ERESTARTNOHAND;
     450           0 :                         cond_resched();
     451             :                 }
     452             :         }
     453             :         return i;
     454             : }
     455             : 
     456           0 : static int count_strings_kernel(const char *const *argv)
     457             : {
     458             :         int i;
     459             : 
     460           0 :         if (!argv)
     461             :                 return 0;
     462             : 
     463           0 :         for (i = 0; argv[i]; ++i) {
     464           0 :                 if (i >= MAX_ARG_STRINGS)
     465             :                         return -E2BIG;
     466           0 :                 if (fatal_signal_pending(current))
     467             :                         return -ERESTARTNOHAND;
     468           0 :                 cond_resched();
     469             :         }
     470             :         return i;
     471             : }
     472             : 
     473             : static int bprm_stack_limits(struct linux_binprm *bprm)
     474             : {
     475             :         unsigned long limit, ptr_size;
     476             : 
     477             :         /*
     478             :          * Limit to 1/4 of the max stack size or 3/4 of _STK_LIM
     479             :          * (whichever is smaller) for the argv+env strings.
     480             :          * This ensures that:
     481             :          *  - the remaining binfmt code will not run out of stack space,
     482             :          *  - the program will have a reasonable amount of stack left
     483             :          *    to work from.
     484             :          */
     485           0 :         limit = _STK_LIM / 4 * 3;
     486           0 :         limit = min(limit, bprm->rlim_stack.rlim_cur / 4);
     487             :         /*
     488             :          * We've historically supported up to 32 pages (ARG_MAX)
     489             :          * of argument strings even with small stacks
     490             :          */
     491           0 :         limit = max_t(unsigned long, limit, ARG_MAX);
     492             :         /*
     493             :          * We must account for the size of all the argv and envp pointers to
     494             :          * the argv and envp strings, since they will also take up space in
     495             :          * the stack. They aren't stored until much later when we can't
     496             :          * signal to the parent that the child has run out of stack space.
     497             :          * Instead, calculate it here so it's possible to fail gracefully.
     498             :          *
     499             :          * In the case of argc = 0, make sure there is space for adding an
     500             :          * empty string (which will bump argc to 1), to ensure confused
     501             :          * userspace programs don't start processing from argv[1], thinking
     502             :          * argc can never be 0, to keep them from walking envp by accident.
     503             :          * See do_execveat_common().
     504             :          */
     505           0 :         ptr_size = (max(bprm->argc, 1) + bprm->envc) * sizeof(void *);
     506           0 :         if (limit <= ptr_size)
     507             :                 return -E2BIG;
     508           0 :         limit -= ptr_size;
     509             : 
     510             :  * process's memory to the new process's stack.  The call to get_user_pages()
     511             :         return 0;
     512             : }
     513             : 
     514             : /*
     515             :  * 'copy_strings()' copies argument/environment strings from the old
     516             :  * processes's memory to the new process's stack.  The call to get_user_pages()
     517             :  * ensures the destination page is created and not swapped out.
     518             :  */
     519           0 : static int copy_strings(int argc, struct user_arg_ptr argv,
     520             :                         struct linux_binprm *bprm)
     521             : {
     522           0 :         struct page *kmapped_page = NULL;
     523           0 :         char *kaddr = NULL;
     524           0 :         unsigned long kpos = 0;
     525             :         int ret;
     526             : 
     527           0 :         while (argc-- > 0) {
     528             :                 const char __user *str;
     529             :                 int len;
     530             :                 unsigned long pos;
     531             : 
     532           0 :                 ret = -EFAULT;
     533           0 :                 str = get_user_arg_ptr(argv, argc);
     534           0 :                 if (IS_ERR(str))
     535             :                         goto out;
     536             : 
     537           0 :                 len = strnlen_user(str, MAX_ARG_STRLEN);
     538           0 :                 if (!len)
     539             :                         goto out;
     540             : 
     541           0 :                 ret = -E2BIG;
     542           0 :                 if (!valid_arg_len(bprm, len))
     543             :                         goto out;
     544             : 
     545             :                 /* We're going to work our way backwards. */
     546           0 :                 pos = bprm->p;
     547           0 :                 str += len;
     548           0 :                 bprm->p -= len;
     549             : #ifdef CONFIG_MMU
     550           0 :                 if (bprm->p < bprm->argmin)
     551             :                         goto out;
     552             : #endif
     553             : 
     554           0 :                 while (len > 0) {
     555             :                         int offset, bytes_to_copy;
     556             : 
     557           0 :                         if (fatal_signal_pending(current)) {
     558             :                                 ret = -ERESTARTNOHAND;
     559             :                                 goto out;
     560             :                         }
     561           0 :                         cond_resched();
     562             : 
     563           0 :                         offset = pos % PAGE_SIZE;
     564           0 :                         if (offset == 0)
     565           0 :                                 offset = PAGE_SIZE;
     566             : 
     567           0 :                         bytes_to_copy = offset;
     568           0 :                         if (bytes_to_copy > len)
     569           0 :                                 bytes_to_copy = len;
     570             : 
     571           0 :                         offset -= bytes_to_copy;
     572           0 :                         pos -= bytes_to_copy;
     573           0 :                         str -= bytes_to_copy;
     574           0 :                         len -= bytes_to_copy;
     575             : 
     576           0 :                         if (!kmapped_page || kpos != (pos & PAGE_MASK)) {
     577             :                                 struct page *page;
     578             : 
     579           0 :                                 page = get_arg_page(bprm, pos, 1);
     580           0 :                                 if (!page) {
     581             :                                         ret = -E2BIG;
     582             :                                         goto out;
     583             :                                 }
     584             : 
     585           0 :                                 if (kmapped_page) {
     586           0 :                                         flush_dcache_page(kmapped_page);
     587           0 :                                         kunmap_local(kaddr);
     588             :                                         put_arg_page(kmapped_page);
     589             :                                 }
     590           0 :                                 kmapped_page = page;
     591           0 :                                 kaddr = kmap_local_page(kmapped_page);
     592           0 :                                 kpos = pos & PAGE_MASK;
     593           0 :                                 flush_arg_page(bprm, kpos, kmapped_page);
     594             :                         }
     595           0 :                         if (copy_from_user(kaddr+offset, str, bytes_to_copy)) {
     596             :                                 ret = -EFAULT;
     597             :                                 goto out;
     598             :                         }
     599             :                 }
     600             :         }
     601             :         ret = 0;
     602             : out:
     603           0 :         if (kmapped_page) {
     604           0 :                 flush_dcache_page(kmapped_page);
     605           0 :                 kunmap_local(kaddr);
     606             :                 put_arg_page(kmapped_page);
     607             :         }
     608           0 :         return ret;
     609             : }
     610             : 
     611             : /*
     612             :  * Copy an argument/environment string from the kernel to the process's stack.
     613             :  */
     614           0 : int copy_string_kernel(const char *arg, struct linux_binprm *bprm)
     615             : {
     616           0 :         int len = strnlen(arg, MAX_ARG_STRLEN) + 1 /* terminating NUL */;
     617           0 :         unsigned long pos = bprm->p;
     618             : 
     619           0 :         if (len == 0)
     620             :                 return -EFAULT;
     621           0 :         if (!valid_arg_len(bprm, len))
     622             :                 return -E2BIG;
     623             : 
     624             :         /* We're going to work our way backwards. */
     625           0 :         arg += len;
     626           0 :         bprm->p -= len;
     627           0 :         if (IS_ENABLED(CONFIG_MMU) && bprm->p < bprm->argmin)
     628             :                 return -E2BIG;
     629             : 
     630           0 :         while (len > 0) {
     631           0 :                 unsigned int bytes_to_copy = min_t(unsigned int, len,
     632             :                                 min_not_zero(offset_in_page(pos), PAGE_SIZE));
     633             :                 struct page *page;
     634             : 
     635           0 :                 pos -= bytes_to_copy;
     636           0 :                 arg -= bytes_to_copy;
     637           0 :                 len -= bytes_to_copy;
     638             : 
     639           0 :                 page = get_arg_page(bprm, pos, 1);
     640           0 :                 if (!page)
     641             :                         return -E2BIG;
     642           0 :                 flush_arg_page(bprm, pos & PAGE_MASK, page);
     643           0 :                 memcpy_to_page(page, offset_in_page(pos), arg, bytes_to_copy);
     644             :                 put_arg_page(page);
     645             :         }
     646             : 
     647             :         return 0;
     648             : }
     649             : EXPORT_SYMBOL(copy_string_kernel);
     650             : /*
                     :  * Copy argc kernel strings from argv using copy_string_kernel().
                     :  *
                     :  * The array is walked from argv[argc - 1] down to argv[0].  Returns 0 once
                     :  * every string has been copied, the first negative value returned by
                     :  * copy_string_kernel(), or -ERESTARTNOHAND if a fatal signal is found
                     :  * pending after a string has been copied.
                     :  */
     651           0 : static int copy_strings_kernel(int argc, const char *const *argv,
     652             :                                struct linux_binprm *bprm)
     653             : {
     654           0 :         while (argc-- > 0) {
     655           0 :                 int ret = copy_string_kernel(argv[argc], bprm);
     656           0 :                 if (ret < 0)
     657             :                         return ret;
     658           0 :                 if (fatal_signal_pending(current))
     659             :                         return -ERESTARTNOHAND;
     660           0 :                 cond_resched();
     661             :         }
     662             :         return 0;
     663             : }
     664             : 
     665             : #ifdef CONFIG_MMU
     666             : 
     667             : /*
     668             :  * During bprm_mm_init(), we create a temporary stack at STACK_TOP_MAX.  Once
     669             :  * the binfmt code determines where the new stack should reside, we shift it to
     670             :  * its final location.  The process proceeds as follows:
     671             :  *
     672             :  * 1) Use shift to calculate the new vma endpoints.
     673             :  * 2) Extend vma to cover both the old and new ranges.  This ensures the
     674             :  *    arguments passed to subsequent functions are consistent.
     675             :  * 3) Move vma's page tables to the new range.
     676             :  * 4) Free up any cleared pgd range.
     677             :  * 5) Shrink the vma to cover only the new range.
                     :  *
                     :  * @vma:   the stack vma to move.
                     :  * @shift: number of bytes by which both vma endpoints are lowered.
                     :  *
                     :  * Returns -EFAULT if the vma_next() lookup started at new_start does not
                     :  * return vma, -ENOMEM if vma_expand() fails or move_page_tables() moves
                     :  * fewer than length bytes, and otherwise the result of vma_shrink().
     678             :  */
     679           0 : static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift)
     680             : {
     681           0 :         struct mm_struct *mm = vma->vm_mm;
     682           0 :         unsigned long old_start = vma->vm_start;
     683           0 :         unsigned long old_end = vma->vm_end;
     684           0 :         unsigned long length = old_end - old_start;
     685           0 :         unsigned long new_start = old_start - shift;
     686           0 :         unsigned long new_end = old_end - shift;
     687           0 :         VMA_ITERATOR(vmi, mm, new_start);
     688             :         struct vm_area_struct *next;
     689             :         struct mmu_gather tlb;
     690             : 
     691           0 :         BUG_ON(new_start > new_end);
     692             : 
     693             :         /*
     694             :          * ensure there are no vmas between where we want to go
     695             :          * and where we are
     696             :          */
     697           0 :         if (vma != vma_next(&vmi))
     698             :                 return -EFAULT;
     699             : 
     700             :         /*
     701             :          * cover the whole range: [new_start, old_end)
     702             :          */
     703           0 :         if (vma_expand(&vmi, vma, new_start, old_end, vma->vm_pgoff, NULL))
     704             :                 return -ENOMEM;
     705             : 
     706             :         /*
     707             :          * move the page tables downwards, on failure we rely on
     708             :          * process cleanup to remove whatever mess we made.
     709             :          */
     710           0 :         if (length != move_page_tables(vma, old_start,
     711             :                                        vma, new_start, length, false))
     712             :                 return -ENOMEM;
     713             : 
     714           0 :         lru_add_drain();
     715           0 :         tlb_gather_mmu(&tlb, mm);
     716           0 :         next = vma_next(&vmi);
     717           0 :         if (new_end > old_start) {
     718             :                 /*
     719             :                  * when the old and new regions overlap clear from new_end.
     720             :                  */
     721           0 :                 free_pgd_range(&tlb, new_end, old_end, new_end,
     722             :                         next ? next->vm_start : USER_PGTABLES_CEILING);
     723             :         } else {
     724             :                 /*
     725             :                  * otherwise, clean from old_start; this is done to not touch
     726             :                  * the address space in [new_end, old_start) some architectures
     727             :                  * have constraints on va-space that make this illegal (IA64) -
     728             :                  * for the others it's just a little faster.
     729             :                  */
     730           0 :                 free_pgd_range(&tlb, old_start, old_end, new_end,
     731             :                         next ? next->vm_start : USER_PGTABLES_CEILING);
     732             :         }
     733           0 :         tlb_finish_mmu(&tlb);
     734             : 
     735           0 :         vma_prev(&vmi);
     736             :         /* Shrink the vma to just the new range */
     737           0 :         return vma_shrink(&vmi, vma, new_start, new_end, vma->vm_pgoff);
     738             : }
     739             : 
     740             : /*
     741             :  * Finalizes the stack vm_area_struct. The flags and permissions are updated,
     742             :  * the stack is optionally relocated, and some extra space is added.
                     :  *
                     :  * @bprm:             exec context; supplies the stack vma (bprm->vma) and has
                     :  *                    p, exec and (if set) loader adjusted by the stack shift.
                     :  * @stack_top:        requested top-of-stack address.
                     :  * @executable_stack: EXSTACK_ENABLE_X sets VM_EXEC, EXSTACK_DISABLE_X clears
                     :  *                    it, any other value keeps what VM_STACK_FLAGS says.
                     :  *
                     :  * Returns 0 on success, -ENOMEM if the size/placement checks fail, -EINTR if
                     :  * mmap_write_lock_killable() fails, -EFAULT if expand_stack() fails, or the
                     :  * error reported by mprotect_fixup() or shift_arg_pages().
     743             :  */
     744           0 : int setup_arg_pages(struct linux_binprm *bprm,
     745             :                     unsigned long stack_top,
     746             :                     int executable_stack)
     747             : {
     748             :         unsigned long ret; /* also carries negative errnos; returned as int */
     749             :         unsigned long stack_shift;
     750           0 :         struct mm_struct *mm = current->mm;
     751           0 :         struct vm_area_struct *vma = bprm->vma;
     752           0 :         struct vm_area_struct *prev = NULL;
     753             :         unsigned long vm_flags;
     754             :         unsigned long stack_base;
     755             :         unsigned long stack_size;
     756             :         unsigned long stack_expand;
     757             :         unsigned long rlim_stack;
     758             :         struct mmu_gather tlb;
     759             :         struct vma_iterator vmi;
     760             : 
     761             : #ifdef CONFIG_STACK_GROWSUP
     762             :         /* Limit stack size */
     763             :         stack_base = bprm->rlim_stack.rlim_max;
     764             : 
     765             :         stack_base = calc_max_stack_size(stack_base);
     766             : 
     767             :         /* Add space for stack randomization. */
     768             :         stack_base += (STACK_RND_MASK << PAGE_SHIFT);
     769             : 
     770             :         /* Make sure we didn't let the argument array grow too large. */
     771             :         if (vma->vm_end - vma->vm_start > stack_base)
     772             :                 return -ENOMEM;
     773             : 
     774             :         stack_base = PAGE_ALIGN(stack_top - stack_base);
     775             : 
     776             :         stack_shift = vma->vm_start - stack_base;
     777             :         mm->arg_start = bprm->p - stack_shift;
     778             :         bprm->p = vma->vm_end - stack_shift;
     779             : #else
     780           0 :         stack_top = arch_align_stack(stack_top);
     781           0 :         stack_top = PAGE_ALIGN(stack_top);
     782             : 
     783           0 :         if (unlikely(stack_top < mmap_min_addr) ||
     784           0 :             unlikely(vma->vm_end - vma->vm_start >= stack_top - mmap_min_addr))
     785             :                 return -ENOMEM;
     786             : 
     787           0 :         stack_shift = vma->vm_end - stack_top;
     788             : 
     789           0 :         bprm->p -= stack_shift;
     790           0 :         mm->arg_start = bprm->p;
     791             : #endif
     792             :         /* Apply the same shift to the other addresses recorded in bprm. */
     793           0 :         if (bprm->loader)
     794           0 :                 bprm->loader -= stack_shift;
     795           0 :         bprm->exec -= stack_shift;
     796             : 
     797           0 :         if (mmap_write_lock_killable(mm))
     798             :                 return -EINTR;
     799             : 
     800           0 :         vm_flags = VM_STACK_FLAGS;
     801             : 
     802             :         /*
     803             :          * Adjust stack execute permissions; explicitly enable for
     804             :          * EXSTACK_ENABLE_X, disable for EXSTACK_DISABLE_X and leave alone
     805             :          * (arch default) otherwise.
     806             :          */
     807           0 :         if (unlikely(executable_stack == EXSTACK_ENABLE_X))
     808             :                 vm_flags |= VM_EXEC;
     809           0 :         else if (executable_stack == EXSTACK_DISABLE_X)
     810           0 :                 vm_flags &= ~VM_EXEC;
     811           0 :         vm_flags |= mm->def_flags;
     812           0 :         vm_flags |= VM_STACK_INCOMPLETE_SETUP;
     813             : 
     814           0 :         vma_iter_init(&vmi, mm, vma->vm_start);
     815             : 
     816           0 :         tlb_gather_mmu(&tlb, mm);
     817           0 :         ret = mprotect_fixup(&vmi, &tlb, vma, &prev, vma->vm_start, vma->vm_end,
     818             :                         vm_flags);
     819           0 :         tlb_finish_mmu(&tlb);
     820             : 
     821           0 :         if (ret)
     822             :                 goto out_unlock;
     823           0 :         BUG_ON(prev != vma);
     824             : 
     825           0 :         if (unlikely(vm_flags & VM_EXEC)) {
     826           0 :                 pr_warn_once("process '%pD4' started with executable stack\n",
     827             :                              bprm->file);
     828             :         }
     829             : 
     830             :         /* Move stack pages down in memory. */
     831           0 :         if (stack_shift) {
     832           0 :                 ret = shift_arg_pages(vma, stack_shift);
     833           0 :                 if (ret)
     834             :                         goto out_unlock;
     835             :         }
     836             : 
     837             :         /* mprotect_fixup is overkill to remove the temporary stack flags */
     838           0 :         vm_flags_clear(vma, VM_STACK_INCOMPLETE_SETUP);
     839             : 
     840           0 :         stack_expand = 131072UL; /* randomly 32*4k (or 2*64k) pages */
     841           0 :         stack_size = vma->vm_end - vma->vm_start;
     842             :         /*
     843             :          * Align this down to a page boundary as expand_stack
     844             :          * will align it up.
     845             :          */
     846           0 :         rlim_stack = bprm->rlim_stack.rlim_cur & PAGE_MASK;
     847             :         /* Grow to current size plus 128 KiB, but never beyond the rlimit. */
     848           0 :         stack_expand = min(rlim_stack, stack_size + stack_expand);
     849             : 
     850             : #ifdef CONFIG_STACK_GROWSUP
     851             :         stack_base = vma->vm_start + stack_expand;
     852             : #else
     853           0 :         stack_base = vma->vm_end - stack_expand;
     854             : #endif
     855           0 :         current->mm->start_stack = bprm->p;
     856           0 :         ret = expand_stack(vma, stack_base);
     857           0 :         if (ret)
     858           0 :                 ret = -EFAULT;
     859             : 
     860             : out_unlock:
     861           0 :         mmap_write_unlock(mm);
     862           0 :         return ret;
     863             : }
     864             : EXPORT_SYMBOL(setup_arg_pages);
     865             : 
     866             : #else
     867             : 
     868             : /*
     869             :  * Transfer the program arguments and environment from the holding pages
     870             :  * onto the stack. The provided stack pointer is adjusted accordingly.
                     :  *
                     :  * Pages are copied from index MAX_ARG_PAGES - 1 down to the page that holds
                     :  * bprm->p; for that last page only the bytes from the in-page offset of
                     :  * bprm->p onwards are copied.  The value behind sp_location is lowered by
                     :  * the total copied and is written back only on success.
                     :  *
                     :  * Returns 0 on success or -EFAULT if copy_to_user() fails.
                     :  *
                     :  * NOTE(review): index and stop are unsigned, so "index >= stop" can never
                     :  * become false when stop is 0 - confirm bprm->p cannot fall below PAGE_SIZE
                     :  * by the time this runs.
     871             :  */
     872             : int transfer_args_to_stack(struct linux_binprm *bprm,
     873             :                            unsigned long *sp_location)
     874             : {
     875             :         unsigned long index, stop, sp;
     876             :         int ret = 0;
     877             : 
     878             :         stop = bprm->p >> PAGE_SHIFT;
     879             :         sp = *sp_location;
     880             : 
     881             :         for (index = MAX_ARG_PAGES - 1; index >= stop; index--) {
     882             :                 unsigned int offset = index == stop ? bprm->p & ~PAGE_MASK : 0;
     883             :                 char *src = kmap_local_page(bprm->page[index]) + offset;
     884             :                 sp -= PAGE_SIZE - offset;
     885             :                 if (copy_to_user((void *) sp, src, PAGE_SIZE - offset) != 0)
     886             :                         ret = -EFAULT;
     887             :                 kunmap_local(src);
     888             :                 if (ret)
     889             :                         goto out;
     890             :         }
     891             : 
     892             :         *sp_location = sp;
     893             : 
     894             : out:
     895             :         return ret;
     896             : }
     897             : EXPORT_SYMBOL(transfer_args_to_stack);
     898             : 
     899             : #endif /* CONFIG_MMU */
     900             : /*
                     :  * Open the file described by name for execution, looking it up with
                     :  * do_filp_open(fd, name, ...).
                     :  *
                     :  * flags may only contain AT_SYMLINK_NOFOLLOW (drops LOOKUP_FOLLOW) and
                     :  * AT_EMPTY_PATH (adds LOOKUP_EMPTY); any other bit yields ERR_PTR(-EINVAL).
                     :  *
                     :  * Returns the opened file once deny_write_access() has succeeded on it, or
                     :  * an ERR_PTR(): the do_filp_open() error, -EACCES when the file is not a
                     :  * regular file or its path is noexec, or the deny_write_access() error.  In
                     :  * the latter two cases the file reference is dropped with fput().
                     :  */
     901           0 : static struct file *do_open_execat(int fd, struct filename *name, int flags)
     902             : {
     903             :         struct file *file;
     904             :         int err;
     905           0 :         struct open_flags open_exec_flags = {
     906             :                 .open_flag = O_LARGEFILE | O_RDONLY | __FMODE_EXEC,
     907             :                 .acc_mode = MAY_EXEC,
     908             :                 .intent = LOOKUP_OPEN,
     909             :                 .lookup_flags = LOOKUP_FOLLOW,
     910             :         };
     911             : 
     912           0 :         if ((flags & ~(AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH)) != 0)
     913             :                 return ERR_PTR(-EINVAL);
     914           0 :         if (flags & AT_SYMLINK_NOFOLLOW)
     915           0 :                 open_exec_flags.lookup_flags &= ~LOOKUP_FOLLOW;
     916           0 :         if (flags & AT_EMPTY_PATH)
     917           0 :                 open_exec_flags.lookup_flags |= LOOKUP_EMPTY;
     918             : 
     919           0 :         file = do_filp_open(fd, name, &open_exec_flags);
     920           0 :         if (IS_ERR(file))
     921             :                 goto out;
     922             : 
     923             :         /*
     924             :          * may_open() has already checked for this, so it should be
     925             :          * impossible to trip now. But we need to be extra cautious
     926             :          * and check again at the very end too.
     927             :          */
     928           0 :         err = -EACCES;
     929           0 :         if (WARN_ON_ONCE(!S_ISREG(file_inode(file)->i_mode) ||
     930             :                          path_noexec(&file->f_path)))
     931             :                 goto exit;
     932             : 
     933           0 :         err = deny_write_access(file);
     934           0 :         if (err)
     935             :                 goto exit;
     936             :         /* The open notification is skipped when the name is the empty string. */
     937           0 :         if (name->name[0] != '\0')
     938           0 :                 fsnotify_open(file);
     939             : 
     940             : out:
     941             :         return file;
     942             : 
     943             : exit:
     944           0 :         fput(file);
     945           0 :         return ERR_PTR(err);
     946             : }
     947             : /*
                     :  * Open a path given as a kernel string for execution.
                     :  *
                     :  * Calls do_open_execat(AT_FDCWD, ..., 0) on a filename obtained from
                     :  * getname_kernel() and releases that filename afterwards with putname().
                     :  * Returns the file or ERR_PTR from do_open_execat(); a getname_kernel()
                     :  * failure is passed through with ERR_CAST().
                     :  */
     948           0 : struct file *open_exec(const char *name)
     949             : {
     950           0 :         struct filename *filename = getname_kernel(name);
     951           0 :         struct file *f = ERR_CAST(filename);
     952             : 
     953           0 :         if (!IS_ERR(filename)) {
     954           0 :                 f = do_open_execat(AT_FDCWD, filename, 0);
     955           0 :                 putname(filename);
     956             :         }
     957           0 :         return f;
     958             : }
     959             : EXPORT_SYMBOL(open_exec);
     960             : /*
                     :  * Read from file at offset pos into the address addr (handed to vfs_read()
                     :  * as a __user pointer, with len as the requested size) and, if anything was
                     :  * read, flush the instruction cache for [addr, addr + len).
                     :  *
                     :  * Returns the vfs_read() result.  Note that the flushed range is sized by
                     :  * len, not by the number of bytes actually read.
                     :  */
     961             : #if defined(CONFIG_BINFMT_FLAT) || defined(CONFIG_BINFMT_ELF_FDPIC)
     962             : ssize_t read_code(struct file *file, unsigned long addr, loff_t pos, size_t len)
     963             : {
     964             :         ssize_t res = vfs_read(file, (void __user *)addr, len, &pos);
     965             :         if (res > 0)
     966             :                 flush_icache_user_range(addr, addr + len);
     967             :         return res;
     968             : }
     969             : EXPORT_SYMBOL(read_code);
     970             : #endif
     971             : 
     972             : /*
     973             :  * Maps the mm_struct mm into the current task struct.
     974             :  * On success, this function returns with exec_update_lock
     975             :  * held for writing.
                     :  *
                     :  * Returns 0 on success, or the error from down_write_killable() or
                     :  * mmap_read_lock_killable(); in the latter case exec_update_lock is
                     :  * released again before returning.
     976             :  */
     977           0 : static int exec_mmap(struct mm_struct *mm)
     978             : {
     979             :         struct task_struct *tsk;
     980             :         struct mm_struct *old_mm, *active_mm;
     981             :         int ret;
     982             : 
     983             :         /* Notify parent that we're no longer interested in the old VM */
     984           0 :         tsk = current;
     985           0 :         old_mm = current->mm;
     986           0 :         exec_mm_release(tsk, old_mm);
     987             :         if (old_mm)
     988             :                 sync_mm_rss(old_mm);
     989             : 
     990           0 :         ret = down_write_killable(&tsk->signal->exec_update_lock);
     991           0 :         if (ret)
     992             :                 return ret;
     993             : 
     994           0 :         if (old_mm) {
     995             :                 /*
     996             :                  * If there is a pending fatal signal perhaps a signal
     997             :                  * whose default action is to create a coredump get
     998             :                  * out and die instead of going through with the exec.
     999             :                  */
    1000           0 :                 ret = mmap_read_lock_killable(old_mm);
    1001           0 :                 if (ret) {
    1002           0 :                         up_write(&tsk->signal->exec_update_lock);
    1003           0 :                         return ret;
    1004             :                 }
    1005             :         }
    1006             : 
    1007           0 :         task_lock(tsk);
    1008           0 :         membarrier_exec_mmap(mm);
    1009             :         /* Install mm as both tsk->mm and tsk->active_mm with interrupts off. */
    1010             :         local_irq_disable();
    1011           0 :         active_mm = tsk->active_mm;
    1012           0 :         tsk->active_mm = mm;
    1013           0 :         tsk->mm = mm;
    1014           0 :         mm_init_cid(mm);
    1015             :         /*
    1016             :          * This prevents preemption while active_mm is being loaded and
    1017             :          * it and mm are being updated, which could cause problems for
    1018             :          * lazy tlb mm refcounting when these are updated by context
    1019             :          * switches. Not all architectures can handle irqs off over
    1020             :          * activate_mm yet.
                     :          *
                     :          * Hence interrupts are re-enabled before activate_mm() unless
                     :          * CONFIG_ARCH_WANT_IRQS_OFF_ACTIVATE_MM is set, in which case they
                     :          * are re-enabled right after it.
    1021             :          */
    1022             :         if (!IS_ENABLED(CONFIG_ARCH_WANT_IRQS_OFF_ACTIVATE_MM))
    1023             :                 local_irq_enable();
    1024           0 :         activate_mm(active_mm, mm);
    1025             :         if (IS_ENABLED(CONFIG_ARCH_WANT_IRQS_OFF_ACTIVATE_MM))
    1026             :                 local_irq_enable();
    1027           0 :         lru_gen_add_mm(mm);
    1028           0 :         task_unlock(tsk);
    1029           0 :         lru_gen_use_mm(mm);
    1030           0 :         if (old_mm) {
    1031           0 :                 mmap_read_unlock(old_mm);
    1032           0 :                 BUG_ON(active_mm != old_mm);
    1033           0 :                 setmax_mm_hiwater_rss(&tsk->signal->maxrss, old_mm);
    1034           0 :                 mm_update_next_owner(old_mm);
    1035           0 :                 mmput(old_mm);
    1036           0 :                 return 0;
    1037             :         }
    1038             :         mmdrop_lazy_tlb(active_mm);
    1039             :         return 0;
    1040             : }
    1041             : /*
                     :  * Make tsk the only thread, and the leader, of its thread group.
                     :  *
                     :  * If the group has other threads, zap_other_threads() is called and tsk
                     :  * sleeps (TASK_KILLABLE) until sig->notify_count drops to zero.  If tsk is
                     :  * not the group leader it then waits for the old leader to reach an exit
                     :  * state, takes over its start times, pids and list positions, marks it
                     :  * EXIT_DEAD and releases it.
                     :  *
                     :  * Returns 0 on success, or -EAGAIN if SIGNAL_GROUP_EXIT is set or another
                     :  * group_exec_task is registered, or if a fatal signal is pending while
                     :  * waiting.
                     :  */
    1042           0 : static int de_thread(struct task_struct *tsk)
    1043             : {
    1044           0 :         struct signal_struct *sig = tsk->signal;
    1045           0 :         struct sighand_struct *oldsighand = tsk->sighand;
    1046           0 :         spinlock_t *lock = &oldsighand->siglock;
    1047             : 
    1048           0 :         if (thread_group_empty(tsk))
    1049             :                 goto no_thread_group;
    1050             : 
    1051             :         /*
    1052             :          * Kill all other threads in the thread group.
    1053             :          */
    1054           0 :         spin_lock_irq(lock);
    1055           0 :         if ((sig->flags & SIGNAL_GROUP_EXIT) || sig->group_exec_task) {
    1056             :                 /*
    1057             :                  * Another group action in progress, just
    1058             :                  * return so that the signal is processed.
    1059             :                  */
    1060           0 :                 spin_unlock_irq(lock);
    1061           0 :                 return -EAGAIN;
    1062             :         }
    1063             :         /*
                     :          * notify_count starts from the zap_other_threads() return value, less
                     :          * one when tsk is not the group leader (presumably because the leader
                     :          * is waited for separately below - confirm).
                     :          */
    1064           0 :         sig->group_exec_task = tsk;
    1065           0 :         sig->notify_count = zap_other_threads(tsk);
    1066           0 :         if (!thread_group_leader(tsk))
    1067           0 :                 sig->notify_count--;
    1068             : 
    1069           0 :         while (sig->notify_count) {
    1070           0 :                 __set_current_state(TASK_KILLABLE);
    1071           0 :                 spin_unlock_irq(lock);
    1072           0 :                 schedule();
    1073           0 :                 if (__fatal_signal_pending(tsk))
    1074             :                         goto killed;
    1075             :                 spin_lock_irq(lock);
    1076             :         }
    1077           0 :         spin_unlock_irq(lock);
    1078             : 
    1079             :         /*
    1080             :          * At this point all other threads have exited, all we have to
    1081             :          * do is to wait for the thread group leader to become inactive,
    1082             :          * and to assume its PID:
    1083             :          */
    1084           0 :         if (!thread_group_leader(tsk)) {
    1085           0 :                 struct task_struct *leader = tsk->group_leader;
    1086             : 
    1087             :                 for (;;) {
    1088           0 :                         cgroup_threadgroup_change_begin(tsk);
    1089           0 :                         write_lock_irq(&tasklist_lock);
    1090             :                         /*
    1091             :                          * Do this under tasklist_lock to ensure that
    1092             :                          * exit_notify() can't miss ->group_exec_task
    1093             :                          */
    1094           0 :                         sig->notify_count = -1;
    1095           0 :                         if (likely(leader->exit_state))
    1096             :                                 break;
    1097           0 :                         __set_current_state(TASK_KILLABLE);
    1098           0 :                         write_unlock_irq(&tasklist_lock);
    1099           0 :                         cgroup_threadgroup_change_end(tsk);
    1100           0 :                         schedule();
    1101           0 :                         if (__fatal_signal_pending(tsk))
    1102             :                                 goto killed;
    1103             :                 }
    1104             : 
    1105             :                 /*
    1106             :                  * The only record we have of the real-time age of a
    1107             :                  * process, regardless of execs it's done, is start_time.
    1108             :                  * All the past CPU time is accumulated in signal_struct
    1109             :                  * from sister threads now dead.  But in this non-leader
    1110             :                  * exec, nothing survives from the original leader thread,
    1111             :                  * whose birth marks the true age of this process now.
    1112             :                  * When we take on its identity by switching to its PID, we
    1113             :                  * also take its birthdate (always earlier than our own).
    1114             :                  */
    1115           0 :                 tsk->start_time = leader->start_time;
    1116           0 :                 tsk->start_boottime = leader->start_boottime;
    1117             : 
    1118           0 :                 BUG_ON(!same_thread_group(leader, tsk));
    1119             :                 /*
    1120             :                  * An exec() starts a new thread group with the
    1121             :                  * TGID of the previous thread group. Rehash the
    1122             :                  * two threads with a switched PID, and release
    1123             :                  * the former thread group leader:
    1124             :                  */
    1125             : 
    1126             :                 /* Become a process group leader with the old leader's pid.
    1127             :                  * The old leader becomes a thread of this thread group.
    1128             :                  */
    1129           0 :                 exchange_tids(tsk, leader);
    1130           0 :                 transfer_pid(leader, tsk, PIDTYPE_TGID);
    1131           0 :                 transfer_pid(leader, tsk, PIDTYPE_PGID);
    1132           0 :                 transfer_pid(leader, tsk, PIDTYPE_SID);
    1133             : 
    1134           0 :                 list_replace_rcu(&leader->tasks, &tsk->tasks);
    1135           0 :                 list_replace_init(&leader->sibling, &tsk->sibling);
    1136             : 
    1137           0 :                 tsk->group_leader = tsk;
    1138           0 :                 leader->group_leader = tsk;
    1139             : 
    1140           0 :                 tsk->exit_signal = SIGCHLD;
    1141           0 :                 leader->exit_signal = -1;
    1142             : 
    1143           0 :                 BUG_ON(leader->exit_state != EXIT_ZOMBIE);
    1144           0 :                 leader->exit_state = EXIT_DEAD;
    1145             : 
    1146             :                 /*
    1147             :                  * We are going to release_task()->ptrace_unlink() silently,
    1148             :                  * the tracer can sleep in do_wait(). EXIT_DEAD guarantees
    1149             :                  * the tracer won't block again waiting for this thread.
    1150             :                  */
    1151           0 :                 if (unlikely(leader->ptrace))
    1152           0 :                         __wake_up_parent(leader, leader->parent);
    1153           0 :                 write_unlock_irq(&tasklist_lock);
    1154           0 :                 cgroup_threadgroup_change_end(tsk);
    1155             : 
    1156           0 :                 release_task(leader);
    1157             :         }
    1158             : 
    1159           0 :         sig->group_exec_task = NULL;
    1160           0 :         sig->notify_count = 0;
    1161             : 
    1162             : no_thread_group:
    1163             :         /* we have changed execution domain */
    1164           0 :         tsk->exit_signal = SIGCHLD;
    1165             : 
    1166           0 :         BUG_ON(!thread_group_leader(tsk));
    1167             :         return 0;
    1168             : 
    1169             : killed:
    1170             :         /* protects against exit_notify() and __exit_signal() */
    1171           0 :         read_lock(&tasklist_lock);
    1172           0 :         sig->group_exec_task = NULL;
    1173           0 :         sig->notify_count = 0;
    1174           0 :         read_unlock(&tasklist_lock);
    1175           0 :         return -EAGAIN;
    1176             : }
    1177             : 
    1178             : 
    1179             : /*
    1180             :  * This function makes sure the current process has its own signal table,
    1181             :  * so that flush_signal_handlers can later reset the handlers without
    1182             :  * disturbing other processes.  (Other processes might share the signal
    1183             :  * table via the CLONE_SIGHAND option to clone().)
                     :  *
                     :  * When the sighand refcount is not 1, a new sighand_struct is allocated, the
                     :  * action table is copied into it under tasklist_lock and the old siglock,
                     :  * me->sighand is switched over, and __cleanup_sighand() is called on the
                     :  * old structure.
                     :  *
                     :  * Returns 0, or -ENOMEM if the new sighand_struct cannot be allocated.
    1184             :  */
    1185           0 : static int unshare_sighand(struct task_struct *me)
    1186             : {
    1187           0 :         struct sighand_struct *oldsighand = me->sighand;
    1188             : 
    1189           0 :         if (refcount_read(&oldsighand->count) != 1) {
    1190             :                 struct sighand_struct *newsighand;
    1191             :                 /*
    1192             :                  * This ->sighand is shared with the CLONE_SIGHAND
    1193             :                  * but not CLONE_THREAD task, switch to the new one.
    1194             :                  */
    1195           0 :                 newsighand = kmem_cache_alloc(sighand_cachep, GFP_KERNEL);
    1196           0 :                 if (!newsighand)
    1197             :                         return -ENOMEM;
    1198             : 
    1199           0 :                 refcount_set(&newsighand->count, 1);
    1200             : 
    1201           0 :                 write_lock_irq(&tasklist_lock);
    1202           0 :                 spin_lock(&oldsighand->siglock);
    1203           0 :                 memcpy(newsighand->action, oldsighand->action,
    1204             :                        sizeof(newsighand->action));
    1205           0 :                 rcu_assign_pointer(me->sighand, newsighand);
    1206           0 :                 spin_unlock(&oldsighand->siglock);
    1207           0 :                 write_unlock_irq(&tasklist_lock);
    1208             : 
    1209           0 :                 __cleanup_sighand(oldsighand);
    1210             :         }
    1211             :         return 0;
    1212             : }
    1213             : 
    1214           0 : char *__get_task_comm(char *buf, size_t buf_size, struct task_struct *tsk)
    1215             : {
    1216           0 :         task_lock(tsk);
    1217             :         /* Always NUL terminated and zero-padded */
    1218           0 :         strscpy_pad(buf, tsk->comm, buf_size);
    1219           0 :         task_unlock(tsk);
    1220           0 :         return buf;
    1221             : }
    1222             : EXPORT_SYMBOL_GPL(__get_task_comm);
    1223             : 
    1224             : /*
    1225             :  * These functions flush out all traces of the currently running executable
    1226             :  * so that a new one can be started
    1227             :  */
    1228             : 
    1229           1 : void __set_task_comm(struct task_struct *tsk, const char *buf, bool exec)
    1230             : {
    1231           1 :         task_lock(tsk);
    1232           1 :         trace_task_rename(tsk, buf);
    1233           1 :         strscpy_pad(tsk->comm, buf, sizeof(tsk->comm));
    1234           1 :         task_unlock(tsk);
    1235           1 :         perf_event_comm(tsk, exec);
    1236           1 : }
    1237             : 
    1238             : /*
    1239             :  * Calling this is the point of no return. None of the failures will be
    1240             :  * seen by userspace since either the process is already taking a fatal
    1241             :  * signal (via de_thread() or coredump), or will have SEGV raised
    1242             :  * (after exec_mmap()) by search_binary_handler (see below).
    1243             :  */
    1244           0 : int begin_new_exec(struct linux_binprm * bprm)
    1245             : {
    1246           0 :         struct task_struct *me = current;
    1247             :         int retval;
    1248             : 
    1249             :         /* Once we are committed compute the creds */
    1250           0 :         retval = bprm_creds_from_file(bprm);
    1251           0 :         if (retval)
    1252             :                 return retval;
    1253             : 
    1254             :         /*
    1255             :          * Ensure all future errors are fatal.
    1256             :          */
    1257           0 :         bprm->point_of_no_return = true;
    1258             : 
    1259             :         /*
    1260             :          * Make this the only thread in the thread group.
    1261             :          */
    1262           0 :         retval = de_thread(me);
    1263           0 :         if (retval)
    1264             :                 goto out;
    1265             : 
    1266             :         /*
    1267             :          * Cancel any io_uring activity across execve
    1268             :          */
    1269           0 :         io_uring_task_cancel();
    1270             : 
    1271             :         /* Ensure the files table is not shared. */
    1272           0 :         retval = unshare_files();
    1273           0 :         if (retval)
    1274             :                 goto out;
    1275             : 
    1276             :         /*
    1277             :          * Must be called _before_ exec_mmap() as bprm->mm is
    1278             :          * not visible until then. This also enables the update
    1279             :          * to be lockless.
    1280             :          */
    1281           0 :         retval = set_mm_exe_file(bprm->mm, bprm->file);
    1282           0 :         if (retval)
    1283             :                 goto out;
    1284             : 
    1285             :         /* If the binary is not readable then enforce mm->dumpable=0 */
    1286           0 :         would_dump(bprm, bprm->file);
    1287           0 :         if (bprm->have_execfd)
    1288           0 :                 would_dump(bprm, bprm->executable);
    1289             : 
    1290             :         /*
    1291             :          * Release all of the old mmap stuff
    1292             :          */
    1293           0 :         acct_arg_size(bprm, 0);
    1294           0 :         retval = exec_mmap(bprm->mm);
    1295           0 :         if (retval)
    1296             :                 goto out;
    1297             : 
    1298           0 :         bprm->mm = NULL;
    1299             : 
    1300           0 :         retval = exec_task_namespaces();
    1301           0 :         if (retval)
    1302             :                 goto out_unlock;
    1303             : 
    1304             : #ifdef CONFIG_POSIX_TIMERS
    1305           0 :         spin_lock_irq(&me->sighand->siglock);
    1306           0 :         posix_cpu_timers_exit(me);
    1307           0 :         spin_unlock_irq(&me->sighand->siglock);
    1308           0 :         exit_itimers(me);
    1309           0 :         flush_itimer_signals();
    1310             : #endif
    1311             : 
    1312             :         /*
    1313             :          * Make the signal table private.
    1314             :          */
    1315           0 :         retval = unshare_sighand(me);
    1316           0 :         if (retval)
    1317             :                 goto out_unlock;
    1318             : 
    1319           0 :         me->flags &= ~(PF_RANDOMIZE | PF_FORKNOEXEC |
    1320             :                                         PF_NOFREEZE | PF_NO_SETAFFINITY);
    1321           0 :         flush_thread();
    1322           0 :         me->personality &= ~bprm->per_clear;
    1323             : 
    1324           0 :         clear_syscall_work_syscall_user_dispatch(me);
    1325             : 
    1326             :         /*
    1327             :          * We have to apply CLOEXEC before we change whether the process is
    1328             :          * dumpable (in setup_new_exec) to avoid a race with a process in userspace
    1329             :          * trying to access the should-be-closed file descriptors of a process
    1330             :          * undergoing exec(2).
    1331             :          */
    1332           0 :         do_close_on_exec(me->files);
    1333             : 
    1334           0 :         if (bprm->secureexec) {
    1335             :                 /* Make sure parent cannot signal privileged process. */
    1336           0 :                 me->pdeath_signal = 0;
    1337             : 
    1338             :                 /*
    1339             :                  * For secureexec, reset the stack limit to sane default to
    1340             :                  * avoid bad behavior from the prior rlimits. This has to
    1341             :                  * happen before arch_pick_mmap_layout(), which examines
    1342             :                  * RLIMIT_STACK, but after the point of no return to avoid
    1343             :                  * needing to clean up the change on failure.
    1344             :                  */
    1345           0 :                 if (bprm->rlim_stack.rlim_cur > _STK_LIM)
    1346           0 :                         bprm->rlim_stack.rlim_cur = _STK_LIM;
    1347             :         }
    1348             : 
    1349           0 :         me->sas_ss_sp = me->sas_ss_size = 0;
    1350             : 
    1351             :         /*
    1352             :          * Figure out dumpability. Note that this checking only of current
    1353             :          * is wrong, but userspace depends on it. This should be testing
    1354             :          * bprm->secureexec instead.
    1355             :          */
    1356           0 :         if (bprm->interp_flags & BINPRM_FLAGS_ENFORCE_NONDUMP ||
    1357           0 :             !(uid_eq(current_euid(), current_uid()) &&
    1358           0 :               gid_eq(current_egid(), current_gid())))
    1359           0 :                 set_dumpable(current->mm, suid_dumpable);
    1360             :         else
    1361           0 :                 set_dumpable(current->mm, SUID_DUMP_USER);
    1362             : 
    1363             :         perf_event_exec();
    1364           0 :         __set_task_comm(me, kbasename(bprm->filename), true);
    1365             : 
    1366             :         /* An exec changes our domain. We are no longer part of the thread
    1367             :            group */
    1368           0 :         WRITE_ONCE(me->self_exec_id, me->self_exec_id + 1);
    1369           0 :         flush_signal_handlers(me, 0);
    1370             : 
    1371           0 :         retval = set_cred_ucounts(bprm->cred);
    1372           0 :         if (retval < 0)
    1373             :                 goto out_unlock;
    1374             : 
    1375             :         /*
    1376             :          * install the new credentials for this executable
    1377             :          */
    1378           0 :         security_bprm_committing_creds(bprm);
    1379             : 
    1380           0 :         commit_creds(bprm->cred);
    1381           0 :         bprm->cred = NULL;
    1382             : 
    1383             :         /*
    1384             :          * Disable monitoring for regular users
    1385             :          * when executing setuid binaries. Must
    1386             :          * wait until new credentials are committed
    1387             :          * by commit_creds() above
    1388             :          */
    1389           0 :         if (get_dumpable(me->mm) != SUID_DUMP_USER)
    1390             :                 perf_event_exit_task(me);
    1391             :         /*
    1392             :          * cred_guard_mutex must be held at least to this point to prevent
    1393             :          * ptrace_attach() from altering our determination of the task's
    1394             :          * credentials; any time after this it may be unlocked.
    1395             :          */
    1396           0 :         security_bprm_committed_creds(bprm);
    1397             : 
    1398             :         /* Pass the opened binary to the interpreter. */
    1399           0 :         if (bprm->have_execfd) {
    1400           0 :                 retval = get_unused_fd_flags(0);
    1401           0 :                 if (retval < 0)
    1402             :                         goto out_unlock;
    1403           0 :                 fd_install(retval, bprm->executable);
    1404           0 :                 bprm->executable = NULL;
    1405           0 :                 bprm->execfd = retval;
    1406             :         }
    1407             :         return 0;
    1408             : 
    1409             : out_unlock:
    1410           0 :         up_write(&me->signal->exec_update_lock);
    1411             : out:
    1412             :         return retval;
    1413             : }
    1414             : EXPORT_SYMBOL(begin_new_exec);
    1415             : 
    1416           0 : void would_dump(struct linux_binprm *bprm, struct file *file)
    1417             : {
    1418           0 :         struct inode *inode = file_inode(file);
    1419           0 :         struct mnt_idmap *idmap = file_mnt_idmap(file);
    1420           0 :         if (inode_permission(idmap, inode, MAY_READ) < 0) {
    1421             :                 struct user_namespace *old, *user_ns;
    1422           0 :                 bprm->interp_flags |= BINPRM_FLAGS_ENFORCE_NONDUMP;
    1423             : 
    1424             :                 /* Ensure mm->user_ns contains the executable */
    1425           0 :                 user_ns = old = bprm->mm->user_ns;
    1426           0 :                 while ((user_ns != &init_user_ns) &&
    1427           0 :                        !privileged_wrt_inode_uidgid(user_ns, idmap, inode))
    1428           0 :                         user_ns = user_ns->parent;
    1429             : 
    1430           0 :                 if (old != user_ns) {
    1431           0 :                         bprm->mm->user_ns = get_user_ns(user_ns);
    1432           0 :                         put_user_ns(old);
    1433             :                 }
    1434             :         }
    1435           0 : }
    1436             : EXPORT_SYMBOL(would_dump);
    1437             : 
    1438           0 : void setup_new_exec(struct linux_binprm * bprm)
    1439             : {
    1440             :         /* Setup things that can depend upon the personality */
    1441           0 :         struct task_struct *me = current;
    1442             : 
    1443           0 :         arch_pick_mmap_layout(me->mm, &bprm->rlim_stack);
    1444             : 
    1445             :         arch_setup_new_exec();
    1446             : 
    1447             :         /* Set the new mm task size. We have to do that late because it may
    1448             :          * depend on TIF_32BIT which is only updated in flush_thread() on
    1449             :          * some architectures like powerpc
    1450             :          */
    1451           0 :         me->mm->task_size = TASK_SIZE;
    1452           0 :         up_write(&me->signal->exec_update_lock);
    1453           0 :         mutex_unlock(&me->signal->cred_guard_mutex);
    1454           0 : }
    1455             : EXPORT_SYMBOL(setup_new_exec);
    1456             : 
    1457             : /* Runs immediately before start_thread() takes over. */
    1458           0 : void finalize_exec(struct linux_binprm *bprm)
    1459             : {
    1460             :         /* Store any stack rlimit changes before starting thread. */
    1461           0 :         task_lock(current->group_leader);
    1462           0 :         current->signal->rlim[RLIMIT_STACK] = bprm->rlim_stack;
    1463           0 :         task_unlock(current->group_leader);
    1464           0 : }
    1465             : EXPORT_SYMBOL(finalize_exec);
    1466             : 
    1467             : /*
    1468             :  * Prepare credentials and lock ->cred_guard_mutex.
    1469             :  * setup_new_exec() commits the new creds and drops the lock.
    1470             :  * Or, if exec fails before, free_bprm() should release ->cred
    1471             :  * and unlock.
    1472             :  */
    1473           0 : static int prepare_bprm_creds(struct linux_binprm *bprm)
    1474             : {
    1475           0 :         if (mutex_lock_interruptible(&current->signal->cred_guard_mutex))
    1476             :                 return -ERESTARTNOINTR;
    1477             : 
    1478           0 :         bprm->cred = prepare_exec_creds();
    1479           0 :         if (likely(bprm->cred))
    1480             :                 return 0;
    1481             : 
    1482           0 :         mutex_unlock(&current->signal->cred_guard_mutex);
    1483             :         return -ENOMEM;
    1484             : }
    1485             : 
    1486           0 : static void free_bprm(struct linux_binprm *bprm)
    1487             : {
    1488           0 :         if (bprm->mm) {
    1489           0 :                 acct_arg_size(bprm, 0);
    1490           0 :                 mmput(bprm->mm);
    1491             :         }
    1492           0 :         free_arg_pages(bprm);
    1493           0 :         if (bprm->cred) {
    1494           0 :                 mutex_unlock(&current->signal->cred_guard_mutex);
    1495           0 :                 abort_creds(bprm->cred);
    1496             :         }
    1497           0 :         if (bprm->file) {
    1498           0 :                 allow_write_access(bprm->file);
    1499           0 :                 fput(bprm->file);
    1500             :         }
    1501           0 :         if (bprm->executable)
    1502           0 :                 fput(bprm->executable);
    1503             :         /* If a binfmt changed the interp, free it. */
    1504           0 :         if (bprm->interp != bprm->filename)
    1505           0 :                 kfree(bprm->interp);
    1506           0 :         kfree(bprm->fdpath);
    1507           0 :         kfree(bprm);
    1508           0 : }
    1509             : 
    1510           0 : static struct linux_binprm *alloc_bprm(int fd, struct filename *filename)
    1511             : {
    1512           0 :         struct linux_binprm *bprm = kzalloc(sizeof(*bprm), GFP_KERNEL);
    1513           0 :         int retval = -ENOMEM;
    1514           0 :         if (!bprm)
    1515             :                 goto out;
    1516             : 
    1517           0 :         if (fd == AT_FDCWD || filename->name[0] == '/') {
    1518           0 :                 bprm->filename = filename->name;
    1519             :         } else {
    1520           0 :                 if (filename->name[0] == '\0')
    1521           0 :                         bprm->fdpath = kasprintf(GFP_KERNEL, "/dev/fd/%d", fd);
    1522             :                 else
    1523           0 :                         bprm->fdpath = kasprintf(GFP_KERNEL, "/dev/fd/%d/%s",
    1524             :                                                   fd, filename->name);
    1525           0 :                 if (!bprm->fdpath)
    1526             :                         goto out_free;
    1527             : 
    1528           0 :                 bprm->filename = bprm->fdpath;
    1529             :         }
    1530           0 :         bprm->interp = bprm->filename;
    1531             : 
    1532           0 :         retval = bprm_mm_init(bprm);
    1533           0 :         if (retval)
    1534             :                 goto out_free;
    1535             :         return bprm;
    1536             : 
    1537             : out_free:
    1538           0 :         free_bprm(bprm);
    1539             : out:
    1540           0 :         return ERR_PTR(retval);
    1541             : }
    1542             : 
    1543           0 : int bprm_change_interp(const char *interp, struct linux_binprm *bprm)
    1544             : {
    1545             :         /* If a binfmt changed the interp, free it first. */
    1546           0 :         if (bprm->interp != bprm->filename)
    1547           0 :                 kfree(bprm->interp);
    1548           0 :         bprm->interp = kstrdup(interp, GFP_KERNEL);
    1549           0 :         if (!bprm->interp)
    1550             :                 return -ENOMEM;
    1551           0 :         return 0;
    1552             : }
    1553             : EXPORT_SYMBOL(bprm_change_interp);
    1554             : 
    1555             : /*
    1556             :  * determine how safe it is to execute the proposed program
    1557             :  * - the caller must hold ->cred_guard_mutex to protect against
    1558             :  *   PTRACE_ATTACH or seccomp thread-sync
    1559             :  */
    1560           0 : static void check_unsafe_exec(struct linux_binprm *bprm)
    1561             : {
    1562           0 :         struct task_struct *p = current, *t;
    1563             :         unsigned n_fs;
    1564             : 
    1565           0 :         if (p->ptrace)
    1566           0 :                 bprm->unsafe |= LSM_UNSAFE_PTRACE;
    1567             : 
    1568             :         /*
    1569             :          * This isn't strictly necessary, but it makes it harder for LSMs to
    1570             :          * mess up.
    1571             :          */
    1572           0 :         if (task_no_new_privs(current))
    1573           0 :                 bprm->unsafe |= LSM_UNSAFE_NO_NEW_PRIVS;
    1574             : 
    1575             :         /*
    1576             :          * If another task is sharing our fs, we cannot safely
    1577             :          * suid exec because the differently privileged task
    1578             :          * will be able to manipulate the current directory, etc.
    1579             :          * It would be nice to force an unshare instead...
    1580             :          */
    1581           0 :         t = p;
    1582           0 :         n_fs = 1;
    1583           0 :         spin_lock(&p->fs->lock);
    1584             :         rcu_read_lock();
    1585           0 :         while_each_thread(p, t) {
    1586           0 :                 if (t->fs == p->fs)
    1587           0 :                         n_fs++;
    1588             :         }
    1589             :         rcu_read_unlock();
    1590             : 
    1591           0 :         if (p->fs->users > n_fs)
    1592           0 :                 bprm->unsafe |= LSM_UNSAFE_SHARE;
    1593             :         else
    1594           0 :                 p->fs->in_exec = 1;
    1595           0 :         spin_unlock(&p->fs->lock);
    1596           0 : }
    1597             : 
    1598           0 : static void bprm_fill_uid(struct linux_binprm *bprm, struct file *file)
    1599             : {
    1600             :         /* Handle suid and sgid on files */
    1601             :         struct mnt_idmap *idmap;
    1602           0 :         struct inode *inode = file_inode(file);
    1603             :         unsigned int mode;
    1604             :         vfsuid_t vfsuid;
    1605             :         vfsgid_t vfsgid;
    1606             : 
    1607           0 :         if (!mnt_may_suid(file->f_path.mnt))
    1608             :                 return;
    1609             : 
    1610           0 :         if (task_no_new_privs(current))
    1611             :                 return;
    1612             : 
    1613           0 :         mode = READ_ONCE(inode->i_mode);
    1614           0 :         if (!(mode & (S_ISUID|S_ISGID)))
    1615             :                 return;
    1616             : 
    1617           0 :         idmap = file_mnt_idmap(file);
    1618             : 
    1619             :         /* Be careful if suid/sgid is set */
    1620           0 :         inode_lock(inode);
    1621             : 
    1622             :         /* reload atomically mode/uid/gid now that lock held */
    1623           0 :         mode = inode->i_mode;
    1624           0 :         vfsuid = i_uid_into_vfsuid(idmap, inode);
    1625           0 :         vfsgid = i_gid_into_vfsgid(idmap, inode);
    1626           0 :         inode_unlock(inode);
    1627             : 
    1628             :         /* We ignore suid/sgid if there are no mappings for them in the ns */
    1629           0 :         if (!vfsuid_has_mapping(bprm->cred->user_ns, vfsuid) ||
    1630           0 :             !vfsgid_has_mapping(bprm->cred->user_ns, vfsgid))
    1631             :                 return;
    1632             : 
    1633           0 :         if (mode & S_ISUID) {
    1634           0 :                 bprm->per_clear |= PER_CLEAR_ON_SETID;
    1635           0 :                 bprm->cred->euid = vfsuid_into_kuid(vfsuid);
    1636             :         }
    1637             : 
    1638           0 :         if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP)) {
    1639           0 :                 bprm->per_clear |= PER_CLEAR_ON_SETID;
    1640           0 :                 bprm->cred->egid = vfsgid_into_kgid(vfsgid);
    1641             :         }
    1642             : }
    1643             : 
    1644             : /*
    1645             :  * Compute brpm->cred based upon the final binary.
    1646             :  */
    1647           0 : static int bprm_creds_from_file(struct linux_binprm *bprm)
    1648             : {
    1649             :         /* Compute creds based on which file? */
    1650           0 :         struct file *file = bprm->execfd_creds ? bprm->executable : bprm->file;
    1651             : 
    1652           0 :         bprm_fill_uid(bprm, file);
    1653           0 :         return security_bprm_creds_from_file(bprm, file);
    1654             : }
    1655             : 
    1656             : /*
    1657             :  * Fill the binprm structure from the inode.
    1658             :  * Read the first BINPRM_BUF_SIZE bytes
    1659             :  *
    1660             :  * This may be called multiple times for binary chains (scripts for example).
    1661             :  */
    1662           0 : static int prepare_binprm(struct linux_binprm *bprm)
    1663             : {
    1664           0 :         loff_t pos = 0;
    1665             : 
    1666           0 :         memset(bprm->buf, 0, BINPRM_BUF_SIZE);
    1667           0 :         return kernel_read(bprm->file, bprm->buf, BINPRM_BUF_SIZE, &pos);
    1668             : }
    1669             : 
    1670             : /*
    1671             :  * Arguments are '\0' separated strings found at the location bprm->p
    1672             :  * points to; chop off the first by relocating brpm->p to right after
    1673             :  * the first '\0' encountered.
    1674             :  */
    1675           0 : int remove_arg_zero(struct linux_binprm *bprm)
    1676             : {
    1677           0 :         int ret = 0;
    1678             :         unsigned long offset;
    1679             :         char *kaddr;
    1680             :         struct page *page;
    1681             : 
    1682           0 :         if (!bprm->argc)
    1683             :                 return 0;
    1684             : 
    1685             :         do {
    1686           0 :                 offset = bprm->p & ~PAGE_MASK;
    1687           0 :                 page = get_arg_page(bprm, bprm->p, 0);
    1688           0 :                 if (!page) {
    1689             :                         ret = -EFAULT;
    1690             :                         goto out;
    1691             :                 }
    1692           0 :                 kaddr = kmap_local_page(page);
    1693             : 
    1694           0 :                 for (; offset < PAGE_SIZE && kaddr[offset];
    1695           0 :                                 offset++, bprm->p++)
    1696             :                         ;
    1697             : 
    1698           0 :                 kunmap_local(kaddr);
    1699           0 :                 put_arg_page(page);
    1700           0 :         } while (offset == PAGE_SIZE);
    1701             : 
    1702           0 :         bprm->p++;
    1703           0 :         bprm->argc--;
    1704           0 :         ret = 0;
    1705             : 
    1706             : out:
    1707             :         return ret;
    1708             : }
    1709             : EXPORT_SYMBOL(remove_arg_zero);
    1710             : 
    1711             : #define printable(c) (((c)=='\t') || ((c)=='\n') || (0x20<=(c) && (c)<=0x7e))
    1712             : /*
    1713             :  * cycle the list of binary formats handler, until one recognizes the image
    1714             :  */
    1715           0 : static int search_binary_handler(struct linux_binprm *bprm)
    1716             : {
    1717           0 :         bool need_retry = IS_ENABLED(CONFIG_MODULES);
    1718             :         struct linux_binfmt *fmt;
    1719             :         int retval;
    1720             : 
    1721           0 :         retval = prepare_binprm(bprm);
    1722           0 :         if (retval < 0)
    1723             :                 return retval;
    1724             : 
    1725           0 :         retval = security_bprm_check(bprm);
    1726             :         if (retval)
    1727             :                 return retval;
    1728             : 
    1729           0 :         retval = -ENOENT;
    1730             :  retry:
    1731           0 :         read_lock(&binfmt_lock);
    1732           0 :         list_for_each_entry(fmt, &formats, lh) {
    1733           0 :                 if (!try_module_get(fmt->module))
    1734             :                         continue;
    1735           0 :                 read_unlock(&binfmt_lock);
    1736             : 
    1737           0 :                 retval = fmt->load_binary(bprm);
    1738             : 
    1739           0 :                 read_lock(&binfmt_lock);
    1740           0 :                 put_binfmt(fmt);
    1741           0 :                 if (bprm->point_of_no_return || (retval != -ENOEXEC)) {
    1742           0 :                         read_unlock(&binfmt_lock);
    1743           0 :                         return retval;
    1744             :                 }
    1745             :         }
    1746           0 :         read_unlock(&binfmt_lock);
    1747             : 
    1748             :         if (need_retry) {
    1749             :                 if (printable(bprm->buf[0]) && printable(bprm->buf[1]) &&
    1750             :                     printable(bprm->buf[2]) && printable(bprm->buf[3]))
    1751             :                         return retval;
    1752             :                 if (request_module("binfmt-%04x", *(ushort *)(bprm->buf + 2)) < 0)
    1753             :                         return retval;
    1754             :                 need_retry = false;
    1755             :                 goto retry;
    1756             :         }
    1757             : 
    1758           0 :         return retval;
    1759             : }
    1760             : 
    1761             : /* binfmt handlers will call back into begin_new_exec() on success. */
                       /*
                        * exec_binprm() - run the binary handlers for @bprm, following
                        * interpreter hand-offs.
                        *
                        * Calls search_binary_handler() in a loop.  Whenever a call returns
                        * with bprm->interpreter set, that file becomes the new bprm->file and
                        * the search is repeated on it.
                        *
                        * Returns 0 once a search finishes with no interpreter pending (after
                        * emitting the audit, tracepoint, ptrace and proc-connector
                        * notifications), the negative value returned by
                        * search_binary_handler(), -ELOOP when depth exceeds 5, or -ENOEXEC
                        * when have_execfd is set but bprm->executable is already occupied.
                        */
    1762           0 : static int exec_binprm(struct linux_binprm *bprm)
    1763             : {
    1764             :         pid_t old_pid, old_vpid;
    1765             :         int ret, depth;
    1766             : 
    1767             :         /* Need to fetch pid before load_binary changes it */
    1768           0 :         old_pid = current->pid;
                               /*
                                * old_vpid is current's pid as numbered in the active pid
                                * namespace of current->parent; it is handed to ptrace_event()
                                * below.  The lookup is done inside an RCU read-side section.
                                */
    1769             :         rcu_read_lock();
    1770           0 :         old_vpid = task_pid_nr_ns(current, task_active_pid_ns(current->parent));
    1771             :         rcu_read_unlock();
    1772             : 
    1773             :         /*
                                * search_binary_handler() runs for depth 0..5, i.e. at most six
                                * searches (up to five binfmt rewrites), before failing hard
                                * with -ELOOP.
                                */
    1774           0 :         for (depth = 0;; depth++) {
    1775             :                 struct file *exec;
    1776           0 :                 if (depth > 5)
    1777             :                         return -ELOOP;
    1778             : 
    1779           0 :                 ret = search_binary_handler(bprm);
    1780           0 :                 if (ret < 0)
    1781             :                         return ret;
                                       /* No interpreter requested: the search is complete. */
    1782           0 :                 if (!bprm->interpreter)
    1783             :                         break;
    1784             : 
                                       /*
                                        * Promote the interpreter to be the file that the next
                                        * iteration searches; 'exec' keeps the file being replaced.
                                        */
    1785           0 :                 exec = bprm->file;
    1786           0 :                 bprm->file = bprm->interpreter;
    1787           0 :                 bprm->interpreter = NULL;
    1788             : 
    1789           0 :                 allow_write_access(exec);
                                       /*
                                        * With have_execfd the replaced file is kept in
                                        * bprm->executable instead of being dropped; that slot can
                                        * be filled only once, a second attempt is -ENOEXEC.
                                        */
    1790           0 :                 if (unlikely(bprm->have_execfd)) {
    1791           0 :                         if (bprm->executable) {
    1792           0 :                                 fput(exec);
    1793           0 :                                 return -ENOEXEC;
    1794             :                         }
    1795           0 :                         bprm->executable = exec;
    1796             :                 } else
    1797           0 :                         fput(exec);
    1798             :         }
    1799             : 
                               /* Success: report the exec to audit, tracing, ptrace and proc connector. */
    1800           0 :         audit_bprm(bprm);
    1801           0 :         trace_sched_process_exec(current, old_pid, bprm);
    1802           0 :         ptrace_event(PTRACE_EVENT_EXEC, old_vpid);
    1803           0 :         proc_exec_connector(current);
    1804           0 :         return 0;
    1805             : }
    1806             : 
    1807             : /*
    1808             :  * sys_execve() executes a new program.
                        *
                        * bprm_execve() opens the executable described by @fd, @filename and
                        * @flags with do_open_execat(), stores it in bprm->file and hands @bprm
                        * to exec_binprm().
                        *
                        * Returns the non-negative result of exec_binprm() on success.
                        * Otherwise returns the error from prepare_bprm_creds(),
                        * do_open_execat(), security_bprm_creds_for_exec() or exec_binprm().
                        *
                        * When a failure is seen while bprm->point_of_no_return is set, the
                        * task gets a fatal SIGSEGV unless a fatal signal is already pending.
                        *
                        * current->in_execve and current->fs->in_exec are cleared on every
                        * exit path taken after current->in_execve has been set.
                        *
                        * The opened file is not released in this function.
                        * NOTE(review): presumably the caller's free_bprm() drops bprm->file;
                        * confirm.
    1809             :  */
    1810           0 : static int bprm_execve(struct linux_binprm *bprm,
    1811             :                        int fd, struct filename *filename, int flags)
    1812             : {
    1813             :         struct file *file;
    1814             :         int retval;
    1815             : 
                               /* Nothing has been marked yet, so a failure here returns directly. */
    1816           0 :         retval = prepare_bprm_creds(bprm);
    1817           0 :         if (retval)
    1818             :                 return retval;
    1819             : 
    1820             :         /*
    1821             :          * Check for unsafe execution states before exec_binprm(), which
    1822             :          * will call back into begin_new_exec(), into bprm_creds_from_file(),
    1823             :          * where setuid-ness is evaluated.
    1824             :          */
    1825           0 :         check_unsafe_exec(bprm);
    1826           0 :         current->in_execve = 1;
    1827           0 :         sched_mm_cid_before_execve(current);
    1828             : 
    1829           0 :         file = do_open_execat(fd, filename, flags);
                               /*
                                * retval is assigned unconditionally; it only matters when
                                * IS_ERR(file), because the success path overwrites it below.
                                */
    1830           0 :         retval = PTR_ERR(file);
    1831           0 :         if (IS_ERR(file))
    1832             :                 goto out_unmark;
    1833             : 
    1834             :         sched_exec();
    1835             : 
    1836           0 :         bprm->file = file;
    1837             :         /*
    1838             :          * Record that a name derived from an O_CLOEXEC fd will be
    1839             :          * inaccessible after exec.  This allows the code in exec to
    1840             :          * choose to fail when the executable is not mmaped into the
    1841             :          * interpreter and an open file descriptor is not passed to
    1842             :          * the interpreter.  This makes for a better user experience
    1843             :          * than having the interpreter start and then immediately fail
    1844             :          * when it finds the executable is inaccessible.
    1845             :          */
    1846           0 :         if (bprm->fdpath && get_close_on_exec(fd))
    1847           0 :                 bprm->interp_flags |= BINPRM_FLAGS_PATH_INACCESSIBLE;
    1848             : 
    1849             :         /* Set the unchanging part of bprm->cred */
    1850           0 :         retval = security_bprm_creds_for_exec(bprm);
    1851             :         if (retval)
    1852             :                 goto out;
    1853             : 
    1854           0 :         retval = exec_binprm(bprm);
    1855           0 :         if (retval < 0)
    1856             :                 goto out;
    1857             : 
    1858           0 :         sched_mm_cid_after_execve(current);
    1859             :         /* execve succeeded */
    1860           0 :         current->fs->in_exec = 0;
    1861           0 :         current->in_execve = 0;
    1862           0 :         rseq_execve(current);
    1863           0 :         user_events_execve(current);
    1864           0 :         acct_update_integrals(current);
    1865           0 :         task_numa_free(current, false);
    1866           0 :         return retval;
    1867             : 
    1868             : out:
    1869             :         /*
    1870             :          * If past the point of no return ensure the code never
    1871             :          * returns to the userspace process.  Use an existing fatal
    1872             :          * signal if present otherwise terminate the process with
    1873             :          * SIGSEGV.
    1874             :          */
    1875           0 :         if (bprm->point_of_no_return && !fatal_signal_pending(current))
    1876           0 :                 force_fatal_sig(SIGSEGV);
    1877             : 
                       /* Failure exit: undo the in_execve / in_exec marking and report retval. */
    1878             : out_unmark:
    1879           0 :         sched_mm_cid_after_execve(current);
    1880           0 :         current->fs->in_exec = 0;
    1881           0 :         current->in_execve = 0;
    1882             : 
    1883           0 :         return retval;
    1884             : }
    1885             : 
    1886           0 : static int do_execveat_common(int fd, struct filename *filename,
    1887             :                               struct user_arg_ptr argv,
    1888             :                               struct user_arg_ptr envp,
    1889             :                               int flags)
    1890             : {
                               /*
                                * Common implementation behind the native and compat
                                * execve()/execveat() helpers in this file.
                                *
                                * @fd, @filename: passed to alloc_bprm() and bprm_execve().
                                * @argv, @envp:   argument and environment vectors wrapped in
                                *                 struct user_arg_ptr; counted with count() and
                                *                 copied into the bprm with copy_strings().
                                * @flags:         passed to bprm_execve().
                                *
                                * Returns the result of bprm_execve(), or the error of the first
                                * step that failed before it (PTR_ERR(filename), -EAGAIN from
                                * the RLIMIT_NPROC recheck, alloc_bprm(), count(),
                                * bprm_stack_limits(), copy_string_kernel(), copy_strings()).
                                *
                                * @filename is released with putname() on every path except the
                                * early IS_ERR(filename) return; the bprm is released with
                                * free_bprm() on every path after it was allocated.
                                */
    1891             :         struct linux_binprm *bprm;
    1892             :         int retval;
    1893             : 
    1894           0 :         if (IS_ERR(filename))
    1895           0 :                 return PTR_ERR(filename);
    1896             : 
    1897             :         /*
    1898             :          * We move the actual failure in case of RLIMIT_NPROC excess from
    1899             :          * set*uid() to execve() because too many poorly written programs
    1900             :          * don't check setuid() return code.  Here we additionally recheck
    1901             :          * whether NPROC limit is still exceeded.
    1902             :          */
    1903           0 :         if ((current->flags & PF_NPROC_EXCEEDED) &&
    1904           0 :             is_rlimit_overlimit(current_ucounts(), UCOUNT_RLIMIT_NPROC, rlimit(RLIMIT_NPROC))) {
    1905             :                 retval = -EAGAIN;
    1906             :                 goto out_ret;
    1907             :         }
    1908             : 
    1909             :         /* We're below the limit (still or again), so we don't want to make
    1910             :          * further execve() calls fail. */
    1911           0 :         current->flags &= ~PF_NPROC_EXCEEDED;
    1912             : 
    1913           0 :         bprm = alloc_bprm(fd, filename);
    1914           0 :         if (IS_ERR(bprm)) {
    1915           0 :                 retval = PTR_ERR(bprm);
    1916           0 :                 goto out_ret;
    1917             :         }
    1918             : 
                               /*
                                * A count of 0 is only warned about here; the empty argv[0] is
                                * added further down, after the strings have been copied.
                                */
    1919           0 :         retval = count(argv, MAX_ARG_STRINGS);
    1920           0 :         if (retval == 0)
    1921           0 :                 pr_warn_once("process '%s' launched '%s' with NULL argv: empty string added\n",
    1922             :                              current->comm, bprm->filename);
    1923           0 :         if (retval < 0)
    1924             :                 goto out_free;
    1925           0 :         bprm->argc = retval;
    1926             : 
    1927           0 :         retval = count(envp, MAX_ARG_STRINGS);
    1928           0 :         if (retval < 0)
    1929             :                 goto out_free;
    1930           0 :         bprm->envc = retval;
    1931             : 
    1932           0 :         retval = bprm_stack_limits(bprm);
    1933           0 :         if (retval < 0)
    1934             :                 goto out_free;
    1935             : 
                               /*
                                * Strings are copied in this order: filename, then envp, then
                                * argv.  bprm->exec records bprm->p as it stands right after the
                                * filename copy.
                                */
    1936           0 :         retval = copy_string_kernel(bprm->filename, bprm);
    1937           0 :         if (retval < 0)
    1938             :                 goto out_free;
    1939           0 :         bprm->exec = bprm->p;
    1940             : 
    1941           0 :         retval = copy_strings(bprm->envc, envp, bprm);
    1942           0 :         if (retval < 0)
    1943             :                 goto out_free;
    1944             : 
    1945           0 :         retval = copy_strings(bprm->argc, argv, bprm);
    1946           0 :         if (retval < 0)
    1947             :                 goto out_free;
    1948             : 
    1949             :         /*
    1950             :          * When argv is empty, add an empty string ("") as argv[0] to
    1951             :          * ensure confused userspace programs that start processing
    1952             :          * from argv[1] won't end up walking envp. See also
    1953             :          * bprm_stack_limits().
    1954             :          */
    1955           0 :         if (bprm->argc == 0) {
    1956           0 :                 retval = copy_string_kernel("", bprm);
    1957           0 :                 if (retval < 0)
    1958             :                         goto out_free;
    1959           0 :                 bprm->argc = 1;
    1960             :         }
    1961             : 
                               /* Success and failure both fall through the cleanup labels below. */
    1962           0 :         retval = bprm_execve(bprm, fd, filename, flags);
    1963             : out_free:
    1964           0 :         free_bprm(bprm);
    1965             : 
    1966             : out_ret:
    1967           0 :         putname(filename);
    1968           0 :         return retval;
    1969             : }
    1970             : 
    1971           0 : int kernel_execve(const char *kernel_filename,
    1972             :                   const char *const *argv, const char *const *envp)
    1973             : {
                               /*
                                * Exec a program using a path and argument/environment vectors
                                * that are plain kernel pointers (no __user annotation).
                                *
                                * @kernel_filename: path, turned into a struct filename with
                                *                   getname_kernel().
                                * @argv, @envp:     string vectors, counted with
                                *                   count_strings_kernel() and copied with
                                *                   copy_strings_kernel().
                                *
                                * The lookup always uses fd = AT_FDCWD and flags = 0.
                                *
                                * Returns the result of bprm_execve(), -EINVAL when called with
                                * PF_KTHREAD set or with an argv that counts to zero strings, or
                                * the error of the first helper that failed.
                                */
    1974             :         struct filename *filename;
    1975             :         struct linux_binprm *bprm;
    1976           0 :         int fd = AT_FDCWD;
    1977             :         int retval;
    1978             : 
    1979             :         /* Kernel threads must not call execve: warn once and fail with -EINVAL. */
    1980           0 :         if (WARN_ON_ONCE(current->flags & PF_KTHREAD))
    1981             :                 return -EINVAL;
    1982             : 
    1983           0 :         filename = getname_kernel(kernel_filename);
    1984           0 :         if (IS_ERR(filename))
    1985           0 :                 return PTR_ERR(filename);
    1986             : 
    1987           0 :         bprm = alloc_bprm(fd, filename);
    1988           0 :         if (IS_ERR(bprm)) {
    1989           0 :                 retval = PTR_ERR(bprm);
    1990           0 :                 goto out_ret;
    1991             :         }
    1992             : 
                               /*
                                * Unlike do_execveat_common(), an empty argv is rejected here
                                * (warn once, -EINVAL) instead of being patched with "".
                                */
    1993           0 :         retval = count_strings_kernel(argv);
    1994           0 :         if (WARN_ON_ONCE(retval == 0))
    1995           0 :                 retval = -EINVAL;
    1996           0 :         if (retval < 0)
    1997             :                 goto out_free;
    1998           0 :         bprm->argc = retval;
    1999             : 
    2000           0 :         retval = count_strings_kernel(envp);
    2001           0 :         if (retval < 0)
    2002             :                 goto out_free;
    2003           0 :         bprm->envc = retval;
    2004             : 
    2005           0 :         retval = bprm_stack_limits(bprm);
    2006           0 :         if (retval < 0)
    2007             :                 goto out_free;
    2008             : 
                               /* Copy order: filename, then envp, then argv. */
    2009           0 :         retval = copy_string_kernel(bprm->filename, bprm);
    2010           0 :         if (retval < 0)
    2011             :                 goto out_free;
    2012           0 :         bprm->exec = bprm->p;
    2013             : 
    2014           0 :         retval = copy_strings_kernel(bprm->envc, envp, bprm);
    2015           0 :         if (retval < 0)
    2016             :                 goto out_free;
    2017             : 
    2018           0 :         retval = copy_strings_kernel(bprm->argc, argv, bprm);
    2019           0 :         if (retval < 0)
    2020             :                 goto out_free;
    2021             : 
    2022           0 :         retval = bprm_execve(bprm, fd, filename, 0);
                       /* Cleanup is shared by the success and failure paths. */
    2023             : out_free:
    2024           0 :         free_bprm(bprm);
    2025             : out_ret:
    2026           0 :         putname(filename);
    2027           0 :         return retval;
    2028             : }
    2029             : 
    2030             : static int do_execve(struct filename *filename,
    2031             :         const char __user *const __user *__argv,
    2032             :         const char __user *const __user *__envp)
    2033             : {
                               /*
                                * Native execve helper: wraps the user pointers in
                                * struct user_arg_ptr (ptr.native) and calls
                                * do_execveat_common() with fd = AT_FDCWD and flags = 0.
                                * Returns whatever do_execveat_common() returns.
                                */
    2034           0 :         struct user_arg_ptr argv = { .ptr.native = __argv };
    2035           0 :         struct user_arg_ptr envp = { .ptr.native = __envp };
    2036           0 :         return do_execveat_common(AT_FDCWD, filename, argv, envp, 0);
    2037             : }
    2038             : 
    2039             : static int do_execveat(int fd, struct filename *filename,
    2040             :                 const char __user *const __user *__argv,
    2041             :                 const char __user *const __user *__envp,
    2042             :                 int flags)
    2043             : {
                               /*
                                * Native execveat helper: wraps the user pointers in
                                * struct user_arg_ptr (ptr.native) and forwards @fd, @filename
                                * and @flags unchanged to do_execveat_common().
                                * Returns whatever do_execveat_common() returns.
                                */
    2044           0 :         struct user_arg_ptr argv = { .ptr.native = __argv };
    2045           0 :         struct user_arg_ptr envp = { .ptr.native = __envp };
    2046             : 
    2047           0 :         return do_execveat_common(fd, filename, argv, envp, flags);
    2048             : }
    2049             : 
    2050             : #ifdef CONFIG_COMPAT
    2051             : static int compat_do_execve(struct filename *filename,
    2052             :         const compat_uptr_t __user *__argv,
    2053             :         const compat_uptr_t __user *__envp)
    2054             : {
                               /*
                                * Compat execve helper: wraps the compat_uptr_t vectors in
                                * struct user_arg_ptr with is_compat set (ptr.compat) and calls
                                * do_execveat_common() with fd = AT_FDCWD and flags = 0.
                                * Returns whatever do_execveat_common() returns.
                                */
    2055             :         struct user_arg_ptr argv = {
    2056             :                 .is_compat = true,
    2057             :                 .ptr.compat = __argv,
    2058             :         };
    2059             :         struct user_arg_ptr envp = {
    2060             :                 .is_compat = true,
    2061             :                 .ptr.compat = __envp,
    2062             :         };
    2063             :         return do_execveat_common(AT_FDCWD, filename, argv, envp, 0);
    2064             : }
    2065             : 
    2066             : static int compat_do_execveat(int fd, struct filename *filename,
    2067             :                               const compat_uptr_t __user *__argv,
    2068             :                               const compat_uptr_t __user *__envp,
    2069             :                               int flags)
    2070             : {
                               /*
                                * Compat execveat helper: wraps the compat_uptr_t vectors in
                                * struct user_arg_ptr with is_compat set (ptr.compat) and
                                * forwards @fd, @filename and @flags unchanged to
                                * do_execveat_common().  Returns whatever that call returns.
                                */
    2071             :         struct user_arg_ptr argv = {
    2072             :                 .is_compat = true,
    2073             :                 .ptr.compat = __argv,
    2074             :         };
    2075             :         struct user_arg_ptr envp = {
    2076             :                 .is_compat = true,
    2077             :                 .ptr.compat = __envp,
    2078             :         };
    2079             :         return do_execveat_common(fd, filename, argv, envp, flags);
    2080             : }
    2081             : #endif
    2082             : 
    2083           0 : void set_binfmt(struct linux_binfmt *new)
    2084             : {
                               /*
                                * Make @new the binfmt recorded in current->mm.
                                *
                                * The module reference of the previously recorded binfmt (if
                                * any) is dropped with module_put(), and a reference on
                                * @new->module is taken with __module_get() when @new is not
                                * NULL.  @new may be NULL, which just clears mm->binfmt.
                                *
                                * NOTE(review): current->mm is dereferenced without a NULL
                                * check, so callers presumably always have an mm; confirm.
                                */
    2085           0 :         struct mm_struct *mm = current->mm;
    2086             : 
    2087           0 :         if (mm->binfmt)
    2088             :                 module_put(mm->binfmt->module);
    2089             : 
    2090           0 :         mm->binfmt = new;
    2091             :         if (new)
    2092             :                 __module_get(new->module);
    2093           0 : }
    2094             : EXPORT_SYMBOL(set_binfmt);
    2095             : 
    2096             : /*
    2097             :  * set_dumpable stores three-value SUID_DUMP_* into mm->flags.
                        *
                        * @value is compared as unsigned, so negative values as well as
                        * values above SUID_DUMP_ROOT trigger WARN_ON() and return without
                        * touching mm->flags.  Otherwise mm->flags is updated through
                        * set_mask_bits() with MMF_DUMPABLE_MASK as the mask and @value as
                        * the new bits.
    2098             :  */
    2099           0 : void set_dumpable(struct mm_struct *mm, int value)
    2100             : {
    2101           0 :         if (WARN_ON((unsigned)value > SUID_DUMP_ROOT))
    2102             :                 return;
    2103             : 
    2104           0 :         set_mask_bits(&mm->flags, MMF_DUMPABLE_MASK, value);
    2105             : }
    2106             : 
    2107           0 : SYSCALL_DEFINE3(execve,
    2108             :                 const char __user *, filename,
    2109             :                 const char __user *const __user *, argv,
    2110             :                 const char __user *const __user *, envp)
    2111             : {
                               /*
                                * execve system call entry: converts the user path with
                                * getname() and forwards to do_execve().  The getname() result
                                * is not checked here; do_execveat_common() tests it with
                                * IS_ERR().
                                */
    2112           0 :         return do_execve(getname(filename), argv, envp);
    2113             : }
    2114             : 
    2115           0 : SYSCALL_DEFINE5(execveat,
    2116             :                 int, fd, const char __user *, filename,
    2117             :                 const char __user *const __user *, argv,
    2118             :                 const char __user *const __user *, envp,
    2119             :                 int, flags)
    2120             : {
                               /*
                                * execveat system call entry: converts the user path with
                                * getname_uflags(filename, flags) and forwards everything to
                                * do_execveat().  The getname_uflags() result is not checked
                                * here; do_execveat_common() tests it with IS_ERR().
                                */
    2121           0 :         return do_execveat(fd,
    2122             :                            getname_uflags(filename, flags),
    2123             :                            argv, envp, flags);
    2124             : }
    2125             : 
    2126             : #ifdef CONFIG_COMPAT
    2127             : COMPAT_SYSCALL_DEFINE3(execve, const char __user *, filename,
    2128             :         const compat_uptr_t __user *, argv,
    2129             :         const compat_uptr_t __user *, envp)
    2130             : {
                               /*
                                * Compat execve system call entry: converts the user path with
                                * getname() and forwards to compat_do_execve().  The getname()
                                * result is not checked here; do_execveat_common() tests it
                                * with IS_ERR().
                                */
    2131             :         return compat_do_execve(getname(filename), argv, envp);
    2132             : }
    2133             : 
    2134             : COMPAT_SYSCALL_DEFINE5(execveat, int, fd,
    2135             :                        const char __user *, filename,
    2136             :                        const compat_uptr_t __user *, argv,
    2137             :                        const compat_uptr_t __user *, envp,
    2138             :                        int,  flags)
    2139             : {
                               /*
                                * Compat execveat system call entry: converts the user path
                                * with getname_uflags(filename, flags) and forwards everything
                                * to compat_do_execveat().  The getname_uflags() result is not
                                * checked here; do_execveat_common() tests it with IS_ERR().
                                */
    2140             :         return compat_do_execveat(fd,
    2141             :                                   getname_uflags(filename, flags),
    2142             :                                   argv, envp, flags);
    2143             : }
    2144             : #endif
    2145             : 
    2146             : #ifdef CONFIG_SYSCTL
    2147             : 
    2148           0 : static int proc_dointvec_minmax_coredump(struct ctl_table *table, int write,
    2149             :                 void *buffer, size_t *lenp, loff_t *ppos)
    2150             : {
                               /*
                                * sysctl handler: delegates to proc_dointvec_minmax() with the
                                * same arguments and, when that returns 0, additionally calls
                                * validate_coredump_safety().  This happens for reads as well
                                * as writes, since @write is not examined here.
                                * Returns the proc_dointvec_minmax() result unchanged.
                                */
    2151           0 :         int error = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
    2152             : 
    2153           0 :         if (!error)
    2154           0 :                 validate_coredump_safety();
    2155           0 :         return error;
    2156             : }
    2157             : 
    2158             : static struct ctl_table fs_exec_sysctls[] = {
                               /*
                                * Single entry "suid_dumpable": an int backed by the
                                * suid_dumpable variable, mode 0644, handled by
                                * proc_dointvec_minmax_coredump() with extra1/extra2 set to
                                * SYSCTL_ZERO and SYSCTL_TWO (presumably the accepted
                                * minimum and maximum; confirm against proc_dointvec_minmax()).
                                */
    2159             :         {
    2160             :                 .procname       = "suid_dumpable",
    2161             :                 .data           = &suid_dumpable,
    2162             :                 .maxlen         = sizeof(int),
    2163             :                 .mode           = 0644,
    2164             :                 .proc_handler   = proc_dointvec_minmax_coredump,
    2165             :                 .extra1         = SYSCTL_ZERO,
    2166             :                 .extra2         = SYSCTL_TWO,
    2167             :         },
                               /* Empty trailing entry (presumably the table terminator). */
    2168             :         { }
    2169             : };
    2170             : 
    2171           1 : static int __init init_fs_exec_sysctls(void)
    2172             : {
                               /*
                                * Registers fs_exec_sysctls under the "fs" sysctl path via
                                * register_sysctl_init() and always returns 0.  Hooked up as an
                                * fs_initcall below.
                                */
    2173           1 :         register_sysctl_init("fs", fs_exec_sysctls);
    2174           1 :         return 0;
    2175             : }
    2176             : 
    2177             : fs_initcall(init_fs_exec_sysctls);
    2178             : #endif /* CONFIG_SYSCTL */

Generated by: LCOV version 1.14