LCOV - code coverage report
Current view: top level - fs - exec.c (source / functions) Hit Total Coverage
Test: coverage.info Lines: 16 713 2.2 %
Date: 2023-04-06 08:38:28 Functions: 3 48 6.2 %

          Line data    Source code
       1             : // SPDX-License-Identifier: GPL-2.0-only
       2             : /*
       3             :  *  linux/fs/exec.c
       4             :  *
       5             :  *  Copyright (C) 1991, 1992  Linus Torvalds
       6             :  */
       7             : 
       8             : /*
       9             :  * #!-checking implemented by tytso.
      10             :  */
      11             : /*
      12             :  * Demand-loading implemented 01.12.91 - no need to read anything but
      13             :  * the header into memory. The inode of the executable is put into
      14             :  * "current->executable", and page faults do the actual loading. Clean.
      15             :  *
      16             :  * Once more I can proudly say that linux stood up to being changed: it
      17             :  * was less than 2 hours work to get demand-loading completely implemented.
      18             :  *
      19             :  * Demand loading changed July 1993 by Eric Youngdale.   Use mmap instead,
      20             :  * current->executable is only used by the procfs.  This allows a dispatch
      21             :  * table to check for several different types  of binary formats.  We keep
      22             :  * trying until we recognize the file or we run out of supported binary
      23             :  * formats.
      24             :  */
      25             : 
      26             : #include <linux/kernel_read_file.h>
      27             : #include <linux/slab.h>
      28             : #include <linux/file.h>
      29             : #include <linux/fdtable.h>
      30             : #include <linux/mm.h>
      31             : #include <linux/stat.h>
      32             : #include <linux/fcntl.h>
      33             : #include <linux/swap.h>
      34             : #include <linux/string.h>
      35             : #include <linux/init.h>
      36             : #include <linux/sched/mm.h>
      37             : #include <linux/sched/coredump.h>
      38             : #include <linux/sched/signal.h>
      39             : #include <linux/sched/numa_balancing.h>
      40             : #include <linux/sched/task.h>
      41             : #include <linux/pagemap.h>
      42             : #include <linux/perf_event.h>
      43             : #include <linux/highmem.h>
      44             : #include <linux/spinlock.h>
      45             : #include <linux/key.h>
      46             : #include <linux/personality.h>
      47             : #include <linux/binfmts.h>
      48             : #include <linux/utsname.h>
      49             : #include <linux/pid_namespace.h>
      50             : #include <linux/module.h>
      51             : #include <linux/namei.h>
      52             : #include <linux/mount.h>
      53             : #include <linux/security.h>
      54             : #include <linux/syscalls.h>
      55             : #include <linux/tsacct_kern.h>
      56             : #include <linux/cn_proc.h>
      57             : #include <linux/audit.h>
      58             : #include <linux/kmod.h>
      59             : #include <linux/fsnotify.h>
      60             : #include <linux/fs_struct.h>
      61             : #include <linux/oom.h>
      62             : #include <linux/compat.h>
      63             : #include <linux/vmalloc.h>
      64             : #include <linux/io_uring.h>
      65             : #include <linux/syscall_user_dispatch.h>
      66             : #include <linux/coredump.h>
      67             : #include <linux/time_namespace.h>
      68             : 
      69             : #include <linux/uaccess.h>
      70             : #include <asm/mmu_context.h>
      71             : #include <asm/tlb.h>
      72             : 
      73             : #include <trace/events/task.h>
      74             : #include "internal.h"
      75             : 
      76             : #include <trace/events/sched.h>
      77             : 
      78             : static int bprm_creds_from_file(struct linux_binprm *bprm);
      79             : 
      80             : int suid_dumpable = 0;
      81             : 
      82             : static LIST_HEAD(formats);
      83             : static DEFINE_RWLOCK(binfmt_lock);
      84             : 
      85           2 : void __register_binfmt(struct linux_binfmt * fmt, int insert)
      86             : {
      87           2 :         write_lock(&binfmt_lock);
      88           2 :         insert ? list_add(&fmt->lh, &formats) :
      89           2 :                  list_add_tail(&fmt->lh, &formats);
      90           2 :         write_unlock(&binfmt_lock);
      91           2 : }
      92             : 
      93             : EXPORT_SYMBOL(__register_binfmt);
      94             : 
      95           0 : void unregister_binfmt(struct linux_binfmt * fmt)
      96             : {
      97           0 :         write_lock(&binfmt_lock);
      98           0 :         list_del(&fmt->lh);
      99           0 :         write_unlock(&binfmt_lock);
     100           0 : }
     101             : 
     102             : EXPORT_SYMBOL(unregister_binfmt);
     103             : 
     104             : static inline void put_binfmt(struct linux_binfmt * fmt)
     105             : {
     106           0 :         module_put(fmt->module);
     107             : }
     108             : 
     109           0 : bool path_noexec(const struct path *path)
     110             : {
     111           0 :         return (path->mnt->mnt_flags & MNT_NOEXEC) ||
     112           0 :                (path->mnt->mnt_sb->s_iflags & SB_I_NOEXEC);
     113             : }
     114             : 
     115             : #ifdef CONFIG_USELIB
     116             : /*
     117             :  * Note that a shared library must be both readable and executable due to
     118             :  * security reasons.
     119             :  *
     120             :  * Also note that we take the address to load from the file itself.
     121             :  */
     122             : SYSCALL_DEFINE1(uselib, const char __user *, library)
     123             : {
     124             :         struct linux_binfmt *fmt;
     125             :         struct file *file;
     126             :         struct filename *tmp = getname(library);
     127             :         int error = PTR_ERR(tmp);
     128             :         static const struct open_flags uselib_flags = {
     129             :                 .open_flag = O_LARGEFILE | O_RDONLY | __FMODE_EXEC,
     130             :                 .acc_mode = MAY_READ | MAY_EXEC,
     131             :                 .intent = LOOKUP_OPEN,
     132             :                 .lookup_flags = LOOKUP_FOLLOW,
     133             :         };
     134             : 
     135             :         if (IS_ERR(tmp))
     136             :                 goto out;
     137             : 
     138             :         file = do_filp_open(AT_FDCWD, tmp, &uselib_flags);
     139             :         putname(tmp);
     140             :         error = PTR_ERR(file);
     141             :         if (IS_ERR(file))
     142             :                 goto out;
     143             : 
     144             :         /*
     145             :          * may_open() has already checked for this, so it should be
     146             :          * impossible to trip now. But we need to be extra cautious
     147             :          * and check again at the very end too.
     148             :          */
     149             :         error = -EACCES;
     150             :         if (WARN_ON_ONCE(!S_ISREG(file_inode(file)->i_mode) ||
     151             :                          path_noexec(&file->f_path)))
     152             :                 goto exit;
     153             : 
     154             :         fsnotify_open(file);
     155             : 
     156             :         error = -ENOEXEC;
     157             : 
     158             :         read_lock(&binfmt_lock);
     159             :         list_for_each_entry(fmt, &formats, lh) {
     160             :                 if (!fmt->load_shlib)
     161             :                         continue;
     162             :                 if (!try_module_get(fmt->module))
     163             :                         continue;
     164             :                 read_unlock(&binfmt_lock);
     165             :                 error = fmt->load_shlib(file);
     166             :                 read_lock(&binfmt_lock);
     167             :                 put_binfmt(fmt);
     168             :                 if (error != -ENOEXEC)
     169             :                         break;
     170             :         }
     171             :         read_unlock(&binfmt_lock);
     172             : exit:
     173             :         fput(file);
     174             : out:
     175             :         return error;
     176             : }
     177             : #endif /* #ifdef CONFIG_USELIB */
     178             : 
     179             : #ifdef CONFIG_MMU
     180             : /*
     181             :  * The nascent bprm->mm is not visible until exec_mmap() but it can
     182             :  * use a lot of memory, so account these pages in current->mm temporarily
     183             :  * for oom_badness()->get_mm_rss(). Once exec succeeds or fails, we
     184             :  * change the counter back via acct_arg_size(0).
     185             :  */
     186           0 : static void acct_arg_size(struct linux_binprm *bprm, unsigned long pages)
     187             : {
     188           0 :         struct mm_struct *mm = current->mm;
     189           0 :         long diff = (long)(pages - bprm->vma_pages);
     190             : 
     191           0 :         if (!mm || !diff)
     192             :                 return;
     193             : 
     194           0 :         bprm->vma_pages = pages;
     195           0 :         add_mm_counter(mm, MM_ANONPAGES, diff);
     196             : }
     197             : 
     198           0 : static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
     199             :                 int write)
     200             : {
     201             :         struct page *page;
     202             :         int ret;
     203           0 :         unsigned int gup_flags = 0;
     204             : 
     205             : #ifdef CONFIG_STACK_GROWSUP
     206             :         if (write) {
     207             :                 ret = expand_downwards(bprm->vma, pos);
     208             :                 if (ret < 0)
     209             :                         return NULL;
     210             :         }
     211             : #endif
     212             : 
     213           0 :         if (write)
     214           0 :                 gup_flags |= FOLL_WRITE;
     215             : 
     216             :         /*
     217             :          * We are doing an exec().  'current' is the process
     218             :          * doing the exec and bprm->mm is the new process's mm.
     219             :          */
     220           0 :         mmap_read_lock(bprm->mm);
     221           0 :         ret = get_user_pages_remote(bprm->mm, pos, 1, gup_flags,
     222             :                         &page, NULL, NULL);
     223           0 :         mmap_read_unlock(bprm->mm);
     224           0 :         if (ret <= 0)
     225             :                 return NULL;
     226             : 
     227           0 :         if (write)
     228           0 :                 acct_arg_size(bprm, vma_pages(bprm->vma));
     229             : 
     230           0 :         return page;
     231             : }
     232             : 
     233             : static void put_arg_page(struct page *page)
     234             : {
     235           0 :         put_page(page);
     236             : }
     237             : 
     238             : static void free_arg_pages(struct linux_binprm *bprm)
     239             : {
     240             : }
     241             : 
     242             : static void flush_arg_page(struct linux_binprm *bprm, unsigned long pos,
     243             :                 struct page *page)
     244             : {
     245           0 :         flush_cache_page(bprm->vma, pos, page_to_pfn(page));
     246             : }
     247             : 
     248           0 : static int __bprm_mm_init(struct linux_binprm *bprm)
     249             : {
     250             :         int err;
     251           0 :         struct vm_area_struct *vma = NULL;
     252           0 :         struct mm_struct *mm = bprm->mm;
     253             : 
     254           0 :         bprm->vma = vma = vm_area_alloc(mm);
     255           0 :         if (!vma)
     256             :                 return -ENOMEM;
     257           0 :         vma_set_anonymous(vma);
     258             : 
     259           0 :         if (mmap_write_lock_killable(mm)) {
     260             :                 err = -EINTR;
     261             :                 goto err_free;
     262             :         }
     263             : 
     264             :         /*
     265             :          * Place the stack at the largest stack address the architecture
     266             :          * supports. Later, we'll move this to an appropriate place. We don't
     267             :          * use STACK_TOP because that can depend on attributes which aren't
     268             :          * configured yet.
     269             :          */
     270             :         BUILD_BUG_ON(VM_STACK_FLAGS & VM_STACK_INCOMPLETE_SETUP);
     271           0 :         vma->vm_end = STACK_TOP_MAX;
     272           0 :         vma->vm_start = vma->vm_end - PAGE_SIZE;
     273           0 :         vm_flags_init(vma, VM_SOFTDIRTY | VM_STACK_FLAGS | VM_STACK_INCOMPLETE_SETUP);
     274           0 :         vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
     275             : 
     276           0 :         err = insert_vm_struct(mm, vma);
     277           0 :         if (err)
     278             :                 goto err;
     279             : 
     280           0 :         mm->stack_vm = mm->total_vm = 1;
     281           0 :         mmap_write_unlock(mm);
     282           0 :         bprm->p = vma->vm_end - sizeof(void *);
     283           0 :         return 0;
     284             : err:
     285             :         mmap_write_unlock(mm);
     286             : err_free:
     287           0 :         bprm->vma = NULL;
     288           0 :         vm_area_free(vma);
     289           0 :         return err;
     290             : }
     291             : 
     292             : static bool valid_arg_len(struct linux_binprm *bprm, long len)
     293             : {
     294           0 :         return len <= MAX_ARG_STRLEN;
     295             : }
     296             : 
     297             : #else
     298             : 
     299             : static inline void acct_arg_size(struct linux_binprm *bprm, unsigned long pages)
     300             : {
     301             : }
     302             : 
     303             : static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
     304             :                 int write)
     305             : {
     306             :         struct page *page;
     307             : 
     308             :         page = bprm->page[pos / PAGE_SIZE];
     309             :         if (!page && write) {
     310             :                 page = alloc_page(GFP_HIGHUSER|__GFP_ZERO);
     311             :                 if (!page)
     312             :                         return NULL;
     313             :                 bprm->page[pos / PAGE_SIZE] = page;
     314             :         }
     315             : 
     316             :         return page;
     317             : }
     318             : 
     319             : static void put_arg_page(struct page *page)
     320             : {
     321             : }
     322             : 
     323             : static void free_arg_page(struct linux_binprm *bprm, int i)
     324             : {
     325             :         if (bprm->page[i]) {
     326             :                 __free_page(bprm->page[i]);
     327             :                 bprm->page[i] = NULL;
     328             :         }
     329             : }
     330             : 
     331             : static void free_arg_pages(struct linux_binprm *bprm)
     332             : {
     333             :         int i;
     334             : 
     335             :         for (i = 0; i < MAX_ARG_PAGES; i++)
     336             :                 free_arg_page(bprm, i);
     337             : }
     338             : 
     339             : static void flush_arg_page(struct linux_binprm *bprm, unsigned long pos,
     340             :                 struct page *page)
     341             : {
     342             : }
     343             : 
     344             : static int __bprm_mm_init(struct linux_binprm *bprm)
     345             : {
     346             :         bprm->p = PAGE_SIZE * MAX_ARG_PAGES - sizeof(void *);
     347             :         return 0;
     348             : }
     349             : 
     350             : static bool valid_arg_len(struct linux_binprm *bprm, long len)
     351             : {
     352             :         return len <= bprm->p;
     353             : }
     354             : 
     355             : #endif /* CONFIG_MMU */
     356             : 
     357             : /*
     358             :  * Create a new mm_struct and populate it with a temporary stack
     359             :  * vm_area_struct.  We don't have enough context at this point to set the stack
     360             :  * flags, permissions, and offset, so we use temporary values.  We'll update
     361             :  * them later in setup_arg_pages().
     362             :  */
     363           0 : static int bprm_mm_init(struct linux_binprm *bprm)
     364             : {
     365             :         int err;
     366           0 :         struct mm_struct *mm = NULL;
     367             : 
     368           0 :         bprm->mm = mm = mm_alloc();
     369           0 :         err = -ENOMEM;
     370           0 :         if (!mm)
     371             :                 goto err;
     372             : 
     373             :         /* Save current stack limit for all calculations made during exec. */
     374           0 :         task_lock(current->group_leader);
     375           0 :         bprm->rlim_stack = current->signal->rlim[RLIMIT_STACK];
     376           0 :         task_unlock(current->group_leader);
     377             : 
     378           0 :         err = __bprm_mm_init(bprm);
     379           0 :         if (err)
     380             :                 goto err;
     381             : 
     382             :         return 0;
     383             : 
     384             : err:
     385           0 :         if (mm) {
     386           0 :                 bprm->mm = NULL;
     387             :                 mmdrop(mm);
     388             :         }
     389             : 
     390             :         return err;
     391             : }
     392             : 
     393             : struct user_arg_ptr {
     394             : #ifdef CONFIG_COMPAT
     395             :         bool is_compat;
     396             : #endif
     397             :         union {
     398             :                 const char __user *const __user *native;
     399             : #ifdef CONFIG_COMPAT
     400             :                 const compat_uptr_t __user *compat;
     401             : #endif
     402             :         } ptr;
     403             : };
     404             : 
     405           0 : static const char __user *get_user_arg_ptr(struct user_arg_ptr argv, int nr)
     406             : {
     407             :         const char __user *native;
     408             : 
     409             : #ifdef CONFIG_COMPAT
     410             :         if (unlikely(argv.is_compat)) {
     411             :                 compat_uptr_t compat;
     412             : 
     413             :                 if (get_user(compat, argv.ptr.compat + nr))
     414             :                         return ERR_PTR(-EFAULT);
     415             : 
     416             :                 return compat_ptr(compat);
     417             :         }
     418             : #endif
     419             : 
     420           0 :         if (get_user(native, argv.ptr.native + nr))
     421             :                 return ERR_PTR(-EFAULT);
     422             : 
     423           0 :         return native;
     424             : }
     425             : 
     426             : /*
     427             :  * count() counts the number of strings in array ARGV.
     428             :  */
     429           0 : static int count(struct user_arg_ptr argv, int max)
     430             : {
     431           0 :         int i = 0;
     432             : 
     433           0 :         if (argv.ptr.native != NULL) {
     434           0 :                 for (;;) {
     435           0 :                         const char __user *p = get_user_arg_ptr(argv, i);
     436             : 
     437           0 :                         if (!p)
     438             :                                 break;
     439             : 
     440           0 :                         if (IS_ERR(p))
     441             :                                 return -EFAULT;
     442             : 
     443           0 :                         if (i >= max)
     444             :                                 return -E2BIG;
     445           0 :                         ++i;
     446             : 
     447           0 :                         if (fatal_signal_pending(current))
     448             :                                 return -ERESTARTNOHAND;
     449           0 :                         cond_resched();
     450             :                 }
     451             :         }
     452             :         return i;
     453             : }
     454             : 
     455           0 : static int count_strings_kernel(const char *const *argv)
     456             : {
     457             :         int i;
     458             : 
     459           0 :         if (!argv)
     460             :                 return 0;
     461             : 
     462           0 :         for (i = 0; argv[i]; ++i) {
     463           0 :                 if (i >= MAX_ARG_STRINGS)
     464             :                         return -E2BIG;
     465           0 :                 if (fatal_signal_pending(current))
     466             :                         return -ERESTARTNOHAND;
     467           0 :                 cond_resched();
     468             :         }
     469             :         return i;
     470             : }
     471             : 
     472             : static int bprm_stack_limits(struct linux_binprm *bprm)
     473             : {
     474             :         unsigned long limit, ptr_size;
     475             : 
     476             :         /*
     477             :          * Limit to 1/4 of the max stack size or 3/4 of _STK_LIM
     478             :          * (whichever is smaller) for the argv+env strings.
     479             :          * This ensures that:
     480             :          *  - the remaining binfmt code will not run out of stack space,
     481             :          *  - the program will have a reasonable amount of stack left
     482             :          *    to work from.
     483             :          */
     484           0 :         limit = _STK_LIM / 4 * 3;
     485           0 :         limit = min(limit, bprm->rlim_stack.rlim_cur / 4);
     486             :         /*
     487             :          * We've historically supported up to 32 pages (ARG_MAX)
     488             :          * of argument strings even with small stacks
     489             :          */
     490           0 :         limit = max_t(unsigned long, limit, ARG_MAX);
     491             :         /*
     492             :          * We must account for the size of all the argv and envp pointers to
     493             :          * the argv and envp strings, since they will also take up space in
     494             :          * the stack. They aren't stored until much later when we can't
     495             :          * signal to the parent that the child has run out of stack space.
     496             :          * Instead, calculate it here so it's possible to fail gracefully.
     497             :          *
     498             :          * In the case of argc = 0, make sure there is space for adding an
     499             :          * empty string (which will bump argc to 1), to ensure confused
     500             :          * userspace programs don't start processing from argv[1], thinking
     501             :          * argc can never be 0, to keep them from walking envp by accident.
     502             :          * See do_execveat_common().
     503             :          */
     504           0 :         ptr_size = (max(bprm->argc, 1) + bprm->envc) * sizeof(void *);
     505           0 :         if (limit <= ptr_size)
     506             :                 return -E2BIG;
     507           0 :         limit -= ptr_size;
     508             : 
     509           0 :         bprm->argmin = bprm->p - limit;
     510             :         return 0;
     511             : }
     512             : 
     513             : /*
     514             :  * 'copy_strings()' copies argument/environment strings from the old
     515             :  * process's memory to the new process's stack.  The call to get_user_pages()
     516             :  * ensures the destination page is created and not swapped out.
     517             :  */
     518           0 : static int copy_strings(int argc, struct user_arg_ptr argv,
     519             :                         struct linux_binprm *bprm)
     520             : {
     521           0 :         struct page *kmapped_page = NULL;
     522           0 :         char *kaddr = NULL;
     523           0 :         unsigned long kpos = 0;
     524             :         int ret;
     525             : 
     526           0 :         while (argc-- > 0) {
     527             :                 const char __user *str;
     528             :                 int len;
     529             :                 unsigned long pos;
     530             : 
     531           0 :                 ret = -EFAULT;
     532           0 :                 str = get_user_arg_ptr(argv, argc);
     533           0 :                 if (IS_ERR(str))
     534             :                         goto out;
     535             : 
     536           0 :                 len = strnlen_user(str, MAX_ARG_STRLEN);
     537           0 :                 if (!len)
     538             :                         goto out;
     539             : 
     540           0 :                 ret = -E2BIG;
     541           0 :                 if (!valid_arg_len(bprm, len))
     542             :                         goto out;
     543             : 
     544             :                 /* We're going to work our way backwards. */
     545           0 :                 pos = bprm->p;
     546           0 :                 str += len;
     547           0 :                 bprm->p -= len;
     548             : #ifdef CONFIG_MMU
     549           0 :                 if (bprm->p < bprm->argmin)
     550             :                         goto out;
     551             : #endif
     552             : 
     553           0 :                 while (len > 0) {
     554             :                         int offset, bytes_to_copy;
     555             : 
     556           0 :                         if (fatal_signal_pending(current)) {
     557             :                                 ret = -ERESTARTNOHAND;
     558             :                                 goto out;
     559             :                         }
     560           0 :                         cond_resched();
     561             : 
     562           0 :                         offset = pos % PAGE_SIZE;
     563           0 :                         if (offset == 0)
     564           0 :                                 offset = PAGE_SIZE;
     565             : 
     566           0 :                         bytes_to_copy = offset;
     567           0 :                         if (bytes_to_copy > len)
     568           0 :                                 bytes_to_copy = len;
     569             : 
     570           0 :                         offset -= bytes_to_copy;
     571           0 :                         pos -= bytes_to_copy;
     572           0 :                         str -= bytes_to_copy;
     573           0 :                         len -= bytes_to_copy;
     574             : 
     575           0 :                         if (!kmapped_page || kpos != (pos & PAGE_MASK)) {
     576             :                                 struct page *page;
     577             : 
     578           0 :                                 page = get_arg_page(bprm, pos, 1);
     579           0 :                                 if (!page) {
     580             :                                         ret = -E2BIG;
     581             :                                         goto out;
     582             :                                 }
     583             : 
     584           0 :                                 if (kmapped_page) {
     585           0 :                                         flush_dcache_page(kmapped_page);
     586           0 :                                         kunmap_local(kaddr);
     587             :                                         put_arg_page(kmapped_page);
     588             :                                 }
     589           0 :                                 kmapped_page = page;
     590           0 :                                 kaddr = kmap_local_page(kmapped_page);
     591           0 :                                 kpos = pos & PAGE_MASK;
     592           0 :                                 flush_arg_page(bprm, kpos, kmapped_page);
     593             :                         }
     594           0 :                         if (copy_from_user(kaddr+offset, str, bytes_to_copy)) {
     595             :                                 ret = -EFAULT;
     596             :                                 goto out;
     597             :                         }
     598             :                 }
     599             :         }
     600             :         ret = 0;
     601             : out:
     602           0 :         if (kmapped_page) {
     603           0 :                 flush_dcache_page(kmapped_page);
     604           0 :                 kunmap_local(kaddr);
     605             :                 put_arg_page(kmapped_page);
     606             :         }
     607           0 :         return ret;
     608             : }
     609             : 
     610             : /*
     611             :  * Copy an argument/environment string from the kernel to the process's stack.
     612             :  */
     613           0 : int copy_string_kernel(const char *arg, struct linux_binprm *bprm)
     614             : {
     615           0 :         int len = strnlen(arg, MAX_ARG_STRLEN) + 1 /* terminating NUL */;
     616           0 :         unsigned long pos = bprm->p;
     617             : 
     618           0 :         if (len == 0)
     619             :                 return -EFAULT;
     620           0 :         if (!valid_arg_len(bprm, len))
     621             :                 return -E2BIG;
     622             : 
     623             :         /* We're going to work our way backwards. */
     624           0 :         arg += len;
     625           0 :         bprm->p -= len;
     626           0 :         if (IS_ENABLED(CONFIG_MMU) && bprm->p < bprm->argmin)
     627             :                 return -E2BIG;
     628             : 
     629           0 :         while (len > 0) {
     630           0 :                 unsigned int bytes_to_copy = min_t(unsigned int, len,
     631             :                                 min_not_zero(offset_in_page(pos), PAGE_SIZE));
     632             :                 struct page *page;
     633             : 
     634           0 :                 pos -= bytes_to_copy;
     635           0 :                 arg -= bytes_to_copy;
     636           0 :                 len -= bytes_to_copy;
     637             : 
     638           0 :                 page = get_arg_page(bprm, pos, 1);
     639           0 :                 if (!page)
     640             :                         return -E2BIG;
     641           0 :                 flush_arg_page(bprm, pos & PAGE_MASK, page);
     642           0 :                 memcpy_to_page(page, offset_in_page(pos), arg, bytes_to_copy);
     643             :                 put_arg_page(page);
     644             :         }
     645             : 
     646             :         return 0;
     647             : }
     648             : EXPORT_SYMBOL(copy_string_kernel);
     649             : 
     650           0 : static int copy_strings_kernel(int argc, const char *const *argv,
     651             :                                struct linux_binprm *bprm)
     652             : {
     653           0 :         while (argc-- > 0) {
     654           0 :                 int ret = copy_string_kernel(argv[argc], bprm);
     655           0 :                 if (ret < 0)
     656             :                         return ret;
     657           0 :                 if (fatal_signal_pending(current))
     658             :                         return -ERESTARTNOHAND;
     659           0 :                 cond_resched();
     660             :         }
     661             :         return 0;
     662             : }
     663             : 
     664             : #ifdef CONFIG_MMU
     665             : 
     666             : /*
     667             :  * During bprm_mm_init(), we create a temporary stack at STACK_TOP_MAX.  Once
     668             :  * the binfmt code determines where the new stack should reside, we shift it to
     669             :  * its final location.  The process proceeds as follows:
     670             :  *
     671             :  * 1) Use shift to calculate the new vma endpoints.
     672             :  * 2) Extend vma to cover both the old and new ranges.  This ensures the
     673             :  *    arguments passed to subsequent functions are consistent.
     674             :  * 3) Move vma's page tables to the new range.
     675             :  * 4) Free up any cleared pgd range.
     676             :  * 5) Shrink the vma to cover only the new range.
     677             :  */
     678           0 : static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift)
     679             : {
     680           0 :         struct mm_struct *mm = vma->vm_mm;
     681           0 :         unsigned long old_start = vma->vm_start;
     682           0 :         unsigned long old_end = vma->vm_end;
     683           0 :         unsigned long length = old_end - old_start;
     684           0 :         unsigned long new_start = old_start - shift;
     685           0 :         unsigned long new_end = old_end - shift;
     686           0 :         VMA_ITERATOR(vmi, mm, new_start);
     687             :         struct vm_area_struct *next;
     688             :         struct mmu_gather tlb;
     689             : 
     690           0 :         BUG_ON(new_start > new_end);
     691             : 
     692             :         /*
     693             :          * ensure there are no vmas between where we want to go
     694             :          * and where we are
     695             :          */
     696           0 :         if (vma != vma_next(&vmi))
     697             :                 return -EFAULT;
     698             : 
     699             :         /*
     700             :          * cover the whole range: [new_start, old_end)
     701             :          */
     702           0 :         if (vma_expand(&vmi, vma, new_start, old_end, vma->vm_pgoff, NULL))
     703             :                 return -ENOMEM;
     704             : 
     705             :         /*
     706             :          * move the page tables downwards, on failure we rely on
     707             :          * process cleanup to remove whatever mess we made.
     708             :          */
     709           0 :         if (length != move_page_tables(vma, old_start,
     710             :                                        vma, new_start, length, false))
     711             :                 return -ENOMEM;
     712             : 
     713           0 :         lru_add_drain();
     714           0 :         tlb_gather_mmu(&tlb, mm);
     715           0 :         next = vma_next(&vmi);
     716           0 :         if (new_end > old_start) {
     717             :                 /*
     718             :                  * when the old and new regions overlap clear from new_end.
     719             :                  */
     720           0 :                 free_pgd_range(&tlb, new_end, old_end, new_end,
     721             :                         next ? next->vm_start : USER_PGTABLES_CEILING);
     722             :         } else {
     723             :                 /*
     724             :                  * otherwise, clean from old_start; this is done to not touch
     725             :                  * the address space in [new_end, old_start) some architectures
     726             :                  * have constraints on va-space that make this illegal (IA64) -
     727             :                  * for the others its just a little faster.
     728             :                  */
     729           0 :                 free_pgd_range(&tlb, old_start, old_end, new_end,
     730             :                         next ? next->vm_start : USER_PGTABLES_CEILING);
     731             :         }
     732           0 :         tlb_finish_mmu(&tlb);
     733             : 
     734           0 :         vma_prev(&vmi);
     735             :         /* Shrink the vma to just the new range */
     736           0 :         return vma_shrink(&vmi, vma, new_start, new_end, vma->vm_pgoff);
     737             : }
     738             : 
     739             : /*
     740             :  * Finalizes the stack vm_area_struct. The flags and permissions are updated,
     741             :  * the stack is optionally relocated, and some extra space is added.
     742             :  */
     743           0 : int setup_arg_pages(struct linux_binprm *bprm,
     744             :                     unsigned long stack_top,
     745             :                     int executable_stack)
     746             : {
     747             :         unsigned long ret;
     748             :         unsigned long stack_shift;
     749           0 :         struct mm_struct *mm = current->mm;
     750           0 :         struct vm_area_struct *vma = bprm->vma;
     751           0 :         struct vm_area_struct *prev = NULL;
     752             :         unsigned long vm_flags;
     753             :         unsigned long stack_base;
     754             :         unsigned long stack_size;
     755             :         unsigned long stack_expand;
     756             :         unsigned long rlim_stack;
     757             :         struct mmu_gather tlb;
     758             :         struct vma_iterator vmi;
     759             : 
     760             : #ifdef CONFIG_STACK_GROWSUP
     761             :         /* Limit stack size */
     762             :         stack_base = bprm->rlim_stack.rlim_max;
     763             : 
     764             :         stack_base = calc_max_stack_size(stack_base);
     765             : 
     766             :         /* Add space for stack randomization. */
     767             :         stack_base += (STACK_RND_MASK << PAGE_SHIFT);
     768             : 
     769             :         /* Make sure we didn't let the argument array grow too large. */
     770             :         if (vma->vm_end - vma->vm_start > stack_base)
     771             :                 return -ENOMEM;
     772             : 
     773             :         stack_base = PAGE_ALIGN(stack_top - stack_base);
     774             : 
     775             :         stack_shift = vma->vm_start - stack_base;
     776             :         mm->arg_start = bprm->p - stack_shift;
     777             :         bprm->p = vma->vm_end - stack_shift;
     778             : #else
     779           0 :         stack_top = arch_align_stack(stack_top);
     780           0 :         stack_top = PAGE_ALIGN(stack_top);
     781             : 
     782           0 :         if (unlikely(stack_top < mmap_min_addr) ||
     783           0 :             unlikely(vma->vm_end - vma->vm_start >= stack_top - mmap_min_addr))
     784             :                 return -ENOMEM;
     785             : 
     786           0 :         stack_shift = vma->vm_end - stack_top;
     787             : 
     788           0 :         bprm->p -= stack_shift;
     789           0 :         mm->arg_start = bprm->p;
     790             : #endif
     791             : 
     792           0 :         if (bprm->loader)
     793           0 :                 bprm->loader -= stack_shift;
     794           0 :         bprm->exec -= stack_shift;
     795             : 
     796           0 :         if (mmap_write_lock_killable(mm))
     797             :                 return -EINTR;
     798             : 
     799           0 :         vm_flags = VM_STACK_FLAGS;
     800             : 
     801             :         /*
     802             :          * Adjust stack execute permissions; explicitly enable for
     803             :          * EXSTACK_ENABLE_X, disable for EXSTACK_DISABLE_X and leave alone
     804             :          * (arch default) otherwise.
     805             :          */
     806           0 :         if (unlikely(executable_stack == EXSTACK_ENABLE_X))
     807             :                 vm_flags |= VM_EXEC;
     808           0 :         else if (executable_stack == EXSTACK_DISABLE_X)
     809           0 :                 vm_flags &= ~VM_EXEC;
     810           0 :         vm_flags |= mm->def_flags;
     811           0 :         vm_flags |= VM_STACK_INCOMPLETE_SETUP;
     812             : 
     813           0 :         vma_iter_init(&vmi, mm, vma->vm_start);
     814             : 
     815           0 :         tlb_gather_mmu(&tlb, mm);
     816           0 :         ret = mprotect_fixup(&vmi, &tlb, vma, &prev, vma->vm_start, vma->vm_end,
     817             :                         vm_flags);
     818           0 :         tlb_finish_mmu(&tlb);
     819             : 
     820           0 :         if (ret)
     821             :                 goto out_unlock;
     822           0 :         BUG_ON(prev != vma);
     823             : 
     824           0 :         if (unlikely(vm_flags & VM_EXEC)) {
     825           0 :                 pr_warn_once("process '%pD4' started with executable stack\n",
     826             :                              bprm->file);
     827             :         }
     828             : 
     829             :         /* Move stack pages down in memory. */
     830           0 :         if (stack_shift) {
     831           0 :                 ret = shift_arg_pages(vma, stack_shift);
     832           0 :                 if (ret)
     833             :                         goto out_unlock;
     834             :         }
     835             : 
     836             :         /* mprotect_fixup is overkill to remove the temporary stack flags */
     837           0 :         vm_flags_clear(vma, VM_STACK_INCOMPLETE_SETUP);
     838             : 
     839           0 :         stack_expand = 131072UL; /* randomly 32*4k (or 2*64k) pages */
     840           0 :         stack_size = vma->vm_end - vma->vm_start;
     841             :         /*
     842             :          * Align this down to a page boundary as expand_stack
     843             :          * will align it up.
     844             :          */
     845           0 :         rlim_stack = bprm->rlim_stack.rlim_cur & PAGE_MASK;
     846             : 
     847           0 :         stack_expand = min(rlim_stack, stack_size + stack_expand);
     848             : 
     849             : #ifdef CONFIG_STACK_GROWSUP
     850             :         stack_base = vma->vm_start + stack_expand;
     851             : #else
     852           0 :         stack_base = vma->vm_end - stack_expand;
     853             : #endif
     854           0 :         current->mm->start_stack = bprm->p;
     855           0 :         ret = expand_stack(vma, stack_base);
     856           0 :         if (ret)
     857           0 :                 ret = -EFAULT;
     858             : 
     859             : out_unlock:
     860           0 :         mmap_write_unlock(mm);
     861           0 :         return ret;
     862             : }
     863             : EXPORT_SYMBOL(setup_arg_pages);
     864             : 
     865             : #else
     866             : 
     867             : /*
     868             :  * Transfer the program arguments and environment from the holding pages
     869             :  * onto the stack. The provided stack pointer is adjusted accordingly.
     870             :  */
     871             : int transfer_args_to_stack(struct linux_binprm *bprm,
     872             :                            unsigned long *sp_location)
     873             : {
     874             :         unsigned long index, stop, sp;
     875             :         int ret = 0;
     876             : 
     877             :         stop = bprm->p >> PAGE_SHIFT;
     878             :         sp = *sp_location;
     879             : 
     880             :         for (index = MAX_ARG_PAGES - 1; index >= stop; index--) {
     881             :                 unsigned int offset = index == stop ? bprm->p & ~PAGE_MASK : 0;
     882             :                 char *src = kmap_local_page(bprm->page[index]) + offset;
     883             :                 sp -= PAGE_SIZE - offset;
     884             :                 if (copy_to_user((void *) sp, src, PAGE_SIZE - offset) != 0)
     885             :                         ret = -EFAULT;
     886             :                 kunmap_local(src);
     887             :                 if (ret)
     888             :                         goto out;
     889             :         }
     890             : 
     891             :         *sp_location = sp;
     892             : 
     893             : out:
     894             :         return ret;
     895             : }
     896             : EXPORT_SYMBOL(transfer_args_to_stack);
     897             : 
     898             : #endif /* CONFIG_MMU */
     899             : 
     900           0 : static struct file *do_open_execat(int fd, struct filename *name, int flags)
     901             : {
     902             :         struct file *file;
     903             :         int err;
     904           0 :         struct open_flags open_exec_flags = {
     905             :                 .open_flag = O_LARGEFILE | O_RDONLY | __FMODE_EXEC,
     906             :                 .acc_mode = MAY_EXEC,
     907             :                 .intent = LOOKUP_OPEN,
     908             :                 .lookup_flags = LOOKUP_FOLLOW,
     909             :         };
     910             : 
     911           0 :         if ((flags & ~(AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH)) != 0)
     912             :                 return ERR_PTR(-EINVAL);
     913           0 :         if (flags & AT_SYMLINK_NOFOLLOW)
     914           0 :                 open_exec_flags.lookup_flags &= ~LOOKUP_FOLLOW;
     915           0 :         if (flags & AT_EMPTY_PATH)
     916           0 :                 open_exec_flags.lookup_flags |= LOOKUP_EMPTY;
     917             : 
     918           0 :         file = do_filp_open(fd, name, &open_exec_flags);
     919           0 :         if (IS_ERR(file))
     920             :                 goto out;
     921             : 
     922             :         /*
     923             :          * may_open() has already checked for this, so it should be
     924             :          * impossible to trip now. But we need to be extra cautious
     925             :          * and check again at the very end too.
     926             :          */
     927           0 :         err = -EACCES;
     928           0 :         if (WARN_ON_ONCE(!S_ISREG(file_inode(file)->i_mode) ||
     929             :                          path_noexec(&file->f_path)))
     930             :                 goto exit;
     931             : 
     932           0 :         err = deny_write_access(file);
     933           0 :         if (err)
     934             :                 goto exit;
     935             : 
     936           0 :         if (name->name[0] != '\0')
     937           0 :                 fsnotify_open(file);
     938             : 
     939             : out:
     940             :         return file;
     941             : 
     942             : exit:
     943           0 :         fput(file);
     944           0 :         return ERR_PTR(err);
     945             : }
     946             : 
     947           0 : struct file *open_exec(const char *name)
     948             : {
     949           0 :         struct filename *filename = getname_kernel(name);
     950           0 :         struct file *f = ERR_CAST(filename);
     951             : 
     952           0 :         if (!IS_ERR(filename)) {
     953           0 :                 f = do_open_execat(AT_FDCWD, filename, 0);
     954           0 :                 putname(filename);
     955             :         }
     956           0 :         return f;
     957             : }
     958             : EXPORT_SYMBOL(open_exec);
     959             : 
     960             : #if defined(CONFIG_BINFMT_FLAT) || defined(CONFIG_BINFMT_ELF_FDPIC)
     961             : ssize_t read_code(struct file *file, unsigned long addr, loff_t pos, size_t len)
     962             : {
     963             :         ssize_t res = vfs_read(file, (void __user *)addr, len, &pos);
     964             :         if (res > 0)
     965             :                 flush_icache_user_range(addr, addr + len);
     966             :         return res;
     967             : }
     968             : EXPORT_SYMBOL(read_code);
     969             : #endif
     970             : 
     971             : /*
     972             :  * Maps the mm_struct mm into the current task struct.
     973             :  * On success, this function returns with exec_update_lock
     974             :  * held for writing.
     975             :  */
     976           0 : static int exec_mmap(struct mm_struct *mm)
     977             : {
     978             :         struct task_struct *tsk;
     979             :         struct mm_struct *old_mm, *active_mm;
     980             :         int ret;
     981             : 
     982             :         /* Notify parent that we're no longer interested in the old VM */
     983           0 :         tsk = current;
     984           0 :         old_mm = current->mm;
     985           0 :         exec_mm_release(tsk, old_mm);
     986             :         if (old_mm)
     987             :                 sync_mm_rss(old_mm);
     988             : 
     989           0 :         ret = down_write_killable(&tsk->signal->exec_update_lock);
     990           0 :         if (ret)
     991             :                 return ret;
     992             : 
     993           0 :         if (old_mm) {
     994             :                 /*
     995             :                  * If there is a pending fatal signal perhaps a signal
     996             :                  * whose default action is to create a coredump get
     997             :                  * out and die instead of going through with the exec.
     998             :                  */
     999           0 :                 ret = mmap_read_lock_killable(old_mm);
    1000           0 :                 if (ret) {
    1001           0 :                         up_write(&tsk->signal->exec_update_lock);
    1002           0 :                         return ret;
    1003             :                 }
    1004             :         }
    1005             : 
    1006           0 :         task_lock(tsk);
    1007           0 :         membarrier_exec_mmap(mm);
    1008             : 
    1009             :         local_irq_disable();
    1010           0 :         active_mm = tsk->active_mm;
    1011           0 :         tsk->active_mm = mm;
    1012           0 :         tsk->mm = mm;
    1013           0 :         mm_init_cid(mm);
    1014             :         /*
    1015             :          * This prevents preemption while active_mm is being loaded and
    1016             :          * it and mm are being updated, which could cause problems for
    1017             :          * lazy tlb mm refcounting when these are updated by context
    1018             :          * switches. Not all architectures can handle irqs off over
    1019             :          * activate_mm yet.
    1020             :          */
    1021             :         if (!IS_ENABLED(CONFIG_ARCH_WANT_IRQS_OFF_ACTIVATE_MM))
    1022             :                 local_irq_enable();
    1023           0 :         activate_mm(active_mm, mm);
    1024             :         if (IS_ENABLED(CONFIG_ARCH_WANT_IRQS_OFF_ACTIVATE_MM))
    1025             :                 local_irq_enable();
    1026           0 :         lru_gen_add_mm(mm);
    1027           0 :         task_unlock(tsk);
    1028           0 :         lru_gen_use_mm(mm);
    1029           0 :         if (old_mm) {
    1030           0 :                 mmap_read_unlock(old_mm);
    1031           0 :                 BUG_ON(active_mm != old_mm);
    1032           0 :                 setmax_mm_hiwater_rss(&tsk->signal->maxrss, old_mm);
    1033           0 :                 mm_update_next_owner(old_mm);
    1034           0 :                 mmput(old_mm);
    1035           0 :                 return 0;
    1036             :         }
    1037             :         mmdrop(active_mm);
    1038             :         return 0;
    1039             : }
    1040             : 
    1041           0 : static int de_thread(struct task_struct *tsk)
    1042             : {
    1043           0 :         struct signal_struct *sig = tsk->signal;
    1044           0 :         struct sighand_struct *oldsighand = tsk->sighand;
    1045           0 :         spinlock_t *lock = &oldsighand->siglock;
    1046             : 
    1047           0 :         if (thread_group_empty(tsk))
    1048             :                 goto no_thread_group;
    1049             : 
    1050             :         /*
    1051             :          * Kill all other threads in the thread group.
    1052             :          */
    1053           0 :         spin_lock_irq(lock);
    1054           0 :         if ((sig->flags & SIGNAL_GROUP_EXIT) || sig->group_exec_task) {
    1055             :                 /*
    1056             :                  * Another group action in progress, just
    1057             :                  * return so that the signal is processed.
    1058             :                  */
    1059           0 :                 spin_unlock_irq(lock);
    1060           0 :                 return -EAGAIN;
    1061             :         }
    1062             : 
    1063           0 :         sig->group_exec_task = tsk;
    1064           0 :         sig->notify_count = zap_other_threads(tsk);
    1065           0 :         if (!thread_group_leader(tsk))
    1066           0 :                 sig->notify_count--;
    1067             : 
    1068           0 :         while (sig->notify_count) {
    1069           0 :                 __set_current_state(TASK_KILLABLE);
    1070           0 :                 spin_unlock_irq(lock);
    1071           0 :                 schedule();
    1072           0 :                 if (__fatal_signal_pending(tsk))
    1073             :                         goto killed;
    1074             :                 spin_lock_irq(lock);
    1075             :         }
    1076           0 :         spin_unlock_irq(lock);
    1077             : 
    1078             :         /*
    1079             :          * At this point all other threads have exited, all we have to
    1080             :          * do is to wait for the thread group leader to become inactive,
    1081             :          * and to assume its PID:
    1082             :          */
    1083           0 :         if (!thread_group_leader(tsk)) {
    1084           0 :                 struct task_struct *leader = tsk->group_leader;
    1085             : 
    1086             :                 for (;;) {
    1087           0 :                         cgroup_threadgroup_change_begin(tsk);
    1088           0 :                         write_lock_irq(&tasklist_lock);
    1089             :                         /*
    1090             :                          * Do this under tasklist_lock to ensure that
    1091             :                          * exit_notify() can't miss ->group_exec_task
    1092             :                          */
    1093           0 :                         sig->notify_count = -1;
    1094           0 :                         if (likely(leader->exit_state))
    1095             :                                 break;
    1096           0 :                         __set_current_state(TASK_KILLABLE);
    1097           0 :                         write_unlock_irq(&tasklist_lock);
    1098           0 :                         cgroup_threadgroup_change_end(tsk);
    1099           0 :                         schedule();
    1100           0 :                         if (__fatal_signal_pending(tsk))
    1101             :                                 goto killed;
    1102             :                 }
    1103             : 
    1104             :                 /*
    1105             :                  * The only record we have of the real-time age of a
    1106             :                  * process, regardless of execs it's done, is start_time.
    1107             :                  * All the past CPU time is accumulated in signal_struct
    1108             :                  * from sister threads now dead.  But in this non-leader
    1109             :                  * exec, nothing survives from the original leader thread,
    1110             :                  * whose birth marks the true age of this process now.
    1111             :                  * When we take on its identity by switching to its PID, we
    1112             :                  * also take its birthdate (always earlier than our own).
    1113             :                  */
    1114           0 :                 tsk->start_time = leader->start_time;
    1115           0 :                 tsk->start_boottime = leader->start_boottime;
    1116             : 
    1117           0 :                 BUG_ON(!same_thread_group(leader, tsk));
    1118             :                 /*
    1119             :                  * An exec() starts a new thread group with the
    1120             :                  * TGID of the previous thread group. Rehash the
    1121             :                  * two threads with a switched PID, and release
    1122             :                  * the former thread group leader:
    1123             :                  */
    1124             : 
    1125             :                 /* Become a process group leader with the old leader's pid.
    1126             :                  * The old leader becomes a thread of the this thread group.
    1127             :                  */
    1128           0 :                 exchange_tids(tsk, leader);
    1129           0 :                 transfer_pid(leader, tsk, PIDTYPE_TGID);
    1130           0 :                 transfer_pid(leader, tsk, PIDTYPE_PGID);
    1131           0 :                 transfer_pid(leader, tsk, PIDTYPE_SID);
    1132             : 
    1133           0 :                 list_replace_rcu(&leader->tasks, &tsk->tasks);
    1134           0 :                 list_replace_init(&leader->sibling, &tsk->sibling);
    1135             : 
    1136           0 :                 tsk->group_leader = tsk;
    1137           0 :                 leader->group_leader = tsk;
    1138             : 
    1139           0 :                 tsk->exit_signal = SIGCHLD;
    1140           0 :                 leader->exit_signal = -1;
    1141             : 
    1142           0 :                 BUG_ON(leader->exit_state != EXIT_ZOMBIE);
    1143           0 :                 leader->exit_state = EXIT_DEAD;
    1144             : 
    1145             :                 /*
    1146             :                  * We are going to release_task()->ptrace_unlink() silently,
    1147             :                  * the tracer can sleep in do_wait(). EXIT_DEAD guarantees
    1148             :                  * the tracer won't block again waiting for this thread.
    1149             :                  */
    1150           0 :                 if (unlikely(leader->ptrace))
    1151           0 :                         __wake_up_parent(leader, leader->parent);
    1152           0 :                 write_unlock_irq(&tasklist_lock);
    1153           0 :                 cgroup_threadgroup_change_end(tsk);
    1154             : 
    1155           0 :                 release_task(leader);
    1156             :         }
    1157             : 
    1158           0 :         sig->group_exec_task = NULL;
    1159           0 :         sig->notify_count = 0;
    1160             : 
    1161             : no_thread_group:
    1162             :         /* we have changed execution domain */
    1163           0 :         tsk->exit_signal = SIGCHLD;
    1164             : 
    1165           0 :         BUG_ON(!thread_group_leader(tsk));
    1166             :         return 0;
    1167             : 
    1168             : killed:
    1169             :         /* protects against exit_notify() and __exit_signal() */
    1170           0 :         read_lock(&tasklist_lock);
    1171           0 :         sig->group_exec_task = NULL;
    1172           0 :         sig->notify_count = 0;
    1173           0 :         read_unlock(&tasklist_lock);
    1174           0 :         return -EAGAIN;
    1175             : }
    1176             : 
    1177             : 
    1178             : /*
    1179             :  * This function makes sure the current process has its own signal table,
    1180             :  * so that flush_signal_handlers can later reset the handlers without
    1181             :  * disturbing other processes.  (Other processes might share the signal
    1182             :  * table via the CLONE_SIGHAND option to clone().)
    1183             :  */
    1184           0 : static int unshare_sighand(struct task_struct *me)
    1185             : {
    1186           0 :         struct sighand_struct *oldsighand = me->sighand;
    1187             : 
    1188           0 :         if (refcount_read(&oldsighand->count) != 1) {
    1189             :                 struct sighand_struct *newsighand;
    1190             :                 /*
    1191             :                  * This ->sighand is shared with the CLONE_SIGHAND
    1192             :                  * but not CLONE_THREAD task, switch to the new one.
    1193             :                  */
    1194           0 :                 newsighand = kmem_cache_alloc(sighand_cachep, GFP_KERNEL);
    1195           0 :                 if (!newsighand)
    1196             :                         return -ENOMEM;
    1197             : 
    1198           0 :                 refcount_set(&newsighand->count, 1);
    1199             : 
    1200           0 :                 write_lock_irq(&tasklist_lock);
    1201           0 :                 spin_lock(&oldsighand->siglock);
    1202           0 :                 memcpy(newsighand->action, oldsighand->action,
    1203             :                        sizeof(newsighand->action));
    1204           0 :                 rcu_assign_pointer(me->sighand, newsighand);
    1205           0 :                 spin_unlock(&oldsighand->siglock);
    1206           0 :                 write_unlock_irq(&tasklist_lock);
    1207             : 
    1208           0 :                 __cleanup_sighand(oldsighand);
    1209             :         }
    1210             :         return 0;
    1211             : }
    1212             : 
    1213           0 : char *__get_task_comm(char *buf, size_t buf_size, struct task_struct *tsk)
    1214             : {
    1215           0 :         task_lock(tsk);
    1216             :         /* Always NUL terminated and zero-padded */
    1217           0 :         strscpy_pad(buf, tsk->comm, buf_size);
    1218           0 :         task_unlock(tsk);
    1219           0 :         return buf;
    1220             : }
    1221             : EXPORT_SYMBOL_GPL(__get_task_comm);
    1222             : 
    1223             : /*
    1224             :  * These functions flush out all traces of the currently running executable
    1225             :  * so that a new one can be started.
    1226             :  */
    1227             : 
    1228         347 : void __set_task_comm(struct task_struct *tsk, const char *buf, bool exec)
    1229             : {
    1230         347 :         task_lock(tsk);
    1231         347 :         trace_task_rename(tsk, buf);
    1232         347 :         strscpy_pad(tsk->comm, buf, sizeof(tsk->comm));
    1233         347 :         task_unlock(tsk);
    1234         347 :         perf_event_comm(tsk, exec);
    1235         347 : }
    1236             : 
    1237             : /*
    1238             :  * Calling this is the point of no return. None of the failures will be
    1239             :  * seen by userspace since either the process is already taking a fatal
    1240             :  * signal (via de_thread() or coredump), or will have SEGV raised
    1241             :  * (after exec_mmap()) by search_binary_handler (see below).
    1242             :  */
    1243           0 : int begin_new_exec(struct linux_binprm * bprm)
    1244             : {
    1245           0 :         struct task_struct *me = current;
    1246             :         int retval;
    1247             : 
    1248             :         /* Once we are committed compute the creds */
    1249           0 :         retval = bprm_creds_from_file(bprm);
    1250           0 :         if (retval)
    1251             :                 return retval;
    1252             : 
    1253             :         /*
    1254             :          * Ensure all future errors are fatal.
    1255             :          */
    1256           0 :         bprm->point_of_no_return = true;
    1257             : 
    1258             :         /*
    1259             :          * Make this the only thread in the thread group.
    1260             :          */
    1261           0 :         retval = de_thread(me);
    1262           0 :         if (retval)
    1263             :                 goto out;
    1264             : 
    1265             :         /*
    1266             :          * Cancel any io_uring activity across execve
    1267             :          */
    1268           0 :         io_uring_task_cancel();
    1269             : 
    1270             :         /* Ensure the files table is not shared. */
    1271           0 :         retval = unshare_files();
    1272           0 :         if (retval)
    1273             :                 goto out;
    1274             : 
    1275             :         /*
    1276             :          * Must be called _before_ exec_mmap() as bprm->mm is
    1277             :          * not visible until then. This also enables the update
    1278             :          * to be lockless.
    1279             :          */
    1280           0 :         retval = set_mm_exe_file(bprm->mm, bprm->file);
    1281           0 :         if (retval)
    1282             :                 goto out;
    1283             : 
    1284             :         /* If the binary is not readable then enforce mm->dumpable=0 */
    1285           0 :         would_dump(bprm, bprm->file);
    1286           0 :         if (bprm->have_execfd)
    1287           0 :                 would_dump(bprm, bprm->executable);
    1288             : 
    1289             :         /*
    1290             :          * Release all of the old mmap stuff
    1291             :          */
    1292           0 :         acct_arg_size(bprm, 0);
    1293           0 :         retval = exec_mmap(bprm->mm);
    1294           0 :         if (retval)
    1295             :                 goto out;
    1296             : 
    1297           0 :         bprm->mm = NULL;
    1298             : 
    1299           0 :         retval = exec_task_namespaces();
    1300           0 :         if (retval)
    1301             :                 goto out_unlock;
    1302             : 
    1303             : #ifdef CONFIG_POSIX_TIMERS
    1304           0 :         spin_lock_irq(&me->sighand->siglock);
    1305           0 :         posix_cpu_timers_exit(me);
    1306           0 :         spin_unlock_irq(&me->sighand->siglock);
    1307           0 :         exit_itimers(me);
    1308           0 :         flush_itimer_signals();
    1309             : #endif
    1310             : 
    1311             :         /*
    1312             :          * Make the signal table private.
    1313             :          */
    1314           0 :         retval = unshare_sighand(me);
    1315           0 :         if (retval)
    1316             :                 goto out_unlock;
    1317             : 
    1318           0 :         me->flags &= ~(PF_RANDOMIZE | PF_FORKNOEXEC |
    1319             :                                         PF_NOFREEZE | PF_NO_SETAFFINITY);
    1320           0 :         flush_thread();
    1321           0 :         me->personality &= ~bprm->per_clear;
    1322             : 
    1323           0 :         clear_syscall_work_syscall_user_dispatch(me);
    1324             : 
    1325             :         /*
    1326             :          * We have to apply CLOEXEC before we change whether the process is
    1327             :          * dumpable (in setup_new_exec) to avoid a race with a process in userspace
    1328             :          * trying to access the should-be-closed file descriptors of a process
    1329             :          * undergoing exec(2).
    1330             :          */
    1331           0 :         do_close_on_exec(me->files);
    1332             : 
    1333           0 :         if (bprm->secureexec) {
    1334             :                 /* Make sure parent cannot signal privileged process. */
    1335           0 :                 me->pdeath_signal = 0;
    1336             : 
    1337             :                 /*
    1338             :                  * For secureexec, reset the stack limit to sane default to
    1339             :                  * avoid bad behavior from the prior rlimits. This has to
    1340             :                  * happen before arch_pick_mmap_layout(), which examines
    1341             :                  * RLIMIT_STACK, but after the point of no return to avoid
    1342             :                  * needing to clean up the change on failure.
    1343             :                  */
    1344           0 :                 if (bprm->rlim_stack.rlim_cur > _STK_LIM)
    1345           0 :                         bprm->rlim_stack.rlim_cur = _STK_LIM;
    1346             :         }
    1347             : 
    1348           0 :         me->sas_ss_sp = me->sas_ss_size = 0;
    1349             : 
    1350             :         /*
    1351             :          * Figure out dumpability. Note that this checking only of current
    1352             :          * is wrong, but userspace depends on it. This should be testing
    1353             :          * bprm->secureexec instead.
    1354             :          */
    1355           0 :         if (bprm->interp_flags & BINPRM_FLAGS_ENFORCE_NONDUMP ||
    1356           0 :             !(uid_eq(current_euid(), current_uid()) &&
    1357           0 :               gid_eq(current_egid(), current_gid())))
    1358           0 :                 set_dumpable(current->mm, suid_dumpable);
    1359             :         else
    1360           0 :                 set_dumpable(current->mm, SUID_DUMP_USER);
    1361             : 
    1362             :         perf_event_exec();
    1363           0 :         __set_task_comm(me, kbasename(bprm->filename), true);
    1364             : 
    1365             :         /* An exec changes our domain. We are no longer part of the thread
    1366             :            group */
    1367           0 :         WRITE_ONCE(me->self_exec_id, me->self_exec_id + 1);
    1368           0 :         flush_signal_handlers(me, 0);
    1369             : 
    1370           0 :         retval = set_cred_ucounts(bprm->cred);
    1371           0 :         if (retval < 0)
    1372             :                 goto out_unlock;
    1373             : 
    1374             :         /*
    1375             :          * install the new credentials for this executable
    1376             :          */
    1377           0 :         security_bprm_committing_creds(bprm);
    1378             : 
    1379           0 :         commit_creds(bprm->cred);
    1380           0 :         bprm->cred = NULL;
    1381             : 
    1382             :         /*
    1383             :          * Disable monitoring for regular users
    1384             :          * when executing setuid binaries. Must
    1385             :          * wait until new credentials are committed
    1386             :          * by commit_creds() above
    1387             :          */
    1388           0 :         if (get_dumpable(me->mm) != SUID_DUMP_USER)
    1389             :                 perf_event_exit_task(me);
    1390             :         /*
    1391             :          * cred_guard_mutex must be held at least to this point to prevent
    1392             :          * ptrace_attach() from altering our determination of the task's
    1393             :          * credentials; any time after this it may be unlocked.
    1394             :          */
    1395           0 :         security_bprm_committed_creds(bprm);
    1396             : 
    1397             :         /* Pass the opened binary to the interpreter. */
    1398           0 :         if (bprm->have_execfd) {
    1399           0 :                 retval = get_unused_fd_flags(0);
    1400           0 :                 if (retval < 0)
    1401             :                         goto out_unlock;
    1402           0 :                 fd_install(retval, bprm->executable);
    1403           0 :                 bprm->executable = NULL;
    1404           0 :                 bprm->execfd = retval;
    1405             :         }
    1406             :         return 0;
    1407             : 
    1408             : out_unlock:
    1409           0 :         up_write(&me->signal->exec_update_lock);
    1410             : out:
    1411             :         return retval;
    1412             : }
    1413             : EXPORT_SYMBOL(begin_new_exec);
    1414             : 
    1415           0 : void would_dump(struct linux_binprm *bprm, struct file *file)
    1416             : {
    1417           0 :         struct inode *inode = file_inode(file);
    1418           0 :         struct mnt_idmap *idmap = file_mnt_idmap(file);
    1419           0 :         if (inode_permission(idmap, inode, MAY_READ) < 0) {
    1420             :                 struct user_namespace *old, *user_ns;
    1421           0 :                 bprm->interp_flags |= BINPRM_FLAGS_ENFORCE_NONDUMP;
    1422             : 
    1423             :                 /* Ensure mm->user_ns contains the executable */
    1424           0 :                 user_ns = old = bprm->mm->user_ns;
    1425           0 :                 while ((user_ns != &init_user_ns) &&
    1426           0 :                        !privileged_wrt_inode_uidgid(user_ns, idmap, inode))
    1427           0 :                         user_ns = user_ns->parent;
    1428             : 
    1429           0 :                 if (old != user_ns) {
    1430           0 :                         bprm->mm->user_ns = get_user_ns(user_ns);
    1431           0 :                         put_user_ns(old);
    1432             :                 }
    1433             :         }
    1434           0 : }
    1435             : EXPORT_SYMBOL(would_dump);
    1436             : 
    1437           0 : void setup_new_exec(struct linux_binprm * bprm)
    1438             : {
    1439             :         /* Setup things that can depend upon the personality */
    1440           0 :         struct task_struct *me = current;
    1441             : 
    1442           0 :         arch_pick_mmap_layout(me->mm, &bprm->rlim_stack);
    1443             : 
    1444             :         arch_setup_new_exec();
    1445             : 
    1446             :         /* Set the new mm task size. We have to do that late because it may
    1447             :          * depend on TIF_32BIT which is only updated in flush_thread() on
    1448             :          * some architectures like powerpc
    1449             :          */
    1450           0 :         me->mm->task_size = TASK_SIZE;
    1451           0 :         up_write(&me->signal->exec_update_lock);
    1452           0 :         mutex_unlock(&me->signal->cred_guard_mutex);
    1453           0 : }
    1454             : EXPORT_SYMBOL(setup_new_exec);
    1455             : 
    1456             : /* Runs immediately before start_thread() takes over. */
    1457           0 : void finalize_exec(struct linux_binprm *bprm)
    1458             : {
    1459             :         /* Store any stack rlimit changes before starting thread. */
    1460           0 :         task_lock(current->group_leader);
    1461           0 :         current->signal->rlim[RLIMIT_STACK] = bprm->rlim_stack;
    1462           0 :         task_unlock(current->group_leader);
    1463           0 : }
    1464             : EXPORT_SYMBOL(finalize_exec);
    1465             : 
    1466             : /*
    1467             :  * Prepare credentials and lock ->cred_guard_mutex.
    1468             :  * begin_new_exec() commits the new creds; setup_new_exec() drops the lock.
    1469             :  * Or, if exec fails before, free_bprm() should release ->cred
    1470             :  * and unlock.
    1471             :  */
    1472           0 : static int prepare_bprm_creds(struct linux_binprm *bprm)
    1473             : {
    1474           0 :         if (mutex_lock_interruptible(&current->signal->cred_guard_mutex))
    1475             :                 return -ERESTARTNOINTR;
    1476             : 
    1477           0 :         bprm->cred = prepare_exec_creds();
    1478           0 :         if (likely(bprm->cred))
    1479             :                 return 0;
    1480             : 
    1481           0 :         mutex_unlock(&current->signal->cred_guard_mutex);
    1482             :         return -ENOMEM;
    1483             : }
    1484             : 
    1485           0 : static void free_bprm(struct linux_binprm *bprm)
    1486             : {
    1487           0 :         if (bprm->mm) {
    1488           0 :                 acct_arg_size(bprm, 0);
    1489           0 :                 mmput(bprm->mm);
    1490             :         }
    1491           0 :         free_arg_pages(bprm);
    1492           0 :         if (bprm->cred) {
    1493           0 :                 mutex_unlock(&current->signal->cred_guard_mutex);
    1494           0 :                 abort_creds(bprm->cred);
    1495             :         }
    1496           0 :         if (bprm->file) {
    1497           0 :                 allow_write_access(bprm->file);
    1498           0 :                 fput(bprm->file);
    1499             :         }
    1500           0 :         if (bprm->executable)
    1501           0 :                 fput(bprm->executable);
    1502             :         /* If a binfmt changed the interp, free it. */
    1503           0 :         if (bprm->interp != bprm->filename)
    1504           0 :                 kfree(bprm->interp);
    1505           0 :         kfree(bprm->fdpath);
    1506           0 :         kfree(bprm);
    1507           0 : }
    1508             : 
    1509           0 : static struct linux_binprm *alloc_bprm(int fd, struct filename *filename)
    1510             : {
    1511           0 :         struct linux_binprm *bprm = kzalloc(sizeof(*bprm), GFP_KERNEL);
    1512           0 :         int retval = -ENOMEM;
    1513           0 :         if (!bprm)
    1514             :                 goto out;
    1515             : 
    1516           0 :         if (fd == AT_FDCWD || filename->name[0] == '/') {
    1517           0 :                 bprm->filename = filename->name;
    1518             :         } else {
    1519           0 :                 if (filename->name[0] == '\0')
    1520           0 :                         bprm->fdpath = kasprintf(GFP_KERNEL, "/dev/fd/%d", fd);
    1521             :                 else
    1522           0 :                         bprm->fdpath = kasprintf(GFP_KERNEL, "/dev/fd/%d/%s",
    1523             :                                                   fd, filename->name);
    1524           0 :                 if (!bprm->fdpath)
    1525             :                         goto out_free;
    1526             : 
    1527           0 :                 bprm->filename = bprm->fdpath;
    1528             :         }
    1529           0 :         bprm->interp = bprm->filename;
    1530             : 
    1531           0 :         retval = bprm_mm_init(bprm);
    1532           0 :         if (retval)
    1533             :                 goto out_free;
    1534             :         return bprm;
    1535             : 
    1536             : out_free:
    1537           0 :         free_bprm(bprm);
    1538             : out:
    1539           0 :         return ERR_PTR(retval);
    1540             : }
    1541             : 
    1542           0 : int bprm_change_interp(const char *interp, struct linux_binprm *bprm)
    1543             : {
    1544             :         /* If a binfmt changed the interp, free it first. */
    1545           0 :         if (bprm->interp != bprm->filename)
    1546           0 :                 kfree(bprm->interp);
    1547           0 :         bprm->interp = kstrdup(interp, GFP_KERNEL);
    1548           0 :         if (!bprm->interp)
    1549             :                 return -ENOMEM;
    1550           0 :         return 0;
    1551             : }
    1552             : EXPORT_SYMBOL(bprm_change_interp);
    1553             : 
    1554             : /*
    1555             :  * determine how safe it is to execute the proposed program
    1556             :  * - the caller must hold ->cred_guard_mutex to protect against
    1557             :  *   PTRACE_ATTACH or seccomp thread-sync
    1558             :  */
    1559           0 : static void check_unsafe_exec(struct linux_binprm *bprm)
    1560             : {
    1561           0 :         struct task_struct *p = current, *t;
    1562             :         unsigned n_fs;
    1563             : 
    1564           0 :         if (p->ptrace)
    1565           0 :                 bprm->unsafe |= LSM_UNSAFE_PTRACE;
    1566             : 
    1567             :         /*
    1568             :          * This isn't strictly necessary, but it makes it harder for LSMs to
    1569             :          * mess up.
    1570             :          */
    1571           0 :         if (task_no_new_privs(current))
    1572           0 :                 bprm->unsafe |= LSM_UNSAFE_NO_NEW_PRIVS;
    1573             : 
    1574             :         /*
    1575             :          * If another task is sharing our fs, we cannot safely
    1576             :          * suid exec because the differently privileged task
    1577             :          * will be able to manipulate the current directory, etc.
    1578             :          * It would be nice to force an unshare instead...
    1579             :          */
    1580           0 :         t = p;
    1581           0 :         n_fs = 1;
    1582           0 :         spin_lock(&p->fs->lock);
    1583             :         rcu_read_lock();
    1584           0 :         while_each_thread(p, t) {
    1585           0 :                 if (t->fs == p->fs)
    1586           0 :                         n_fs++;
    1587             :         }
    1588             :         rcu_read_unlock();
    1589             : 
    1590           0 :         if (p->fs->users > n_fs)
    1591           0 :                 bprm->unsafe |= LSM_UNSAFE_SHARE;
    1592             :         else
    1593           0 :                 p->fs->in_exec = 1;
    1594           0 :         spin_unlock(&p->fs->lock);
    1595           0 : }
    1596             : 
    1597           0 : static void bprm_fill_uid(struct linux_binprm *bprm, struct file *file)
    1598             : {
    1599             :         /* Handle suid and sgid on files */
    1600             :         struct mnt_idmap *idmap;
    1601           0 :         struct inode *inode = file_inode(file);
    1602             :         unsigned int mode;
    1603             :         vfsuid_t vfsuid;
    1604             :         vfsgid_t vfsgid;
    1605             : 
    1606           0 :         if (!mnt_may_suid(file->f_path.mnt))
    1607             :                 return;
    1608             : 
    1609           0 :         if (task_no_new_privs(current))
    1610             :                 return;
    1611             : 
    1612           0 :         mode = READ_ONCE(inode->i_mode);
    1613           0 :         if (!(mode & (S_ISUID|S_ISGID)))
    1614             :                 return;
    1615             : 
    1616           0 :         idmap = file_mnt_idmap(file);
    1617             : 
    1618             :         /* Be careful if suid/sgid is set */
    1619           0 :         inode_lock(inode);
    1620             : 
    1621             :         /* reload atomically mode/uid/gid now that lock held */
    1622           0 :         mode = inode->i_mode;
    1623           0 :         vfsuid = i_uid_into_vfsuid(idmap, inode);
    1624           0 :         vfsgid = i_gid_into_vfsgid(idmap, inode);
    1625           0 :         inode_unlock(inode);
    1626             : 
    1627             :         /* We ignore suid/sgid if there are no mappings for them in the ns */
    1628           0 :         if (!vfsuid_has_mapping(bprm->cred->user_ns, vfsuid) ||
    1629           0 :             !vfsgid_has_mapping(bprm->cred->user_ns, vfsgid))
    1630             :                 return;
    1631             : 
    1632           0 :         if (mode & S_ISUID) {
    1633           0 :                 bprm->per_clear |= PER_CLEAR_ON_SETID;
    1634           0 :                 bprm->cred->euid = vfsuid_into_kuid(vfsuid);
    1635             :         }
    1636             : 
    1637           0 :         if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP)) {
    1638           0 :                 bprm->per_clear |= PER_CLEAR_ON_SETID;
    1639           0 :                 bprm->cred->egid = vfsgid_into_kgid(vfsgid);
    1640             :         }
    1641             : }
    1642             : 
    1643             : /*
    1644             :  * Compute bprm->cred based upon the final binary.
    1645             :  */
    1646           0 : static int bprm_creds_from_file(struct linux_binprm *bprm)
    1647             : {
    1648             :         /* Compute creds based on which file? */
    1649           0 :         struct file *file = bprm->execfd_creds ? bprm->executable : bprm->file;
    1650             : 
    1651           0 :         bprm_fill_uid(bprm, file);
    1652           0 :         return security_bprm_creds_from_file(bprm, file);
    1653             : }
    1654             : 
    1655             : /*
    1656             :  * Fill the binprm structure from the inode.
    1657             :  * Read the first BINPRM_BUF_SIZE bytes
    1658             :  *
    1659             :  * This may be called multiple times for binary chains (scripts for example).
    1660             :  */
    1661           0 : static int prepare_binprm(struct linux_binprm *bprm)
    1662             : {
    1663           0 :         loff_t pos = 0;
    1664             : 
    1665           0 :         memset(bprm->buf, 0, BINPRM_BUF_SIZE);
    1666           0 :         return kernel_read(bprm->file, bprm->buf, BINPRM_BUF_SIZE, &pos);
    1667             : }
    1668             : 
    1669             : /*
    1670             :  * Arguments are '\0' separated strings found at the location bprm->p
    1671             :  * points to; chop off the first by relocating bprm->p to right after
    1672             :  * the first '\0' encountered.
    1673             :  */
    1674           0 : int remove_arg_zero(struct linux_binprm *bprm)
    1675             : {
    1676           0 :         int ret = 0;
    1677             :         unsigned long offset;
    1678             :         char *kaddr;
    1679             :         struct page *page;
    1680             : 
    1681           0 :         if (!bprm->argc)
    1682             :                 return 0;
    1683             : 
    1684             :         do {
    1685           0 :                 offset = bprm->p & ~PAGE_MASK;
    1686           0 :                 page = get_arg_page(bprm, bprm->p, 0);
    1687           0 :                 if (!page) {
    1688             :                         ret = -EFAULT;
    1689             :                         goto out;
    1690             :                 }
    1691           0 :                 kaddr = kmap_local_page(page);
    1692             : 
    1693           0 :                 for (; offset < PAGE_SIZE && kaddr[offset];
    1694           0 :                                 offset++, bprm->p++)
    1695             :                         ;
    1696             : 
    1697           0 :                 kunmap_local(kaddr);
    1698           0 :                 put_arg_page(page);
    1699           0 :         } while (offset == PAGE_SIZE);
    1700             : 
    1701           0 :         bprm->p++;
    1702           0 :         bprm->argc--;
    1703           0 :         ret = 0;
    1704             : 
    1705             : out:
    1706             :         return ret;
    1707             : }
    1708             : EXPORT_SYMBOL(remove_arg_zero);
    1709             : 
    1710             : #define printable(c) (((c)=='\t') || ((c)=='\n') || (0x20<=(c) && (c)<=0x7e))
    1711             : /*
    1712             :  * Cycle through the list of binary format handlers until one recognizes the image.
    1713             :  */
    1714           0 : static int search_binary_handler(struct linux_binprm *bprm)
    1715             : {
    1716           0 :         bool need_retry = IS_ENABLED(CONFIG_MODULES);
    1717             :         struct linux_binfmt *fmt;
    1718             :         int retval;
    1719             : 
    1720           0 :         retval = prepare_binprm(bprm);
    1721           0 :         if (retval < 0)
    1722             :                 return retval;
    1723             : 
    1724           0 :         retval = security_bprm_check(bprm);
    1725             :         if (retval)
    1726             :                 return retval;
    1727             : 
    1728           0 :         retval = -ENOENT;
    1729             :  retry:
    1730           0 :         read_lock(&binfmt_lock);
    1731           0 :         list_for_each_entry(fmt, &formats, lh) {
    1732           0 :                 if (!try_module_get(fmt->module))
    1733             :                         continue;
    1734           0 :                 read_unlock(&binfmt_lock);
    1735             : 
    1736           0 :                 retval = fmt->load_binary(bprm);
    1737             : 
    1738           0 :                 read_lock(&binfmt_lock);
    1739           0 :                 put_binfmt(fmt);
    1740           0 :                 if (bprm->point_of_no_return || (retval != -ENOEXEC)) {
    1741           0 :                         read_unlock(&binfmt_lock);
    1742           0 :                         return retval;
    1743             :                 }
    1744             :         }
    1745           0 :         read_unlock(&binfmt_lock);
    1746             : 
    1747             :         if (need_retry) {
    1748             :                 if (printable(bprm->buf[0]) && printable(bprm->buf[1]) &&
    1749             :                     printable(bprm->buf[2]) && printable(bprm->buf[3]))
    1750             :                         return retval;
    1751             :                 if (request_module("binfmt-%04x", *(ushort *)(bprm->buf + 2)) < 0)
    1752             :                         return retval;
    1753             :                 need_retry = false;
    1754             :                 goto retry;
    1755             :         }
    1756             : 
    1757           0 :         return retval;
    1758             : }
    1759             : 
    1760             : /* binfmt handlers will call back into begin_new_exec() on success. */
                       /*
                        * exec_binprm() - find a handler for @bprm and report the completed exec.
                        * @bprm: exec state; bprm->file is what search_binary_handler() works on.
                        *
                        * Calls search_binary_handler() in a loop.  Whenever a pass returns with
                        * bprm->interpreter set, that file replaces bprm->file and the search is
                        * run again; the loop ends on the first pass that leaves
                        * bprm->interpreter NULL.  The audit, tracepoint, ptrace and proc
                        * connector calls at the bottom are reached only through that exit.
                        *
                        * Return: 0 on success; -ELOOP once depth exceeds 5; -ENOEXEC when
                        * bprm->have_execfd is set and bprm->executable is already populated;
                        * otherwise the negative value returned by search_binary_handler().
                        */
    1761           0 : static int exec_binprm(struct linux_binprm *bprm)
    1762             : {
    1763             :         pid_t old_pid, old_vpid;
    1764             :         int ret, depth;
    1765             : 
    1766             :         /* Need to fetch pid before load_binary changes it */
    1767           0 :         old_pid = current->pid;
                               /*
                                * old_vpid is current's pid number looked up in the active pid
                                * namespace of its parent; it is what ptrace_event() reports below.
                                */
    1768             :         rcu_read_lock();
    1769           0 :         old_vpid = task_pid_nr_ns(current, task_active_pid_ns(current->parent));
    1770             :         rcu_read_unlock();
    1771             : 
    1772             :         /* This allows 4 levels of binfmt rewrites before failing hard. */
                               /*
                                * NOTE(review): with "depth > 5" the handler search runs for
                                * depth 0..5, so up to five interpreter substitutions can succeed
                                * before -ELOOP; the "4" above looks stale - confirm.
                                */
    1773           0 :         for (depth = 0;; depth++) {
    1774             :                 struct file *exec;
    1775           0 :                 if (depth > 5)
    1776             :                         return -ELOOP;
    1777             : 
    1778           0 :                 ret = search_binary_handler(bprm);
    1779           0 :                 if (ret < 0)
    1780             :                         return ret;
                                       /* No replacement file requested: the exec is complete. */
    1781           0 :                 if (!bprm->interpreter)
    1782             :                         break;
    1783             : 
                                       /*
                                        * Make the interpreter the file to load on the next pass and
                                        * keep the previous file in 'exec' until its fate is decided.
                                        */
    1784           0 :                 exec = bprm->file;
    1785           0 :                 bprm->file = bprm->interpreter;
    1786           0 :                 bprm->interpreter = NULL;
    1787             : 
    1788           0 :                 allow_write_access(exec);
                                       /*
                                        * With have_execfd set the previous file is retained in
                                        * bprm->executable instead of being released.  Only one file
                                        * can be retained that way: a second hand-off drops the
                                        * reference and fails the exec.
                                        */
    1789           0 :                 if (unlikely(bprm->have_execfd)) {
    1790           0 :                         if (bprm->executable) {
    1791           0 :                                 fput(exec);
    1792           0 :                                 return -ENOEXEC;
    1793             :                         }
    1794           0 :                         bprm->executable = exec;
    1795             :                 } else
    1796           0 :                         fput(exec);
    1797             :         }
    1798             : 
                               /* Success notifications; old_pid/old_vpid were sampled before the search. */
    1799           0 :         audit_bprm(bprm);
    1800           0 :         trace_sched_process_exec(current, old_pid, bprm);
    1801           0 :         ptrace_event(PTRACE_EVENT_EXEC, old_vpid);
    1802           0 :         proc_exec_connector(current);
    1803           0 :         return 0;
    1804             : }
    1805             : 
    1806             : /*
    1807             :  * bprm_execve() - open the program file and execute it for a prepared @bprm.
                        * @bprm:     exec state already filled in by the caller (argc/envc and
                        *            the copied strings).
                        * @fd:       passed to do_open_execat(); also tested with
                        *            get_close_on_exec() when bprm->fdpath is set.
                        * @filename: passed to do_open_execat().
                        * @flags:    passed to do_open_execat().
                        *
                        * Called by do_execveat_common() and kernel_execve().  This function
                        * does not free @bprm or @filename; both of those callers do so with
                        * free_bprm() and putname() after it returns.
                        *
                        * Return: the (non-negative) result of exec_binprm() on success,
                        * otherwise the error from whichever step failed.
    1808             :  */
    1809           0 : static int bprm_execve(struct linux_binprm *bprm,
    1810             :                        int fd, struct filename *filename, int flags)
    1811             : {
    1812             :         struct file *file;
    1813             :         int retval;
    1814             : 
                               /* Fails before any in-exec state is marked, so a plain return is enough. */
    1815           0 :         retval = prepare_bprm_creds(bprm);
    1816           0 :         if (retval)
    1817             :                 return retval;
    1818             : 
    1819             :         /*
    1820             :          * Check for unsafe execution states before exec_binprm(), which
    1821             :          * will call back into begin_new_exec(), into bprm_creds_from_file(),
    1822             :          * where setuid-ness is evaluated.
    1823             :          */
    1824           0 :         check_unsafe_exec(bprm);
    1825           0 :         current->in_execve = 1;
    1826           0 :         sched_mm_cid_before_execve(current);
    1827             : 
    1828           0 :         file = do_open_execat(fd, filename, flags);
                               /*
                                * retval is loaded unconditionally; it is only consumed when the
                                * IS_ERR() branch is taken and is overwritten further down otherwise.
                                */
    1829           0 :         retval = PTR_ERR(file);
    1830           0 :         if (IS_ERR(file))
    1831             :                 goto out_unmark;
    1832             : 
    1833             :         sched_exec();
    1834             : 
    1835           0 :         bprm->file = file;
    1836             :         /*
    1837             :          * Record that a name derived from an O_CLOEXEC fd will be
    1838             :          * inaccessible after exec.  This allows the code in exec to
    1839             :          * choose to fail when the executable is not mmaped into the
    1840             :          * interpreter and an open file descriptor is not passed to
    1841             :          * the interpreter.  This makes for a better user experience
    1842             :          * than having the interpreter start and then immediately fail
    1843             :          * when it finds the executable is inaccessible.
    1844             :          */
    1845           0 :         if (bprm->fdpath && get_close_on_exec(fd))
    1846           0 :                 bprm->interp_flags |= BINPRM_FLAGS_PATH_INACCESSIBLE;
    1847             : 
    1848             :         /* Set the unchanging part of bprm->cred */
    1849           0 :         retval = security_bprm_creds_for_exec(bprm);
    1850             :         if (retval)
    1851             :                 goto out;
    1852             : 
    1853           0 :         retval = exec_binprm(bprm);
    1854           0 :         if (retval < 0)
    1855             :                 goto out;
    1856             : 
    1857           0 :         sched_mm_cid_after_execve(current);
    1858             :         /* execve succeeded */
                               /*
                                * current->fs->in_exec is cleared here but never set in this
                                * function - presumably check_unsafe_exec() sets it; confirm.
                                */
    1859           0 :         current->fs->in_exec = 0;
    1860           0 :         current->in_execve = 0;
    1861           0 :         rseq_execve(current);
    1862           0 :         acct_update_integrals(current);
    1863           0 :         task_numa_free(current, false);
    1864           0 :         return retval;
    1865             : 
    1866             : out:
    1867             :         /*
    1868             :          * If past the point of no return ensure the code never
    1869             :          * returns to the userspace process.  Use an existing fatal
    1870             :          * signal if present otherwise terminate the process with
    1871             :          * SIGSEGV.
    1872             :          */
    1873           0 :         if (bprm->point_of_no_return && !fatal_signal_pending(current))
    1874           0 :                 force_fatal_sig(SIGSEGV);
    1875             : 
                       /* Reached from 'out' by fall-through and directly when the open fails. */
    1876             : out_unmark:
    1877           0 :         sched_mm_cid_after_execve(current);
    1878           0 :         current->fs->in_exec = 0;
    1879           0 :         current->in_execve = 0;
    1880             : 
    1881           0 :         return retval;
    1882             : }
    1883             : 
                       /*
                        * do_execveat_common() - build a linux_binprm from user argv/envp and exec it.
                        * @fd:       directory fd, handed to alloc_bprm() and bprm_execve().
                        * @filename: name to execute; may be an error pointer (see below).
                        * @argv:     user argument vector.
                        * @envp:     user environment vector.
                        * @flags:    handed to bprm_execve().
                        *
                        * If @filename is an error pointer its error code is returned at once.
                        * Otherwise @filename is consumed: every later exit goes through
                        * putname().  Strings are copied into @bprm in the order filename,
                        * envp, argv, followed by an empty argv[0] when argv held no entries.
                        *
                        * Return: the result of bprm_execve(), or the negative error of the
                        * first step that failed (-EAGAIN for the RLIMIT_NPROC recheck).
                        */
    1884           0 : static int do_execveat_common(int fd, struct filename *filename,
    1885             :                               struct user_arg_ptr argv,
    1886             :                               struct user_arg_ptr envp,
    1887             :                               int flags)
    1888             : {
    1889             :         struct linux_binprm *bprm;
    1890             :         int retval;
    1891             : 
    1892           0 :         if (IS_ERR(filename))
    1893           0 :                 return PTR_ERR(filename);
    1894             : 
    1895             :         /*
    1896             :          * We move the actual failure in case of RLIMIT_NPROC excess from
    1897             :          * set*uid() to execve() because too many poorly written programs
    1898             :          * don't check setuid() return code.  Here we additionally recheck
    1899             :          * whether NPROC limit is still exceeded.
    1900             :          */
    1901           0 :         if ((current->flags & PF_NPROC_EXCEEDED) &&
    1902           0 :             is_rlimit_overlimit(current_ucounts(), UCOUNT_RLIMIT_NPROC, rlimit(RLIMIT_NPROC))) {
    1903             :                 retval = -EAGAIN;
    1904             :                 goto out_ret;
    1905             :         }
    1906             : 
    1907             :         /* We're below the limit (still or again), so we don't want to make
    1908             :          * further execve() calls fail. */
    1909           0 :         current->flags &= ~PF_NPROC_EXCEEDED;
    1910             : 
    1911           0 :         bprm = alloc_bprm(fd, filename);
    1912           0 :         if (IS_ERR(bprm)) {
    1913           0 :                 retval = PTR_ERR(bprm);
    1914           0 :                 goto out_ret;
    1915             :         }
    1916             : 
                               /*
                                * The zero-argv warning is emitted before the error check; a
                                * negative count skips it and leaves through out_free.
                                */
    1917           0 :         retval = count(argv, MAX_ARG_STRINGS);
    1918           0 :         if (retval == 0)
    1919           0 :                 pr_warn_once("process '%s' launched '%s' with NULL argv: empty string added\n",
    1920             :                              current->comm, bprm->filename);
    1921           0 :         if (retval < 0)
    1922             :                 goto out_free;
    1923           0 :         bprm->argc = retval;
    1924             : 
    1925           0 :         retval = count(envp, MAX_ARG_STRINGS);
    1926           0 :         if (retval < 0)
    1927             :                 goto out_free;
    1928           0 :         bprm->envc = retval;
    1929             : 
    1930           0 :         retval = bprm_stack_limits(bprm);
    1931           0 :         if (retval < 0)
    1932             :                 goto out_free;
    1933             : 
                               /* bprm->exec records bprm->p as it stands right after the filename copy. */
    1934           0 :         retval = copy_string_kernel(bprm->filename, bprm);
    1935           0 :         if (retval < 0)
    1936             :                 goto out_free;
    1937           0 :         bprm->exec = bprm->p;
    1938             : 
    1939           0 :         retval = copy_strings(bprm->envc, envp, bprm);
    1940           0 :         if (retval < 0)
    1941             :                 goto out_free;
    1942             : 
    1943           0 :         retval = copy_strings(bprm->argc, argv, bprm);
    1944           0 :         if (retval < 0)
    1945             :                 goto out_free;
    1946             : 
    1947             :         /*
    1948             :          * When argv is empty, add an empty string ("") as argv[0] to
    1949             :          * ensure confused userspace programs that start processing
    1950             :          * from argv[1] won't end up walking envp. See also
    1951             :          * bprm_stack_limits().
    1952             :          */
    1953           0 :         if (bprm->argc == 0) {
    1954           0 :                 retval = copy_string_kernel("", bprm);
    1955           0 :                 if (retval < 0)
    1956             :                         goto out_free;
    1957           0 :                 bprm->argc = 1;
    1958             :         }
    1959             : 
                               /* Success and failure both fall through the cleanup labels below. */
    1960           0 :         retval = bprm_execve(bprm, fd, filename, flags);
    1961             : out_free:
    1962           0 :         free_bprm(bprm);
    1963             : 
    1964             : out_ret:
    1965           0 :         putname(filename);
    1966           0 :         return retval;
    1967             : }
    1968             : 
                       /*
                        * kernel_execve() - exec a program using strings that live in kernel memory.
                        * @kernel_filename: path of the program, turned into a struct filename
                        *                   with getname_kernel().
                        * @argv:            argument strings, counted by count_strings_kernel().
                        * @envp:            environment strings, counted the same way.
                        *
                        * Same sequence as do_execveat_common(), using the *_kernel string
                        * helpers, AT_FDCWD as the directory fd and 0 as the flags.  There is
                        * no empty-argv fallback here: a zero argument count triggers a
                        * one-time warning and fails with -EINVAL.
                        *
                        * Return: the result of bprm_execve(), -EINVAL for a kernel thread
                        * caller or an empty argv, or the negative error of the failing step.
                        */
    1969           0 : int kernel_execve(const char *kernel_filename,
    1970             :                   const char *const *argv, const char *const *envp)
    1971             : {
    1972             :         struct filename *filename;
    1973             :         struct linux_binprm *bprm;
    1974           0 :         int fd = AT_FDCWD;
    1975             :         int retval;
    1976             : 
    1977             :         /* It is nonsense for kernel threads to call execve */
    1978           0 :         if (WARN_ON_ONCE(current->flags & PF_KTHREAD))
    1979             :                 return -EINVAL;
    1980             : 
    1981           0 :         filename = getname_kernel(kernel_filename);
    1982           0 :         if (IS_ERR(filename))
    1983           0 :                 return PTR_ERR(filename);
    1984             : 
    1985           0 :         bprm = alloc_bprm(fd, filename);
    1986           0 :         if (IS_ERR(bprm)) {
    1987           0 :                 retval = PTR_ERR(bprm);
    1988           0 :                 goto out_ret;
    1989             :         }
    1990             : 
                               /* An argument count of zero is turned into -EINVAL after warning once. */
    1991           0 :         retval = count_strings_kernel(argv);
    1992           0 :         if (WARN_ON_ONCE(retval == 0))
    1993           0 :                 retval = -EINVAL;
    1994           0 :         if (retval < 0)
    1995             :                 goto out_free;
    1996           0 :         bprm->argc = retval;
    1997             : 
    1998           0 :         retval = count_strings_kernel(envp);
    1999           0 :         if (retval < 0)
    2000             :                 goto out_free;
    2001           0 :         bprm->envc = retval;
    2002             : 
    2003           0 :         retval = bprm_stack_limits(bprm);
    2004           0 :         if (retval < 0)
    2005             :                 goto out_free;
    2006             : 
                               /* Copy order: filename, then envp, then argv. */
    2007           0 :         retval = copy_string_kernel(bprm->filename, bprm);
    2008           0 :         if (retval < 0)
    2009             :                 goto out_free;
    2010           0 :         bprm->exec = bprm->p;
    2011             : 
    2012           0 :         retval = copy_strings_kernel(bprm->envc, envp, bprm);
    2013           0 :         if (retval < 0)
    2014             :                 goto out_free;
    2015             : 
    2016           0 :         retval = copy_strings_kernel(bprm->argc, argv, bprm);
    2017           0 :         if (retval < 0)
    2018             :                 goto out_free;
    2019             : 
    2020           0 :         retval = bprm_execve(bprm, fd, filename, 0);
    2021             : out_free:
    2022           0 :         free_bprm(bprm);
    2023             : out_ret:
    2024           0 :         putname(filename);
    2025           0 :         return retval;
    2026             : }
    2027             : 
                       /*
                        * do_execve() - native execve entry: wrap the user pointers and delegate.
                        *
                        * Stores @__argv and @__envp in the .ptr.native member of a
                        * struct user_arg_ptr each, then calls do_execveat_common() with
                        * AT_FDCWD and flags 0.  Returns whatever that call returns.
                        */
    2028             : static int do_execve(struct filename *filename,
    2029             :         const char __user *const __user *__argv,
    2030             :         const char __user *const __user *__envp)
    2031             : {
    2032           0 :         struct user_arg_ptr argv = { .ptr.native = __argv };
    2033           0 :         struct user_arg_ptr envp = { .ptr.native = __envp };
    2034           0 :         return do_execveat_common(AT_FDCWD, filename, argv, envp, 0);
    2035             : }
    2036             : 
                       /*
                        * do_execveat() - native execveat entry: wrap the user pointers and delegate.
                        *
                        * Like do_execve(), but forwards the caller's @fd and @flags to
                        * do_execveat_common() instead of AT_FDCWD and 0.
                        */
    2037             : static int do_execveat(int fd, struct filename *filename,
    2038             :                 const char __user *const __user *__argv,
    2039             :                 const char __user *const __user *__envp,
    2040             :                 int flags)
    2041             : {
    2042           0 :         struct user_arg_ptr argv = { .ptr.native = __argv };
    2043           0 :         struct user_arg_ptr envp = { .ptr.native = __envp };
    2044             : 
    2045           0 :         return do_execveat_common(fd, filename, argv, envp, flags);
    2046             : }
    2047             : 
    2048             : #ifdef CONFIG_COMPAT
                       /*
                        * compat_do_execve() - compat execve entry, built only with CONFIG_COMPAT.
                        *
                        * Wraps the compat_uptr_t vectors in struct user_arg_ptr with
                        * .is_compat set, then calls do_execveat_common() with AT_FDCWD and
                        * flags 0.
                        */
    2049             : static int compat_do_execve(struct filename *filename,
    2050             :         const compat_uptr_t __user *__argv,
    2051             :         const compat_uptr_t __user *__envp)
    2052             : {
    2053             :         struct user_arg_ptr argv = {
    2054             :                 .is_compat = true,
    2055             :                 .ptr.compat = __argv,
    2056             :         };
    2057             :         struct user_arg_ptr envp = {
    2058             :                 .is_compat = true,
    2059             :                 .ptr.compat = __envp,
    2060             :         };
    2061             :         return do_execveat_common(AT_FDCWD, filename, argv, envp, 0);
    2062             : }
    2063             : 
                       /*
                        * compat_do_execveat() - compat execveat entry.
                        *
                        * Same wrapping as compat_do_execve() (.is_compat set, pointers stored
                        * in .ptr.compat), but forwards the caller's @fd and @flags to
                        * do_execveat_common().
                        */
    2064             : static int compat_do_execveat(int fd, struct filename *filename,
    2065             :                               const compat_uptr_t __user *__argv,
    2066             :                               const compat_uptr_t __user *__envp,
    2067             :                               int flags)
    2068             : {
    2069             :         struct user_arg_ptr argv = {
    2070             :                 .is_compat = true,
    2071             :                 .ptr.compat = __argv,
    2072             :         };
    2073             :         struct user_arg_ptr envp = {
    2074             :                 .is_compat = true,
    2075             :                 .ptr.compat = __envp,
    2076             :         };
    2077             :         return do_execveat_common(fd, filename, argv, envp, flags);
    2078             : }
    2079             : #endif
    2080             : 
                       /*
                        * set_binfmt() - install @new as the binfmt of the current task's mm.
                        * @new: handler to record in current->mm->binfmt; may be NULL.
                        *
                        * Calls module_put() on the module of the previously recorded handler,
                        * if there was one, stores @new, and calls __module_get() on
                        * @new->module when @new is not NULL.
                        */
    2081           0 : void set_binfmt(struct linux_binfmt *new)
    2082             : {
    2083           0 :         struct mm_struct *mm = current->mm;
    2084             : 
    2085           0 :         if (mm->binfmt)
    2086             :                 module_put(mm->binfmt->module);
    2087             : 
    2088           0 :         mm->binfmt = new;
    2089             :         if (new)
    2090             :                 __module_get(new->module);
    2091           0 : }
    2092             : EXPORT_SYMBOL(set_binfmt);
    2093             : 
    2094             : /*
    2095             :  * set_dumpable stores three-value SUID_DUMP_* into mm->flags.
                        *
                        * @mm:    mm whose flags are updated.
                        * @value: new setting; anything above SUID_DUMP_ROOT is rejected.
                        *
                        * The range check compares @value as unsigned, so negative values are
                        * rejected too; a rejected value triggers WARN_ON() and leaves
                        * mm->flags unchanged.  Accepted values are written with
                        * set_mask_bits() under MMF_DUMPABLE_MASK.
    2096             :  */
    2097           0 : void set_dumpable(struct mm_struct *mm, int value)
    2098             : {
    2099           0 :         if (WARN_ON((unsigned)value > SUID_DUMP_ROOT))
    2100             :                 return;
    2101             : 
    2102           0 :         set_mask_bits(&mm->flags, MMF_DUMPABLE_MASK, value);
    2103             : }
    2104             : 
                       /*
                        * execve system call: converts the user path with getname() and hands
                        * it to do_execve().  The getname() result is not checked here;
                        * do_execveat_common() tests it with IS_ERR() and releases it with
                        * putname().
                        */
    2105           0 : SYSCALL_DEFINE3(execve,
    2106             :                 const char __user *, filename,
    2107             :                 const char __user *const __user *, argv,
    2108             :                 const char __user *const __user *, envp)
    2109             : {
    2110           0 :         return do_execve(getname(filename), argv, envp);
    2111             : }
    2112             : 
                       /*
                        * execveat system call: converts the user path with
                        * getname_uflags(filename, flags) and hands it, together with @fd and
                        * @flags, to do_execveat().  As with execve, the name is validated
                        * and released in do_execveat_common().
                        */
    2113           0 : SYSCALL_DEFINE5(execveat,
    2114             :                 int, fd, const char __user *, filename,
    2115             :                 const char __user *const __user *, argv,
    2116             :                 const char __user *const __user *, envp,
    2117             :                 int, flags)
    2118             : {
    2119           0 :         return do_execveat(fd,
    2120             :                            getname_uflags(filename, flags),
    2121             :                            argv, envp, flags);
    2122             : }
    2123             : 
    2124             : #ifdef CONFIG_COMPAT
                       /*
                        * Compat execve system call (CONFIG_COMPAT only): same as the native
                        * entry, but argv/envp are compat_uptr_t vectors and the work is
                        * delegated to compat_do_execve().
                        */
    2125             : COMPAT_SYSCALL_DEFINE3(execve, const char __user *, filename,
    2126             :         const compat_uptr_t __user *, argv,
    2127             :         const compat_uptr_t __user *, envp)
    2128             : {
    2129             :         return compat_do_execve(getname(filename), argv, envp);
    2130             : }
    2131             : 
                       /*
                        * Compat execveat system call: converts the path with
                        * getname_uflags(filename, flags) and delegates to
                        * compat_do_execveat() with the caller's @fd and @flags.
                        */
    2132             : COMPAT_SYSCALL_DEFINE5(execveat, int, fd,
    2133             :                        const char __user *, filename,
    2134             :                        const compat_uptr_t __user *, argv,
    2135             :                        const compat_uptr_t __user *, envp,
    2136             :                        int,  flags)
    2137             : {
    2138             :         return compat_do_execveat(fd,
    2139             :                                   getname_uflags(filename, flags),
    2140             :                                   argv, envp, flags);
    2141             : }
    2142             : #endif
    2143             : 
    2144             : #ifdef CONFIG_SYSCTL
    2145             : 
                       /*
                        * proc_dointvec_minmax_coredump() - sysctl handler used by the
                        * "suid_dumpable" entry of fs_exec_sysctls.
                        *
                        * Forwards all arguments to proc_dointvec_minmax().  When that returns
                        * 0, validate_coredump_safety() is called; @write is not consulted, so
                        * this happens for any successful access.
                        *
                        * Return: the value returned by proc_dointvec_minmax().
                        */
    2146           0 : static int proc_dointvec_minmax_coredump(struct ctl_table *table, int write,
    2147             :                 void *buffer, size_t *lenp, loff_t *ppos)
    2148             : {
    2149           0 :         int error = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
    2150             : 
    2151           0 :         if (!error)
    2152           0 :                 validate_coredump_safety();
    2153           0 :         return error;
    2154             : }
    2155             : 
                       /*
                        * Sysctl table for this file, registered under "fs" by
                        * init_fs_exec_sysctls().  It holds a single entry, "suid_dumpable",
                        * backed by the int variable suid_dumpable with mode 0644 and handled
                        * by proc_dointvec_minmax_coredump().  The empty initializer ends the
                        * table.
                        * NOTE(review): extra1/extra2 are presumably the lower and upper
                        * bounds (0 and 2) enforced by proc_dointvec_minmax() - confirm.
                        */
    2156             : static struct ctl_table fs_exec_sysctls[] = {
    2157             :         {
    2158             :                 .procname       = "suid_dumpable",
    2159             :                 .data           = &suid_dumpable,
    2160             :                 .maxlen         = sizeof(int),
    2161             :                 .mode           = 0644,
    2162             :                 .proc_handler   = proc_dointvec_minmax_coredump,
    2163             :                 .extra1         = SYSCTL_ZERO,
    2164             :                 .extra2         = SYSCTL_TWO,
    2165             :         },
    2166             :         { }
    2167             : };
    2168             : 
                       /*
                        * init_fs_exec_sysctls() - register fs_exec_sysctls under "fs".
                        *
                        * Hooked up through fs_initcall() below.  Always returns 0.
                        */
    2169           1 : static int __init init_fs_exec_sysctls(void)
    2170             : {
    2171           1 :         register_sysctl_init("fs", fs_exec_sysctls);
    2172           1 :         return 0;
    2173             : }
    2174             : 
    2175             : fs_initcall(init_fs_exec_sysctls);
    2176             : #endif /* CONFIG_SYSCTL */

Generated by: LCOV version 1.14