Line data Source code
1 : // SPDX-License-Identifier: GPL-2.0-only
2 : /*
3 : * linux/fs/exec.c
4 : *
5 : * Copyright (C) 1991, 1992 Linus Torvalds
6 : */
7 :
8 : /*
9 : * #!-checking implemented by tytso.
10 : */
11 : /*
12 : * Demand-loading implemented 01.12.91 - no need to read anything but
13 : * the header into memory. The inode of the executable is put into
14 : * "current->executable", and page faults do the actual loading. Clean.
15 : *
16 : * Once more I can proudly say that linux stood up to being changed: it
17 : * was less than 2 hours work to get demand-loading completely implemented.
18 : *
19 : * Demand loading changed July 1993 by Eric Youngdale. Use mmap instead,
20 : * current->executable is only used by the procfs. This allows a dispatch
21 : * table to check for several different types of binary formats. We keep
22 : * trying until we recognize the file or we run out of supported binary
23 : * formats.
24 : */
25 :
26 : #include <linux/kernel_read_file.h>
27 : #include <linux/slab.h>
28 : #include <linux/file.h>
29 : #include <linux/fdtable.h>
30 : #include <linux/mm.h>
31 : #include <linux/stat.h>
32 : #include <linux/fcntl.h>
33 : #include <linux/swap.h>
34 : #include <linux/string.h>
35 : #include <linux/init.h>
36 : #include <linux/sched/mm.h>
37 : #include <linux/sched/coredump.h>
38 : #include <linux/sched/signal.h>
39 : #include <linux/sched/numa_balancing.h>
40 : #include <linux/sched/task.h>
41 : #include <linux/pagemap.h>
42 : #include <linux/perf_event.h>
43 : #include <linux/highmem.h>
44 : #include <linux/spinlock.h>
45 : #include <linux/key.h>
46 : #include <linux/personality.h>
47 : #include <linux/binfmts.h>
48 : #include <linux/utsname.h>
49 : #include <linux/pid_namespace.h>
50 : #include <linux/module.h>
51 : #include <linux/namei.h>
52 : #include <linux/mount.h>
53 : #include <linux/security.h>
54 : #include <linux/syscalls.h>
55 : #include <linux/tsacct_kern.h>
56 : #include <linux/cn_proc.h>
57 : #include <linux/audit.h>
58 : #include <linux/kmod.h>
59 : #include <linux/fsnotify.h>
60 : #include <linux/fs_struct.h>
61 : #include <linux/oom.h>
62 : #include <linux/compat.h>
63 : #include <linux/vmalloc.h>
64 : #include <linux/io_uring.h>
65 : #include <linux/syscall_user_dispatch.h>
66 : #include <linux/coredump.h>
67 : #include <linux/time_namespace.h>
68 : #include <linux/user_events.h>
69 :
70 : #include <linux/uaccess.h>
71 : #include <asm/mmu_context.h>
72 : #include <asm/tlb.h>
73 :
74 : #include <trace/events/task.h>
75 : #include "internal.h"
76 :
77 : #include <trace/events/sched.h>
78 :
79 : static int bprm_creds_from_file(struct linux_binprm *bprm);
80 :
81 : int suid_dumpable = 0;
82 :
83 : static LIST_HEAD(formats);
84 : static DEFINE_RWLOCK(binfmt_lock);
85 :
86 2 : void __register_binfmt(struct linux_binfmt * fmt, int insert)
87 : {
88 2 : write_lock(&binfmt_lock);
89 2 : insert ? list_add(&fmt->lh, &formats) :
90 2 : list_add_tail(&fmt->lh, &formats);
91 2 : write_unlock(&binfmt_lock);
92 2 : }
93 :
94 : EXPORT_SYMBOL(__register_binfmt);
95 :
96 0 : void unregister_binfmt(struct linux_binfmt * fmt)
97 : {
98 0 : write_lock(&binfmt_lock);
99 0 : list_del(&fmt->lh);
100 0 : write_unlock(&binfmt_lock);
101 0 : }
102 :
103 : EXPORT_SYMBOL(unregister_binfmt);
104 :
105 : static inline void put_binfmt(struct linux_binfmt * fmt)
106 : {
107 0 : module_put(fmt->module);
108 : }
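/*
 * Illustrative sketch (not part of fs/exec.c): how a loadable module
 * would hook into the formats list walked below. register_binfmt() and
 * insert_binfmt() from <linux/binfmts.h> wrap __register_binfmt() with
 * insert == 0 and insert == 1 respectively. All "example_*" names are
 * hypothetical.
 */
#if 0	/* example only, never compiled here */
#include <linux/binfmts.h>
#include <linux/module.h>

static int example_load_binary(struct linux_binprm *bprm)
{
	/* Decline the image so the next handler gets a chance. */
	return -ENOEXEC;
}

static struct linux_binfmt example_fmt = {
	.module      = THIS_MODULE,
	.load_binary = example_load_binary,
};

static int __init example_init(void)
{
	register_binfmt(&example_fmt);
	return 0;
}

static void __exit example_exit(void)
{
	unregister_binfmt(&example_fmt);
}

module_init(example_init);
module_exit(example_exit);
MODULE_LICENSE("GPL");
#endif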
109 :
110 0 : bool path_noexec(const struct path *path)
111 : {
112 0 : return (path->mnt->mnt_flags & MNT_NOEXEC) ||
113 0 : (path->mnt->mnt_sb->s_iflags & SB_I_NOEXEC);
114 : }
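/*
 * Example (illustrative): for a file sitting on a filesystem mounted
 * with "mount -o noexec", this predicate is true, so execve() fails
 * with -EACCES even when the file's mode has execute bits set.
 */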
115 :
116 : #ifdef CONFIG_USELIB
117 : /*
118 : * Note that a shared library must be both readable and executable for
119 : * security reasons.
120 : *
121 : * Also note that we take the address to load from the file itself.
122 : */
123 : SYSCALL_DEFINE1(uselib, const char __user *, library)
124 : {
125 : struct linux_binfmt *fmt;
126 : struct file *file;
127 : struct filename *tmp = getname(library);
128 : int error = PTR_ERR(tmp);
129 : static const struct open_flags uselib_flags = {
130 : .open_flag = O_LARGEFILE | O_RDONLY | __FMODE_EXEC,
131 : .acc_mode = MAY_READ | MAY_EXEC,
132 : .intent = LOOKUP_OPEN,
133 : .lookup_flags = LOOKUP_FOLLOW,
134 : };
135 :
136 : if (IS_ERR(tmp))
137 : goto out;
138 :
139 : file = do_filp_open(AT_FDCWD, tmp, &uselib_flags);
140 : putname(tmp);
141 : error = PTR_ERR(file);
142 : if (IS_ERR(file))
143 : goto out;
144 :
145 : /*
146 : * may_open() has already checked for this, so it should be
147 : * impossible to trip now. But we need to be extra cautious
148 : * and check again at the very end too.
149 : */
150 : error = -EACCES;
151 : if (WARN_ON_ONCE(!S_ISREG(file_inode(file)->i_mode) ||
152 : path_noexec(&file->f_path)))
153 : goto exit;
154 :
155 : fsnotify_open(file);
156 :
157 : error = -ENOEXEC;
158 :
159 : read_lock(&binfmt_lock);
160 : list_for_each_entry(fmt, &formats, lh) {
161 : if (!fmt->load_shlib)
162 : continue;
163 : if (!try_module_get(fmt->module))
164 : continue;
165 : read_unlock(&binfmt_lock);
166 : error = fmt->load_shlib(file);
167 : read_lock(&binfmt_lock);
168 : put_binfmt(fmt);
169 : if (error != -ENOEXEC)
170 : break;
171 : }
172 : read_unlock(&binfmt_lock);
173 : exit:
174 : fput(file);
175 : out:
176 : return error;
177 : }
178 : #endif /* #ifdef CONFIG_USELIB */
179 :
180 : #ifdef CONFIG_MMU
181 : /*
182 : * The nascent bprm->mm is not visible until exec_mmap(), but it can
183 : * use a lot of memory, so account these pages in current->mm
184 : * temporarily for oom_badness()->get_mm_rss(). Once exec succeeds
185 : * or fails, we change the counter back via acct_arg_size(0).
186 : */
187 0 : static void acct_arg_size(struct linux_binprm *bprm, unsigned long pages)
188 : {
189 0 : struct mm_struct *mm = current->mm;
190 0 : long diff = (long)(pages - bprm->vma_pages);
191 :
192 0 : if (!mm || !diff)
193 : return;
194 :
195 0 : bprm->vma_pages = pages;
196 0 : add_mm_counter(mm, MM_ANONPAGES, diff);
197 : }
198 :
199 0 : static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
200 : int write)
201 : {
202 : struct page *page;
203 : int ret;
204 0 : unsigned int gup_flags = 0;
205 :
206 : #ifdef CONFIG_STACK_GROWSUP
207 : if (write) {
208 : ret = expand_downwards(bprm->vma, pos);
209 : if (ret < 0)
210 : return NULL;
211 : }
212 : #endif
213 :
214 0 : if (write)
215 0 : gup_flags |= FOLL_WRITE;
216 :
217 : /*
218 : * We are doing an exec(). 'current' is the process
219 : * doing the exec and bprm->mm is the new process's mm.
220 : */
221 0 : mmap_read_lock(bprm->mm);
222 0 : ret = get_user_pages_remote(bprm->mm, pos, 1, gup_flags,
223 : &page, NULL, NULL);
224 0 : mmap_read_unlock(bprm->mm);
225 0 : if (ret <= 0)
226 : return NULL;
227 :
228 0 : if (write)
229 0 : acct_arg_size(bprm, vma_pages(bprm->vma));
230 :
231 0 : return page;
232 : }
233 :
234 : static void put_arg_page(struct page *page)
235 : {
236 0 : put_page(page);
237 : }
238 :
239 : static void free_arg_pages(struct linux_binprm *bprm)
240 : {
241 : }
242 :
243 : static void flush_arg_page(struct linux_binprm *bprm, unsigned long pos,
244 : struct page *page)
245 : {
246 0 : flush_cache_page(bprm->vma, pos, page_to_pfn(page));
247 : }
248 :
249 0 : static int __bprm_mm_init(struct linux_binprm *bprm)
250 : {
251 : int err;
252 0 : struct vm_area_struct *vma = NULL;
253 0 : struct mm_struct *mm = bprm->mm;
254 :
255 0 : bprm->vma = vma = vm_area_alloc(mm);
256 0 : if (!vma)
257 : return -ENOMEM;
258 0 : vma_set_anonymous(vma);
259 :
260 0 : if (mmap_write_lock_killable(mm)) {
261 : err = -EINTR;
262 : goto err_free;
263 : }
264 :
265 : /*
266 : * Place the stack at the largest stack address the architecture
267 : * supports. Later, we'll move this to an appropriate place. We don't
268 : * use STACK_TOP because that can depend on attributes which aren't
269 : * configured yet.
270 : */
271 : BUILD_BUG_ON(VM_STACK_FLAGS & VM_STACK_INCOMPLETE_SETUP);
272 0 : vma->vm_end = STACK_TOP_MAX;
273 0 : vma->vm_start = vma->vm_end - PAGE_SIZE;
274 0 : vm_flags_init(vma, VM_SOFTDIRTY | VM_STACK_FLAGS | VM_STACK_INCOMPLETE_SETUP);
275 0 : vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
276 :
277 0 : err = insert_vm_struct(mm, vma);
278 0 : if (err)
279 : goto err;
280 :
281 0 : mm->stack_vm = mm->total_vm = 1;
282 0 : mmap_write_unlock(mm);
283 0 : bprm->p = vma->vm_end - sizeof(void *);
284 0 : return 0;
285 : err:
286 : mmap_write_unlock(mm);
287 : err_free:
288 0 : bprm->vma = NULL;
289 0 : vm_area_free(vma);
290 0 : return err;
291 : }
292 :
293 : static bool valid_arg_len(struct linux_binprm *bprm, long len)
294 : {
295 0 : return len <= MAX_ARG_STRLEN;
296 : }
297 :
298 : #else
299 :
300 : static inline void acct_arg_size(struct linux_binprm *bprm, unsigned long pages)
301 : {
302 : }
303 :
304 : static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
305 : int write)
306 : {
307 : struct page *page;
308 :
309 : page = bprm->page[pos / PAGE_SIZE];
310 : if (!page && write) {
311 : page = alloc_page(GFP_HIGHUSER|__GFP_ZERO);
312 : if (!page)
313 : return NULL;
314 : bprm->page[pos / PAGE_SIZE] = page;
315 : }
316 :
317 : return page;
318 : }
319 :
320 : static void put_arg_page(struct page *page)
321 : {
322 : }
323 :
324 : static void free_arg_page(struct linux_binprm *bprm, int i)
325 : {
326 : if (bprm->page[i]) {
327 : __free_page(bprm->page[i]);
328 : bprm->page[i] = NULL;
329 : }
330 : }
331 :
332 : static void free_arg_pages(struct linux_binprm *bprm)
333 : {
334 : int i;
335 :
336 : for (i = 0; i < MAX_ARG_PAGES; i++)
337 : free_arg_page(bprm, i);
338 : }
339 :
340 : static void flush_arg_page(struct linux_binprm *bprm, unsigned long pos,
341 : struct page *page)
342 : {
343 : }
344 :
345 : static int __bprm_mm_init(struct linux_binprm *bprm)
346 : {
347 : bprm->p = PAGE_SIZE * MAX_ARG_PAGES - sizeof(void *);
348 : return 0;
349 : }
350 :
351 : static bool valid_arg_len(struct linux_binprm *bprm, long len)
352 : {
353 : return len <= bprm->p;
354 : }
355 :
356 : #endif /* CONFIG_MMU */
357 :
358 : /*
359 : * Create a new mm_struct and populate it with a temporary stack
360 : * vm_area_struct. We don't have enough context at this point to set the stack
361 : * flags, permissions, and offset, so we use temporary values. We'll update
362 : * them later in setup_arg_pages().
363 : */
364 0 : static int bprm_mm_init(struct linux_binprm *bprm)
365 : {
366 : int err;
367 0 : struct mm_struct *mm = NULL;
368 :
369 0 : bprm->mm = mm = mm_alloc();
370 0 : err = -ENOMEM;
371 0 : if (!mm)
372 : goto err;
373 :
374 : /* Save current stack limit for all calculations made during exec. */
375 0 : task_lock(current->group_leader);
376 0 : bprm->rlim_stack = current->signal->rlim[RLIMIT_STACK];
377 0 : task_unlock(current->group_leader);
378 :
379 0 : err = __bprm_mm_init(bprm);
380 0 : if (err)
381 : goto err;
382 :
383 : return 0;
384 :
385 : err:
386 0 : if (mm) {
387 0 : bprm->mm = NULL;
388 : mmdrop(mm);
389 : }
390 :
391 : return err;
392 : }
393 :
394 : struct user_arg_ptr {
395 : #ifdef CONFIG_COMPAT
396 : bool is_compat;
397 : #endif
398 : union {
399 : const char __user *const __user *native;
400 : #ifdef CONFIG_COMPAT
401 : const compat_uptr_t __user *compat;
402 : #endif
403 : } ptr;
404 : };
405 :
406 0 : static const char __user *get_user_arg_ptr(struct user_arg_ptr argv, int nr)
407 : {
408 : const char __user *native;
409 :
410 : #ifdef CONFIG_COMPAT
411 : if (unlikely(argv.is_compat)) {
412 : compat_uptr_t compat;
413 :
414 : if (get_user(compat, argv.ptr.compat + nr))
415 : return ERR_PTR(-EFAULT);
416 :
417 : return compat_ptr(compat);
418 : }
419 : #endif
420 :
421 0 : if (get_user(native, argv.ptr.native + nr))
422 : return ERR_PTR(-EFAULT);
423 :
424 0 : return native;
425 : }
426 :
427 : /*
428 : * count() counts the number of strings in array ARGV.
429 : */
430 0 : static int count(struct user_arg_ptr argv, int max)
431 : {
432 0 : int i = 0;
433 :
434 0 : if (argv.ptr.native != NULL) {
435 0 : for (;;) {
436 0 : const char __user *p = get_user_arg_ptr(argv, i);
437 :
438 0 : if (!p)
439 : break;
440 :
441 0 : if (IS_ERR(p))
442 : return -EFAULT;
443 :
444 0 : if (i >= max)
445 : return -E2BIG;
446 0 : ++i;
447 :
448 0 : if (fatal_signal_pending(current))
449 : return -ERESTARTNOHAND;
450 0 : cond_resched();
451 : }
452 : }
453 : return i;
454 : }
455 :
456 0 : static int count_strings_kernel(const char *const *argv)
457 : {
458 : int i;
459 :
460 0 : if (!argv)
461 : return 0;
462 :
463 0 : for (i = 0; argv[i]; ++i) {
464 0 : if (i >= MAX_ARG_STRINGS)
465 : return -E2BIG;
466 0 : if (fatal_signal_pending(current))
467 : return -ERESTARTNOHAND;
468 0 : cond_resched();
469 : }
470 : return i;
471 : }
472 :
473 : static int bprm_stack_limits(struct linux_binprm *bprm)
474 : {
475 : unsigned long limit, ptr_size;
476 :
477 : /*
478 : * Limit to 1/4 of the stack size rlimit or 3/4 of _STK_LIM
479 : * (whichever is smaller) for the argv+env strings.
480 : * This ensures that:
481 : * - the remaining binfmt code will not run out of stack space,
482 : * - the program will have a reasonable amount of stack left
483 : * to work from.
484 : */
485 0 : limit = _STK_LIM / 4 * 3;
486 0 : limit = min(limit, bprm->rlim_stack.rlim_cur / 4);
487 : /*
488 : * We've historically supported up to 32 pages (ARG_MAX)
489 : * of argument strings even with small stacks
490 : */
491 0 : limit = max_t(unsigned long, limit, ARG_MAX);
492 : /*
493 : * We must account for the size of all the argv and envp pointers to
494 : * the argv and envp strings, since they will also take up space in
495 : * the stack. They aren't stored until much later when we can't
496 : * signal to the parent that the child has run out of stack space.
497 : * Instead, calculate it here so it's possible to fail gracefully.
498 : *
499 : * In the case of argc = 0, make sure there is space for adding an
500 : * empty string (which will bump argc to 1), to ensure confused
501 : * userspace programs don't start processing from argv[1], thinking
502 : * argc can never be 0, to keep them from walking envp by accident.
503 : * See do_execveat_common().
504 : */
505 0 : ptr_size = (max(bprm->argc, 1) + bprm->envc) * sizeof(void *);
506 0 : if (limit <= ptr_size)
507 : return -E2BIG;
508 0 : limit -= ptr_size;
509 :
510 0 : bprm->argmin = bprm->p - limit;
511 : return 0;
512 : }
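/*
 * Worked example (illustrative numbers): with RLIMIT_STACK at a typical
 * 8 MiB, limit = min(_STK_LIM / 4 * 3 = 6 MiB, 8 MiB / 4 = 2 MiB)
 * = 2 MiB, which already exceeds ARG_MAX (128 KiB) so the max_t() is a
 * no-op. An exec with argc = 2 and envc = 20 on a 64-bit machine then
 * gives up (2 + 20) * 8 = 176 bytes of that budget to the pointer
 * arrays before bprm->argmin is derived.
 */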
513 :
514 : /*
515 : * 'copy_strings()' copies argument/environment strings from the old
516 : * process's memory to the new process's stack. The call to get_user_pages()
517 : * ensures the destination page is created and not swapped out.
518 : */
519 0 : static int copy_strings(int argc, struct user_arg_ptr argv,
520 : struct linux_binprm *bprm)
521 : {
522 0 : struct page *kmapped_page = NULL;
523 0 : char *kaddr = NULL;
524 0 : unsigned long kpos = 0;
525 : int ret;
526 :
527 0 : while (argc-- > 0) {
528 : const char __user *str;
529 : int len;
530 : unsigned long pos;
531 :
532 0 : ret = -EFAULT;
533 0 : str = get_user_arg_ptr(argv, argc);
534 0 : if (IS_ERR(str))
535 : goto out;
536 :
537 0 : len = strnlen_user(str, MAX_ARG_STRLEN);
538 0 : if (!len)
539 : goto out;
540 :
541 0 : ret = -E2BIG;
542 0 : if (!valid_arg_len(bprm, len))
543 : goto out;
544 :
545 : /* We're going to work our way backwards. */
546 0 : pos = bprm->p;
547 0 : str += len;
548 0 : bprm->p -= len;
549 : #ifdef CONFIG_MMU
550 0 : if (bprm->p < bprm->argmin)
551 : goto out;
552 : #endif
553 :
554 0 : while (len > 0) {
555 : int offset, bytes_to_copy;
556 :
557 0 : if (fatal_signal_pending(current)) {
558 : ret = -ERESTARTNOHAND;
559 : goto out;
560 : }
561 0 : cond_resched();
562 :
563 0 : offset = pos % PAGE_SIZE;
564 0 : if (offset == 0)
565 0 : offset = PAGE_SIZE;
566 :
567 0 : bytes_to_copy = offset;
568 0 : if (bytes_to_copy > len)
569 0 : bytes_to_copy = len;
570 :
571 0 : offset -= bytes_to_copy;
572 0 : pos -= bytes_to_copy;
573 0 : str -= bytes_to_copy;
574 0 : len -= bytes_to_copy;
575 :
576 0 : if (!kmapped_page || kpos != (pos & PAGE_MASK)) {
577 : struct page *page;
578 :
579 0 : page = get_arg_page(bprm, pos, 1);
580 0 : if (!page) {
581 : ret = -E2BIG;
582 : goto out;
583 : }
584 :
585 0 : if (kmapped_page) {
586 0 : flush_dcache_page(kmapped_page);
587 0 : kunmap_local(kaddr);
588 : put_arg_page(kmapped_page);
589 : }
590 0 : kmapped_page = page;
591 0 : kaddr = kmap_local_page(kmapped_page);
592 0 : kpos = pos & PAGE_MASK;
593 0 : flush_arg_page(bprm, kpos, kmapped_page);
594 : }
595 0 : if (copy_from_user(kaddr+offset, str, bytes_to_copy)) {
596 : ret = -EFAULT;
597 : goto out;
598 : }
599 : }
600 : }
601 : ret = 0;
602 : out:
603 0 : if (kmapped_page) {
604 0 : flush_dcache_page(kmapped_page);
605 0 : kunmap_local(kaddr);
606 : put_arg_page(kmapped_page);
607 : }
608 0 : return ret;
609 : }
610 :
611 : /*
612 : * Copy an argument/environment string from the kernel to the process's stack.
613 : */
614 0 : int copy_string_kernel(const char *arg, struct linux_binprm *bprm)
615 : {
616 0 : int len = strnlen(arg, MAX_ARG_STRLEN) + 1 /* terminating NUL */;
617 0 : unsigned long pos = bprm->p;
618 :
619 0 : if (len == 0)
620 : return -EFAULT;
621 0 : if (!valid_arg_len(bprm, len))
622 : return -E2BIG;
623 :
624 : /* We're going to work our way backwards. */
625 0 : arg += len;
626 0 : bprm->p -= len;
627 0 : if (IS_ENABLED(CONFIG_MMU) && bprm->p < bprm->argmin)
628 : return -E2BIG;
629 :
630 0 : while (len > 0) {
631 0 : unsigned int bytes_to_copy = min_t(unsigned int, len,
632 : min_not_zero(offset_in_page(pos), PAGE_SIZE));
633 : struct page *page;
634 :
635 0 : pos -= bytes_to_copy;
636 0 : arg -= bytes_to_copy;
637 0 : len -= bytes_to_copy;
638 :
639 0 : page = get_arg_page(bprm, pos, 1);
640 0 : if (!page)
641 : return -E2BIG;
642 0 : flush_arg_page(bprm, pos & PAGE_MASK, page);
643 0 : memcpy_to_page(page, offset_in_page(pos), arg, bytes_to_copy);
644 : put_arg_page(page);
645 : }
646 :
647 : return 0;
648 : }
649 : EXPORT_SYMBOL(copy_string_kernel);
650 :
651 0 : static int copy_strings_kernel(int argc, const char *const *argv,
652 : struct linux_binprm *bprm)
653 : {
654 0 : while (argc-- > 0) {
655 0 : int ret = copy_string_kernel(argv[argc], bprm);
656 0 : if (ret < 0)
657 : return ret;
658 0 : if (fatal_signal_pending(current))
659 : return -ERESTARTNOHAND;
660 0 : cond_resched();
661 : }
662 : return 0;
663 : }
664 :
665 : #ifdef CONFIG_MMU
666 :
667 : /*
668 : * During bprm_mm_init(), we create a temporary stack at STACK_TOP_MAX. Once
669 : * the binfmt code determines where the new stack should reside, we shift it to
670 : * its final location. The process proceeds as follows:
671 : *
672 : * 1) Use shift to calculate the new vma endpoints.
673 : * 2) Extend vma to cover both the old and new ranges. This ensures the
674 : * arguments passed to subsequent functions are consistent.
675 : * 3) Move vma's page tables to the new range.
676 : * 4) Free up any cleared pgd range.
677 : * 5) Shrink the vma to cover only the new range.
678 : */
679 0 : static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift)
680 : {
681 0 : struct mm_struct *mm = vma->vm_mm;
682 0 : unsigned long old_start = vma->vm_start;
683 0 : unsigned long old_end = vma->vm_end;
684 0 : unsigned long length = old_end - old_start;
685 0 : unsigned long new_start = old_start - shift;
686 0 : unsigned long new_end = old_end - shift;
687 0 : VMA_ITERATOR(vmi, mm, new_start);
688 : struct vm_area_struct *next;
689 : struct mmu_gather tlb;
690 :
691 0 : BUG_ON(new_start > new_end);
692 :
693 : /*
694 : * ensure there are no vmas between where we want to go
695 : * and where we are
696 : */
697 0 : if (vma != vma_next(&vmi))
698 : return -EFAULT;
699 :
700 : /*
701 : * cover the whole range: [new_start, old_end)
702 : */
703 0 : if (vma_expand(&vmi, vma, new_start, old_end, vma->vm_pgoff, NULL))
704 : return -ENOMEM;
705 :
706 : /*
707 : * move the page tables downwards, on failure we rely on
708 : * process cleanup to remove whatever mess we made.
709 : */
710 0 : if (length != move_page_tables(vma, old_start,
711 : vma, new_start, length, false))
712 : return -ENOMEM;
713 :
714 0 : lru_add_drain();
715 0 : tlb_gather_mmu(&tlb, mm);
716 0 : next = vma_next(&vmi);
717 0 : if (new_end > old_start) {
718 : /*
719 : * when the old and new regions overlap clear from new_end.
720 : */
721 0 : free_pgd_range(&tlb, new_end, old_end, new_end,
722 : next ? next->vm_start : USER_PGTABLES_CEILING);
723 : } else {
724 : /*
725 : * otherwise, clean from old_start; this is done to avoid touching
726 : * the address space in [new_end, old_start), as some architectures
727 : * have constraints on va-space that make this illegal (IA64); for
728 : * the others it's just a little faster.
729 : */
730 0 : free_pgd_range(&tlb, old_start, old_end, new_end,
731 : next ? next->vm_start : USER_PGTABLES_CEILING);
732 : }
733 0 : tlb_finish_mmu(&tlb);
734 :
735 0 : vma_prev(&vmi);
736 : /* Shrink the vma to just the new range */
737 0 : return vma_shrink(&vmi, vma, new_start, new_end, vma->vm_pgoff);
738 : }
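/*
 * Example (illustrative): if the temporary argument pages end at
 * STACK_TOP_MAX and the binfmt settles on a stack top 2 MiB lower,
 * shift is 2 MiB: the vma is first expanded to cover
 * [old_start - 2M, old_end), the page-table entries are moved down by
 * 2M, the vacated pgd range is freed, and the vma is finally shrunk
 * to [new_start, new_end).
 */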
739 :
740 : /*
741 : * Finalizes the stack vm_area_struct. The flags and permissions are updated,
742 : * the stack is optionally relocated, and some extra space is added.
743 : */
744 0 : int setup_arg_pages(struct linux_binprm *bprm,
745 : unsigned long stack_top,
746 : int executable_stack)
747 : {
748 : unsigned long ret;
749 : unsigned long stack_shift;
750 0 : struct mm_struct *mm = current->mm;
751 0 : struct vm_area_struct *vma = bprm->vma;
752 0 : struct vm_area_struct *prev = NULL;
753 : unsigned long vm_flags;
754 : unsigned long stack_base;
755 : unsigned long stack_size;
756 : unsigned long stack_expand;
757 : unsigned long rlim_stack;
758 : struct mmu_gather tlb;
759 : struct vma_iterator vmi;
760 :
761 : #ifdef CONFIG_STACK_GROWSUP
762 : /* Limit stack size */
763 : stack_base = bprm->rlim_stack.rlim_max;
764 :
765 : stack_base = calc_max_stack_size(stack_base);
766 :
767 : /* Add space for stack randomization. */
768 : stack_base += (STACK_RND_MASK << PAGE_SHIFT);
769 :
770 : /* Make sure we didn't let the argument array grow too large. */
771 : if (vma->vm_end - vma->vm_start > stack_base)
772 : return -ENOMEM;
773 :
774 : stack_base = PAGE_ALIGN(stack_top - stack_base);
775 :
776 : stack_shift = vma->vm_start - stack_base;
777 : mm->arg_start = bprm->p - stack_shift;
778 : bprm->p = vma->vm_end - stack_shift;
779 : #else
780 0 : stack_top = arch_align_stack(stack_top);
781 0 : stack_top = PAGE_ALIGN(stack_top);
782 :
783 0 : if (unlikely(stack_top < mmap_min_addr) ||
784 0 : unlikely(vma->vm_end - vma->vm_start >= stack_top - mmap_min_addr))
785 : return -ENOMEM;
786 :
787 0 : stack_shift = vma->vm_end - stack_top;
788 :
789 0 : bprm->p -= stack_shift;
790 0 : mm->arg_start = bprm->p;
791 : #endif
792 :
793 0 : if (bprm->loader)
794 0 : bprm->loader -= stack_shift;
795 0 : bprm->exec -= stack_shift;
796 :
797 0 : if (mmap_write_lock_killable(mm))
798 : return -EINTR;
799 :
800 0 : vm_flags = VM_STACK_FLAGS;
801 :
802 : /*
803 : * Adjust stack execute permissions; explicitly enable for
804 : * EXSTACK_ENABLE_X, disable for EXSTACK_DISABLE_X and leave alone
805 : * (arch default) otherwise.
806 : */
807 0 : if (unlikely(executable_stack == EXSTACK_ENABLE_X))
808 : vm_flags |= VM_EXEC;
809 0 : else if (executable_stack == EXSTACK_DISABLE_X)
810 0 : vm_flags &= ~VM_EXEC;
811 0 : vm_flags |= mm->def_flags;
812 0 : vm_flags |= VM_STACK_INCOMPLETE_SETUP;
813 :
814 0 : vma_iter_init(&vmi, mm, vma->vm_start);
815 :
816 0 : tlb_gather_mmu(&tlb, mm);
817 0 : ret = mprotect_fixup(&vmi, &tlb, vma, &prev, vma->vm_start, vma->vm_end,
818 : vm_flags);
819 0 : tlb_finish_mmu(&tlb);
820 :
821 0 : if (ret)
822 : goto out_unlock;
823 0 : BUG_ON(prev != vma);
824 :
825 0 : if (unlikely(vm_flags & VM_EXEC)) {
826 0 : pr_warn_once("process '%pD4' started with executable stack\n",
827 : bprm->file);
828 : }
829 :
830 : /* Move stack pages down in memory. */
831 0 : if (stack_shift) {
832 0 : ret = shift_arg_pages(vma, stack_shift);
833 0 : if (ret)
834 : goto out_unlock;
835 : }
836 :
837 : /* mprotect_fixup is overkill to remove the temporary stack flags */
838 0 : vm_flags_clear(vma, VM_STACK_INCOMPLETE_SETUP);
839 :
840 0 : stack_expand = 131072UL; /* an arbitrary 32*4k (or 2*64k) pages */
841 0 : stack_size = vma->vm_end - vma->vm_start;
842 : /*
843 : * Align this down to a page boundary as expand_stack
844 : * will align it up.
845 : */
846 0 : rlim_stack = bprm->rlim_stack.rlim_cur & PAGE_MASK;
847 :
848 0 : stack_expand = min(rlim_stack, stack_size + stack_expand);
849 :
850 : #ifdef CONFIG_STACK_GROWSUP
851 : stack_base = vma->vm_start + stack_expand;
852 : #else
853 0 : stack_base = vma->vm_end - stack_expand;
854 : #endif
855 0 : current->mm->start_stack = bprm->p;
856 0 : ret = expand_stack(vma, stack_base);
857 0 : if (ret)
858 0 : ret = -EFAULT;
859 :
860 : out_unlock:
861 0 : mmap_write_unlock(mm);
862 0 : return ret;
863 : }
864 : EXPORT_SYMBOL(setup_arg_pages);
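/*
 * Example (illustrative): if the copied argument strings occupy 20 KiB,
 * the stack vma is pre-grown to min(rlim_stack, 20 KiB + 128 KiB) =
 * 148 KiB under a typical 8 MiB rlimit, so the new program's first
 * stack accesses don't each have to fault the vma larger page by page.
 */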
865 :
866 : #else
867 :
868 : /*
869 : * Transfer the program arguments and environment from the holding pages
870 : * onto the stack. The provided stack pointer is adjusted accordingly.
871 : */
872 : int transfer_args_to_stack(struct linux_binprm *bprm,
873 : unsigned long *sp_location)
874 : {
875 : unsigned long index, stop, sp;
876 : int ret = 0;
877 :
878 : stop = bprm->p >> PAGE_SHIFT;
879 : sp = *sp_location;
880 :
881 : for (index = MAX_ARG_PAGES - 1; index >= stop; index--) {
882 : unsigned int offset = index == stop ? bprm->p & ~PAGE_MASK : 0;
883 : char *src = kmap_local_page(bprm->page[index]) + offset;
884 : sp -= PAGE_SIZE - offset;
885 : if (copy_to_user((void *) sp, src, PAGE_SIZE - offset) != 0)
886 : ret = -EFAULT;
887 : kunmap_local(src);
888 : if (ret)
889 : goto out;
890 : }
891 :
892 : *sp_location = sp;
893 :
894 : out:
895 : return ret;
896 : }
897 : EXPORT_SYMBOL(transfer_args_to_stack);
898 :
899 : #endif /* CONFIG_MMU */
900 :
901 0 : static struct file *do_open_execat(int fd, struct filename *name, int flags)
902 : {
903 : struct file *file;
904 : int err;
905 0 : struct open_flags open_exec_flags = {
906 : .open_flag = O_LARGEFILE | O_RDONLY | __FMODE_EXEC,
907 : .acc_mode = MAY_EXEC,
908 : .intent = LOOKUP_OPEN,
909 : .lookup_flags = LOOKUP_FOLLOW,
910 : };
911 :
912 0 : if ((flags & ~(AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH)) != 0)
913 : return ERR_PTR(-EINVAL);
914 0 : if (flags & AT_SYMLINK_NOFOLLOW)
915 0 : open_exec_flags.lookup_flags &= ~LOOKUP_FOLLOW;
916 0 : if (flags & AT_EMPTY_PATH)
917 0 : open_exec_flags.lookup_flags |= LOOKUP_EMPTY;
918 :
919 0 : file = do_filp_open(fd, name, &open_exec_flags);
920 0 : if (IS_ERR(file))
921 : goto out;
922 :
923 : /*
924 : * may_open() has already checked for this, so it should be
925 : * impossible to trip now. But we need to be extra cautious
926 : * and check again at the very end too.
927 : */
928 0 : err = -EACCES;
929 0 : if (WARN_ON_ONCE(!S_ISREG(file_inode(file)->i_mode) ||
930 : path_noexec(&file->f_path)))
931 : goto exit;
932 :
933 0 : err = deny_write_access(file);
934 0 : if (err)
935 : goto exit;
936 :
937 0 : if (name->name[0] != '\0')
938 0 : fsnotify_open(file);
939 :
940 : out:
941 : return file;
942 :
943 : exit:
944 0 : fput(file);
945 0 : return ERR_PTR(err);
946 : }
947 :
948 0 : struct file *open_exec(const char *name)
949 : {
950 0 : struct filename *filename = getname_kernel(name);
951 0 : struct file *f = ERR_CAST(filename);
952 :
953 0 : if (!IS_ERR(filename)) {
954 0 : f = do_open_execat(AT_FDCWD, filename, 0);
955 0 : putname(filename);
956 : }
957 0 : return f;
958 : }
959 : EXPORT_SYMBOL(open_exec);
960 :
961 : #if defined(CONFIG_BINFMT_FLAT) || defined(CONFIG_BINFMT_ELF_FDPIC)
962 : ssize_t read_code(struct file *file, unsigned long addr, loff_t pos, size_t len)
963 : {
964 : ssize_t res = vfs_read(file, (void __user *)addr, len, &pos);
965 : if (res > 0)
966 : flush_icache_user_range(addr, addr + len);
967 : return res;
968 : }
969 : EXPORT_SYMBOL(read_code);
970 : #endif
971 :
972 : /*
973 : * Maps the mm_struct mm into the current task struct.
974 : * On success, this function returns with exec_update_lock
975 : * held for writing.
976 : */
977 0 : static int exec_mmap(struct mm_struct *mm)
978 : {
979 : struct task_struct *tsk;
980 : struct mm_struct *old_mm, *active_mm;
981 : int ret;
982 :
983 : /* Notify parent that we're no longer interested in the old VM */
984 0 : tsk = current;
985 0 : old_mm = current->mm;
986 0 : exec_mm_release(tsk, old_mm);
987 : if (old_mm)
988 : sync_mm_rss(old_mm);
989 :
990 0 : ret = down_write_killable(&tsk->signal->exec_update_lock);
991 0 : if (ret)
992 : return ret;
993 :
994 0 : if (old_mm) {
995 : /*
996 : * If there is a pending fatal signal, perhaps one whose
997 : * default action is to create a coredump, get out and die
998 : * instead of going through with the exec.
999 : */
1000 0 : ret = mmap_read_lock_killable(old_mm);
1001 0 : if (ret) {
1002 0 : up_write(&tsk->signal->exec_update_lock);
1003 0 : return ret;
1004 : }
1005 : }
1006 :
1007 0 : task_lock(tsk);
1008 0 : membarrier_exec_mmap(mm);
1009 :
1010 : local_irq_disable();
1011 0 : active_mm = tsk->active_mm;
1012 0 : tsk->active_mm = mm;
1013 0 : tsk->mm = mm;
1014 0 : mm_init_cid(mm);
1015 : /*
1016 : * This prevents preemption while active_mm is being loaded and
1017 : * it and mm are being updated, which could cause problems for
1018 : * lazy tlb mm refcounting when these are updated by context
1019 : * switches. Not all architectures can handle irqs off over
1020 : * activate_mm yet.
1021 : */
1022 : if (!IS_ENABLED(CONFIG_ARCH_WANT_IRQS_OFF_ACTIVATE_MM))
1023 : local_irq_enable();
1024 0 : activate_mm(active_mm, mm);
1025 : if (IS_ENABLED(CONFIG_ARCH_WANT_IRQS_OFF_ACTIVATE_MM))
1026 : local_irq_enable();
1027 0 : lru_gen_add_mm(mm);
1028 0 : task_unlock(tsk);
1029 0 : lru_gen_use_mm(mm);
1030 0 : if (old_mm) {
1031 0 : mmap_read_unlock(old_mm);
1032 0 : BUG_ON(active_mm != old_mm);
1033 0 : setmax_mm_hiwater_rss(&tsk->signal->maxrss, old_mm);
1034 0 : mm_update_next_owner(old_mm);
1035 0 : mmput(old_mm);
1036 0 : return 0;
1037 : }
1038 : mmdrop_lazy_tlb(active_mm);
1039 : return 0;
1040 : }
1041 :
1042 0 : static int de_thread(struct task_struct *tsk)
1043 : {
1044 0 : struct signal_struct *sig = tsk->signal;
1045 0 : struct sighand_struct *oldsighand = tsk->sighand;
1046 0 : spinlock_t *lock = &oldsighand->siglock;
1047 :
1048 0 : if (thread_group_empty(tsk))
1049 : goto no_thread_group;
1050 :
1051 : /*
1052 : * Kill all other threads in the thread group.
1053 : */
1054 0 : spin_lock_irq(lock);
1055 0 : if ((sig->flags & SIGNAL_GROUP_EXIT) || sig->group_exec_task) {
1056 : /*
1057 : * Another group action in progress, just
1058 : * return so that the signal is processed.
1059 : */
1060 0 : spin_unlock_irq(lock);
1061 0 : return -EAGAIN;
1062 : }
1063 :
1064 0 : sig->group_exec_task = tsk;
1065 0 : sig->notify_count = zap_other_threads(tsk);
1066 0 : if (!thread_group_leader(tsk))
1067 0 : sig->notify_count--;
1068 :
1069 0 : while (sig->notify_count) {
1070 0 : __set_current_state(TASK_KILLABLE);
1071 0 : spin_unlock_irq(lock);
1072 0 : schedule();
1073 0 : if (__fatal_signal_pending(tsk))
1074 : goto killed;
1075 : spin_lock_irq(lock);
1076 : }
1077 0 : spin_unlock_irq(lock);
1078 :
1079 : /*
1080 : * At this point all other threads have exited, all we have to
1081 : * do is to wait for the thread group leader to become inactive,
1082 : * and to assume its PID:
1083 : */
1084 0 : if (!thread_group_leader(tsk)) {
1085 0 : struct task_struct *leader = tsk->group_leader;
1086 :
1087 : for (;;) {
1088 0 : cgroup_threadgroup_change_begin(tsk);
1089 0 : write_lock_irq(&tasklist_lock);
1090 : /*
1091 : * Do this under tasklist_lock to ensure that
1092 : * exit_notify() can't miss ->group_exec_task
1093 : */
1094 0 : sig->notify_count = -1;
1095 0 : if (likely(leader->exit_state))
1096 : break;
1097 0 : __set_current_state(TASK_KILLABLE);
1098 0 : write_unlock_irq(&tasklist_lock);
1099 0 : cgroup_threadgroup_change_end(tsk);
1100 0 : schedule();
1101 0 : if (__fatal_signal_pending(tsk))
1102 : goto killed;
1103 : }
1104 :
1105 : /*
1106 : * The only record we have of the real-time age of a
1107 : * process, regardless of execs it's done, is start_time.
1108 : * All the past CPU time is accumulated in signal_struct
1109 : * from sister threads now dead. But in this non-leader
1110 : * exec, nothing survives from the original leader thread,
1111 : * whose birth marks the true age of this process now.
1112 : * When we take on its identity by switching to its PID, we
1113 : * also take its birthdate (always earlier than our own).
1114 : */
1115 0 : tsk->start_time = leader->start_time;
1116 0 : tsk->start_boottime = leader->start_boottime;
1117 :
1118 0 : BUG_ON(!same_thread_group(leader, tsk));
1119 : /*
1120 : * An exec() starts a new thread group with the
1121 : * TGID of the previous thread group. Rehash the
1122 : * two threads with a switched PID, and release
1123 : * the former thread group leader:
1124 : */
1125 :
1126 : /* Become a process group leader with the old leader's pid.
1127 : * The old leader becomes a thread of this thread group.
1128 : */
1129 0 : exchange_tids(tsk, leader);
1130 0 : transfer_pid(leader, tsk, PIDTYPE_TGID);
1131 0 : transfer_pid(leader, tsk, PIDTYPE_PGID);
1132 0 : transfer_pid(leader, tsk, PIDTYPE_SID);
1133 :
1134 0 : list_replace_rcu(&leader->tasks, &tsk->tasks);
1135 0 : list_replace_init(&leader->sibling, &tsk->sibling);
1136 :
1137 0 : tsk->group_leader = tsk;
1138 0 : leader->group_leader = tsk;
1139 :
1140 0 : tsk->exit_signal = SIGCHLD;
1141 0 : leader->exit_signal = -1;
1142 :
1143 0 : BUG_ON(leader->exit_state != EXIT_ZOMBIE);
1144 0 : leader->exit_state = EXIT_DEAD;
1145 :
1146 : /*
1147 : * We are going to release_task()->ptrace_unlink() silently,
1148 : * the tracer can sleep in do_wait(). EXIT_DEAD guarantees
1149 : * the tracer won't block again waiting for this thread.
1150 : */
1151 0 : if (unlikely(leader->ptrace))
1152 0 : __wake_up_parent(leader, leader->parent);
1153 0 : write_unlock_irq(&tasklist_lock);
1154 0 : cgroup_threadgroup_change_end(tsk);
1155 :
1156 0 : release_task(leader);
1157 : }
1158 :
1159 0 : sig->group_exec_task = NULL;
1160 0 : sig->notify_count = 0;
1161 :
1162 : no_thread_group:
1163 : /* we have changed execution domain */
1164 0 : tsk->exit_signal = SIGCHLD;
1165 :
1166 0 : BUG_ON(!thread_group_leader(tsk));
1167 : return 0;
1168 :
1169 : killed:
1170 : /* protects against exit_notify() and __exit_signal() */
1171 0 : read_lock(&tasklist_lock);
1172 0 : sig->group_exec_task = NULL;
1173 0 : sig->notify_count = 0;
1174 0 : read_unlock(&tasklist_lock);
1175 0 : return -EAGAIN;
1176 : }
1177 :
1178 :
1179 : /*
1180 : * This function makes sure the current process has its own signal table,
1181 : * so that flush_signal_handlers can later reset the handlers without
1182 : * disturbing other processes. (Other processes might share the signal
1183 : * table via the CLONE_SIGHAND option to clone().)
1184 : */
1185 0 : static int unshare_sighand(struct task_struct *me)
1186 : {
1187 0 : struct sighand_struct *oldsighand = me->sighand;
1188 :
1189 0 : if (refcount_read(&oldsighand->count) != 1) {
1190 : struct sighand_struct *newsighand;
1191 : /*
1192 : * This ->sighand is shared with the CLONE_SIGHAND
1193 : * but not CLONE_THREAD task, switch to the new one.
1194 : */
1195 0 : newsighand = kmem_cache_alloc(sighand_cachep, GFP_KERNEL);
1196 0 : if (!newsighand)
1197 : return -ENOMEM;
1198 :
1199 0 : refcount_set(&newsighand->count, 1);
1200 :
1201 0 : write_lock_irq(&tasklist_lock);
1202 0 : spin_lock(&oldsighand->siglock);
1203 0 : memcpy(newsighand->action, oldsighand->action,
1204 : sizeof(newsighand->action));
1205 0 : rcu_assign_pointer(me->sighand, newsighand);
1206 0 : spin_unlock(&oldsighand->siglock);
1207 0 : write_unlock_irq(&tasklist_lock);
1208 :
1209 0 : __cleanup_sighand(oldsighand);
1210 : }
1211 : return 0;
1212 : }
1213 :
1214 0 : char *__get_task_comm(char *buf, size_t buf_size, struct task_struct *tsk)
1215 : {
1216 0 : task_lock(tsk);
1217 : /* Always NUL terminated and zero-padded */
1218 0 : strscpy_pad(buf, tsk->comm, buf_size);
1219 0 : task_unlock(tsk);
1220 0 : return buf;
1221 : }
1222 : EXPORT_SYMBOL_GPL(__get_task_comm);
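/*
 * Typical usage (sketch): callers normally go through the
 * get_task_comm() macro from <linux/sched.h>, which pins the buffer
 * size to TASK_COMM_LEN at compile time:
 *
 *	char comm[TASK_COMM_LEN];
 *	get_task_comm(comm, tsk);
 */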
1223 :
1224 : /*
1225 : * These functions flush out all traces of the currently running executable
1226 : * so that a new one can be started.
1227 : */
1228 :
1229 1 : void __set_task_comm(struct task_struct *tsk, const char *buf, bool exec)
1230 : {
1231 1 : task_lock(tsk);
1232 1 : trace_task_rename(tsk, buf);
1233 1 : strscpy_pad(tsk->comm, buf, sizeof(tsk->comm));
1234 1 : task_unlock(tsk);
1235 1 : perf_event_comm(tsk, exec);
1236 1 : }
1237 :
1238 : /*
1239 : * Calling this is the point of no return. None of the failures will be
1240 : * seen by userspace since either the process is already taking a fatal
1241 : * signal (via de_thread() or coredump), or will have SEGV raised
1242 : * (after exec_mmap()) by search_binary_handler (see below).
1243 : */
1244 0 : int begin_new_exec(struct linux_binprm * bprm)
1245 : {
1246 0 : struct task_struct *me = current;
1247 : int retval;
1248 :
1249 : /* Once we are committed compute the creds */
1250 0 : retval = bprm_creds_from_file(bprm);
1251 0 : if (retval)
1252 : return retval;
1253 :
1254 : /*
1255 : * Ensure all future errors are fatal.
1256 : */
1257 0 : bprm->point_of_no_return = true;
1258 :
1259 : /*
1260 : * Make this the only thread in the thread group.
1261 : */
1262 0 : retval = de_thread(me);
1263 0 : if (retval)
1264 : goto out;
1265 :
1266 : /*
1267 : * Cancel any io_uring activity across execve
1268 : */
1269 0 : io_uring_task_cancel();
1270 :
1271 : /* Ensure the files table is not shared. */
1272 0 : retval = unshare_files();
1273 0 : if (retval)
1274 : goto out;
1275 :
1276 : /*
1277 : * Must be called _before_ exec_mmap() as bprm->mm is
1278 : * not visible until then. This also enables the update
1279 : * to be lockless.
1280 : */
1281 0 : retval = set_mm_exe_file(bprm->mm, bprm->file);
1282 0 : if (retval)
1283 : goto out;
1284 :
1285 : /* If the binary is not readable then enforce mm->dumpable=0 */
1286 0 : would_dump(bprm, bprm->file);
1287 0 : if (bprm->have_execfd)
1288 0 : would_dump(bprm, bprm->executable);
1289 :
1290 : /*
1291 : * Release all of the old mmap stuff
1292 : */
1293 0 : acct_arg_size(bprm, 0);
1294 0 : retval = exec_mmap(bprm->mm);
1295 0 : if (retval)
1296 : goto out;
1297 :
1298 0 : bprm->mm = NULL;
1299 :
1300 0 : retval = exec_task_namespaces();
1301 0 : if (retval)
1302 : goto out_unlock;
1303 :
1304 : #ifdef CONFIG_POSIX_TIMERS
1305 0 : spin_lock_irq(&me->sighand->siglock);
1306 0 : posix_cpu_timers_exit(me);
1307 0 : spin_unlock_irq(&me->sighand->siglock);
1308 0 : exit_itimers(me);
1309 0 : flush_itimer_signals();
1310 : #endif
1311 :
1312 : /*
1313 : * Make the signal table private.
1314 : */
1315 0 : retval = unshare_sighand(me);
1316 0 : if (retval)
1317 : goto out_unlock;
1318 :
1319 0 : me->flags &= ~(PF_RANDOMIZE | PF_FORKNOEXEC |
1320 : PF_NOFREEZE | PF_NO_SETAFFINITY);
1321 0 : flush_thread();
1322 0 : me->personality &= ~bprm->per_clear;
1323 :
1324 0 : clear_syscall_work_syscall_user_dispatch(me);
1325 :
1326 : /*
1327 : * We have to apply CLOEXEC before we change whether the process is
1328 : * dumpable (in setup_new_exec) to avoid a race with a process in userspace
1329 : * trying to access the should-be-closed file descriptors of a process
1330 : * undergoing exec(2).
1331 : */
1332 0 : do_close_on_exec(me->files);
1333 :
1334 0 : if (bprm->secureexec) {
1335 : /* Make sure parent cannot signal privileged process. */
1336 0 : me->pdeath_signal = 0;
1337 :
1338 : /*
1339 : * For secureexec, reset the stack limit to sane default to
1340 : * avoid bad behavior from the prior rlimits. This has to
1341 : * happen before arch_pick_mmap_layout(), which examines
1342 : * RLIMIT_STACK, but after the point of no return to avoid
1343 : * needing to clean up the change on failure.
1344 : */
1345 0 : if (bprm->rlim_stack.rlim_cur > _STK_LIM)
1346 0 : bprm->rlim_stack.rlim_cur = _STK_LIM;
1347 : }
1348 :
1349 0 : me->sas_ss_sp = me->sas_ss_size = 0;
1350 :
1351 : /*
1352 : * Figure out dumpability. Note that checking only current here
1353 : * is wrong, but userspace depends on it. This should be testing
1354 : * bprm->secureexec instead.
1355 : */
1356 0 : if (bprm->interp_flags & BINPRM_FLAGS_ENFORCE_NONDUMP ||
1357 0 : !(uid_eq(current_euid(), current_uid()) &&
1358 0 : gid_eq(current_egid(), current_gid())))
1359 0 : set_dumpable(current->mm, suid_dumpable);
1360 : else
1361 0 : set_dumpable(current->mm, SUID_DUMP_USER);
1362 :
1363 : perf_event_exec();
1364 0 : __set_task_comm(me, kbasename(bprm->filename), true);
1365 :
1366 : /* An exec changes our domain. We are no longer part of the thread
1367 : group */
1368 0 : WRITE_ONCE(me->self_exec_id, me->self_exec_id + 1);
1369 0 : flush_signal_handlers(me, 0);
1370 :
1371 0 : retval = set_cred_ucounts(bprm->cred);
1372 0 : if (retval < 0)
1373 : goto out_unlock;
1374 :
1375 : /*
1376 : * install the new credentials for this executable
1377 : */
1378 0 : security_bprm_committing_creds(bprm);
1379 :
1380 0 : commit_creds(bprm->cred);
1381 0 : bprm->cred = NULL;
1382 :
1383 : /*
1384 : * Disable monitoring for regular users
1385 : * when executing setuid binaries. Must
1386 : * wait until new credentials are committed
1387 : * by commit_creds() above
1388 : */
1389 0 : if (get_dumpable(me->mm) != SUID_DUMP_USER)
1390 : perf_event_exit_task(me);
1391 : /*
1392 : * cred_guard_mutex must be held at least to this point to prevent
1393 : * ptrace_attach() from altering our determination of the task's
1394 : * credentials; any time after this it may be unlocked.
1395 : */
1396 0 : security_bprm_committed_creds(bprm);
1397 :
1398 : /* Pass the opened binary to the interpreter. */
1399 0 : if (bprm->have_execfd) {
1400 0 : retval = get_unused_fd_flags(0);
1401 0 : if (retval < 0)
1402 : goto out_unlock;
1403 0 : fd_install(retval, bprm->executable);
1404 0 : bprm->executable = NULL;
1405 0 : bprm->execfd = retval;
1406 : }
1407 : return 0;
1408 :
1409 : out_unlock:
1410 0 : up_write(&me->signal->exec_update_lock);
1411 : out:
1412 : return retval;
1413 : }
1414 : EXPORT_SYMBOL(begin_new_exec);
1415 :
1416 0 : void would_dump(struct linux_binprm *bprm, struct file *file)
1417 : {
1418 0 : struct inode *inode = file_inode(file);
1419 0 : struct mnt_idmap *idmap = file_mnt_idmap(file);
1420 0 : if (inode_permission(idmap, inode, MAY_READ) < 0) {
1421 : struct user_namespace *old, *user_ns;
1422 0 : bprm->interp_flags |= BINPRM_FLAGS_ENFORCE_NONDUMP;
1423 :
1424 : /* Ensure mm->user_ns contains the executable */
1425 0 : user_ns = old = bprm->mm->user_ns;
1426 0 : while ((user_ns != &init_user_ns) &&
1427 0 : !privileged_wrt_inode_uidgid(user_ns, idmap, inode))
1428 0 : user_ns = user_ns->parent;
1429 :
1430 0 : if (old != user_ns) {
1431 0 : bprm->mm->user_ns = get_user_ns(user_ns);
1432 0 : put_user_ns(old);
1433 : }
1434 : }
1435 0 : }
1436 : EXPORT_SYMBOL(would_dump);
1437 :
1438 0 : void setup_new_exec(struct linux_binprm * bprm)
1439 : {
1440 : /* Setup things that can depend upon the personality */
1441 0 : struct task_struct *me = current;
1442 :
1443 0 : arch_pick_mmap_layout(me->mm, &bprm->rlim_stack);
1444 :
1445 : arch_setup_new_exec();
1446 :
1447 : /* Set the new mm task size. We have to do that late because it may
1448 : * depend on TIF_32BIT which is only updated in flush_thread() on
1449 : * some architectures like powerpc
1450 : */
1451 0 : me->mm->task_size = TASK_SIZE;
1452 0 : up_write(&me->signal->exec_update_lock);
1453 0 : mutex_unlock(&me->signal->cred_guard_mutex);
1454 0 : }
1455 : EXPORT_SYMBOL(setup_new_exec);
1456 :
1457 : /* Runs immediately before start_thread() takes over. */
1458 0 : void finalize_exec(struct linux_binprm *bprm)
1459 : {
1460 : /* Store any stack rlimit changes before starting thread. */
1461 0 : task_lock(current->group_leader);
1462 0 : current->signal->rlim[RLIMIT_STACK] = bprm->rlim_stack;
1463 0 : task_unlock(current->group_leader);
1464 0 : }
1465 : EXPORT_SYMBOL(finalize_exec);
1466 :
1467 : /*
1468 : * Prepare credentials and lock ->cred_guard_mutex.
1469 : * setup_new_exec() commits the new creds and drops the lock.
1470 : * Or, if exec fails before, free_bprm() should release ->cred
1471 : * and unlock.
1472 : */
1473 0 : static int prepare_bprm_creds(struct linux_binprm *bprm)
1474 : {
1475 0 : if (mutex_lock_interruptible(&current->signal->cred_guard_mutex))
1476 : return -ERESTARTNOINTR;
1477 :
1478 0 : bprm->cred = prepare_exec_creds();
1479 0 : if (likely(bprm->cred))
1480 : return 0;
1481 :
1482 0 : mutex_unlock(&current->signal->cred_guard_mutex);
1483 : return -ENOMEM;
1484 : }
1485 :
1486 0 : static void free_bprm(struct linux_binprm *bprm)
1487 : {
1488 0 : if (bprm->mm) {
1489 0 : acct_arg_size(bprm, 0);
1490 0 : mmput(bprm->mm);
1491 : }
1492 0 : free_arg_pages(bprm);
1493 0 : if (bprm->cred) {
1494 0 : mutex_unlock(&current->signal->cred_guard_mutex);
1495 0 : abort_creds(bprm->cred);
1496 : }
1497 0 : if (bprm->file) {
1498 0 : allow_write_access(bprm->file);
1499 0 : fput(bprm->file);
1500 : }
1501 0 : if (bprm->executable)
1502 0 : fput(bprm->executable);
1503 : /* If a binfmt changed the interp, free it. */
1504 0 : if (bprm->interp != bprm->filename)
1505 0 : kfree(bprm->interp);
1506 0 : kfree(bprm->fdpath);
1507 0 : kfree(bprm);
1508 0 : }
1509 :
1510 0 : static struct linux_binprm *alloc_bprm(int fd, struct filename *filename)
1511 : {
1512 0 : struct linux_binprm *bprm = kzalloc(sizeof(*bprm), GFP_KERNEL);
1513 0 : int retval = -ENOMEM;
1514 0 : if (!bprm)
1515 : goto out;
1516 :
1517 0 : if (fd == AT_FDCWD || filename->name[0] == '/') {
1518 0 : bprm->filename = filename->name;
1519 : } else {
1520 0 : if (filename->name[0] == '\0')
1521 0 : bprm->fdpath = kasprintf(GFP_KERNEL, "/dev/fd/%d", fd);
1522 : else
1523 0 : bprm->fdpath = kasprintf(GFP_KERNEL, "/dev/fd/%d/%s",
1524 : fd, filename->name);
1525 0 : if (!bprm->fdpath)
1526 : goto out_free;
1527 :
1528 0 : bprm->filename = bprm->fdpath;
1529 : }
1530 0 : bprm->interp = bprm->filename;
1531 :
1532 0 : retval = bprm_mm_init(bprm);
1533 0 : if (retval)
1534 : goto out_free;
1535 : return bprm;
1536 :
1537 : out_free:
1538 0 : free_bprm(bprm);
1539 : out:
1540 0 : return ERR_PTR(retval);
1541 : }
1542 :
1543 0 : int bprm_change_interp(const char *interp, struct linux_binprm *bprm)
1544 : {
1545 : /* If a binfmt changed the interp, free it first. */
1546 0 : if (bprm->interp != bprm->filename)
1547 0 : kfree(bprm->interp);
1548 0 : bprm->interp = kstrdup(interp, GFP_KERNEL);
1549 0 : if (!bprm->interp)
1550 : return -ENOMEM;
1551 0 : return 0;
1552 : }
1553 : EXPORT_SYMBOL(bprm_change_interp);
1554 :
1555 : /*
1556 : * determine how safe it is to execute the proposed program
1557 : * - the caller must hold ->cred_guard_mutex to protect against
1558 : * PTRACE_ATTACH or seccomp thread-sync
1559 : */
1560 0 : static void check_unsafe_exec(struct linux_binprm *bprm)
1561 : {
1562 0 : struct task_struct *p = current, *t;
1563 : unsigned n_fs;
1564 :
1565 0 : if (p->ptrace)
1566 0 : bprm->unsafe |= LSM_UNSAFE_PTRACE;
1567 :
1568 : /*
1569 : * This isn't strictly necessary, but it makes it harder for LSMs to
1570 : * mess up.
1571 : */
1572 0 : if (task_no_new_privs(current))
1573 0 : bprm->unsafe |= LSM_UNSAFE_NO_NEW_PRIVS;
1574 :
1575 : /*
1576 : * If another task is sharing our fs, we cannot safely
1577 : * suid exec because the differently privileged task
1578 : * will be able to manipulate the current directory, etc.
1579 : * It would be nice to force an unshare instead...
1580 : */
1581 0 : t = p;
1582 0 : n_fs = 1;
1583 0 : spin_lock(&p->fs->lock);
1584 : rcu_read_lock();
1585 0 : while_each_thread(p, t) {
1586 0 : if (t->fs == p->fs)
1587 0 : n_fs++;
1588 : }
1589 : rcu_read_unlock();
1590 :
1591 0 : if (p->fs->users > n_fs)
1592 0 : bprm->unsafe |= LSM_UNSAFE_SHARE;
1593 : else
1594 0 : p->fs->in_exec = 1;
1595 0 : spin_unlock(&p->fs->lock);
1596 0 : }
1597 :
1598 0 : static void bprm_fill_uid(struct linux_binprm *bprm, struct file *file)
1599 : {
1600 : /* Handle suid and sgid on files */
1601 : struct mnt_idmap *idmap;
1602 0 : struct inode *inode = file_inode(file);
1603 : unsigned int mode;
1604 : vfsuid_t vfsuid;
1605 : vfsgid_t vfsgid;
1606 :
1607 0 : if (!mnt_may_suid(file->f_path.mnt))
1608 : return;
1609 :
1610 0 : if (task_no_new_privs(current))
1611 : return;
1612 :
1613 0 : mode = READ_ONCE(inode->i_mode);
1614 0 : if (!(mode & (S_ISUID|S_ISGID)))
1615 : return;
1616 :
1617 0 : idmap = file_mnt_idmap(file);
1618 :
1619 : /* Be careful if suid/sgid is set */
1620 0 : inode_lock(inode);
1621 :
1622 : /* reload mode/uid/gid atomically now that the lock is held */
1623 0 : mode = inode->i_mode;
1624 0 : vfsuid = i_uid_into_vfsuid(idmap, inode);
1625 0 : vfsgid = i_gid_into_vfsgid(idmap, inode);
1626 0 : inode_unlock(inode);
1627 :
1628 : /* We ignore suid/sgid if there are no mappings for them in the ns */
1629 0 : if (!vfsuid_has_mapping(bprm->cred->user_ns, vfsuid) ||
1630 0 : !vfsgid_has_mapping(bprm->cred->user_ns, vfsgid))
1631 : return;
1632 :
1633 0 : if (mode & S_ISUID) {
1634 0 : bprm->per_clear |= PER_CLEAR_ON_SETID;
1635 0 : bprm->cred->euid = vfsuid_into_kuid(vfsuid);
1636 : }
1637 :
1638 0 : if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP)) {
1639 0 : bprm->per_clear |= PER_CLEAR_ON_SETID;
1640 0 : bprm->cred->egid = vfsgid_into_kgid(vfsgid);
1641 : }
1642 : }
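/*
 * Example (illustrative): executing a root-owned file with mode 04755
 * reaches this point with S_ISUID set, so bprm->cred->euid becomes
 * root while the real uid stays with the caller -- unless the mount is
 * nosuid or the task runs with no_new_privs, in which case the early
 * returns above leave the creds untouched.
 */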
1643 :
1644 : /*
1645 : * Compute bprm->cred based upon the final binary.
1646 : */
1647 0 : static int bprm_creds_from_file(struct linux_binprm *bprm)
1648 : {
1649 : /* Compute creds based on which file? */
1650 0 : struct file *file = bprm->execfd_creds ? bprm->executable : bprm->file;
1651 :
1652 0 : bprm_fill_uid(bprm, file);
1653 0 : return security_bprm_creds_from_file(bprm, file);
1654 : }
1655 :
1656 : /*
1657 : * Fill the binprm structure from the inode.
1658 : * Read the first BINPRM_BUF_SIZE bytes
1659 : *
1660 : * This may be called multiple times for binary chains (scripts for example).
1661 : */
1662 0 : static int prepare_binprm(struct linux_binprm *bprm)
1663 : {
1664 0 : loff_t pos = 0;
1665 :
1666 0 : memset(bprm->buf, 0, BINPRM_BUF_SIZE);
1667 0 : return kernel_read(bprm->file, bprm->buf, BINPRM_BUF_SIZE, &pos);
1668 : }
1669 :
1670 : /*
1671 : * Arguments are '\0' separated strings found at the location bprm->p
1672 : * points to; chop off the first by relocating bprm->p to right after
1673 : * the first '\0' encountered.
1674 : */
1675 0 : int remove_arg_zero(struct linux_binprm *bprm)
1676 : {
1677 0 : int ret = 0;
1678 : unsigned long offset;
1679 : char *kaddr;
1680 : struct page *page;
1681 :
1682 0 : if (!bprm->argc)
1683 : return 0;
1684 :
1685 : do {
1686 0 : offset = bprm->p & ~PAGE_MASK;
1687 0 : page = get_arg_page(bprm, bprm->p, 0);
1688 0 : if (!page) {
1689 : ret = -EFAULT;
1690 : goto out;
1691 : }
1692 0 : kaddr = kmap_local_page(page);
1693 :
1694 0 : for (; offset < PAGE_SIZE && kaddr[offset];
1695 0 : offset++, bprm->p++)
1696 : ;
1697 :
1698 0 : kunmap_local(kaddr);
1699 0 : put_arg_page(page);
1700 0 : } while (offset == PAGE_SIZE);
1701 :
1702 0 : bprm->p++;
1703 0 : bprm->argc--;
1704 0 : ret = 0;
1705 :
1706 : out:
1707 : return ret;
1708 : }
1709 : EXPORT_SYMBOL(remove_arg_zero);
1710 :
1711 : #define printable(c) (((c)=='\t') || ((c)=='\n') || (0x20<=(c) && (c)<=0x7e))
1712 : /*
1713 : * cycle through the list of binary format handlers until one recognizes the image
1714 : */
1715 0 : static int search_binary_handler(struct linux_binprm *bprm)
1716 : {
1717 0 : bool need_retry = IS_ENABLED(CONFIG_MODULES);
1718 : struct linux_binfmt *fmt;
1719 : int retval;
1720 :
1721 0 : retval = prepare_binprm(bprm);
1722 0 : if (retval < 0)
1723 : return retval;
1724 :
1725 0 : retval = security_bprm_check(bprm);
1726 : if (retval)
1727 : return retval;
1728 :
1729 0 : retval = -ENOENT;
1730 : retry:
1731 0 : read_lock(&binfmt_lock);
1732 0 : list_for_each_entry(fmt, &formats, lh) {
1733 0 : if (!try_module_get(fmt->module))
1734 : continue;
1735 0 : read_unlock(&binfmt_lock);
1736 :
1737 0 : retval = fmt->load_binary(bprm);
1738 :
1739 0 : read_lock(&binfmt_lock);
1740 0 : put_binfmt(fmt);
1741 0 : if (bprm->point_of_no_return || (retval != -ENOEXEC)) {
1742 0 : read_unlock(&binfmt_lock);
1743 0 : return retval;
1744 : }
1745 : }
1746 0 : read_unlock(&binfmt_lock);
1747 :
1748 : if (need_retry) {
1749 : if (printable(bprm->buf[0]) && printable(bprm->buf[1]) &&
1750 : printable(bprm->buf[2]) && printable(bprm->buf[3]))
1751 : return retval;
1752 : if (request_module("binfmt-%04x", *(ushort *)(bprm->buf + 2)) < 0)
1753 : return retval;
1754 : need_retry = false;
1755 : goto retry;
1756 : }
1757 :
1758 0 : return retval;
1759 : }
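/*
 * Example (illustrative): an ELF image starts with "\177ELF", so on a
 * little-endian machine *(ushort *)(bprm->buf + 2) reads 0x464c and
 * the autoload above becomes request_module("binfmt-464c"), the module
 * alias historically associated with the ELF loader.
 */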
1760 :
1761 : /* binfmt handlers will call back into begin_new_exec() on success. */
1762 0 : static int exec_binprm(struct linux_binprm *bprm)
1763 : {
1764 : pid_t old_pid, old_vpid;
1765 : int ret, depth;
1766 :
1767 : /* Need to fetch pid before load_binary changes it */
1768 0 : old_pid = current->pid;
1769 : rcu_read_lock();
1770 0 : old_vpid = task_pid_nr_ns(current, task_active_pid_ns(current->parent));
1771 : rcu_read_unlock();
1772 :
1773 : /* This allows 5 levels of binfmt rewrites before failing hard. */
1774 0 : for (depth = 0;; depth++) {
1775 : struct file *exec;
1776 0 : if (depth > 5)
1777 : return -ELOOP;
1778 :
1779 0 : ret = search_binary_handler(bprm);
1780 0 : if (ret < 0)
1781 : return ret;
1782 0 : if (!bprm->interpreter)
1783 : break;
1784 :
1785 0 : exec = bprm->file;
1786 0 : bprm->file = bprm->interpreter;
1787 0 : bprm->interpreter = NULL;
1788 :
1789 0 : allow_write_access(exec);
1790 0 : if (unlikely(bprm->have_execfd)) {
1791 0 : if (bprm->executable) {
1792 0 : fput(exec);
1793 0 : return -ENOEXEC;
1794 : }
1795 0 : bprm->executable = exec;
1796 : } else
1797 0 : fput(exec);
1798 : }
1799 :
1800 0 : audit_bprm(bprm);
1801 0 : trace_sched_process_exec(current, old_pid, bprm);
1802 0 : ptrace_event(PTRACE_EVENT_EXEC, old_vpid);
1803 0 : proc_exec_connector(current);
1804 0 : return 0;
1805 : }
1806 :
1807 : /*
1808 : * sys_execve() executes a new program.
1809 : */
1810 0 : static int bprm_execve(struct linux_binprm *bprm,
1811 : int fd, struct filename *filename, int flags)
1812 : {
1813 : struct file *file;
1814 : int retval;
1815 :
1816 0 : retval = prepare_bprm_creds(bprm);
1817 0 : if (retval)
1818 : return retval;
1819 :
1820 : /*
1821 : * Check for unsafe execution states before exec_binprm(), which
1822 : * will call back into begin_new_exec(), into bprm_creds_from_file(),
1823 : * where setuid-ness is evaluated.
1824 : */
1825 0 : check_unsafe_exec(bprm);
1826 0 : current->in_execve = 1;
1827 0 : sched_mm_cid_before_execve(current);
1828 :
1829 0 : file = do_open_execat(fd, filename, flags);
1830 0 : retval = PTR_ERR(file);
1831 0 : if (IS_ERR(file))
1832 : goto out_unmark;
1833 :
1834 : sched_exec();
1835 :
1836 0 : bprm->file = file;
1837 : /*
1838 : * Record that a name derived from an O_CLOEXEC fd will be
1839 : * inaccessible after exec. This allows the code in exec to
1840 : * choose to fail when the executable is not mmapped into the
1841 : * interpreter and an open file descriptor is not passed to
1842 : * the interpreter. This makes for a better user experience
1843 : * than having the interpreter start and then immediately fail
1844 : * when it finds the executable is inaccessible.
1845 : */
1846 0 : if (bprm->fdpath && get_close_on_exec(fd))
1847 0 : bprm->interp_flags |= BINPRM_FLAGS_PATH_INACCESSIBLE;
1848 :
1849 : /* Set the unchanging part of bprm->cred */
1850 0 : retval = security_bprm_creds_for_exec(bprm);
1851 : if (retval)
1852 : goto out;
1853 :
1854 0 : retval = exec_binprm(bprm);
1855 0 : if (retval < 0)
1856 : goto out;
1857 :
1858 0 : sched_mm_cid_after_execve(current);
1859 : /* execve succeeded */
1860 0 : current->fs->in_exec = 0;
1861 0 : current->in_execve = 0;
1862 0 : rseq_execve(current);
1863 0 : user_events_execve(current);
1864 0 : acct_update_integrals(current);
1865 0 : task_numa_free(current, false);
1866 0 : return retval;
1867 :
1868 : out:
1869 : /*
1870 : * If past the point of no return ensure the code never
1871 : * returns to the userspace process. Use an existing fatal
1872 : * signal if present otherwise terminate the process with
1873 : * SIGSEGV.
1874 : */
1875 0 : if (bprm->point_of_no_return && !fatal_signal_pending(current))
1876 0 : force_fatal_sig(SIGSEGV);
1877 :
1878 : out_unmark:
1879 0 : sched_mm_cid_after_execve(current);
1880 0 : current->fs->in_exec = 0;
1881 0 : current->in_execve = 0;
1882 :
1883 0 : return retval;
1884 : }
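/*
 * Userspace counterpart (sketch; assumes a libc that exposes
 * execveat(2), otherwise go through syscall(SYS_execveat, ...)):
 *
 *	int fd = open("/usr/bin/true", O_PATH | O_CLOEXEC);
 *	char *argv[] = { "true", NULL }, *envp[] = { NULL };
 *	execveat(fd, "", argv, envp, AT_EMPTY_PATH);
 *
 * Because the fd is close-on-exec, the BINPRM_FLAGS_PATH_INACCESSIBLE
 * marking above lets an interpreted (#!) image fail up front rather
 * than start and then find /dev/fd/<n> already gone.
 */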
1885 :
1886 0 : static int do_execveat_common(int fd, struct filename *filename,
1887 : struct user_arg_ptr argv,
1888 : struct user_arg_ptr envp,
1889 : int flags)
1890 : {
1891 : struct linux_binprm *bprm;
1892 : int retval;
1893 :
1894 0 : if (IS_ERR(filename))
1895 0 : return PTR_ERR(filename);
1896 :
1897 : /*
1898 : * We move the actual failure in case of RLIMIT_NPROC excess from
1899 : * set*uid() to execve() because too many poorly written programs
1900 : * don't check setuid() return code. Here we additionally recheck
1901 : * whether NPROC limit is still exceeded.
1902 : */
1903 0 : if ((current->flags & PF_NPROC_EXCEEDED) &&
1904 0 : is_rlimit_overlimit(current_ucounts(), UCOUNT_RLIMIT_NPROC, rlimit(RLIMIT_NPROC))) {
1905 : retval = -EAGAIN;
1906 : goto out_ret;
1907 : }
1908 :
1909 : /* We're below the limit (still or again), so we don't want to make
1910 : * further execve() calls fail. */
1911 0 : current->flags &= ~PF_NPROC_EXCEEDED;
1912 :
1913 0 : bprm = alloc_bprm(fd, filename);
1914 0 : if (IS_ERR(bprm)) {
1915 0 : retval = PTR_ERR(bprm);
1916 0 : goto out_ret;
1917 : }
1918 :
1919 0 : retval = count(argv, MAX_ARG_STRINGS);
1920 0 : if (retval == 0)
1921 0 : pr_warn_once("process '%s' launched '%s' with NULL argv: empty string added\n",
1922 : current->comm, bprm->filename);
1923 0 : if (retval < 0)
1924 : goto out_free;
1925 0 : bprm->argc = retval;
1926 :
1927 0 : retval = count(envp, MAX_ARG_STRINGS);
1928 0 : if (retval < 0)
1929 : goto out_free;
1930 0 : bprm->envc = retval;
1931 :
1932 0 : retval = bprm_stack_limits(bprm);
1933 0 : if (retval < 0)
1934 : goto out_free;
1935 :
1936 0 : retval = copy_string_kernel(bprm->filename, bprm);
1937 0 : if (retval < 0)
1938 : goto out_free;
1939 0 : bprm->exec = bprm->p;
1940 :
1941 0 : retval = copy_strings(bprm->envc, envp, bprm);
1942 0 : if (retval < 0)
1943 : goto out_free;
1944 :
1945 0 : retval = copy_strings(bprm->argc, argv, bprm);
1946 0 : if (retval < 0)
1947 : goto out_free;
1948 :
1949 : /*
1950 : * When argv is empty, add an empty string ("") as argv[0] to
1951 : * ensure confused userspace programs that start processing
1952 : * from argv[1] won't end up walking envp. See also
1953 : * bprm_stack_limits().
1954 : */
1955 0 : if (bprm->argc == 0) {
1956 0 : retval = copy_string_kernel("", bprm);
1957 0 : if (retval < 0)
1958 : goto out_free;
1959 0 : bprm->argc = 1;
1960 : }
1961 :
1962 0 : retval = bprm_execve(bprm, fd, filename, flags);
1963 : out_free:
1964 0 : free_bprm(bprm);
1965 :
1966 : out_ret:
1967 0 : putname(filename);
1968 0 : return retval;
1969 : }
1970 :
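The empty-argv fixup above can be seen end to end with a program that re-executes itself with argc == 0 (a minimal sketch, assuming /proc is mounted): the kernel logs the pr_warn_once() once per boot, and the new image starts with argc == 1 and an empty argv[0]:

	#include <stdio.h>
	#include <unistd.h>

	int main(int argc, char **argv)
	{
		char *empty[] = { NULL }, *envp[] = { NULL };

		/* On kernels with the fixup above, the re-exec'd image sees "" */
		if (argc == 1 && argv[0] && argv[0][0] == '\0') {
			puts("kernel inserted argv[0] = \"\"");
			return 0;
		}
		execve("/proc/self/exe", empty, envp);
		return 1;
	}
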
1971 0 : int kernel_execve(const char *kernel_filename,
1972 : const char *const *argv, const char *const *envp)
1973 : {
1974 : struct filename *filename;
1975 : struct linux_binprm *bprm;
1976 0 : int fd = AT_FDCWD;
1977 : int retval;
1978 :
1979 : /* It is nonsensical for kernel threads to call execve */
1980 0 : if (WARN_ON_ONCE(current->flags & PF_KTHREAD))
1981 : return -EINVAL;
1982 :
1983 0 : filename = getname_kernel(kernel_filename);
1984 0 : if (IS_ERR(filename))
1985 0 : return PTR_ERR(filename);
1986 :
1987 0 : bprm = alloc_bprm(fd, filename);
1988 0 : if (IS_ERR(bprm)) {
1989 0 : retval = PTR_ERR(bprm);
1990 0 : goto out_ret;
1991 : }
1992 :
1993 0 : retval = count_strings_kernel(argv);
1994 0 : if (WARN_ON_ONCE(retval == 0))
1995 0 : retval = -EINVAL;
1996 0 : if (retval < 0)
1997 : goto out_free;
1998 0 : bprm->argc = retval;
1999 :
2000 0 : retval = count_strings_kernel(envp);
2001 0 : if (retval < 0)
2002 : goto out_free;
2003 0 : bprm->envc = retval;
2004 :
2005 0 : retval = bprm_stack_limits(bprm);
2006 0 : if (retval < 0)
2007 : goto out_free;
2008 :
2009 0 : retval = copy_string_kernel(bprm->filename, bprm);
2010 0 : if (retval < 0)
2011 : goto out_free;
2012 0 : bprm->exec = bprm->p;
2013 :
2014 0 : retval = copy_strings_kernel(bprm->envc, envp, bprm);
2015 0 : if (retval < 0)
2016 : goto out_free;
2017 :
2018 0 : retval = copy_strings_kernel(bprm->argc, argv, bprm);
2019 0 : if (retval < 0)
2020 : goto out_free;
2021 :
2022 0 : retval = bprm_execve(bprm, fd, filename, 0);
2023 : out_free:
2024 0 : free_bprm(bprm);
2025 : out_ret:
2026 0 : putname(filename);
2027 0 : return retval;
2028 : }
2029 :
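kernel_execve() is not meant to be called ad hoc; in-tree users go through the usermode-helper API, whose worker is spawned with user_mode_thread() in current kernels and therefore passes the PF_KTHREAD check above. A minimal sketch of such a caller (path and names illustrative):

	#include <linux/umh.h>

	static int demo_run_helper(void)
	{
		char *argv[] = { "/sbin/demo-helper", "--oneshot", NULL };	/* illustrative */
		static char *envp[] = {
			"HOME=/",
			"PATH=/sbin:/usr/sbin:/bin:/usr/bin",
			NULL
		};

		/* UMH_WAIT_EXEC: return once kernel_execve() has succeeded or failed */
		return call_usermodehelper(argv[0], argv, envp, UMH_WAIT_EXEC);
	}
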
2030 : static int do_execve(struct filename *filename,
2031 : const char __user *const __user *__argv,
2032 : const char __user *const __user *__envp)
2033 : {
2034 0 : struct user_arg_ptr argv = { .ptr.native = __argv };
2035 0 : struct user_arg_ptr envp = { .ptr.native = __envp };
2036 0 : return do_execveat_common(AT_FDCWD, filename, argv, envp, 0);
2037 : }
2038 :
2039 : static int do_execveat(int fd, struct filename *filename,
2040 : const char __user *const __user *__argv,
2041 : const char __user *const __user *__envp,
2042 : int flags)
2043 : {
2044 0 : struct user_arg_ptr argv = { .ptr.native = __argv };
2045 0 : struct user_arg_ptr envp = { .ptr.native = __envp };
2046 :
2047 0 : return do_execveat_common(fd, filename, argv, envp, flags);
2048 : }
2049 :
2050 : #ifdef CONFIG_COMPAT
2051 : static int compat_do_execve(struct filename *filename,
2052 : const compat_uptr_t __user *__argv,
2053 : const compat_uptr_t __user *__envp)
2054 : {
2055 : struct user_arg_ptr argv = {
2056 : .is_compat = true,
2057 : .ptr.compat = __argv,
2058 : };
2059 : struct user_arg_ptr envp = {
2060 : .is_compat = true,
2061 : .ptr.compat = __envp,
2062 : };
2063 : return do_execveat_common(AT_FDCWD, filename, argv, envp, 0);
2064 : }
2065 :
2066 : static int compat_do_execveat(int fd, struct filename *filename,
2067 : const compat_uptr_t __user *__argv,
2068 : const compat_uptr_t __user *__envp,
2069 : int flags)
2070 : {
2071 : struct user_arg_ptr argv = {
2072 : .is_compat = true,
2073 : .ptr.compat = __argv,
2074 : };
2075 : struct user_arg_ptr envp = {
2076 : .is_compat = true,
2077 : .ptr.compat = __envp,
2078 : };
2079 : return do_execveat_common(fd, filename, argv, envp, flags);
2080 : }
2081 : #endif
2082 :
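Both compat wrappers only flip is_compat; the pointer-width handling happens later, when individual argv/envp entries are fetched. A simplified sketch of that fetch, mirroring the get_user_arg_ptr() helper earlier in this file (CONFIG_COMPAT #ifdef elided, function name hypothetical):

	static const char __user *demo_get_arg(struct user_arg_ptr argv, int nr)
	{
		if (argv.is_compat) {
			compat_uptr_t compat;

			/* 32-bit task: argv is an array of 32-bit pointers */
			if (get_user(compat, argv.ptr.compat + nr))
				return ERR_PTR(-EFAULT);
			return compat_ptr(compat);	/* widen to a full user pointer */
		} else {
			const char __user *native;

			if (get_user(native, argv.ptr.native + nr))
				return ERR_PTR(-EFAULT);
			return native;
		}
	}
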
2083 0 : void set_binfmt(struct linux_binfmt *new)
2084 : {
2085 0 : struct mm_struct *mm = current->mm;
2086 :
2087 0 : if (mm->binfmt)
2088 : module_put(mm->binfmt->module);
2089 :
2090 0 : mm->binfmt = new;
2091 : if (new)
2092 : __module_get(new->module);
2093 0 : }
2094 : EXPORT_SYMBOL(set_binfmt);
2095 :
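The module reference taken here is what keeps a loadable binary-format handler pinned while a process it loaded is still running; the next exec (or exit) drops it via the module_put() above. A minimal, hypothetical handler showing the pairing (names illustrative, error handling elided):

	#include <linux/binfmts.h>
	#include <linux/module.h>

	static int demo_load_binary(struct linux_binprm *bprm);

	static struct linux_binfmt demo_format = {
		.module		= THIS_MODULE,
		.load_binary	= demo_load_binary,
	};

	static int demo_load_binary(struct linux_binprm *bprm)
	{
		/* ... recognize the format, begin_new_exec(bprm), map the image ... */
		set_binfmt(&demo_format);	/* pins THIS_MODULE until the next exec */
		/* ... set up the initial stack, start_thread() ... */
		return 0;
	}

	static int __init demo_init(void)
	{
		register_binfmt(&demo_format);
		return 0;
	}
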
2096 : /*
2097 : * set_dumpable() stores the three-value SUID_DUMP_* state into mm->flags.
2098 : */
2099 0 : void set_dumpable(struct mm_struct *mm, int value)
2100 : {
2101 0 : if (WARN_ON((unsigned)value > SUID_DUMP_ROOT))
2102 : return;
2103 :
2104 0 : set_mask_bits(&mm->flags, MMF_DUMPABLE_MASK, value);
2105 : }
2106 :
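Userspace reaches the same tri-state through prctl(2); note that prctl() only accepts SUID_DUMP_DISABLE (0) and SUID_DUMP_USER (1), while SUID_DUMP_ROOT (2) is only reachable as the suid_dumpable sysctl policy below. A small sketch:

	#include <stdio.h>
	#include <sys/prctl.h>

	int main(void)
	{
		printf("dumpable=%d\n", prctl(PR_GET_DUMPABLE, 0, 0, 0, 0));	/* usually 1 */
		prctl(PR_SET_DUMPABLE, 0, 0, 0, 0);	/* SUID_DUMP_DISABLE */
		printf("dumpable=%d\n", prctl(PR_GET_DUMPABLE, 0, 0, 0, 0));	/* now 0 */
		return 0;
	}
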
2107 0 : SYSCALL_DEFINE3(execve,
2108 : const char __user *, filename,
2109 : const char __user *const __user *, argv,
2110 : const char __user *const __user *, envp)
2111 : {
2112 0 : return do_execve(getname(filename), argv, envp);
2113 : }
2114 :
2115 0 : SYSCALL_DEFINE5(execveat,
2116 : int, fd, const char __user *, filename,
2117 : const char __user *const __user *, argv,
2118 : const char __user *const __user *, envp,
2119 : int, flags)
2120 : {
2121 0 : return do_execveat(fd,
2122 : getname_uflags(filename, flags),
2123 : argv, envp, flags);
2124 : }
2125 :
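A typical userspace consumer of execveat(2) is glibc's fexecve(), which on current kernels is implemented as execveat(fd, "", argv, envp, AT_EMPTY_PATH). A minimal sketch (binary path illustrative):

	#define _GNU_SOURCE
	#include <fcntl.h>
	#include <unistd.h>

	int main(void)
	{
		int fd = open("/bin/echo", O_RDONLY | O_CLOEXEC);
		char *argv[] = { "echo", "hello from execveat", NULL };
		char *envp[] = { NULL };

		if (fd < 0)
			return 1;
		fexecve(fd, argv, envp);	/* execveat(fd, "", ..., AT_EMPTY_PATH) underneath */
		return 1;	/* only reached if the exec failed */
	}
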
2126 : #ifdef CONFIG_COMPAT
2127 : COMPAT_SYSCALL_DEFINE3(execve, const char __user *, filename,
2128 : const compat_uptr_t __user *, argv,
2129 : const compat_uptr_t __user *, envp)
2130 : {
2131 : return compat_do_execve(getname(filename), argv, envp);
2132 : }
2133 :
2134 : COMPAT_SYSCALL_DEFINE5(execveat, int, fd,
2135 : const char __user *, filename,
2136 : const compat_uptr_t __user *, argv,
2137 : const compat_uptr_t __user *, envp,
2138 : int, flags)
2139 : {
2140 : return compat_do_execveat(fd,
2141 : getname_uflags(filename, flags),
2142 : argv, envp, flags);
2143 : }
2144 : #endif
2145 :
2146 : #ifdef CONFIG_SYSCTL
2147 :
2148 0 : static int proc_dointvec_minmax_coredump(struct ctl_table *table, int write,
2149 : void *buffer, size_t *lenp, loff_t *ppos)
2150 : {
2151 0 : int error = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
2152 :
2153 0 : if (!error)
2154 0 : validate_coredump_safety();
2155 0 : return error;
2156 : }
2157 :
2158 : static struct ctl_table fs_exec_sysctls[] = {
2159 : {
2160 : .procname = "suid_dumpable",
2161 : .data = &suid_dumpable,
2162 : .maxlen = sizeof(int),
2163 : .mode = 0644,
2164 : .proc_handler = proc_dointvec_minmax_coredump,
2165 : .extra1 = SYSCTL_ZERO,
2166 : .extra2 = SYSCTL_TWO,
2167 : },
2168 : { }
2169 : };
2170 :
2171 1 : static int __init init_fs_exec_sysctls(void)
2172 : {
2173 1 : register_sysctl_init("fs", fs_exec_sysctls);
2174 1 : return 0;
2175 : }
2176 :
2177 : fs_initcall(init_fs_exec_sysctls);
2178 : #endif /* CONFIG_SYSCTL */
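The table above surfaces the knob as /proc/sys/fs/suid_dumpable, clamped to 0..2 by extra1/extra2, with validate_coredump_safety() re-run after every accepted write. A minimal sketch that reads it back:

	#include <stdio.h>

	int main(void)
	{
		FILE *f = fopen("/proc/sys/fs/suid_dumpable", "r");
		int val;

		if (!f)
			return 1;
		if (fscanf(f, "%d", &val) == 1)
			printf("suid_dumpable=%d\n", val);	/* 0, 1 (debug) or 2 (suidsafe) */
		fclose(f);
		return 0;
	}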