LCOV - code coverage report
Current view: top level - fs/proc - base.c (source / functions) Hit Total Coverage
Test: coverage.info Lines: 13 927 1.4 %
Date: 2023-07-19 18:55:55 Functions: 3 84 3.6 %

          Line data    Source code
       1             : // SPDX-License-Identifier: GPL-2.0
       2             : /*
       3             :  *  linux/fs/proc/base.c
       4             :  *
       5             :  *  Copyright (C) 1991, 1992 Linus Torvalds
       6             :  *
       7             :  *  proc base directory handling functions
       8             :  *
       9             :  *  1999, Al Viro. Rewritten. Now it covers the whole per-process part.
      10             :  *  Instead of using magical inumbers to determine the kind of object
      11             :  *  we allocate and fill in-core inodes upon lookup. They don't even
      12             :  *  go into icache. We cache the reference to task_struct upon lookup too.
      13             :  *  Eventually it should become a filesystem in its own. We don't use the
      14             :  *  rest of procfs anymore.
      15             :  *
      16             :  *
      17             :  *  Changelog:
      18             :  *  17-Jan-2005
      19             :  *  Allan Bezerra
      20             :  *  Bruna Moreira <bruna.moreira@indt.org.br>
      21             :  *  Edjard Mota <edjard.mota@indt.org.br>
      22             :  *  Ilias Biris <ilias.biris@indt.org.br>
      23             :  *  Mauricio Lin <mauricio.lin@indt.org.br>
      24             :  *
      25             :  *  Embedded Linux Lab - 10LE Instituto Nokia de Tecnologia - INdT
      26             :  *
      27             :  *  A new process specific entry (smaps) included in /proc. It shows the
      28             :  *  size of rss for each memory area. The maps entry lacks information
      29             :  *  about physical memory size (rss) for each mapped file, i.e.,
      30             :  *  rss information for executables and library files.
      31             :  *  This additional information is useful for any tools that need to know
      32             :  *  about physical memory consumption for a process specific library.
      33             :  *
      34             :  *  Changelog:
      35             :  *  21-Feb-2005
      36             :  *  Embedded Linux Lab - 10LE Instituto Nokia de Tecnologia - INdT
      37             :  *  Pud inclusion in the page table walking.
      38             :  *
      39             :  *  ChangeLog:
      40             :  *  10-Mar-2005
      41             :  *  10LE Instituto Nokia de Tecnologia - INdT:
      42             :  *  A better way to walk through the page table as suggested by Hugh Dickins.
      43             :  *
      44             :  *  Simo Piiroinen <simo.piiroinen@nokia.com>:
      45             :  *  Smaps information related to shared, private, clean and dirty pages.
      46             :  *
      47             :  *  Paul Mundt <paul.mundt@nokia.com>:
      48             :  *  Overall revision about smaps.
      49             :  */
      50             : 
      51             : #include <linux/uaccess.h>
      52             : 
      53             : #include <linux/errno.h>
      54             : #include <linux/time.h>
      55             : #include <linux/proc_fs.h>
      56             : #include <linux/stat.h>
      57             : #include <linux/task_io_accounting_ops.h>
      58             : #include <linux/init.h>
      59             : #include <linux/capability.h>
      60             : #include <linux/file.h>
      61             : #include <linux/fdtable.h>
      62             : #include <linux/generic-radix-tree.h>
      63             : #include <linux/string.h>
      64             : #include <linux/seq_file.h>
      65             : #include <linux/namei.h>
      66             : #include <linux/mnt_namespace.h>
      67             : #include <linux/mm.h>
      68             : #include <linux/swap.h>
      69             : #include <linux/rcupdate.h>
      70             : #include <linux/kallsyms.h>
      71             : #include <linux/stacktrace.h>
      72             : #include <linux/resource.h>
      73             : #include <linux/module.h>
      74             : #include <linux/mount.h>
      75             : #include <linux/security.h>
      76             : #include <linux/ptrace.h>
      77             : #include <linux/printk.h>
      78             : #include <linux/cache.h>
      79             : #include <linux/cgroup.h>
      80             : #include <linux/cpuset.h>
      81             : #include <linux/audit.h>
      82             : #include <linux/poll.h>
      83             : #include <linux/nsproxy.h>
      84             : #include <linux/oom.h>
      85             : #include <linux/elf.h>
      86             : #include <linux/pid_namespace.h>
      87             : #include <linux/user_namespace.h>
      88             : #include <linux/fs_struct.h>
      89             : #include <linux/slab.h>
      90             : #include <linux/sched/autogroup.h>
      91             : #include <linux/sched/mm.h>
      92             : #include <linux/sched/coredump.h>
      93             : #include <linux/sched/debug.h>
      94             : #include <linux/sched/stat.h>
      95             : #include <linux/posix-timers.h>
      96             : #include <linux/time_namespace.h>
      97             : #include <linux/resctrl.h>
      98             : #include <linux/cn_proc.h>
      99             : #include <linux/ksm.h>
     100             : #include <trace/events/oom.h>
     101             : #include "internal.h"
     102             : #include "fd.h"
     103             : 
     104             : #include "../../lib/kstrtox.h"
     105             : 
     106             : /* NOTE:
     107             :  *      Implementing inode permission operations in /proc is almost
     108             :  *      certainly an error.  Permission checks need to happen during
     109             :  *      each system call not at open time.  The reason is that most of
     110             :  *      what we wish to check for permissions in /proc varies at runtime.
     111             :  *
     112             :  *      The classic example of a problem is opening file descriptors
     113             :  *      in /proc for a task before it execs a suid executable.
     114             :  */
     115             : /*
                     :  * Link counts kept in u8 variables marked __ro_after_init.
                     :  * NOTE(review): presumably computed once at init time with
                     :  * pid_entry_nlink() for the per-thread (tid) and per-process (tgid)
                     :  * entry tables; the assignment is not visible in this part of the
                     :  * file -- confirm.
                     :  */
     116             : static u8 nlink_tid __ro_after_init;
     117             : static u8 nlink_tgid __ro_after_init;
     118             : /*
                     :  * struct pid_entry - one row of a pid_entry table: a named entry
                     :  * together with its mode and the operations attached to it.
                     :  * Rows are normally written with NOD() or one of its wrappers
                     :  * (DIR, LNK, REG, ONE, ATTR).
                     :  */
     119             : struct pid_entry {
     120             :         const char *name;       /* entry name */
     121             :         unsigned int len;       /* NOD() sets this to sizeof(NAME) - 1 */
     122             :         umode_t mode;   /* file type bits OR'ed with permission bits */
     123             :         const struct inode_operations *iop;     /* NULL in REG/ONE/ATTR rows */
     124             :         const struct file_operations *fop;      /* NULL in LNK rows */
     125             :         union proc_op op;       /* per-entry member, e.g. .proc_show or .proc_get_link */
     126             : };
     127             : /*
                     :  * Initializer helpers for struct pid_entry rows.
                     :  *
                     :  * NOD()  - generic row.  .len is computed as sizeof(NAME) - 1, so NAME
                     :  *          has to be a string literal (or char array), not a pointer.
                     :  * DIR()  - S_IFDIR|MODE row; takes the address of @iops and @fops and
                     :  *          leaves .op empty.
                     :  * LNK()  - S_IFLNK|S_IRWXUGO row using proc_pid_link_inode_operations,
                     :  *          no file operations, .op.proc_get_link = @get_link.
                     :  * REG()  - S_IFREG|MODE row with no inode operations, the address of
                     :  *          @fops as file operations and an empty .op.
                     :  * ONE()  - S_IFREG|MODE row using proc_single_file_operations,
                     :  *          .op.proc_show = @show.
                     :  * ATTR() - S_IFREG|MODE row using proc_pid_attr_operations,
                     :  *          .op.lsm = @LSM.
                     :  */
     128             : #define NOD(NAME, MODE, IOP, FOP, OP) {                 \
     129             :         .name = (NAME),                                 \
     130             :         .len  = sizeof(NAME) - 1,                       \
     131             :         .mode = MODE,                                   \
     132             :         .iop  = IOP,                                    \
     133             :         .fop  = FOP,                                    \
     134             :         .op   = OP,                                     \
     135             : }
     136             : 
     137             : #define DIR(NAME, MODE, iops, fops)     \
     138             :         NOD(NAME, (S_IFDIR|(MODE)), &iops, &fops, {} )
     139             : #define LNK(NAME, get_link)                                     \
     140             :         NOD(NAME, (S_IFLNK|S_IRWXUGO),                          \
     141             :                 &proc_pid_link_inode_operations, NULL,              \
     142             :                 { .proc_get_link = get_link } )
     143             : #define REG(NAME, MODE, fops)                           \
     144             :         NOD(NAME, (S_IFREG|(MODE)), NULL, &fops, {})
     145             : #define ONE(NAME, MODE, show)                           \
     146             :         NOD(NAME, (S_IFREG|(MODE)),                     \
     147             :                 NULL, &proc_single_file_operations, \
     148             :                 { .proc_show = show } )
     149             : #define ATTR(LSM, NAME, MODE)                           \
     150             :         NOD(NAME, (S_IFREG|(MODE)),                     \
     151             :                 NULL, &proc_pid_attr_operations,    \
     152             :                 { .lsm = LSM })
     153             : 
     154             : /*
     155             :  * Count the number of hardlinks for the pid_entry table, excluding the .
     156             :  * and .. links.
                     :  *
                     :  * @entries: table to scan
                     :  * @n:       number of rows in @entries
                     :  *
                     :  * Returns 2 plus the number of rows whose mode satisfies S_ISDIR().
                     :  * NOTE(review): the initial value of 2 presumably stands for "." and
                     :  * "..", which reads at odds with "excluding" above -- confirm the
                     :  * intended wording.
     157             :  */
     158           2 : static unsigned int __init pid_entry_nlink(const struct pid_entry *entries,
     159             :         unsigned int n)
     160             : {
     161             :         unsigned int i;
     162             :         unsigned int count;
     163             : 
     164           2 :         count = 2;
     165          63 :         for (i = 0; i < n; ++i) {
     166          61 :                 if (S_ISDIR(entries[i].mode))
     167           8 :                         ++count;        /* one more link per directory row */
     168             :         }
     169             : 
     170           2 :         return count;
     171             : }
     172             : /*
                     :  * get_task_root - fetch the filesystem root of @task.
                     :  * @task: task whose ->fs is inspected
                     :  * @root: filled in through get_fs_root() on success
                     :  *
                     :  * ->fs is tested and used while holding task_lock(@task).
                     :  *
                     :  * Returns 0 on success, -ENOENT when @task has no ->fs.
                     :  */
     173             : static int get_task_root(struct task_struct *task, struct path *root)
     174             : {
     175           0 :         int result = -ENOENT;
     176             : 
     177           0 :         task_lock(task);
     178           0 :         if (task->fs) {
     179           0 :                 get_fs_root(task->fs, root);
     180           0 :                 result = 0;
     181             :         }
     182           0 :         task_unlock(task);
     183             :         return result;
     184             : }
     185             : /*
                     :  * proc_cwd_link - resolve the working directory of the task behind
                     :  * @dentry.
                     :  * @dentry: dentry whose inode is handed to get_proc_task()
                     :  * @path:   filled in through get_fs_pwd() on success
                     :  *
                     :  * The task obtained from get_proc_task() is released again with
                     :  * put_task_struct(); ->fs is tested and used under task_lock().
                     :  *
                     :  * Returns 0 on success, -ENOENT when no task is found or the task
                     :  * has no ->fs.
                     :  */
     186           0 : static int proc_cwd_link(struct dentry *dentry, struct path *path)
     187             : {
     188           0 :         struct task_struct *task = get_proc_task(d_inode(dentry));
     189           0 :         int result = -ENOENT;
     190             : 
     191           0 :         if (task) {
     192             :                 task_lock(task);
     193           0 :                 if (task->fs) {
     194           0 :                         get_fs_pwd(task->fs, path);
     195           0 :                         result = 0;
     196             :                 }
     197             :                 task_unlock(task);
     198           0 :                 put_task_struct(task);
     199             :         }
     200           0 :         return result;
     201             : }
     202             : /*
                     :  * proc_root_link - resolve the filesystem root of the task behind
                     :  * @dentry.
                     :  * @dentry: dentry whose inode is handed to get_proc_task()
                     :  * @path:   filled in by get_task_root() on success
                     :  *
                     :  * Returns the result of get_task_root(), or -ENOENT when no task is
                     :  * found.  The task reference is dropped with put_task_struct().
                     :  */
     203           0 : static int proc_root_link(struct dentry *dentry, struct path *path)
     204             : {
     205           0 :         struct task_struct *task = get_proc_task(d_inode(dentry));
     206           0 :         int result = -ENOENT;
     207             : 
     208           0 :         if (task) {
     209           0 :                 result = get_task_root(task, path);
     210           0 :                 put_task_struct(task);
     211             :         }
     212           0 :         return result;
     213             : }
     214             : 
     215             : /*
     216             :  * If the user used setproctitle(), we just get the string from
     217             :  * user space at arg_start, and limit it to a maximum of one page.
                     :  *
                     :  * @mm:        address space read through access_remote_vm()
                     :  * @buf:       user buffer receiving the data
                     :  * @count:     maximum number of bytes to copy
                     :  * @pos:       offset into the title at which copying starts
                     :  * @arg_start: address in @mm where the title begins
                     :  *
                     :  * Up to PAGE_SIZE bytes are read into a temporary page; the title is
                     :  * what strnlen() finds there, plus the terminating NUL when one was
                     :  * read.
                     :  *
                     :  * Returns the number of bytes copied to @buf; 0 when @pos is at or
                     :  * beyond PAGE_SIZE, nothing could be read, or @pos is at or past the
                     :  * end of the title; -ENOMEM when the temporary page cannot be
                     :  * allocated; -EFAULT when copy_to_user() copied nothing.
     218             :  */
     219           0 : static ssize_t get_mm_proctitle(struct mm_struct *mm, char __user *buf,
     220             :                                 size_t count, unsigned long pos,
     221             :                                 unsigned long arg_start)
     222             : {
     223             :         char *page;
     224             :         int ret, got;
     225             : 
     226           0 :         if (pos >= PAGE_SIZE)
     227             :                 return 0;
     228             : 
     229           0 :         page = (char *)__get_free_page(GFP_KERNEL);
     230           0 :         if (!page)
     231             :                 return -ENOMEM;
     232             : 
     233           0 :         ret = 0;
     234           0 :         got = access_remote_vm(mm, arg_start, page, PAGE_SIZE, FOLL_ANON);
     235           0 :         if (got > 0) {
     236           0 :                 int len = strnlen(page, got);
     237             : 
     238             :                 /* Include the NUL character if it was found */
     239           0 :                 if (len < got)
     240           0 :                         len++;
     241             : 
     242           0 :                 if (len > pos) {
     243           0 :                         len -= pos;
     244           0 :                         if (len > count)
     245           0 :                                 len = count;
     246           0 :                         len -= copy_to_user(buf, page+pos, len);        /* drop the part that was not copied */
     247           0 :                         if (!len)
     248           0 :                                 len = -EFAULT;
     249             :                         ret = len;
     250             :                 }
     251             :         }
     252           0 :         free_page((unsigned long)page);
     253           0 :         return ret;
     254             : }
     255             : /*
                     :  * get_mm_cmdline - copy (part of) the command line of @mm to user
                     :  * space.
                     :  * @mm:    address space holding the argument strings
                     :  * @buf:   user buffer receiving the data
                     :  * @count: maximum number of bytes to copy
                     :  * @ppos:  offset into the command line; only read here, not advanced
                     :  *
                     :  * arg_start/arg_end/env_start/env_end are sampled under mm->arg_lock.
                     :  * When the last argv byte is non-zero the work is handed to
                     :  * get_mm_proctitle(); otherwise data is copied from
                     :  * [arg_start, arg_end) one page at a time through a temporary page.
                     :  *
                     :  * Returns the number of bytes copied; 0 when there is nothing to
                     :  * return (mm->env_end unset, empty argument area, offset out of
                     :  * range, nothing readable); -ENOMEM when the temporary page cannot
                     :  * be allocated; -EFAULT when the very first copy_to_user() copied
                     :  * nothing.
                     :  */
     256           0 : static ssize_t get_mm_cmdline(struct mm_struct *mm, char __user *buf,
     257             :                               size_t count, loff_t *ppos)
     258             : {
     259             :         unsigned long arg_start, arg_end, env_start, env_end;
     260             :         unsigned long pos, len;
     261             :         char *page, c;
     262             : 
     263             :         /* Check if process spawned far enough to have cmdline. */
     264           0 :         if (!mm->env_end)
     265             :                 return 0;
     266             : 
     267           0 :         spin_lock(&mm->arg_lock);
     268           0 :         arg_start = mm->arg_start;
     269           0 :         arg_end = mm->arg_end;
     270           0 :         env_start = mm->env_start;
     271           0 :         env_end = mm->env_end;
     272           0 :         spin_unlock(&mm->arg_lock);
     273             : 
     274           0 :         if (arg_start >= arg_end)
     275             :                 return 0;
     276             : 
     277             :         /*
     278             :          * We allow setproctitle() to overwrite the argument
     279             :          * strings, and overflow past the original end. But
     280             :          * only when it overflows into the environment area.
     281             :          */
     282           0 :         if (env_start != arg_end || env_end < env_start)
     283           0 :                 env_start = env_end = arg_end;
     284           0 :         len = env_end - arg_start;
     285             : 
     286             :         /* We're not going to care if "*ppos" has high bits set */
     287           0 :         pos = *ppos;
     288           0 :         if (pos >= len)
     289             :                 return 0;
     290           0 :         if (count > len - pos)
     291           0 :                 count = len - pos;
     292           0 :         if (!count)
     293             :                 return 0;
     294             : 
     295             :         /*
     296             :          * Magical special case: if the argv[] end byte is not
     297             :          * zero, the user has overwritten it with setproctitle(3).
     298             :          *
     299             :          * Possible future enhancement: do this only once when
     300             :          * pos is 0, and set a flag in the 'struct file'.
     301             :          */
     302           0 :         if (access_remote_vm(mm, arg_end-1, &c, 1, FOLL_ANON) == 1 && c)
     303           0 :                 return get_mm_proctitle(mm, buf, count, pos, arg_start);
     304             : 
     305             :         /*
     306             :          * For the non-setproctitle() case we limit things strictly
     307             :          * to the [arg_start, arg_end[ range.
     308             :          */
     309           0 :         pos += arg_start;       /* pos is an address in @mm from here on */
     310           0 :         if (pos < arg_start || pos >= arg_end)  /* first test catches wrap-around of the addition */
     311             :                 return 0;
     312           0 :         if (count > arg_end - pos)
     313           0 :                 count = arg_end - pos;
     314             : 
     315           0 :         page = (char *)__get_free_page(GFP_KERNEL);
     316           0 :         if (!page)
     317             :                 return -ENOMEM;
     318             : 
     319             :         len = 0;        /* reused: bytes copied to @buf so far */
     320           0 :         while (count) {
     321             :                 int got;
     322           0 :                 size_t size = min_t(size_t, PAGE_SIZE, count);
     323             : 
     324           0 :                 got = access_remote_vm(mm, pos, page, size, FOLL_ANON);
     325           0 :                 if (got <= 0)
     326             :                         break;
     327           0 :                 got -= copy_to_user(buf, page, got);    /* keep only what reached user space */
     328           0 :                 if (unlikely(!got)) {
     329           0 :                         if (!len)
     330           0 :                                 len = -EFAULT;  /* stored in an unsigned long, returned as ssize_t */
     331             :                         break;
     332             :                 }
     333           0 :                 pos += got;
     334           0 :                 buf += got;
     335           0 :                 len += got;
     336           0 :                 count -= got;
     337             :         }
     338             : 
     339           0 :         free_page((unsigned long)page);
     340           0 :         return len;
     341             : }
     342             : /*
                     :  * get_task_cmdline - copy the command line of @tsk to user space.
                     :  * @tsk:   task whose mm is obtained with get_task_mm()
                     :  * @buf:   user buffer, passed through to get_mm_cmdline()
                     :  * @count: maximum number of bytes, passed through
                     :  * @pos:   file offset, passed through
                     :  *
                     :  * Returns 0 when get_task_mm() yields no mm, otherwise the result of
                     :  * get_mm_cmdline().  The mm reference is dropped with mmput().
                     :  */
     343           0 : static ssize_t get_task_cmdline(struct task_struct *tsk, char __user *buf,
     344             :                                 size_t count, loff_t *pos)
     345             : {
     346             :         struct mm_struct *mm;
     347             :         ssize_t ret;
     348             : 
     349           0 :         mm = get_task_mm(tsk);
     350           0 :         if (!mm)
     351             :                 return 0;
     352             : 
     353           0 :         ret = get_mm_cmdline(mm, buf, count, pos);
     354           0 :         mmput(mm);
     355           0 :         return ret;
     356             : }
     357             : /*
                     :  * proc_pid_cmdline_read - ->read handler used by proc_pid_cmdline_ops.
                     :  * @file:  open file; its inode is handed to get_proc_task()
                     :  * @buf:   user buffer receiving the data
                     :  * @count: maximum number of bytes to copy
                     :  * @pos:   file offset; advanced by the number of bytes returned
                     :  *
                     :  * A negative *@pos triggers BUG_ON().
                     :  *
                     :  * Returns the result of get_task_cmdline(), or -ESRCH when no task is
                     :  * found.  The task reference is dropped with put_task_struct().
                     :  */
     358           0 : static ssize_t proc_pid_cmdline_read(struct file *file, char __user *buf,
     359             :                                      size_t count, loff_t *pos)
     360             : {
     361             :         struct task_struct *tsk;
     362             :         ssize_t ret;
     363             : 
     364           0 :         BUG_ON(*pos < 0);
     365             : 
     366           0 :         tsk = get_proc_task(file_inode(file));
     367           0 :         if (!tsk)
     368             :                 return -ESRCH;
     369           0 :         ret = get_task_cmdline(tsk, buf, count, pos);
     370           0 :         put_task_struct(tsk);
     371           0 :         if (ret > 0)
     372           0 :                 *pos += ret;    /* only successful reads move the offset */
     373             :         return ret;
     374             : }
     375             : /*
                     :  * File operations that route reads to proc_pid_cmdline_read() and
                     :  * seeks to generic_file_llseek(); no other operation is set.
                     :  */
     376             : static const struct file_operations proc_pid_cmdline_ops = {
     377             :         .read   = proc_pid_cmdline_read,
     378             :         .llseek = generic_file_llseek,
     379             : };
     380             : 
     381             : #ifdef CONFIG_KALLSYMS
     382             : /*
     383             :  * Provides a wchan file via kallsyms in a proper one-value-per-file format.
     384             :  * Prints the resolved symbol name.  If that is not possible, prints "0".
                     :  *
                     :  * @m:    seq_file receiving the output
                     :  * @ns:   not used by this function
                     :  * @pid:  not used by this function
                     :  * @task: task that is inspected
                     :  *
                     :  * "0" is printed when ptrace_may_access() with
                     :  * PTRACE_MODE_READ_FSCREDS refuses access, when get_wchan() returns 0,
                     :  * or when lookup_symbol_name() returns non-zero.
                     :  *
                     :  * Always returns 0.
     385             :  */
     386           0 : static int proc_pid_wchan(struct seq_file *m, struct pid_namespace *ns,
     387             :                           struct pid *pid, struct task_struct *task)
     388             : {
     389             :         unsigned long wchan;
     390             :         char symname[KSYM_NAME_LEN];
     391             : 
     392           0 :         if (!ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS))
     393             :                 goto print0;
     394             : 
     395           0 :         wchan = get_wchan(task);
     396           0 :         if (wchan && !lookup_symbol_name(wchan, symname)) {
     397           0 :                 seq_puts(m, symname);
     398           0 :                 return 0;
     399             :         }
     400             : 
     401             : print0:
     402           0 :         seq_putc(m, '0');
     403           0 :         return 0;
     404             : }
     405             : #endif /* CONFIG_KALLSYMS */
     406             : /*
                     :  * lock_trace - take @task's exec_update_lock for reading and check
                     :  * that the caller may attach to @task.
                     :  * @task: task about to be inspected
                     :  *
                     :  * Returns 0 with task->signal->exec_update_lock read-held (release it
                     :  * with unlock_trace()); the error from down_read_killable() when the
                     :  * lock was not taken; or -EPERM, with the lock already released, when
                     :  * ptrace_may_access() with PTRACE_MODE_ATTACH_FSCREDS refuses access.
                     :  */
     407           0 : static int lock_trace(struct task_struct *task)
     408             : {
     409           0 :         int err = down_read_killable(&task->signal->exec_update_lock);
     410           0 :         if (err)
     411             :                 return err;
     412           0 :         if (!ptrace_may_access(task, PTRACE_MODE_ATTACH_FSCREDS)) {
     413           0 :                 up_read(&task->signal->exec_update_lock);
     414           0 :                 return -EPERM;
     415             :         }
     416             :         return 0;
     417             : }
     418             : /*
                     :  * unlock_trace - release the read side of @task's exec_update_lock,
                     :  * i.e. undo a successful lock_trace().
                     :  */
     419             : static void unlock_trace(struct task_struct *task)
     420             : {
     421           0 :         up_read(&task->signal->exec_update_lock);
     422             : }
     423             : 
     424             : #ifdef CONFIG_STACKTRACE
     425             : /* Upper bound on the number of stack entries saved and printed below. */
     426             : #define MAX_STACK_TRACE_DEPTH   64
     427             : /*
                     :  * proc_pid_stack - print a stack trace of @task, one entry per line.
                     :  * @m:    seq_file receiving the output
                     :  * @ns:   not used by this function
                     :  * @pid:  not used by this function
                     :  * @task: task whose stack is saved with stack_trace_save_tsk()
                     :  *
                     :  * Each saved entry is printed as "[<0>] %pB".  The trace is taken
                     :  * between lock_trace() and unlock_trace().
                     :  *
                     :  * Returns 0 on success; -EACCES when the opener of @m->file lacks
                     :  * CAP_SYS_ADMIN in init_user_ns; -ENOMEM when the entry buffer cannot
                     :  * be allocated; or the error returned by lock_trace().
                     :  */
     428           0 : static int proc_pid_stack(struct seq_file *m, struct pid_namespace *ns,
     429             :                           struct pid *pid, struct task_struct *task)
     430             : {
     431             :         unsigned long *entries;
     432             :         int err;
     433             : 
     434             :         /*
     435             :          * The ability to racily run the kernel stack unwinder on a running task
     436             :          * and then observe the unwinder output is scary; while it is useful for
     437             :          * debugging kernel issues, it can also allow an attacker to leak kernel
     438             :          * stack contents.
     439             :          * Doing this in a manner that is at least safe from races would require
     440             :          * some work to ensure that the remote task can not be scheduled; and
     441             :          * even then, this would still expose the unwinder as local attack
     442             :          * surface.
     443             :          * Therefore, this interface is restricted to root.
     444             :          */
     445           0 :         if (!file_ns_capable(m->file, &init_user_ns, CAP_SYS_ADMIN))
     446             :                 return -EACCES;
     447             : 
     448           0 :         entries = kmalloc_array(MAX_STACK_TRACE_DEPTH, sizeof(*entries),
     449             :                                 GFP_KERNEL);
     450           0 :         if (!entries)
     451             :                 return -ENOMEM;
     452             : 
     453           0 :         err = lock_trace(task);
     454           0 :         if (!err) {
     455             :                 unsigned int i, nr_entries;
     456             : 
     457           0 :                 nr_entries = stack_trace_save_tsk(task, entries,
     458             :                                                   MAX_STACK_TRACE_DEPTH, 0);
     459             : 
     460           0 :                 for (i = 0; i < nr_entries; i++) {
     461           0 :                         seq_printf(m, "[<0>] %pB\n", (void *)entries[i]);
     462             :                 }
     463             : 
     464           0 :                 unlock_trace(task);
     465             :         }
     466           0 :         kfree(entries); /* freed on both the success and the lock_trace() failure path */
     467             : 
     468           0 :         return err;
     469             : }
     470             : #endif
     471             : 
     472             : #ifdef CONFIG_SCHED_INFO
     473             : /*
     474             :  * Provides /proc/PID/schedstat
                     :  *
                     :  * @m:    seq_file receiving the output
                     :  * @ns:   not used by this function
                     :  * @pid:  not used by this function
                     :  * @task: task whose scheduler statistics are printed
                     :  *
                     :  * Prints "0 0 0" when sched_info_on() is false; otherwise prints
                     :  * task->se.sum_exec_runtime, task->sched_info.run_delay and
                     :  * task->sched_info.pcount on one line.
                     :  *
                     :  * Always returns 0.
     475             :  */
     476             : static int proc_pid_schedstat(struct seq_file *m, struct pid_namespace *ns,
     477             :                               struct pid *pid, struct task_struct *task)
     478             : {
     479             :         if (unlikely(!sched_info_on()))
     480             :                 seq_puts(m, "0 0 0\n");
     481             :         else
     482             :                 seq_printf(m, "%llu %llu %lu\n",
     483             :                    (unsigned long long)task->se.sum_exec_runtime,
     484             :                    (unsigned long long)task->sched_info.run_delay,
     485             :                    task->sched_info.pcount);
     486             : 
     487             :         return 0;
     488             : }
     489             : #endif
     490             : 
     491             : #ifdef CONFIG_LATENCYTOP
     492             : static int lstats_show_proc(struct seq_file *m, void *v)
     493             : {       /*
                     :          * seq_file show callback (installed by lstats_open()): prints
                     :          * the latency records of the task behind m->private.
                     :          *
                     :          * Output is a "Latency Top version : v0.1" header followed by
                     :          * one line per record whose first backtrace slot is non-zero:
                     :          * count, time and max, then the backtrace entries printed with
                     :          * %ps up to the first zero entry or LT_BACKTRACEDEPTH.
                     :          *
                     :          * @v is not used.  Returns 0, or -ESRCH when get_proc_task()
                     :          * finds no task.
                     :          */
     494             :         int i;
     495             :         struct inode *inode = m->private;       /* the inode lstats_open() passed to single_open() */
     496             :         struct task_struct *task = get_proc_task(inode);
     497             : 
     498             :         if (!task)
     499             :                 return -ESRCH;
     500             :         seq_puts(m, "Latency Top version : v0.1\n");
     501             :         for (i = 0; i < LT_SAVECOUNT; i++) {
     502             :                 struct latency_record *lr = &task->latency_record[i];
     503             :                 if (lr->backtrace[0]) {
     504             :                         int q;
     505             :                         seq_printf(m, "%i %li %li",
     506             :                                    lr->count, lr->time, lr->max);
     507             :                         for (q = 0; q < LT_BACKTRACEDEPTH; q++) {
     508             :                                 unsigned long bt = lr->backtrace[q];
     509             : 
     510             :                                 if (!bt)
     511             :                                         break;
     512             :                                 seq_printf(m, " %ps", (void *)bt);
     513             :                         }
     514             :                         seq_putc(m, '\n');
     515             :                 }
     516             : 
     517             :         }
     518             :         put_task_struct(task);
     519             :         return 0;
     520             : }
     521             : /*
                     :  * lstats_open - ->open handler of proc_lstats_operations.
                     :  * Sets the file up with single_open(), using lstats_show_proc() as the
                     :  * show callback and @inode as its private data.  Returns whatever
                     :  * single_open() returns.
                     :  */
     522             : static int lstats_open(struct inode *inode, struct file *file)
     523             : {
     524             :         return single_open(file, lstats_show_proc, inode);
     525             : }
     526             : /*
                     :  * lstats_write - ->write handler of proc_lstats_operations.
                     :  * @file:  open file; its inode is handed to get_proc_task()
                     :  * @buf:   user data; never read by this function
                     :  * @count: number of bytes the caller asked to write
                     :  * @offs:  file offset; not used
                     :  *
                     :  * Any write calls clear_tsk_latency_tracing() on the task,
                     :  * regardless of what was written.
                     :  *
                     :  * Returns @count, or -ESRCH when no task is found.
                     :  */
     527             : static ssize_t lstats_write(struct file *file, const char __user *buf,
     528             :                             size_t count, loff_t *offs)
     529             : {
     530             :         struct task_struct *task = get_proc_task(file_inode(file));
     531             : 
     532             :         if (!task)
     533             :                 return -ESRCH;
     534             :         clear_tsk_latency_tracing(task);
     535             :         put_task_struct(task);
     536             : 
     537             :         return count;
     538             : }
     539             : /*
                     :  * File operations built on the single_open() seq_file helpers:
                     :  * lstats_open() sets up the show callback, reading and seeking go
                     :  * through seq_read()/seq_lseek(), and a write is handled by
                     :  * lstats_write().
                     :  */
     540             : static const struct file_operations proc_lstats_operations = {
     541             :         .open           = lstats_open,
     542             :         .read           = seq_read,
     543             :         .write          = lstats_write,
     544             :         .llseek         = seq_lseek,
     545             :         .release        = single_release,
     546             : };
     547             : 
     548             : #endif
     549             : /*
                     :  * proc_oom_score - print a score derived from oom_badness() for
                     :  * @task.
                     :  * @m:    seq_file receiving the output
                     :  * @ns:   not used by this function
                     :  * @pid:  not used by this function
                     :  * @task: task that is scored
                     :  *
                     :  * totalpages is totalram_pages() + total_swap_pages.  When
                     :  * oom_badness() returns LONG_MIN the printed value stays 0;
                     :  * otherwise it is (1000 + badness * 1000 / totalpages) * 2 / 3.
                     :  *
                     :  * Always returns 0.
                     :  */
     550           0 : static int proc_oom_score(struct seq_file *m, struct pid_namespace *ns,
     551             :                           struct pid *pid, struct task_struct *task)
     552             : {
     553           0 :         unsigned long totalpages = totalram_pages() + total_swap_pages;
     554           0 :         unsigned long points = 0;
     555             :         long badness;
     556             : 
     557           0 :         badness = oom_badness(task, totalpages);
     558             :         /*
     559             :          * Special case OOM_SCORE_ADJ_MIN; for all others scale the
     560             :          * badness value into [0, 2000] range which we have been
     561             :          * exporting for a long time so userspace might depend on it.
     562             :          */
     563           0 :         if (badness != LONG_MIN)
     564           0 :                 points = (1000 + badness * 1000 / (long)totalpages) * 2 / 3;
     565             : 
     566           0 :         seq_printf(m, "%lu\n", points);
     567             : 
     568           0 :         return 0;
     569             : }
     570             : /*
                     :  * struct limit_names - display strings for one resource limit, as
                     :  * stored in the lnames[] table.
                     :  */
     571             : struct limit_names {
     572             :         const char *name;       /* label, e.g. "Max cpu time" */
     573             :         const char *unit;       /* unit string, e.g. "bytes"; NULL for some limits */
     574             : };
     575             : /*
                     :  * Label and unit for each resource limit, indexed by the RLIMIT_*
                     :  * constant.  RLIMIT_NICE and RLIMIT_RTPRIO carry no unit (NULL).
                     :  */
     576             : static const struct limit_names lnames[RLIM_NLIMITS] = {
     577             :         [RLIMIT_CPU] = {"Max cpu time", "seconds"},
     578             :         [RLIMIT_FSIZE] = {"Max file size", "bytes"},
     579             :         [RLIMIT_DATA] = {"Max data size", "bytes"},
     580             :         [RLIMIT_STACK] = {"Max stack size", "bytes"},
     581             :         [RLIMIT_CORE] = {"Max core file size", "bytes"},
     582             :         [RLIMIT_RSS] = {"Max resident set", "bytes"},
     583             :         [RLIMIT_NPROC] = {"Max processes", "processes"},
     584             :         [RLIMIT_NOFILE] = {"Max open files", "files"},
     585             :         [RLIMIT_MEMLOCK] = {"Max locked memory", "bytes"},
     586             :         [RLIMIT_AS] = {"Max address space", "bytes"},
     587             :         [RLIMIT_LOCKS] = {"Max file locks", "locks"},
     588             :         [RLIMIT_SIGPENDING] = {"Max pending signals", "signals"},
     589             :         [RLIMIT_MSGQUEUE] = {"Max msgqueue size", "bytes"},
     590             :         [RLIMIT_NICE] = {"Max nice priority", NULL},
     591             :         [RLIMIT_RTPRIO] = {"Max realtime priority", NULL},
     592             :         [RLIMIT_RTTIME] = {"Max realtime timeout", "us"},
     593             : };
     594             : 
     595             : /*
                        * Display limits for a process.
                        *
                        * Copies task->signal->rlim into a local array while the lock taken by
                        * lock_task_sighand() is held, then prints a header line followed by
                        * one row per RLIM_NLIMITS entry: name, soft limit, hard limit, unit.
                        * A limit equal to RLIM_INFINITY is printed as "unlimited".
                        *
                        * Always returns 0; when lock_task_sighand() fails nothing is printed.
                        * @ns and @pid are not used by this function.
                        */
     596           0 : static int proc_pid_limits(struct seq_file *m, struct pid_namespace *ns,
     597             :                            struct pid *pid, struct task_struct *task)
     598             : {
     599             :         unsigned int i;
     600             :         unsigned long flags;
     601             : 
     602             :         struct rlimit rlim[RLIM_NLIMITS];
     603             : 
                               /* snapshot the limits under the lock; formatting happens after unlock */
     604           0 :         if (!lock_task_sighand(task, &flags))
     605             :                 return 0;
     606           0 :         memcpy(rlim, task->signal->rlim, sizeof(struct rlimit) * RLIM_NLIMITS);
     607           0 :         unlock_task_sighand(task, &flags);
     608             : 
     609             :         /*
     610             :          * print the file header
     611             :          */
     612           0 :         seq_puts(m, "Limit                     "
     613             :                 "Soft Limit           "
     614             :                 "Hard Limit           "
     615             :                 "Units     \n");
     616             : 
     617           0 :         for (i = 0; i < RLIM_NLIMITS; i++) {
                                       /* name column followed by the soft limit */
     618           0 :                 if (rlim[i].rlim_cur == RLIM_INFINITY)
     619           0 :                         seq_printf(m, "%-25s %-20s ",
     620             :                                    lnames[i].name, "unlimited");
     621             :                 else
     622           0 :                         seq_printf(m, "%-25s %-20lu ",
     623             :                                    lnames[i].name, rlim[i].rlim_cur);
     624             : 
                                       /* hard limit column */
     625           0 :                 if (rlim[i].rlim_max == RLIM_INFINITY)
     626           0 :                         seq_printf(m, "%-20s ", "unlimited");
     627             :                 else
     628           0 :                         seq_printf(m, "%-20lu ", rlim[i].rlim_max);
     629             : 
                                       /* units column, or just terminate the row when there is no unit */
     630           0 :                 if (lnames[i].unit)
     631           0 :                         seq_printf(m, "%-10s\n", lnames[i].unit);
     632             :                 else
     633           0 :                         seq_putc(m, '\n');
     634             :         }
     635             : 
     636             :         return 0;
     637             : }
     638             : 
     639             : #ifdef CONFIG_HAVE_ARCH_TRACEHOOK
                       /*
                        * Print the syscall state of @task as reported by task_current_syscall().
                        *
                        * The output is one of:
                        *   "running"                        - task_current_syscall() returned non-zero
                        *   "<nr> <sp> <ip>"                 - info.data.nr is negative
                        *   "<nr> <6 args> <sp> <ip>"        - otherwise
                        * with everything except <nr> printed in hex.
                        *
                        * Returns the error from lock_trace() if that fails, otherwise 0.
                        * @ns and @pid are not used by this function.
                        */
     640             : static int proc_pid_syscall(struct seq_file *m, struct pid_namespace *ns,
     641             :                             struct pid *pid, struct task_struct *task)
     642             : {
     643             :         struct syscall_info info;
     644             :         u64 *args = &info.data.args[0];
     645             :         int res;
     646             : 
     647             :         res = lock_trace(task);
     648             :         if (res)
     649             :                 return res;
     650             : 
     651             :         if (task_current_syscall(task, &info))
     652             :                 seq_puts(m, "running\n");
     653             :         else if (info.data.nr < 0)
                                       /* NOTE(review): negative nr presumably means "not in a syscall" - confirm */
     654             :                 seq_printf(m, "%d 0x%llx 0x%llx\n",
     655             :                            info.data.nr, info.sp, info.data.instruction_pointer);
     656             :         else
     657             :                 seq_printf(m,
     658             :                        "%d 0x%llx 0x%llx 0x%llx 0x%llx 0x%llx 0x%llx 0x%llx 0x%llx\n",
     659             :                        info.data.nr,
     660             :                        args[0], args[1], args[2], args[3], args[4], args[5],
     661             :                        info.sp, info.data.instruction_pointer);
     662             :         unlock_trace(task);
     663             : 
     664             :         return 0;
     665             : }
     666             : #endif /* CONFIG_HAVE_ARCH_TRACEHOOK */
     667             : 
     668             : /************************************************************************/
     669             : /*                       Here the fs part begins                        */
     670             : /************************************************************************/
     671             : 
     672             : /* permission checks */
                       /*
                        * Return whether the caller may access the file descriptors of the task
                        * behind @inode: the result of ptrace_may_access() with
                        * PTRACE_MODE_READ_FSCREDS, or false when get_proc_task() finds no task.
                        * The task reference taken here is dropped before returning.
                        */
     673           0 : static bool proc_fd_access_allowed(struct inode *inode)
     674             : {
     675             :         struct task_struct *task;
     676           0 :         bool allowed = false;
     677             :         /* Allow access to a task's file descriptors if it is us or we
     678             :          * may use ptrace attach to the process and find out that
     679             :          * information.
     680             :          */
     681           0 :         task = get_proc_task(inode);
     682           0 :         if (task) {
     683           0 :                 allowed = ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS);
     684           0 :                 put_task_struct(task);
     685             :         }
     686           0 :         return allowed;
     687             : }
     688             : 
                       /*
                        * setattr handler used by proc_def_inode_operations.
                        *
                        * Any request that includes ATTR_MODE is refused with -EPERM.  Other
                        * requests are validated with setattr_prepare() and, if that succeeds,
                        * applied to the inode with setattr_copy().
                        *
                        * Returns 0 on success, -EPERM for mode changes, or the error from
                        * setattr_prepare().
                        *
                        * NOTE(review): @idmap is not used; nop_mnt_idmap is passed to both
                        * helpers instead.
                        */
     689           0 : int proc_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
     690             :                  struct iattr *attr)
     691             : {
     692             :         int error;
     693           0 :         struct inode *inode = d_inode(dentry);
     694             : 
     695           0 :         if (attr->ia_valid & ATTR_MODE)
     696             :                 return -EPERM;
     697             : 
     698           0 :         error = setattr_prepare(&nop_mnt_idmap, dentry, attr);
     699           0 :         if (error)
     700             :                 return error;
     701             : 
     702           0 :         setattr_copy(&nop_mnt_idmap, inode, attr);
     703           0 :         return 0;
     704             : }
     705             : 
     706             : /*
     707             :  * May current process learn task's sched/cmdline info (for hide_pid_min=1)
     708             :  * or euid/egid (for hide_pid_min=2)?
                        *
                        * Returns true when the mount's hide_pid level is below @hide_pid_min,
                        * when the caller is in the mount's pid_gid group, or when
                        * ptrace_may_access() with PTRACE_MODE_READ_FSCREDS allows it.  With
                        * HIDEPID_NOT_PTRACEABLE only the ptrace check decides.
     709             :  */
     710           0 : static bool has_pid_permissions(struct proc_fs_info *fs_info,
     711             :                                  struct task_struct *task,
     712             :                                  enum proc_hidepid hide_pid_min)
     713             : {
     714             :         /*
     715             :          * If 'hidepid' mount option is set force a ptrace check,
     716             :          * we indicate that we are using a filesystem syscall
     717             :          * by passing PTRACE_MODE_READ_FSCREDS
     718             :          */
     719           0 :         if (fs_info->hide_pid == HIDEPID_NOT_PTRACEABLE)
     720           0 :                 return ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS);
     721             : 
                               /* the mount does not hide at the level the caller asks about */
     722           0 :         if (fs_info->hide_pid < hide_pid_min)
     723             :                 return true;
                               /* members of the mount's pid_gid group are exempt from hiding */
     724           0 :         if (in_group_p(fs_info->pid_gid))
     725             :                 return true;
     726           0 :         return ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS);
     727             : }
     728             : 
     729             : 
                       /*
                        * Permission check that applies the mount's hide_pid policy to the task
                        * behind @inode before deferring to generic_permission().
                        *
                        * Returns -ESRCH when get_proc_task() finds no task.  When
                        * has_pid_permissions() denies access at level HIDEPID_NO_ACCESS the
                        * result is -ENOENT if the mount uses HIDEPID_INVISIBLE and -EPERM
                        * otherwise.  In all other cases the result of generic_permission() is
                        * returned.
                        *
                        * NOTE(review): @idmap is not used; nop_mnt_idmap is passed to
                        * generic_permission() instead.
                        */
     730           0 : static int proc_pid_permission(struct mnt_idmap *idmap,
     731             :                                struct inode *inode, int mask)
     732             : {
     733           0 :         struct proc_fs_info *fs_info = proc_sb_info(inode->i_sb);
     734             :         struct task_struct *task;
     735             :         bool has_perms;
     736             : 
     737           0 :         task = get_proc_task(inode);
     738           0 :         if (!task)
     739             :                 return -ESRCH;
                               /* the task reference is only needed for the policy check itself */
     740           0 :         has_perms = has_pid_permissions(fs_info, task, HIDEPID_NO_ACCESS);
     741           0 :         put_task_struct(task);
     742             : 
     743           0 :         if (!has_perms) {
     744           0 :                 if (fs_info->hide_pid == HIDEPID_INVISIBLE) {
     745             :                         /*
     746             :                          * Let's make getdents(), stat(), and open()
     747             :                          * consistent with each other.  If a process
     748             :                          * may not stat() a file, it shouldn't be seen
     749             :                          * in procfs at all.
     750             :                          */
     751             :                         return -ENOENT;
     752             :                 }
     753             : 
     754           0 :                 return -EPERM;
     755             :         }
     756           0 :         return generic_permission(&nop_mnt_idmap, inode, mask);
     757             : }
     758             : 
     759             : 
     760             : 
                       /* Inode operations that only override setattr, routing it to proc_setattr(). */
     761             : static const struct inode_operations proc_def_inode_operations = {
     762             :         .setattr        = proc_setattr,
     763             : };
     764             : 
                       /*
                        * seq_file show callback installed by proc_single_open().
                        *
                        * m->private is the inode.  The task is looked up from the inode's pid
                        * with get_pid_task(); if it no longer exists -ESRCH is returned.
                        * Otherwise the inode's proc_show operation is called and its return
                        * value is passed back after the task reference is dropped.  @v is not
                        * used.
                        */
     765           0 : static int proc_single_show(struct seq_file *m, void *v)
     766             : {
     767           0 :         struct inode *inode = m->private;
     768           0 :         struct pid_namespace *ns = proc_pid_ns(inode->i_sb);
     769           0 :         struct pid *pid = proc_pid(inode);
     770             :         struct task_struct *task;
     771             :         int ret;
     772             : 
     773           0 :         task = get_pid_task(pid, PIDTYPE_PID);
     774           0 :         if (!task)
     775             :                 return -ESRCH;
     776             : 
     777           0 :         ret = PROC_I(inode)->op.proc_show(m, ns, pid, task);
     778             : 
     779           0 :         put_task_struct(task);
     780           0 :         return ret;
     781             : }
     782             : 
                       /*
                        * Open handler: set up a single_open() seq_file whose show callback is
                        * proc_single_show() and whose private data is @inode.
                        */
     783           0 : static int proc_single_open(struct inode *inode, struct file *filp)
     784             : {
     785           0 :         return single_open(filp, proc_single_show, inode);
     786             : }
     787             : 
                       /*
                        * File operations for files rendered by proc_single_show(): opened via
                        * proc_single_open(), read and seeked through the seq_file helpers and
                        * released with single_release().
                        */
     788             : static const struct file_operations proc_single_file_operations = {
     789             :         .open           = proc_single_open,
     790             :         .read           = seq_read,
     791             :         .llseek         = seq_lseek,
     792             :         .release        = single_release,
     793             : };
     794             : 
     795             : 
                       /*
                        * Look up the mm of the task behind @inode for later access.
                        *
                        * @mode is OR-ed with PTRACE_MODE_FSCREDS and passed to mm_access().
                        * Returns ERR_PTR(-ESRCH) when get_proc_task() finds no task; otherwise
                        * whatever mm_access() returned, which may be NULL or an error pointer
                        * and is handed back unchanged.
                        *
                        * For a valid mm the reference obtained from mm_access() is exchanged
                        * for one taken with mmgrab(); the callers in this file release it with
                        * mmdrop() (see mem_release()).
                        */
     796           0 : struct mm_struct *proc_mem_open(struct inode *inode, unsigned int mode)
     797             : {
     798           0 :         struct task_struct *task = get_proc_task(inode);
     799           0 :         struct mm_struct *mm = ERR_PTR(-ESRCH);
     800             : 
     801           0 :         if (task) {
     802           0 :                 mm = mm_access(task, mode | PTRACE_MODE_FSCREDS);
     803           0 :                 put_task_struct(task);
     804             : 
     805           0 :                 if (!IS_ERR_OR_NULL(mm)) {
     806             :                         /* ensure this mm_struct can't be freed */
     807           0 :                         mmgrab(mm);
     808             :                         /* but do not pin its memory */
     809           0 :                         mmput(mm);
     810             :                 }
     811             :         }
     812             : 
     813           0 :         return mm;
     814             : }
     815             : 
                       /*
                        * Shared open helper: obtain the mm through proc_mem_open() with @mode
                        * and store it in file->private_data.
                        *
                        * Returns the error encoded in the pointer when proc_mem_open() returns
                        * an error pointer, otherwise 0.  Only IS_ERR() is tested, so a NULL mm
                        * is stored as-is; the read/write handlers in this file check for it.
                        */
     816             : static int __mem_open(struct inode *inode, struct file *file, unsigned int mode)
     817             : {
     818           0 :         struct mm_struct *mm = proc_mem_open(inode, mode);
     819             : 
     820           0 :         if (IS_ERR(mm))
     821           0 :                 return PTR_ERR(mm);
     822             : 
     823           0 :         file->private_data = mm;
     824             :         return 0;
     825             : }
     826             : 
                       /*
                        * Open handler of proc_mem_operations: opens the mm with
                        * PTRACE_MODE_ATTACH and marks the file FMODE_UNSIGNED_OFFSET.
                        * Returns the result of __mem_open(); the f_mode flag is set whether or
                        * not that call succeeded.
                        */
     827           0 : static int mem_open(struct inode *inode, struct file *file)
     828             : {
     829           0 :         int ret = __mem_open(inode, file, PTRACE_MODE_ATTACH);
     830             : 
     831             :         /* OK to pass negative loff_t, we can catch out-of-range */
     832           0 :         file->f_mode |= FMODE_UNSIGNED_OFFSET;
     833             : 
     834           0 :         return ret;
     835             : }
     836             : 
                       /*
                        * Common body of mem_read() and mem_write().
                        *
                        * Transfers up to @count bytes between the user buffer @buf and the mm
                        * stored in file->private_data, starting at address *@ppos.  Data moves
                        * through one bounce page in chunks of at most PAGE_SIZE, using
                        * access_remote_vm() with FOLL_FORCE (plus FOLL_WRITE when @write is
                        * non-zero).
                        *
                        * Returns the number of bytes transferred, or:
                        *   0        when there is no mm or mmget_not_zero() fails
                        *   -ENOMEM  when the bounce page cannot be allocated
                        *   -EIO     when access_remote_vm() transfers nothing on the first chunk
                        *   -EFAULT  when a copy to or from user space fails; this replaces any
                        *            byte count accumulated so far
                        *
                        * Once the loop has been entered *@ppos is advanced by the bytes
                        * transferred, including on the error paths that break out of the loop.
                        */
     837           0 : static ssize_t mem_rw(struct file *file, char __user *buf,
     838             :                         size_t count, loff_t *ppos, int write)
     839             : {
     840           0 :         struct mm_struct *mm = file->private_data;
     841           0 :         unsigned long addr = *ppos;
     842             :         ssize_t copied;
     843             :         char *page;
     844             :         unsigned int flags;
     845             : 
     846           0 :         if (!mm)
     847             :                 return 0;
     848             : 
     849           0 :         page = (char *)__get_free_page(GFP_KERNEL);
     850           0 :         if (!page)
     851             :                 return -ENOMEM;
     852             : 
     853           0 :         copied = 0;
                               /* the open path only kept an mmgrab() reference; take a user reference for the access */
     854           0 :         if (!mmget_not_zero(mm))
     855             :                 goto free;
     856             : 
     857           0 :         flags = FOLL_FORCE | (write ? FOLL_WRITE : 0);
     858             : 
     859           0 :         while (count > 0) {
     860           0 :                 size_t this_len = min_t(size_t, count, PAGE_SIZE);
     861             : 
                                       /* write: stage the user data in the bounce page first */
     862           0 :                 if (write && copy_from_user(page, buf, this_len)) {
     863             :                         copied = -EFAULT;
     864             :                         break;
     865             :                 }
     866             : 
                                       /* this_len becomes the number of bytes actually transferred */
     867           0 :                 this_len = access_remote_vm(mm, addr, page, this_len, flags);
     868           0 :                 if (!this_len) {
                                               /* report -EIO only if nothing at all was transferred */
     869           0 :                         if (!copied)
     870           0 :                                 copied = -EIO;
     871             :                         break;
     872             :                 }
     873             : 
                                       /* read: hand the bytes just fetched to user space */
     874           0 :                 if (!write && copy_to_user(buf, page, this_len)) {
     875             :                         copied = -EFAULT;
     876             :                         break;
     877             :                 }
     878             : 
     879           0 :                 buf += this_len;
     880           0 :                 addr += this_len;
     881           0 :                 copied += this_len;
     882           0 :                 count -= this_len;
     883             :         }
     884           0 :         *ppos = addr;
     885             : 
     886           0 :         mmput(mm);
     887             : free:
     888           0 :         free_page((unsigned long) page);
     889             :         return copied;
     890             : }
     891             : 
                       /* Read handler of proc_mem_operations: mem_rw() with write == 0. */
     892           0 : static ssize_t mem_read(struct file *file, char __user *buf,
     893             :                         size_t count, loff_t *ppos)
     894             : {
     895           0 :         return mem_rw(file, buf, count, ppos, 0);
     896             : }
     897             : 
                       /*
                        * Write handler of proc_mem_operations: mem_rw() with write == 1.  The
                        * const qualifier on @buf is cast away only to match mem_rw()'s shared
                        * signature; on the write path mem_rw() only reads from the buffer.
                        */
     898           0 : static ssize_t mem_write(struct file *file, const char __user *buf,
     899             :                          size_t count, loff_t *ppos)
     900             : {
     901           0 :         return mem_rw(file, (char __user*)buf, count, ppos, 1);
     902             : }
     903             : 
                       /*
                        * Seek handler of proc_mem_operations.
                        *
                        * @orig 0 sets the position to @offset, @orig 1 adds @offset to the
                        * current position; any other value returns -EINVAL.  No range check is
                        * applied to the resulting position.  Returns the new file->f_pos.
                        *
                        * NOTE(review): 0 and 1 are presumably SEEK_SET and SEEK_CUR - confirm.
                        */
     904           0 : loff_t mem_lseek(struct file *file, loff_t offset, int orig)
     905             : {
     906           0 :         switch (orig) {
     907             :         case 0:
     908           0 :                 file->f_pos = offset;
     909           0 :                 break;
     910             :         case 1:
     911           0 :                 file->f_pos += offset;
     912           0 :                 break;
     913             :         default:
     914             :                 return -EINVAL;
     915             :         }
                               /* NOTE(review): presumably keeps a large position from being read as an error - confirm */
     916             :         force_successful_syscall_return();
     917           0 :         return file->f_pos;
     918             : }
     919             : 
                       /*
                        * Release handler shared by the mem, environ and auxv file operations:
                        * drops, with mmdrop(), the mm reference stored in file->private_data
                        * if there is one.  Always returns 0.
                        */
     920           0 : static int mem_release(struct inode *inode, struct file *file)
     921             : {
     922           0 :         struct mm_struct *mm = file->private_data;
     923           0 :         if (mm)
     924             :                 mmdrop(mm);
     925           0 :         return 0;
     926             : }
     927             : 
                       /*
                        * File operations backed by mem_rw(): the file position is used as the
                        * address inside the target mm for both reads and writes.
                        */
     928             : static const struct file_operations proc_mem_operations = {
     929             :         .llseek         = mem_lseek,
     930             :         .read           = mem_read,
     931             :         .write          = mem_write,
     932             :         .open           = mem_open,
     933             :         .release        = mem_release,
     934             : };
     935             : 
                       /*
                        * Open handler of proc_environ_operations: stores the task's mm in the
                        * file via __mem_open(), requesting PTRACE_MODE_READ access.
                        */
     936           0 : static int environ_open(struct inode *inode, struct file *file)
     937             : {
     938           0 :         return __mem_open(inode, file, PTRACE_MODE_READ);
     939             : }
     940             : 
                       /*
                        * Read handler of proc_environ_operations.
                        *
                        * Copies bytes from the range [mm->env_start, mm->env_end) of the mm
                        * stored in file->private_data to @buf, treating *@ppos as an offset
                        * from env_start.  Data moves through one bounce page in chunks of at
                        * most PAGE_SIZE using access_remote_vm() with FOLL_ANON.
                        *
                        * Returns the number of bytes copied, or:
                        *   0        when there is no mm, env_end is zero, mmget_not_zero()
                        *            fails, or *@ppos is at or past the end of the range
                        *   -ENOMEM  when the bounce page cannot be allocated
                        *   -EFAULT  when copy_to_user() fails
                        * If access_remote_vm() returns zero or a negative value, that value is
                        * returned.  Both that case and -EFAULT replace any byte count
                        * accumulated so far.
                        *
                        * Once the loop has been reached *@ppos is advanced by the bytes copied.
                        */
     941           0 : static ssize_t environ_read(struct file *file, char __user *buf,
     942             :                         size_t count, loff_t *ppos)
     943             : {
     944             :         char *page;
     945           0 :         unsigned long src = *ppos;
     946           0 :         int ret = 0;
     947           0 :         struct mm_struct *mm = file->private_data;
     948             :         unsigned long env_start, env_end;
     949             : 
     950             :         /* Ensure the process spawned far enough to have an environment. */
     951           0 :         if (!mm || !mm->env_end)
     952             :                 return 0;
     953             : 
     954           0 :         page = (char *)__get_free_page(GFP_KERNEL);
     955           0 :         if (!page)
     956             :                 return -ENOMEM;
     957             : 
     958           0 :         ret = 0;
     959           0 :         if (!mmget_not_zero(mm))
     960             :                 goto free;
     961             : 
                               /* read both bounds under arg_lock so they form a consistent pair */
     962           0 :         spin_lock(&mm->arg_lock);
     963           0 :         env_start = mm->env_start;
     964           0 :         env_end = mm->env_end;
     965           0 :         spin_unlock(&mm->arg_lock);
     966             : 
     967           0 :         while (count > 0) {
     968             :                 size_t this_len, max_len;
     969             :                 int retval;
     970             : 
                                       /* offset at or beyond the end of the environment: nothing left */
     971           0 :                 if (src >= (env_end - env_start))
     972             :                         break;
     973             : 
                                       /* bytes remaining in the environment from the current offset */
     974           0 :                 this_len = env_end - (env_start + src);
     975             : 
                                       /* limit the chunk to one page and to what the caller asked for */
     976           0 :                 max_len = min_t(size_t, PAGE_SIZE, count);
     977           0 :                 this_len = min(max_len, this_len);
     978             : 
     979           0 :                 retval = access_remote_vm(mm, (env_start + src), page, this_len, FOLL_ANON);
     980             : 
     981           0 :                 if (retval <= 0) {
     982             :                         ret = retval;
     983             :                         break;
     984             :                 }
     985             : 
     986           0 :                 if (copy_to_user(buf, page, retval)) {
     987             :                         ret = -EFAULT;
     988             :                         break;
     989             :                 }
     990             : 
     991           0 :                 ret += retval;
     992           0 :                 src += retval;
     993           0 :                 buf += retval;
     994           0 :                 count -= retval;
     995             :         }
     996           0 :         *ppos = src;
     997           0 :         mmput(mm);
     998             : 
     999             : free:
    1000           0 :         free_page((unsigned long) page);
    1001           0 :         return ret;
    1002             : }
    1003             : 
                       /*
                        * Read-only file operations backed by environ_read(); the mm reference
                        * taken at open time is dropped by mem_release().
                        */
    1004             : static const struct file_operations proc_environ_operations = {
    1005             :         .open           = environ_open,
    1006             :         .read           = environ_read,
    1007             :         .llseek         = generic_file_llseek,
    1008             :         .release        = mem_release,
    1009             : };
    1010             : 
                       /*
                        * Open handler of proc_auxv_operations: stores the task's mm in the
                        * file via __mem_open(), requesting PTRACE_MODE_READ_FSCREDS access.
                        */
    1011           0 : static int auxv_open(struct inode *inode, struct file *file)
    1012             : {
    1013           0 :         return __mem_open(inode, file, PTRACE_MODE_READ_FSCREDS);
    1014             : }
    1015             : 
                       /*
                        * Read handler of proc_auxv_operations.
                        *
                        * Walks mm->saved_auxv two words at a time until it finds a pair whose
                        * first word is 0 (AT_NULL), then serves the array up to and including
                        * that terminating pair through simple_read_from_buffer().  Returns 0
                        * when the file has no mm, otherwise the result of
                        * simple_read_from_buffer().
                        */
    1016           0 : static ssize_t auxv_read(struct file *file, char __user *buf,
    1017             :                         size_t count, loff_t *ppos)
    1018             : {
    1019           0 :         struct mm_struct *mm = file->private_data;
    1020           0 :         unsigned int nwords = 0;
    1021             : 
    1022           0 :         if (!mm)
    1023             :                 return 0;
                               /* count the words in use; the terminating pair is included in nwords */
    1024             :         do {
    1025           0 :                 nwords += 2;
    1026           0 :         } while (mm->saved_auxv[nwords - 2] != 0); /* AT_NULL */
    1027           0 :         return simple_read_from_buffer(buf, count, ppos, mm->saved_auxv,
    1028             :                                        nwords * sizeof(mm->saved_auxv[0]));
    1029             : }
    1030             : 
                       /*
                        * Read-only file operations backed by auxv_read(); the mm reference
                        * taken at open time is dropped by mem_release().
                        */
    1031             : static const struct file_operations proc_auxv_operations = {
    1032             :         .open           = auxv_open,
    1033             :         .read           = auxv_read,
    1034             :         .llseek         = generic_file_llseek,
    1035             :         .release        = mem_release,
    1036             : };
    1037             : 
                       /*
                        * Read handler of proc_oom_adj_operations.
                        *
                        * Converts the task's oom_score_adj to the legacy oom_adj scale and
                        * returns it as a decimal number followed by a newline.  A value of
                        * OOM_SCORE_ADJ_MAX maps to OOM_ADJUST_MAX; any other value is scaled by
                        * -OOM_DISABLE / OOM_SCORE_ADJ_MAX and then capped at OOM_ADJUST_MAX.
                        *
                        * Returns -ESRCH when get_proc_task() finds no task, otherwise the
                        * result of simple_read_from_buffer().
                        */
    1038           0 : static ssize_t oom_adj_read(struct file *file, char __user *buf, size_t count,
    1039             :                             loff_t *ppos)
    1040             : {
    1041           0 :         struct task_struct *task = get_proc_task(file_inode(file));
    1042             :         char buffer[PROC_NUMBUF];
    1043           0 :         int oom_adj = OOM_ADJUST_MIN;
    1044             :         size_t len;
    1045             : 
    1046           0 :         if (!task)
    1047             :                 return -ESRCH;
    1048           0 :         if (task->signal->oom_score_adj == OOM_SCORE_ADJ_MAX)
    1049             :                 oom_adj = OOM_ADJUST_MAX;
    1050             :         else
    1051           0 :                 oom_adj = (task->signal->oom_score_adj * -OOM_DISABLE) /
    1052             :                           OOM_SCORE_ADJ_MAX;
    1053           0 :         put_task_struct(task);
    1054           0 :         if (oom_adj > OOM_ADJUST_MAX)
    1055           0 :                 oom_adj = OOM_ADJUST_MAX;
    1056           0 :         len = snprintf(buffer, sizeof(buffer), "%d\n", oom_adj);
    1057           0 :         return simple_read_from_buffer(buf, count, ppos, buffer, len);
    1058             : }
    1059             : 
                       /*
                        * Store @oom_adj as the oom_score_adj of the task behind @file.
                        *
                        * @oom_adj is already on the oom_score_adj scale.  @legacy is true when
                        * the value came in through oom_adj_write().
                        *
                        * Without CAP_SYS_RESOURCE the value may not go below the task's
                        * current oom_score_adj (legacy) or below its oom_score_adj_min
                        * (non-legacy); such requests fail with -EACCES.  In the non-legacy case
                        * a caller holding CAP_SYS_RESOURCE also updates oom_score_adj_min.
                        *
                        * If the task's mm has MMF_MULTIPROCESS set, the same values are applied
                        * to every other process that shares that mm, skipping kernel threads,
                        * the global init and processes with vfork_done set.
                        *
                        * The whole update runs under oom_adj_mutex.  Returns 0 on success,
                        * -ESRCH when the task is gone, or -EACCES as described above.
                        */
    1060           0 : static int __set_oom_adj(struct file *file, int oom_adj, bool legacy)
    1061             : {
    1062           0 :         struct mm_struct *mm = NULL;
    1063             :         struct task_struct *task;
    1064           0 :         int err = 0;
    1065             : 
    1066           0 :         task = get_proc_task(file_inode(file));
    1067           0 :         if (!task)
    1068             :                 return -ESRCH;
    1069             : 
    1070           0 :         mutex_lock(&oom_adj_mutex);
    1071           0 :         if (legacy) {
    1072           0 :                 if (oom_adj < task->signal->oom_score_adj &&
    1073           0 :                                 !capable(CAP_SYS_RESOURCE)) {
    1074             :                         err = -EACCES;
    1075             :                         goto err_unlock;
    1076             :                 }
    1077             :                 /*
    1078             :                  * /proc/pid/oom_adj is provided for legacy purposes, ask users to use
    1079             :                  * /proc/pid/oom_score_adj instead.
    1080             :                  */
    1081           0 :                 pr_warn_once("%s (%d): /proc/%d/oom_adj is deprecated, please use /proc/%d/oom_score_adj instead.\n",
    1082             :                           current->comm, task_pid_nr(current), task_pid_nr(task),
    1083             :                           task_pid_nr(task));
    1084             :         } else {
    1085           0 :                 if ((short)oom_adj < task->signal->oom_score_adj_min &&
    1086           0 :                                 !capable(CAP_SYS_RESOURCE)) {
    1087             :                         err = -EACCES;
    1088             :                         goto err_unlock;
    1089             :                 }
    1090             :         }
    1091             : 
    1092             :         /*
    1093             :          * Make sure we will check other processes sharing the mm if this is
    1094             :          * not vfork which wants its own oom_score_adj.
    1095             :          * pin the mm so it doesn't go away and get reused after task_unlock
    1096             :          */
    1097           0 :         if (!task->vfork_done) {
    1098           0 :                 struct task_struct *p = find_lock_task_mm(task);
    1099             : 
    1100           0 :                 if (p) {
    1101           0 :                         if (test_bit(MMF_MULTIPROCESS, &p->mm->flags)) {
    1102           0 :                                 mm = p->mm;
    1103             :                                 mmgrab(mm);
    1104             :                         }
    1105           0 :                         task_unlock(p);
    1106             :                 }
    1107             :         }
    1108             : 
    1109           0 :         task->signal->oom_score_adj = oom_adj;
    1110           0 :         if (!legacy && has_capability_noaudit(current, CAP_SYS_RESOURCE))
    1111           0 :                 task->signal->oom_score_adj_min = (short)oom_adj;
    1112             :         trace_oom_score_adj_update(task);
    1113             : 
                               /* mm is only set when it is shared with other processes; update them too */
    1114           0 :         if (mm) {
    1115             :                 struct task_struct *p;
    1116             : 
    1117             :                 rcu_read_lock();
    1118           0 :                 for_each_process(p) {
                                               /* the target's own thread group was updated above */
    1119           0 :                         if (same_thread_group(task, p))
    1120           0 :                                 continue;
    1121             : 
    1122             :                         /* do not touch kernel threads or the global init */
    1123           0 :                         if (p->flags & PF_KTHREAD || is_global_init(p))
    1124           0 :                                 continue;
    1125             : 
    1126           0 :                         task_lock(p);
    1127           0 :                         if (!p->vfork_done && process_shares_mm(p, mm)) {
    1128           0 :                                 p->signal->oom_score_adj = oom_adj;
    1129           0 :                                 if (!legacy && has_capability_noaudit(current, CAP_SYS_RESOURCE))
    1130           0 :                                         p->signal->oom_score_adj_min = (short)oom_adj;
    1131             :                         }
    1132           0 :                         task_unlock(p);
    1133             :                 }
    1134           0 :                 rcu_read_unlock();
                                       /* drop the reference taken with mmgrab() above */
    1135             :                 mmdrop(mm);
    1136             :         }
    1137             : err_unlock:
    1138           0 :         mutex_unlock(&oom_adj_mutex);
    1139           0 :         put_task_struct(task);
    1140           0 :         return err;
    1141             : }
    1142             : 
    1143             : /*
    1144             :  * /proc/pid/oom_adj exists solely for backwards compatibility with previous
    1145             :  * kernels.  The effective policy is defined by oom_score_adj, which has a
    1146             :  * different scale: oom_adj grew exponentially and oom_score_adj grows linearly.
    1147             :  * Values written to oom_adj are simply mapped linearly to oom_score_adj.
    1148             :  * Processes that become oom disabled via oom_adj will still be oom disabled
    1149             :  * with this implementation.
    1150             :  *
    1151             :  * oom_adj cannot be removed since existing userspace binaries use it.
                        *
                        * At most PROC_NUMBUF - 1 bytes of the user buffer are parsed.  The
                        * value must lie in [OOM_ADJUST_MIN, OOM_ADJUST_MAX] or equal
                        * OOM_DISABLE, otherwise -EINVAL is returned.  Other errors are -EFAULT
                        * from the user copy, the error from kstrtoint(), or the error from
                        * __set_oom_adj().  On success the (possibly truncated) byte count is
                        * returned.
    1152             :  */
    1153           0 : static ssize_t oom_adj_write(struct file *file, const char __user *buf,
    1154             :                              size_t count, loff_t *ppos)
    1155             : {
    1156             :         char buffer[PROC_NUMBUF];
    1157             :         int oom_adj;
    1158             :         int err;
    1159             : 
                               /* zero-fill so the copied text is always NUL-terminated */
    1160           0 :         memset(buffer, 0, sizeof(buffer));
    1161           0 :         if (count > sizeof(buffer) - 1)
    1162           0 :                 count = sizeof(buffer) - 1;
    1163           0 :         if (copy_from_user(buffer, buf, count)) {
    1164             :                 err = -EFAULT;
    1165             :                 goto out;
    1166             :         }
    1167             : 
    1168           0 :         err = kstrtoint(strstrip(buffer), 0, &oom_adj);
    1169           0 :         if (err)
    1170             :                 goto out;
    1171           0 :         if ((oom_adj < OOM_ADJUST_MIN || oom_adj > OOM_ADJUST_MAX) &&
    1172             :              oom_adj != OOM_DISABLE) {
    1173             :                 err = -EINVAL;
    1174             :                 goto out;
    1175             :         }
    1176             : 
    1177             :         /*
    1178             :          * Scale /proc/pid/oom_score_adj appropriately ensuring that a maximum
    1179             :          * value is always attainable.
    1180             :          */
    1181           0 :         if (oom_adj == OOM_ADJUST_MAX)
    1182           0 :                 oom_adj = OOM_SCORE_ADJ_MAX;
    1183             :         else
    1184           0 :                 oom_adj = (oom_adj * OOM_SCORE_ADJ_MAX) / -OOM_DISABLE;
    1185             : 
    1186           0 :         err = __set_oom_adj(file, oom_adj, true);
    1187             : out:
    1188           0 :         return err < 0 ? err : count;
    1189             : }
    1190             : 
    1191             : static const struct file_operations proc_oom_adj_operations = {
    1192             :         .read           = oom_adj_read,
    1193             :         .write          = oom_adj_write,
    1194             :         .llseek         = generic_file_llseek,
    1195             : };
    1196             : 
    1197           0 : static ssize_t oom_score_adj_read(struct file *file, char __user *buf,
    1198             :                                         size_t count, loff_t *ppos)
    1199             : {
    1200           0 :         struct task_struct *task = get_proc_task(file_inode(file));
    1201             :         char buffer[PROC_NUMBUF];
    1202           0 :         short oom_score_adj = OOM_SCORE_ADJ_MIN;
    1203             :         size_t len;
    1204             : 
    1205           0 :         if (!task)
    1206             :                 return -ESRCH;
    1207           0 :         oom_score_adj = task->signal->oom_score_adj;
    1208           0 :         put_task_struct(task);
    1209           0 :         len = snprintf(buffer, sizeof(buffer), "%hd\n", oom_score_adj);
    1210           0 :         return simple_read_from_buffer(buf, count, ppos, buffer, len);
    1211             : }
    1212             : 
    1213           0 : static ssize_t oom_score_adj_write(struct file *file, const char __user *buf,
    1214             :                                         size_t count, loff_t *ppos)
    1215             : {
    1216             :         char buffer[PROC_NUMBUF];
    1217             :         int oom_score_adj;
    1218             :         int err;
    1219             : 
    1220           0 :         memset(buffer, 0, sizeof(buffer));
    1221           0 :         if (count > sizeof(buffer) - 1)
    1222           0 :                 count = sizeof(buffer) - 1;
    1223           0 :         if (copy_from_user(buffer, buf, count)) {
    1224             :                 err = -EFAULT;
    1225             :                 goto out;
    1226             :         }
    1227             : 
    1228           0 :         err = kstrtoint(strstrip(buffer), 0, &oom_score_adj);
    1229           0 :         if (err)
    1230             :                 goto out;
    1231           0 :         if (oom_score_adj < OOM_SCORE_ADJ_MIN ||
    1232             :                         oom_score_adj > OOM_SCORE_ADJ_MAX) {
    1233             :                 err = -EINVAL;
    1234             :                 goto out;
    1235             :         }
    1236             : 
    1237           0 :         err = __set_oom_adj(file, oom_score_adj, false);
    1238             : out:
    1239           0 :         return err < 0 ? err : count;
    1240             : }
    1241             : 
    1242             : static const struct file_operations proc_oom_score_adj_operations = {
    1243             :         .read           = oom_score_adj_read,
    1244             :         .write          = oom_score_adj_write,
    1245             :         .llseek         = default_llseek,
    1246             : };
    1247             : 
    1248             : #ifdef CONFIG_AUDIT
    1249             : #define TMPBUFLEN 11
    1250             : static ssize_t proc_loginuid_read(struct file * file, char __user * buf,
    1251             :                                   size_t count, loff_t *ppos)
    1252             : {
    1253             :         struct inode * inode = file_inode(file);
    1254             :         struct task_struct *task = get_proc_task(inode);
    1255             :         ssize_t length;
    1256             :         char tmpbuf[TMPBUFLEN];
    1257             : 
    1258             :         if (!task)
    1259             :                 return -ESRCH;
    1260             :         length = scnprintf(tmpbuf, TMPBUFLEN, "%u",
    1261             :                            from_kuid(file->f_cred->user_ns,
    1262             :                                      audit_get_loginuid(task)));
    1263             :         put_task_struct(task);
    1264             :         return simple_read_from_buffer(buf, count, ppos, tmpbuf, length);
    1265             : }
    1266             : 
    1267             : static ssize_t proc_loginuid_write(struct file * file, const char __user * buf,
    1268             :                                    size_t count, loff_t *ppos)
    1269             : {
    1270             :         struct inode * inode = file_inode(file);
    1271             :         uid_t loginuid;
    1272             :         kuid_t kloginuid;
    1273             :         int rv;
    1274             : 
    1275             :         /* Don't let kthreads write their own loginuid */
    1276             :         if (current->flags & PF_KTHREAD)
    1277             :                 return -EPERM;
    1278             : 
    1279             :         rcu_read_lock();
    1280             :         if (current != pid_task(proc_pid(inode), PIDTYPE_PID)) {
    1281             :                 rcu_read_unlock();
    1282             :                 return -EPERM;
    1283             :         }
    1284             :         rcu_read_unlock();
    1285             : 
    1286             :         if (*ppos != 0) {
    1287             :                 /* No partial writes. */
    1288             :                 return -EINVAL;
    1289             :         }
    1290             : 
    1291             :         rv = kstrtou32_from_user(buf, count, 10, &loginuid);
    1292             :         if (rv < 0)
    1293             :                 return rv;
    1294             : 
    1295             :         /* is userspace tring to explicitly UNSET the loginuid? */
    1296             :         if (loginuid == AUDIT_UID_UNSET) {
    1297             :                 kloginuid = INVALID_UID;
    1298             :         } else {
    1299             :                 kloginuid = make_kuid(file->f_cred->user_ns, loginuid);
    1300             :                 if (!uid_valid(kloginuid))
    1301             :                         return -EINVAL;
    1302             :         }
    1303             : 
    1304             :         rv = audit_set_loginuid(kloginuid);
    1305             :         if (rv < 0)
    1306             :                 return rv;
    1307             :         return count;
    1308             : }
    1309             : 
    1310             : static const struct file_operations proc_loginuid_operations = {
    1311             :         .read           = proc_loginuid_read,
    1312             :         .write          = proc_loginuid_write,
    1313             :         .llseek         = generic_file_llseek,
    1314             : };
    1315             : 
    1316             : static ssize_t proc_sessionid_read(struct file * file, char __user * buf,
    1317             :                                   size_t count, loff_t *ppos)
    1318             : {
    1319             :         struct inode * inode = file_inode(file);
    1320             :         struct task_struct *task = get_proc_task(inode);
    1321             :         ssize_t length;
    1322             :         char tmpbuf[TMPBUFLEN];
    1323             : 
    1324             :         if (!task)
    1325             :                 return -ESRCH;
    1326             :         length = scnprintf(tmpbuf, TMPBUFLEN, "%u",
    1327             :                                 audit_get_sessionid(task));
    1328             :         put_task_struct(task);
    1329             :         return simple_read_from_buffer(buf, count, ppos, tmpbuf, length);
    1330             : }
    1331             : 
    1332             : static const struct file_operations proc_sessionid_operations = {
    1333             :         .read           = proc_sessionid_read,
    1334             :         .llseek         = generic_file_llseek,
    1335             : };
    1336             : #endif
    1337             : 
    1338             : #ifdef CONFIG_FAULT_INJECTION
    1339             : static ssize_t proc_fault_inject_read(struct file * file, char __user * buf,
    1340             :                                       size_t count, loff_t *ppos)
    1341             : {
    1342             :         struct task_struct *task = get_proc_task(file_inode(file));
    1343             :         char buffer[PROC_NUMBUF];
    1344             :         size_t len;
    1345             :         int make_it_fail;
    1346             : 
    1347             :         if (!task)
    1348             :                 return -ESRCH;
    1349             :         make_it_fail = task->make_it_fail;
    1350             :         put_task_struct(task);
    1351             : 
    1352             :         len = snprintf(buffer, sizeof(buffer), "%i\n", make_it_fail);
    1353             : 
    1354             :         return simple_read_from_buffer(buf, count, ppos, buffer, len);
    1355             : }
    1356             : 
    1357             : static ssize_t proc_fault_inject_write(struct file * file,
    1358             :                         const char __user * buf, size_t count, loff_t *ppos)
    1359             : {
    1360             :         struct task_struct *task;
    1361             :         char buffer[PROC_NUMBUF];
    1362             :         int make_it_fail;
    1363             :         int rv;
    1364             : 
    1365             :         if (!capable(CAP_SYS_RESOURCE))
    1366             :                 return -EPERM;
    1367             :         memset(buffer, 0, sizeof(buffer));
    1368             :         if (count > sizeof(buffer) - 1)
    1369             :                 count = sizeof(buffer) - 1;
    1370             :         if (copy_from_user(buffer, buf, count))
    1371             :                 return -EFAULT;
    1372             :         rv = kstrtoint(strstrip(buffer), 0, &make_it_fail);
    1373             :         if (rv < 0)
    1374             :                 return rv;
    1375             :         if (make_it_fail < 0 || make_it_fail > 1)
    1376             :                 return -EINVAL;
    1377             : 
    1378             :         task = get_proc_task(file_inode(file));
    1379             :         if (!task)
    1380             :                 return -ESRCH;
    1381             :         task->make_it_fail = make_it_fail;
    1382             :         put_task_struct(task);
    1383             : 
    1384             :         return count;
    1385             : }
    1386             : 
    1387             : static const struct file_operations proc_fault_inject_operations = {
    1388             :         .read           = proc_fault_inject_read,
    1389             :         .write          = proc_fault_inject_write,
    1390             :         .llseek         = generic_file_llseek,
    1391             : };
    1392             : 
    1393             : static ssize_t proc_fail_nth_write(struct file *file, const char __user *buf,
    1394             :                                    size_t count, loff_t *ppos)
    1395             : {
    1396             :         struct task_struct *task;
    1397             :         int err;
    1398             :         unsigned int n;
    1399             : 
    1400             :         err = kstrtouint_from_user(buf, count, 0, &n);
    1401             :         if (err)
    1402             :                 return err;
    1403             : 
    1404             :         task = get_proc_task(file_inode(file));
    1405             :         if (!task)
    1406             :                 return -ESRCH;
    1407             :         task->fail_nth = n;
    1408             :         put_task_struct(task);
    1409             : 
    1410             :         return count;
    1411             : }
    1412             : 
    1413             : static ssize_t proc_fail_nth_read(struct file *file, char __user *buf,
    1414             :                                   size_t count, loff_t *ppos)
    1415             : {
    1416             :         struct task_struct *task;
    1417             :         char numbuf[PROC_NUMBUF];
    1418             :         ssize_t len;
    1419             : 
    1420             :         task = get_proc_task(file_inode(file));
    1421             :         if (!task)
    1422             :                 return -ESRCH;
    1423             :         len = snprintf(numbuf, sizeof(numbuf), "%u\n", task->fail_nth);
    1424             :         put_task_struct(task);
    1425             :         return simple_read_from_buffer(buf, count, ppos, numbuf, len);
    1426             : }
    1427             : 
    1428             : static const struct file_operations proc_fail_nth_operations = {
    1429             :         .read           = proc_fail_nth_read,
    1430             :         .write          = proc_fail_nth_write,
    1431             : };
    1432             : #endif
    1433             : 
    1434             : 
    1435             : #ifdef CONFIG_SCHED_DEBUG
    1436             : /*
    1437             :  * Print out various scheduling related per-task fields:
    1438             :  */
    1439             : static int sched_show(struct seq_file *m, void *v)
    1440             : {
    1441             :         struct inode *inode = m->private;
    1442             :         struct pid_namespace *ns = proc_pid_ns(inode->i_sb);
    1443             :         struct task_struct *p;
    1444             : 
    1445             :         p = get_proc_task(inode);
    1446             :         if (!p)
    1447             :                 return -ESRCH;
    1448             :         proc_sched_show_task(p, ns, m);
    1449             : 
    1450             :         put_task_struct(p);
    1451             : 
    1452             :         return 0;
    1453             : }
    1454             : 
    1455             : static ssize_t
    1456             : sched_write(struct file *file, const char __user *buf,
    1457             :             size_t count, loff_t *offset)
    1458             : {
    1459             :         struct inode *inode = file_inode(file);
    1460             :         struct task_struct *p;
    1461             : 
    1462             :         p = get_proc_task(inode);
    1463             :         if (!p)
    1464             :                 return -ESRCH;
    1465             :         proc_sched_set_task(p);
    1466             : 
    1467             :         put_task_struct(p);
    1468             : 
    1469             :         return count;
    1470             : }
    1471             : 
    1472             : static int sched_open(struct inode *inode, struct file *filp)
    1473             : {
    1474             :         return single_open(filp, sched_show, inode);
    1475             : }
    1476             : 
    1477             : static const struct file_operations proc_pid_sched_operations = {
    1478             :         .open           = sched_open,
    1479             :         .read           = seq_read,
    1480             :         .write          = sched_write,
    1481             :         .llseek         = seq_lseek,
    1482             :         .release        = single_release,
    1483             : };
    1484             : 
    1485             : #endif
    1486             : 
    1487             : #ifdef CONFIG_SCHED_AUTOGROUP
    1488             : /*
    1489             :  * Print out autogroup related information:
    1490             :  */
    1491             : static int sched_autogroup_show(struct seq_file *m, void *v)
    1492             : {
    1493             :         struct inode *inode = m->private;
    1494             :         struct task_struct *p;
    1495             : 
    1496             :         p = get_proc_task(inode);
    1497             :         if (!p)
    1498             :                 return -ESRCH;
    1499             :         proc_sched_autogroup_show_task(p, m);
    1500             : 
    1501             :         put_task_struct(p);
    1502             : 
    1503             :         return 0;
    1504             : }
    1505             : 
    1506             : static ssize_t
    1507             : sched_autogroup_write(struct file *file, const char __user *buf,
    1508             :             size_t count, loff_t *offset)
    1509             : {
    1510             :         struct inode *inode = file_inode(file);
    1511             :         struct task_struct *p;
    1512             :         char buffer[PROC_NUMBUF];
    1513             :         int nice;
    1514             :         int err;
    1515             : 
    1516             :         memset(buffer, 0, sizeof(buffer));
    1517             :         if (count > sizeof(buffer) - 1)
    1518             :                 count = sizeof(buffer) - 1;
    1519             :         if (copy_from_user(buffer, buf, count))
    1520             :                 return -EFAULT;
    1521             : 
    1522             :         err = kstrtoint(strstrip(buffer), 0, &nice);
    1523             :         if (err < 0)
    1524             :                 return err;
    1525             : 
    1526             :         p = get_proc_task(inode);
    1527             :         if (!p)
    1528             :                 return -ESRCH;
    1529             : 
    1530             :         err = proc_sched_autogroup_set_nice(p, nice);
    1531             :         if (err)
    1532             :                 count = err;
    1533             : 
    1534             :         put_task_struct(p);
    1535             : 
    1536             :         return count;
    1537             : }
    1538             : 
    1539             : static int sched_autogroup_open(struct inode *inode, struct file *filp)
    1540             : {
    1541             :         int ret;
    1542             : 
    1543             :         ret = single_open(filp, sched_autogroup_show, NULL);
    1544             :         if (!ret) {
    1545             :                 struct seq_file *m = filp->private_data;
    1546             : 
    1547             :                 m->private = inode;
    1548             :         }
    1549             :         return ret;
    1550             : }
    1551             : 
    1552             : static const struct file_operations proc_pid_sched_autogroup_operations = {
    1553             :         .open           = sched_autogroup_open,
    1554             :         .read           = seq_read,
    1555             :         .write          = sched_autogroup_write,
    1556             :         .llseek         = seq_lseek,
    1557             :         .release        = single_release,
    1558             : };
    1559             : 
    1560             : #endif /* CONFIG_SCHED_AUTOGROUP */
    1561             : 
    1562             : #ifdef CONFIG_TIME_NS
    1563             : static int timens_offsets_show(struct seq_file *m, void *v)
    1564             : {
    1565             :         struct task_struct *p;
    1566             : 
    1567             :         p = get_proc_task(file_inode(m->file));
    1568             :         if (!p)
    1569             :                 return -ESRCH;
    1570             :         proc_timens_show_offsets(p, m);
    1571             : 
    1572             :         put_task_struct(p);
    1573             : 
    1574             :         return 0;
    1575             : }
    1576             : 
    1577             : static ssize_t timens_offsets_write(struct file *file, const char __user *buf,
    1578             :                                     size_t count, loff_t *ppos)
    1579             : {
    1580             :         struct inode *inode = file_inode(file);
    1581             :         struct proc_timens_offset offsets[2];
    1582             :         char *kbuf = NULL, *pos, *next_line;
    1583             :         struct task_struct *p;
    1584             :         int ret, noffsets;
    1585             : 
    1586             :         /* Only allow < page size writes at the beginning of the file */
    1587             :         if ((*ppos != 0) || (count >= PAGE_SIZE))
    1588             :                 return -EINVAL;
    1589             : 
    1590             :         /* Slurp in the user data */
    1591             :         kbuf = memdup_user_nul(buf, count);
    1592             :         if (IS_ERR(kbuf))
    1593             :                 return PTR_ERR(kbuf);
    1594             : 
    1595             :         /* Parse the user data */
    1596             :         ret = -EINVAL;
    1597             :         noffsets = 0;
    1598             :         for (pos = kbuf; pos; pos = next_line) {
    1599             :                 struct proc_timens_offset *off = &offsets[noffsets];
    1600             :                 char clock[10];
    1601             :                 int err;
    1602             : 
    1603             :                 /* Find the end of line and ensure we don't look past it */
    1604             :                 next_line = strchr(pos, '\n');
    1605             :                 if (next_line) {
    1606             :                         *next_line = '\0';
    1607             :                         next_line++;
    1608             :                         if (*next_line == '\0')
    1609             :                                 next_line = NULL;
    1610             :                 }
    1611             : 
    1612             :                 err = sscanf(pos, "%9s %lld %lu", clock,
    1613             :                                 &off->val.tv_sec, &off->val.tv_nsec);
    1614             :                 if (err != 3 || off->val.tv_nsec >= NSEC_PER_SEC)
    1615             :                         goto out;
    1616             : 
    1617             :                 clock[sizeof(clock) - 1] = 0;
    1618             :                 if (strcmp(clock, "monotonic") == 0 ||
    1619             :                     strcmp(clock, __stringify(CLOCK_MONOTONIC)) == 0)
    1620             :                         off->clockid = CLOCK_MONOTONIC;
    1621             :                 else if (strcmp(clock, "boottime") == 0 ||
    1622             :                          strcmp(clock, __stringify(CLOCK_BOOTTIME)) == 0)
    1623             :                         off->clockid = CLOCK_BOOTTIME;
    1624             :                 else
    1625             :                         goto out;
    1626             : 
    1627             :                 noffsets++;
    1628             :                 if (noffsets == ARRAY_SIZE(offsets)) {
    1629             :                         if (next_line)
    1630             :                                 count = next_line - kbuf;
    1631             :                         break;
    1632             :                 }
    1633             :         }
    1634             : 
    1635             :         ret = -ESRCH;
    1636             :         p = get_proc_task(inode);
    1637             :         if (!p)
    1638             :                 goto out;
    1639             :         ret = proc_timens_set_offset(file, p, offsets, noffsets);
    1640             :         put_task_struct(p);
    1641             :         if (ret)
    1642             :                 goto out;
    1643             : 
    1644             :         ret = count;
    1645             : out:
    1646             :         kfree(kbuf);
    1647             :         return ret;
    1648             : }
    1649             : 
    1650             : static int timens_offsets_open(struct inode *inode, struct file *filp)
    1651             : {
    1652             :         return single_open(filp, timens_offsets_show, inode);
    1653             : }
    1654             : 
    1655             : static const struct file_operations proc_timens_offsets_operations = {
    1656             :         .open           = timens_offsets_open,
    1657             :         .read           = seq_read,
    1658             :         .write          = timens_offsets_write,
    1659             :         .llseek         = seq_lseek,
    1660             :         .release        = single_release,
    1661             : };
    1662             : #endif /* CONFIG_TIME_NS */
    1663             : 
    1664           0 : static ssize_t comm_write(struct file *file, const char __user *buf,
    1665             :                                 size_t count, loff_t *offset)
    1666             : {
    1667           0 :         struct inode *inode = file_inode(file);
    1668             :         struct task_struct *p;
    1669             :         char buffer[TASK_COMM_LEN];
    1670           0 :         const size_t maxlen = sizeof(buffer) - 1;
    1671             : 
    1672           0 :         memset(buffer, 0, sizeof(buffer));
    1673           0 :         if (copy_from_user(buffer, buf, count > maxlen ? maxlen : count))
    1674             :                 return -EFAULT;
    1675             : 
    1676           0 :         p = get_proc_task(inode);
    1677           0 :         if (!p)
    1678             :                 return -ESRCH;
    1679             : 
    1680           0 :         if (same_thread_group(current, p)) {
    1681             :                 set_task_comm(p, buffer);
    1682             :                 proc_comm_connector(p);
    1683             :         }
    1684             :         else
    1685             :                 count = -EINVAL;
    1686             : 
    1687           0 :         put_task_struct(p);
    1688             : 
    1689           0 :         return count;
    1690             : }
    1691             : 
    1692           0 : static int comm_show(struct seq_file *m, void *v)
    1693             : {
    1694           0 :         struct inode *inode = m->private;
    1695             :         struct task_struct *p;
    1696             : 
    1697           0 :         p = get_proc_task(inode);
    1698           0 :         if (!p)
    1699             :                 return -ESRCH;
    1700             : 
    1701           0 :         proc_task_name(m, p, false);
    1702           0 :         seq_putc(m, '\n');
    1703             : 
    1704           0 :         put_task_struct(p);
    1705             : 
    1706           0 :         return 0;
    1707             : }
    1708             : 
    1709           0 : static int comm_open(struct inode *inode, struct file *filp)
    1710             : {
    1711           0 :         return single_open(filp, comm_show, inode);
    1712             : }
    1713             : 
    1714             : static const struct file_operations proc_pid_set_comm_operations = {
    1715             :         .open           = comm_open,
    1716             :         .read           = seq_read,
    1717             :         .write          = comm_write,
    1718             :         .llseek         = seq_lseek,
    1719             :         .release        = single_release,
    1720             : };
    1721             : 
    1722           0 : static int proc_exe_link(struct dentry *dentry, struct path *exe_path)
    1723             : {
    1724             :         struct task_struct *task;
    1725             :         struct file *exe_file;
    1726             : 
    1727           0 :         task = get_proc_task(d_inode(dentry));
    1728           0 :         if (!task)
    1729             :                 return -ENOENT;
    1730           0 :         exe_file = get_task_exe_file(task);
    1731           0 :         put_task_struct(task);
    1732           0 :         if (exe_file) {
    1733           0 :                 *exe_path = exe_file->f_path;
    1734           0 :                 path_get(&exe_file->f_path);
    1735           0 :                 fput(exe_file);
    1736           0 :                 return 0;
    1737             :         } else
    1738             :                 return -ENOENT;
    1739             : }
    1740             : 
    1741           0 : static const char *proc_pid_get_link(struct dentry *dentry,
    1742             :                                      struct inode *inode,
    1743             :                                      struct delayed_call *done)
    1744             : {
    1745             :         struct path path;
    1746           0 :         int error = -EACCES;
    1747             : 
    1748           0 :         if (!dentry)
    1749             :                 return ERR_PTR(-ECHILD);
    1750             : 
    1751             :         /* Are we allowed to snoop on the tasks file descriptors? */
    1752           0 :         if (!proc_fd_access_allowed(inode))
    1753             :                 goto out;
    1754             : 
    1755           0 :         error = PROC_I(inode)->op.proc_get_link(dentry, &path);
    1756           0 :         if (error)
    1757             :                 goto out;
    1758             : 
    1759           0 :         error = nd_jump_link(&path);
    1760             : out:
    1761           0 :         return ERR_PTR(error);
    1762             : }
    1763             : 
    1764           0 : static int do_proc_readlink(const struct path *path, char __user *buffer, int buflen)
    1765             : {
    1766           0 :         char *tmp = kmalloc(PATH_MAX, GFP_KERNEL);
    1767             :         char *pathname;
    1768             :         int len;
    1769             : 
    1770           0 :         if (!tmp)
    1771             :                 return -ENOMEM;
    1772             : 
    1773           0 :         pathname = d_path(path, tmp, PATH_MAX);
    1774           0 :         len = PTR_ERR(pathname);
    1775           0 :         if (IS_ERR(pathname))
    1776             :                 goto out;
    1777           0 :         len = tmp + PATH_MAX - 1 - pathname;
    1778             : 
    1779           0 :         if (len > buflen)
    1780           0 :                 len = buflen;
    1781           0 :         if (copy_to_user(buffer, pathname, len))
    1782           0 :                 len = -EFAULT;
    1783             :  out:
    1784           0 :         kfree(tmp);
    1785           0 :         return len;
    1786             : }
    1787             : 
    1788           0 : static int proc_pid_readlink(struct dentry * dentry, char __user * buffer, int buflen)
    1789             : {
    1790           0 :         int error = -EACCES;
    1791           0 :         struct inode *inode = d_inode(dentry);
    1792             :         struct path path;
    1793             : 
    1794             :         /* Are we allowed to snoop on the tasks file descriptors? */
    1795           0 :         if (!proc_fd_access_allowed(inode))
    1796             :                 goto out;
    1797             : 
    1798           0 :         error = PROC_I(inode)->op.proc_get_link(dentry, &path);
    1799           0 :         if (error)
    1800             :                 goto out;
    1801             : 
    1802           0 :         error = do_proc_readlink(&path, buffer, buflen);
    1803           0 :         path_put(&path);
    1804             : out:
    1805           0 :         return error;
    1806             : }
    1807             : 
    1808             : const struct inode_operations proc_pid_link_inode_operations = {
    1809             :         .readlink       = proc_pid_readlink,
    1810             :         .get_link       = proc_pid_get_link,
    1811             :         .setattr        = proc_setattr,
    1812             : };
    1813             : 
    1814             : 
    1815             : /* building an inode */
    1816             : 
    1817           0 : void task_dump_owner(struct task_struct *task, umode_t mode,
    1818             :                      kuid_t *ruid, kgid_t *rgid)
    1819             : {
    1820             :         /* Depending on the state of dumpable compute who should own a
    1821             :          * proc file for a task.
    1822             :          */
    1823             :         const struct cred *cred;
    1824             :         kuid_t uid;
    1825             :         kgid_t gid;
    1826             : 
    1827           0 :         if (unlikely(task->flags & PF_KTHREAD)) {
    1828           0 :                 *ruid = GLOBAL_ROOT_UID;
    1829           0 :                 *rgid = GLOBAL_ROOT_GID;
    1830           0 :                 return;
    1831             :         }
    1832             : 
    1833             :         /* Default to the tasks effective ownership */
    1834             :         rcu_read_lock();
    1835           0 :         cred = __task_cred(task);
    1836           0 :         uid = cred->euid;
    1837           0 :         gid = cred->egid;
    1838             :         rcu_read_unlock();
    1839             : 
    1840             :         /*
    1841             :          * Before the /proc/pid/status file was created the only way to read
    1842             :          * the effective uid of a /process was to stat /proc/pid.  Reading
    1843             :          * /proc/pid/status is slow enough that procps and other packages
    1844             :          * kept stating /proc/pid.  To keep the rules in /proc simple I have
    1845             :          * made this apply to all per process world readable and executable
    1846             :          * directories.
    1847             :          */
    1848           0 :         if (mode != (S_IFDIR|S_IRUGO|S_IXUGO)) {
    1849             :                 struct mm_struct *mm;
    1850           0 :                 task_lock(task);
    1851           0 :                 mm = task->mm;
    1852             :                 /* Make non-dumpable tasks owned by some root */
    1853           0 :                 if (mm) {
    1854           0 :                         if (get_dumpable(mm) != SUID_DUMP_USER) {
    1855           0 :                                 struct user_namespace *user_ns = mm->user_ns;
    1856             : 
    1857           0 :                                 uid = make_kuid(user_ns, 0);
    1858           0 :                                 if (!uid_valid(uid))
    1859             :                                         uid = GLOBAL_ROOT_UID;
    1860             : 
    1861           0 :                                 gid = make_kgid(user_ns, 0);
    1862           0 :                                 if (!gid_valid(gid))
    1863             :                                         gid = GLOBAL_ROOT_GID;
    1864             :                         }
    1865             :                 } else {
    1866             :                         uid = GLOBAL_ROOT_UID;
    1867             :                         gid = GLOBAL_ROOT_GID;
    1868             :                 }
    1869           0 :                 task_unlock(task);
    1870             :         }
    1871           0 :         *ruid = uid;
    1872           0 :         *rgid = gid;
    1873             : }
    1874             : 
    1875           0 : void proc_pid_evict_inode(struct proc_inode *ei)
    1876             : {
    1877           0 :         struct pid *pid = ei->pid;
    1878             : 
    1879           0 :         if (S_ISDIR(ei->vfs_inode.i_mode)) {
    1880           0 :                 spin_lock(&pid->lock);
    1881           0 :                 hlist_del_init_rcu(&ei->sibling_inodes);
    1882           0 :                 spin_unlock(&pid->lock);
    1883             :         }
    1884             : 
    1885           0 :         put_pid(pid);
    1886           0 : }
    1887             : 
    1888           0 : struct inode *proc_pid_make_inode(struct super_block *sb,
    1889             :                                   struct task_struct *task, umode_t mode)
    1890             : {
    1891             :         struct inode * inode;
    1892             :         struct proc_inode *ei;
    1893             :         struct pid *pid;
    1894             : 
    1895             :         /* We need a new inode */
    1896             : 
    1897           0 :         inode = new_inode(sb);
    1898           0 :         if (!inode)
    1899             :                 goto out;
    1900             : 
    1901             :         /* Common stuff */
    1902           0 :         ei = PROC_I(inode);
    1903           0 :         inode->i_mode = mode;
    1904           0 :         inode->i_ino = get_next_ino();
    1905           0 :         inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
    1906           0 :         inode->i_op = &proc_def_inode_operations;
    1907             : 
    1908             :         /*
    1909             :          * grab the reference to task.
    1910             :          */
    1911           0 :         pid = get_task_pid(task, PIDTYPE_PID);
    1912           0 :         if (!pid)
    1913             :                 goto out_unlock;
    1914             : 
    1915             :         /* Let the pid remember us for quick removal */
    1916           0 :         ei->pid = pid;
    1917             : 
    1918           0 :         task_dump_owner(task, 0, &inode->i_uid, &inode->i_gid);
    1919             :         security_task_to_inode(task, inode);
    1920             : 
    1921             : out:
    1922             :         return inode;
    1923             : 
    1924             : out_unlock:
    1925           0 :         iput(inode);
    1926           0 :         return NULL;
    1927             : }
    1928             : 
    1929             : /*
    1930             :  * Generating an inode and adding it into @pid->inodes, so that task will
    1931             :  * invalidate inode's dentry before being released.
    1932             :  *
    1933             :  * This helper is used for creating dir-type entries under '/proc' and
    1934             :  * '/proc/<tgid>/task'. Other entries(eg. fd, stat) under '/proc/<tgid>'
    1935             :  * can be released by invalidating '/proc/<tgid>' dentry.
    1936             :  * In theory, dentries under '/proc/<tgid>/task' can also be released by
    1937             :  * invalidating '/proc/<tgid>' dentry, we reserve it to handle single
    1938             :  * thread exiting situation: Any one of threads should invalidate its
    1939             :  * '/proc/<tgid>/task/<pid>' dentry before released.
    1940             :  */
    1941           0 : static struct inode *proc_pid_make_base_inode(struct super_block *sb,
    1942             :                                 struct task_struct *task, umode_t mode)
    1943             : {
    1944             :         struct inode *inode;
    1945             :         struct proc_inode *ei;
    1946             :         struct pid *pid;
    1947             : 
    1948           0 :         inode = proc_pid_make_inode(sb, task, mode);
    1949           0 :         if (!inode)
    1950             :                 return NULL;
    1951             : 
    1952             :         /* Let proc_flush_pid find this directory inode */
    1953           0 :         ei = PROC_I(inode);
    1954           0 :         pid = ei->pid;
    1955           0 :         spin_lock(&pid->lock);
    1956           0 :         hlist_add_head_rcu(&ei->sibling_inodes, &pid->inodes);
    1957           0 :         spin_unlock(&pid->lock);
    1958             : 
    1959           0 :         return inode;
    1960             : }
    1961             : 
    1962           0 : int pid_getattr(struct mnt_idmap *idmap, const struct path *path,
    1963             :                 struct kstat *stat, u32 request_mask, unsigned int query_flags)
    1964             : {
    1965           0 :         struct inode *inode = d_inode(path->dentry);
    1966           0 :         struct proc_fs_info *fs_info = proc_sb_info(inode->i_sb);
    1967             :         struct task_struct *task;
    1968             : 
    1969           0 :         generic_fillattr(&nop_mnt_idmap, inode, stat);
    1970             : 
    1971           0 :         stat->uid = GLOBAL_ROOT_UID;
    1972           0 :         stat->gid = GLOBAL_ROOT_GID;
    1973             :         rcu_read_lock();
    1974           0 :         task = pid_task(proc_pid(inode), PIDTYPE_PID);
    1975           0 :         if (task) {
    1976           0 :                 if (!has_pid_permissions(fs_info, task, HIDEPID_INVISIBLE)) {
    1977             :                         rcu_read_unlock();
    1978             :                         /*
    1979             :                          * This doesn't prevent learning whether PID exists,
    1980             :                          * it only makes getattr() consistent with readdir().
    1981             :                          */
    1982           0 :                         return -ENOENT;
    1983             :                 }
    1984           0 :                 task_dump_owner(task, inode->i_mode, &stat->uid, &stat->gid);
    1985             :         }
    1986             :         rcu_read_unlock();
    1987           0 :         return 0;
    1988             : }
    1989             : 
    1990             : /* dentry stuff */
    1991             : 
    1992             : /*
    1993             :  * Set <pid>/... inode ownership (can change due to setuid(), etc.)
    1994             :  */
    1995           0 : void pid_update_inode(struct task_struct *task, struct inode *inode)
    1996             : {
    1997           0 :         task_dump_owner(task, inode->i_mode, &inode->i_uid, &inode->i_gid);
    1998             : 
    1999           0 :         inode->i_mode &= ~(S_ISUID | S_ISGID);
    2000           0 :         security_task_to_inode(task, inode);
    2001           0 : }
    2002             : 
    2003             : /*
    2004             :  * Rewrite the inode's ownerships here because the owning task may have
    2005             :  * performed a setuid(), etc.
    2006             :  *
    2007             :  */
    2008           0 : static int pid_revalidate(struct dentry *dentry, unsigned int flags)
    2009             : {
    2010             :         struct inode *inode;
    2011             :         struct task_struct *task;
    2012           0 :         int ret = 0;
    2013             : 
    2014             :         rcu_read_lock();
    2015           0 :         inode = d_inode_rcu(dentry);
    2016           0 :         if (!inode)
    2017             :                 goto out;
    2018           0 :         task = pid_task(proc_pid(inode), PIDTYPE_PID);
    2019             : 
    2020           0 :         if (task) {
    2021           0 :                 pid_update_inode(task, inode);
    2022           0 :                 ret = 1;
    2023             :         }
    2024             : out:
    2025             :         rcu_read_unlock();
    2026           0 :         return ret;
    2027             : }
    2028             : 
    2029             : static inline bool proc_inode_is_dead(struct inode *inode)
    2030             : {
    2031           0 :         return !proc_pid(inode)->tasks[PIDTYPE_PID].first;
    2032             : }
    2033             : 
    2034           0 : int pid_delete_dentry(const struct dentry *dentry)
    2035             : {
    2036             :         /* Is the task we represent dead?
    2037             :          * If so, then don't put the dentry on the lru list,
    2038             :          * kill it immediately.
    2039             :          */
    2040           0 :         return proc_inode_is_dead(d_inode(dentry));
    2041             : }
    2042             : 
    2043             : const struct dentry_operations pid_dentry_operations =
    2044             : {
    2045             :         .d_revalidate   = pid_revalidate,
    2046             :         .d_delete       = pid_delete_dentry,
    2047             : };
    2048             : 
    2049             : /* Lookups */
    2050             : 
    2051             : /*
    2052             :  * Fill a directory entry.
    2053             :  *
    2054             :  * If possible create the dcache entry and derive our inode number and
    2055             :  * file type from dcache entry.
    2056             :  *
    2057             :  * Since all of the proc inode numbers are dynamically generated, the inode
    2058             :  * numbers do not exist until the inode is cache.  This means creating
    2059             :  * the dcache entry in readdir is necessary to keep the inode numbers
    2060             :  * reported by readdir in sync with the inode numbers reported
    2061             :  * by stat.
    2062             :  */
    2063           0 : bool proc_fill_cache(struct file *file, struct dir_context *ctx,
    2064             :         const char *name, unsigned int len,
    2065             :         instantiate_t instantiate, struct task_struct *task, const void *ptr)
    2066             : {
    2067           0 :         struct dentry *child, *dir = file->f_path.dentry;
    2068           0 :         struct qstr qname = QSTR_INIT(name, len);
    2069             :         struct inode *inode;
    2070           0 :         unsigned type = DT_UNKNOWN;
    2071           0 :         ino_t ino = 1;
    2072             : 
    2073           0 :         child = d_hash_and_lookup(dir, &qname);
    2074           0 :         if (!child) {
    2075           0 :                 DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
    2076           0 :                 child = d_alloc_parallel(dir, &qname, &wq);
    2077           0 :                 if (IS_ERR(child))
    2078             :                         goto end_instantiate;
    2079           0 :                 if (d_in_lookup(child)) {
    2080             :                         struct dentry *res;
    2081           0 :                         res = instantiate(child, task, ptr);
    2082           0 :                         d_lookup_done(child);
    2083           0 :                         if (unlikely(res)) {
    2084           0 :                                 dput(child);
    2085           0 :                                 child = res;
    2086           0 :                                 if (IS_ERR(child))
    2087             :                                         goto end_instantiate;
    2088             :                         }
    2089             :                 }
    2090             :         }
    2091           0 :         inode = d_inode(child);
    2092           0 :         ino = inode->i_ino;
    2093           0 :         type = inode->i_mode >> 12;
    2094           0 :         dput(child);
    2095             : end_instantiate:
    2096           0 :         return dir_emit(ctx, name, len, ino, type);
    2097             : }
    2098             : 
    2099             : /*
    2100             :  * dname_to_vma_addr - maps a dentry name into two unsigned longs
    2101             :  * which represent vma start and end addresses.
    2102             :  */
    2103           0 : static int dname_to_vma_addr(struct dentry *dentry,
    2104             :                              unsigned long *start, unsigned long *end)
    2105             : {
    2106           0 :         const char *str = dentry->d_name.name;
    2107             :         unsigned long long sval, eval;
    2108             :         unsigned int len;
    2109             : 
    2110           0 :         if (str[0] == '0' && str[1] != '-')
    2111             :                 return -EINVAL;
    2112           0 :         len = _parse_integer(str, 16, &sval);
    2113           0 :         if (len & KSTRTOX_OVERFLOW)
    2114             :                 return -EINVAL;
    2115             :         if (sval != (unsigned long)sval)
    2116             :                 return -EINVAL;
    2117           0 :         str += len;
    2118             : 
    2119           0 :         if (*str != '-')
    2120             :                 return -EINVAL;
    2121           0 :         str++;
    2122             : 
    2123           0 :         if (str[0] == '0' && str[1])
    2124             :                 return -EINVAL;
    2125           0 :         len = _parse_integer(str, 16, &eval);
    2126           0 :         if (len & KSTRTOX_OVERFLOW)
    2127             :                 return -EINVAL;
    2128             :         if (eval != (unsigned long)eval)
    2129             :                 return -EINVAL;
    2130           0 :         str += len;
    2131             : 
    2132           0 :         if (*str != '\0')
    2133             :                 return -EINVAL;
    2134             : 
    2135           0 :         *start = sval;
    2136           0 :         *end = eval;
    2137             : 
    2138             :         return 0;
    2139             : }
    2140             : 
    2141           0 : static int map_files_d_revalidate(struct dentry *dentry, unsigned int flags)
    2142             : {
    2143             :         unsigned long vm_start, vm_end;
    2144           0 :         bool exact_vma_exists = false;
    2145           0 :         struct mm_struct *mm = NULL;
    2146             :         struct task_struct *task;
    2147             :         struct inode *inode;
    2148           0 :         int status = 0;
    2149             : 
    2150           0 :         if (flags & LOOKUP_RCU)
    2151             :                 return -ECHILD;
    2152             : 
    2153           0 :         inode = d_inode(dentry);
    2154           0 :         task = get_proc_task(inode);
    2155           0 :         if (!task)
    2156             :                 goto out_notask;
    2157             : 
    2158           0 :         mm = mm_access(task, PTRACE_MODE_READ_FSCREDS);
    2159           0 :         if (IS_ERR_OR_NULL(mm))
    2160             :                 goto out;
    2161             : 
    2162           0 :         if (!dname_to_vma_addr(dentry, &vm_start, &vm_end)) {
    2163           0 :                 status = mmap_read_lock_killable(mm);
    2164           0 :                 if (!status) {
    2165           0 :                         exact_vma_exists = !!find_exact_vma(mm, vm_start,
    2166             :                                                             vm_end);
    2167             :                         mmap_read_unlock(mm);
    2168             :                 }
    2169             :         }
    2170             : 
    2171           0 :         mmput(mm);
    2172             : 
    2173           0 :         if (exact_vma_exists) {
    2174           0 :                 task_dump_owner(task, 0, &inode->i_uid, &inode->i_gid);
    2175             : 
    2176             :                 security_task_to_inode(task, inode);
    2177             :                 status = 1;
    2178             :         }
    2179             : 
    2180             : out:
    2181           0 :         put_task_struct(task);
    2182             : 
    2183             : out_notask:
    2184             :         return status;
    2185             : }
    2186             : 
    2187             : static const struct dentry_operations tid_map_files_dentry_operations = {
    2188             :         .d_revalidate   = map_files_d_revalidate,
    2189             :         .d_delete       = pid_delete_dentry,
    2190             : };
    2191             : 
    2192           0 : static int map_files_get_link(struct dentry *dentry, struct path *path)
    2193             : {
    2194             :         unsigned long vm_start, vm_end;
    2195             :         struct vm_area_struct *vma;
    2196             :         struct task_struct *task;
    2197             :         struct mm_struct *mm;
    2198             :         int rc;
    2199             : 
    2200           0 :         rc = -ENOENT;
    2201           0 :         task = get_proc_task(d_inode(dentry));
    2202           0 :         if (!task)
    2203             :                 goto out;
    2204             : 
    2205           0 :         mm = get_task_mm(task);
    2206           0 :         put_task_struct(task);
    2207           0 :         if (!mm)
    2208             :                 goto out;
    2209             : 
    2210           0 :         rc = dname_to_vma_addr(dentry, &vm_start, &vm_end);
    2211           0 :         if (rc)
    2212             :                 goto out_mmput;
    2213             : 
    2214           0 :         rc = mmap_read_lock_killable(mm);
    2215           0 :         if (rc)
    2216             :                 goto out_mmput;
    2217             : 
    2218           0 :         rc = -ENOENT;
    2219           0 :         vma = find_exact_vma(mm, vm_start, vm_end);
    2220           0 :         if (vma && vma->vm_file) {
    2221           0 :                 *path = vma->vm_file->f_path;
    2222           0 :                 path_get(path);
    2223           0 :                 rc = 0;
    2224             :         }
    2225             :         mmap_read_unlock(mm);
    2226             : 
    2227             : out_mmput:
    2228           0 :         mmput(mm);
    2229             : out:
    2230           0 :         return rc;
    2231             : }
    2232             : 
    2233             : struct map_files_info {
    2234             :         unsigned long   start;
    2235             :         unsigned long   end;
    2236             :         fmode_t         mode;
    2237             : };
    2238             : 
    2239             : /*
    2240             :  * Only allow CAP_SYS_ADMIN and CAP_CHECKPOINT_RESTORE to follow the links, due
    2241             :  * to concerns about how the symlinks may be used to bypass permissions on
    2242             :  * ancestor directories in the path to the file in question.
    2243             :  */
    2244             : static const char *
    2245           0 : proc_map_files_get_link(struct dentry *dentry,
    2246             :                         struct inode *inode,
    2247             :                         struct delayed_call *done)
    2248             : {
    2249           0 :         if (!checkpoint_restore_ns_capable(&init_user_ns))
    2250             :                 return ERR_PTR(-EPERM);
    2251             : 
    2252           0 :         return proc_pid_get_link(dentry, inode, done);
    2253             : }
    2254             : 
    2255             : /*
    2256             :  * Identical to proc_pid_link_inode_operations except for get_link()
    2257             :  */
    2258             : static const struct inode_operations proc_map_files_link_inode_operations = {
    2259             :         .readlink       = proc_pid_readlink,
    2260             :         .get_link       = proc_map_files_get_link,
    2261             :         .setattr        = proc_setattr,
    2262             : };
    2263             : 
    2264             : static struct dentry *
    2265           0 : proc_map_files_instantiate(struct dentry *dentry,
    2266             :                            struct task_struct *task, const void *ptr)
    2267             : {
    2268           0 :         fmode_t mode = (fmode_t)(unsigned long)ptr;
    2269             :         struct proc_inode *ei;
    2270             :         struct inode *inode;
    2271             : 
    2272           0 :         inode = proc_pid_make_inode(dentry->d_sb, task, S_IFLNK |
    2273           0 :                                     ((mode & FMODE_READ ) ? S_IRUSR : 0) |
    2274           0 :                                     ((mode & FMODE_WRITE) ? S_IWUSR : 0));
    2275           0 :         if (!inode)
    2276             :                 return ERR_PTR(-ENOENT);
    2277             : 
    2278           0 :         ei = PROC_I(inode);
    2279           0 :         ei->op.proc_get_link = map_files_get_link;
    2280             : 
    2281           0 :         inode->i_op = &proc_map_files_link_inode_operations;
    2282           0 :         inode->i_size = 64;
    2283             : 
    2284           0 :         d_set_d_op(dentry, &tid_map_files_dentry_operations);
    2285           0 :         return d_splice_alias(inode, dentry);
    2286             : }
    2287             : 
    2288           0 : static struct dentry *proc_map_files_lookup(struct inode *dir,
    2289             :                 struct dentry *dentry, unsigned int flags)
    2290             : {
    2291             :         unsigned long vm_start, vm_end;
    2292             :         struct vm_area_struct *vma;
    2293             :         struct task_struct *task;
    2294             :         struct dentry *result;
    2295             :         struct mm_struct *mm;
    2296             : 
    2297           0 :         result = ERR_PTR(-ENOENT);
    2298           0 :         task = get_proc_task(dir);
    2299           0 :         if (!task)
    2300             :                 goto out;
    2301             : 
    2302           0 :         result = ERR_PTR(-EACCES);
    2303           0 :         if (!ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS))
    2304             :                 goto out_put_task;
    2305             : 
    2306           0 :         result = ERR_PTR(-ENOENT);
    2307           0 :         if (dname_to_vma_addr(dentry, &vm_start, &vm_end))
    2308             :                 goto out_put_task;
    2309             : 
    2310           0 :         mm = get_task_mm(task);
    2311           0 :         if (!mm)
    2312             :                 goto out_put_task;
    2313             : 
    2314           0 :         result = ERR_PTR(-EINTR);
    2315           0 :         if (mmap_read_lock_killable(mm))
    2316             :                 goto out_put_mm;
    2317             : 
    2318           0 :         result = ERR_PTR(-ENOENT);
    2319           0 :         vma = find_exact_vma(mm, vm_start, vm_end);
    2320           0 :         if (!vma)
    2321             :                 goto out_no_vma;
    2322             : 
    2323           0 :         if (vma->vm_file)
    2324           0 :                 result = proc_map_files_instantiate(dentry, task,
    2325           0 :                                 (void *)(unsigned long)vma->vm_file->f_mode);
    2326             : 
    2327             : out_no_vma:
    2328             :         mmap_read_unlock(mm);
    2329             : out_put_mm:
    2330           0 :         mmput(mm);
    2331             : out_put_task:
    2332           0 :         put_task_struct(task);
    2333             : out:
    2334           0 :         return result;
    2335             : }
    2336             : 
    2337             : static const struct inode_operations proc_map_files_inode_operations = {
    2338             :         .lookup         = proc_map_files_lookup,
    2339             :         .permission     = proc_fd_permission,
    2340             :         .setattr        = proc_setattr,
    2341             : };
    2342             : 
    2343             : static int
    2344           0 : proc_map_files_readdir(struct file *file, struct dir_context *ctx)
    2345             : {
    2346             :         struct vm_area_struct *vma;
    2347             :         struct task_struct *task;
    2348             :         struct mm_struct *mm;
    2349             :         unsigned long nr_files, pos, i;
    2350             :         GENRADIX(struct map_files_info) fa;
    2351             :         struct map_files_info *p;
    2352             :         int ret;
    2353             :         struct vma_iterator vmi;
    2354             : 
    2355           0 :         genradix_init(&fa);
    2356             : 
    2357           0 :         ret = -ENOENT;
    2358           0 :         task = get_proc_task(file_inode(file));
    2359           0 :         if (!task)
    2360             :                 goto out;
    2361             : 
    2362           0 :         ret = -EACCES;
    2363           0 :         if (!ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS))
    2364             :                 goto out_put_task;
    2365             : 
    2366           0 :         ret = 0;
    2367           0 :         if (!dir_emit_dots(file, ctx))
    2368             :                 goto out_put_task;
    2369             : 
    2370           0 :         mm = get_task_mm(task);
    2371           0 :         if (!mm)
    2372             :                 goto out_put_task;
    2373             : 
    2374           0 :         ret = mmap_read_lock_killable(mm);
    2375           0 :         if (ret) {
    2376           0 :                 mmput(mm);
    2377           0 :                 goto out_put_task;
    2378             :         }
    2379             : 
    2380           0 :         nr_files = 0;
    2381             : 
    2382             :         /*
    2383             :          * We need two passes here:
    2384             :          *
    2385             :          *  1) Collect vmas of mapped files with mmap_lock taken
    2386             :          *  2) Release mmap_lock and instantiate entries
    2387             :          *
    2388             :          * otherwise we get lockdep complained, since filldir()
    2389             :          * routine might require mmap_lock taken in might_fault().
    2390             :          */
    2391             : 
    2392           0 :         pos = 2;
    2393             :         vma_iter_init(&vmi, mm, 0);
    2394           0 :         for_each_vma(vmi, vma) {
    2395           0 :                 if (!vma->vm_file)
    2396           0 :                         continue;
    2397           0 :                 if (++pos <= ctx->pos)
    2398           0 :                         continue;
    2399             : 
    2400           0 :                 p = genradix_ptr_alloc(&fa, nr_files++, GFP_KERNEL);
    2401           0 :                 if (!p) {
    2402           0 :                         ret = -ENOMEM;
    2403           0 :                         mmap_read_unlock(mm);
    2404           0 :                         mmput(mm);
    2405           0 :                         goto out_put_task;
    2406             :                 }
    2407             : 
    2408           0 :                 p->start = vma->vm_start;
    2409           0 :                 p->end = vma->vm_end;
    2410           0 :                 p->mode = vma->vm_file->f_mode;
    2411             :         }
    2412           0 :         mmap_read_unlock(mm);
    2413           0 :         mmput(mm);
    2414             : 
    2415           0 :         for (i = 0; i < nr_files; i++) {
    2416             :                 char buf[4 * sizeof(long) + 2]; /* max: %lx-%lx\0 */
    2417             :                 unsigned int len;
    2418             : 
    2419           0 :                 p = genradix_ptr(&fa, i);
    2420           0 :                 len = snprintf(buf, sizeof(buf), "%lx-%lx", p->start, p->end);
    2421           0 :                 if (!proc_fill_cache(file, ctx,
    2422             :                                       buf, len,
    2423             :                                       proc_map_files_instantiate,
    2424             :                                       task,
    2425           0 :                                       (void *)(unsigned long)p->mode))
    2426             :                         break;
    2427           0 :                 ctx->pos++;
    2428             :         }
    2429             : 
    2430             : out_put_task:
    2431           0 :         put_task_struct(task);
    2432             : out:
    2433           0 :         genradix_free(&fa);
    2434           0 :         return ret;
    2435             : }
    2436             : 
    2437             : static const struct file_operations proc_map_files_operations = {
    2438             :         .read           = generic_read_dir,
    2439             :         .iterate_shared = proc_map_files_readdir,
    2440             :         .llseek         = generic_file_llseek,
    2441             : };
    2442             : 
    2443             : #if defined(CONFIG_CHECKPOINT_RESTORE) && defined(CONFIG_POSIX_TIMERS)
    2444             : struct timers_private {
    2445             :         struct pid *pid;
    2446             :         struct task_struct *task;
    2447             :         struct sighand_struct *sighand;
    2448             :         struct pid_namespace *ns;
    2449             :         unsigned long flags;
    2450             : };
    2451             : 
    2452             : static void *timers_start(struct seq_file *m, loff_t *pos)
    2453             : {
    2454             :         struct timers_private *tp = m->private;
    2455             : 
    2456             :         tp->task = get_pid_task(tp->pid, PIDTYPE_PID);
    2457             :         if (!tp->task)
    2458             :                 return ERR_PTR(-ESRCH);
    2459             : 
    2460             :         tp->sighand = lock_task_sighand(tp->task, &tp->flags);
    2461             :         if (!tp->sighand)
    2462             :                 return ERR_PTR(-ESRCH);
    2463             : 
    2464             :         return seq_list_start(&tp->task->signal->posix_timers, *pos);
    2465             : }
    2466             : 
    2467             : static void *timers_next(struct seq_file *m, void *v, loff_t *pos)
    2468             : {
    2469             :         struct timers_private *tp = m->private;
    2470             :         return seq_list_next(v, &tp->task->signal->posix_timers, pos);
    2471             : }
    2472             : 
    2473             : static void timers_stop(struct seq_file *m, void *v)
    2474             : {
    2475             :         struct timers_private *tp = m->private;
    2476             : 
    2477             :         if (tp->sighand) {
    2478             :                 unlock_task_sighand(tp->task, &tp->flags);
    2479             :                 tp->sighand = NULL;
    2480             :         }
    2481             : 
    2482             :         if (tp->task) {
    2483             :                 put_task_struct(tp->task);
    2484             :                 tp->task = NULL;
    2485             :         }
    2486             : }
    2487             : 
    2488             : static int show_timer(struct seq_file *m, void *v)
    2489             : {
    2490             :         struct k_itimer *timer;
    2491             :         struct timers_private *tp = m->private;
    2492             :         int notify;
    2493             :         static const char * const nstr[] = {
    2494             :                 [SIGEV_SIGNAL] = "signal",
    2495             :                 [SIGEV_NONE] = "none",
    2496             :                 [SIGEV_THREAD] = "thread",
    2497             :         };
    2498             : 
    2499             :         timer = list_entry((struct list_head *)v, struct k_itimer, list);
    2500             :         notify = timer->it_sigev_notify;
    2501             : 
    2502             :         seq_printf(m, "ID: %d\n", timer->it_id);
    2503             :         seq_printf(m, "signal: %d/%px\n",
    2504             :                    timer->sigq->info.si_signo,
    2505             :                    timer->sigq->info.si_value.sival_ptr);
    2506             :         seq_printf(m, "notify: %s/%s.%d\n",
    2507             :                    nstr[notify & ~SIGEV_THREAD_ID],
    2508             :                    (notify & SIGEV_THREAD_ID) ? "tid" : "pid",
    2509             :                    pid_nr_ns(timer->it_pid, tp->ns));
    2510             :         seq_printf(m, "ClockID: %d\n", timer->it_clock);
    2511             : 
    2512             :         return 0;
    2513             : }
    2514             : 
    2515             : static const struct seq_operations proc_timers_seq_ops = {
    2516             :         .start  = timers_start,
    2517             :         .next   = timers_next,
    2518             :         .stop   = timers_stop,
    2519             :         .show   = show_timer,
    2520             : };
    2521             : 
    2522             : static int proc_timers_open(struct inode *inode, struct file *file)
    2523             : {
    2524             :         struct timers_private *tp;
    2525             : 
    2526             :         tp = __seq_open_private(file, &proc_timers_seq_ops,
    2527             :                         sizeof(struct timers_private));
    2528             :         if (!tp)
    2529             :                 return -ENOMEM;
    2530             : 
    2531             :         tp->pid = proc_pid(inode);
    2532             :         tp->ns = proc_pid_ns(inode->i_sb);
    2533             :         return 0;
    2534             : }
    2535             : 
    2536             : static const struct file_operations proc_timers_operations = {
    2537             :         .open           = proc_timers_open,
    2538             :         .read           = seq_read,
    2539             :         .llseek         = seq_lseek,
    2540             :         .release        = seq_release_private,
    2541             : };
    2542             : #endif
    2543             : 
    2544           0 : static ssize_t timerslack_ns_write(struct file *file, const char __user *buf,
    2545             :                                         size_t count, loff_t *offset)
    2546             : {
    2547           0 :         struct inode *inode = file_inode(file);
    2548             :         struct task_struct *p;
    2549             :         u64 slack_ns;
    2550             :         int err;
    2551             : 
    2552           0 :         err = kstrtoull_from_user(buf, count, 10, &slack_ns);
    2553           0 :         if (err < 0)
    2554           0 :                 return err;
    2555             : 
    2556           0 :         p = get_proc_task(inode);
    2557           0 :         if (!p)
    2558             :                 return -ESRCH;
    2559             : 
    2560           0 :         if (p != current) {
    2561             :                 rcu_read_lock();
    2562           0 :                 if (!ns_capable(__task_cred(p)->user_ns, CAP_SYS_NICE)) {
    2563             :                         rcu_read_unlock();
    2564           0 :                         count = -EPERM;
    2565           0 :                         goto out;
    2566             :                 }
    2567           0 :                 rcu_read_unlock();
    2568             : 
    2569           0 :                 err = security_task_setscheduler(p);
    2570           0 :                 if (err) {
    2571           0 :                         count = err;
    2572           0 :                         goto out;
    2573             :                 }
    2574             :         }
    2575             : 
    2576             :         task_lock(p);
    2577           0 :         if (slack_ns == 0)
    2578           0 :                 p->timer_slack_ns = p->default_timer_slack_ns;
    2579             :         else
    2580           0 :                 p->timer_slack_ns = slack_ns;
    2581             :         task_unlock(p);
    2582             : 
    2583             : out:
    2584           0 :         put_task_struct(p);
    2585             : 
    2586           0 :         return count;
    2587             : }
    2588             : 
    2589           0 : static int timerslack_ns_show(struct seq_file *m, void *v)
    2590             : {
    2591           0 :         struct inode *inode = m->private;
    2592             :         struct task_struct *p;
    2593           0 :         int err = 0;
    2594             : 
    2595           0 :         p = get_proc_task(inode);
    2596           0 :         if (!p)
    2597             :                 return -ESRCH;
    2598             : 
    2599           0 :         if (p != current) {
    2600             :                 rcu_read_lock();
    2601           0 :                 if (!ns_capable(__task_cred(p)->user_ns, CAP_SYS_NICE)) {
    2602             :                         rcu_read_unlock();
    2603           0 :                         err = -EPERM;
    2604           0 :                         goto out;
    2605             :                 }
    2606             :                 rcu_read_unlock();
    2607             : 
    2608           0 :                 err = security_task_getscheduler(p);
    2609             :                 if (err)
    2610             :                         goto out;
    2611             :         }
    2612             : 
    2613             :         task_lock(p);
    2614           0 :         seq_printf(m, "%llu\n", p->timer_slack_ns);
    2615             :         task_unlock(p);
    2616             : 
    2617             : out:
    2618           0 :         put_task_struct(p);
    2619             : 
    2620           0 :         return err;
    2621             : }
    2622             : 
    2623           0 : static int timerslack_ns_open(struct inode *inode, struct file *filp)
    2624             : {
    2625           0 :         return single_open(filp, timerslack_ns_show, inode);
    2626             : }
    2627             : 
    2628             : static const struct file_operations proc_pid_set_timerslack_ns_operations = {
    2629             :         .open           = timerslack_ns_open,
    2630             :         .read           = seq_read,
    2631             :         .write          = timerslack_ns_write,
    2632             :         .llseek         = seq_lseek,
    2633             :         .release        = single_release,
    2634             : };
    2635             : 
    2636           0 : static struct dentry *proc_pident_instantiate(struct dentry *dentry,
    2637             :         struct task_struct *task, const void *ptr)
    2638             : {
    2639           0 :         const struct pid_entry *p = ptr;
    2640             :         struct inode *inode;
    2641             :         struct proc_inode *ei;
    2642             : 
    2643           0 :         inode = proc_pid_make_inode(dentry->d_sb, task, p->mode);
    2644           0 :         if (!inode)
    2645             :                 return ERR_PTR(-ENOENT);
    2646             : 
    2647           0 :         ei = PROC_I(inode);
    2648           0 :         if (S_ISDIR(inode->i_mode))
    2649           0 :                 set_nlink(inode, 2);    /* Use getattr to fix if necessary */
    2650           0 :         if (p->iop)
    2651           0 :                 inode->i_op = p->iop;
    2652           0 :         if (p->fop)
    2653           0 :                 inode->i_fop = p->fop;
    2654           0 :         ei->op = p->op;
    2655           0 :         pid_update_inode(task, inode);
    2656           0 :         d_set_d_op(dentry, &pid_dentry_operations);
    2657           0 :         return d_splice_alias(inode, dentry);
    2658             : }
    2659             : 
    2660           0 : static struct dentry *proc_pident_lookup(struct inode *dir, 
    2661             :                                          struct dentry *dentry,
    2662             :                                          const struct pid_entry *p,
    2663             :                                          const struct pid_entry *end)
    2664             : {
    2665           0 :         struct task_struct *task = get_proc_task(dir);
    2666           0 :         struct dentry *res = ERR_PTR(-ENOENT);
    2667             : 
    2668           0 :         if (!task)
    2669             :                 goto out_no_task;
    2670             : 
    2671             :         /*
    2672             :          * Yes, it does not scale. And it should not. Don't add
    2673             :          * new entries into /proc/<tgid>/ without very good reasons.
    2674             :          */
    2675           0 :         for (; p < end; p++) {
    2676           0 :                 if (p->len != dentry->d_name.len)
    2677           0 :                         continue;
    2678           0 :                 if (!memcmp(dentry->d_name.name, p->name, p->len)) {
    2679           0 :                         res = proc_pident_instantiate(dentry, task, p);
    2680           0 :                         break;
    2681             :                 }
    2682             :         }
    2683           0 :         put_task_struct(task);
    2684             : out_no_task:
    2685           0 :         return res;
    2686             : }
    2687             : 
    2688           0 : static int proc_pident_readdir(struct file *file, struct dir_context *ctx,
    2689             :                 const struct pid_entry *ents, unsigned int nents)
    2690             : {
    2691           0 :         struct task_struct *task = get_proc_task(file_inode(file));
    2692             :         const struct pid_entry *p;
    2693             : 
    2694           0 :         if (!task)
    2695             :                 return -ENOENT;
    2696             : 
    2697           0 :         if (!dir_emit_dots(file, ctx))
    2698             :                 goto out;
    2699             : 
    2700           0 :         if (ctx->pos >= nents + 2)
    2701             :                 goto out;
    2702             : 
    2703           0 :         for (p = ents + (ctx->pos - 2); p < ents + nents; p++) {
    2704           0 :                 if (!proc_fill_cache(file, ctx, p->name, p->len,
    2705             :                                 proc_pident_instantiate, task, p))
    2706             :                         break;
    2707           0 :                 ctx->pos++;
    2708             :         }
    2709             : out:
    2710           0 :         put_task_struct(task);
    2711           0 :         return 0;
    2712             : }
    2713             : 
    2714             : #ifdef CONFIG_SECURITY
    2715             : static int proc_pid_attr_open(struct inode *inode, struct file *file)
    2716             : {
    2717             :         file->private_data = NULL;
    2718             :         __mem_open(inode, file, PTRACE_MODE_READ_FSCREDS);
    2719             :         return 0;
    2720             : }
    2721             : 
    2722             : static ssize_t proc_pid_attr_read(struct file * file, char __user * buf,
    2723             :                                   size_t count, loff_t *ppos)
    2724             : {
    2725             :         struct inode * inode = file_inode(file);
    2726             :         char *p = NULL;
    2727             :         ssize_t length;
    2728             :         struct task_struct *task = get_proc_task(inode);
    2729             : 
    2730             :         if (!task)
    2731             :                 return -ESRCH;
    2732             : 
    2733             :         length = security_getprocattr(task, PROC_I(inode)->op.lsm,
    2734             :                                       file->f_path.dentry->d_name.name,
    2735             :                                       &p);
    2736             :         put_task_struct(task);
    2737             :         if (length > 0)
    2738             :                 length = simple_read_from_buffer(buf, count, ppos, p, length);
    2739             :         kfree(p);
    2740             :         return length;
    2741             : }
    2742             : 
    2743             : static ssize_t proc_pid_attr_write(struct file * file, const char __user * buf,
    2744             :                                    size_t count, loff_t *ppos)
    2745             : {
    2746             :         struct inode * inode = file_inode(file);
    2747             :         struct task_struct *task;
    2748             :         void *page;
    2749             :         int rv;
    2750             : 
    2751             :         /* A task may only write when it was the opener. */
    2752             :         if (file->private_data != current->mm)
    2753             :                 return -EPERM;
    2754             : 
    2755             :         rcu_read_lock();
    2756             :         task = pid_task(proc_pid(inode), PIDTYPE_PID);
    2757             :         if (!task) {
    2758             :                 rcu_read_unlock();
    2759             :                 return -ESRCH;
    2760             :         }
    2761             :         /* A task may only write its own attributes. */
    2762             :         if (current != task) {
    2763             :                 rcu_read_unlock();
    2764             :                 return -EACCES;
    2765             :         }
    2766             :         /* Prevent changes to overridden credentials. */
    2767             :         if (current_cred() != current_real_cred()) {
    2768             :                 rcu_read_unlock();
    2769             :                 return -EBUSY;
    2770             :         }
    2771             :         rcu_read_unlock();
    2772             : 
    2773             :         if (count > PAGE_SIZE)
    2774             :                 count = PAGE_SIZE;
    2775             : 
    2776             :         /* No partial writes. */
    2777             :         if (*ppos != 0)
    2778             :                 return -EINVAL;
    2779             : 
    2780             :         page = memdup_user(buf, count);
    2781             :         if (IS_ERR(page)) {
    2782             :                 rv = PTR_ERR(page);
    2783             :                 goto out;
    2784             :         }
    2785             : 
    2786             :         /* Guard against adverse ptrace interaction */
    2787             :         rv = mutex_lock_interruptible(&current->signal->cred_guard_mutex);
    2788             :         if (rv < 0)
    2789             :                 goto out_free;
    2790             : 
    2791             :         rv = security_setprocattr(PROC_I(inode)->op.lsm,
    2792             :                                   file->f_path.dentry->d_name.name, page,
    2793             :                                   count);
    2794             :         mutex_unlock(&current->signal->cred_guard_mutex);
    2795             : out_free:
    2796             :         kfree(page);
    2797             : out:
    2798             :         return rv;
    2799             : }
    2800             : 
    2801             : static const struct file_operations proc_pid_attr_operations = {
    2802             :         .open           = proc_pid_attr_open,
    2803             :         .read           = proc_pid_attr_read,
    2804             :         .write          = proc_pid_attr_write,
    2805             :         .llseek         = generic_file_llseek,
    2806             :         .release        = mem_release,
    2807             : };
    2808             : 
    2809             : #define LSM_DIR_OPS(LSM) \
    2810             : static int proc_##LSM##_attr_dir_iterate(struct file *filp, \
    2811             :                              struct dir_context *ctx) \
    2812             : { \
    2813             :         return proc_pident_readdir(filp, ctx, \
    2814             :                                    LSM##_attr_dir_stuff, \
    2815             :                                    ARRAY_SIZE(LSM##_attr_dir_stuff)); \
    2816             : } \
    2817             : \
    2818             : static const struct file_operations proc_##LSM##_attr_dir_ops = { \
    2819             :         .read           = generic_read_dir, \
    2820             :         .iterate        = proc_##LSM##_attr_dir_iterate, \
    2821             :         .llseek         = default_llseek, \
    2822             : }; \
    2823             : \
    2824             : static struct dentry *proc_##LSM##_attr_dir_lookup(struct inode *dir, \
    2825             :                                 struct dentry *dentry, unsigned int flags) \
    2826             : { \
    2827             :         return proc_pident_lookup(dir, dentry, \
    2828             :                                   LSM##_attr_dir_stuff, \
    2829             :                                   LSM##_attr_dir_stuff + ARRAY_SIZE(LSM##_attr_dir_stuff)); \
    2830             : } \
    2831             : \
    2832             : static const struct inode_operations proc_##LSM##_attr_dir_inode_ops = { \
    2833             :         .lookup         = proc_##LSM##_attr_dir_lookup, \
    2834             :         .getattr        = pid_getattr, \
    2835             :         .setattr        = proc_setattr, \
    2836             : }
    2837             : 
    2838             : #ifdef CONFIG_SECURITY_SMACK
    2839             : static const struct pid_entry smack_attr_dir_stuff[] = {
    2840             :         ATTR("smack", "current",    0666),
    2841             : };
    2842             : LSM_DIR_OPS(smack);
    2843             : #endif
    2844             : 
    2845             : #ifdef CONFIG_SECURITY_APPARMOR
    2846             : static const struct pid_entry apparmor_attr_dir_stuff[] = {
    2847             :         ATTR("apparmor", "current", 0666),
    2848             :         ATTR("apparmor", "prev",    0444),
    2849             :         ATTR("apparmor", "exec",    0666),
    2850             : };
    2851             : LSM_DIR_OPS(apparmor);
    2852             : #endif
    2853             : 
    2854             : static const struct pid_entry attr_dir_stuff[] = {
    2855             :         ATTR(NULL, "current",         0666),
    2856             :         ATTR(NULL, "prev",            0444),
    2857             :         ATTR(NULL, "exec",            0666),
    2858             :         ATTR(NULL, "fscreate",                0666),
    2859             :         ATTR(NULL, "keycreate",               0666),
    2860             :         ATTR(NULL, "sockcreate",      0666),
    2861             : #ifdef CONFIG_SECURITY_SMACK
    2862             :         DIR("smack",                  0555,
    2863             :             proc_smack_attr_dir_inode_ops, proc_smack_attr_dir_ops),
    2864             : #endif
    2865             : #ifdef CONFIG_SECURITY_APPARMOR
    2866             :         DIR("apparmor",                       0555,
    2867             :             proc_apparmor_attr_dir_inode_ops, proc_apparmor_attr_dir_ops),
    2868             : #endif
    2869             : };
    2870             : 
    2871             : static int proc_attr_dir_readdir(struct file *file, struct dir_context *ctx)
    2872             : {
    2873             :         return proc_pident_readdir(file, ctx, 
    2874             :                                    attr_dir_stuff, ARRAY_SIZE(attr_dir_stuff));
    2875             : }
    2876             : 
    2877             : static const struct file_operations proc_attr_dir_operations = {
    2878             :         .read           = generic_read_dir,
    2879             :         .iterate_shared = proc_attr_dir_readdir,
    2880             :         .llseek         = generic_file_llseek,
    2881             : };
    2882             : 
    2883             : static struct dentry *proc_attr_dir_lookup(struct inode *dir,
    2884             :                                 struct dentry *dentry, unsigned int flags)
    2885             : {
    2886             :         return proc_pident_lookup(dir, dentry,
    2887             :                                   attr_dir_stuff,
    2888             :                                   attr_dir_stuff + ARRAY_SIZE(attr_dir_stuff));
    2889             : }
    2890             : 
    2891             : static const struct inode_operations proc_attr_dir_inode_operations = {
    2892             :         .lookup         = proc_attr_dir_lookup,
    2893             :         .getattr        = pid_getattr,
    2894             :         .setattr        = proc_setattr,
    2895             : };
    2896             : 
    2897             : #endif
    2898             : 
    2899             : #ifdef CONFIG_ELF_CORE
    2900           0 : static ssize_t proc_coredump_filter_read(struct file *file, char __user *buf,
    2901             :                                          size_t count, loff_t *ppos)
    2902             : {
    2903           0 :         struct task_struct *task = get_proc_task(file_inode(file));
    2904             :         struct mm_struct *mm;
    2905             :         char buffer[PROC_NUMBUF];
    2906             :         size_t len;
    2907             :         int ret;
    2908             : 
    2909           0 :         if (!task)
    2910             :                 return -ESRCH;
    2911             : 
    2912           0 :         ret = 0;
    2913           0 :         mm = get_task_mm(task);
    2914           0 :         if (mm) {
    2915           0 :                 len = snprintf(buffer, sizeof(buffer), "%08lx\n",
    2916           0 :                                ((mm->flags & MMF_DUMP_FILTER_MASK) >>
    2917             :                                 MMF_DUMP_FILTER_SHIFT));
    2918           0 :                 mmput(mm);
    2919           0 :                 ret = simple_read_from_buffer(buf, count, ppos, buffer, len);
    2920             :         }
    2921             : 
    2922           0 :         put_task_struct(task);
    2923             : 
    2924           0 :         return ret;
    2925             : }
    2926             : 
    2927           0 : static ssize_t proc_coredump_filter_write(struct file *file,
    2928             :                                           const char __user *buf,
    2929             :                                           size_t count,
    2930             :                                           loff_t *ppos)
    2931             : {
    2932             :         struct task_struct *task;
    2933             :         struct mm_struct *mm;
    2934             :         unsigned int val;
    2935             :         int ret;
    2936             :         int i;
    2937             :         unsigned long mask;
    2938             : 
    2939           0 :         ret = kstrtouint_from_user(buf, count, 0, &val);
    2940           0 :         if (ret < 0)
    2941           0 :                 return ret;
    2942             : 
    2943           0 :         ret = -ESRCH;
    2944           0 :         task = get_proc_task(file_inode(file));
    2945           0 :         if (!task)
    2946             :                 goto out_no_task;
    2947             : 
    2948           0 :         mm = get_task_mm(task);
    2949           0 :         if (!mm)
    2950             :                 goto out_no_mm;
    2951             :         ret = 0;
    2952             : 
    2953           0 :         for (i = 0, mask = 1; i < MMF_DUMP_FILTER_BITS; i++, mask <<= 1) {
    2954           0 :                 if (val & mask)
    2955           0 :                         set_bit(i + MMF_DUMP_FILTER_SHIFT, &mm->flags);
    2956             :                 else
    2957           0 :                         clear_bit(i + MMF_DUMP_FILTER_SHIFT, &mm->flags);
    2958             :         }
    2959             : 
    2960           0 :         mmput(mm);
    2961             :  out_no_mm:
    2962           0 :         put_task_struct(task);
    2963             :  out_no_task:
    2964           0 :         if (ret < 0)
    2965           0 :                 return ret;
    2966           0 :         return count;
    2967             : }
    2968             : 
    2969             : static const struct file_operations proc_coredump_filter_operations = {
    2970             :         .read           = proc_coredump_filter_read,
    2971             :         .write          = proc_coredump_filter_write,
    2972             :         .llseek         = generic_file_llseek,
    2973             : };
    2974             : #endif
    2975             : 
    2976             : #ifdef CONFIG_TASK_IO_ACCOUNTING
    2977             : static int do_io_accounting(struct task_struct *task, struct seq_file *m, int whole)
    2978             : {
    2979             :         struct task_io_accounting acct = task->ioac;
    2980             :         unsigned long flags;
    2981             :         int result;
    2982             : 
    2983             :         result = down_read_killable(&task->signal->exec_update_lock);
    2984             :         if (result)
    2985             :                 return result;
    2986             : 
    2987             :         if (!ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS)) {
    2988             :                 result = -EACCES;
    2989             :                 goto out_unlock;
    2990             :         }
    2991             : 
    2992             :         if (whole && lock_task_sighand(task, &flags)) {
    2993             :                 struct task_struct *t = task;
    2994             : 
    2995             :                 task_io_accounting_add(&acct, &task->signal->ioac);
    2996             :                 while_each_thread(task, t)
    2997             :                         task_io_accounting_add(&acct, &t->ioac);
    2998             : 
    2999             :                 unlock_task_sighand(task, &flags);
    3000             :         }
    3001             :         seq_printf(m,
    3002             :                    "rchar: %llu\n"
    3003             :                    "wchar: %llu\n"
    3004             :                    "syscr: %llu\n"
    3005             :                    "syscw: %llu\n"
    3006             :                    "read_bytes: %llu\n"
    3007             :                    "write_bytes: %llu\n"
    3008             :                    "cancelled_write_bytes: %llu\n",
    3009             :                    (unsigned long long)acct.rchar,
    3010             :                    (unsigned long long)acct.wchar,
    3011             :                    (unsigned long long)acct.syscr,
    3012             :                    (unsigned long long)acct.syscw,
    3013             :                    (unsigned long long)acct.read_bytes,
    3014             :                    (unsigned long long)acct.write_bytes,
    3015             :                    (unsigned long long)acct.cancelled_write_bytes);
    3016             :         result = 0;
    3017             : 
    3018             : out_unlock:
    3019             :         up_read(&task->signal->exec_update_lock);
    3020             :         return result;
    3021             : }
    3022             : 
    3023             : static int proc_tid_io_accounting(struct seq_file *m, struct pid_namespace *ns,
    3024             :                                   struct pid *pid, struct task_struct *task)
    3025             : {
    3026             :         return do_io_accounting(task, m, 0);
    3027             : }
    3028             : 
    3029             : static int proc_tgid_io_accounting(struct seq_file *m, struct pid_namespace *ns,
    3030             :                                    struct pid *pid, struct task_struct *task)
    3031             : {
    3032             :         return do_io_accounting(task, m, 1);
    3033             : }
    3034             : #endif /* CONFIG_TASK_IO_ACCOUNTING */
    3035             : 
    3036             : #ifdef CONFIG_USER_NS
    3037             : static int proc_id_map_open(struct inode *inode, struct file *file,
    3038             :         const struct seq_operations *seq_ops)
    3039             : {
    3040             :         struct user_namespace *ns = NULL;
    3041             :         struct task_struct *task;
    3042             :         struct seq_file *seq;
    3043             :         int ret = -EINVAL;
    3044             : 
    3045             :         task = get_proc_task(inode);
    3046             :         if (task) {
    3047             :                 rcu_read_lock();
    3048             :                 ns = get_user_ns(task_cred_xxx(task, user_ns));
    3049             :                 rcu_read_unlock();
    3050             :                 put_task_struct(task);
    3051             :         }
    3052             :         if (!ns)
    3053             :                 goto err;
    3054             : 
    3055             :         ret = seq_open(file, seq_ops);
    3056             :         if (ret)
    3057             :                 goto err_put_ns;
    3058             : 
    3059             :         seq = file->private_data;
    3060             :         seq->private = ns;
    3061             : 
    3062             :         return 0;
    3063             : err_put_ns:
    3064             :         put_user_ns(ns);
    3065             : err:
    3066             :         return ret;
    3067             : }
    3068             : 
    3069             : static int proc_id_map_release(struct inode *inode, struct file *file)
    3070             : {
    3071             :         struct seq_file *seq = file->private_data;
    3072             :         struct user_namespace *ns = seq->private;
    3073             :         put_user_ns(ns);
    3074             :         return seq_release(inode, file);
    3075             : }
    3076             : 
    3077             : static int proc_uid_map_open(struct inode *inode, struct file *file)
    3078             : {
    3079             :         return proc_id_map_open(inode, file, &proc_uid_seq_operations);
    3080             : }
    3081             : 
    3082             : static int proc_gid_map_open(struct inode *inode, struct file *file)
    3083             : {
    3084             :         return proc_id_map_open(inode, file, &proc_gid_seq_operations);
    3085             : }
    3086             : 
    3087             : static int proc_projid_map_open(struct inode *inode, struct file *file)
    3088             : {
    3089             :         return proc_id_map_open(inode, file, &proc_projid_seq_operations);
    3090             : }
    3091             : 
    3092             : static const struct file_operations proc_uid_map_operations = {
    3093             :         .open           = proc_uid_map_open,
    3094             :         .write          = proc_uid_map_write,
    3095             :         .read           = seq_read,
    3096             :         .llseek         = seq_lseek,
    3097             :         .release        = proc_id_map_release,
    3098             : };
    3099             : 
    3100             : static const struct file_operations proc_gid_map_operations = {
    3101             :         .open           = proc_gid_map_open,
    3102             :         .write          = proc_gid_map_write,
    3103             :         .read           = seq_read,
    3104             :         .llseek         = seq_lseek,
    3105             :         .release        = proc_id_map_release,
    3106             : };
    3107             : 
    3108             : static const struct file_operations proc_projid_map_operations = {
    3109             :         .open           = proc_projid_map_open,
    3110             :         .write          = proc_projid_map_write,
    3111             :         .read           = seq_read,
    3112             :         .llseek         = seq_lseek,
    3113             :         .release        = proc_id_map_release,
    3114             : };
    3115             : 
    3116             : static int proc_setgroups_open(struct inode *inode, struct file *file)
    3117             : {
    3118             :         struct user_namespace *ns = NULL;
    3119             :         struct task_struct *task;
    3120             :         int ret;
    3121             : 
    3122             :         ret = -ESRCH;
    3123             :         task = get_proc_task(inode);
    3124             :         if (task) {
    3125             :                 rcu_read_lock();
    3126             :                 ns = get_user_ns(task_cred_xxx(task, user_ns));
    3127             :                 rcu_read_unlock();
    3128             :                 put_task_struct(task);
    3129             :         }
    3130             :         if (!ns)
    3131             :                 goto err;
    3132             : 
    3133             :         if (file->f_mode & FMODE_WRITE) {
    3134             :                 ret = -EACCES;
    3135             :                 if (!ns_capable(ns, CAP_SYS_ADMIN))
    3136             :                         goto err_put_ns;
    3137             :         }
    3138             : 
    3139             :         ret = single_open(file, &proc_setgroups_show, ns);
    3140             :         if (ret)
    3141             :                 goto err_put_ns;
    3142             : 
    3143             :         return 0;
    3144             : err_put_ns:
    3145             :         put_user_ns(ns);
    3146             : err:
    3147             :         return ret;
    3148             : }
    3149             : 
    3150             : static int proc_setgroups_release(struct inode *inode, struct file *file)
    3151             : {
    3152             :         struct seq_file *seq = file->private_data;
    3153             :         struct user_namespace *ns = seq->private;
    3154             :         int ret = single_release(inode, file);
    3155             :         put_user_ns(ns);
    3156             :         return ret;
    3157             : }
    3158             : 
    3159             : static const struct file_operations proc_setgroups_operations = {
    3160             :         .open           = proc_setgroups_open,
    3161             :         .write          = proc_setgroups_write,
    3162             :         .read           = seq_read,
    3163             :         .llseek         = seq_lseek,
    3164             :         .release        = proc_setgroups_release,
    3165             : };
    3166             : #endif /* CONFIG_USER_NS */
    3167             : 
    3168           0 : static int proc_pid_personality(struct seq_file *m, struct pid_namespace *ns,
    3169             :                                 struct pid *pid, struct task_struct *task)
    3170             : {
    3171           0 :         int err = lock_trace(task);
    3172           0 :         if (!err) {
    3173           0 :                 seq_printf(m, "%08x\n", task->personality);
    3174           0 :                 unlock_trace(task);
    3175             :         }
    3176           0 :         return err;
    3177             : }
    3178             : 
    3179             : #ifdef CONFIG_LIVEPATCH
    3180             : static int proc_pid_patch_state(struct seq_file *m, struct pid_namespace *ns,
    3181             :                                 struct pid *pid, struct task_struct *task)
    3182             : {
    3183             :         seq_printf(m, "%d\n", task->patch_state);
    3184             :         return 0;
    3185             : }
    3186             : #endif /* CONFIG_LIVEPATCH */
    3187             : 
    3188             : #ifdef CONFIG_KSM
    3189             : static int proc_pid_ksm_merging_pages(struct seq_file *m, struct pid_namespace *ns,
    3190             :                                 struct pid *pid, struct task_struct *task)
    3191             : {
    3192             :         struct mm_struct *mm;
    3193             : 
    3194             :         mm = get_task_mm(task);
    3195             :         if (mm) {
    3196             :                 seq_printf(m, "%lu\n", mm->ksm_merging_pages);
    3197             :                 mmput(mm);
    3198             :         }
    3199             : 
    3200             :         return 0;
    3201             : }
    3202             : static int proc_pid_ksm_stat(struct seq_file *m, struct pid_namespace *ns,
    3203             :                                 struct pid *pid, struct task_struct *task)
    3204             : {
    3205             :         struct mm_struct *mm;
    3206             : 
    3207             :         mm = get_task_mm(task);
    3208             :         if (mm) {
    3209             :                 seq_printf(m, "ksm_rmap_items %lu\n", mm->ksm_rmap_items);
    3210             :                 seq_printf(m, "ksm_merging_pages %lu\n", mm->ksm_merging_pages);
    3211             :                 seq_printf(m, "ksm_process_profit %ld\n", ksm_process_profit(mm));
    3212             :                 mmput(mm);
    3213             :         }
    3214             : 
    3215             :         return 0;
    3216             : }
    3217             : #endif /* CONFIG_KSM */
    3218             : 
    3219             : #ifdef CONFIG_STACKLEAK_METRICS
    3220             : static int proc_stack_depth(struct seq_file *m, struct pid_namespace *ns,
    3221             :                                 struct pid *pid, struct task_struct *task)
    3222             : {
    3223             :         unsigned long prev_depth = THREAD_SIZE -
    3224             :                                 (task->prev_lowest_stack & (THREAD_SIZE - 1));
    3225             :         unsigned long depth = THREAD_SIZE -
    3226             :                                 (task->lowest_stack & (THREAD_SIZE - 1));
    3227             : 
    3228             :         seq_printf(m, "previous stack depth: %lu\nstack depth: %lu\n",
    3229             :                                                         prev_depth, depth);
    3230             :         return 0;
    3231             : }
    3232             : #endif /* CONFIG_STACKLEAK_METRICS */
    3233             : 
    3234             : /*
    3235             :  * Thread groups
    3236             :  */
    3237             : static const struct file_operations proc_task_operations;
    3238             : static const struct inode_operations proc_task_inode_operations;
    3239             : 
/*
 * Entry table for the thread-group directory.  It is handed to
 * proc_pident_readdir() and proc_pident_lookup() by the tgid base handlers
 * below, so the order of the entries is the order they are walked in.
 *
 * Each entry names a file, directory or link together with its mode bits
 * and the operations (or show routine) implementing it; many entries exist
 * only when the corresponding CONFIG_* option is enabled.
 */
static const struct pid_entry tgid_base_stuff[] = {
        DIR("task",       S_IRUGO|S_IXUGO, proc_task_inode_operations, proc_task_operations),
        DIR("fd",         S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations),
        DIR("map_files",  S_IRUSR|S_IXUSR, proc_map_files_inode_operations, proc_map_files_operations),
        DIR("fdinfo",     S_IRUGO|S_IXUGO, proc_fdinfo_inode_operations, proc_fdinfo_operations),
        DIR("ns",       S_IRUSR|S_IXUGO, proc_ns_dir_inode_operations, proc_ns_dir_operations),
#ifdef CONFIG_NET
        DIR("net",        S_IRUGO|S_IXUGO, proc_net_inode_operations, proc_net_operations),
#endif
        REG("environ",    S_IRUSR, proc_environ_operations),
        REG("auxv",       S_IRUSR, proc_auxv_operations),
        ONE("status",     S_IRUGO, proc_pid_status),
        ONE("personality", S_IRUSR, proc_pid_personality),
        ONE("limits",   S_IRUGO, proc_pid_limits),
#ifdef CONFIG_SCHED_DEBUG
        REG("sched",      S_IRUGO|S_IWUSR, proc_pid_sched_operations),
#endif
#ifdef CONFIG_SCHED_AUTOGROUP
        REG("autogroup",  S_IRUGO|S_IWUSR, proc_pid_sched_autogroup_operations),
#endif
#ifdef CONFIG_TIME_NS
        REG("timens_offsets",  S_IRUGO|S_IWUSR, proc_timens_offsets_operations),
#endif
        REG("comm",      S_IRUGO|S_IWUSR, proc_pid_set_comm_operations),
#ifdef CONFIG_HAVE_ARCH_TRACEHOOK
        ONE("syscall",    S_IRUSR, proc_pid_syscall),
#endif
        REG("cmdline",    S_IRUGO, proc_pid_cmdline_ops),
        ONE("stat",       S_IRUGO, proc_tgid_stat),
        ONE("statm",      S_IRUGO, proc_pid_statm),
        REG("maps",       S_IRUGO, proc_pid_maps_operations),
#ifdef CONFIG_NUMA
        REG("numa_maps",  S_IRUGO, proc_pid_numa_maps_operations),
#endif
        REG("mem",        S_IRUSR|S_IWUSR, proc_mem_operations),
        LNK("cwd",        proc_cwd_link),
        LNK("root",       proc_root_link),
        LNK("exe",        proc_exe_link),
        REG("mounts",     S_IRUGO, proc_mounts_operations),
        REG("mountinfo",  S_IRUGO, proc_mountinfo_operations),
        REG("mountstats", S_IRUSR, proc_mountstats_operations),
#ifdef CONFIG_PROC_PAGE_MONITOR
        REG("clear_refs", S_IWUSR, proc_clear_refs_operations),
        REG("smaps",      S_IRUGO, proc_pid_smaps_operations),
        REG("smaps_rollup", S_IRUGO, proc_pid_smaps_rollup_operations),
        REG("pagemap",    S_IRUSR, proc_pagemap_operations),
#endif
#ifdef CONFIG_SECURITY
        DIR("attr",       S_IRUGO|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations),
#endif
#ifdef CONFIG_KALLSYMS
        ONE("wchan",      S_IRUGO, proc_pid_wchan),
#endif
#ifdef CONFIG_STACKTRACE
        ONE("stack",      S_IRUSR, proc_pid_stack),
#endif
#ifdef CONFIG_SCHED_INFO
        ONE("schedstat",  S_IRUGO, proc_pid_schedstat),
#endif
#ifdef CONFIG_LATENCYTOP
        REG("latency",  S_IRUGO, proc_lstats_operations),
#endif
#ifdef CONFIG_PROC_PID_CPUSET
        ONE("cpuset",     S_IRUGO, proc_cpuset_show),
#endif
#ifdef CONFIG_CGROUPS
        ONE("cgroup",  S_IRUGO, proc_cgroup_show),
#endif
#ifdef CONFIG_PROC_CPU_RESCTRL
        ONE("cpu_resctrl_groups", S_IRUGO, proc_resctrl_show),
#endif
        ONE("oom_score",  S_IRUGO, proc_oom_score),
        REG("oom_adj",    S_IRUGO|S_IWUSR, proc_oom_adj_operations),
        REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations),
#ifdef CONFIG_AUDIT
        REG("loginuid",   S_IWUSR|S_IRUGO, proc_loginuid_operations),
        REG("sessionid",  S_IRUGO, proc_sessionid_operations),
#endif
#ifdef CONFIG_FAULT_INJECTION
        REG("make-it-fail", S_IRUGO|S_IWUSR, proc_fault_inject_operations),
        REG("fail-nth", 0644, proc_fail_nth_operations),
#endif
#ifdef CONFIG_ELF_CORE
        REG("coredump_filter", S_IRUGO|S_IWUSR, proc_coredump_filter_operations),
#endif
#ifdef CONFIG_TASK_IO_ACCOUNTING
        ONE("io",     S_IRUSR, proc_tgid_io_accounting),
#endif
#ifdef CONFIG_USER_NS
        REG("uid_map",    S_IRUGO|S_IWUSR, proc_uid_map_operations),
        REG("gid_map",    S_IRUGO|S_IWUSR, proc_gid_map_operations),
        REG("projid_map", S_IRUGO|S_IWUSR, proc_projid_map_operations),
        REG("setgroups",  S_IRUGO|S_IWUSR, proc_setgroups_operations),
#endif
#if defined(CONFIG_CHECKPOINT_RESTORE) && defined(CONFIG_POSIX_TIMERS)
        REG("timers",   S_IRUGO, proc_timers_operations),
#endif
        REG("timerslack_ns", S_IRUGO|S_IWUGO, proc_pid_set_timerslack_ns_operations),
#ifdef CONFIG_LIVEPATCH
        ONE("patch_state",  S_IRUSR, proc_pid_patch_state),
#endif
#ifdef CONFIG_STACKLEAK_METRICS
        ONE("stack_depth", S_IRUGO, proc_stack_depth),
#endif
#ifdef CONFIG_PROC_PID_ARCH_STATUS
        ONE("arch_status", S_IRUGO, proc_pid_arch_status),
#endif
#ifdef CONFIG_SECCOMP_CACHE_DEBUG
        ONE("seccomp_cache", S_IRUSR, proc_pid_seccomp_cache),
#endif
#ifdef CONFIG_KSM
        ONE("ksm_merging_pages",  S_IRUSR, proc_pid_ksm_merging_pages),
        ONE("ksm_stat",  S_IRUSR, proc_pid_ksm_stat),
#endif
};
    3355             : 
    3356           0 : static int proc_tgid_base_readdir(struct file *file, struct dir_context *ctx)
    3357             : {
    3358           0 :         return proc_pident_readdir(file, ctx,
    3359             :                                    tgid_base_stuff, ARRAY_SIZE(tgid_base_stuff));
    3360             : }
    3361             : 
    3362             : static const struct file_operations proc_tgid_base_operations = {
    3363             :         .read           = generic_read_dir,
    3364             :         .iterate_shared = proc_tgid_base_readdir,
    3365             :         .llseek         = generic_file_llseek,
    3366             : };
    3367             : 
    3368           0 : struct pid *tgid_pidfd_to_pid(const struct file *file)
    3369             : {
    3370           0 :         if (file->f_op != &proc_tgid_base_operations)
    3371             :                 return ERR_PTR(-EBADF);
    3372             : 
    3373           0 :         return proc_pid(file_inode(file));
    3374             : }
    3375             : 
    3376           0 : static struct dentry *proc_tgid_base_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
    3377             : {
    3378           0 :         return proc_pident_lookup(dir, dentry,
    3379             :                                   tgid_base_stuff,
    3380             :                                   tgid_base_stuff + ARRAY_SIZE(tgid_base_stuff));
    3381             : }
    3382             : 
    3383             : static const struct inode_operations proc_tgid_base_inode_operations = {
    3384             :         .lookup         = proc_tgid_base_lookup,
    3385             :         .getattr        = pid_getattr,
    3386             :         .setattr        = proc_setattr,
    3387             :         .permission     = proc_pid_permission,
    3388             : };
    3389             : 
    3390             : /**
    3391             :  * proc_flush_pid -  Remove dcache entries for @pid from the /proc dcache.
    3392             :  * @pid: pid that should be flushed.
    3393             :  *
    3394             :  * This function walks a list of inodes (that belong to any proc
    3395             :  * filesystem) that are attached to the pid and flushes them from
    3396             :  * the dentry cache.
    3397             :  *
    3398             :  * It is safe and reasonable to cache /proc entries for a task until
    3399             :  * that task exits.  After that they just clog up the dcache with
    3400             :  * useless entries, possibly causing useful dcache entries to be
    3401             :  * flushed instead.  This routine is provided to flush those useless
    3402             :  * dcache entries when a process is reaped.
    3403             :  *
    3404             :  * NOTE: This routine is just an optimization so it does not guarantee
    3405             :  *       that no dcache entries will exist after a process is reaped
    3406             :  *       it just makes it very unlikely that any will persist.
    3407             :  */
    3408             : 
    3409         367 : void proc_flush_pid(struct pid *pid)
    3410             : {
    3411         367 :         proc_invalidate_siblings_dcache(&pid->inodes, &pid->lock);
    3412         367 : }
    3413             : 
    3414           0 : static struct dentry *proc_pid_instantiate(struct dentry * dentry,
    3415             :                                    struct task_struct *task, const void *ptr)
    3416             : {
    3417             :         struct inode *inode;
    3418             : 
    3419           0 :         inode = proc_pid_make_base_inode(dentry->d_sb, task,
    3420             :                                          S_IFDIR | S_IRUGO | S_IXUGO);
    3421           0 :         if (!inode)
    3422             :                 return ERR_PTR(-ENOENT);
    3423             : 
    3424           0 :         inode->i_op = &proc_tgid_base_inode_operations;
    3425           0 :         inode->i_fop = &proc_tgid_base_operations;
    3426           0 :         inode->i_flags|=S_IMMUTABLE;
    3427             : 
    3428           0 :         set_nlink(inode, nlink_tgid);
    3429           0 :         pid_update_inode(task, inode);
    3430             : 
    3431           0 :         d_set_d_op(dentry, &pid_dentry_operations);
    3432           0 :         return d_splice_alias(inode, dentry);
    3433             : }
    3434             : 
    3435           0 : struct dentry *proc_pid_lookup(struct dentry *dentry, unsigned int flags)
    3436             : {
    3437             :         struct task_struct *task;
    3438             :         unsigned tgid;
    3439             :         struct proc_fs_info *fs_info;
    3440             :         struct pid_namespace *ns;
    3441           0 :         struct dentry *result = ERR_PTR(-ENOENT);
    3442             : 
    3443           0 :         tgid = name_to_int(&dentry->d_name);
    3444           0 :         if (tgid == ~0U)
    3445             :                 goto out;
    3446             : 
    3447           0 :         fs_info = proc_sb_info(dentry->d_sb);
    3448           0 :         ns = fs_info->pid_ns;
    3449             :         rcu_read_lock();
    3450           0 :         task = find_task_by_pid_ns(tgid, ns);
    3451           0 :         if (task)
    3452             :                 get_task_struct(task);
    3453             :         rcu_read_unlock();
    3454           0 :         if (!task)
    3455             :                 goto out;
    3456             : 
    3457             :         /* Limit procfs to only ptraceable tasks */
    3458           0 :         if (fs_info->hide_pid == HIDEPID_NOT_PTRACEABLE) {
    3459           0 :                 if (!has_pid_permissions(fs_info, task, HIDEPID_NO_ACCESS))
    3460             :                         goto out_put_task;
    3461             :         }
    3462             : 
    3463           0 :         result = proc_pid_instantiate(dentry, task, NULL);
    3464             : out_put_task:
    3465           0 :         put_task_struct(task);
    3466             : out:
    3467           0 :         return result;
    3468             : }
    3469             : 
/*
 * Cursor used by next_tgid() to find the first task whose tgid is greater
 * than or equal to the requested tgid.
 */
struct tgid_iter {
        unsigned int tgid;      /* in: tgid to search from; out: tgid of @task */
        struct task_struct *task;       /* task found (reference held), or NULL */
};
    3478           0 : static struct tgid_iter next_tgid(struct pid_namespace *ns, struct tgid_iter iter)
    3479             : {
    3480             :         struct pid *pid;
    3481             : 
    3482           0 :         if (iter.task)
    3483           0 :                 put_task_struct(iter.task);
    3484             :         rcu_read_lock();
    3485             : retry:
    3486           0 :         iter.task = NULL;
    3487           0 :         pid = find_ge_pid(iter.tgid, ns);
    3488           0 :         if (pid) {
    3489           0 :                 iter.tgid = pid_nr_ns(pid, ns);
    3490           0 :                 iter.task = pid_task(pid, PIDTYPE_TGID);
    3491           0 :                 if (!iter.task) {
    3492           0 :                         iter.tgid += 1;
    3493           0 :                         goto retry;
    3494             :                 }
    3495           0 :                 get_task_struct(iter.task);
    3496             :         }
    3497             :         rcu_read_unlock();
    3498           0 :         return iter;
    3499             : }
    3500             : 
    3501             : #define TGID_OFFSET (FIRST_PROCESS_ENTRY + 2)
    3502             : 
    3503             : /* for the /proc/ directory itself, after non-process stuff has been done */
    3504           0 : int proc_pid_readdir(struct file *file, struct dir_context *ctx)
    3505             : {
    3506             :         struct tgid_iter iter;
    3507           0 :         struct proc_fs_info *fs_info = proc_sb_info(file_inode(file)->i_sb);
    3508           0 :         struct pid_namespace *ns = proc_pid_ns(file_inode(file)->i_sb);
    3509           0 :         loff_t pos = ctx->pos;
    3510             : 
    3511           0 :         if (pos >= PID_MAX_LIMIT + TGID_OFFSET)
    3512             :                 return 0;
    3513             : 
    3514           0 :         if (pos == TGID_OFFSET - 2) {
    3515           0 :                 struct inode *inode = d_inode(fs_info->proc_self);
    3516           0 :                 if (!dir_emit(ctx, "self", 4, inode->i_ino, DT_LNK))
    3517             :                         return 0;
    3518           0 :                 ctx->pos = pos = pos + 1;
    3519             :         }
    3520           0 :         if (pos == TGID_OFFSET - 1) {
    3521           0 :                 struct inode *inode = d_inode(fs_info->proc_thread_self);
    3522           0 :                 if (!dir_emit(ctx, "thread-self", 11, inode->i_ino, DT_LNK))
    3523             :                         return 0;
    3524           0 :                 ctx->pos = pos = pos + 1;
    3525             :         }
    3526           0 :         iter.tgid = pos - TGID_OFFSET;
    3527           0 :         iter.task = NULL;
    3528           0 :         for (iter = next_tgid(ns, iter);
    3529             :              iter.task;
    3530           0 :              iter.tgid += 1, iter = next_tgid(ns, iter)) {
    3531             :                 char name[10 + 1];
    3532             :                 unsigned int len;
    3533             : 
    3534           0 :                 cond_resched();
    3535           0 :                 if (!has_pid_permissions(fs_info, iter.task, HIDEPID_INVISIBLE))
    3536           0 :                         continue;
    3537             : 
    3538           0 :                 len = snprintf(name, sizeof(name), "%u", iter.tgid);
    3539           0 :                 ctx->pos = iter.tgid + TGID_OFFSET;
    3540           0 :                 if (!proc_fill_cache(file, ctx, name, len,
    3541             :                                      proc_pid_instantiate, iter.task, NULL)) {
    3542           0 :                         put_task_struct(iter.task);
    3543           0 :                         return 0;
    3544             :                 }
    3545             :         }
    3546           0 :         ctx->pos = PID_MAX_LIMIT + TGID_OFFSET;
    3547           0 :         return 0;
    3548             : }
    3549             : 
    3550             : /*
    3551             :  * proc_tid_comm_permission is a special permission function exclusively
    3552             :  * used for the node /proc/<pid>/task/<tid>/comm.
    3553             :  * It bypasses generic permission checks in the case where a task of the same
    3554             :  * task group attempts to access the node.
    3555             :  * The rationale behind this is that glibc and bionic access this node for
    3556             :  * cross thread naming (pthread_set/getname_np(!self)). However, if
    3557             :  * PR_SET_DUMPABLE gets set to 0 this node among others becomes uid=0 gid=0,
    3558             :  * which locks out the cross thread naming implementation.
    3559             :  * This function makes sure that the node is always accessible for members of
    3560             :  * the same thread group.
    3561             :  */
    3562           0 : static int proc_tid_comm_permission(struct mnt_idmap *idmap,
    3563             :                                     struct inode *inode, int mask)
    3564             : {
    3565             :         bool is_same_tgroup;
    3566             :         struct task_struct *task;
    3567             : 
    3568           0 :         task = get_proc_task(inode);
    3569           0 :         if (!task)
    3570             :                 return -ESRCH;
    3571           0 :         is_same_tgroup = same_thread_group(current, task);
    3572           0 :         put_task_struct(task);
    3573             : 
    3574           0 :         if (likely(is_same_tgroup && !(mask & MAY_EXEC))) {
    3575             :                 /* This file (/proc/<pid>/task/<tid>/comm) can always be
    3576             :                  * read or written by the members of the corresponding
    3577             :                  * thread group.
    3578             :                  */
    3579             :                 return 0;
    3580             :         }
    3581             : 
    3582           0 :         return generic_permission(&nop_mnt_idmap, inode, mask);
    3583             : }
    3584             : 
    3585             : static const struct inode_operations proc_tid_comm_inode_operations = {
    3586             :                 .permission = proc_tid_comm_permission,
    3587             : };
    3588             : 
    3589             : /*
    3590             :  * Tasks
    3591             :  */
    3592             : static const struct pid_entry tid_base_stuff[] = {
    3593             :         DIR("fd",        S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations),
    3594             :         DIR("fdinfo",    S_IRUGO|S_IXUGO, proc_fdinfo_inode_operations, proc_fdinfo_operations),
    3595             :         DIR("ns",      S_IRUSR|S_IXUGO, proc_ns_dir_inode_operations, proc_ns_dir_operations),
    3596             : #ifdef CONFIG_NET
    3597             :         DIR("net",        S_IRUGO|S_IXUGO, proc_net_inode_operations, proc_net_operations),
    3598             : #endif
    3599             :         REG("environ",   S_IRUSR, proc_environ_operations),
    3600             :         REG("auxv",      S_IRUSR, proc_auxv_operations),
    3601             :         ONE("status",    S_IRUGO, proc_pid_status),
    3602             :         ONE("personality", S_IRUSR, proc_pid_personality),
    3603             :         ONE("limits",  S_IRUGO, proc_pid_limits),
    3604             : #ifdef CONFIG_SCHED_DEBUG
    3605             :         REG("sched",     S_IRUGO|S_IWUSR, proc_pid_sched_operations),
    3606             : #endif
    3607             :         NOD("comm",      S_IFREG|S_IRUGO|S_IWUSR,
    3608             :                          &proc_tid_comm_inode_operations,
    3609             :                          &proc_pid_set_comm_operations, {}),
    3610             : #ifdef CONFIG_HAVE_ARCH_TRACEHOOK
    3611             :         ONE("syscall",   S_IRUSR, proc_pid_syscall),
    3612             : #endif
    3613             :         REG("cmdline",   S_IRUGO, proc_pid_cmdline_ops),
    3614             :         ONE("stat",      S_IRUGO, proc_tid_stat),
    3615             :         ONE("statm",     S_IRUGO, proc_pid_statm),
    3616             :         REG("maps",      S_IRUGO, proc_pid_maps_operations),
    3617             : #ifdef CONFIG_PROC_CHILDREN
    3618             :         REG("children",  S_IRUGO, proc_tid_children_operations),
    3619             : #endif
    3620             : #ifdef CONFIG_NUMA
    3621             :         REG("numa_maps", S_IRUGO, proc_pid_numa_maps_operations),
    3622             : #endif
    3623             :         REG("mem",       S_IRUSR|S_IWUSR, proc_mem_operations),
    3624             :         LNK("cwd",       proc_cwd_link),
    3625             :         LNK("root",      proc_root_link),
    3626             :         LNK("exe",       proc_exe_link),
    3627             :         REG("mounts",    S_IRUGO, proc_mounts_operations),
    3628             :         REG("mountinfo",  S_IRUGO, proc_mountinfo_operations),
    3629             : #ifdef CONFIG_PROC_PAGE_MONITOR
    3630             :         REG("clear_refs", S_IWUSR, proc_clear_refs_operations),
    3631             :         REG("smaps",     S_IRUGO, proc_pid_smaps_operations),
    3632             :         REG("smaps_rollup", S_IRUGO, proc_pid_smaps_rollup_operations),
    3633             :         REG("pagemap",    S_IRUSR, proc_pagemap_operations),
    3634             : #endif
    3635             : #ifdef CONFIG_SECURITY
    3636             :         DIR("attr",      S_IRUGO|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations),
    3637             : #endif
    3638             : #ifdef CONFIG_KALLSYMS
    3639             :         ONE("wchan",     S_IRUGO, proc_pid_wchan),
    3640             : #endif
    3641             : #ifdef CONFIG_STACKTRACE
    3642             :         ONE("stack",      S_IRUSR, proc_pid_stack),
    3643             : #endif
    3644             : #ifdef CONFIG_SCHED_INFO
    3645             :         ONE("schedstat", S_IRUGO, proc_pid_schedstat),
    3646             : #endif
    3647             : #ifdef CONFIG_LATENCYTOP
    3648             :         REG("latency",  S_IRUGO, proc_lstats_operations),
    3649             : #endif
    3650             : #ifdef CONFIG_PROC_PID_CPUSET
    3651             :         ONE("cpuset",    S_IRUGO, proc_cpuset_show),
    3652             : #endif
    3653             : #ifdef CONFIG_CGROUPS
    3654             :         ONE("cgroup",  S_IRUGO, proc_cgroup_show),
    3655             : #endif
    3656             : #ifdef CONFIG_PROC_CPU_RESCTRL
    3657             :         ONE("cpu_resctrl_groups", S_IRUGO, proc_resctrl_show),
    3658             : #endif
    3659             :         ONE("oom_score", S_IRUGO, proc_oom_score),
    3660             :         REG("oom_adj",   S_IRUGO|S_IWUSR, proc_oom_adj_operations),
    3661             :         REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations),
    3662             : #ifdef CONFIG_AUDIT
    3663             :         REG("loginuid",  S_IWUSR|S_IRUGO, proc_loginuid_operations),
    3664             :         REG("sessionid",  S_IRUGO, proc_sessionid_operations),
    3665             : #endif
    3666             : #ifdef CONFIG_FAULT_INJECTION
    3667             :         REG("make-it-fail", S_IRUGO|S_IWUSR, proc_fault_inject_operations),
    3668             :         REG("fail-nth", 0644, proc_fail_nth_operations),
    3669             : #endif
    3670             : #ifdef CONFIG_TASK_IO_ACCOUNTING
    3671             :         ONE("io",     S_IRUSR, proc_tid_io_accounting),
    3672             : #endif
    3673             : #ifdef CONFIG_USER_NS
    3674             :         REG("uid_map",    S_IRUGO|S_IWUSR, proc_uid_map_operations),
    3675             :         REG("gid_map",    S_IRUGO|S_IWUSR, proc_gid_map_operations),
    3676             :         REG("projid_map", S_IRUGO|S_IWUSR, proc_projid_map_operations),
    3677             :         REG("setgroups",  S_IRUGO|S_IWUSR, proc_setgroups_operations),
    3678             : #endif
    3679             : #ifdef CONFIG_LIVEPATCH
    3680             :         ONE("patch_state",  S_IRUSR, proc_pid_patch_state),
    3681             : #endif
    3682             : #ifdef CONFIG_PROC_PID_ARCH_STATUS
    3683             :         ONE("arch_status", S_IRUGO, proc_pid_arch_status),
    3684             : #endif
    3685             : #ifdef CONFIG_SECCOMP_CACHE_DEBUG
    3686             :         ONE("seccomp_cache", S_IRUSR, proc_pid_seccomp_cache),
    3687             : #endif
    3688             : #ifdef CONFIG_KSM
    3689             :         ONE("ksm_merging_pages",  S_IRUSR, proc_pid_ksm_merging_pages),
    3690             :         ONE("ksm_stat",  S_IRUSR, proc_pid_ksm_stat),
    3691             : #endif
    3692             : };
    3693             : 
    3694           0 : static int proc_tid_base_readdir(struct file *file, struct dir_context *ctx)
    3695             : {
    3696           0 :         return proc_pident_readdir(file, ctx,
    3697             :                                    tid_base_stuff, ARRAY_SIZE(tid_base_stuff));
    3698             : }
    3699             : 
    3700           0 : static struct dentry *proc_tid_base_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
    3701             : {
    3702           0 :         return proc_pident_lookup(dir, dentry,
    3703             :                                   tid_base_stuff,
    3704             :                                   tid_base_stuff + ARRAY_SIZE(tid_base_stuff));
    3705             : }
    3706             : 
    3707             : static const struct file_operations proc_tid_base_operations = {
    3708             :         .read           = generic_read_dir,
    3709             :         .iterate_shared = proc_tid_base_readdir,
    3710             :         .llseek         = generic_file_llseek,
    3711             : };
    3712             : 
    3713             : static const struct inode_operations proc_tid_base_inode_operations = {
    3714             :         .lookup         = proc_tid_base_lookup,
    3715             :         .getattr        = pid_getattr,
    3716             :         .setattr        = proc_setattr,
    3717             : };
    3718             : 
    3719           0 : static struct dentry *proc_task_instantiate(struct dentry *dentry,
    3720             :         struct task_struct *task, const void *ptr)
    3721             : {
    3722             :         struct inode *inode;
    3723           0 :         inode = proc_pid_make_base_inode(dentry->d_sb, task,
    3724             :                                          S_IFDIR | S_IRUGO | S_IXUGO);
    3725           0 :         if (!inode)
    3726             :                 return ERR_PTR(-ENOENT);
    3727             : 
    3728           0 :         inode->i_op = &proc_tid_base_inode_operations;
    3729           0 :         inode->i_fop = &proc_tid_base_operations;
    3730           0 :         inode->i_flags |= S_IMMUTABLE;
    3731             : 
    3732           0 :         set_nlink(inode, nlink_tid);
    3733           0 :         pid_update_inode(task, inode);
    3734             : 
    3735           0 :         d_set_d_op(dentry, &pid_dentry_operations);
    3736           0 :         return d_splice_alias(inode, dentry);
    3737             : }
    3738             : 
    3739           0 : static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry, unsigned int flags)
    3740             : {
    3741             :         struct task_struct *task;
    3742           0 :         struct task_struct *leader = get_proc_task(dir);
    3743             :         unsigned tid;
    3744             :         struct proc_fs_info *fs_info;
    3745             :         struct pid_namespace *ns;
    3746           0 :         struct dentry *result = ERR_PTR(-ENOENT);
    3747             : 
    3748           0 :         if (!leader)
    3749             :                 goto out_no_task;
    3750             : 
    3751           0 :         tid = name_to_int(&dentry->d_name);
    3752           0 :         if (tid == ~0U)
    3753             :                 goto out;
    3754             : 
    3755           0 :         fs_info = proc_sb_info(dentry->d_sb);
    3756           0 :         ns = fs_info->pid_ns;
    3757             :         rcu_read_lock();
    3758           0 :         task = find_task_by_pid_ns(tid, ns);
    3759           0 :         if (task)
    3760             :                 get_task_struct(task);
    3761             :         rcu_read_unlock();
    3762           0 :         if (!task)
    3763             :                 goto out;
    3764           0 :         if (!same_thread_group(leader, task))
    3765             :                 goto out_drop_task;
    3766             : 
    3767           0 :         result = proc_task_instantiate(dentry, task, NULL);
    3768             : out_drop_task:
    3769           0 :         put_task_struct(task);
    3770             : out:
    3771           0 :         put_task_struct(leader);
    3772             : out_no_task:
    3773           0 :         return result;
    3774             : }
    3775             : 
    3776             : /*
    3777             :  * Find the first tid of a thread group to return to user space.
    3778             :  *
    3779             :  * Usually this is just the thread group leader, but if the user's
    3780             :  * buffer was too small or there was a seek into the middle of the
    3781             :  * directory we have more work to do.
    3782             :  *
    3783             :  * In the case of a short read we start with find_task_by_pid_ns().
    3784             :  *
    3785             :  * In the case of a seek we start with the leader and walk nr
    3786             :  * threads past it.
    3787             :  */
    3788           0 : static struct task_struct *first_tid(struct pid *pid, int tid, loff_t f_pos,
    3789             :                                         struct pid_namespace *ns)
    3790             : {
    3791             :         struct task_struct *pos, *task;
    3792           0 :         unsigned long nr = f_pos;
    3793             : 
    3794             :         if (nr != f_pos)        /* 32bit overflow? */
    3795             :                 return NULL;
    3796             : 
    3797             :         rcu_read_lock();
    3798           0 :         task = pid_task(pid, PIDTYPE_PID);
    3799           0 :         if (!task)
    3800             :                 goto fail;
    3801             : 
    3802             :         /* Attempt to start with the tid of a thread */
    3803           0 :         if (tid && nr) {
    3804           0 :                 pos = find_task_by_pid_ns(tid, ns);
    3805           0 :                 if (pos && same_thread_group(pos, task))
    3806             :                         goto found;
    3807             :         }
    3808             : 
    3809             :         /* If nr is not below the number of threads there is nothing to do */
    3810           0 :         if (nr >= get_nr_threads(task))
    3811             :                 goto fail;
    3812             : 
    3813             :         /* If we haven't found our starting place yet start
    3814             :          * with the leader and walk nr threads forward.
    3815             :          */
    3816           0 :         pos = task = task->group_leader;
    3817             :         do {
    3818           0 :                 if (!nr--)
    3819             :                         goto found;
    3820           0 :         } while_each_thread(task, pos);
    3821             : fail:
    3822             :         pos = NULL;
    3823             :         goto out;
    3824             : found:
    3825             :         get_task_struct(pos);
    3826             : out:
    3827             :         rcu_read_unlock();
    3828             :         return pos;
    3829             : }
    3830             : 
    3831             : /*
    3832             :  * Find the next thread in the thread list.
    3833             :  * Return NULL if there is an error or no next thread.
    3834             :  *
    3835             :  * The reference to the input task_struct is released.
    3836             :  */
    3837           0 : static struct task_struct *next_tid(struct task_struct *start)
    3838             : {
    3839           0 :         struct task_struct *pos = NULL;
    3840             :         rcu_read_lock();
    3841           0 :         if (pid_alive(start)) {
    3842           0 :                 pos = next_thread(start);
    3843           0 :                 if (thread_group_leader(pos))
    3844             :                         pos = NULL;
    3845             :                 else
    3846             :                         get_task_struct(pos);
    3847             :         }
    3848             :         rcu_read_unlock();
    3849           0 :         put_task_struct(start);
    3850           0 :         return pos;
    3851             : }
    3852             : 
    3853             : /* for the /proc/TGID/task/ directories */
    3854           0 : static int proc_task_readdir(struct file *file, struct dir_context *ctx)
    3855             : {
    3856           0 :         struct inode *inode = file_inode(file);
    3857             :         struct task_struct *task;
    3858             :         struct pid_namespace *ns;
    3859             :         int tid;
    3860             : 
    3861           0 :         if (proc_inode_is_dead(inode))
    3862             :                 return -ENOENT;
    3863             : 
    3864           0 :         if (!dir_emit_dots(file, ctx))
    3865             :                 return 0;
    3866             : 
    3867             :         /* f_version caches the tid value that the last readdir call couldn't
    3868             :          * return. lseek aka telldir automagically resets f_version to 0.
    3869             :          */
    3870           0 :         ns = proc_pid_ns(inode->i_sb);
    3871           0 :         tid = (int)file->f_version;
    3872           0 :         file->f_version = 0;
    3873           0 :         for (task = first_tid(proc_pid(inode), tid, ctx->pos - 2, ns);
    3874             :              task;
    3875           0 :              task = next_tid(task), ctx->pos++) {
    3876             :                 char name[10 + 1];
    3877             :                 unsigned int len;
    3878             : 
    3879           0 :                 tid = task_pid_nr_ns(task, ns);
    3880           0 :                 if (!tid)
    3881           0 :                         continue;       /* The task has just exited. */
    3882           0 :                 len = snprintf(name, sizeof(name), "%u", tid);
    3883           0 :                 if (!proc_fill_cache(file, ctx, name, len,
    3884             :                                 proc_task_instantiate, task, NULL)) {
    3885             :                         /* returning this tid failed, save it as the first
    3886             :                          * tid for the next readdir call */
    3887           0 :                         file->f_version = (u64)tid;
    3888           0 :                         put_task_struct(task);
    3889           0 :                         break;
    3890             :                 }
    3891             :         }
    3892             : 
    3893             :         return 0;
    3894             : }
    3895             : 
    3896           0 : static int proc_task_getattr(struct mnt_idmap *idmap,
    3897             :                              const struct path *path, struct kstat *stat,
    3898             :                              u32 request_mask, unsigned int query_flags)
    3899             : {
    3900           0 :         struct inode *inode = d_inode(path->dentry);
    3901           0 :         struct task_struct *p = get_proc_task(inode);
    3902           0 :         generic_fillattr(&nop_mnt_idmap, inode, stat);
    3903             : 
    3904           0 :         if (p) {
    3905           0 :                 stat->nlink += get_nr_threads(p);
    3906           0 :                 put_task_struct(p);
    3907             :         }
    3908             : 
    3909           0 :         return 0;
    3910             : }
    3911             : 
    3912             : static const struct inode_operations proc_task_inode_operations = {
    3913             :         .lookup         = proc_task_lookup,
    3914             :         .getattr        = proc_task_getattr,
    3915             :         .setattr        = proc_setattr,
    3916             :         .permission     = proc_pid_permission,
    3917             : };
    3918             : 
    3919             : static const struct file_operations proc_task_operations = {
    3920             :         .read           = generic_read_dir,
    3921             :         .iterate_shared = proc_task_readdir,
    3922             :         .llseek         = generic_file_llseek,
    3923             : };
    3924             : 
    3925           1 : void __init set_proc_pid_nlink(void)
    3926             : {
    3927           1 :         nlink_tid = pid_entry_nlink(tid_base_stuff, ARRAY_SIZE(tid_base_stuff));
    3928           1 :         nlink_tgid = pid_entry_nlink(tgid_base_stuff, ARRAY_SIZE(tgid_base_stuff));
    3929           1 : }

Generated by: LCOV version 1.14