Line data Source code
1 : // SPDX-License-Identifier: GPL-2.0-only
2 : /*
3 : * linux/init/main.c
4 : *
5 : * Copyright (C) 1991, 1992 Linus Torvalds
6 : *
7 : * GK 2/5/95 - Changed to support mounting root fs via NFS
8 : * Added initrd & change_root: Werner Almesberger & Hans Lermen, Feb '96
9 : * Moan early if gcc is old, avoiding bogus kernels - Paul Gortmaker, May '96
10 : * Simplified starting of init: Michael A. Griffith <grif@acm.org>
11 : */
12 :
13 : #define DEBUG /* Enable initcall_debug */
14 :
15 : #include <linux/types.h>
16 : #include <linux/extable.h>
17 : #include <linux/module.h>
18 : #include <linux/proc_fs.h>
19 : #include <linux/binfmts.h>
20 : #include <linux/kernel.h>
21 : #include <linux/syscalls.h>
22 : #include <linux/stackprotector.h>
23 : #include <linux/string.h>
24 : #include <linux/ctype.h>
25 : #include <linux/delay.h>
26 : #include <linux/ioport.h>
27 : #include <linux/init.h>
28 : #include <linux/initrd.h>
29 : #include <linux/memblock.h>
30 : #include <linux/acpi.h>
31 : #include <linux/bootconfig.h>
32 : #include <linux/console.h>
33 : #include <linux/nmi.h>
34 : #include <linux/percpu.h>
35 : #include <linux/kmod.h>
36 : #include <linux/kprobes.h>
37 : #include <linux/kmsan.h>
38 : #include <linux/vmalloc.h>
39 : #include <linux/kernel_stat.h>
40 : #include <linux/start_kernel.h>
41 : #include <linux/security.h>
42 : #include <linux/smp.h>
43 : #include <linux/profile.h>
44 : #include <linux/kfence.h>
45 : #include <linux/rcupdate.h>
46 : #include <linux/srcu.h>
47 : #include <linux/moduleparam.h>
48 : #include <linux/kallsyms.h>
49 : #include <linux/buildid.h>
50 : #include <linux/writeback.h>
51 : #include <linux/cpu.h>
52 : #include <linux/cpuset.h>
53 : #include <linux/cgroup.h>
54 : #include <linux/efi.h>
55 : #include <linux/tick.h>
56 : #include <linux/sched/isolation.h>
57 : #include <linux/interrupt.h>
58 : #include <linux/taskstats_kern.h>
59 : #include <linux/delayacct.h>
60 : #include <linux/unistd.h>
61 : #include <linux/utsname.h>
62 : #include <linux/rmap.h>
63 : #include <linux/mempolicy.h>
64 : #include <linux/key.h>
65 : #include <linux/debug_locks.h>
66 : #include <linux/debugobjects.h>
67 : #include <linux/lockdep.h>
68 : #include <linux/kmemleak.h>
69 : #include <linux/padata.h>
70 : #include <linux/pid_namespace.h>
71 : #include <linux/device/driver.h>
72 : #include <linux/kthread.h>
73 : #include <linux/sched.h>
74 : #include <linux/sched/init.h>
75 : #include <linux/signal.h>
76 : #include <linux/idr.h>
77 : #include <linux/kgdb.h>
78 : #include <linux/ftrace.h>
79 : #include <linux/async.h>
80 : #include <linux/shmem_fs.h>
81 : #include <linux/slab.h>
82 : #include <linux/perf_event.h>
83 : #include <linux/ptrace.h>
84 : #include <linux/pti.h>
85 : #include <linux/blkdev.h>
86 : #include <linux/sched/clock.h>
87 : #include <linux/sched/task.h>
88 : #include <linux/sched/task_stack.h>
89 : #include <linux/context_tracking.h>
90 : #include <linux/random.h>
91 : #include <linux/list.h>
92 : #include <linux/integrity.h>
93 : #include <linux/proc_ns.h>
94 : #include <linux/io.h>
95 : #include <linux/cache.h>
96 : #include <linux/rodata_test.h>
97 : #include <linux/jump_label.h>
98 : #include <linux/kcsan.h>
99 : #include <linux/init_syscalls.h>
100 : #include <linux/stackdepot.h>
101 : #include <linux/randomize_kstack.h>
102 : #include <net/net_namespace.h>
103 :
104 : #include <asm/io.h>
105 : #include <asm/setup.h>
106 : #include <asm/sections.h>
107 : #include <asm/cacheflush.h>
108 :
109 : #define CREATE_TRACE_POINTS
110 : #include <trace/events/initcall.h>
111 :
112 : #include <kunit/test.h>
113 :
114 : static int kernel_init(void *);
115 :
116 : /*
117 : * Debug helper: via this flag we know that we are in 'early bootup code'
118 : * where only the boot processor is running with IRQ disabled. This means
119 : * two things - IRQ must not be enabled before the flag is cleared and some
120 : * operations which are not allowed with IRQ disabled are allowed while the
121 : * flag is set.
122 : */
123 : bool early_boot_irqs_disabled __read_mostly;
124 :
125 : enum system_states system_state __read_mostly;
126 : EXPORT_SYMBOL(system_state);
127 :
128 : /*
129 : * Boot command-line arguments
130 : */
131 : #define MAX_INIT_ARGS CONFIG_INIT_ENV_ARG_LIMIT
132 : #define MAX_INIT_ENVS CONFIG_INIT_ENV_ARG_LIMIT
133 :
134 : /* Default late time init is NULL. archs can override this later. */
135 : void (*__initdata late_time_init)(void);
136 :
137 : /* Untouched command line saved by arch-specific code. */
138 : char __initdata boot_command_line[COMMAND_LINE_SIZE];
139 : /* Untouched saved command line (eg. for /proc) */
140 : char *saved_command_line __ro_after_init;
141 : unsigned int saved_command_line_len __ro_after_init;
142 : /* Command line for parameter parsing */
143 : static char *static_command_line;
144 : /* Untouched extra command line */
145 : static char *extra_command_line;
146 : /* Extra init arguments */
147 : static char *extra_init_args;
148 :
149 : #ifdef CONFIG_BOOT_CONFIG
150 : /* Is bootconfig on command line? */
151 : static bool bootconfig_found;
152 : static size_t initargs_offs;
153 : #else
154 : # define bootconfig_found false
155 : # define initargs_offs 0
156 : #endif
157 :
158 : static char *execute_command;
159 : static char *ramdisk_execute_command = "/init";
160 :
161 : /*
162 : * Used to generate warnings if static_key manipulation functions are used
163 : * before jump_label_init is called.
164 : */
165 : bool static_key_initialized __read_mostly;
166 : EXPORT_SYMBOL_GPL(static_key_initialized);
167 :
/*
 * If set, this tells drivers to reset the underlying device before going
 * ahead with initialization; otherwise a driver might rely on the BIOS
 * and skip the reset operation.
 *
 * This is useful if the kernel is booting in an unreliable environment,
 * e.g. a kdump situation where the previous kernel has crashed, the BIOS
 * has been skipped and devices will be in an unknown state.
 */
177 : unsigned int reset_devices;
178 : EXPORT_SYMBOL(reset_devices);
179 :
/*
 * Handler for the "reset_devices" boot parameter (registered with
 * __setup() right below): raise the global reset_devices flag.
 *
 * @str: remainder of the parameter text; not used.
 *
 * Returns 1; obsolete_checksetup() treats a non-zero return from a
 * setup function as "parameter handled".
 */
static int __init set_reset_devices(char *str)
{
	reset_devices = 1;
	return 1;
}
185 :
186 : __setup("reset_devices", set_reset_devices);
187 :
188 : static const char *argv_init[MAX_INIT_ARGS+2] = { "init", NULL, };
189 : const char *envp_init[MAX_INIT_ENVS+2] = { "HOME=/", "TERM=linux", NULL, };
190 : static const char *panic_later, *panic_param;
191 :
192 3 : static bool __init obsolete_checksetup(char *line)
193 : {
194 : const struct obs_kernel_param *p;
195 3 : bool had_early_param = false;
196 :
197 3 : p = __setup_start;
198 : do {
199 266 : int n = strlen(p->str);
200 133 : if (parameqn(line, p->str, n)) {
201 3 : if (p->early) {
202 : /* Already done in parse_early_param?
203 : * (Needs exact match on param part).
204 : * Keep iterating, as we can have early
205 : * params and __setups of same names 8( */
206 0 : if (line[n] == '\0' || line[n] == '=')
207 0 : had_early_param = true;
208 3 : } else if (!p->setup_func) {
209 0 : pr_warn("Parameter %s is obsolete, ignored\n",
210 : p->str);
211 0 : return true;
212 3 : } else if (p->setup_func(line + n))
213 : return true;
214 : }
215 131 : p++;
216 131 : } while (p < __setup_end);
217 :
218 : return had_early_param;
219 : }
220 :
221 : /*
222 : * This should be approx 2 Bo*oMips to start (note initial shift), and will
223 : * still work even if initially too large, it will just take slightly longer
224 : */
225 : unsigned long loops_per_jiffy = (1<<12);
226 : EXPORT_SYMBOL(loops_per_jiffy);
227 :
/*
 * early_param handler for "debug": switch the console log level to
 * CONSOLE_LOGLEVEL_DEBUG.  @str is not used.  Always returns 0.
 */
static int __init debug_kernel(char *str)
{
	console_loglevel = CONSOLE_LOGLEVEL_DEBUG;
	return 0;
}
233 :
/*
 * early_param handler for "quiet": switch the console log level to
 * CONSOLE_LOGLEVEL_QUIET.  @str is not used.  Always returns 0.
 */
static int __init quiet_kernel(char *str)
{
	console_loglevel = CONSOLE_LOGLEVEL_QUIET;
	return 0;
}
239 :
240 : early_param("debug", debug_kernel);
241 : early_param("quiet", quiet_kernel);
242 :
243 0 : static int __init loglevel(char *str)
244 : {
245 : int newlevel;
246 :
247 : /*
248 : * Only update loglevel value when a correct setting was passed,
249 : * to prevent blind crashes (when loglevel being set to 0) that
250 : * are quite hard to debug
251 : */
252 0 : if (get_option(&str, &newlevel)) {
253 0 : console_loglevel = newlevel;
254 0 : return 0;
255 : }
256 :
257 : return -EINVAL;
258 : }
259 :
260 : early_param("loglevel", loglevel);
261 :
262 : #ifdef CONFIG_BLK_DEV_INITRD
263 : static void * __init get_boot_config_from_initrd(size_t *_size)
264 : {
265 : u32 size, csum;
266 : char *data;
267 : u32 *hdr;
268 : int i;
269 :
270 : if (!initrd_end)
271 : return NULL;
272 :
273 : data = (char *)initrd_end - BOOTCONFIG_MAGIC_LEN;
274 : /*
275 : * Since Grub may align the size of initrd to 4, we must
276 : * check the preceding 3 bytes as well.
277 : */
278 : for (i = 0; i < 4; i++) {
279 : if (!memcmp(data, BOOTCONFIG_MAGIC, BOOTCONFIG_MAGIC_LEN))
280 : goto found;
281 : data--;
282 : }
283 : return NULL;
284 :
285 : found:
286 : hdr = (u32 *)(data - 8);
287 : size = le32_to_cpu(hdr[0]);
288 : csum = le32_to_cpu(hdr[1]);
289 :
290 : data = ((void *)hdr) - size;
291 : if ((unsigned long)data < initrd_start) {
292 : pr_err("bootconfig size %d is greater than initrd size %ld\n",
293 : size, initrd_end - initrd_start);
294 : return NULL;
295 : }
296 :
297 : if (xbc_calc_checksum(data, size) != csum) {
298 : pr_err("bootconfig checksum failed\n");
299 : return NULL;
300 : }
301 :
302 : /* Remove bootconfig from initramfs/initrd */
303 : initrd_end = (unsigned long)data;
304 : if (_size)
305 : *_size = size;
306 :
307 : return data;
308 : }
309 : #else
/*
 * !CONFIG_BLK_DEV_INITRD variant: there is no initrd to search, so no
 * bootconfig can be found.  @_size is left untouched.
 */
static void * __init get_boot_config_from_initrd(size_t *_size)
{
	return NULL;
}
314 : #endif
315 :
316 : #ifdef CONFIG_BOOT_CONFIG
317 :
318 : static char xbc_namebuf[XBC_KEYLEN_MAX] __initdata;
319 :
320 : #define rest(dst, end) ((end) > (dst) ? (end) - (dst) : 0)
321 :
322 : static int __init xbc_snprint_cmdline(char *buf, size_t size,
323 : struct xbc_node *root)
324 : {
325 : struct xbc_node *knode, *vnode;
326 : char *end = buf + size;
327 : const char *val;
328 : int ret;
329 :
330 : xbc_node_for_each_key_value(root, knode, val) {
331 : ret = xbc_node_compose_key_after(root, knode,
332 : xbc_namebuf, XBC_KEYLEN_MAX);
333 : if (ret < 0)
334 : return ret;
335 :
336 : vnode = xbc_node_get_child(knode);
337 : if (!vnode) {
338 : ret = snprintf(buf, rest(buf, end), "%s ", xbc_namebuf);
339 : if (ret < 0)
340 : return ret;
341 : buf += ret;
342 : continue;
343 : }
344 : xbc_array_for_each_value(vnode, val) {
345 : ret = snprintf(buf, rest(buf, end), "%s=\"%s\" ",
346 : xbc_namebuf, val);
347 : if (ret < 0)
348 : return ret;
349 : buf += ret;
350 : }
351 : }
352 :
353 : return buf - (end - size);
354 : }
355 : #undef rest
356 :
357 : /* Make an extra command line under given key word */
358 : static char * __init xbc_make_cmdline(const char *key)
359 : {
360 : struct xbc_node *root;
361 : char *new_cmdline;
362 : int ret, len = 0;
363 :
364 : root = xbc_find_node(key);
365 : if (!root)
366 : return NULL;
367 :
368 : /* Count required buffer size */
369 : len = xbc_snprint_cmdline(NULL, 0, root);
370 : if (len <= 0)
371 : return NULL;
372 :
373 : new_cmdline = memblock_alloc(len + 1, SMP_CACHE_BYTES);
374 : if (!new_cmdline) {
375 : pr_err("Failed to allocate memory for extra kernel cmdline.\n");
376 : return NULL;
377 : }
378 :
379 : ret = xbc_snprint_cmdline(new_cmdline, len + 1, root);
380 : if (ret < 0 || ret > len) {
381 : pr_err("Failed to print extra kernel cmdline.\n");
382 : memblock_free(new_cmdline, len + 1);
383 : return NULL;
384 : }
385 :
386 : return new_cmdline;
387 : }
388 :
389 : static int __init bootconfig_params(char *param, char *val,
390 : const char *unused, void *arg)
391 : {
392 : if (strcmp(param, "bootconfig") == 0) {
393 : bootconfig_found = true;
394 : }
395 : return 0;
396 : }
397 :
/*
 * early_param handler for "bootconfig" when CONFIG_BOOT_CONFIG is set:
 * nothing to do here.  @str is not used.  Always returns 0.
 */
static int __init warn_bootconfig(char *str)
{
	/* The 'bootconfig' has been handled by bootconfig_params(). */
	return 0;
}
403 :
404 : static void __init setup_boot_config(void)
405 : {
406 : static char tmp_cmdline[COMMAND_LINE_SIZE] __initdata;
407 : const char *msg, *data;
408 : int pos, ret;
409 : size_t size;
410 : char *err;
411 :
412 : /* Cut out the bootconfig data even if we have no bootconfig option */
413 : data = get_boot_config_from_initrd(&size);
414 : /* If there is no bootconfig in initrd, try embedded one. */
415 : if (!data)
416 : data = xbc_get_embedded_bootconfig(&size);
417 :
418 : strscpy(tmp_cmdline, boot_command_line, COMMAND_LINE_SIZE);
419 : err = parse_args("bootconfig", tmp_cmdline, NULL, 0, 0, 0, NULL,
420 : bootconfig_params);
421 :
422 : if (IS_ERR(err) || !(bootconfig_found || IS_ENABLED(CONFIG_BOOT_CONFIG_FORCE)))
423 : return;
424 :
425 : /* parse_args() stops at the next param of '--' and returns an address */
426 : if (err)
427 : initargs_offs = err - tmp_cmdline;
428 :
429 : if (!data) {
430 : /* If user intended to use bootconfig, show an error level message */
431 : if (bootconfig_found)
432 : pr_err("'bootconfig' found on command line, but no bootconfig found\n");
433 : else
434 : pr_info("No bootconfig data provided, so skipping bootconfig");
435 : return;
436 : }
437 :
438 : if (size >= XBC_DATA_MAX) {
439 : pr_err("bootconfig size %ld greater than max size %d\n",
440 : (long)size, XBC_DATA_MAX);
441 : return;
442 : }
443 :
444 : ret = xbc_init(data, size, &msg, &pos);
445 : if (ret < 0) {
446 : if (pos < 0)
447 : pr_err("Failed to init bootconfig: %s.\n", msg);
448 : else
449 : pr_err("Failed to parse bootconfig: %s at %d.\n",
450 : msg, pos);
451 : } else {
452 : xbc_get_info(&ret, NULL);
453 : pr_info("Load bootconfig: %ld bytes %d nodes\n", (long)size, ret);
454 : /* keys starting with "kernel." are passed via cmdline */
455 : extra_command_line = xbc_make_cmdline("kernel");
456 : /* Also, "init." keys are init arguments */
457 : extra_init_args = xbc_make_cmdline("init");
458 : }
459 : return;
460 : }
461 :
/* Tear down the bootconfig state by calling xbc_exit(). */
static void __init exit_boot_config(void)
{
	xbc_exit();
}
466 :
467 : #else /* !CONFIG_BOOT_CONFIG */
468 :
/*
 * !CONFIG_BOOT_CONFIG variant: the bootconfig is not parsed, but a blob
 * appended to the initrd is still cut off so that it is not treated as
 * part of the initrd.
 */
static void __init setup_boot_config(void)
{
	/* Remove bootconfig data from initrd */
	get_boot_config_from_initrd(NULL);
}
474 :
/*
 * early_param handler for "bootconfig" when CONFIG_BOOT_CONFIG is not
 * set: warn that the option has no effect.  @str is not used.
 * Always returns 0.
 */
static int __init warn_bootconfig(char *str)
{
	pr_warn("WARNING: 'bootconfig' found on the kernel command line but CONFIG_BOOT_CONFIG is not set.\n");
	return 0;
}
480 :
481 : #define exit_boot_config() do {} while (0)
482 :
483 : #endif /* CONFIG_BOOT_CONFIG */
484 :
485 : early_param("bootconfig", warn_bootconfig);
486 :
487 : /* Change NUL term back to "=", to make "param" the whole string. */
488 3 : static void __init repair_env_string(char *param, char *val)
489 : {
490 3 : if (val) {
491 : /* param=val or param="val"? */
492 3 : if (val == param+strlen(param)+1)
493 3 : val[-1] = '=';
494 0 : else if (val == param+strlen(param)+2) {
495 0 : val[-2] = '=';
496 0 : memmove(val-1, val, strlen(val)+1);
497 : } else
498 0 : BUG();
499 : }
500 3 : }
501 :
502 : /* Anything after -- gets handed straight to init. */
503 0 : static int __init set_init_arg(char *param, char *val,
504 : const char *unused, void *arg)
505 : {
506 : unsigned int i;
507 :
508 0 : if (panic_later)
509 : return 0;
510 :
511 0 : repair_env_string(param, val);
512 :
513 0 : for (i = 0; argv_init[i]; i++) {
514 0 : if (i == MAX_INIT_ARGS) {
515 0 : panic_later = "init";
516 0 : panic_param = param;
517 0 : return 0;
518 : }
519 : }
520 0 : argv_init[i] = param;
521 0 : return 0;
522 : }
523 :
524 : /*
525 : * Unknown boot options get handed to init, unless they look like
526 : * unused parameters (modprobe will find them in /proc/cmdline).
527 : */
528 3 : static int __init unknown_bootoption(char *param, char *val,
529 : const char *unused, void *arg)
530 : {
531 3 : size_t len = strlen(param);
532 :
533 3 : repair_env_string(param, val);
534 :
535 : /* Handle obsolete-style parameters */
536 3 : if (obsolete_checksetup(param))
537 : return 0;
538 :
539 : /* Unused module parameter. */
540 1 : if (strnchr(param, len, '.'))
541 : return 0;
542 :
543 1 : if (panic_later)
544 : return 0;
545 :
546 1 : if (val) {
547 : /* Environment option */
548 : unsigned int i;
549 2 : for (i = 0; envp_init[i]; i++) {
550 2 : if (i == MAX_INIT_ENVS) {
551 0 : panic_later = "env";
552 0 : panic_param = param;
553 : }
554 2 : if (!strncmp(param, envp_init[i], len+1))
555 : break;
556 : }
557 1 : envp_init[i] = param;
558 : } else {
559 : /* Command line option */
560 : unsigned int i;
561 0 : for (i = 0; argv_init[i]; i++) {
562 0 : if (i == MAX_INIT_ARGS) {
563 0 : panic_later = "init";
564 0 : panic_param = param;
565 : }
566 : }
567 0 : argv_init[i] = param;
568 : }
569 : return 0;
570 : }
571 :
572 0 : static int __init init_setup(char *str)
573 : {
574 : unsigned int i;
575 :
576 0 : execute_command = str;
577 : /*
578 : * In case LILO is going to boot us with default command line,
579 : * it prepends "auto" before the whole cmdline which makes
580 : * the shell think it should execute a script with such name.
581 : * So we ignore all arguments entered _before_ init=... [MJ]
582 : */
583 0 : for (i = 1; i < MAX_INIT_ARGS; i++)
584 0 : argv_init[i] = NULL;
585 0 : return 1;
586 : }
587 : __setup("init=", init_setup);
588 :
589 0 : static int __init rdinit_setup(char *str)
590 : {
591 : unsigned int i;
592 :
593 0 : ramdisk_execute_command = str;
594 : /* See "auto" comment in init_setup */
595 0 : for (i = 1; i < MAX_INIT_ARGS; i++)
596 0 : argv_init[i] = NULL;
597 0 : return 1;
598 : }
599 : __setup("rdinit=", rdinit_setup);
600 :
601 : #ifndef CONFIG_SMP
/*
 * !CONFIG_SMP fallbacks: a constant setup_max_cpus and empty stand-ins
 * for setup_nr_cpu_ids() and smp_prepare_cpus(), so callers in this file
 * need no #ifdefs.
 */
static const unsigned int setup_max_cpus = NR_CPUS;
static inline void setup_nr_cpu_ids(void) { }
static inline void smp_prepare_cpus(unsigned int maxcpus) { }
605 : #endif
606 :
607 : /*
608 : * We need to store the untouched command line for future reference.
609 : * We also need to store the touched command line since the parameter
610 : * parsing is performed in place, and we should allow a component to
611 : * store reference of name/value for future reference.
612 : */
613 1 : static void __init setup_command_line(char *command_line)
614 : {
615 1 : size_t len, xlen = 0, ilen = 0;
616 :
617 1 : if (extra_command_line)
618 0 : xlen = strlen(extra_command_line);
619 1 : if (extra_init_args)
620 0 : ilen = strlen(extra_init_args) + 4; /* for " -- " */
621 :
622 1 : len = xlen + strlen(boot_command_line) + 1;
623 :
624 2 : saved_command_line = memblock_alloc(len + ilen, SMP_CACHE_BYTES);
625 1 : if (!saved_command_line)
626 0 : panic("%s: Failed to allocate %zu bytes\n", __func__, len + ilen);
627 :
628 1 : static_command_line = memblock_alloc(len, SMP_CACHE_BYTES);
629 1 : if (!static_command_line)
630 0 : panic("%s: Failed to allocate %zu bytes\n", __func__, len);
631 :
632 1 : if (xlen) {
633 : /*
634 : * We have to put extra_command_line before boot command
635 : * lines because there could be dashes (separator of init
636 : * command line) in the command lines.
637 : */
638 0 : strcpy(saved_command_line, extra_command_line);
639 0 : strcpy(static_command_line, extra_command_line);
640 : }
641 2 : strcpy(saved_command_line + xlen, boot_command_line);
642 2 : strcpy(static_command_line + xlen, command_line);
643 :
644 1 : if (ilen) {
645 : /*
646 : * Append supplemental init boot args to saved_command_line
647 : * so that user can check what command line options passed
648 : * to init.
649 : * The order should always be
650 : * " -- "[bootconfig init-param][cmdline init-param]
651 : */
652 : if (initargs_offs) {
653 : len = xlen + initargs_offs;
654 : strcpy(saved_command_line + len, extra_init_args);
655 : len += ilen - 4; /* strlen(extra_init_args) */
656 : strcpy(saved_command_line + len,
657 : boot_command_line + initargs_offs - 1);
658 : } else {
659 0 : len = strlen(saved_command_line);
660 0 : strcpy(saved_command_line + len, " -- ");
661 0 : len += 4;
662 0 : strcpy(saved_command_line + len, extra_init_args);
663 : }
664 : }
665 :
666 2 : saved_command_line_len = strlen(saved_command_line);
667 1 : }
668 :
669 : /*
670 : * We need to finalize in a non-__init function or else race conditions
671 : * between the root thread and the init thread may cause start_kernel to
672 : * be reaped by free_initmem before the root thread has proceeded to
673 : * cpu_idle.
674 : *
675 : * gcc-3.4 accidentally inlines this function, so use noinline.
676 : */
677 :
678 : static __initdata DECLARE_COMPLETION(kthreadd_done);
679 :
/*
 * Final, non-__init stage of boot: spawn the init thread (kernel_init)
 * and kthreadd, switch system_state to SYSTEM_SCHEDULING, release the
 * init thread via the kthreadd_done completion, and then hand the
 * calling context over to cpu_startup_entry().  Declared __noreturn.
 */
noinline void __ref __noreturn rest_init(void)
{
	struct task_struct *tsk;
	int pid;

	rcu_scheduler_starting();
	/*
	 * We need to spawn init first so that it obtains pid 1, however
	 * the init task will end up wanting to create kthreads, which, if
	 * we schedule it before we create kthreadd, will OOPS.
	 */
	pid = user_mode_thread(kernel_init, NULL, CLONE_FS);
	/*
	 * Pin init on the boot CPU. Task migration is not properly working
	 * until sched_init_smp() has been run. It will set the allowed
	 * CPUs for init to the non isolated CPUs.
	 */
	rcu_read_lock();
	tsk = find_task_by_pid_ns(pid, &init_pid_ns);
	tsk->flags |= PF_NO_SETAFFINITY;
	set_cpus_allowed_ptr(tsk, cpumask_of(smp_processor_id()));
	rcu_read_unlock();

	numa_default_policy();
	pid = kernel_thread(kthreadd, NULL, NULL, CLONE_FS | CLONE_FILES);
	/* Publish kthreadd's task_struct in kthreadd_task. */
	rcu_read_lock();
	kthreadd_task = find_task_by_pid_ns(pid, &init_pid_ns);
	rcu_read_unlock();

	/*
	 * Enable might_sleep() and smp_processor_id() checks.
	 * They cannot be enabled earlier because with CONFIG_PREEMPTION=y
	 * kernel_thread() would trigger might_sleep() splats. With
	 * CONFIG_PREEMPT_VOLUNTARY=y the init task might have scheduled
	 * already, but it's stuck on the kthreadd_done completion.
	 */
	system_state = SYSTEM_SCHEDULING;

	complete(&kthreadd_done);

	/*
	 * The boot idle thread must execute schedule()
	 * at least once to get things moving:
	 */
	schedule_preempt_disabled();
	/* Call into cpu_idle with preempt disabled */
	cpu_startup_entry(CPUHP_ONLINE);
}
728 :
729 : /* Check for early params. */
730 6 : static int __init do_early_param(char *param, char *val,
731 : const char *unused, void *arg)
732 : {
733 : const struct obs_kernel_param *p;
734 :
735 504 : for (p = __setup_start; p < __setup_end; p++) {
736 996 : if ((p->early && parameq(param, p->str)) ||
737 581 : (strcmp(param, "console") == 0 &&
738 83 : strcmp(p->str, "earlycon") == 0)
739 : ) {
740 0 : if (p->setup_func(val) != 0)
741 0 : pr_warn("Malformed early option '%s'\n", param);
742 : }
743 : }
744 : /* We accept everything at this stage. */
745 6 : return 0;
746 : }
747 :
/*
 * Run the early-parameter pass over @cmdline: every parameter is handed
 * to do_early_param().  No kernel_param table is passed to parse_args().
 * NOTE(review): parse_args() appears to modify its input (callers in
 * this file pass scratch copies) - confirm.
 */
void __init parse_early_options(char *cmdline)
{
	parse_args("early options", cmdline, NULL, 0, 0, 0, NULL,
		   do_early_param);
}
753 :
754 : /* Arch code calls this early on, or if not, just before other parsing. */
755 1 : void __init parse_early_param(void)
756 : {
757 : static int done __initdata;
758 : static char tmp_cmdline[COMMAND_LINE_SIZE] __initdata;
759 :
760 1 : if (done)
761 : return;
762 :
763 : /* All fall through to do_early_param. */
764 1 : strscpy(tmp_cmdline, boot_command_line, COMMAND_LINE_SIZE);
765 1 : parse_early_options(tmp_cmdline);
766 1 : done = 1;
767 : }
768 :
/*
 * Empty __weak default implementations of hooks called from
 * start_kernel().  Being weak, each one is replaced by a non-weak
 * definition of the same name when one is linked in.
 */
void __init __weak arch_post_acpi_subsys_init(void) { }

void __init __weak smp_setup_processor_id(void)
{
}

/* Only provided here when THREAD_SIZE >= PAGE_SIZE. */
# if THREAD_SIZE >= PAGE_SIZE
void __init __weak thread_stack_cache_init(void)
{
}
#endif

void __init __weak poking_init(void) { }

void __init __weak pgtable_cache_init(void) { }

void __init __weak trap_init(void) { }
786 :
787 : bool initcall_debug;
788 : core_param(initcall_debug, initcall_debug, bool, 0644);
789 :
/*
 * initcall_debug_enable() is called from start_kernel() when
 * initcall_debug is set.  With TRACEPOINTS_ENABLED only the prototype is
 * given here (the definition is not in this part of the file); otherwise
 * it is an empty inline stub.
 */
#ifdef TRACEPOINTS_ENABLED
static void __init initcall_debug_enable(void);
#else
static inline void initcall_debug_enable(void)
{
}
#endif
797 :
798 : #ifdef CONFIG_RANDOMIZE_KSTACK_OFFSET
799 : DEFINE_STATIC_KEY_MAYBE_RO(CONFIG_RANDOMIZE_KSTACK_OFFSET_DEFAULT,
800 : randomize_kstack_offset);
801 : DEFINE_PER_CPU(u32, kstack_offset);
802 :
803 : static int __init early_randomize_kstack_offset(char *buf)
804 : {
805 : int ret;
806 : bool bool_result;
807 :
808 : ret = kstrtobool(buf, &bool_result);
809 : if (ret)
810 : return ret;
811 :
812 : if (bool_result)
813 : static_branch_enable(&randomize_kstack_offset);
814 : else
815 : static_branch_disable(&randomize_kstack_offset);
816 : return 0;
817 : }
818 : early_param("randomize_kstack_offset", early_randomize_kstack_offset);
819 : #endif
820 :
/*
 * Weak default for the last step of start_kernel(): simply enter
 * rest_init(), which does not return.  Being __weak, it is replaced by a
 * non-weak definition of the same name when one is linked in.
 */
void __init __weak __noreturn arch_call_rest_init(void)
{
	rest_init();
}
825 :
826 1 : static void __init print_unknown_bootoptions(void)
827 : {
828 : char *unknown_options;
829 : char *end;
830 : const char *const *p;
831 : size_t len;
832 :
833 1 : if (panic_later || (!argv_init[1] && !envp_init[2]))
834 : return;
835 :
836 : /*
837 : * Determine how many options we have to print out, plus a space
838 : * before each
839 : */
840 : len = 1; /* null terminator */
841 0 : for (p = &argv_init[1]; *p; p++) {
842 0 : len++;
843 0 : len += strlen(*p);
844 : }
845 1 : for (p = &envp_init[2]; *p; p++) {
846 1 : len++;
847 2 : len += strlen(*p);
848 : }
849 :
850 1 : unknown_options = memblock_alloc(len, SMP_CACHE_BYTES);
851 1 : if (!unknown_options) {
852 0 : pr_err("%s: Failed to allocate %zu bytes\n",
853 : __func__, len);
854 0 : return;
855 : }
856 : end = unknown_options;
857 :
858 0 : for (p = &argv_init[1]; *p; p++)
859 0 : end += sprintf(end, " %s", *p);
860 1 : for (p = &envp_init[2]; *p; p++)
861 1 : end += sprintf(end, " %s", *p);
862 :
863 : /* Start at unknown_options[1] to skip the initial space */
864 1 : pr_notice("Unknown kernel command line parameters \"%s\", will be passed to user space.\n",
865 : &unknown_options[1]);
866 1 : memblock_free(unknown_options, len);
867 : }
868 :
/*
 * Main kernel initialisation sequence.
 *
 * Starts with interrupts disabled (early_boot_irqs_disabled is set right
 * after local_irq_disable()), performs arch setup and command line
 * parsing, brings up the core subsystems in the order listed below,
 * enables interrupts once call_function_init() has run, and finally
 * calls arch_call_rest_init(), which does not return.
 *
 * The order of the calls matters; see the comments on the individual
 * steps for the dependencies that are known here.
 */
asmlinkage __visible __init __no_sanitize_address __noreturn __no_stack_protector
void start_kernel(void)
{
	char *command_line;
	char *after_dashes;

	set_task_stack_end_magic(&init_task);
	smp_setup_processor_id();
	debug_objects_early_init();
	init_vmlinux_build_id();

	cgroup_init_early();

	local_irq_disable();
	early_boot_irqs_disabled = true;

	/*
	 * Interrupts are still disabled. Do necessary setups, then
	 * enable them.
	 */
	boot_cpu_init();
	page_address_init();
	pr_notice("%s", linux_banner);
	early_security_init();
	/* setup_arch() hands back the command line to parse. */
	setup_arch(&command_line);
	setup_boot_config();
	setup_command_line(command_line);
	setup_nr_cpu_ids();
	setup_per_cpu_areas();
	smp_prepare_boot_cpu();	/* arch-specific boot-cpu hooks */
	boot_cpu_hotplug_init();

	pr_notice("Kernel command line: %s\n", saved_command_line);
	/* parameters may set static keys */
	jump_label_init();
	parse_early_param();
	/*
	 * Regular parameter pass; anything not matched by the __param table
	 * goes to unknown_bootoption().  A non-error, non-NULL result is the
	 * text after "--", which is handed to init below.
	 */
	after_dashes = parse_args("Booting kernel",
				  static_command_line, __start___param,
				  __stop___param - __start___param,
				  -1, -1, NULL, &unknown_bootoption);
	print_unknown_bootoptions();
	if (!IS_ERR_OR_NULL(after_dashes))
		parse_args("Setting init args", after_dashes, NULL, 0, -1, -1,
			   NULL, set_init_arg);
	/* Init arguments that came from the bootconfig. */
	if (extra_init_args)
		parse_args("Setting extra init args", extra_init_args,
			   NULL, 0, -1, -1, NULL, set_init_arg);

	/* Architectural and non-timekeeping rng init, before allocator init */
	random_init_early(command_line);

	/*
	 * These use large bootmem allocations and must precede
	 * initialization of page allocator
	 */
	setup_log_buf(0);
	vfs_caches_init_early();
	sort_main_extable();
	trap_init();
	mm_core_init();
	poking_init();
	ftrace_init();

	/* trace_printk can be enabled here */
	early_trace_init();

	/*
	 * Set up the scheduler prior starting any interrupts (such as the
	 * timer interrupt). Full topology setup happens at smp_init()
	 * time - but meanwhile we still have a functioning scheduler.
	 */
	sched_init();

	if (WARN(!irqs_disabled(),
		 "Interrupts were enabled *very* early, fixing it\n"))
		local_irq_disable();
	radix_tree_init();
	maple_tree_init();

	/*
	 * Set up housekeeping before setting up workqueues to allow the unbound
	 * workqueue to take non-housekeeping into account.
	 */
	housekeeping_init();

	/*
	 * Allow workqueue creation and work item queueing/cancelling
	 * early.  Work item execution depends on kthreads and starts after
	 * workqueue_init().
	 */
	workqueue_init_early();

	rcu_init();

	/* Trace events are available after this */
	trace_init();

	if (initcall_debug)
		initcall_debug_enable();

	context_tracking_init();
	/* init some links before init_ISA_irqs() */
	early_irq_init();
	init_IRQ();
	tick_init();
	rcu_init_nohz();
	init_timers();
	srcu_init();
	hrtimers_init();
	softirq_init();
	timekeeping_init();
	time_init();

	/* This must be after timekeeping is initialized */
	random_init();

	/* These make use of the fully initialized rng */
	kfence_init();
	boot_init_stack_canary();

	perf_event_init();
	profile_init();
	call_function_init();
	WARN(!irqs_disabled(), "Interrupts were enabled early\n");

	/* End of the "early boot, IRQs disabled" phase. */
	early_boot_irqs_disabled = false;
	local_irq_enable();

	kmem_cache_init_late();

	/*
	 * HACK ALERT! This is early. We're enabling the console before
	 * we've done PCI setups etc, and console_init() must be aware of
	 * this. But we do want output early, in case something goes wrong.
	 */
	console_init();
	/*
	 * Deferred from command line parsing (see set_init_arg() and
	 * unknown_bootoption()); raised only now, after console_init().
	 */
	if (panic_later)
		panic("Too many boot %s vars at `%s'", panic_later,
		      panic_param);

	lockdep_init();

	/*
	 * Need to run this when irqs are enabled, because it wants
	 * to self-test [hard/soft]-irqs on/off lock inversion bugs
	 * too:
	 */
	locking_selftest();

#ifdef CONFIG_BLK_DEV_INITRD
	/* Drop an initrd whose first page lies below min_low_pfn. */
	if (initrd_start && !initrd_below_start_ok &&
	    page_to_pfn(virt_to_page((void *)initrd_start)) < min_low_pfn) {
		pr_crit("initrd overwritten (0x%08lx < 0x%08lx) - disabling it.\n",
		    page_to_pfn(virt_to_page((void *)initrd_start)),
		    min_low_pfn);
		initrd_start = 0;
	}
#endif
	setup_per_cpu_pageset();
	numa_policy_init();
	acpi_early_init();
	/* Optional hook; NULL unless something has set it. */
	if (late_time_init)
		late_time_init();
	sched_clock_init();
	calibrate_delay();

	arch_cpu_finalize_init();

	pid_idr_init();
	anon_vma_init();
#ifdef CONFIG_X86
	if (efi_enabled(EFI_RUNTIME_SERVICES))
		efi_enter_virtual_mode();
#endif
	thread_stack_cache_init();
	cred_init();
	fork_init();
	proc_caches_init();
	uts_ns_init();
	key_init();
	security_init();
	dbg_late_init();
	net_ns_init();
	vfs_caches_init();
	pagecache_init();
	signals_init();
	seq_file_init();
	proc_root_init();
	nsfs_init();
	cpuset_init();
	cgroup_init();
	taskstats_init_early();
	delayacct_init();

	acpi_subsystem_init();
	arch_post_acpi_subsys_init();
	kcsan_init();

	/* Do the rest non-__init'ed, we're now alive */
	arch_call_rest_init();

	/*
	 * Avoid stack canaries in callers of boot_init_stack_canary for gcc-10
	 * and older.
	 */
#if !__has_attribute(__no_stack_protector__)
	prevent_tail_call_optimization();
#endif
}
1078 :
1079 : /* Call all constructor functions linked into the kernel. */
1080 : static void __init do_ctors(void)
1081 : {
1082 : /*
1083 : * For UML, the constructors have already been called by the
1084 : * normal setup code as it's just a normal ELF binary, so we
1085 : * cannot do it again - but we do need CONFIG_CONSTRUCTORS
1086 : * even on UML for modules.
1087 : */
1088 : #if defined(CONFIG_CONSTRUCTORS) && !defined(CONFIG_UML)
1089 : ctor_fn_t *fn = (ctor_fn_t *) __ctors_start;
1090 :
1091 : for (; fn < (ctor_fn_t *) __ctors_end; fn++)
1092 : (*fn)();
1093 : #endif
1094 : }
1095 :
1096 : #ifdef CONFIG_KALLSYMS
1097 : struct blacklist_entry {
1098 : struct list_head next;
1099 : char *buf;
1100 : };
1101 :
1102 : static __initdata_or_module LIST_HEAD(blacklisted_initcalls);
1103 :
1104 0 : static int __init initcall_blacklist(char *str)
1105 : {
1106 : char *str_entry;
1107 : struct blacklist_entry *entry;
1108 :
1109 : /* str argument is a comma-separated list of functions */
1110 : do {
1111 0 : str_entry = strsep(&str, ",");
1112 0 : if (str_entry) {
1113 0 : pr_debug("blacklisting initcall %s\n", str_entry);
1114 0 : entry = memblock_alloc(sizeof(*entry),
1115 : SMP_CACHE_BYTES);
1116 0 : if (!entry)
1117 0 : panic("%s: Failed to allocate %zu bytes\n",
1118 : __func__, sizeof(*entry));
1119 0 : entry->buf = memblock_alloc(strlen(str_entry) + 1,
1120 : SMP_CACHE_BYTES);
1121 0 : if (!entry->buf)
1122 0 : panic("%s: Failed to allocate %zu bytes\n",
1123 0 : __func__, strlen(str_entry) + 1);
1124 0 : strcpy(entry->buf, str_entry);
1125 0 : list_add(&entry->next, &blacklisted_initcalls);
1126 : }
1127 0 : } while (str_entry);
1128 :
1129 0 : return 1;
1130 : }
1131 :
1132 181 : static bool __init_or_module initcall_blacklisted(initcall_t fn)
1133 : {
1134 : struct blacklist_entry *entry;
1135 : char fn_name[KSYM_SYMBOL_LEN];
1136 : unsigned long addr;
1137 :
1138 181 : if (list_empty(&blacklisted_initcalls))
1139 : return false;
1140 :
1141 0 : addr = (unsigned long) dereference_function_descriptor(fn);
1142 0 : sprint_symbol_no_offset(fn_name, addr);
1143 :
1144 : /*
1145 : * fn will be "function_name [module_name]" where [module_name] is not
1146 : * displayed for built-in init functions. Strip off the [module_name].
1147 : */
1148 0 : strreplace(fn_name, ' ', '\0');
1149 :
1150 0 : list_for_each_entry(entry, &blacklisted_initcalls, next) {
1151 0 : if (!strcmp(fn_name, entry->buf)) {
1152 0 : pr_debug("initcall %s blacklisted\n", fn_name);
1153 0 : return true;
1154 : }
1155 : }
1156 :
1157 : return false;
1158 : }
1159 : #else
1160 : static int __init initcall_blacklist(char *str)
1161 : {
1162 : pr_warn("initcall_blacklist requires CONFIG_KALLSYMS\n");
1163 : return 0;
1164 : }
1165 :
1166 : static bool __init_or_module initcall_blacklisted(initcall_t fn)
1167 : {
1168 : return false;
1169 : }
1170 : #endif
1171 : __setup("initcall_blacklist=", initcall_blacklist);
1172 :
1173 : static __init_or_module void
1174 0 : trace_initcall_start_cb(void *data, initcall_t fn)
1175 : {
1176 0 : ktime_t *calltime = data;
1177 :
1178 0 : printk(KERN_DEBUG "calling %pS @ %i\n", fn, task_pid_nr(current));
1179 0 : *calltime = ktime_get();
1180 0 : }
1181 :
1182 : static __init_or_module void
1183 0 : trace_initcall_finish_cb(void *data, initcall_t fn, int ret)
1184 : {
1185 0 : ktime_t rettime, *calltime = data;
1186 :
1187 0 : rettime = ktime_get();
1188 0 : printk(KERN_DEBUG "initcall %pS returned %d after %lld usecs\n",
1189 : fn, ret, (unsigned long long)ktime_us_delta(rettime, *calltime));
1190 0 : }
1191 :
1192 : static ktime_t initcall_calltime;
1193 :
1194 : #ifdef TRACEPOINTS_ENABLED
1195 : static void __init initcall_debug_enable(void)
1196 : {
1197 : int ret;
1198 :
1199 : ret = register_trace_initcall_start(trace_initcall_start_cb,
1200 : &initcall_calltime);
1201 : ret |= register_trace_initcall_finish(trace_initcall_finish_cb,
1202 : &initcall_calltime);
1203 : WARN(ret, "Failed to register initcall tracepoints\n");
1204 : }
1205 : # define do_trace_initcall_start trace_initcall_start
1206 : # define do_trace_initcall_finish trace_initcall_finish
1207 : #else
1208 : static inline void do_trace_initcall_start(initcall_t fn)
1209 : {
1210 181 : if (!initcall_debug)
1211 : return;
1212 0 : trace_initcall_start_cb(&initcall_calltime, fn);
1213 : }
1214 : static inline void do_trace_initcall_finish(initcall_t fn, int ret)
1215 : {
1216 181 : if (!initcall_debug)
1217 : return;
1218 0 : trace_initcall_finish_cb(&initcall_calltime, fn, ret);
1219 : }
1220 : #endif /* !TRACEPOINTS_ENABLED */
1221 :
1222 181 : int __init_or_module do_one_initcall(initcall_t fn)
1223 : {
1224 181 : int count = preempt_count();
1225 : char msgbuf[64];
1226 : int ret;
1227 :
1228 181 : if (initcall_blacklisted(fn))
1229 : return -EPERM;
1230 :
1231 181 : do_trace_initcall_start(fn);
1232 181 : ret = fn();
1233 181 : do_trace_initcall_finish(fn, ret);
1234 :
1235 181 : msgbuf[0] = 0;
1236 :
1237 181 : if (preempt_count() != count) {
1238 0 : sprintf(msgbuf, "preemption imbalance ");
1239 : preempt_count_set(count);
1240 : }
1241 181 : if (irqs_disabled()) {
1242 0 : strlcat(msgbuf, "disabled interrupts ", sizeof(msgbuf));
1243 : local_irq_enable();
1244 : }
1245 181 : WARN(msgbuf[0], "initcall %pS returned with %s\n", fn, msgbuf);
1246 :
1247 : add_latent_entropy();
1248 181 : return ret;
1249 : }
1250 :
1251 :
1252 : static initcall_entry_t *initcall_levels[] __initdata = {
1253 : __initcall0_start,
1254 : __initcall1_start,
1255 : __initcall2_start,
1256 : __initcall3_start,
1257 : __initcall4_start,
1258 : __initcall5_start,
1259 : __initcall6_start,
1260 : __initcall7_start,
1261 : __initcall_end,
1262 : };
1263 :
1264 : /* Keep these in sync with initcalls in include/linux/init.h */
1265 : static const char *initcall_level_names[] __initdata = {
1266 : "pure",
1267 : "core",
1268 : "postcore",
1269 : "arch",
1270 : "subsys",
1271 : "fs",
1272 : "device",
1273 : "late",
1274 : };
1275 :
1276 24 : static int __init ignore_unknown_bootoption(char *param, char *val,
1277 : const char *unused, void *arg)
1278 : {
1279 24 : return 0;
1280 : }
1281 :
1282 8 : static void __init do_initcall_level(int level, char *command_line)
1283 : {
1284 : initcall_entry_t *fn;
1285 :
1286 16 : parse_args(initcall_level_names[level],
1287 : command_line, __start___param,
1288 8 : __stop___param - __start___param,
1289 : level, level,
1290 : NULL, ignore_unknown_bootoption);
1291 :
1292 8 : trace_initcall_level(initcall_level_names[level]);
1293 181 : for (fn = initcall_levels[level]; fn < initcall_levels[level+1]; fn++)
1294 173 : do_one_initcall(initcall_from_entry(fn));
1295 8 : }
1296 :
1297 1 : static void __init do_initcalls(void)
1298 : {
1299 : int level;
1300 1 : size_t len = saved_command_line_len + 1;
1301 : char *command_line;
1302 :
1303 1 : command_line = kzalloc(len, GFP_KERNEL);
1304 1 : if (!command_line)
1305 0 : panic("%s: Failed to allocate %zu bytes\n", __func__, len);
1306 :
1307 8 : for (level = 0; level < ARRAY_SIZE(initcall_levels) - 1; level++) {
1308 : /* Parser modifies command_line, restore it each time */
1309 16 : strcpy(command_line, saved_command_line);
1310 8 : do_initcall_level(level, command_line);
1311 : }
1312 :
1313 1 : kfree(command_line);
1314 1 : }
1315 :
1316 : /*
1317 : * Ok, the machine is now initialized. None of the devices
1318 : * have been touched yet, but the CPU subsystem is up and
1319 : * running, and memory and process management works.
1320 : *
1321 : * Now we can finally start doing some real work..
1322 : */
1323 1 : static void __init do_basic_setup(void)
1324 : {
1325 : cpuset_init_smp();
1326 1 : driver_init();
1327 1 : init_irq_proc();
1328 : do_ctors();
1329 1 : do_initcalls();
1330 1 : }
1331 :
1332 1 : static void __init do_pre_smp_initcalls(void)
1333 : {
1334 : initcall_entry_t *fn;
1335 :
1336 1 : trace_initcall_level("early");
1337 9 : for (fn = __initcall_start; fn < __initcall0_start; fn++)
1338 8 : do_one_initcall(initcall_from_entry(fn));
1339 1 : }
1340 :
1341 0 : static int run_init_process(const char *init_filename)
1342 : {
1343 : const char *const *p;
1344 :
1345 0 : argv_init[0] = init_filename;
1346 0 : pr_info("Run %s as init process\n", init_filename);
1347 0 : pr_debug(" with arguments:\n");
1348 0 : for (p = argv_init; *p; p++)
1349 0 : pr_debug(" %s\n", *p);
1350 0 : pr_debug(" with environment:\n");
1351 0 : for (p = envp_init; *p; p++)
1352 0 : pr_debug(" %s\n", *p);
1353 0 : return kernel_execve(init_filename, argv_init, envp_init);
1354 : }
1355 :
/*
 * Try to run @init_filename as init via run_init_process().
 *
 * A failure other than -ENOENT is logged as an error; -ENOENT is
 * passed back silently.
 *
 * Returns the result of run_init_process().
 */
static int try_to_run_init_process(const char *init_filename)
{
	int ret = run_init_process(init_filename);

	/* Success and "no such file" are both reported without a log. */
	if (ret == 0 || ret == -ENOENT)
		return ret;

	pr_err("Starting init: %s exists but couldn't execute it (error %d)\n",
	       init_filename, ret);

	return ret;
}
1369 :
1370 : static noinline void __init kernel_init_freeable(void);
1371 :
1372 : #if defined(CONFIG_STRICT_KERNEL_RWX) || defined(CONFIG_STRICT_MODULE_RWX)
1373 : bool rodata_enabled __ro_after_init = true;
1374 :
1375 : #ifndef arch_parse_debug_rodata
/* Fallback used when no arch_parse_debug_rodata is defined: handles nothing. */
static inline bool arch_parse_debug_rodata(char *str)
{
	return false;
}
1377 : #endif
1378 :
1379 : static int __init set_debug_rodata(char *str)
1380 : {
1381 : if (arch_parse_debug_rodata(str))
1382 : return 0;
1383 :
1384 : if (str && !strcmp(str, "on"))
1385 : rodata_enabled = true;
1386 : else if (str && !strcmp(str, "off"))
1387 : rodata_enabled = false;
1388 : else
1389 : pr_warn("Invalid option string for rodata: '%s'\n", str);
1390 : return 0;
1391 : }
1392 : early_param("rodata", set_debug_rodata);
1393 : #endif
1394 :
1395 : #ifdef CONFIG_STRICT_KERNEL_RWX
1396 : static void mark_readonly(void)
1397 : {
1398 : if (rodata_enabled) {
1399 : /*
1400 : * load_module() results in W+X mappings, which are cleaned
1401 : * up with call_rcu(). Let's make sure that queued work is
1402 : * flushed so that we don't hit false positives looking for
1403 : * insecure pages which are W+X.
1404 : */
1405 : rcu_barrier();
1406 : mark_rodata_ro();
1407 : rodata_test();
1408 : } else
1409 : pr_info("Kernel memory protection disabled.\n");
1410 : }
1411 : #elif defined(CONFIG_ARCH_HAS_STRICT_KERNEL_RWX)
/*
 * Variant used when CONFIG_ARCH_HAS_STRICT_KERNEL_RWX is defined but
 * CONFIG_STRICT_KERNEL_RWX is not: only warns.
 */
static inline void mark_readonly(void)
{
	pr_warn("Kernel memory protection not selected by kernel config.\n");
}
1416 : #else
/*
 * Variant used when neither CONFIG_STRICT_KERNEL_RWX nor
 * CONFIG_ARCH_HAS_STRICT_KERNEL_RWX is defined: only warns.
 */
static inline void mark_readonly(void)
{
	pr_warn("This architecture does not have kernel memory protection.\n");
}
1421 : #endif
1422 :
1423 0 : void __weak free_initmem(void)
1424 : {
1425 0 : free_initmem_default(POISON_FREE_INITMEM);
1426 0 : }
1427 :
1428 1 : static int __ref kernel_init(void *unused)
1429 : {
1430 : int ret;
1431 :
1432 : /*
1433 : * Wait until kthreadd is all set-up.
1434 : */
1435 1 : wait_for_completion(&kthreadd_done);
1436 :
1437 1 : kernel_init_freeable();
1438 : /* need to finish all async __init code before freeing the memory */
1439 0 : async_synchronize_full();
1440 :
1441 0 : system_state = SYSTEM_FREEING_INITMEM;
1442 : kprobe_free_init_mem();
1443 : ftrace_free_init_mem();
1444 : kgdb_free_init_mem();
1445 : exit_boot_config();
1446 0 : free_initmem();
1447 : mark_readonly();
1448 :
1449 : /*
1450 : * Kernel mappings are now finalized - update the userspace page-table
1451 : * to finalize PTI.
1452 : */
1453 : pti_finalize();
1454 :
1455 0 : system_state = SYSTEM_RUNNING;
1456 : numa_default_policy();
1457 :
1458 : rcu_end_inkernel_boot();
1459 :
1460 0 : do_sysctl_args();
1461 :
1462 0 : if (ramdisk_execute_command) {
1463 0 : ret = run_init_process(ramdisk_execute_command);
1464 0 : if (!ret)
1465 : return 0;
1466 0 : pr_err("Failed to execute %s (error %d)\n",
1467 : ramdisk_execute_command, ret);
1468 : }
1469 :
1470 : /*
1471 : * We try each of these until one succeeds.
1472 : *
1473 : * The Bourne shell can be used instead of init if we are
1474 : * trying to recover a really broken machine.
1475 : */
1476 0 : if (execute_command) {
1477 0 : ret = run_init_process(execute_command);
1478 0 : if (!ret)
1479 : return 0;
1480 0 : panic("Requested init %s failed (error %d).",
1481 : execute_command, ret);
1482 : }
1483 :
1484 : if (CONFIG_DEFAULT_INIT[0] != '\0') {
1485 : ret = run_init_process(CONFIG_DEFAULT_INIT);
1486 : if (ret)
1487 : pr_err("Default init %s failed (error %d)\n",
1488 : CONFIG_DEFAULT_INIT, ret);
1489 : else
1490 : return 0;
1491 : }
1492 :
1493 0 : if (!try_to_run_init_process("/sbin/init") ||
1494 0 : !try_to_run_init_process("/etc/init") ||
1495 0 : !try_to_run_init_process("/bin/init") ||
1496 0 : !try_to_run_init_process("/bin/sh"))
1497 : return 0;
1498 :
1499 0 : panic("No working init found. Try passing init= option to kernel. "
1500 : "See Linux Documentation/admin-guide/init.rst for guidance.");
1501 : }
1502 :
1503 : /* Open /dev/console, for stdin/stdout/stderr, this should never fail */
1504 0 : void __init console_on_rootfs(void)
1505 : {
1506 0 : struct file *file = filp_open("/dev/console", O_RDWR, 0);
1507 :
1508 0 : if (IS_ERR(file)) {
1509 0 : pr_err("Warning: unable to open an initial console.\n");
1510 0 : return;
1511 : }
1512 0 : init_dup(file);
1513 0 : init_dup(file);
1514 0 : init_dup(file);
1515 0 : fput(file);
1516 : }
1517 :
1518 1 : static noinline void __init kernel_init_freeable(void)
1519 : {
1520 : /* Now the scheduler is fully set up and can do blocking allocations */
1521 1 : gfp_allowed_mask = __GFP_BITS_MASK;
1522 :
1523 : /*
1524 : * init can allocate pages on any node
1525 : */
1526 1 : set_mems_allowed(node_states[N_MEMORY]);
1527 :
1528 2 : cad_pid = get_pid(task_pid(current));
1529 :
1530 1 : smp_prepare_cpus(setup_max_cpus);
1531 :
1532 1 : workqueue_init();
1533 :
1534 1 : init_mm_internals();
1535 :
1536 : rcu_init_tasks_generic();
1537 1 : do_pre_smp_initcalls();
1538 : lockup_detector_init();
1539 :
1540 : smp_init();
1541 1 : sched_init_smp();
1542 :
1543 : padata_init();
1544 1 : page_alloc_init_late();
1545 :
1546 1 : do_basic_setup();
1547 :
1548 1 : kunit_run_all_tests();
1549 :
1550 : wait_for_initramfs();
1551 0 : console_on_rootfs();
1552 :
1553 : /*
1554 : * check if there is an early userspace init. If yes, let it do all
1555 : * the work
1556 : */
1557 0 : if (init_eaccess(ramdisk_execute_command) != 0) {
1558 0 : ramdisk_execute_command = NULL;
1559 0 : prepare_namespace();
1560 : }
1561 :
1562 : /*
1563 : * Ok, we have completed the initial bootup, and
1564 : * we're essentially up and running. Get rid of the
1565 : * initmem segments and start the user-mode stuff..
1566 : *
1567 : * rootfs is available now, try loading the public keys
1568 : * and default modules
1569 : */
1570 :
1571 : integrity_load_keys();
1572 0 : }
|