Line data Source code
1 : /* CPU control.
2 : * (C) 2001, 2002, 2003, 2004 Rusty Russell
3 : *
4 : * This code is licenced under the GPL.
5 : */
6 : #include <linux/sched/mm.h>
7 : #include <linux/proc_fs.h>
8 : #include <linux/smp.h>
9 : #include <linux/init.h>
10 : #include <linux/notifier.h>
11 : #include <linux/sched/signal.h>
12 : #include <linux/sched/hotplug.h>
13 : #include <linux/sched/isolation.h>
14 : #include <linux/sched/task.h>
15 : #include <linux/sched/smt.h>
16 : #include <linux/unistd.h>
17 : #include <linux/cpu.h>
18 : #include <linux/oom.h>
19 : #include <linux/rcupdate.h>
20 : #include <linux/delay.h>
21 : #include <linux/export.h>
22 : #include <linux/bug.h>
23 : #include <linux/kthread.h>
24 : #include <linux/stop_machine.h>
25 : #include <linux/mutex.h>
26 : #include <linux/gfp.h>
27 : #include <linux/suspend.h>
28 : #include <linux/lockdep.h>
29 : #include <linux/tick.h>
30 : #include <linux/irq.h>
31 : #include <linux/nmi.h>
32 : #include <linux/smpboot.h>
33 : #include <linux/relay.h>
34 : #include <linux/slab.h>
35 : #include <linux/scs.h>
36 : #include <linux/percpu-rwsem.h>
37 : #include <linux/cpuset.h>
38 : #include <linux/random.h>
39 : #include <linux/cc_platform.h>
40 :
41 : #include <trace/events/power.h>
42 : #define CREATE_TRACE_POINTS
43 : #include <trace/events/cpuhp.h>
44 :
45 : #include "smpboot.h"
46 :
47 : /**
48 : * struct cpuhp_cpu_state - Per cpu hotplug state storage
49 : * @state: The current cpu state
50 : * @target: The target state
51 : * @fail: Current CPU hotplug callback state
52 : * @thread: Pointer to the hotplug thread
53 : * @should_run: Thread should execute
54 : * @rollback: Perform a rollback
55 : * @single: Single callback invocation
56 : * @bringup: Single callback bringup or teardown selector
57 : * @cpu: CPU number
58 : * @node: Remote CPU node; for multi-instance, do a
59 : * single entry callback for install/remove
60 : * @last: For multi-instance rollback, remember how far we got
61 : * @cb_state: The state for a single callback (install/uninstall)
62 : * @result: Result of the operation
63 : * @ap_sync_state: State for AP synchronization
64 : * @done_up: Signal completion to the issuer of the task for cpu-up
65 : * @done_down: Signal completion to the issuer of the task for cpu-down
66 : */
67 : struct cpuhp_cpu_state {
68 : enum cpuhp_state state;
69 : enum cpuhp_state target;
70 : enum cpuhp_state fail;
71 : #ifdef CONFIG_SMP
72 : struct task_struct *thread;
73 : bool should_run;
74 : bool rollback;
75 : bool single;
76 : bool bringup;
77 : struct hlist_node *node;
78 : struct hlist_node *last;
79 : enum cpuhp_state cb_state;
80 : int result;
81 : atomic_t ap_sync_state;
82 : struct completion done_up;
83 : struct completion done_down;
84 : #endif
85 : };
86 :
87 : static DEFINE_PER_CPU(struct cpuhp_cpu_state, cpuhp_state) = {
88 : .fail = CPUHP_INVALID,
89 : };
90 :
91 : #ifdef CONFIG_SMP
92 : cpumask_t cpus_booted_once_mask;
93 : #endif
94 :
95 : #if defined(CONFIG_LOCKDEP) && defined(CONFIG_SMP)
96 : static struct lockdep_map cpuhp_state_up_map =
97 : STATIC_LOCKDEP_MAP_INIT("cpuhp_state-up", &cpuhp_state_up_map);
98 : static struct lockdep_map cpuhp_state_down_map =
99 : STATIC_LOCKDEP_MAP_INIT("cpuhp_state-down", &cpuhp_state_down_map);
100 :
101 :
102 : static inline void cpuhp_lock_acquire(bool bringup)
103 : {
104 : lock_map_acquire(bringup ? &cpuhp_state_up_map : &cpuhp_state_down_map);
105 : }
106 :
107 : static inline void cpuhp_lock_release(bool bringup)
108 : {
109 : lock_map_release(bringup ? &cpuhp_state_up_map : &cpuhp_state_down_map);
110 : }
111 : #else
112 :
113 : static inline void cpuhp_lock_acquire(bool bringup) { }
114 : static inline void cpuhp_lock_release(bool bringup) { }
115 :
116 : #endif
117 :
118 : /**
119 : * struct cpuhp_step - Hotplug state machine step
120 : * @name: Name of the step
121 : * @startup: Startup function of the step
122 : * @teardown: Teardown function of the step
123 : * @cant_stop: Bringup/teardown can't be stopped at this step
124 : * @multi_instance: State has multiple instances which get added afterwards
125 : */
126 : struct cpuhp_step {
127 : const char *name;
128 : union {
129 : int (*single)(unsigned int cpu);
130 : int (*multi)(unsigned int cpu,
131 : struct hlist_node *node);
132 : } startup;
133 : union {
134 : int (*single)(unsigned int cpu);
135 : int (*multi)(unsigned int cpu,
136 : struct hlist_node *node);
137 : } teardown;
138 : /* private: */
139 : struct hlist_head list;
140 : /* public: */
141 : bool cant_stop;
142 : bool multi_instance;
143 : };
144 :
145 : static DEFINE_MUTEX(cpuhp_state_mutex);
146 : static struct cpuhp_step cpuhp_hp_states[];
147 :
148 : static struct cpuhp_step *cpuhp_get_step(enum cpuhp_state state)
149 : {
150 27 : return cpuhp_hp_states + state;
151 : }
152 :
153 : static bool cpuhp_step_empty(bool bringup, struct cpuhp_step *step)
154 : {
155 6 : return bringup ? !step->startup.single : !step->teardown.single;
156 : }
157 :
158 : /**
159 : * cpuhp_invoke_callback - Invoke the callbacks for a given state
160 : * @cpu: The cpu for which the callback should be invoked
161 : * @state: The state to do callbacks for
162 : * @bringup: True if the bringup callback should be invoked
163 : * @node: For multi-instance, do a single entry callback for install/remove
164 : * @lastp: For multi-instance rollback, remember how far we got
165 : *
166 : * Called from cpu hotplug and from the state register machinery.
167 : *
168 : * Return: %0 on success or a negative errno code
169 : */
170 3 : static int cpuhp_invoke_callback(unsigned int cpu, enum cpuhp_state state,
171 : bool bringup, struct hlist_node *node,
172 : struct hlist_node **lastp)
173 : {
174 3 : struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
175 3 : struct cpuhp_step *step = cpuhp_get_step(state);
176 : int (*cbm)(unsigned int cpu, struct hlist_node *node);
177 : int (*cb)(unsigned int cpu);
178 : int ret, cnt;
179 :
180 3 : if (st->fail == state) {
181 0 : st->fail = CPUHP_INVALID;
182 0 : return -EAGAIN;
183 : }
184 :
185 3 : if (cpuhp_step_empty(bringup, step)) {
186 0 : WARN_ON_ONCE(1);
187 : return 0;
188 : }
189 :
190 3 : if (!step->multi_instance) {
191 3 : WARN_ON_ONCE(lastp && *lastp);
192 3 : cb = bringup ? step->startup.single : step->teardown.single;
193 :
194 3 : trace_cpuhp_enter(cpu, st->target, state, cb);
195 3 : ret = cb(cpu);
196 3 : trace_cpuhp_exit(cpu, st->state, state, ret);
197 3 : return ret;
198 : }
199 0 : cbm = bringup ? step->startup.multi : step->teardown.multi;
200 :
201 : /* Single invocation for instance add/remove */
202 0 : if (node) {
203 0 : WARN_ON_ONCE(lastp && *lastp);
204 0 : trace_cpuhp_multi_enter(cpu, st->target, state, cbm, node);
205 0 : ret = cbm(cpu, node);
206 0 : trace_cpuhp_exit(cpu, st->state, state, ret);
207 0 : return ret;
208 : }
209 :
210 : /* State transition. Invoke on all instances */
211 0 : cnt = 0;
212 0 : hlist_for_each(node, &step->list) {
213 0 : if (lastp && node == *lastp)
214 : break;
215 :
216 0 : trace_cpuhp_multi_enter(cpu, st->target, state, cbm, node);
217 0 : ret = cbm(cpu, node);
218 0 : trace_cpuhp_exit(cpu, st->state, state, ret);
219 0 : if (ret) {
220 0 : if (!lastp)
221 : goto err;
222 :
223 0 : *lastp = node;
224 0 : return ret;
225 : }
226 0 : cnt++;
227 : }
228 0 : if (lastp)
229 0 : *lastp = NULL;
230 : return 0;
231 : err:
232 : /* Rollback the instances if one failed */
233 0 : cbm = !bringup ? step->startup.multi : step->teardown.multi;
234 0 : if (!cbm)
235 : return ret;
236 :
237 0 : hlist_for_each(node, &step->list) {
238 0 : if (!cnt--)
239 : break;
240 :
241 0 : trace_cpuhp_multi_enter(cpu, st->target, state, cbm, node);
242 0 : ret = cbm(cpu, node);
243 0 : trace_cpuhp_exit(cpu, st->state, state, ret);
244 : /*
245 : * Rollback must not fail,
246 : */
247 0 : WARN_ON_ONCE(ret);
248 : }
249 : return ret;
250 : }
251 :
252 : #ifdef CONFIG_SMP
253 : static bool cpuhp_is_ap_state(enum cpuhp_state state)
254 : {
255 : /*
256 : * The extra check for CPUHP_TEARDOWN_CPU is only for documentation
257 : * purposes as that state is handled explicitly in cpu_down.
258 : */
259 : return state > CPUHP_BRINGUP_CPU && state != CPUHP_TEARDOWN_CPU;
260 : }
261 :
262 : static inline void wait_for_ap_thread(struct cpuhp_cpu_state *st, bool bringup)
263 : {
264 : struct completion *done = bringup ? &st->done_up : &st->done_down;
265 : wait_for_completion(done);
266 : }
267 :
268 : static inline void complete_ap_thread(struct cpuhp_cpu_state *st, bool bringup)
269 : {
270 : struct completion *done = bringup ? &st->done_up : &st->done_down;
271 : complete(done);
272 : }
273 :
274 : /*
275 : * The former STARTING/DYING states, ran with IRQs disabled and must not fail.
276 : */
277 : static bool cpuhp_is_atomic_state(enum cpuhp_state state)
278 : {
279 : return CPUHP_AP_IDLE_DEAD <= state && state < CPUHP_AP_ONLINE;
280 : }
281 :
282 : /* Synchronization state management */
283 : enum cpuhp_sync_state {
284 : SYNC_STATE_DEAD,
285 : SYNC_STATE_KICKED,
286 : SYNC_STATE_SHOULD_DIE,
287 : SYNC_STATE_ALIVE,
288 : SYNC_STATE_SHOULD_ONLINE,
289 : SYNC_STATE_ONLINE,
290 : };
291 :
292 : #ifdef CONFIG_HOTPLUG_CORE_SYNC
293 : /**
294 : * cpuhp_ap_update_sync_state - Update synchronization state during bringup/teardown
295 : * @state: The synchronization state to set
296 : *
297 : * No synchronization point. Just update of the synchronization state, but implies
298 : * a full barrier so that the AP changes are visible before the control CPU proceeds.
299 : */
300 : static inline void cpuhp_ap_update_sync_state(enum cpuhp_sync_state state)
301 : {
302 : atomic_t *st = this_cpu_ptr(&cpuhp_state.ap_sync_state);
303 :
304 : (void)atomic_xchg(st, state);
305 : }
306 :
307 : void __weak arch_cpuhp_sync_state_poll(void) { cpu_relax(); }
308 :
309 : static bool cpuhp_wait_for_sync_state(unsigned int cpu, enum cpuhp_sync_state state,
310 : enum cpuhp_sync_state next_state)
311 : {
312 : atomic_t *st = per_cpu_ptr(&cpuhp_state.ap_sync_state, cpu);
313 : ktime_t now, end, start = ktime_get();
314 : int sync;
315 :
316 : end = start + 10ULL * NSEC_PER_SEC;
317 :
318 : sync = atomic_read(st);
319 : while (1) {
320 : if (sync == state) {
321 : if (!atomic_try_cmpxchg(st, &sync, next_state))
322 : continue;
323 : return true;
324 : }
325 :
326 : now = ktime_get();
327 : if (now > end) {
328 : /* Timeout. Leave the state unchanged */
329 : return false;
330 : } else if (now - start < NSEC_PER_MSEC) {
331 : /* Poll for one millisecond */
332 : arch_cpuhp_sync_state_poll();
333 : } else {
334 : usleep_range_state(USEC_PER_MSEC, 2 * USEC_PER_MSEC, TASK_UNINTERRUPTIBLE);
335 : }
336 : sync = atomic_read(st);
337 : }
338 : return true;
339 : }
340 : #else /* CONFIG_HOTPLUG_CORE_SYNC */
341 : static inline void cpuhp_ap_update_sync_state(enum cpuhp_sync_state state) { }
342 : #endif /* !CONFIG_HOTPLUG_CORE_SYNC */
343 :
344 : #ifdef CONFIG_HOTPLUG_CORE_SYNC_DEAD
345 : /**
346 : * cpuhp_ap_report_dead - Update synchronization state to DEAD
347 : *
348 : * No synchronization point. Just update of the synchronization state.
349 : */
350 : void cpuhp_ap_report_dead(void)
351 : {
352 : cpuhp_ap_update_sync_state(SYNC_STATE_DEAD);
353 : }
354 :
355 : void __weak arch_cpuhp_cleanup_dead_cpu(unsigned int cpu) { }
356 :
357 : /*
358 : * Late CPU shutdown synchronization point. Cannot use cpuhp_state::done_down
359 : * because the AP cannot issue complete() at this stage.
360 : */
361 : static void cpuhp_bp_sync_dead(unsigned int cpu)
362 : {
363 : atomic_t *st = per_cpu_ptr(&cpuhp_state.ap_sync_state, cpu);
364 : int sync = atomic_read(st);
365 :
366 : do {
367 : /* CPU can have reported dead already. Don't overwrite that! */
368 : if (sync == SYNC_STATE_DEAD)
369 : break;
370 : } while (!atomic_try_cmpxchg(st, &sync, SYNC_STATE_SHOULD_DIE));
371 :
372 : if (cpuhp_wait_for_sync_state(cpu, SYNC_STATE_DEAD, SYNC_STATE_DEAD)) {
373 : /* CPU reached dead state. Invoke the cleanup function */
374 : arch_cpuhp_cleanup_dead_cpu(cpu);
375 : return;
376 : }
377 :
378 : /* No further action possible. Emit message and give up. */
379 : pr_err("CPU%u failed to report dead state\n", cpu);
380 : }
381 : #else /* CONFIG_HOTPLUG_CORE_SYNC_DEAD */
382 : static inline void cpuhp_bp_sync_dead(unsigned int cpu) { }
383 : #endif /* !CONFIG_HOTPLUG_CORE_SYNC_DEAD */
384 :
385 : #ifdef CONFIG_HOTPLUG_CORE_SYNC_FULL
386 : /**
387 : * cpuhp_ap_sync_alive - Synchronize AP with the control CPU once it is alive
388 : *
389 : * Updates the AP synchronization state to SYNC_STATE_ALIVE and waits
390 : * for the BP to release it.
391 : */
392 : void cpuhp_ap_sync_alive(void)
393 : {
394 : atomic_t *st = this_cpu_ptr(&cpuhp_state.ap_sync_state);
395 :
396 : cpuhp_ap_update_sync_state(SYNC_STATE_ALIVE);
397 :
398 : /* Wait for the control CPU to release it. */
399 : while (atomic_read(st) != SYNC_STATE_SHOULD_ONLINE)
400 : cpu_relax();
401 : }
402 :
403 : static bool cpuhp_can_boot_ap(unsigned int cpu)
404 : {
405 : atomic_t *st = per_cpu_ptr(&cpuhp_state.ap_sync_state, cpu);
406 : int sync = atomic_read(st);
407 :
408 : again:
409 : switch (sync) {
410 : case SYNC_STATE_DEAD:
411 : /* CPU is properly dead */
412 : break;
413 : case SYNC_STATE_KICKED:
414 : /* CPU did not come up in previous attempt */
415 : break;
416 : case SYNC_STATE_ALIVE:
417 : /* CPU is stuck cpuhp_ap_sync_alive(). */
418 : break;
419 : default:
420 : /* CPU failed to report online or dead and is in limbo state. */
421 : return false;
422 : }
423 :
424 : /* Prepare for booting */
425 : if (!atomic_try_cmpxchg(st, &sync, SYNC_STATE_KICKED))
426 : goto again;
427 :
428 : return true;
429 : }
430 :
431 : void __weak arch_cpuhp_cleanup_kick_cpu(unsigned int cpu) { }
432 :
433 : /*
434 : * Early CPU bringup synchronization point. Cannot use cpuhp_state::done_up
435 : * because the AP cannot issue complete() so early in the bringup.
436 : */
437 : static int cpuhp_bp_sync_alive(unsigned int cpu)
438 : {
439 : int ret = 0;
440 :
441 : if (!IS_ENABLED(CONFIG_HOTPLUG_CORE_SYNC_FULL))
442 : return 0;
443 :
444 : if (!cpuhp_wait_for_sync_state(cpu, SYNC_STATE_ALIVE, SYNC_STATE_SHOULD_ONLINE)) {
445 : pr_err("CPU%u failed to report alive state\n", cpu);
446 : ret = -EIO;
447 : }
448 :
449 : /* Let the architecture cleanup the kick alive mechanics. */
450 : arch_cpuhp_cleanup_kick_cpu(cpu);
451 : return ret;
452 : }
453 : #else /* CONFIG_HOTPLUG_CORE_SYNC_FULL */
454 : static inline int cpuhp_bp_sync_alive(unsigned int cpu) { return 0; }
455 : static inline bool cpuhp_can_boot_ap(unsigned int cpu) { return true; }
456 : #endif /* !CONFIG_HOTPLUG_CORE_SYNC_FULL */
457 :
458 : /* Serializes the updates to cpu_online_mask, cpu_present_mask */
459 : static DEFINE_MUTEX(cpu_add_remove_lock);
460 : bool cpuhp_tasks_frozen;
461 : EXPORT_SYMBOL_GPL(cpuhp_tasks_frozen);
462 :
463 : /*
464 : * The following two APIs (cpu_maps_update_begin/done) must be used when
465 : * attempting to serialize the updates to cpu_online_mask & cpu_present_mask.
466 : */
467 : void cpu_maps_update_begin(void)
468 : {
469 : mutex_lock(&cpu_add_remove_lock);
470 : }
471 :
472 : void cpu_maps_update_done(void)
473 : {
474 : mutex_unlock(&cpu_add_remove_lock);
475 : }
476 :
477 : /*
478 : * If set, cpu_up and cpu_down will return -EBUSY and do nothing.
479 : * Should always be manipulated under cpu_add_remove_lock
480 : */
481 : static int cpu_hotplug_disabled;
482 :
483 : #ifdef CONFIG_HOTPLUG_CPU
484 :
485 : DEFINE_STATIC_PERCPU_RWSEM(cpu_hotplug_lock);
486 :
487 : void cpus_read_lock(void)
488 : {
489 : percpu_down_read(&cpu_hotplug_lock);
490 : }
491 : EXPORT_SYMBOL_GPL(cpus_read_lock);
492 :
493 : int cpus_read_trylock(void)
494 : {
495 : return percpu_down_read_trylock(&cpu_hotplug_lock);
496 : }
497 : EXPORT_SYMBOL_GPL(cpus_read_trylock);
498 :
499 : void cpus_read_unlock(void)
500 : {
501 : percpu_up_read(&cpu_hotplug_lock);
502 : }
503 : EXPORT_SYMBOL_GPL(cpus_read_unlock);
504 :
505 : void cpus_write_lock(void)
506 : {
507 : percpu_down_write(&cpu_hotplug_lock);
508 : }
509 :
510 : void cpus_write_unlock(void)
511 : {
512 : percpu_up_write(&cpu_hotplug_lock);
513 : }
514 :
515 : void lockdep_assert_cpus_held(void)
516 : {
517 : /*
518 : * We can't have hotplug operations before userspace starts running,
519 : * and some init codepaths will knowingly not take the hotplug lock.
520 : * This is all valid, so mute lockdep until it makes sense to report
521 : * unheld locks.
522 : */
523 : if (system_state < SYSTEM_RUNNING)
524 : return;
525 :
526 : percpu_rwsem_assert_held(&cpu_hotplug_lock);
527 : }
528 :
529 : #ifdef CONFIG_LOCKDEP
530 : int lockdep_is_cpus_held(void)
531 : {
532 : return percpu_rwsem_is_held(&cpu_hotplug_lock);
533 : }
534 : #endif
535 :
536 : static void lockdep_acquire_cpus_lock(void)
537 : {
538 : rwsem_acquire(&cpu_hotplug_lock.dep_map, 0, 0, _THIS_IP_);
539 : }
540 :
541 : static void lockdep_release_cpus_lock(void)
542 : {
543 : rwsem_release(&cpu_hotplug_lock.dep_map, _THIS_IP_);
544 : }
545 :
546 : /*
547 : * Wait for currently running CPU hotplug operations to complete (if any) and
548 : * disable future CPU hotplug (from sysfs). The 'cpu_add_remove_lock' protects
549 : * the 'cpu_hotplug_disabled' flag. The same lock is also acquired by the
550 : * hotplug path before performing hotplug operations. So acquiring that lock
551 : * guarantees mutual exclusion from any currently running hotplug operations.
552 : */
553 : void cpu_hotplug_disable(void)
554 : {
555 : cpu_maps_update_begin();
556 : cpu_hotplug_disabled++;
557 : cpu_maps_update_done();
558 : }
559 : EXPORT_SYMBOL_GPL(cpu_hotplug_disable);
560 :
561 : static void __cpu_hotplug_enable(void)
562 : {
563 : if (WARN_ONCE(!cpu_hotplug_disabled, "Unbalanced cpu hotplug enable\n"))
564 : return;
565 : cpu_hotplug_disabled--;
566 : }
567 :
568 : void cpu_hotplug_enable(void)
569 : {
570 : cpu_maps_update_begin();
571 : __cpu_hotplug_enable();
572 : cpu_maps_update_done();
573 : }
574 : EXPORT_SYMBOL_GPL(cpu_hotplug_enable);
575 :
576 : #else
577 :
578 : static void lockdep_acquire_cpus_lock(void)
579 : {
580 : }
581 :
582 : static void lockdep_release_cpus_lock(void)
583 : {
584 : }
585 :
586 : #endif /* CONFIG_HOTPLUG_CPU */
587 :
588 : /*
589 : * Architectures that need SMT-specific errata handling during SMT hotplug
590 : * should override this.
591 : */
592 : void __weak arch_smt_update(void) { }
593 :
594 : #ifdef CONFIG_HOTPLUG_SMT
595 : enum cpuhp_smt_control cpu_smt_control __read_mostly = CPU_SMT_ENABLED;
596 :
597 : void __init cpu_smt_disable(bool force)
598 : {
599 : if (!cpu_smt_possible())
600 : return;
601 :
602 : if (force) {
603 : pr_info("SMT: Force disabled\n");
604 : cpu_smt_control = CPU_SMT_FORCE_DISABLED;
605 : } else {
606 : pr_info("SMT: disabled\n");
607 : cpu_smt_control = CPU_SMT_DISABLED;
608 : }
609 : }
610 :
611 : /*
612 : * The decision whether SMT is supported can only be done after the full
613 : * CPU identification. Called from architecture code.
614 : */
615 : void __init cpu_smt_check_topology(void)
616 : {
617 : if (!topology_smt_supported())
618 : cpu_smt_control = CPU_SMT_NOT_SUPPORTED;
619 : }
620 :
621 : static int __init smt_cmdline_disable(char *str)
622 : {
623 : cpu_smt_disable(str && !strcmp(str, "force"));
624 : return 0;
625 : }
626 : early_param("nosmt", smt_cmdline_disable);
627 :
628 : static inline bool cpu_smt_allowed(unsigned int cpu)
629 : {
630 : if (cpu_smt_control == CPU_SMT_ENABLED)
631 : return true;
632 :
633 : if (topology_is_primary_thread(cpu))
634 : return true;
635 :
636 : /*
637 : * On x86 it's required to boot all logical CPUs at least once so
638 : * that the init code can get a chance to set CR4.MCE on each
639 : * CPU. Otherwise, a broadcasted MCE observing CR4.MCE=0b on any
640 : * core will shutdown the machine.
641 : */
642 : return !cpumask_test_cpu(cpu, &cpus_booted_once_mask);
643 : }
644 :
645 : /* Returns true if SMT is not supported of forcefully (irreversibly) disabled */
646 : bool cpu_smt_possible(void)
647 : {
648 : return cpu_smt_control != CPU_SMT_FORCE_DISABLED &&
649 : cpu_smt_control != CPU_SMT_NOT_SUPPORTED;
650 : }
651 : EXPORT_SYMBOL_GPL(cpu_smt_possible);
652 :
653 : static inline bool cpuhp_smt_aware(void)
654 : {
655 : return topology_smt_supported();
656 : }
657 :
658 : static inline const struct cpumask *cpuhp_get_primary_thread_mask(void)
659 : {
660 : return cpu_primary_thread_mask;
661 : }
662 : #else
663 : static inline bool cpu_smt_allowed(unsigned int cpu) { return true; }
664 : static inline bool cpuhp_smt_aware(void) { return false; }
665 : static inline const struct cpumask *cpuhp_get_primary_thread_mask(void)
666 : {
667 : return cpu_present_mask;
668 : }
669 : #endif
670 :
671 : static inline enum cpuhp_state
672 : cpuhp_set_state(int cpu, struct cpuhp_cpu_state *st, enum cpuhp_state target)
673 : {
674 : enum cpuhp_state prev_state = st->state;
675 : bool bringup = st->state < target;
676 :
677 : st->rollback = false;
678 : st->last = NULL;
679 :
680 : st->target = target;
681 : st->single = false;
682 : st->bringup = bringup;
683 : if (cpu_dying(cpu) != !bringup)
684 : set_cpu_dying(cpu, !bringup);
685 :
686 : return prev_state;
687 : }
688 :
689 : static inline void
690 : cpuhp_reset_state(int cpu, struct cpuhp_cpu_state *st,
691 : enum cpuhp_state prev_state)
692 : {
693 : bool bringup = !st->bringup;
694 :
695 : st->target = prev_state;
696 :
697 : /*
698 : * Already rolling back. No need invert the bringup value or to change
699 : * the current state.
700 : */
701 : if (st->rollback)
702 : return;
703 :
704 : st->rollback = true;
705 :
706 : /*
707 : * If we have st->last we need to undo partial multi_instance of this
708 : * state first. Otherwise start undo at the previous state.
709 : */
710 : if (!st->last) {
711 : if (st->bringup)
712 : st->state--;
713 : else
714 : st->state++;
715 : }
716 :
717 : st->bringup = bringup;
718 : if (cpu_dying(cpu) != !bringup)
719 : set_cpu_dying(cpu, !bringup);
720 : }
721 :
722 : /* Regular hotplug invocation of the AP hotplug thread */
723 : static void __cpuhp_kick_ap(struct cpuhp_cpu_state *st)
724 : {
725 : if (!st->single && st->state == st->target)
726 : return;
727 :
728 : st->result = 0;
729 : /*
730 : * Make sure the above stores are visible before should_run becomes
731 : * true. Paired with the mb() above in cpuhp_thread_fun()
732 : */
733 : smp_mb();
734 : st->should_run = true;
735 : wake_up_process(st->thread);
736 : wait_for_ap_thread(st, st->bringup);
737 : }
738 :
739 : static int cpuhp_kick_ap(int cpu, struct cpuhp_cpu_state *st,
740 : enum cpuhp_state target)
741 : {
742 : enum cpuhp_state prev_state;
743 : int ret;
744 :
745 : prev_state = cpuhp_set_state(cpu, st, target);
746 : __cpuhp_kick_ap(st);
747 : if ((ret = st->result)) {
748 : cpuhp_reset_state(cpu, st, prev_state);
749 : __cpuhp_kick_ap(st);
750 : }
751 :
752 : return ret;
753 : }
754 :
755 : static int bringup_wait_for_ap_online(unsigned int cpu)
756 : {
757 : struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
758 :
759 : /* Wait for the CPU to reach CPUHP_AP_ONLINE_IDLE */
760 : wait_for_ap_thread(st, true);
761 : if (WARN_ON_ONCE((!cpu_online(cpu))))
762 : return -ECANCELED;
763 :
764 : /* Unpark the hotplug thread of the target cpu */
765 : kthread_unpark(st->thread);
766 :
767 : /*
768 : * SMT soft disabling on X86 requires to bring the CPU out of the
769 : * BIOS 'wait for SIPI' state in order to set the CR4.MCE bit. The
770 : * CPU marked itself as booted_once in notify_cpu_starting() so the
771 : * cpu_smt_allowed() check will now return false if this is not the
772 : * primary sibling.
773 : */
774 : if (!cpu_smt_allowed(cpu))
775 : return -ECANCELED;
776 : return 0;
777 : }
778 :
779 : #ifdef CONFIG_HOTPLUG_SPLIT_STARTUP
780 : static int cpuhp_kick_ap_alive(unsigned int cpu)
781 : {
782 : if (!cpuhp_can_boot_ap(cpu))
783 : return -EAGAIN;
784 :
785 : return arch_cpuhp_kick_ap_alive(cpu, idle_thread_get(cpu));
786 : }
787 :
788 : static int cpuhp_bringup_ap(unsigned int cpu)
789 : {
790 : struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
791 : int ret;
792 :
793 : /*
794 : * Some architectures have to walk the irq descriptors to
795 : * setup the vector space for the cpu which comes online.
796 : * Prevent irq alloc/free across the bringup.
797 : */
798 : irq_lock_sparse();
799 :
800 : ret = cpuhp_bp_sync_alive(cpu);
801 : if (ret)
802 : goto out_unlock;
803 :
804 : ret = bringup_wait_for_ap_online(cpu);
805 : if (ret)
806 : goto out_unlock;
807 :
808 : irq_unlock_sparse();
809 :
810 : if (st->target <= CPUHP_AP_ONLINE_IDLE)
811 : return 0;
812 :
813 : return cpuhp_kick_ap(cpu, st, st->target);
814 :
815 : out_unlock:
816 : irq_unlock_sparse();
817 : return ret;
818 : }
819 : #else
820 : static int bringup_cpu(unsigned int cpu)
821 : {
822 : struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
823 : struct task_struct *idle = idle_thread_get(cpu);
824 : int ret;
825 :
826 : if (!cpuhp_can_boot_ap(cpu))
827 : return -EAGAIN;
828 :
829 : /*
830 : * Some architectures have to walk the irq descriptors to
831 : * setup the vector space for the cpu which comes online.
832 : *
833 : * Prevent irq alloc/free across the bringup by acquiring the
834 : * sparse irq lock. Hold it until the upcoming CPU completes the
835 : * startup in cpuhp_online_idle() which allows to avoid
836 : * intermediate synchronization points in the architecture code.
837 : */
838 : irq_lock_sparse();
839 :
840 : ret = __cpu_up(cpu, idle);
841 : if (ret)
842 : goto out_unlock;
843 :
844 : ret = cpuhp_bp_sync_alive(cpu);
845 : if (ret)
846 : goto out_unlock;
847 :
848 : ret = bringup_wait_for_ap_online(cpu);
849 : if (ret)
850 : goto out_unlock;
851 :
852 : irq_unlock_sparse();
853 :
854 : if (st->target <= CPUHP_AP_ONLINE_IDLE)
855 : return 0;
856 :
857 : return cpuhp_kick_ap(cpu, st, st->target);
858 :
859 : out_unlock:
860 : irq_unlock_sparse();
861 : return ret;
862 : }
863 : #endif
864 :
865 : static int finish_cpu(unsigned int cpu)
866 : {
867 : struct task_struct *idle = idle_thread_get(cpu);
868 : struct mm_struct *mm = idle->active_mm;
869 :
870 : /*
871 : * idle_task_exit() will have switched to &init_mm, now
872 : * clean up any remaining active_mm state.
873 : */
874 : if (mm != &init_mm)
875 : idle->active_mm = &init_mm;
876 : mmdrop_lazy_tlb(mm);
877 : return 0;
878 : }
879 :
880 : /*
881 : * Hotplug state machine related functions
882 : */
883 :
884 : /*
885 : * Get the next state to run. Empty ones will be skipped. Returns true if a
886 : * state must be run.
887 : *
888 : * st->state will be modified ahead of time, to match state_to_run, as if it
889 : * has already ran.
890 : */
891 : static bool cpuhp_next_state(bool bringup,
892 : enum cpuhp_state *state_to_run,
893 : struct cpuhp_cpu_state *st,
894 : enum cpuhp_state target)
895 : {
896 : do {
897 : if (bringup) {
898 : if (st->state >= target)
899 : return false;
900 :
901 : *state_to_run = ++st->state;
902 : } else {
903 : if (st->state <= target)
904 : return false;
905 :
906 : *state_to_run = st->state--;
907 : }
908 :
909 : if (!cpuhp_step_empty(bringup, cpuhp_get_step(*state_to_run)))
910 : break;
911 : } while (true);
912 :
913 : return true;
914 : }
915 :
916 : static int __cpuhp_invoke_callback_range(bool bringup,
917 : unsigned int cpu,
918 : struct cpuhp_cpu_state *st,
919 : enum cpuhp_state target,
920 : bool nofail)
921 : {
922 : enum cpuhp_state state;
923 : int ret = 0;
924 :
925 : while (cpuhp_next_state(bringup, &state, st, target)) {
926 : int err;
927 :
928 : err = cpuhp_invoke_callback(cpu, state, bringup, NULL, NULL);
929 : if (!err)
930 : continue;
931 :
932 : if (nofail) {
933 : pr_warn("CPU %u %s state %s (%d) failed (%d)\n",
934 : cpu, bringup ? "UP" : "DOWN",
935 : cpuhp_get_step(st->state)->name,
936 : st->state, err);
937 : ret = -1;
938 : } else {
939 : ret = err;
940 : break;
941 : }
942 : }
943 :
944 : return ret;
945 : }
946 :
947 : static inline int cpuhp_invoke_callback_range(bool bringup,
948 : unsigned int cpu,
949 : struct cpuhp_cpu_state *st,
950 : enum cpuhp_state target)
951 : {
952 : return __cpuhp_invoke_callback_range(bringup, cpu, st, target, false);
953 : }
954 :
955 : static inline void cpuhp_invoke_callback_range_nofail(bool bringup,
956 : unsigned int cpu,
957 : struct cpuhp_cpu_state *st,
958 : enum cpuhp_state target)
959 : {
960 : __cpuhp_invoke_callback_range(bringup, cpu, st, target, true);
961 : }
962 :
963 : static inline bool can_rollback_cpu(struct cpuhp_cpu_state *st)
964 : {
965 : if (IS_ENABLED(CONFIG_HOTPLUG_CPU))
966 : return true;
967 : /*
968 : * When CPU hotplug is disabled, then taking the CPU down is not
969 : * possible because takedown_cpu() and the architecture and
970 : * subsystem specific mechanisms are not available. So the CPU
971 : * which would be completely unplugged again needs to stay around
972 : * in the current state.
973 : */
974 : return st->state <= CPUHP_BRINGUP_CPU;
975 : }
976 :
977 : static int cpuhp_up_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st,
978 : enum cpuhp_state target)
979 : {
980 : enum cpuhp_state prev_state = st->state;
981 : int ret = 0;
982 :
983 : ret = cpuhp_invoke_callback_range(true, cpu, st, target);
984 : if (ret) {
985 : pr_debug("CPU UP failed (%d) CPU %u state %s (%d)\n",
986 : ret, cpu, cpuhp_get_step(st->state)->name,
987 : st->state);
988 :
989 : cpuhp_reset_state(cpu, st, prev_state);
990 : if (can_rollback_cpu(st))
991 : WARN_ON(cpuhp_invoke_callback_range(false, cpu, st,
992 : prev_state));
993 : }
994 : return ret;
995 : }
996 :
997 : /*
998 : * The cpu hotplug threads manage the bringup and teardown of the cpus
999 : */
1000 : static int cpuhp_should_run(unsigned int cpu)
1001 : {
1002 : struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
1003 :
1004 : return st->should_run;
1005 : }
1006 :
1007 : /*
1008 : * Execute teardown/startup callbacks on the plugged cpu. Also used to invoke
1009 : * callbacks when a state gets [un]installed at runtime.
1010 : *
1011 : * Each invocation of this function by the smpboot thread does a single AP
1012 : * state callback.
1013 : *
1014 : * It has 3 modes of operation:
1015 : * - single: runs st->cb_state
1016 : * - up: runs ++st->state, while st->state < st->target
1017 : * - down: runs st->state--, while st->state > st->target
1018 : *
1019 : * When complete or on error, should_run is cleared and the completion is fired.
1020 : */
1021 : static void cpuhp_thread_fun(unsigned int cpu)
1022 : {
1023 : struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
1024 : bool bringup = st->bringup;
1025 : enum cpuhp_state state;
1026 :
1027 : if (WARN_ON_ONCE(!st->should_run))
1028 : return;
1029 :
1030 : /*
1031 : * ACQUIRE for the cpuhp_should_run() load of ->should_run. Ensures
1032 : * that if we see ->should_run we also see the rest of the state.
1033 : */
1034 : smp_mb();
1035 :
1036 : /*
1037 : * The BP holds the hotplug lock, but we're now running on the AP,
1038 : * ensure that anybody asserting the lock is held, will actually find
1039 : * it so.
1040 : */
1041 : lockdep_acquire_cpus_lock();
1042 : cpuhp_lock_acquire(bringup);
1043 :
1044 : if (st->single) {
1045 : state = st->cb_state;
1046 : st->should_run = false;
1047 : } else {
1048 : st->should_run = cpuhp_next_state(bringup, &state, st, st->target);
1049 : if (!st->should_run)
1050 : goto end;
1051 : }
1052 :
1053 : WARN_ON_ONCE(!cpuhp_is_ap_state(state));
1054 :
1055 : if (cpuhp_is_atomic_state(state)) {
1056 : local_irq_disable();
1057 : st->result = cpuhp_invoke_callback(cpu, state, bringup, st->node, &st->last);
1058 : local_irq_enable();
1059 :
1060 : /*
1061 : * STARTING/DYING must not fail!
1062 : */
1063 : WARN_ON_ONCE(st->result);
1064 : } else {
1065 : st->result = cpuhp_invoke_callback(cpu, state, bringup, st->node, &st->last);
1066 : }
1067 :
1068 : if (st->result) {
1069 : /*
1070 : * If we fail on a rollback, we're up a creek without no
1071 : * paddle, no way forward, no way back. We loose, thanks for
1072 : * playing.
1073 : */
1074 : WARN_ON_ONCE(st->rollback);
1075 : st->should_run = false;
1076 : }
1077 :
1078 : end:
1079 : cpuhp_lock_release(bringup);
1080 : lockdep_release_cpus_lock();
1081 :
1082 : if (!st->should_run)
1083 : complete_ap_thread(st, bringup);
1084 : }
1085 :
1086 : /* Invoke a single callback on a remote cpu */
1087 : static int
1088 : cpuhp_invoke_ap_callback(int cpu, enum cpuhp_state state, bool bringup,
1089 : struct hlist_node *node)
1090 : {
1091 : struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
1092 : int ret;
1093 :
1094 : if (!cpu_online(cpu))
1095 : return 0;
1096 :
1097 : cpuhp_lock_acquire(false);
1098 : cpuhp_lock_release(false);
1099 :
1100 : cpuhp_lock_acquire(true);
1101 : cpuhp_lock_release(true);
1102 :
1103 : /*
1104 : * If we are up and running, use the hotplug thread. For early calls
1105 : * we invoke the thread function directly.
1106 : */
1107 : if (!st->thread)
1108 : return cpuhp_invoke_callback(cpu, state, bringup, node, NULL);
1109 :
1110 : st->rollback = false;
1111 : st->last = NULL;
1112 :
1113 : st->node = node;
1114 : st->bringup = bringup;
1115 : st->cb_state = state;
1116 : st->single = true;
1117 :
1118 : __cpuhp_kick_ap(st);
1119 :
1120 : /*
1121 : * If we failed and did a partial, do a rollback.
1122 : */
1123 : if ((ret = st->result) && st->last) {
1124 : st->rollback = true;
1125 : st->bringup = !bringup;
1126 :
1127 : __cpuhp_kick_ap(st);
1128 : }
1129 :
1130 : /*
1131 : * Clean up the leftovers so the next hotplug operation wont use stale
1132 : * data.
1133 : */
1134 : st->node = st->last = NULL;
1135 : return ret;
1136 : }
1137 :
1138 : static int cpuhp_kick_ap_work(unsigned int cpu)
1139 : {
1140 : struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
1141 : enum cpuhp_state prev_state = st->state;
1142 : int ret;
1143 :
1144 : cpuhp_lock_acquire(false);
1145 : cpuhp_lock_release(false);
1146 :
1147 : cpuhp_lock_acquire(true);
1148 : cpuhp_lock_release(true);
1149 :
1150 : trace_cpuhp_enter(cpu, st->target, prev_state, cpuhp_kick_ap_work);
1151 : ret = cpuhp_kick_ap(cpu, st, st->target);
1152 : trace_cpuhp_exit(cpu, st->state, prev_state, ret);
1153 :
1154 : return ret;
1155 : }
1156 :
1157 : static struct smp_hotplug_thread cpuhp_threads = {
1158 : .store = &cpuhp_state.thread,
1159 : .thread_should_run = cpuhp_should_run,
1160 : .thread_fn = cpuhp_thread_fun,
1161 : .thread_comm = "cpuhp/%u",
1162 : .selfparking = true,
1163 : };
1164 :
1165 : static __init void cpuhp_init_state(void)
1166 : {
1167 : struct cpuhp_cpu_state *st;
1168 : int cpu;
1169 :
1170 : for_each_possible_cpu(cpu) {
1171 : st = per_cpu_ptr(&cpuhp_state, cpu);
1172 : init_completion(&st->done_up);
1173 : init_completion(&st->done_down);
1174 : }
1175 : }
1176 :
1177 : void __init cpuhp_threads_init(void)
1178 : {
1179 : cpuhp_init_state();
1180 : BUG_ON(smpboot_register_percpu_thread(&cpuhp_threads));
1181 : kthread_unpark(this_cpu_read(cpuhp_state.thread));
1182 : }
1183 :
1184 : /*
1185 : *
1186 : * Serialize hotplug trainwrecks outside of the cpu_hotplug_lock
1187 : * protected region.
1188 : *
1189 : * The operation is still serialized against concurrent CPU hotplug via
1190 : * cpu_add_remove_lock, i.e. CPU map protection. But it is _not_
1191 : * serialized against other hotplug related activity like adding or
1192 : * removing of state callbacks and state instances, which invoke either the
1193 : * startup or the teardown callback of the affected state.
1194 : *
1195 : * This is required for subsystems which are unfixable vs. CPU hotplug and
1196 : * evade lock inversion problems by scheduling work which has to be
1197 : * completed _before_ cpu_up()/_cpu_down() returns.
1198 : *
1199 : * Don't even think about adding anything to this for any new code or even
1200 : * drivers. It's only purpose is to keep existing lock order trainwrecks
1201 : * working.
1202 : *
1203 : * For cpu_down() there might be valid reasons to finish cleanups which are
1204 : * not required to be done under cpu_hotplug_lock, but that's a different
1205 : * story and would be not invoked via this.
1206 : */
1207 : static void cpu_up_down_serialize_trainwrecks(bool tasks_frozen)
1208 : {
1209 : /*
1210 : * cpusets delegate hotplug operations to a worker to "solve" the
1211 : * lock order problems. Wait for the worker, but only if tasks are
1212 : * _not_ frozen (suspend, hibernate) as that would wait forever.
1213 : *
1214 : * The wait is required because otherwise the hotplug operation
1215 : * returns with inconsistent state, which could even be observed in
1216 : * user space when a new CPU is brought up. The CPU plug uevent
1217 : * would be delivered and user space reacting on it would fail to
1218 : * move tasks to the newly plugged CPU up to the point where the
1219 : * work has finished because up to that point the newly plugged CPU
1220 : * is not assignable in cpusets/cgroups. On unplug that's not
1221 : * necessarily a visible issue, but it is still inconsistent state,
1222 : * which is the real problem which needs to be "fixed". This can't
1223 : * prevent the transient state between scheduling the work and
1224 : * returning from waiting for it.
1225 : */
1226 : if (!tasks_frozen)
1227 : cpuset_wait_for_hotplug();
1228 : }
1229 :
1230 : #ifdef CONFIG_HOTPLUG_CPU
1231 : #ifndef arch_clear_mm_cpumask_cpu
1232 : #define arch_clear_mm_cpumask_cpu(cpu, mm) cpumask_clear_cpu(cpu, mm_cpumask(mm))
1233 : #endif
1234 :
1235 : /**
1236 : * clear_tasks_mm_cpumask - Safely clear tasks' mm_cpumask for a CPU
1237 : * @cpu: a CPU id
1238 : *
1239 : * This function walks all processes, finds a valid mm struct for each one and
1240 : * then clears a corresponding bit in mm's cpumask. While this all sounds
1241 : * trivial, there are various non-obvious corner cases, which this function
1242 : * tries to solve in a safe manner.
1243 : *
1244 : * Also note that the function uses a somewhat relaxed locking scheme, so it may
1245 : * be called only for an already offlined CPU.
1246 : */
1247 : void clear_tasks_mm_cpumask(int cpu)
1248 : {
1249 : struct task_struct *p;
1250 :
1251 : /*
1252 : * This function is called after the cpu is taken down and marked
1253 : * offline, so its not like new tasks will ever get this cpu set in
1254 : * their mm mask. -- Peter Zijlstra
1255 : * Thus, we may use rcu_read_lock() here, instead of grabbing
1256 : * full-fledged tasklist_lock.
1257 : */
1258 : WARN_ON(cpu_online(cpu));
1259 : rcu_read_lock();
1260 : for_each_process(p) {
1261 : struct task_struct *t;
1262 :
1263 : /*
1264 : * Main thread might exit, but other threads may still have
1265 : * a valid mm. Find one.
1266 : */
1267 : t = find_lock_task_mm(p);
1268 : if (!t)
1269 : continue;
1270 : arch_clear_mm_cpumask_cpu(cpu, t->mm);
1271 : task_unlock(t);
1272 : }
1273 : rcu_read_unlock();
1274 : }
1275 :
1276 : /* Take this CPU down. */
1277 : static int take_cpu_down(void *_param)
1278 : {
1279 : struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
1280 : enum cpuhp_state target = max((int)st->target, CPUHP_AP_OFFLINE);
1281 : int err, cpu = smp_processor_id();
1282 :
1283 : /* Ensure this CPU doesn't handle any more interrupts. */
1284 : err = __cpu_disable();
1285 : if (err < 0)
1286 : return err;
1287 :
1288 : /*
1289 : * Must be called from CPUHP_TEARDOWN_CPU, which means, as we are going
1290 : * down, that the current state is CPUHP_TEARDOWN_CPU - 1.
1291 : */
1292 : WARN_ON(st->state != (CPUHP_TEARDOWN_CPU - 1));
1293 :
1294 : /*
1295 : * Invoke the former CPU_DYING callbacks. DYING must not fail!
1296 : */
1297 : cpuhp_invoke_callback_range_nofail(false, cpu, st, target);
1298 :
1299 : /* Give up timekeeping duties */
1300 : tick_handover_do_timer();
1301 : /* Remove CPU from timer broadcasting */
1302 : tick_offline_cpu(cpu);
1303 : /* Park the stopper thread */
1304 : stop_machine_park(cpu);
1305 : return 0;
1306 : }
1307 :
1308 : static int takedown_cpu(unsigned int cpu)
1309 : {
1310 : struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
1311 : int err;
1312 :
1313 : /* Park the smpboot threads */
1314 : kthread_park(st->thread);
1315 :
1316 : /*
1317 : * Prevent irq alloc/free while the dying cpu reorganizes the
1318 : * interrupt affinities.
1319 : */
1320 : irq_lock_sparse();
1321 :
1322 : /*
1323 : * So now all preempt/rcu users must observe !cpu_active().
1324 : */
1325 : err = stop_machine_cpuslocked(take_cpu_down, NULL, cpumask_of(cpu));
1326 : if (err) {
1327 : /* CPU refused to die */
1328 : irq_unlock_sparse();
1329 : /* Unpark the hotplug thread so we can rollback there */
1330 : kthread_unpark(st->thread);
1331 : return err;
1332 : }
1333 : BUG_ON(cpu_online(cpu));
1334 :
1335 : /*
1336 : * The teardown callback for CPUHP_AP_SCHED_STARTING will have removed
1337 : * all runnable tasks from the CPU, there's only the idle task left now
1338 : * that the migration thread is done doing the stop_machine thing.
1339 : *
1340 : * Wait for the stop thread to go away.
1341 : */
1342 : wait_for_ap_thread(st, false);
1343 : BUG_ON(st->state != CPUHP_AP_IDLE_DEAD);
1344 :
1345 : /* Interrupts are moved away from the dying cpu, reenable alloc/free */
1346 : irq_unlock_sparse();
1347 :
1348 : hotplug_cpu__broadcast_tick_pull(cpu);
1349 : /* This actually kills the CPU. */
1350 : __cpu_die(cpu);
1351 :
1352 : cpuhp_bp_sync_dead(cpu);
1353 :
1354 : tick_cleanup_dead_cpu(cpu);
1355 : rcutree_migrate_callbacks(cpu);
1356 : return 0;
1357 : }
1358 :
1359 : static void cpuhp_complete_idle_dead(void *arg)
1360 : {
1361 : struct cpuhp_cpu_state *st = arg;
1362 :
1363 : complete_ap_thread(st, false);
1364 : }
1365 :
1366 : void cpuhp_report_idle_dead(void)
1367 : {
1368 : struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
1369 :
1370 : BUG_ON(st->state != CPUHP_AP_OFFLINE);
1371 : rcu_report_dead(smp_processor_id());
1372 : st->state = CPUHP_AP_IDLE_DEAD;
1373 : /*
1374 : * We cannot call complete after rcu_report_dead() so we delegate it
1375 : * to an online cpu.
1376 : */
1377 : smp_call_function_single(cpumask_first(cpu_online_mask),
1378 : cpuhp_complete_idle_dead, st, 0);
1379 : }
1380 :
1381 : static int cpuhp_down_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st,
1382 : enum cpuhp_state target)
1383 : {
1384 : enum cpuhp_state prev_state = st->state;
1385 : int ret = 0;
1386 :
1387 : ret = cpuhp_invoke_callback_range(false, cpu, st, target);
1388 : if (ret) {
1389 : pr_debug("CPU DOWN failed (%d) CPU %u state %s (%d)\n",
1390 : ret, cpu, cpuhp_get_step(st->state)->name,
1391 : st->state);
1392 :
1393 : cpuhp_reset_state(cpu, st, prev_state);
1394 :
1395 : if (st->state < prev_state)
1396 : WARN_ON(cpuhp_invoke_callback_range(true, cpu, st,
1397 : prev_state));
1398 : }
1399 :
1400 : return ret;
1401 : }
1402 :
1403 : /* Requires cpu_add_remove_lock to be held */
1404 : static int __ref _cpu_down(unsigned int cpu, int tasks_frozen,
1405 : enum cpuhp_state target)
1406 : {
1407 : struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
1408 : int prev_state, ret = 0;
1409 :
1410 : if (num_online_cpus() == 1)
1411 : return -EBUSY;
1412 :
1413 : if (!cpu_present(cpu))
1414 : return -EINVAL;
1415 :
1416 : cpus_write_lock();
1417 :
1418 : cpuhp_tasks_frozen = tasks_frozen;
1419 :
1420 : prev_state = cpuhp_set_state(cpu, st, target);
1421 : /*
1422 : * If the current CPU state is in the range of the AP hotplug thread,
1423 : * then we need to kick the thread.
1424 : */
1425 : if (st->state > CPUHP_TEARDOWN_CPU) {
1426 : st->target = max((int)target, CPUHP_TEARDOWN_CPU);
1427 : ret = cpuhp_kick_ap_work(cpu);
1428 : /*
1429 : * The AP side has done the error rollback already. Just
1430 : * return the error code..
1431 : */
1432 : if (ret)
1433 : goto out;
1434 :
1435 : /*
1436 : * We might have stopped still in the range of the AP hotplug
1437 : * thread. Nothing to do anymore.
1438 : */
1439 : if (st->state > CPUHP_TEARDOWN_CPU)
1440 : goto out;
1441 :
1442 : st->target = target;
1443 : }
1444 : /*
1445 : * The AP brought itself down to CPUHP_TEARDOWN_CPU. So we need
1446 : * to do the further cleanups.
1447 : */
1448 : ret = cpuhp_down_callbacks(cpu, st, target);
1449 : if (ret && st->state < prev_state) {
1450 : if (st->state == CPUHP_TEARDOWN_CPU) {
1451 : cpuhp_reset_state(cpu, st, prev_state);
1452 : __cpuhp_kick_ap(st);
1453 : } else {
1454 : WARN(1, "DEAD callback error for CPU%d", cpu);
1455 : }
1456 : }
1457 :
1458 : out:
1459 : cpus_write_unlock();
1460 : /*
1461 : * Do post unplug cleanup. This is still protected against
1462 : * concurrent CPU hotplug via cpu_add_remove_lock.
1463 : */
1464 : lockup_detector_cleanup();
1465 : arch_smt_update();
1466 : cpu_up_down_serialize_trainwrecks(tasks_frozen);
1467 : return ret;
1468 : }
1469 :
1470 : static int cpu_down_maps_locked(unsigned int cpu, enum cpuhp_state target)
1471 : {
1472 : /*
1473 : * If the platform does not support hotplug, report it explicitly to
1474 : * differentiate it from a transient offlining failure.
1475 : */
1476 : if (cc_platform_has(CC_ATTR_HOTPLUG_DISABLED))
1477 : return -EOPNOTSUPP;
1478 : if (cpu_hotplug_disabled)
1479 : return -EBUSY;
1480 : return _cpu_down(cpu, 0, target);
1481 : }
1482 :
1483 : static int cpu_down(unsigned int cpu, enum cpuhp_state target)
1484 : {
1485 : int err;
1486 :
1487 : cpu_maps_update_begin();
1488 : err = cpu_down_maps_locked(cpu, target);
1489 : cpu_maps_update_done();
1490 : return err;
1491 : }
1492 :
1493 : /**
1494 : * cpu_device_down - Bring down a cpu device
1495 : * @dev: Pointer to the cpu device to offline
1496 : *
1497 : * This function is meant to be used by device core cpu subsystem only.
1498 : *
1499 : * Other subsystems should use remove_cpu() instead.
1500 : *
1501 : * Return: %0 on success or a negative errno code
1502 : */
1503 : int cpu_device_down(struct device *dev)
1504 : {
1505 : return cpu_down(dev->id, CPUHP_OFFLINE);
1506 : }
1507 :
1508 : int remove_cpu(unsigned int cpu)
1509 : {
1510 : int ret;
1511 :
1512 : lock_device_hotplug();
1513 : ret = device_offline(get_cpu_device(cpu));
1514 : unlock_device_hotplug();
1515 :
1516 : return ret;
1517 : }
1518 : EXPORT_SYMBOL_GPL(remove_cpu);
1519 :
1520 : void smp_shutdown_nonboot_cpus(unsigned int primary_cpu)
1521 : {
1522 : unsigned int cpu;
1523 : int error;
1524 :
1525 : cpu_maps_update_begin();
1526 :
1527 : /*
1528 : * Make certain the cpu I'm about to reboot on is online.
1529 : *
1530 : * This is inline to what migrate_to_reboot_cpu() already do.
1531 : */
1532 : if (!cpu_online(primary_cpu))
1533 : primary_cpu = cpumask_first(cpu_online_mask);
1534 :
1535 : for_each_online_cpu(cpu) {
1536 : if (cpu == primary_cpu)
1537 : continue;
1538 :
1539 : error = cpu_down_maps_locked(cpu, CPUHP_OFFLINE);
1540 : if (error) {
1541 : pr_err("Failed to offline CPU%d - error=%d",
1542 : cpu, error);
1543 : break;
1544 : }
1545 : }
1546 :
1547 : /*
1548 : * Ensure all but the reboot CPU are offline.
1549 : */
1550 : BUG_ON(num_online_cpus() > 1);
1551 :
1552 : /*
1553 : * Make sure the CPUs won't be enabled by someone else after this
1554 : * point. Kexec will reboot to a new kernel shortly resetting
1555 : * everything along the way.
1556 : */
1557 : cpu_hotplug_disabled++;
1558 :
1559 : cpu_maps_update_done();
1560 : }
1561 :
1562 : #else
1563 : #define takedown_cpu NULL
1564 : #endif /*CONFIG_HOTPLUG_CPU*/
1565 :
1566 : /**
1567 : * notify_cpu_starting(cpu) - Invoke the callbacks on the starting CPU
1568 : * @cpu: cpu that just started
1569 : *
1570 : * It must be called by the arch code on the new cpu, before the new cpu
1571 : * enables interrupts and before the "boot" cpu returns from __cpu_up().
1572 : */
1573 : void notify_cpu_starting(unsigned int cpu)
1574 : {
1575 : struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
1576 : enum cpuhp_state target = min((int)st->target, CPUHP_AP_ONLINE);
1577 :
1578 : rcu_cpu_starting(cpu); /* Enables RCU usage on this CPU. */
1579 : cpumask_set_cpu(cpu, &cpus_booted_once_mask);
1580 :
1581 : /*
1582 : * STARTING must not fail!
1583 : */
1584 : cpuhp_invoke_callback_range_nofail(true, cpu, st, target);
1585 : }
1586 :
1587 : /*
1588 : * Called from the idle task. Wake up the controlling task which brings the
1589 : * hotplug thread of the upcoming CPU up and then delegates the rest of the
1590 : * online bringup to the hotplug thread.
1591 : */
1592 : void cpuhp_online_idle(enum cpuhp_state state)
1593 : {
1594 : struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
1595 :
1596 : /* Happens for the boot cpu */
1597 : if (state != CPUHP_AP_ONLINE_IDLE)
1598 : return;
1599 :
1600 : cpuhp_ap_update_sync_state(SYNC_STATE_ONLINE);
1601 :
1602 : /*
1603 : * Unpark the stopper thread before we start the idle loop (and start
1604 : * scheduling); this ensures the stopper task is always available.
1605 : */
1606 : stop_machine_unpark(smp_processor_id());
1607 :
1608 : st->state = CPUHP_AP_ONLINE_IDLE;
1609 : complete_ap_thread(st, true);
1610 : }
1611 :
1612 : /* Requires cpu_add_remove_lock to be held */
1613 : static int _cpu_up(unsigned int cpu, int tasks_frozen, enum cpuhp_state target)
1614 : {
1615 : struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
1616 : struct task_struct *idle;
1617 : int ret = 0;
1618 :
1619 : cpus_write_lock();
1620 :
1621 : if (!cpu_present(cpu)) {
1622 : ret = -EINVAL;
1623 : goto out;
1624 : }
1625 :
1626 : /*
1627 : * The caller of cpu_up() might have raced with another
1628 : * caller. Nothing to do.
1629 : */
1630 : if (st->state >= target)
1631 : goto out;
1632 :
1633 : if (st->state == CPUHP_OFFLINE) {
1634 : /* Let it fail before we try to bring the cpu up */
1635 : idle = idle_thread_get(cpu);
1636 : if (IS_ERR(idle)) {
1637 : ret = PTR_ERR(idle);
1638 : goto out;
1639 : }
1640 :
1641 : /*
1642 : * Reset stale stack state from the last time this CPU was online.
1643 : */
1644 : scs_task_reset(idle);
1645 : kasan_unpoison_task_stack(idle);
1646 : }
1647 :
1648 : cpuhp_tasks_frozen = tasks_frozen;
1649 :
1650 : cpuhp_set_state(cpu, st, target);
1651 : /*
1652 : * If the current CPU state is in the range of the AP hotplug thread,
1653 : * then we need to kick the thread once more.
1654 : */
1655 : if (st->state > CPUHP_BRINGUP_CPU) {
1656 : ret = cpuhp_kick_ap_work(cpu);
1657 : /*
1658 : * The AP side has done the error rollback already. Just
1659 : * return the error code..
1660 : */
1661 : if (ret)
1662 : goto out;
1663 : }
1664 :
1665 : /*
1666 : * Try to reach the target state. We max out on the BP at
1667 : * CPUHP_BRINGUP_CPU. After that the AP hotplug thread is
1668 : * responsible for bringing it up to the target state.
1669 : */
1670 : target = min((int)target, CPUHP_BRINGUP_CPU);
1671 : ret = cpuhp_up_callbacks(cpu, st, target);
1672 : out:
1673 : cpus_write_unlock();
1674 : arch_smt_update();
1675 : cpu_up_down_serialize_trainwrecks(tasks_frozen);
1676 : return ret;
1677 : }
1678 :
1679 : static int cpu_up(unsigned int cpu, enum cpuhp_state target)
1680 : {
1681 : int err = 0;
1682 :
1683 : if (!cpu_possible(cpu)) {
1684 : pr_err("can't online cpu %d because it is not configured as may-hotadd at boot time\n",
1685 : cpu);
1686 : #if defined(CONFIG_IA64)
1687 : pr_err("please check additional_cpus= boot parameter\n");
1688 : #endif
1689 : return -EINVAL;
1690 : }
1691 :
1692 : err = try_online_node(cpu_to_node(cpu));
1693 : if (err)
1694 : return err;
1695 :
1696 : cpu_maps_update_begin();
1697 :
1698 : if (cpu_hotplug_disabled) {
1699 : err = -EBUSY;
1700 : goto out;
1701 : }
1702 : if (!cpu_smt_allowed(cpu)) {
1703 : err = -EPERM;
1704 : goto out;
1705 : }
1706 :
1707 : err = _cpu_up(cpu, 0, target);
1708 : out:
1709 : cpu_maps_update_done();
1710 : return err;
1711 : }
1712 :
1713 : /**
1714 : * cpu_device_up - Bring up a cpu device
1715 : * @dev: Pointer to the cpu device to online
1716 : *
1717 : * This function is meant to be used by device core cpu subsystem only.
1718 : *
1719 : * Other subsystems should use add_cpu() instead.
1720 : *
1721 : * Return: %0 on success or a negative errno code
1722 : */
1723 : int cpu_device_up(struct device *dev)
1724 : {
1725 : return cpu_up(dev->id, CPUHP_ONLINE);
1726 : }
1727 :
1728 : int add_cpu(unsigned int cpu)
1729 : {
1730 : int ret;
1731 :
1732 : lock_device_hotplug();
1733 : ret = device_online(get_cpu_device(cpu));
1734 : unlock_device_hotplug();
1735 :
1736 : return ret;
1737 : }
1738 : EXPORT_SYMBOL_GPL(add_cpu);
1739 :
1740 : /**
1741 : * bringup_hibernate_cpu - Bring up the CPU that we hibernated on
1742 : * @sleep_cpu: The cpu we hibernated on and should be brought up.
1743 : *
1744 : * On some architectures like arm64, we can hibernate on any CPU, but on
1745 : * wake up the CPU we hibernated on might be offline as a side effect of
1746 : * using maxcpus= for example.
1747 : *
1748 : * Return: %0 on success or a negative errno code
1749 : */
1750 : int bringup_hibernate_cpu(unsigned int sleep_cpu)
1751 : {
1752 : int ret;
1753 :
1754 : if (!cpu_online(sleep_cpu)) {
1755 : pr_info("Hibernated on a CPU that is offline! Bringing CPU up.\n");
1756 : ret = cpu_up(sleep_cpu, CPUHP_ONLINE);
1757 : if (ret) {
1758 : pr_err("Failed to bring hibernate-CPU up!\n");
1759 : return ret;
1760 : }
1761 : }
1762 : return 0;
1763 : }
1764 :
1765 : static void __init cpuhp_bringup_mask(const struct cpumask *mask, unsigned int ncpus,
1766 : enum cpuhp_state target)
1767 : {
1768 : unsigned int cpu;
1769 :
1770 : for_each_cpu(cpu, mask) {
1771 : struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
1772 :
1773 : if (cpu_up(cpu, target) && can_rollback_cpu(st)) {
1774 : /*
1775 : * If this failed then cpu_up() might have only
1776 : * rolled back to CPUHP_BP_KICK_AP for the final
1777 : * online. Clean it up. NOOP if already rolled back.
1778 : */
1779 : WARN_ON(cpuhp_invoke_callback_range(false, cpu, st, CPUHP_OFFLINE));
1780 : }
1781 :
1782 : if (!--ncpus)
1783 : break;
1784 : }
1785 : }
1786 :
1787 : #ifdef CONFIG_HOTPLUG_PARALLEL
1788 : static bool __cpuhp_parallel_bringup __ro_after_init = true;
1789 :
1790 : static int __init parallel_bringup_parse_param(char *arg)
1791 : {
1792 : return kstrtobool(arg, &__cpuhp_parallel_bringup);
1793 : }
1794 : early_param("cpuhp.parallel", parallel_bringup_parse_param);
1795 :
1796 : /*
1797 : * On architectures which have enabled parallel bringup this invokes all BP
1798 : * prepare states for each of the to be onlined APs first. The last state
1799 : * sends the startup IPI to the APs. The APs proceed through the low level
1800 : * bringup code in parallel and then wait for the control CPU to release
1801 : * them one by one for the final onlining procedure.
1802 : *
1803 : * This avoids waiting for each AP to respond to the startup IPI in
1804 : * CPUHP_BRINGUP_CPU.
1805 : */
1806 : static bool __init cpuhp_bringup_cpus_parallel(unsigned int ncpus)
1807 : {
1808 : const struct cpumask *mask = cpu_present_mask;
1809 :
1810 : if (__cpuhp_parallel_bringup)
1811 : __cpuhp_parallel_bringup = arch_cpuhp_init_parallel_bringup();
1812 : if (!__cpuhp_parallel_bringup)
1813 : return false;
1814 :
1815 : if (cpuhp_smt_aware()) {
1816 : const struct cpumask *pmask = cpuhp_get_primary_thread_mask();
1817 : static struct cpumask tmp_mask __initdata;
1818 :
1819 : /*
1820 : * X86 requires to prevent that SMT siblings stopped while
1821 : * the primary thread does a microcode update for various
1822 : * reasons. Bring the primary threads up first.
1823 : */
1824 : cpumask_and(&tmp_mask, mask, pmask);
1825 : cpuhp_bringup_mask(&tmp_mask, ncpus, CPUHP_BP_KICK_AP);
1826 : cpuhp_bringup_mask(&tmp_mask, ncpus, CPUHP_ONLINE);
1827 : /* Account for the online CPUs */
1828 : ncpus -= num_online_cpus();
1829 : if (!ncpus)
1830 : return true;
1831 : /* Create the mask for secondary CPUs */
1832 : cpumask_andnot(&tmp_mask, mask, pmask);
1833 : mask = &tmp_mask;
1834 : }
1835 :
1836 : /* Bring the not-yet started CPUs up */
1837 : cpuhp_bringup_mask(mask, ncpus, CPUHP_BP_KICK_AP);
1838 : cpuhp_bringup_mask(mask, ncpus, CPUHP_ONLINE);
1839 : return true;
1840 : }
1841 : #else
1842 : static inline bool cpuhp_bringup_cpus_parallel(unsigned int ncpus) { return false; }
1843 : #endif /* CONFIG_HOTPLUG_PARALLEL */
1844 :
1845 : void __init bringup_nonboot_cpus(unsigned int setup_max_cpus)
1846 : {
1847 : /* Try parallel bringup optimization if enabled */
1848 : if (cpuhp_bringup_cpus_parallel(setup_max_cpus))
1849 : return;
1850 :
1851 : /* Full per CPU serialized bringup */
1852 : cpuhp_bringup_mask(cpu_present_mask, setup_max_cpus, CPUHP_ONLINE);
1853 : }
1854 :
1855 : #ifdef CONFIG_PM_SLEEP_SMP
1856 : static cpumask_var_t frozen_cpus;
1857 :
1858 : int freeze_secondary_cpus(int primary)
1859 : {
1860 : int cpu, error = 0;
1861 :
1862 : cpu_maps_update_begin();
1863 : if (primary == -1) {
1864 : primary = cpumask_first(cpu_online_mask);
1865 : if (!housekeeping_cpu(primary, HK_TYPE_TIMER))
1866 : primary = housekeeping_any_cpu(HK_TYPE_TIMER);
1867 : } else {
1868 : if (!cpu_online(primary))
1869 : primary = cpumask_first(cpu_online_mask);
1870 : }
1871 :
1872 : /*
1873 : * We take down all of the non-boot CPUs in one shot to avoid races
1874 : * with the userspace trying to use the CPU hotplug at the same time
1875 : */
1876 : cpumask_clear(frozen_cpus);
1877 :
1878 : pr_info("Disabling non-boot CPUs ...\n");
1879 : for_each_online_cpu(cpu) {
1880 : if (cpu == primary)
1881 : continue;
1882 :
1883 : if (pm_wakeup_pending()) {
1884 : pr_info("Wakeup pending. Abort CPU freeze\n");
1885 : error = -EBUSY;
1886 : break;
1887 : }
1888 :
1889 : trace_suspend_resume(TPS("CPU_OFF"), cpu, true);
1890 : error = _cpu_down(cpu, 1, CPUHP_OFFLINE);
1891 : trace_suspend_resume(TPS("CPU_OFF"), cpu, false);
1892 : if (!error)
1893 : cpumask_set_cpu(cpu, frozen_cpus);
1894 : else {
1895 : pr_err("Error taking CPU%d down: %d\n", cpu, error);
1896 : break;
1897 : }
1898 : }
1899 :
1900 : if (!error)
1901 : BUG_ON(num_online_cpus() > 1);
1902 : else
1903 : pr_err("Non-boot CPUs are not disabled\n");
1904 :
1905 : /*
1906 : * Make sure the CPUs won't be enabled by someone else. We need to do
1907 : * this even in case of failure as all freeze_secondary_cpus() users are
1908 : * supposed to do thaw_secondary_cpus() on the failure path.
1909 : */
1910 : cpu_hotplug_disabled++;
1911 :
1912 : cpu_maps_update_done();
1913 : return error;
1914 : }
1915 :
1916 : void __weak arch_thaw_secondary_cpus_begin(void)
1917 : {
1918 : }
1919 :
1920 : void __weak arch_thaw_secondary_cpus_end(void)
1921 : {
1922 : }
1923 :
1924 : void thaw_secondary_cpus(void)
1925 : {
1926 : int cpu, error;
1927 :
1928 : /* Allow everyone to use the CPU hotplug again */
1929 : cpu_maps_update_begin();
1930 : __cpu_hotplug_enable();
1931 : if (cpumask_empty(frozen_cpus))
1932 : goto out;
1933 :
1934 : pr_info("Enabling non-boot CPUs ...\n");
1935 :
1936 : arch_thaw_secondary_cpus_begin();
1937 :
1938 : for_each_cpu(cpu, frozen_cpus) {
1939 : trace_suspend_resume(TPS("CPU_ON"), cpu, true);
1940 : error = _cpu_up(cpu, 1, CPUHP_ONLINE);
1941 : trace_suspend_resume(TPS("CPU_ON"), cpu, false);
1942 : if (!error) {
1943 : pr_info("CPU%d is up\n", cpu);
1944 : continue;
1945 : }
1946 : pr_warn("Error taking CPU%d up: %d\n", cpu, error);
1947 : }
1948 :
1949 : arch_thaw_secondary_cpus_end();
1950 :
1951 : cpumask_clear(frozen_cpus);
1952 : out:
1953 : cpu_maps_update_done();
1954 : }
1955 :
1956 : static int __init alloc_frozen_cpus(void)
1957 : {
1958 : if (!alloc_cpumask_var(&frozen_cpus, GFP_KERNEL|__GFP_ZERO))
1959 : return -ENOMEM;
1960 : return 0;
1961 : }
1962 : core_initcall(alloc_frozen_cpus);
1963 :
1964 : /*
1965 : * When callbacks for CPU hotplug notifications are being executed, we must
1966 : * ensure that the state of the system with respect to the tasks being frozen
1967 : * or not, as reported by the notification, remains unchanged *throughout the
1968 : * duration* of the execution of the callbacks.
1969 : * Hence we need to prevent the freezer from racing with regular CPU hotplug.
1970 : *
1971 : * This synchronization is implemented by mutually excluding regular CPU
1972 : * hotplug and Suspend/Hibernate call paths by hooking onto the Suspend/
1973 : * Hibernate notifications.
1974 : */
1975 : static int
1976 : cpu_hotplug_pm_callback(struct notifier_block *nb,
1977 : unsigned long action, void *ptr)
1978 : {
1979 : switch (action) {
1980 :
1981 : case PM_SUSPEND_PREPARE:
1982 : case PM_HIBERNATION_PREPARE:
1983 : cpu_hotplug_disable();
1984 : break;
1985 :
1986 : case PM_POST_SUSPEND:
1987 : case PM_POST_HIBERNATION:
1988 : cpu_hotplug_enable();
1989 : break;
1990 :
1991 : default:
1992 : return NOTIFY_DONE;
1993 : }
1994 :
1995 : return NOTIFY_OK;
1996 : }
1997 :
1998 :
1999 : static int __init cpu_hotplug_pm_sync_init(void)
2000 : {
2001 : /*
2002 : * cpu_hotplug_pm_callback has higher priority than x86
2003 : * bsp_pm_callback which depends on cpu_hotplug_pm_callback
2004 : * to disable cpu hotplug to avoid cpu hotplug race.
2005 : */
2006 : pm_notifier(cpu_hotplug_pm_callback, 0);
2007 : return 0;
2008 : }
2009 : core_initcall(cpu_hotplug_pm_sync_init);
2010 :
2011 : #endif /* CONFIG_PM_SLEEP_SMP */
2012 :
2013 : int __boot_cpu_id;
2014 :
2015 : #endif /* CONFIG_SMP */
2016 :
2017 : /* Boot processor state steps */
2018 : static struct cpuhp_step cpuhp_hp_states[] = {
2019 : [CPUHP_OFFLINE] = {
2020 : .name = "offline",
2021 : .startup.single = NULL,
2022 : .teardown.single = NULL,
2023 : },
2024 : #ifdef CONFIG_SMP
2025 : [CPUHP_CREATE_THREADS]= {
2026 : .name = "threads:prepare",
2027 : .startup.single = smpboot_create_threads,
2028 : .teardown.single = NULL,
2029 : .cant_stop = true,
2030 : },
2031 : [CPUHP_PERF_PREPARE] = {
2032 : .name = "perf:prepare",
2033 : .startup.single = perf_event_init_cpu,
2034 : .teardown.single = perf_event_exit_cpu,
2035 : },
2036 : [CPUHP_RANDOM_PREPARE] = {
2037 : .name = "random:prepare",
2038 : .startup.single = random_prepare_cpu,
2039 : .teardown.single = NULL,
2040 : },
2041 : [CPUHP_WORKQUEUE_PREP] = {
2042 : .name = "workqueue:prepare",
2043 : .startup.single = workqueue_prepare_cpu,
2044 : .teardown.single = NULL,
2045 : },
2046 : [CPUHP_HRTIMERS_PREPARE] = {
2047 : .name = "hrtimers:prepare",
2048 : .startup.single = hrtimers_prepare_cpu,
2049 : .teardown.single = hrtimers_dead_cpu,
2050 : },
2051 : [CPUHP_SMPCFD_PREPARE] = {
2052 : .name = "smpcfd:prepare",
2053 : .startup.single = smpcfd_prepare_cpu,
2054 : .teardown.single = smpcfd_dead_cpu,
2055 : },
2056 : [CPUHP_RELAY_PREPARE] = {
2057 : .name = "relay:prepare",
2058 : .startup.single = relay_prepare_cpu,
2059 : .teardown.single = NULL,
2060 : },
2061 : [CPUHP_SLAB_PREPARE] = {
2062 : .name = "slab:prepare",
2063 : .startup.single = slab_prepare_cpu,
2064 : .teardown.single = slab_dead_cpu,
2065 : },
2066 : [CPUHP_RCUTREE_PREP] = {
2067 : .name = "RCU/tree:prepare",
2068 : .startup.single = rcutree_prepare_cpu,
2069 : .teardown.single = rcutree_dead_cpu,
2070 : },
2071 : /*
2072 : * On the tear-down path, timers_dead_cpu() must be invoked
2073 : * before blk_mq_queue_reinit_notify() from notify_dead(),
2074 : * otherwise a RCU stall occurs.
2075 : */
2076 : [CPUHP_TIMERS_PREPARE] = {
2077 : .name = "timers:prepare",
2078 : .startup.single = timers_prepare_cpu,
2079 : .teardown.single = timers_dead_cpu,
2080 : },
2081 :
2082 : #ifdef CONFIG_HOTPLUG_SPLIT_STARTUP
2083 : /*
2084 : * Kicks the AP alive. AP will wait in cpuhp_ap_sync_alive() until
2085 : * the next step will release it.
2086 : */
2087 : [CPUHP_BP_KICK_AP] = {
2088 : .name = "cpu:kick_ap",
2089 : .startup.single = cpuhp_kick_ap_alive,
2090 : },
2091 :
2092 : /*
2093 : * Waits for the AP to reach cpuhp_ap_sync_alive() and then
2094 : * releases it for the complete bringup.
2095 : */
2096 : [CPUHP_BRINGUP_CPU] = {
2097 : .name = "cpu:bringup",
2098 : .startup.single = cpuhp_bringup_ap,
2099 : .teardown.single = finish_cpu,
2100 : .cant_stop = true,
2101 : },
2102 : #else
2103 : /*
2104 : * All-in-one CPU bringup state which includes the kick alive.
2105 : */
2106 : [CPUHP_BRINGUP_CPU] = {
2107 : .name = "cpu:bringup",
2108 : .startup.single = bringup_cpu,
2109 : .teardown.single = finish_cpu,
2110 : .cant_stop = true,
2111 : },
2112 : #endif
2113 : /* Final state before CPU kills itself */
2114 : [CPUHP_AP_IDLE_DEAD] = {
2115 : .name = "idle:dead",
2116 : },
2117 : /*
2118 : * Last state before CPU enters the idle loop to die. Transient state
2119 : * for synchronization.
2120 : */
2121 : [CPUHP_AP_OFFLINE] = {
2122 : .name = "ap:offline",
2123 : .cant_stop = true,
2124 : },
2125 : /* First state is scheduler control. Interrupts are disabled */
2126 : [CPUHP_AP_SCHED_STARTING] = {
2127 : .name = "sched:starting",
2128 : .startup.single = sched_cpu_starting,
2129 : .teardown.single = sched_cpu_dying,
2130 : },
2131 : [CPUHP_AP_RCUTREE_DYING] = {
2132 : .name = "RCU/tree:dying",
2133 : .startup.single = NULL,
2134 : .teardown.single = rcutree_dying_cpu,
2135 : },
2136 : [CPUHP_AP_SMPCFD_DYING] = {
2137 : .name = "smpcfd:dying",
2138 : .startup.single = NULL,
2139 : .teardown.single = smpcfd_dying_cpu,
2140 : },
2141 : /* Entry state on starting. Interrupts enabled from here on. Transient
2142 : * state for synchronsization */
2143 : [CPUHP_AP_ONLINE] = {
2144 : .name = "ap:online",
2145 : },
2146 : /*
2147 : * Handled on control processor until the plugged processor manages
2148 : * this itself.
2149 : */
2150 : [CPUHP_TEARDOWN_CPU] = {
2151 : .name = "cpu:teardown",
2152 : .startup.single = NULL,
2153 : .teardown.single = takedown_cpu,
2154 : .cant_stop = true,
2155 : },
2156 :
2157 : [CPUHP_AP_SCHED_WAIT_EMPTY] = {
2158 : .name = "sched:waitempty",
2159 : .startup.single = NULL,
2160 : .teardown.single = sched_cpu_wait_empty,
2161 : },
2162 :
2163 : /* Handle smpboot threads park/unpark */
2164 : [CPUHP_AP_SMPBOOT_THREADS] = {
2165 : .name = "smpboot/threads:online",
2166 : .startup.single = smpboot_unpark_threads,
2167 : .teardown.single = smpboot_park_threads,
2168 : },
2169 : [CPUHP_AP_IRQ_AFFINITY_ONLINE] = {
2170 : .name = "irq/affinity:online",
2171 : .startup.single = irq_affinity_online_cpu,
2172 : .teardown.single = NULL,
2173 : },
2174 : [CPUHP_AP_PERF_ONLINE] = {
2175 : .name = "perf:online",
2176 : .startup.single = perf_event_init_cpu,
2177 : .teardown.single = perf_event_exit_cpu,
2178 : },
2179 : [CPUHP_AP_WATCHDOG_ONLINE] = {
2180 : .name = "lockup_detector:online",
2181 : .startup.single = lockup_detector_online_cpu,
2182 : .teardown.single = lockup_detector_offline_cpu,
2183 : },
2184 : [CPUHP_AP_WORKQUEUE_ONLINE] = {
2185 : .name = "workqueue:online",
2186 : .startup.single = workqueue_online_cpu,
2187 : .teardown.single = workqueue_offline_cpu,
2188 : },
2189 : [CPUHP_AP_RANDOM_ONLINE] = {
2190 : .name = "random:online",
2191 : .startup.single = random_online_cpu,
2192 : .teardown.single = NULL,
2193 : },
2194 : [CPUHP_AP_RCUTREE_ONLINE] = {
2195 : .name = "RCU/tree:online",
2196 : .startup.single = rcutree_online_cpu,
2197 : .teardown.single = rcutree_offline_cpu,
2198 : },
2199 : #endif
2200 : /*
2201 : * The dynamically registered state space is here
2202 : */
2203 :
2204 : #ifdef CONFIG_SMP
2205 : /* Last state is scheduler control setting the cpu active */
2206 : [CPUHP_AP_ACTIVE] = {
2207 : .name = "sched:active",
2208 : .startup.single = sched_cpu_activate,
2209 : .teardown.single = sched_cpu_deactivate,
2210 : },
2211 : #endif
2212 :
2213 : /* CPU is fully up and running. */
2214 : [CPUHP_ONLINE] = {
2215 : .name = "online",
2216 : .startup.single = NULL,
2217 : .teardown.single = NULL,
2218 : },
2219 : };
2220 :
2221 : /* Sanity check for callbacks */
2222 : static int cpuhp_cb_check(enum cpuhp_state state)
2223 : {
2224 18 : if (state <= CPUHP_OFFLINE || state >= CPUHP_ONLINE)
2225 : return -EINVAL;
2226 : return 0;
2227 : }
2228 :
2229 : /*
2230 : * Returns a free for dynamic slot assignment of the Online state. The states
2231 : * are protected by the cpuhp_slot_states mutex and an empty slot is identified
2232 : * by having no name assigned.
2233 : */
2234 5 : static int cpuhp_reserve_state(enum cpuhp_state state)
2235 : {
2236 : enum cpuhp_state i, end;
2237 : struct cpuhp_step *step;
2238 :
2239 5 : switch (state) {
2240 : case CPUHP_AP_ONLINE_DYN:
2241 : step = cpuhp_hp_states + CPUHP_AP_ONLINE_DYN;
2242 : end = CPUHP_AP_ONLINE_DYN_END;
2243 : break;
2244 : case CPUHP_BP_PREPARE_DYN:
2245 1 : step = cpuhp_hp_states + CPUHP_BP_PREPARE_DYN;
2246 1 : end = CPUHP_BP_PREPARE_DYN_END;
2247 1 : break;
2248 : default:
2249 : return -EINVAL;
2250 : }
2251 :
2252 11 : for (i = state; i <= end; i++, step++) {
2253 11 : if (!step->name)
2254 : return i;
2255 : }
2256 0 : WARN(1, "No more dynamic states available for CPU hotplug\n");
2257 0 : return -ENOSPC;
2258 : }
2259 :
2260 18 : static int cpuhp_store_callbacks(enum cpuhp_state state, const char *name,
2261 : int (*startup)(unsigned int cpu),
2262 : int (*teardown)(unsigned int cpu),
2263 : bool multi_instance)
2264 : {
2265 : /* (Un)Install the callbacks for further cpu hotplug operations */
2266 : struct cpuhp_step *sp;
2267 19 : int ret = 0;
2268 :
2269 : /*
2270 : * If name is NULL, then the state gets removed.
2271 : *
2272 : * CPUHP_AP_ONLINE_DYN and CPUHP_BP_PREPARE_DYN are handed out on
2273 : * the first allocation from these dynamic ranges, so the removal
2274 : * would trigger a new allocation and clear the wrong (already
2275 : * empty) state, leaving the callbacks of the to be cleared state
2276 : * dangling, which causes wreckage on the next hotplug operation.
2277 : */
2278 36 : if (name && (state == CPUHP_AP_ONLINE_DYN ||
2279 18 : state == CPUHP_BP_PREPARE_DYN)) {
2280 5 : ret = cpuhp_reserve_state(state);
2281 5 : if (ret < 0)
2282 : return ret;
2283 : state = ret;
2284 : }
2285 19 : sp = cpuhp_get_step(state);
2286 18 : if (name && sp->name)
2287 : return -EBUSY;
2288 :
2289 19 : sp->startup.single = startup;
2290 19 : sp->teardown.single = teardown;
2291 19 : sp->name = name;
2292 19 : sp->multi_instance = multi_instance;
2293 19 : INIT_HLIST_HEAD(&sp->list);
2294 18 : return ret;
2295 : }
2296 :
2297 : static void *cpuhp_get_teardown_cb(enum cpuhp_state state)
2298 : {
2299 0 : return cpuhp_get_step(state)->teardown.single;
2300 : }
2301 :
2302 : /*
2303 : * Call the startup/teardown function for a step either on the AP or
2304 : * on the current CPU.
2305 : */
2306 3 : static int cpuhp_issue_call(int cpu, enum cpuhp_state state, bool bringup,
2307 : struct hlist_node *node)
2308 : {
2309 3 : struct cpuhp_step *sp = cpuhp_get_step(state);
2310 : int ret;
2311 :
2312 : /*
2313 : * If there's nothing to do, we done.
2314 : * Relies on the union for multi_instance.
2315 : */
2316 6 : if (cpuhp_step_empty(bringup, sp))
2317 : return 0;
2318 : /*
2319 : * The non AP bound callbacks can fail on bringup. On teardown
2320 : * e.g. module removal we crash for now.
2321 : */
2322 : #ifdef CONFIG_SMP
2323 : if (cpuhp_is_ap_state(state))
2324 : ret = cpuhp_invoke_ap_callback(cpu, state, bringup, node);
2325 : else
2326 : ret = cpuhp_invoke_callback(cpu, state, bringup, node, NULL);
2327 : #else
2328 3 : ret = cpuhp_invoke_callback(cpu, state, bringup, node, NULL);
2329 : #endif
2330 3 : BUG_ON(ret && !bringup);
2331 : return ret;
2332 : }
2333 :
2334 : /*
2335 : * Called from __cpuhp_setup_state on a recoverable failure.
2336 : *
2337 : * Note: The teardown callbacks for rollback are not allowed to fail!
2338 : */
2339 1 : static void cpuhp_rollback_install(int failedcpu, enum cpuhp_state state,
2340 : struct hlist_node *node)
2341 : {
2342 : int cpu;
2343 :
2344 : /* Roll back the already executed steps on the other cpus */
2345 1 : for_each_present_cpu(cpu) {
2346 1 : struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
2347 1 : int cpustate = st->state;
2348 :
2349 1 : if (cpu >= failedcpu)
2350 : break;
2351 :
2352 : /* Did we invoke the startup call on that cpu ? */
2353 0 : if (cpustate >= state)
2354 0 : cpuhp_issue_call(cpu, state, false, node);
2355 : }
2356 1 : }
2357 :
2358 2 : int __cpuhp_state_add_instance_cpuslocked(enum cpuhp_state state,
2359 : struct hlist_node *node,
2360 : bool invoke)
2361 : {
2362 : struct cpuhp_step *sp;
2363 : int cpu;
2364 : int ret;
2365 :
2366 2 : lockdep_assert_cpus_held();
2367 :
2368 2 : sp = cpuhp_get_step(state);
2369 2 : if (sp->multi_instance == false)
2370 : return -EINVAL;
2371 :
2372 2 : mutex_lock(&cpuhp_state_mutex);
2373 :
2374 2 : if (!invoke || !sp->startup.multi)
2375 : goto add_node;
2376 :
2377 : /*
2378 : * Try to call the startup callback for each present cpu
2379 : * depending on the hotplug state of the cpu.
2380 : */
2381 0 : for_each_present_cpu(cpu) {
2382 0 : struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
2383 0 : int cpustate = st->state;
2384 :
2385 0 : if (cpustate < state)
2386 0 : continue;
2387 :
2388 0 : ret = cpuhp_issue_call(cpu, state, true, node);
2389 0 : if (ret) {
2390 0 : if (sp->teardown.multi)
2391 0 : cpuhp_rollback_install(cpu, state, node);
2392 : goto unlock;
2393 : }
2394 : }
2395 : add_node:
2396 2 : ret = 0;
2397 2 : hlist_add_head(node, &sp->list);
2398 : unlock:
2399 2 : mutex_unlock(&cpuhp_state_mutex);
2400 2 : return ret;
2401 : }
2402 :
2403 2 : int __cpuhp_state_add_instance(enum cpuhp_state state, struct hlist_node *node,
2404 : bool invoke)
2405 : {
2406 : int ret;
2407 :
2408 : cpus_read_lock();
2409 2 : ret = __cpuhp_state_add_instance_cpuslocked(state, node, invoke);
2410 : cpus_read_unlock();
2411 2 : return ret;
2412 : }
2413 : EXPORT_SYMBOL_GPL(__cpuhp_state_add_instance);
2414 :
2415 : /**
2416 : * __cpuhp_setup_state_cpuslocked - Setup the callbacks for an hotplug machine state
2417 : * @state: The state to setup
2418 : * @name: Name of the step
2419 : * @invoke: If true, the startup function is invoked for cpus where
2420 : * cpu state >= @state
2421 : * @startup: startup callback function
2422 : * @teardown: teardown callback function
2423 : * @multi_instance: State is set up for multiple instances which get
2424 : * added afterwards.
2425 : *
2426 : * The caller needs to hold cpus read locked while calling this function.
2427 : * Return:
2428 : * On success:
2429 : * Positive state number if @state is CPUHP_AP_ONLINE_DYN;
2430 : * 0 for all other states
2431 : * On failure: proper (negative) error code
2432 : */
2433 18 : int __cpuhp_setup_state_cpuslocked(enum cpuhp_state state,
2434 : const char *name, bool invoke,
2435 : int (*startup)(unsigned int cpu),
2436 : int (*teardown)(unsigned int cpu),
2437 : bool multi_instance)
2438 : {
2439 18 : int cpu, ret = 0;
2440 : bool dynstate;
2441 :
2442 18 : lockdep_assert_cpus_held();
2443 :
2444 18 : if (cpuhp_cb_check(state) || !name)
2445 : return -EINVAL;
2446 :
2447 18 : mutex_lock(&cpuhp_state_mutex);
2448 :
2449 18 : ret = cpuhp_store_callbacks(state, name, startup, teardown,
2450 : multi_instance);
2451 :
2452 18 : dynstate = state == CPUHP_AP_ONLINE_DYN;
2453 18 : if (ret > 0 && dynstate) {
2454 4 : state = ret;
2455 4 : ret = 0;
2456 : }
2457 :
2458 18 : if (ret || !invoke || !startup)
2459 : goto out;
2460 :
2461 : /*
2462 : * Try to call the startup callback for each present cpu
2463 : * depending on the hotplug state of the cpu.
2464 : */
2465 2 : for_each_present_cpu(cpu) {
2466 3 : struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
2467 3 : int cpustate = st->state;
2468 :
2469 3 : if (cpustate < state)
2470 0 : continue;
2471 :
2472 3 : ret = cpuhp_issue_call(cpu, state, true, NULL);
2473 3 : if (ret) {
2474 1 : if (teardown)
2475 1 : cpuhp_rollback_install(cpu, state, NULL);
2476 : cpuhp_store_callbacks(state, NULL, NULL, NULL, false);
2477 : goto out;
2478 : }
2479 : }
2480 : out:
2481 18 : mutex_unlock(&cpuhp_state_mutex);
2482 : /*
2483 : * If the requested state is CPUHP_AP_ONLINE_DYN, return the
2484 : * dynamically allocated state in case of success.
2485 : */
2486 18 : if (!ret && dynstate)
2487 : return state;
2488 14 : return ret;
2489 : }
2490 : EXPORT_SYMBOL(__cpuhp_setup_state_cpuslocked);
2491 :
2492 18 : int __cpuhp_setup_state(enum cpuhp_state state,
2493 : const char *name, bool invoke,
2494 : int (*startup)(unsigned int cpu),
2495 : int (*teardown)(unsigned int cpu),
2496 : bool multi_instance)
2497 : {
2498 : int ret;
2499 :
2500 : cpus_read_lock();
2501 18 : ret = __cpuhp_setup_state_cpuslocked(state, name, invoke, startup,
2502 : teardown, multi_instance);
2503 : cpus_read_unlock();
2504 18 : return ret;
2505 : }
2506 : EXPORT_SYMBOL(__cpuhp_setup_state);
2507 :
2508 0 : int __cpuhp_state_remove_instance(enum cpuhp_state state,
2509 : struct hlist_node *node, bool invoke)
2510 : {
2511 0 : struct cpuhp_step *sp = cpuhp_get_step(state);
2512 : int cpu;
2513 :
2514 0 : BUG_ON(cpuhp_cb_check(state));
2515 :
2516 0 : if (!sp->multi_instance)
2517 : return -EINVAL;
2518 :
2519 : cpus_read_lock();
2520 0 : mutex_lock(&cpuhp_state_mutex);
2521 :
2522 0 : if (!invoke || !cpuhp_get_teardown_cb(state))
2523 : goto remove;
2524 : /*
2525 : * Call the teardown callback for each present cpu depending
2526 : * on the hotplug state of the cpu. This function is not
2527 : * allowed to fail currently!
2528 : */
2529 0 : for_each_present_cpu(cpu) {
2530 0 : struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
2531 0 : int cpustate = st->state;
2532 :
2533 0 : if (cpustate >= state)
2534 0 : cpuhp_issue_call(cpu, state, false, node);
2535 : }
2536 :
2537 : remove:
2538 0 : hlist_del(node);
2539 0 : mutex_unlock(&cpuhp_state_mutex);
2540 : cpus_read_unlock();
2541 :
2542 0 : return 0;
2543 : }
2544 : EXPORT_SYMBOL_GPL(__cpuhp_state_remove_instance);
2545 :
2546 : /**
2547 : * __cpuhp_remove_state_cpuslocked - Remove the callbacks for an hotplug machine state
2548 : * @state: The state to remove
2549 : * @invoke: If true, the teardown function is invoked for cpus where
2550 : * cpu state >= @state
2551 : *
2552 : * The caller needs to hold cpus read locked while calling this function.
2553 : * The teardown callback is currently not allowed to fail. Think
2554 : * about module removal!
2555 : */
2556 0 : void __cpuhp_remove_state_cpuslocked(enum cpuhp_state state, bool invoke)
2557 : {
2558 0 : struct cpuhp_step *sp = cpuhp_get_step(state);
2559 : int cpu;
2560 :
2561 0 : BUG_ON(cpuhp_cb_check(state));
2562 :
2563 : lockdep_assert_cpus_held();
2564 :
2565 0 : mutex_lock(&cpuhp_state_mutex);
2566 0 : if (sp->multi_instance) {
2567 0 : WARN(!hlist_empty(&sp->list),
2568 : "Error: Removing state %d which has instances left.\n",
2569 : state);
2570 : goto remove;
2571 : }
2572 :
2573 0 : if (!invoke || !cpuhp_get_teardown_cb(state))
2574 : goto remove;
2575 :
2576 : /*
2577 : * Call the teardown callback for each present cpu depending
2578 : * on the hotplug state of the cpu. This function is not
2579 : * allowed to fail currently!
2580 : */
2581 0 : for_each_present_cpu(cpu) {
2582 0 : struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
2583 0 : int cpustate = st->state;
2584 :
2585 0 : if (cpustate >= state)
2586 0 : cpuhp_issue_call(cpu, state, false, NULL);
2587 : }
2588 : remove:
2589 0 : cpuhp_store_callbacks(state, NULL, NULL, NULL, false);
2590 0 : mutex_unlock(&cpuhp_state_mutex);
2591 0 : }
2592 : EXPORT_SYMBOL(__cpuhp_remove_state_cpuslocked);
2593 :
2594 0 : void __cpuhp_remove_state(enum cpuhp_state state, bool invoke)
2595 : {
2596 : cpus_read_lock();
2597 0 : __cpuhp_remove_state_cpuslocked(state, invoke);
2598 : cpus_read_unlock();
2599 0 : }
2600 : EXPORT_SYMBOL(__cpuhp_remove_state);
2601 :
2602 : #ifdef CONFIG_HOTPLUG_SMT
2603 : static void cpuhp_offline_cpu_device(unsigned int cpu)
2604 : {
2605 : struct device *dev = get_cpu_device(cpu);
2606 :
2607 : dev->offline = true;
2608 : /* Tell user space about the state change */
2609 : kobject_uevent(&dev->kobj, KOBJ_OFFLINE);
2610 : }
2611 :
2612 : static void cpuhp_online_cpu_device(unsigned int cpu)
2613 : {
2614 : struct device *dev = get_cpu_device(cpu);
2615 :
2616 : dev->offline = false;
2617 : /* Tell user space about the state change */
2618 : kobject_uevent(&dev->kobj, KOBJ_ONLINE);
2619 : }
2620 :
2621 : int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval)
2622 : {
2623 : int cpu, ret = 0;
2624 :
2625 : cpu_maps_update_begin();
2626 : for_each_online_cpu(cpu) {
2627 : if (topology_is_primary_thread(cpu))
2628 : continue;
2629 : ret = cpu_down_maps_locked(cpu, CPUHP_OFFLINE);
2630 : if (ret)
2631 : break;
2632 : /*
2633 : * As this needs to hold the cpu maps lock it's impossible
2634 : * to call device_offline() because that ends up calling
2635 : * cpu_down() which takes cpu maps lock. cpu maps lock
2636 : * needs to be held as this might race against in kernel
2637 : * abusers of the hotplug machinery (thermal management).
2638 : *
2639 : * So nothing would update device:offline state. That would
2640 : * leave the sysfs entry stale and prevent onlining after
2641 : * smt control has been changed to 'off' again. This is
2642 : * called under the sysfs hotplug lock, so it is properly
2643 : * serialized against the regular offline usage.
2644 : */
2645 : cpuhp_offline_cpu_device(cpu);
2646 : }
2647 : if (!ret)
2648 : cpu_smt_control = ctrlval;
2649 : cpu_maps_update_done();
2650 : return ret;
2651 : }
2652 :
2653 : int cpuhp_smt_enable(void)
2654 : {
2655 : int cpu, ret = 0;
2656 :
2657 : cpu_maps_update_begin();
2658 : cpu_smt_control = CPU_SMT_ENABLED;
2659 : for_each_present_cpu(cpu) {
2660 : /* Skip online CPUs and CPUs on offline nodes */
2661 : if (cpu_online(cpu) || !node_online(cpu_to_node(cpu)))
2662 : continue;
2663 : ret = _cpu_up(cpu, 0, CPUHP_ONLINE);
2664 : if (ret)
2665 : break;
2666 : /* See comment in cpuhp_smt_disable() */
2667 : cpuhp_online_cpu_device(cpu);
2668 : }
2669 : cpu_maps_update_done();
2670 : return ret;
2671 : }
2672 : #endif
2673 :
2674 : #if defined(CONFIG_SYSFS) && defined(CONFIG_HOTPLUG_CPU)
2675 : static ssize_t state_show(struct device *dev,
2676 : struct device_attribute *attr, char *buf)
2677 : {
2678 : struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id);
2679 :
2680 : return sprintf(buf, "%d\n", st->state);
2681 : }
2682 : static DEVICE_ATTR_RO(state);
2683 :
2684 : static ssize_t target_store(struct device *dev, struct device_attribute *attr,
2685 : const char *buf, size_t count)
2686 : {
2687 : struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id);
2688 : struct cpuhp_step *sp;
2689 : int target, ret;
2690 :
2691 : ret = kstrtoint(buf, 10, &target);
2692 : if (ret)
2693 : return ret;
2694 :
2695 : #ifdef CONFIG_CPU_HOTPLUG_STATE_CONTROL
2696 : if (target < CPUHP_OFFLINE || target > CPUHP_ONLINE)
2697 : return -EINVAL;
2698 : #else
2699 : if (target != CPUHP_OFFLINE && target != CPUHP_ONLINE)
2700 : return -EINVAL;
2701 : #endif
2702 :
2703 : ret = lock_device_hotplug_sysfs();
2704 : if (ret)
2705 : return ret;
2706 :
2707 : mutex_lock(&cpuhp_state_mutex);
2708 : sp = cpuhp_get_step(target);
2709 : ret = !sp->name || sp->cant_stop ? -EINVAL : 0;
2710 : mutex_unlock(&cpuhp_state_mutex);
2711 : if (ret)
2712 : goto out;
2713 :
2714 : if (st->state < target)
2715 : ret = cpu_up(dev->id, target);
2716 : else if (st->state > target)
2717 : ret = cpu_down(dev->id, target);
2718 : else if (WARN_ON(st->target != target))
2719 : st->target = target;
2720 : out:
2721 : unlock_device_hotplug();
2722 : return ret ? ret : count;
2723 : }
2724 :
2725 : static ssize_t target_show(struct device *dev,
2726 : struct device_attribute *attr, char *buf)
2727 : {
2728 : struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id);
2729 :
2730 : return sprintf(buf, "%d\n", st->target);
2731 : }
2732 : static DEVICE_ATTR_RW(target);
2733 :
2734 : static ssize_t fail_store(struct device *dev, struct device_attribute *attr,
2735 : const char *buf, size_t count)
2736 : {
2737 : struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id);
2738 : struct cpuhp_step *sp;
2739 : int fail, ret;
2740 :
2741 : ret = kstrtoint(buf, 10, &fail);
2742 : if (ret)
2743 : return ret;
2744 :
2745 : if (fail == CPUHP_INVALID) {
2746 : st->fail = fail;
2747 : return count;
2748 : }
2749 :
2750 : if (fail < CPUHP_OFFLINE || fail > CPUHP_ONLINE)
2751 : return -EINVAL;
2752 :
2753 : /*
2754 : * Cannot fail STARTING/DYING callbacks.
2755 : */
2756 : if (cpuhp_is_atomic_state(fail))
2757 : return -EINVAL;
2758 :
2759 : /*
2760 : * DEAD callbacks cannot fail...
2761 : * ... neither can CPUHP_BRINGUP_CPU during hotunplug. The latter
2762 : * triggering STARTING callbacks, a failure in this state would
2763 : * hinder rollback.
2764 : */
2765 : if (fail <= CPUHP_BRINGUP_CPU && st->state > CPUHP_BRINGUP_CPU)
2766 : return -EINVAL;
2767 :
2768 : /*
2769 : * Cannot fail anything that doesn't have callbacks.
2770 : */
2771 : mutex_lock(&cpuhp_state_mutex);
2772 : sp = cpuhp_get_step(fail);
2773 : if (!sp->startup.single && !sp->teardown.single)
2774 : ret = -EINVAL;
2775 : mutex_unlock(&cpuhp_state_mutex);
2776 : if (ret)
2777 : return ret;
2778 :
2779 : st->fail = fail;
2780 :
2781 : return count;
2782 : }
2783 :
2784 : static ssize_t fail_show(struct device *dev,
2785 : struct device_attribute *attr, char *buf)
2786 : {
2787 : struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id);
2788 :
2789 : return sprintf(buf, "%d\n", st->fail);
2790 : }
2791 :
2792 : static DEVICE_ATTR_RW(fail);
2793 :
2794 : static struct attribute *cpuhp_cpu_attrs[] = {
2795 : &dev_attr_state.attr,
2796 : &dev_attr_target.attr,
2797 : &dev_attr_fail.attr,
2798 : NULL
2799 : };
2800 :
2801 : static const struct attribute_group cpuhp_cpu_attr_group = {
2802 : .attrs = cpuhp_cpu_attrs,
2803 : .name = "hotplug",
2804 : NULL
2805 : };
2806 :
2807 : static ssize_t states_show(struct device *dev,
2808 : struct device_attribute *attr, char *buf)
2809 : {
2810 : ssize_t cur, res = 0;
2811 : int i;
2812 :
2813 : mutex_lock(&cpuhp_state_mutex);
2814 : for (i = CPUHP_OFFLINE; i <= CPUHP_ONLINE; i++) {
2815 : struct cpuhp_step *sp = cpuhp_get_step(i);
2816 :
2817 : if (sp->name) {
2818 : cur = sprintf(buf, "%3d: %s\n", i, sp->name);
2819 : buf += cur;
2820 : res += cur;
2821 : }
2822 : }
2823 : mutex_unlock(&cpuhp_state_mutex);
2824 : return res;
2825 : }
2826 : static DEVICE_ATTR_RO(states);
2827 :
2828 : static struct attribute *cpuhp_cpu_root_attrs[] = {
2829 : &dev_attr_states.attr,
2830 : NULL
2831 : };
2832 :
2833 : static const struct attribute_group cpuhp_cpu_root_attr_group = {
2834 : .attrs = cpuhp_cpu_root_attrs,
2835 : .name = "hotplug",
2836 : NULL
2837 : };
2838 :
2839 : #ifdef CONFIG_HOTPLUG_SMT
2840 :
2841 : static ssize_t
2842 : __store_smt_control(struct device *dev, struct device_attribute *attr,
2843 : const char *buf, size_t count)
2844 : {
2845 : int ctrlval, ret;
2846 :
2847 : if (sysfs_streq(buf, "on"))
2848 : ctrlval = CPU_SMT_ENABLED;
2849 : else if (sysfs_streq(buf, "off"))
2850 : ctrlval = CPU_SMT_DISABLED;
2851 : else if (sysfs_streq(buf, "forceoff"))
2852 : ctrlval = CPU_SMT_FORCE_DISABLED;
2853 : else
2854 : return -EINVAL;
2855 :
2856 : if (cpu_smt_control == CPU_SMT_FORCE_DISABLED)
2857 : return -EPERM;
2858 :
2859 : if (cpu_smt_control == CPU_SMT_NOT_SUPPORTED)
2860 : return -ENODEV;
2861 :
2862 : ret = lock_device_hotplug_sysfs();
2863 : if (ret)
2864 : return ret;
2865 :
2866 : if (ctrlval != cpu_smt_control) {
2867 : switch (ctrlval) {
2868 : case CPU_SMT_ENABLED:
2869 : ret = cpuhp_smt_enable();
2870 : break;
2871 : case CPU_SMT_DISABLED:
2872 : case CPU_SMT_FORCE_DISABLED:
2873 : ret = cpuhp_smt_disable(ctrlval);
2874 : break;
2875 : }
2876 : }
2877 :
2878 : unlock_device_hotplug();
2879 : return ret ? ret : count;
2880 : }
2881 :
2882 : #else /* !CONFIG_HOTPLUG_SMT */
2883 : static ssize_t
2884 : __store_smt_control(struct device *dev, struct device_attribute *attr,
2885 : const char *buf, size_t count)
2886 : {
2887 : return -ENODEV;
2888 : }
2889 : #endif /* CONFIG_HOTPLUG_SMT */
2890 :
2891 : static const char *smt_states[] = {
2892 : [CPU_SMT_ENABLED] = "on",
2893 : [CPU_SMT_DISABLED] = "off",
2894 : [CPU_SMT_FORCE_DISABLED] = "forceoff",
2895 : [CPU_SMT_NOT_SUPPORTED] = "notsupported",
2896 : [CPU_SMT_NOT_IMPLEMENTED] = "notimplemented",
2897 : };
2898 :
2899 : static ssize_t control_show(struct device *dev,
2900 : struct device_attribute *attr, char *buf)
2901 : {
2902 : const char *state = smt_states[cpu_smt_control];
2903 :
2904 : return snprintf(buf, PAGE_SIZE - 2, "%s\n", state);
2905 : }
2906 :
2907 : static ssize_t control_store(struct device *dev, struct device_attribute *attr,
2908 : const char *buf, size_t count)
2909 : {
2910 : return __store_smt_control(dev, attr, buf, count);
2911 : }
2912 : static DEVICE_ATTR_RW(control);
2913 :
2914 : static ssize_t active_show(struct device *dev,
2915 : struct device_attribute *attr, char *buf)
2916 : {
2917 : return snprintf(buf, PAGE_SIZE - 2, "%d\n", sched_smt_active());
2918 : }
2919 : static DEVICE_ATTR_RO(active);
2920 :
2921 : static struct attribute *cpuhp_smt_attrs[] = {
2922 : &dev_attr_control.attr,
2923 : &dev_attr_active.attr,
2924 : NULL
2925 : };
2926 :
2927 : static const struct attribute_group cpuhp_smt_attr_group = {
2928 : .attrs = cpuhp_smt_attrs,
2929 : .name = "smt",
2930 : NULL
2931 : };
2932 :
2933 : static int __init cpu_smt_sysfs_init(void)
2934 : {
2935 : struct device *dev_root;
2936 : int ret = -ENODEV;
2937 :
2938 : dev_root = bus_get_dev_root(&cpu_subsys);
2939 : if (dev_root) {
2940 : ret = sysfs_create_group(&dev_root->kobj, &cpuhp_smt_attr_group);
2941 : put_device(dev_root);
2942 : }
2943 : return ret;
2944 : }
2945 :
2946 : static int __init cpuhp_sysfs_init(void)
2947 : {
2948 : struct device *dev_root;
2949 : int cpu, ret;
2950 :
2951 : ret = cpu_smt_sysfs_init();
2952 : if (ret)
2953 : return ret;
2954 :
2955 : dev_root = bus_get_dev_root(&cpu_subsys);
2956 : if (dev_root) {
2957 : ret = sysfs_create_group(&dev_root->kobj, &cpuhp_cpu_root_attr_group);
2958 : put_device(dev_root);
2959 : if (ret)
2960 : return ret;
2961 : }
2962 :
2963 : for_each_possible_cpu(cpu) {
2964 : struct device *dev = get_cpu_device(cpu);
2965 :
2966 : if (!dev)
2967 : continue;
2968 : ret = sysfs_create_group(&dev->kobj, &cpuhp_cpu_attr_group);
2969 : if (ret)
2970 : return ret;
2971 : }
2972 : return 0;
2973 : }
2974 : device_initcall(cpuhp_sysfs_init);
2975 : #endif /* CONFIG_SYSFS && CONFIG_HOTPLUG_CPU */
2976 :
2977 : /*
2978 : * cpu_bit_bitmap[] is a special, "compressed" data structure that
2979 : * represents all NR_CPUS bits binary values of 1<<nr.
2980 : *
2981 : * It is used by cpumask_of() to get a constant address to a CPU
2982 : * mask value that has a single bit set only.
2983 : */
2984 :
2985 : /* cpu_bit_bitmap[0] is empty - so we can back into it */
2986 : #define MASK_DECLARE_1(x) [x+1][0] = (1UL << (x))
2987 : #define MASK_DECLARE_2(x) MASK_DECLARE_1(x), MASK_DECLARE_1(x+1)
2988 : #define MASK_DECLARE_4(x) MASK_DECLARE_2(x), MASK_DECLARE_2(x+2)
2989 : #define MASK_DECLARE_8(x) MASK_DECLARE_4(x), MASK_DECLARE_4(x+4)
2990 :
2991 : const unsigned long cpu_bit_bitmap[BITS_PER_LONG+1][BITS_TO_LONGS(NR_CPUS)] = {
2992 :
2993 : MASK_DECLARE_8(0), MASK_DECLARE_8(8),
2994 : MASK_DECLARE_8(16), MASK_DECLARE_8(24),
2995 : #if BITS_PER_LONG > 32
2996 : MASK_DECLARE_8(32), MASK_DECLARE_8(40),
2997 : MASK_DECLARE_8(48), MASK_DECLARE_8(56),
2998 : #endif
2999 : };
3000 : EXPORT_SYMBOL_GPL(cpu_bit_bitmap);
3001 :
3002 : const DECLARE_BITMAP(cpu_all_bits, NR_CPUS) = CPU_BITS_ALL;
3003 : EXPORT_SYMBOL(cpu_all_bits);
3004 :
3005 : #ifdef CONFIG_INIT_ALL_POSSIBLE
3006 : struct cpumask __cpu_possible_mask __read_mostly
3007 : = {CPU_BITS_ALL};
3008 : #else
3009 : struct cpumask __cpu_possible_mask __read_mostly;
3010 : #endif
3011 : EXPORT_SYMBOL(__cpu_possible_mask);
3012 :
3013 : struct cpumask __cpu_online_mask __read_mostly;
3014 : EXPORT_SYMBOL(__cpu_online_mask);
3015 :
3016 : struct cpumask __cpu_present_mask __read_mostly;
3017 : EXPORT_SYMBOL(__cpu_present_mask);
3018 :
3019 : struct cpumask __cpu_active_mask __read_mostly;
3020 : EXPORT_SYMBOL(__cpu_active_mask);
3021 :
3022 : struct cpumask __cpu_dying_mask __read_mostly;
3023 : EXPORT_SYMBOL(__cpu_dying_mask);
3024 :
3025 : atomic_t __num_online_cpus __read_mostly;
3026 : EXPORT_SYMBOL(__num_online_cpus);
3027 :
3028 0 : void init_cpu_present(const struct cpumask *src)
3029 : {
3030 0 : cpumask_copy(&__cpu_present_mask, src);
3031 0 : }
3032 :
3033 0 : void init_cpu_possible(const struct cpumask *src)
3034 : {
3035 0 : cpumask_copy(&__cpu_possible_mask, src);
3036 0 : }
3037 :
3038 0 : void init_cpu_online(const struct cpumask *src)
3039 : {
3040 0 : cpumask_copy(&__cpu_online_mask, src);
3041 0 : }
3042 :
3043 0 : void set_cpu_online(unsigned int cpu, bool online)
3044 : {
3045 : /*
3046 : * atomic_inc/dec() is required to handle the horrid abuse of this
3047 : * function by the reboot and kexec code which invoke it from
3048 : * IPI/NMI broadcasts when shutting down CPUs. Invocation from
3049 : * regular CPU hotplug is properly serialized.
3050 : *
3051 : * Note, that the fact that __num_online_cpus is of type atomic_t
3052 : * does not protect readers which are not serialized against
3053 : * concurrent hotplug operations.
3054 : */
3055 0 : if (online) {
3056 2 : if (!cpumask_test_and_set_cpu(cpu, &__cpu_online_mask))
3057 : atomic_inc(&__num_online_cpus);
3058 : } else {
3059 0 : if (cpumask_test_and_clear_cpu(cpu, &__cpu_online_mask))
3060 : atomic_dec(&__num_online_cpus);
3061 : }
3062 0 : }
3063 :
3064 : /*
3065 : * Activate the first processor.
3066 : */
3067 1 : void __init boot_cpu_init(void)
3068 : {
3069 1 : int cpu = smp_processor_id();
3070 :
3071 : /* Mark the boot cpu "present", "online" etc for SMP and UP case */
3072 2 : set_cpu_online(cpu, true);
3073 2 : set_cpu_active(cpu, true);
3074 2 : set_cpu_present(cpu, true);
3075 2 : set_cpu_possible(cpu, true);
3076 :
3077 : #ifdef CONFIG_SMP
3078 : __boot_cpu_id = cpu;
3079 : #endif
3080 1 : }
3081 :
3082 : /*
3083 : * Must be called _AFTER_ setting up the per_cpu areas
3084 : */
3085 1 : void __init boot_cpu_hotplug_init(void)
3086 : {
3087 : #ifdef CONFIG_SMP
3088 : cpumask_set_cpu(smp_processor_id(), &cpus_booted_once_mask);
3089 : atomic_set(this_cpu_ptr(&cpuhp_state.ap_sync_state), SYNC_STATE_ONLINE);
3090 : #endif
3091 3 : this_cpu_write(cpuhp_state.state, CPUHP_ONLINE);
3092 3 : this_cpu_write(cpuhp_state.target, CPUHP_ONLINE);
3093 1 : }
3094 :
3095 : /*
3096 : * These are used for a global "mitigations=" cmdline option for toggling
3097 : * optional CPU mitigations.
3098 : */
3099 : enum cpu_mitigations {
3100 : CPU_MITIGATIONS_OFF,
3101 : CPU_MITIGATIONS_AUTO,
3102 : CPU_MITIGATIONS_AUTO_NOSMT,
3103 : };
3104 :
3105 : static enum cpu_mitigations cpu_mitigations __ro_after_init =
3106 : CPU_MITIGATIONS_AUTO;
3107 :
3108 0 : static int __init mitigations_parse_cmdline(char *arg)
3109 : {
3110 0 : if (!strcmp(arg, "off"))
3111 0 : cpu_mitigations = CPU_MITIGATIONS_OFF;
3112 0 : else if (!strcmp(arg, "auto"))
3113 0 : cpu_mitigations = CPU_MITIGATIONS_AUTO;
3114 0 : else if (!strcmp(arg, "auto,nosmt"))
3115 0 : cpu_mitigations = CPU_MITIGATIONS_AUTO_NOSMT;
3116 : else
3117 0 : pr_crit("Unsupported mitigations=%s, system may still be vulnerable\n",
3118 : arg);
3119 :
3120 0 : return 0;
3121 : }
3122 : early_param("mitigations", mitigations_parse_cmdline);
3123 :
3124 : /* mitigations=off */
3125 0 : bool cpu_mitigations_off(void)
3126 : {
3127 0 : return cpu_mitigations == CPU_MITIGATIONS_OFF;
3128 : }
3129 : EXPORT_SYMBOL_GPL(cpu_mitigations_off);
3130 :
3131 : /* mitigations=auto,nosmt */
3132 0 : bool cpu_mitigations_auto_nosmt(void)
3133 : {
3134 0 : return cpu_mitigations == CPU_MITIGATIONS_AUTO_NOSMT;
3135 : }
3136 : EXPORT_SYMBOL_GPL(cpu_mitigations_auto_nosmt);
|