LCOV - code coverage report
Current view: top level - kernel - cpu.c (source / functions)
Test: coverage.info
Date: 2023-08-24 13:40:31

              Hit    Total    Coverage
Lines:         97      201      48.3 %
Functions:     11       21      52.4 %

          Line data    Source code
       1             : /* CPU control.
       2             :  * (C) 2001, 2002, 2003, 2004 Rusty Russell
       3             :  *
       4             :  * This code is licensed under the GPL.
       5             :  */
       6             : #include <linux/sched/mm.h>
       7             : #include <linux/proc_fs.h>
       8             : #include <linux/smp.h>
       9             : #include <linux/init.h>
      10             : #include <linux/notifier.h>
      11             : #include <linux/sched/signal.h>
      12             : #include <linux/sched/hotplug.h>
      13             : #include <linux/sched/isolation.h>
      14             : #include <linux/sched/task.h>
      15             : #include <linux/sched/smt.h>
      16             : #include <linux/unistd.h>
      17             : #include <linux/cpu.h>
      18             : #include <linux/oom.h>
      19             : #include <linux/rcupdate.h>
      20             : #include <linux/delay.h>
      21             : #include <linux/export.h>
      22             : #include <linux/bug.h>
      23             : #include <linux/kthread.h>
      24             : #include <linux/stop_machine.h>
      25             : #include <linux/mutex.h>
      26             : #include <linux/gfp.h>
      27             : #include <linux/suspend.h>
      28             : #include <linux/lockdep.h>
      29             : #include <linux/tick.h>
      30             : #include <linux/irq.h>
      31             : #include <linux/nmi.h>
      32             : #include <linux/smpboot.h>
      33             : #include <linux/relay.h>
      34             : #include <linux/slab.h>
      35             : #include <linux/scs.h>
      36             : #include <linux/percpu-rwsem.h>
      37             : #include <linux/cpuset.h>
      38             : #include <linux/random.h>
      39             : #include <linux/cc_platform.h>
      40             : 
      41             : #include <trace/events/power.h>
      42             : #define CREATE_TRACE_POINTS
      43             : #include <trace/events/cpuhp.h>
      44             : 
      45             : #include "smpboot.h"
      46             : 
      47             : /**
      48             :  * struct cpuhp_cpu_state - Per cpu hotplug state storage
      49             :  * @state:      The current cpu state
      50             :  * @target:     The target state
      51             :  * @fail:       Current CPU hotplug callback state
      52             :  * @thread:     Pointer to the hotplug thread
      53             :  * @should_run: Thread should execute
      54             :  * @rollback:   Perform a rollback
      55             :  * @single:     Single callback invocation
      56             :  * @bringup:    Single callback bringup or teardown selector
      57             :  * @cpu:        CPU number
      58             :  * @node:       Remote CPU node; for multi-instance, do a
      59             :  *              single entry callback for install/remove
      60             :  * @last:       For multi-instance rollback, remember how far we got
      61             :  * @cb_state:   The state for a single callback (install/uninstall)
      62             :  * @result:     Result of the operation
      63             :  * @ap_sync_state:      State for AP synchronization
      64             :  * @done_up:    Signal completion to the issuer of the task for cpu-up
      65             :  * @done_down:  Signal completion to the issuer of the task for cpu-down
      66             :  */
      67             : struct cpuhp_cpu_state {
      68             :         enum cpuhp_state        state;
      69             :         enum cpuhp_state        target;
      70             :         enum cpuhp_state        fail;
      71             : #ifdef CONFIG_SMP
      72             :         struct task_struct      *thread;
      73             :         bool                    should_run;
      74             :         bool                    rollback;
      75             :         bool                    single;
      76             :         bool                    bringup;
      77             :         struct hlist_node       *node;
      78             :         struct hlist_node       *last;
      79             :         enum cpuhp_state        cb_state;
      80             :         int                     result;
      81             :         atomic_t                ap_sync_state;
      82             :         struct completion       done_up;
      83             :         struct completion       done_down;
      84             : #endif
      85             : };
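
  Illustrative sketch (editorial, not part of cpu.c): the direction of a
  hotplug operation is never stored explicitly; it is derived from the
  relation between @state and @target, exactly as cpuhp_set_state() does
  further down in this file. The helper name below is made up.

        static bool is_bringup(const struct cpuhp_cpu_state *st,
                               enum cpuhp_state target)
        {
                /* Moving towards a higher state means the CPU is coming up. */
                return st->state < target;
        }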
      86             : 
      87             : static DEFINE_PER_CPU(struct cpuhp_cpu_state, cpuhp_state) = {
      88             :         .fail = CPUHP_INVALID,
      89             : };
      90             : 
      91             : #ifdef CONFIG_SMP
      92             : cpumask_t cpus_booted_once_mask;
      93             : #endif
      94             : 
      95             : #if defined(CONFIG_LOCKDEP) && defined(CONFIG_SMP)
      96             : static struct lockdep_map cpuhp_state_up_map =
      97             :         STATIC_LOCKDEP_MAP_INIT("cpuhp_state-up", &cpuhp_state_up_map);
      98             : static struct lockdep_map cpuhp_state_down_map =
      99             :         STATIC_LOCKDEP_MAP_INIT("cpuhp_state-down", &cpuhp_state_down_map);
     100             : 
     101             : 
     102             : static inline void cpuhp_lock_acquire(bool bringup)
     103             : {
     104             :         lock_map_acquire(bringup ? &cpuhp_state_up_map : &cpuhp_state_down_map);
     105             : }
     106             : 
     107             : static inline void cpuhp_lock_release(bool bringup)
     108             : {
     109             :         lock_map_release(bringup ? &cpuhp_state_up_map : &cpuhp_state_down_map);
     110             : }
     111             : #else
     112             : 
     113             : static inline void cpuhp_lock_acquire(bool bringup) { }
     114             : static inline void cpuhp_lock_release(bool bringup) { }
     115             : 
     116             : #endif
     117             : 
     118             : /**
     119             :  * struct cpuhp_step - Hotplug state machine step
     120             :  * @name:       Name of the step
     121             :  * @startup:    Startup function of the step
     122             :  * @teardown:   Teardown function of the step
     123             :  * @cant_stop:  Bringup/teardown can't be stopped at this step
     124             :  * @multi_instance:     State has multiple instances which get added afterwards
     125             :  */
     126             : struct cpuhp_step {
     127             :         const char              *name;
     128             :         union {
     129             :                 int             (*single)(unsigned int cpu);
     130             :                 int             (*multi)(unsigned int cpu,
     131             :                                          struct hlist_node *node);
     132             :         } startup;
     133             :         union {
     134             :                 int             (*single)(unsigned int cpu);
     135             :                 int             (*multi)(unsigned int cpu,
     136             :                                          struct hlist_node *node);
     137             :         } teardown;
     138             :         /* private: */
     139             :         struct hlist_head       list;
     140             :         /* public: */
     141             :         bool                    cant_stop;
     142             :         bool                    multi_instance;
     143             : };
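
  Illustrative sketch (editorial, not part of cpu.c): subsystems do not
  fill in struct cpuhp_step directly; the startup/teardown members are
  populated through the registration API declared in <linux/cpuhotplug.h>.
  The callback names and the "subsys:online" string below are made up.

        static int my_online(unsigned int cpu)  { /* per-CPU setup */    return 0; }
        static int my_offline(unsigned int cpu) { /* per-CPU teardown */ return 0; }

        static int __init my_subsys_init(void)
        {
                /* CPUHP_AP_ONLINE_DYN allocates a dynamic state; < 0 is an errno. */
                return cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "subsys:online",
                                         my_online, my_offline);
        }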
     144             : 
     145             : static DEFINE_MUTEX(cpuhp_state_mutex);
     146             : static struct cpuhp_step cpuhp_hp_states[];
     147             : 
     148             : static struct cpuhp_step *cpuhp_get_step(enum cpuhp_state state)
     149             : {
     150          27 :         return cpuhp_hp_states + state;
     151             : }
     152             : 
     153             : static bool cpuhp_step_empty(bool bringup, struct cpuhp_step *step)
     154             : {
     155           6 :         return bringup ? !step->startup.single : !step->teardown.single;
     156             : }
     157             : 
     158             : /**
     159             :  * cpuhp_invoke_callback - Invoke the callbacks for a given state
     160             :  * @cpu:        The cpu for which the callback should be invoked
     161             :  * @state:      The state to do callbacks for
     162             :  * @bringup:    True if the bringup callback should be invoked
     163             :  * @node:       For multi-instance, do a single entry callback for install/remove
     164             :  * @lastp:      For multi-instance rollback, remember how far we got
     165             :  *
     166             :  * Called from cpu hotplug and from the state register machinery.
     167             :  *
     168             :  * Return: %0 on success or a negative errno code
     169             :  */
     170           3 : static int cpuhp_invoke_callback(unsigned int cpu, enum cpuhp_state state,
     171             :                                  bool bringup, struct hlist_node *node,
     172             :                                  struct hlist_node **lastp)
     173             : {
     174           3 :         struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
     175           3 :         struct cpuhp_step *step = cpuhp_get_step(state);
     176             :         int (*cbm)(unsigned int cpu, struct hlist_node *node);
     177             :         int (*cb)(unsigned int cpu);
     178             :         int ret, cnt;
     179             : 
     180           3 :         if (st->fail == state) {
     181           0 :                 st->fail = CPUHP_INVALID;
     182           0 :                 return -EAGAIN;
     183             :         }
     184             : 
     185           3 :         if (cpuhp_step_empty(bringup, step)) {
     186           0 :                 WARN_ON_ONCE(1);
     187             :                 return 0;
     188             :         }
     189             : 
     190           3 :         if (!step->multi_instance) {
     191           3 :                 WARN_ON_ONCE(lastp && *lastp);
     192           3 :                 cb = bringup ? step->startup.single : step->teardown.single;
     193             : 
     194           3 :                 trace_cpuhp_enter(cpu, st->target, state, cb);
     195           3 :                 ret = cb(cpu);
     196           3 :                 trace_cpuhp_exit(cpu, st->state, state, ret);
     197           3 :                 return ret;
     198             :         }
     199           0 :         cbm = bringup ? step->startup.multi : step->teardown.multi;
     200             : 
     201             :         /* Single invocation for instance add/remove */
     202           0 :         if (node) {
     203           0 :                 WARN_ON_ONCE(lastp && *lastp);
     204           0 :                 trace_cpuhp_multi_enter(cpu, st->target, state, cbm, node);
     205           0 :                 ret = cbm(cpu, node);
     206           0 :                 trace_cpuhp_exit(cpu, st->state, state, ret);
     207           0 :                 return ret;
     208             :         }
     209             : 
     210             :         /* State transition. Invoke on all instances */
     211           0 :         cnt = 0;
     212           0 :         hlist_for_each(node, &step->list) {
     213           0 :                 if (lastp && node == *lastp)
     214             :                         break;
     215             : 
     216           0 :                 trace_cpuhp_multi_enter(cpu, st->target, state, cbm, node);
     217           0 :                 ret = cbm(cpu, node);
     218           0 :                 trace_cpuhp_exit(cpu, st->state, state, ret);
     219           0 :                 if (ret) {
     220           0 :                         if (!lastp)
     221             :                                 goto err;
     222             : 
     223           0 :                         *lastp = node;
     224           0 :                         return ret;
     225             :                 }
     226           0 :                 cnt++;
     227             :         }
     228           0 :         if (lastp)
     229           0 :                 *lastp = NULL;
     230             :         return 0;
     231             : err:
     232             :         /* Rollback the instances if one failed */
     233           0 :         cbm = !bringup ? step->startup.multi : step->teardown.multi;
     234           0 :         if (!cbm)
     235             :                 return ret;
     236             : 
     237           0 :         hlist_for_each(node, &step->list) {
     238           0 :                 if (!cnt--)
     239             :                         break;
     240             : 
     241           0 :                 trace_cpuhp_multi_enter(cpu, st->target, state, cbm, node);
     242           0 :                 ret = cbm(cpu, node);
     243           0 :                 trace_cpuhp_exit(cpu, st->state, state, ret);
     244             :                 /*
      245             :                  * Rollback must not fail.
     246             :                  */
     247           0 :                 WARN_ON_ONCE(ret);
     248             :         }
     249             :         return ret;
     250             : }
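
        /*
         * Worked example (editorial): if a multi-instance state has
         * instances A, B, C and B's bringup callback fails with
         * node == NULL and lastp == NULL, the loop above invokes A (ok)
         * and B (fails); the err: path then replays the teardown callback
         * on the cnt == 1 instances already brought up (only A) before
         * returning the error.
         */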
     251             : 
     252             : #ifdef CONFIG_SMP
     253             : static bool cpuhp_is_ap_state(enum cpuhp_state state)
     254             : {
     255             :         /*
     256             :          * The extra check for CPUHP_TEARDOWN_CPU is only for documentation
     257             :          * purposes as that state is handled explicitly in cpu_down.
     258             :          */
     259             :         return state > CPUHP_BRINGUP_CPU && state != CPUHP_TEARDOWN_CPU;
     260             : }
     261             : 
     262             : static inline void wait_for_ap_thread(struct cpuhp_cpu_state *st, bool bringup)
     263             : {
     264             :         struct completion *done = bringup ? &st->done_up : &st->done_down;
     265             :         wait_for_completion(done);
     266             : }
     267             : 
     268             : static inline void complete_ap_thread(struct cpuhp_cpu_state *st, bool bringup)
     269             : {
     270             :         struct completion *done = bringup ? &st->done_up : &st->done_down;
     271             :         complete(done);
     272             : }
     273             : 
     274             : /*
      275             :  * The former STARTING/DYING states run with IRQs disabled and must not fail.
     276             :  */
     277             : static bool cpuhp_is_atomic_state(enum cpuhp_state state)
     278             : {
     279             :         return CPUHP_AP_IDLE_DEAD <= state && state < CPUHP_AP_ONLINE;
     280             : }
     281             : 
     282             : /* Synchronization state management */
     283             : enum cpuhp_sync_state {
     284             :         SYNC_STATE_DEAD,
     285             :         SYNC_STATE_KICKED,
     286             :         SYNC_STATE_SHOULD_DIE,
     287             :         SYNC_STATE_ALIVE,
     288             :         SYNC_STATE_SHOULD_ONLINE,
     289             :         SYNC_STATE_ONLINE,
     290             : };
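
        /*
         * Nominal progression implied by the helpers below (editorial
         * note; SYNC_STATE_ prefixes dropped, retry paths omitted):
         *
         *   bringup:  DEAD -> KICKED -> ALIVE -> SHOULD_ONLINE -> ONLINE
         *   teardown: ONLINE -> SHOULD_DIE -> DEAD
         */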
     291             : 
     292             : #ifdef CONFIG_HOTPLUG_CORE_SYNC
     293             : /**
     294             :  * cpuhp_ap_update_sync_state - Update synchronization state during bringup/teardown
     295             :  * @state:      The synchronization state to set
     296             :  *
      297             :  * Not a synchronization point; only the synchronization state is updated, but the
      298             :  * implied full barrier makes the AP's changes visible before the control CPU proceeds.
     299             :  */
     300             : static inline void cpuhp_ap_update_sync_state(enum cpuhp_sync_state state)
     301             : {
     302             :         atomic_t *st = this_cpu_ptr(&cpuhp_state.ap_sync_state);
     303             : 
     304             :         (void)atomic_xchg(st, state);
     305             : }
     306             : 
     307             : void __weak arch_cpuhp_sync_state_poll(void) { cpu_relax(); }
     308             : 
     309             : static bool cpuhp_wait_for_sync_state(unsigned int cpu, enum cpuhp_sync_state state,
     310             :                                       enum cpuhp_sync_state next_state)
     311             : {
     312             :         atomic_t *st = per_cpu_ptr(&cpuhp_state.ap_sync_state, cpu);
     313             :         ktime_t now, end, start = ktime_get();
     314             :         int sync;
     315             : 
     316             :         end = start + 10ULL * NSEC_PER_SEC;
     317             : 
     318             :         sync = atomic_read(st);
     319             :         while (1) {
     320             :                 if (sync == state) {
     321             :                         if (!atomic_try_cmpxchg(st, &sync, next_state))
     322             :                                 continue;
     323             :                         return true;
     324             :                 }
     325             : 
     326             :                 now = ktime_get();
     327             :                 if (now > end) {
     328             :                         /* Timeout. Leave the state unchanged */
     329             :                         return false;
     330             :                 } else if (now - start < NSEC_PER_MSEC) {
     331             :                         /* Poll for one millisecond */
     332             :                         arch_cpuhp_sync_state_poll();
     333             :                 } else {
     334             :                         usleep_range_state(USEC_PER_MSEC, 2 * USEC_PER_MSEC, TASK_UNINTERRUPTIBLE);
     335             :                 }
     336             :                 sync = atomic_read(st);
     337             :         }
     338             :         return true;
     339             : }
     340             : #else  /* CONFIG_HOTPLUG_CORE_SYNC */
     341             : static inline void cpuhp_ap_update_sync_state(enum cpuhp_sync_state state) { }
     342             : #endif /* !CONFIG_HOTPLUG_CORE_SYNC */
     343             : 
     344             : #ifdef CONFIG_HOTPLUG_CORE_SYNC_DEAD
     345             : /**
     346             :  * cpuhp_ap_report_dead - Update synchronization state to DEAD
     347             :  *
     348             :  * No synchronization point. Just update of the synchronization state.
     349             :  */
     350             : void cpuhp_ap_report_dead(void)
     351             : {
     352             :         cpuhp_ap_update_sync_state(SYNC_STATE_DEAD);
     353             : }
     354             : 
     355             : void __weak arch_cpuhp_cleanup_dead_cpu(unsigned int cpu) { }
     356             : 
     357             : /*
     358             :  * Late CPU shutdown synchronization point. Cannot use cpuhp_state::done_down
     359             :  * because the AP cannot issue complete() at this stage.
     360             :  */
     361             : static void cpuhp_bp_sync_dead(unsigned int cpu)
     362             : {
     363             :         atomic_t *st = per_cpu_ptr(&cpuhp_state.ap_sync_state, cpu);
     364             :         int sync = atomic_read(st);
     365             : 
     366             :         do {
     367             :                 /* CPU can have reported dead already. Don't overwrite that! */
     368             :                 if (sync == SYNC_STATE_DEAD)
     369             :                         break;
     370             :         } while (!atomic_try_cmpxchg(st, &sync, SYNC_STATE_SHOULD_DIE));
     371             : 
     372             :         if (cpuhp_wait_for_sync_state(cpu, SYNC_STATE_DEAD, SYNC_STATE_DEAD)) {
     373             :                 /* CPU reached dead state. Invoke the cleanup function */
     374             :                 arch_cpuhp_cleanup_dead_cpu(cpu);
     375             :                 return;
     376             :         }
     377             : 
     378             :         /* No further action possible. Emit message and give up. */
     379             :         pr_err("CPU%u failed to report dead state\n", cpu);
     380             : }
     381             : #else /* CONFIG_HOTPLUG_CORE_SYNC_DEAD */
     382             : static inline void cpuhp_bp_sync_dead(unsigned int cpu) { }
     383             : #endif /* !CONFIG_HOTPLUG_CORE_SYNC_DEAD */
     384             : 
     385             : #ifdef CONFIG_HOTPLUG_CORE_SYNC_FULL
     386             : /**
     387             :  * cpuhp_ap_sync_alive - Synchronize AP with the control CPU once it is alive
     388             :  *
     389             :  * Updates the AP synchronization state to SYNC_STATE_ALIVE and waits
     390             :  * for the BP to release it.
     391             :  */
     392             : void cpuhp_ap_sync_alive(void)
     393             : {
     394             :         atomic_t *st = this_cpu_ptr(&cpuhp_state.ap_sync_state);
     395             : 
     396             :         cpuhp_ap_update_sync_state(SYNC_STATE_ALIVE);
     397             : 
     398             :         /* Wait for the control CPU to release it. */
     399             :         while (atomic_read(st) != SYNC_STATE_SHOULD_ONLINE)
     400             :                 cpu_relax();
     401             : }
     402             : 
     403             : static bool cpuhp_can_boot_ap(unsigned int cpu)
     404             : {
     405             :         atomic_t *st = per_cpu_ptr(&cpuhp_state.ap_sync_state, cpu);
     406             :         int sync = atomic_read(st);
     407             : 
     408             : again:
     409             :         switch (sync) {
     410             :         case SYNC_STATE_DEAD:
     411             :                 /* CPU is properly dead */
     412             :                 break;
     413             :         case SYNC_STATE_KICKED:
     414             :                 /* CPU did not come up in previous attempt */
     415             :                 break;
     416             :         case SYNC_STATE_ALIVE:
      417             :                 /* CPU is stuck in cpuhp_ap_sync_alive(). */
     418             :                 break;
     419             :         default:
     420             :                 /* CPU failed to report online or dead and is in limbo state. */
     421             :                 return false;
     422             :         }
     423             : 
     424             :         /* Prepare for booting */
     425             :         if (!atomic_try_cmpxchg(st, &sync, SYNC_STATE_KICKED))
     426             :                 goto again;
     427             : 
     428             :         return true;
     429             : }
     430             : 
     431             : void __weak arch_cpuhp_cleanup_kick_cpu(unsigned int cpu) { }
     432             : 
     433             : /*
     434             :  * Early CPU bringup synchronization point. Cannot use cpuhp_state::done_up
     435             :  * because the AP cannot issue complete() so early in the bringup.
     436             :  */
     437             : static int cpuhp_bp_sync_alive(unsigned int cpu)
     438             : {
     439             :         int ret = 0;
     440             : 
     441             :         if (!IS_ENABLED(CONFIG_HOTPLUG_CORE_SYNC_FULL))
     442             :                 return 0;
     443             : 
     444             :         if (!cpuhp_wait_for_sync_state(cpu, SYNC_STATE_ALIVE, SYNC_STATE_SHOULD_ONLINE)) {
     445             :                 pr_err("CPU%u failed to report alive state\n", cpu);
     446             :                 ret = -EIO;
     447             :         }
     448             : 
      449             :         /* Let the architecture clean up the kick alive mechanics. */
     450             :         arch_cpuhp_cleanup_kick_cpu(cpu);
     451             :         return ret;
     452             : }
     453             : #else /* CONFIG_HOTPLUG_CORE_SYNC_FULL */
     454             : static inline int cpuhp_bp_sync_alive(unsigned int cpu) { return 0; }
     455             : static inline bool cpuhp_can_boot_ap(unsigned int cpu) { return true; }
     456             : #endif /* !CONFIG_HOTPLUG_CORE_SYNC_FULL */
     457             : 
     458             : /* Serializes the updates to cpu_online_mask, cpu_present_mask */
     459             : static DEFINE_MUTEX(cpu_add_remove_lock);
     460             : bool cpuhp_tasks_frozen;
     461             : EXPORT_SYMBOL_GPL(cpuhp_tasks_frozen);
     462             : 
     463             : /*
     464             :  * The following two APIs (cpu_maps_update_begin/done) must be used when
     465             :  * attempting to serialize the updates to cpu_online_mask & cpu_present_mask.
     466             :  */
     467             : void cpu_maps_update_begin(void)
     468             : {
     469             :         mutex_lock(&cpu_add_remove_lock);
     470             : }
     471             : 
     472             : void cpu_maps_update_done(void)
     473             : {
     474             :         mutex_unlock(&cpu_add_remove_lock);
     475             : }
     476             : 
     477             : /*
     478             :  * If set, cpu_up and cpu_down will return -EBUSY and do nothing.
     479             :  * Should always be manipulated under cpu_add_remove_lock
     480             :  */
     481             : static int cpu_hotplug_disabled;
     482             : 
     483             : #ifdef CONFIG_HOTPLUG_CPU
     484             : 
     485             : DEFINE_STATIC_PERCPU_RWSEM(cpu_hotplug_lock);
     486             : 
     487             : void cpus_read_lock(void)
     488             : {
     489             :         percpu_down_read(&cpu_hotplug_lock);
     490             : }
     491             : EXPORT_SYMBOL_GPL(cpus_read_lock);
     492             : 
     493             : int cpus_read_trylock(void)
     494             : {
     495             :         return percpu_down_read_trylock(&cpu_hotplug_lock);
     496             : }
     497             : EXPORT_SYMBOL_GPL(cpus_read_trylock);
     498             : 
     499             : void cpus_read_unlock(void)
     500             : {
     501             :         percpu_up_read(&cpu_hotplug_lock);
     502             : }
     503             : EXPORT_SYMBOL_GPL(cpus_read_unlock);
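
  Illustrative reader-side pairing (editorial, not part of cpu.c):
  holding the read lock keeps cpu_online_mask stable across an
  iteration. The function name below is made up.

        static void walk_online_cpus(void)
        {
                unsigned int cpu;

                cpus_read_lock();
                for_each_online_cpu(cpu) {
                        /* @cpu cannot be unplugged while the lock is held */
                }
                cpus_read_unlock();
        }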
     504             : 
     505             : void cpus_write_lock(void)
     506             : {
     507             :         percpu_down_write(&cpu_hotplug_lock);
     508             : }
     509             : 
     510             : void cpus_write_unlock(void)
     511             : {
     512             :         percpu_up_write(&cpu_hotplug_lock);
     513             : }
     514             : 
     515             : void lockdep_assert_cpus_held(void)
     516             : {
     517             :         /*
     518             :          * We can't have hotplug operations before userspace starts running,
     519             :          * and some init codepaths will knowingly not take the hotplug lock.
     520             :          * This is all valid, so mute lockdep until it makes sense to report
     521             :          * unheld locks.
     522             :          */
     523             :         if (system_state < SYSTEM_RUNNING)
     524             :                 return;
     525             : 
     526             :         percpu_rwsem_assert_held(&cpu_hotplug_lock);
     527             : }
     528             : 
     529             : #ifdef CONFIG_LOCKDEP
     530             : int lockdep_is_cpus_held(void)
     531             : {
     532             :         return percpu_rwsem_is_held(&cpu_hotplug_lock);
     533             : }
     534             : #endif
     535             : 
     536             : static void lockdep_acquire_cpus_lock(void)
     537             : {
     538             :         rwsem_acquire(&cpu_hotplug_lock.dep_map, 0, 0, _THIS_IP_);
     539             : }
     540             : 
     541             : static void lockdep_release_cpus_lock(void)
     542             : {
     543             :         rwsem_release(&cpu_hotplug_lock.dep_map, _THIS_IP_);
     544             : }
     545             : 
     546             : /*
     547             :  * Wait for currently running CPU hotplug operations to complete (if any) and
     548             :  * disable future CPU hotplug (from sysfs). The 'cpu_add_remove_lock' protects
     549             :  * the 'cpu_hotplug_disabled' flag. The same lock is also acquired by the
     550             :  * hotplug path before performing hotplug operations. So acquiring that lock
     551             :  * guarantees mutual exclusion from any currently running hotplug operations.
     552             :  */
     553             : void cpu_hotplug_disable(void)
     554             : {
     555             :         cpu_maps_update_begin();
     556             :         cpu_hotplug_disabled++;
     557             :         cpu_maps_update_done();
     558             : }
     559             : EXPORT_SYMBOL_GPL(cpu_hotplug_disable);
     560             : 
     561             : static void __cpu_hotplug_enable(void)
     562             : {
     563             :         if (WARN_ONCE(!cpu_hotplug_disabled, "Unbalanced cpu hotplug enable\n"))
     564             :                 return;
     565             :         cpu_hotplug_disabled--;
     566             : }
     567             : 
     568             : void cpu_hotplug_enable(void)
     569             : {
     570             :         cpu_maps_update_begin();
     571             :         __cpu_hotplug_enable();
     572             :         cpu_maps_update_done();
     573             : }
     574             : EXPORT_SYMBOL_GPL(cpu_hotplug_enable);
     575             : 
     576             : #else
     577             : 
     578             : static void lockdep_acquire_cpus_lock(void)
     579             : {
     580             : }
     581             : 
     582             : static void lockdep_release_cpus_lock(void)
     583             : {
     584             : }
     585             : 
     586             : #endif  /* CONFIG_HOTPLUG_CPU */
     587             : 
     588             : /*
     589             :  * Architectures that need SMT-specific errata handling during SMT hotplug
     590             :  * should override this.
     591             :  */
     592             : void __weak arch_smt_update(void) { }
     593             : 
     594             : #ifdef CONFIG_HOTPLUG_SMT
     595             : enum cpuhp_smt_control cpu_smt_control __read_mostly = CPU_SMT_ENABLED;
     596             : 
     597             : void __init cpu_smt_disable(bool force)
     598             : {
     599             :         if (!cpu_smt_possible())
     600             :                 return;
     601             : 
     602             :         if (force) {
     603             :                 pr_info("SMT: Force disabled\n");
     604             :                 cpu_smt_control = CPU_SMT_FORCE_DISABLED;
     605             :         } else {
     606             :                 pr_info("SMT: disabled\n");
     607             :                 cpu_smt_control = CPU_SMT_DISABLED;
     608             :         }
     609             : }
     610             : 
     611             : /*
      612             :  * The decision whether SMT is supported can only be made after full
     613             :  * CPU identification. Called from architecture code.
     614             :  */
     615             : void __init cpu_smt_check_topology(void)
     616             : {
     617             :         if (!topology_smt_supported())
     618             :                 cpu_smt_control = CPU_SMT_NOT_SUPPORTED;
     619             : }
     620             : 
     621             : static int __init smt_cmdline_disable(char *str)
     622             : {
     623             :         cpu_smt_disable(str && !strcmp(str, "force"));
     624             :         return 0;
     625             : }
     626             : early_param("nosmt", smt_cmdline_disable);
     627             : 
     628             : static inline bool cpu_smt_allowed(unsigned int cpu)
     629             : {
     630             :         if (cpu_smt_control == CPU_SMT_ENABLED)
     631             :                 return true;
     632             : 
     633             :         if (topology_is_primary_thread(cpu))
     634             :                 return true;
     635             : 
     636             :         /*
     637             :          * On x86 it's required to boot all logical CPUs at least once so
     638             :          * that the init code can get a chance to set CR4.MCE on each
      639             :          * CPU. Otherwise, a broadcast MCE observing CR4.MCE=0b on any
      640             :          * core will shut down the machine.
     641             :          */
     642             :         return !cpumask_test_cpu(cpu, &cpus_booted_once_mask);
     643             : }
     644             : 
      645             : /* Returns true if SMT is not supported or forcefully (irreversibly) disabled */
     646             : bool cpu_smt_possible(void)
     647             : {
     648             :         return cpu_smt_control != CPU_SMT_FORCE_DISABLED &&
     649             :                 cpu_smt_control != CPU_SMT_NOT_SUPPORTED;
     650             : }
     651             : EXPORT_SYMBOL_GPL(cpu_smt_possible);
     652             : 
     653             : static inline bool cpuhp_smt_aware(void)
     654             : {
     655             :         return topology_smt_supported();
     656             : }
     657             : 
     658             : static inline const struct cpumask *cpuhp_get_primary_thread_mask(void)
     659             : {
     660             :         return cpu_primary_thread_mask;
     661             : }
     662             : #else
     663             : static inline bool cpu_smt_allowed(unsigned int cpu) { return true; }
     664             : static inline bool cpuhp_smt_aware(void) { return false; }
     665             : static inline const struct cpumask *cpuhp_get_primary_thread_mask(void)
     666             : {
     667             :         return cpu_present_mask;
     668             : }
     669             : #endif
     670             : 
     671             : static inline enum cpuhp_state
     672             : cpuhp_set_state(int cpu, struct cpuhp_cpu_state *st, enum cpuhp_state target)
     673             : {
     674             :         enum cpuhp_state prev_state = st->state;
     675             :         bool bringup = st->state < target;
     676             : 
     677             :         st->rollback = false;
     678             :         st->last = NULL;
     679             : 
     680             :         st->target = target;
     681             :         st->single = false;
     682             :         st->bringup = bringup;
     683             :         if (cpu_dying(cpu) != !bringup)
     684             :                 set_cpu_dying(cpu, !bringup);
     685             : 
     686             :         return prev_state;
     687             : }
     688             : 
     689             : static inline void
     690             : cpuhp_reset_state(int cpu, struct cpuhp_cpu_state *st,
     691             :                   enum cpuhp_state prev_state)
     692             : {
     693             :         bool bringup = !st->bringup;
     694             : 
     695             :         st->target = prev_state;
     696             : 
     697             :         /*
      698             :          * Already rolling back. No need to invert the bringup value or to change
     699             :          * the current state.
     700             :          */
     701             :         if (st->rollback)
     702             :                 return;
     703             : 
     704             :         st->rollback = true;
     705             : 
     706             :         /*
     707             :          * If we have st->last we need to undo partial multi_instance of this
     708             :          * state first. Otherwise start undo at the previous state.
     709             :          */
     710             :         if (!st->last) {
     711             :                 if (st->bringup)
     712             :                         st->state--;
     713             :                 else
     714             :                         st->state++;
     715             :         }
     716             : 
     717             :         st->bringup = bringup;
     718             :         if (cpu_dying(cpu) != !bringup)
     719             :                 set_cpu_dying(cpu, !bringup);
     720             : }
     721             : 
     722             : /* Regular hotplug invocation of the AP hotplug thread */
     723             : static void __cpuhp_kick_ap(struct cpuhp_cpu_state *st)
     724             : {
     725             :         if (!st->single && st->state == st->target)
     726             :                 return;
     727             : 
     728             :         st->result = 0;
     729             :         /*
     730             :          * Make sure the above stores are visible before should_run becomes
      731             :          * true. Paired with the smp_mb() in cpuhp_thread_fun().
     732             :          */
     733             :         smp_mb();
     734             :         st->should_run = true;
     735             :         wake_up_process(st->thread);
     736             :         wait_for_ap_thread(st, st->bringup);
     737             : }
     738             : 
     739             : static int cpuhp_kick_ap(int cpu, struct cpuhp_cpu_state *st,
     740             :                          enum cpuhp_state target)
     741             : {
     742             :         enum cpuhp_state prev_state;
     743             :         int ret;
     744             : 
     745             :         prev_state = cpuhp_set_state(cpu, st, target);
     746             :         __cpuhp_kick_ap(st);
     747             :         if ((ret = st->result)) {
     748             :                 cpuhp_reset_state(cpu, st, prev_state);
     749             :                 __cpuhp_kick_ap(st);
     750             :         }
     751             : 
     752             :         return ret;
     753             : }
     754             : 
     755             : static int bringup_wait_for_ap_online(unsigned int cpu)
     756             : {
     757             :         struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
     758             : 
     759             :         /* Wait for the CPU to reach CPUHP_AP_ONLINE_IDLE */
     760             :         wait_for_ap_thread(st, true);
      761             :         if (WARN_ON_ONCE(!cpu_online(cpu)))
     762             :                 return -ECANCELED;
     763             : 
     764             :         /* Unpark the hotplug thread of the target cpu */
     765             :         kthread_unpark(st->thread);
     766             : 
     767             :         /*
      768             :          * SMT soft disabling on X86 requires bringing the CPU out of the
     769             :          * BIOS 'wait for SIPI' state in order to set the CR4.MCE bit.  The
     770             :          * CPU marked itself as booted_once in notify_cpu_starting() so the
     771             :          * cpu_smt_allowed() check will now return false if this is not the
     772             :          * primary sibling.
     773             :          */
     774             :         if (!cpu_smt_allowed(cpu))
     775             :                 return -ECANCELED;
     776             :         return 0;
     777             : }
     778             : 
     779             : #ifdef CONFIG_HOTPLUG_SPLIT_STARTUP
     780             : static int cpuhp_kick_ap_alive(unsigned int cpu)
     781             : {
     782             :         if (!cpuhp_can_boot_ap(cpu))
     783             :                 return -EAGAIN;
     784             : 
     785             :         return arch_cpuhp_kick_ap_alive(cpu, idle_thread_get(cpu));
     786             : }
     787             : 
     788             : static int cpuhp_bringup_ap(unsigned int cpu)
     789             : {
     790             :         struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
     791             :         int ret;
     792             : 
     793             :         /*
     794             :          * Some architectures have to walk the irq descriptors to
      795             :          * set up the vector space for the cpu which comes online.
     796             :          * Prevent irq alloc/free across the bringup.
     797             :          */
     798             :         irq_lock_sparse();
     799             : 
     800             :         ret = cpuhp_bp_sync_alive(cpu);
     801             :         if (ret)
     802             :                 goto out_unlock;
     803             : 
     804             :         ret = bringup_wait_for_ap_online(cpu);
     805             :         if (ret)
     806             :                 goto out_unlock;
     807             : 
     808             :         irq_unlock_sparse();
     809             : 
     810             :         if (st->target <= CPUHP_AP_ONLINE_IDLE)
     811             :                 return 0;
     812             : 
     813             :         return cpuhp_kick_ap(cpu, st, st->target);
     814             : 
     815             : out_unlock:
     816             :         irq_unlock_sparse();
     817             :         return ret;
     818             : }
     819             : #else
     820             : static int bringup_cpu(unsigned int cpu)
     821             : {
     822             :         struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
     823             :         struct task_struct *idle = idle_thread_get(cpu);
     824             :         int ret;
     825             : 
     826             :         if (!cpuhp_can_boot_ap(cpu))
     827             :                 return -EAGAIN;
     828             : 
     829             :         /*
     830             :          * Some architectures have to walk the irq descriptors to
      831             :          * set up the vector space for the cpu which comes online.
     832             :          *
     833             :          * Prevent irq alloc/free across the bringup by acquiring the
     834             :          * sparse irq lock. Hold it until the upcoming CPU completes the
      835             :          * startup in cpuhp_online_idle(), which avoids
     836             :          * intermediate synchronization points in the architecture code.
     837             :          */
     838             :         irq_lock_sparse();
     839             : 
     840             :         ret = __cpu_up(cpu, idle);
     841             :         if (ret)
     842             :                 goto out_unlock;
     843             : 
     844             :         ret = cpuhp_bp_sync_alive(cpu);
     845             :         if (ret)
     846             :                 goto out_unlock;
     847             : 
     848             :         ret = bringup_wait_for_ap_online(cpu);
     849             :         if (ret)
     850             :                 goto out_unlock;
     851             : 
     852             :         irq_unlock_sparse();
     853             : 
     854             :         if (st->target <= CPUHP_AP_ONLINE_IDLE)
     855             :                 return 0;
     856             : 
     857             :         return cpuhp_kick_ap(cpu, st, st->target);
     858             : 
     859             : out_unlock:
     860             :         irq_unlock_sparse();
     861             :         return ret;
     862             : }
     863             : #endif
     864             : 
     865             : static int finish_cpu(unsigned int cpu)
     866             : {
     867             :         struct task_struct *idle = idle_thread_get(cpu);
     868             :         struct mm_struct *mm = idle->active_mm;
     869             : 
     870             :         /*
     871             :          * idle_task_exit() will have switched to &init_mm, now
     872             :          * clean up any remaining active_mm state.
     873             :          */
     874             :         if (mm != &init_mm)
     875             :                 idle->active_mm = &init_mm;
     876             :         mmdrop_lazy_tlb(mm);
     877             :         return 0;
     878             : }
     879             : 
     880             : /*
     881             :  * Hotplug state machine related functions
     882             :  */
     883             : 
     884             : /*
     885             :  * Get the next state to run. Empty ones will be skipped. Returns true if a
     886             :  * state must be run.
     887             :  *
     888             :  * st->state will be modified ahead of time, to match state_to_run, as if it
      889             :  * had already run.
     890             :  */
     891             : static bool cpuhp_next_state(bool bringup,
     892             :                              enum cpuhp_state *state_to_run,
     893             :                              struct cpuhp_cpu_state *st,
     894             :                              enum cpuhp_state target)
     895             : {
     896             :         do {
     897             :                 if (bringup) {
     898             :                         if (st->state >= target)
     899             :                                 return false;
     900             : 
     901             :                         *state_to_run = ++st->state;
     902             :                 } else {
     903             :                         if (st->state <= target)
     904             :                                 return false;
     905             : 
     906             :                         *state_to_run = st->state--;
     907             :                 }
     908             : 
     909             :                 if (!cpuhp_step_empty(bringup, cpuhp_get_step(*state_to_run)))
     910             :                         break;
     911             :         } while (true);
     912             : 
     913             :         return true;
     914             : }
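
        /*
         * Worked example (editorial): with st->state == CPUHP_OFFLINE and
         * target == CPUHP_ONLINE, each bringup call pre-advances st->state,
         * silently skipping states whose startup callback is empty, stores
         * the first runnable state in *state_to_run, and returns false
         * once st->state reaches the target.
         */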
     915             : 
     916             : static int __cpuhp_invoke_callback_range(bool bringup,
     917             :                                          unsigned int cpu,
     918             :                                          struct cpuhp_cpu_state *st,
     919             :                                          enum cpuhp_state target,
     920             :                                          bool nofail)
     921             : {
     922             :         enum cpuhp_state state;
     923             :         int ret = 0;
     924             : 
     925             :         while (cpuhp_next_state(bringup, &state, st, target)) {
     926             :                 int err;
     927             : 
     928             :                 err = cpuhp_invoke_callback(cpu, state, bringup, NULL, NULL);
     929             :                 if (!err)
     930             :                         continue;
     931             : 
     932             :                 if (nofail) {
     933             :                         pr_warn("CPU %u %s state %s (%d) failed (%d)\n",
     934             :                                 cpu, bringup ? "UP" : "DOWN",
     935             :                                 cpuhp_get_step(st->state)->name,
     936             :                                 st->state, err);
     937             :                         ret = -1;
     938             :                 } else {
     939             :                         ret = err;
     940             :                         break;
     941             :                 }
     942             :         }
     943             : 
     944             :         return ret;
     945             : }
     946             : 
     947             : static inline int cpuhp_invoke_callback_range(bool bringup,
     948             :                                               unsigned int cpu,
     949             :                                               struct cpuhp_cpu_state *st,
     950             :                                               enum cpuhp_state target)
     951             : {
     952             :         return __cpuhp_invoke_callback_range(bringup, cpu, st, target, false);
     953             : }
     954             : 
     955             : static inline void cpuhp_invoke_callback_range_nofail(bool bringup,
     956             :                                                       unsigned int cpu,
     957             :                                                       struct cpuhp_cpu_state *st,
     958             :                                                       enum cpuhp_state target)
     959             : {
     960             :         __cpuhp_invoke_callback_range(bringup, cpu, st, target, true);
     961             : }
     962             : 
     963             : static inline bool can_rollback_cpu(struct cpuhp_cpu_state *st)
     964             : {
     965             :         if (IS_ENABLED(CONFIG_HOTPLUG_CPU))
     966             :                 return true;
     967             :         /*
     968             :          * When CPU hotplug is disabled, then taking the CPU down is not
     969             :          * possible because takedown_cpu() and the architecture and
     970             :          * subsystem specific mechanisms are not available. So the CPU
     971             :          * which would be completely unplugged again needs to stay around
     972             :          * in the current state.
     973             :          */
     974             :         return st->state <= CPUHP_BRINGUP_CPU;
     975             : }
     976             : 
     977             : static int cpuhp_up_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st,
     978             :                               enum cpuhp_state target)
     979             : {
     980             :         enum cpuhp_state prev_state = st->state;
     981             :         int ret = 0;
     982             : 
     983             :         ret = cpuhp_invoke_callback_range(true, cpu, st, target);
     984             :         if (ret) {
     985             :                 pr_debug("CPU UP failed (%d) CPU %u state %s (%d)\n",
     986             :                          ret, cpu, cpuhp_get_step(st->state)->name,
     987             :                          st->state);
     988             : 
     989             :                 cpuhp_reset_state(cpu, st, prev_state);
     990             :                 if (can_rollback_cpu(st))
     991             :                         WARN_ON(cpuhp_invoke_callback_range(false, cpu, st,
     992             :                                                             prev_state));
     993             :         }
     994             :         return ret;
     995             : }
     996             : 
     997             : /*
     998             :  * The cpu hotplug threads manage the bringup and teardown of the cpus
     999             :  */
    1000             : static int cpuhp_should_run(unsigned int cpu)
    1001             : {
    1002             :         struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
    1003             : 
    1004             :         return st->should_run;
    1005             : }
    1006             : 
    1007             : /*
    1008             :  * Execute teardown/startup callbacks on the plugged cpu. Also used to invoke
    1009             :  * callbacks when a state gets [un]installed at runtime.
    1010             :  *
    1011             :  * Each invocation of this function by the smpboot thread does a single AP
    1012             :  * state callback.
    1013             :  *
    1014             :  * It has 3 modes of operation:
    1015             :  *  - single: runs st->cb_state
    1016             :  *  - up:     runs ++st->state, while st->state < st->target
    1017             :  *  - down:   runs st->state--, while st->state > st->target
    1018             :  *
    1019             :  * When complete or on error, should_run is cleared and the completion is fired.
    1020             :  */
    1021             : static void cpuhp_thread_fun(unsigned int cpu)
    1022             : {
    1023             :         struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
    1024             :         bool bringup = st->bringup;
    1025             :         enum cpuhp_state state;
    1026             : 
    1027             :         if (WARN_ON_ONCE(!st->should_run))
    1028             :                 return;
    1029             : 
    1030             :         /*
    1031             :          * ACQUIRE for the cpuhp_should_run() load of ->should_run. Ensures
    1032             :          * that if we see ->should_run we also see the rest of the state.
    1033             :          */
    1034             :         smp_mb();
    1035             : 
    1036             :         /*
     1037             :          * The BP holds the hotplug lock, but we're now running on the AP;
     1038             :          * ensure that anybody asserting that the lock is held will actually
     1039             :          * find it so.
    1040             :          */
    1041             :         lockdep_acquire_cpus_lock();
    1042             :         cpuhp_lock_acquire(bringup);
    1043             : 
    1044             :         if (st->single) {
    1045             :                 state = st->cb_state;
    1046             :                 st->should_run = false;
    1047             :         } else {
    1048             :                 st->should_run = cpuhp_next_state(bringup, &state, st, st->target);
    1049             :                 if (!st->should_run)
    1050             :                         goto end;
    1051             :         }
    1052             : 
    1053             :         WARN_ON_ONCE(!cpuhp_is_ap_state(state));
    1054             : 
    1055             :         if (cpuhp_is_atomic_state(state)) {
    1056             :                 local_irq_disable();
    1057             :                 st->result = cpuhp_invoke_callback(cpu, state, bringup, st->node, &st->last);
    1058             :                 local_irq_enable();
    1059             : 
    1060             :                 /*
    1061             :                  * STARTING/DYING must not fail!
    1062             :                  */
    1063             :                 WARN_ON_ONCE(st->result);
    1064             :         } else {
    1065             :                 st->result = cpuhp_invoke_callback(cpu, state, bringup, st->node, &st->last);
    1066             :         }
    1067             : 
    1068             :         if (st->result) {
    1069             :                 /*
    1070             :  * If we fail on a rollback, we're up a creek without a
    1071             :  * paddle, no way forward, no way back. We lose, thanks for
    1072             :  * playing.
    1073             :                  */
    1074             :                 WARN_ON_ONCE(st->rollback);
    1075             :                 st->should_run = false;
    1076             :         }
    1077             : 
    1078             : end:
    1079             :         cpuhp_lock_release(bringup);
    1080             :         lockdep_release_cpus_lock();
    1081             : 
    1082             :         if (!st->should_run)
    1083             :                 complete_ap_thread(st, bringup);
    1084             : }
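
The stepping logic above leans on cpuhp_next_state(), which is defined earlier in this file and outside this excerpt. A minimal sketch of the up/down walk it performs, assuming the cpuhp_get_step() and cpuhp_step_empty() helpers (also defined earlier) which let states without installed callbacks be skipped:

    static bool cpuhp_next_state(bool bringup,
                                 enum cpuhp_state *state_to_run,
                                 struct cpuhp_cpu_state *st,
                                 enum cpuhp_state target)
    {
            do {
                    if (bringup) {
                            if (st->state >= target)
                                    return false;           /* reached target */
                            *state_to_run = ++st->state;    /* step up, then run */
                    } else {
                            if (st->state <= target)
                                    return false;
                            *state_to_run = st->state--;    /* run, then step down */
                    }
                    /* Skip states which have no callback installed. */
            } while (cpuhp_step_empty(bringup, cpuhp_get_step(*state_to_run)));

            return true;
    }

Each call hands back exactly one state to run, which matches the one-callback-per-invocation contract of cpuhp_thread_fun().
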
    1085             : 
    1086             : /* Invoke a single callback on a remote cpu */
    1087             : static int
    1088             : cpuhp_invoke_ap_callback(int cpu, enum cpuhp_state state, bool bringup,
    1089             :                          struct hlist_node *node)
    1090             : {
    1091             :         struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
    1092             :         int ret;
    1093             : 
    1094             :         if (!cpu_online(cpu))
    1095             :                 return 0;
    1096             : 
    1097             :         cpuhp_lock_acquire(false);
    1098             :         cpuhp_lock_release(false);
    1099             : 
    1100             :         cpuhp_lock_acquire(true);
    1101             :         cpuhp_lock_release(true);
    1102             : 
    1103             :         /*
    1104             :          * If we are up and running, use the hotplug thread. For early calls
    1105             :          * we invoke the thread function directly.
    1106             :          */
    1107             :         if (!st->thread)
    1108             :                 return cpuhp_invoke_callback(cpu, state, bringup, node, NULL);
    1109             : 
    1110             :         st->rollback = false;
    1111             :         st->last = NULL;
    1112             : 
    1113             :         st->node = node;
    1114             :         st->bringup = bringup;
    1115             :         st->cb_state = state;
    1116             :         st->single = true;
    1117             : 
    1118             :         __cpuhp_kick_ap(st);
    1119             : 
    1120             :         /*
    1121             :          * If we failed and did a partial, do a rollback.
    1122             :          */
    1123             :         if ((ret = st->result) && st->last) {
    1124             :                 st->rollback = true;
    1125             :                 st->bringup = !bringup;
    1126             : 
    1127             :                 __cpuhp_kick_ap(st);
    1128             :         }
    1129             : 
    1130             :         /*
    1131             :  * Clean up the leftovers so the next hotplug operation won't use stale
    1132             :          * data.
    1133             :          */
    1134             :         st->node = st->last = NULL;
    1135             :         return ret;
    1136             : }
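
The paired cpuhp_lock_acquire()/cpuhp_lock_release() calls at the top of this function (and of cpuhp_kick_ap_work() below) look like no-ops, but they are lockdep annotations: taking and immediately dropping the map for both directions records that callers of this path may end up waiting on work done under those maps, so lockdep can connect the caller's held locks with the AP thread's callbacks. A rough sketch of the general pattern, with invented demo_* names:

    #include <linux/lockdep.h>

    static struct lock_class_key demo_wait_key;
    static struct lockdep_map demo_wait_map =
            STATIC_LOCKDEP_MAP_INIT("demo_wait", &demo_wait_key);

    static void demo_flush(void)
    {
            /* Tell lockdep "we may block on work running under this map",
             * even though nothing is actually locked here. */
            lock_map_acquire(&demo_wait_map);
            lock_map_release(&demo_wait_map);

            /* ... actually wait for the delegated work ... */
    }
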
    1137             : 
    1138             : static int cpuhp_kick_ap_work(unsigned int cpu)
    1139             : {
    1140             :         struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
    1141             :         enum cpuhp_state prev_state = st->state;
    1142             :         int ret;
    1143             : 
    1144             :         cpuhp_lock_acquire(false);
    1145             :         cpuhp_lock_release(false);
    1146             : 
    1147             :         cpuhp_lock_acquire(true);
    1148             :         cpuhp_lock_release(true);
    1149             : 
    1150             :         trace_cpuhp_enter(cpu, st->target, prev_state, cpuhp_kick_ap_work);
    1151             :         ret = cpuhp_kick_ap(cpu, st, st->target);
    1152             :         trace_cpuhp_exit(cpu, st->state, prev_state, ret);
    1153             : 
    1154             :         return ret;
    1155             : }
    1156             : 
    1157             : static struct smp_hotplug_thread cpuhp_threads = {
    1158             :         .store                  = &cpuhp_state.thread,
    1159             :         .thread_should_run      = cpuhp_should_run,
    1160             :         .thread_fn              = cpuhp_thread_fun,
    1161             :         .thread_comm            = "cpuhp/%u",
    1162             :         .selfparking            = true,
    1163             : };
    1164             : 
    1165             : static __init void cpuhp_init_state(void)
    1166             : {
    1167             :         struct cpuhp_cpu_state *st;
    1168             :         int cpu;
    1169             : 
    1170             :         for_each_possible_cpu(cpu) {
    1171             :                 st = per_cpu_ptr(&cpuhp_state, cpu);
    1172             :                 init_completion(&st->done_up);
    1173             :                 init_completion(&st->done_down);
    1174             :         }
    1175             : }
    1176             : 
    1177             : void __init cpuhp_threads_init(void)
    1178             : {
    1179             :         cpuhp_init_state();
    1180             :         BUG_ON(smpboot_register_percpu_thread(&cpuhp_threads));
    1181             :         kthread_unpark(this_cpu_read(cpuhp_state.thread));
    1182             : }
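
cpuhp_threads_init() uses the generic smpboot infrastructure, which creates one parked kthread per possible CPU and parks/unparks it across hotplug. A hedged sketch of the same registration pattern for a hypothetical subsystem (all demo_* names invented):

    #include <linux/percpu.h>
    #include <linux/smpboot.h>

    static DEFINE_PER_CPU(struct task_struct *, demo_thread);
    static DEFINE_PER_CPU(bool, demo_pending);

    static int demo_should_run(unsigned int cpu)
    {
            return this_cpu_read(demo_pending);
    }

    static void demo_thread_fn(unsigned int cpu)
    {
            /* Runs in the per-CPU kthread whenever demo_should_run()
             * returned true; clear the flag and do the work. */
            this_cpu_write(demo_pending, false);
    }

    static struct smp_hotplug_thread demo_threads = {
            .store             = &demo_thread,
            .thread_should_run = demo_should_run,
            .thread_fn         = demo_thread_fn,
            .thread_comm       = "demo/%u",
    };

    static int __init demo_init(void)
    {
            return smpboot_register_percpu_thread(&demo_threads);
    }

Unlike this sketch, cpuhp_threads sets .selfparking because the hotplug state machine parks and unparks its threads itself rather than leaving that to the smpboot core.
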
    1183             : 
    1184             : /*
    1185             :  *
    1186             :  * Serialize hotplug trainwrecks outside of the cpu_hotplug_lock
    1187             :  * protected region.
    1188             :  *
    1189             :  * The operation is still serialized against concurrent CPU hotplug via
    1190             :  * cpu_add_remove_lock, i.e. CPU map protection.  But it is _not_
    1191             :  * serialized against other hotplug related activity like adding or
    1192             :  * removing of state callbacks and state instances, which invoke either the
    1193             :  * startup or the teardown callback of the affected state.
    1194             :  *
    1195             :  * This is required for subsystems which are unfixable vs. CPU hotplug and
    1196             :  * evade lock inversion problems by scheduling work which has to be
    1197             :  * completed _before_ cpu_up()/_cpu_down() returns.
    1198             :  *
    1199             :  * Don't even think about adding anything to this for any new code or even
    1200             :  * drivers. Its only purpose is to keep existing lock order trainwrecks
    1201             :  * working.
    1202             :  *
    1203             :  * For cpu_down() there might be valid reasons to finish cleanups which are
    1204             :  * not required to be done under cpu_hotplug_lock, but that's a different
    1205             :  * story and would not be invoked via this.
    1206             :  */
    1207             : static void cpu_up_down_serialize_trainwrecks(bool tasks_frozen)
    1208             : {
    1209             :         /*
    1210             :          * cpusets delegate hotplug operations to a worker to "solve" the
    1211             :          * lock order problems. Wait for the worker, but only if tasks are
    1212             :          * _not_ frozen (suspend, hibernate) as that would wait forever.
    1213             :          *
    1214             :          * The wait is required because otherwise the hotplug operation
    1215             :          * returns with inconsistent state, which could even be observed in
    1216             :          * user space when a new CPU is brought up. The CPU plug uevent
    1217             :  * would be delivered and user space reacting to it would fail to
    1218             :          * move tasks to the newly plugged CPU up to the point where the
    1219             :          * work has finished because up to that point the newly plugged CPU
    1220             :          * is not assignable in cpusets/cgroups. On unplug that's not
    1221             :          * necessarily a visible issue, but it is still inconsistent state,
    1222             :  * which is the real problem that needs to be "fixed". This can't
    1223             :          * prevent the transient state between scheduling the work and
    1224             :          * returning from waiting for it.
    1225             :          */
    1226             :         if (!tasks_frozen)
    1227             :                 cpuset_wait_for_hotplug();
    1228             : }
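
As a hedged illustration of the kind of trainwreck this serializes (demo_* names invented): a hotplug callback whose lock ordering forbids taking its subsystem mutex under cpu_hotplug_lock queues work instead, and the hotplug core then waits for that work here, after cpu_hotplug_lock has been dropped but still under cpu_add_remove_lock:

    /* Inside a hotplug callback, under cpu_hotplug_lock: */
    static int demo_cpu_online(unsigned int cpu)
    {
            /* Defer: taking demo_mutex here would invert the lock order. */
            schedule_work(&demo_rebuild_work);
            return 0;
    }

    /* Called from cpu_up_down_serialize_trainwrecks(), after
     * cpus_write_unlock(): */
    static void demo_wait_for_hotplug(void)
    {
            flush_work(&demo_rebuild_work);
    }
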
    1229             : 
    1230             : #ifdef CONFIG_HOTPLUG_CPU
    1231             : #ifndef arch_clear_mm_cpumask_cpu
    1232             : #define arch_clear_mm_cpumask_cpu(cpu, mm) cpumask_clear_cpu(cpu, mm_cpumask(mm))
    1233             : #endif
    1234             : 
    1235             : /**
    1236             :  * clear_tasks_mm_cpumask - Safely clear tasks' mm_cpumask for a CPU
    1237             :  * @cpu: a CPU id
    1238             :  *
    1239             :  * This function walks all processes, finds a valid mm struct for each one and
    1240             :  * then clears a corresponding bit in mm's cpumask.  While this all sounds
    1241             :  * trivial, there are various non-obvious corner cases, which this function
    1242             :  * tries to solve in a safe manner.
    1243             :  *
    1244             :  * Also note that the function uses a somewhat relaxed locking scheme, so it may
    1245             :  * be called only for an already offlined CPU.
    1246             :  */
    1247             : void clear_tasks_mm_cpumask(int cpu)
    1248             : {
    1249             :         struct task_struct *p;
    1250             : 
    1251             :         /*
    1252             :          * This function is called after the cpu is taken down and marked
    1253             :  * offline, so it's not like new tasks will ever get this cpu set in
    1254             :          * their mm mask. -- Peter Zijlstra
    1255             :          * Thus, we may use rcu_read_lock() here, instead of grabbing
    1256             :          * full-fledged tasklist_lock.
    1257             :          */
    1258             :         WARN_ON(cpu_online(cpu));
    1259             :         rcu_read_lock();
    1260             :         for_each_process(p) {
    1261             :                 struct task_struct *t;
    1262             : 
    1263             :                 /*
    1264             :                  * Main thread might exit, but other threads may still have
    1265             :                  * a valid mm. Find one.
    1266             :                  */
    1267             :                 t = find_lock_task_mm(p);
    1268             :                 if (!t)
    1269             :                         continue;
    1270             :                 arch_clear_mm_cpumask_cpu(cpu, t->mm);
    1271             :                 task_unlock(t);
    1272             :         }
    1273             :         rcu_read_unlock();
    1274             : }
    1275             : 
    1276             : /* Take this CPU down. */
    1277             : static int take_cpu_down(void *_param)
    1278             : {
    1279             :         struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
    1280             :         enum cpuhp_state target = max((int)st->target, CPUHP_AP_OFFLINE);
    1281             :         int err, cpu = smp_processor_id();
    1282             : 
    1283             :         /* Ensure this CPU doesn't handle any more interrupts. */
    1284             :         err = __cpu_disable();
    1285             :         if (err < 0)
    1286             :                 return err;
    1287             : 
    1288             :         /*
    1289             :          * Must be called from CPUHP_TEARDOWN_CPU, which means, as we are going
    1290             :          * down, that the current state is CPUHP_TEARDOWN_CPU - 1.
    1291             :          */
    1292             :         WARN_ON(st->state != (CPUHP_TEARDOWN_CPU - 1));
    1293             : 
    1294             :         /*
    1295             :          * Invoke the former CPU_DYING callbacks. DYING must not fail!
    1296             :          */
    1297             :         cpuhp_invoke_callback_range_nofail(false, cpu, st, target);
    1298             : 
    1299             :         /* Give up timekeeping duties */
    1300             :         tick_handover_do_timer();
    1301             :         /* Remove CPU from timer broadcasting */
    1302             :         tick_offline_cpu(cpu);
    1303             :         /* Park the stopper thread */
    1304             :         stop_machine_park(cpu);
    1305             :         return 0;
    1306             : }
    1307             : 
    1308             : static int takedown_cpu(unsigned int cpu)
    1309             : {
    1310             :         struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
    1311             :         int err;
    1312             : 
    1313             :         /* Park the smpboot threads */
    1314             :         kthread_park(st->thread);
    1315             : 
    1316             :         /*
    1317             :          * Prevent irq alloc/free while the dying cpu reorganizes the
    1318             :          * interrupt affinities.
    1319             :          */
    1320             :         irq_lock_sparse();
    1321             : 
    1322             :         /*
    1323             :          * So now all preempt/rcu users must observe !cpu_active().
    1324             :          */
    1325             :         err = stop_machine_cpuslocked(take_cpu_down, NULL, cpumask_of(cpu));
    1326             :         if (err) {
    1327             :                 /* CPU refused to die */
    1328             :                 irq_unlock_sparse();
    1329             :                 /* Unpark the hotplug thread so we can rollback there */
    1330             :                 kthread_unpark(st->thread);
    1331             :                 return err;
    1332             :         }
    1333             :         BUG_ON(cpu_online(cpu));
    1334             : 
    1335             :         /*
    1336             :          * The teardown callback for CPUHP_AP_SCHED_STARTING will have removed
    1337             :  * all runnable tasks from the CPU; only the idle task is left now
    1338             :          * that the migration thread is done doing the stop_machine thing.
    1339             :          *
    1340             :          * Wait for the stop thread to go away.
    1341             :          */
    1342             :         wait_for_ap_thread(st, false);
    1343             :         BUG_ON(st->state != CPUHP_AP_IDLE_DEAD);
    1344             : 
    1345             :         /* Interrupts are moved away from the dying cpu, reenable alloc/free */
    1346             :         irq_unlock_sparse();
    1347             : 
    1348             :         hotplug_cpu__broadcast_tick_pull(cpu);
    1349             :         /* This actually kills the CPU. */
    1350             :         __cpu_die(cpu);
    1351             : 
    1352             :         cpuhp_bp_sync_dead(cpu);
    1353             : 
    1354             :         tick_cleanup_dead_cpu(cpu);
    1355             :         rcutree_migrate_callbacks(cpu);
    1356             :         return 0;
    1357             : }
    1358             : 
    1359             : static void cpuhp_complete_idle_dead(void *arg)
    1360             : {
    1361             :         struct cpuhp_cpu_state *st = arg;
    1362             : 
    1363             :         complete_ap_thread(st, false);
    1364             : }
    1365             : 
    1366             : void cpuhp_report_idle_dead(void)
    1367             : {
    1368             :         struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
    1369             : 
    1370             :         BUG_ON(st->state != CPUHP_AP_OFFLINE);
    1371             :         rcu_report_dead(smp_processor_id());
    1372             :         st->state = CPUHP_AP_IDLE_DEAD;
    1373             :         /*
    1374             :          * We cannot call complete after rcu_report_dead() so we delegate it
    1375             :          * to an online cpu.
    1376             :          */
    1377             :         smp_call_function_single(cpumask_first(cpu_online_mask),
    1378             :                                  cpuhp_complete_idle_dead, st, 0);
    1379             : }
    1380             : 
    1381             : static int cpuhp_down_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st,
    1382             :                                 enum cpuhp_state target)
    1383             : {
    1384             :         enum cpuhp_state prev_state = st->state;
    1385             :         int ret = 0;
    1386             : 
    1387             :         ret = cpuhp_invoke_callback_range(false, cpu, st, target);
    1388             :         if (ret) {
    1389             :                 pr_debug("CPU DOWN failed (%d) CPU %u state %s (%d)\n",
    1390             :                          ret, cpu, cpuhp_get_step(st->state)->name,
    1391             :                          st->state);
    1392             : 
    1393             :                 cpuhp_reset_state(cpu, st, prev_state);
    1394             : 
    1395             :                 if (st->state < prev_state)
    1396             :                         WARN_ON(cpuhp_invoke_callback_range(true, cpu, st,
    1397             :                                                             prev_state));
    1398             :         }
    1399             : 
    1400             :         return ret;
    1401             : }
    1402             : 
    1403             : /* Requires cpu_add_remove_lock to be held */
    1404             : static int __ref _cpu_down(unsigned int cpu, int tasks_frozen,
    1405             :                            enum cpuhp_state target)
    1406             : {
    1407             :         struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
    1408             :         int prev_state, ret = 0;
    1409             : 
    1410             :         if (num_online_cpus() == 1)
    1411             :                 return -EBUSY;
    1412             : 
    1413             :         if (!cpu_present(cpu))
    1414             :                 return -EINVAL;
    1415             : 
    1416             :         cpus_write_lock();
    1417             : 
    1418             :         cpuhp_tasks_frozen = tasks_frozen;
    1419             : 
    1420             :         prev_state = cpuhp_set_state(cpu, st, target);
    1421             :         /*
    1422             :          * If the current CPU state is in the range of the AP hotplug thread,
    1423             :          * then we need to kick the thread.
    1424             :          */
    1425             :         if (st->state > CPUHP_TEARDOWN_CPU) {
    1426             :                 st->target = max((int)target, CPUHP_TEARDOWN_CPU);
    1427             :                 ret = cpuhp_kick_ap_work(cpu);
    1428             :                 /*
    1429             :                  * The AP side has done the error rollback already. Just
    1430             :  * return the error code.
    1431             :                  */
    1432             :                 if (ret)
    1433             :                         goto out;
    1434             : 
    1435             :                 /*
    1436             :  * We might have stopped while still in the range of the AP hotplug
    1437             :                  * thread. Nothing to do anymore.
    1438             :                  */
    1439             :                 if (st->state > CPUHP_TEARDOWN_CPU)
    1440             :                         goto out;
    1441             : 
    1442             :                 st->target = target;
    1443             :         }
    1444             :         /*
    1445             :          * The AP brought itself down to CPUHP_TEARDOWN_CPU. So we need
    1446             :          * to do the further cleanups.
    1447             :          */
    1448             :         ret = cpuhp_down_callbacks(cpu, st, target);
    1449             :         if (ret && st->state < prev_state) {
    1450             :                 if (st->state == CPUHP_TEARDOWN_CPU) {
    1451             :                         cpuhp_reset_state(cpu, st, prev_state);
    1452             :                         __cpuhp_kick_ap(st);
    1453             :                 } else {
    1454             :                         WARN(1, "DEAD callback error for CPU%d", cpu);
    1455             :                 }
    1456             :         }
    1457             : 
    1458             : out:
    1459             :         cpus_write_unlock();
    1460             :         /*
    1461             :          * Do post unplug cleanup. This is still protected against
    1462             :          * concurrent CPU hotplug via cpu_add_remove_lock.
    1463             :          */
    1464             :         lockup_detector_cleanup();
    1465             :         arch_smt_update();
    1466             :         cpu_up_down_serialize_trainwrecks(tasks_frozen);
    1467             :         return ret;
    1468             : }
    1469             : 
    1470             : static int cpu_down_maps_locked(unsigned int cpu, enum cpuhp_state target)
    1471             : {
    1472             :         /*
    1473             :          * If the platform does not support hotplug, report it explicitly to
    1474             :          * differentiate it from a transient offlining failure.
    1475             :          */
    1476             :         if (cc_platform_has(CC_ATTR_HOTPLUG_DISABLED))
    1477             :                 return -EOPNOTSUPP;
    1478             :         if (cpu_hotplug_disabled)
    1479             :                 return -EBUSY;
    1480             :         return _cpu_down(cpu, 0, target);
    1481             : }
    1482             : 
    1483             : static int cpu_down(unsigned int cpu, enum cpuhp_state target)
    1484             : {
    1485             :         int err;
    1486             : 
    1487             :         cpu_maps_update_begin();
    1488             :         err = cpu_down_maps_locked(cpu, target);
    1489             :         cpu_maps_update_done();
    1490             :         return err;
    1491             : }
    1492             : 
    1493             : /**
    1494             :  * cpu_device_down - Bring down a cpu device
    1495             :  * @dev: Pointer to the cpu device to offline
    1496             :  *
    1497             :  * This function is meant to be used by device core cpu subsystem only.
    1498             :  *
    1499             :  * Other subsystems should use remove_cpu() instead.
    1500             :  *
    1501             :  * Return: %0 on success or a negative errno code
    1502             :  */
    1503             : int cpu_device_down(struct device *dev)
    1504             : {
    1505             :         return cpu_down(dev->id, CPUHP_OFFLINE);
    1506             : }
    1507             : 
    1508             : int remove_cpu(unsigned int cpu)
    1509             : {
    1510             :         int ret;
    1511             : 
    1512             :         lock_device_hotplug();
    1513             :         ret = device_offline(get_cpu_device(cpu));
    1514             :         unlock_device_hotplug();
    1515             : 
    1516             :         return ret;
    1517             : }
    1518             : EXPORT_SYMBOL_GPL(remove_cpu);
    1519             : 
    1520             : void smp_shutdown_nonboot_cpus(unsigned int primary_cpu)
    1521             : {
    1522             :         unsigned int cpu;
    1523             :         int error;
    1524             : 
    1525             :         cpu_maps_update_begin();
    1526             : 
    1527             :         /*
    1528             :          * Make certain the cpu I'm about to reboot on is online.
    1529             :          *
    1530             :  * This is in line with what migrate_to_reboot_cpu() already does.
    1531             :          */
    1532             :         if (!cpu_online(primary_cpu))
    1533             :                 primary_cpu = cpumask_first(cpu_online_mask);
    1534             : 
    1535             :         for_each_online_cpu(cpu) {
    1536             :                 if (cpu == primary_cpu)
    1537             :                         continue;
    1538             : 
    1539             :                 error = cpu_down_maps_locked(cpu, CPUHP_OFFLINE);
    1540             :                 if (error) {
    1541             :                         pr_err("Failed to offline CPU%d - error=%d",
    1542             :                                 cpu, error);
    1543             :                         break;
    1544             :                 }
    1545             :         }
    1546             : 
    1547             :         /*
    1548             :          * Ensure all but the reboot CPU are offline.
    1549             :          */
    1550             :         BUG_ON(num_online_cpus() > 1);
    1551             : 
    1552             :         /*
    1553             :          * Make sure the CPUs won't be enabled by someone else after this
    1554             :  * point. Kexec will reboot to a new kernel shortly, resetting
    1555             :          * everything along the way.
    1556             :          */
    1557             :         cpu_hotplug_disabled++;
    1558             : 
    1559             :         cpu_maps_update_done();
    1560             : }
    1561             : 
    1562             : #else
    1563             : #define takedown_cpu            NULL
    1564             : #endif /*CONFIG_HOTPLUG_CPU*/
    1565             : 
    1566             : /**
    1567             :  * notify_cpu_starting(cpu) - Invoke the callbacks on the starting CPU
    1568             :  * @cpu: cpu that just started
    1569             :  *
    1570             :  * It must be called by the arch code on the new cpu, before the new cpu
    1571             :  * enables interrupts and before the "boot" cpu returns from __cpu_up().
    1572             :  */
    1573             : void notify_cpu_starting(unsigned int cpu)
    1574             : {
    1575             :         struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
    1576             :         enum cpuhp_state target = min((int)st->target, CPUHP_AP_ONLINE);
    1577             : 
    1578             :         rcu_cpu_starting(cpu);  /* Enables RCU usage on this CPU. */
    1579             :         cpumask_set_cpu(cpu, &cpus_booted_once_mask);
    1580             : 
    1581             :         /*
    1582             :          * STARTING must not fail!
    1583             :          */
    1584             :         cpuhp_invoke_callback_range_nofail(true, cpu, st, target);
    1585             : }
    1586             : 
    1587             : /*
    1588             :  * Called from the idle task. Wake up the controlling task which brings the
    1589             :  * hotplug thread of the upcoming CPU up and then delegates the rest of the
    1590             :  * online bringup to the hotplug thread.
    1591             :  */
    1592             : void cpuhp_online_idle(enum cpuhp_state state)
    1593             : {
    1594             :         struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
    1595             : 
    1596             :         /* Happens for the boot cpu */
    1597             :         if (state != CPUHP_AP_ONLINE_IDLE)
    1598             :                 return;
    1599             : 
    1600             :         cpuhp_ap_update_sync_state(SYNC_STATE_ONLINE);
    1601             : 
    1602             :         /*
    1603             :          * Unpark the stopper thread before we start the idle loop (and start
    1604             :          * scheduling); this ensures the stopper task is always available.
    1605             :          */
    1606             :         stop_machine_unpark(smp_processor_id());
    1607             : 
    1608             :         st->state = CPUHP_AP_ONLINE_IDLE;
    1609             :         complete_ap_thread(st, true);
    1610             : }
    1611             : 
    1612             : /* Requires cpu_add_remove_lock to be held */
    1613             : static int _cpu_up(unsigned int cpu, int tasks_frozen, enum cpuhp_state target)
    1614             : {
    1615             :         struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
    1616             :         struct task_struct *idle;
    1617             :         int ret = 0;
    1618             : 
    1619             :         cpus_write_lock();
    1620             : 
    1621             :         if (!cpu_present(cpu)) {
    1622             :                 ret = -EINVAL;
    1623             :                 goto out;
    1624             :         }
    1625             : 
    1626             :         /*
    1627             :          * The caller of cpu_up() might have raced with another
    1628             :          * caller. Nothing to do.
    1629             :          */
    1630             :         if (st->state >= target)
    1631             :                 goto out;
    1632             : 
    1633             :         if (st->state == CPUHP_OFFLINE) {
    1634             :                 /* Let it fail before we try to bring the cpu up */
    1635             :                 idle = idle_thread_get(cpu);
    1636             :                 if (IS_ERR(idle)) {
    1637             :                         ret = PTR_ERR(idle);
    1638             :                         goto out;
    1639             :                 }
    1640             : 
    1641             :                 /*
    1642             :                  * Reset stale stack state from the last time this CPU was online.
    1643             :                  */
    1644             :                 scs_task_reset(idle);
    1645             :                 kasan_unpoison_task_stack(idle);
    1646             :         }
    1647             : 
    1648             :         cpuhp_tasks_frozen = tasks_frozen;
    1649             : 
    1650             :         cpuhp_set_state(cpu, st, target);
    1651             :         /*
    1652             :          * If the current CPU state is in the range of the AP hotplug thread,
    1653             :          * then we need to kick the thread once more.
    1654             :          */
    1655             :         if (st->state > CPUHP_BRINGUP_CPU) {
    1656             :                 ret = cpuhp_kick_ap_work(cpu);
    1657             :                 /*
    1658             :                  * The AP side has done the error rollback already. Just
    1659             :  * return the error code.
    1660             :                  */
    1661             :                 if (ret)
    1662             :                         goto out;
    1663             :         }
    1664             : 
    1665             :         /*
    1666             :          * Try to reach the target state. We max out on the BP at
    1667             :          * CPUHP_BRINGUP_CPU. After that the AP hotplug thread is
    1668             :          * responsible for bringing it up to the target state.
    1669             :          */
    1670             :         target = min((int)target, CPUHP_BRINGUP_CPU);
    1671             :         ret = cpuhp_up_callbacks(cpu, st, target);
    1672             : out:
    1673             :         cpus_write_unlock();
    1674             :         arch_smt_update();
    1675             :         cpu_up_down_serialize_trainwrecks(tasks_frozen);
    1676             :         return ret;
    1677             : }
    1678             : 
    1679             : static int cpu_up(unsigned int cpu, enum cpuhp_state target)
    1680             : {
    1681             :         int err = 0;
    1682             : 
    1683             :         if (!cpu_possible(cpu)) {
    1684             :                 pr_err("can't online cpu %d because it is not configured as may-hotadd at boot time\n",
    1685             :                        cpu);
    1686             : #if defined(CONFIG_IA64)
    1687             :                 pr_err("please check additional_cpus= boot parameter\n");
    1688             : #endif
    1689             :                 return -EINVAL;
    1690             :         }
    1691             : 
    1692             :         err = try_online_node(cpu_to_node(cpu));
    1693             :         if (err)
    1694             :                 return err;
    1695             : 
    1696             :         cpu_maps_update_begin();
    1697             : 
    1698             :         if (cpu_hotplug_disabled) {
    1699             :                 err = -EBUSY;
    1700             :                 goto out;
    1701             :         }
    1702             :         if (!cpu_smt_allowed(cpu)) {
    1703             :                 err = -EPERM;
    1704             :                 goto out;
    1705             :         }
    1706             : 
    1707             :         err = _cpu_up(cpu, 0, target);
    1708             : out:
    1709             :         cpu_maps_update_done();
    1710             :         return err;
    1711             : }
    1712             : 
    1713             : /**
    1714             :  * cpu_device_up - Bring up a cpu device
    1715             :  * @dev: Pointer to the cpu device to online
    1716             :  *
    1717             :  * This function is meant to be used by device core cpu subsystem only.
    1718             :  *
    1719             :  * Other subsystems should use add_cpu() instead.
    1720             :  *
    1721             :  * Return: %0 on success or a negative errno code
    1722             :  */
    1723             : int cpu_device_up(struct device *dev)
    1724             : {
    1725             :         return cpu_up(dev->id, CPUHP_ONLINE);
    1726             : }
    1727             : 
    1728             : int add_cpu(unsigned int cpu)
    1729             : {
    1730             :         int ret;
    1731             : 
    1732             :         lock_device_hotplug();
    1733             :         ret = device_online(get_cpu_device(cpu));
    1734             :         unlock_device_hotplug();
    1735             : 
    1736             :         return ret;
    1737             : }
    1738             : EXPORT_SYMBOL_GPL(add_cpu);
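
Both exported helpers route through the device core so that the sysfs "online" state stays coherent. A minimal, hedged sketch of how a caller outside the device core might cycle a CPU with them (demo_cycle_cpu is invented):

    #include <linux/cpu.h>

    static int demo_cycle_cpu(unsigned int cpu)
    {
            int ret;

            ret = remove_cpu(cpu);  /* device_offline() under the hood */
            if (ret)
                    return ret;     /* e.g. -EBUSY while hotplug is disabled */

            return add_cpu(cpu);    /* device_online() under the hood */
    }
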
    1739             : 
    1740             : /**
    1741             :  * bringup_hibernate_cpu - Bring up the CPU that we hibernated on
    1742             :  * @sleep_cpu: The cpu we hibernated on and should be brought up.
    1743             :  *
    1744             :  * On some architectures like arm64, we can hibernate on any CPU, but on
    1745             :  * wake up the CPU we hibernated on might be offline as a side effect of
    1746             :  * using maxcpus= for example.
    1747             :  *
    1748             :  * Return: %0 on success or a negative errno code
    1749             :  */
    1750             : int bringup_hibernate_cpu(unsigned int sleep_cpu)
    1751             : {
    1752             :         int ret;
    1753             : 
    1754             :         if (!cpu_online(sleep_cpu)) {
    1755             :                 pr_info("Hibernated on a CPU that is offline! Bringing CPU up.\n");
    1756             :                 ret = cpu_up(sleep_cpu, CPUHP_ONLINE);
    1757             :                 if (ret) {
    1758             :                         pr_err("Failed to bring hibernate-CPU up!\n");
    1759             :                         return ret;
    1760             :                 }
    1761             :         }
    1762             :         return 0;
    1763             : }
    1764             : 
    1765             : static void __init cpuhp_bringup_mask(const struct cpumask *mask, unsigned int ncpus,
    1766             :                                       enum cpuhp_state target)
    1767             : {
    1768             :         unsigned int cpu;
    1769             : 
    1770             :         for_each_cpu(cpu, mask) {
    1771             :                 struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
    1772             : 
    1773             :                 if (cpu_up(cpu, target) && can_rollback_cpu(st)) {
    1774             :                         /*
    1775             :                          * If this failed then cpu_up() might have only
    1776             :                          * rolled back to CPUHP_BP_KICK_AP for the final
    1777             :                          * online. Clean it up. NOOP if already rolled back.
    1778             :                          */
    1779             :                         WARN_ON(cpuhp_invoke_callback_range(false, cpu, st, CPUHP_OFFLINE));
    1780             :                 }
    1781             : 
    1782             :                 if (!--ncpus)
    1783             :                         break;
    1784             :         }
    1785             : }
    1786             : 
    1787             : #ifdef CONFIG_HOTPLUG_PARALLEL
    1788             : static bool __cpuhp_parallel_bringup __ro_after_init = true;
    1789             : 
    1790             : static int __init parallel_bringup_parse_param(char *arg)
    1791             : {
    1792             :         return kstrtobool(arg, &__cpuhp_parallel_bringup);
    1793             : }
    1794             : early_param("cpuhp.parallel", parallel_bringup_parse_param);
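
Since the parameter is parsed with kstrtobool(), the usual boolean spellings work, so parallel bringup can be switched off from the kernel command line, e.g.:

    cpuhp.parallel=0
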
    1795             : 
    1796             : /*
    1797             :  * On architectures which have enabled parallel bringup this invokes all BP
    1798             :  * prepare states for each of the to-be-onlined APs first. The last state
    1799             :  * sends the startup IPI to the APs. The APs proceed through the low level
    1800             :  * bringup code in parallel and then wait for the control CPU to release
    1801             :  * them one by one for the final onlining procedure.
    1802             :  *
    1803             :  * This avoids waiting for each AP to respond to the startup IPI in
    1804             :  * CPUHP_BRINGUP_CPU.
    1805             :  */
    1806             : static bool __init cpuhp_bringup_cpus_parallel(unsigned int ncpus)
    1807             : {
    1808             :         const struct cpumask *mask = cpu_present_mask;
    1809             : 
    1810             :         if (__cpuhp_parallel_bringup)
    1811             :                 __cpuhp_parallel_bringup = arch_cpuhp_init_parallel_bringup();
    1812             :         if (!__cpuhp_parallel_bringup)
    1813             :                 return false;
    1814             : 
    1815             :         if (cpuhp_smt_aware()) {
    1816             :                 const struct cpumask *pmask = cpuhp_get_primary_thread_mask();
    1817             :                 static struct cpumask tmp_mask __initdata;
    1818             : 
    1819             :                 /*
    1820             :  * For various reasons, X86 requires that the SMT siblings are not
    1821             :  * brought up while the primary thread does a microcode update.
    1822             :  * Bring the primary threads up first.
    1823             :                  */
    1824             :                 cpumask_and(&tmp_mask, mask, pmask);
    1825             :                 cpuhp_bringup_mask(&tmp_mask, ncpus, CPUHP_BP_KICK_AP);
    1826             :                 cpuhp_bringup_mask(&tmp_mask, ncpus, CPUHP_ONLINE);
    1827             :                 /* Account for the online CPUs */
    1828             :                 ncpus -= num_online_cpus();
    1829             :                 if (!ncpus)
    1830             :                         return true;
    1831             :                 /* Create the mask for secondary CPUs */
    1832             :                 cpumask_andnot(&tmp_mask, mask, pmask);
    1833             :                 mask = &tmp_mask;
    1834             :         }
    1835             : 
    1836             :         /* Bring the not-yet started CPUs up */
    1837             :         cpuhp_bringup_mask(mask, ncpus, CPUHP_BP_KICK_AP);
    1838             :         cpuhp_bringup_mask(mask, ncpus, CPUHP_ONLINE);
    1839             :         return true;
    1840             : }
    1841             : #else
    1842             : static inline bool cpuhp_bringup_cpus_parallel(unsigned int ncpus) { return false; }
    1843             : #endif /* CONFIG_HOTPLUG_PARALLEL */
    1844             : 
    1845             : void __init bringup_nonboot_cpus(unsigned int setup_max_cpus)
    1846             : {
    1847             :         /* Try parallel bringup optimization if enabled */
    1848             :         if (cpuhp_bringup_cpus_parallel(setup_max_cpus))
    1849             :                 return;
    1850             : 
    1851             :         /* Full per CPU serialized bringup */
    1852             :         cpuhp_bringup_mask(cpu_present_mask, setup_max_cpus, CPUHP_ONLINE);
    1853             : }
    1854             : 
    1855             : #ifdef CONFIG_PM_SLEEP_SMP
    1856             : static cpumask_var_t frozen_cpus;
    1857             : 
    1858             : int freeze_secondary_cpus(int primary)
    1859             : {
    1860             :         int cpu, error = 0;
    1861             : 
    1862             :         cpu_maps_update_begin();
    1863             :         if (primary == -1) {
    1864             :                 primary = cpumask_first(cpu_online_mask);
    1865             :                 if (!housekeeping_cpu(primary, HK_TYPE_TIMER))
    1866             :                         primary = housekeeping_any_cpu(HK_TYPE_TIMER);
    1867             :         } else {
    1868             :                 if (!cpu_online(primary))
    1869             :                         primary = cpumask_first(cpu_online_mask);
    1870             :         }
    1871             : 
    1872             :         /*
    1873             :          * We take down all of the non-boot CPUs in one shot to avoid races
    1874             :  * with userspace trying to use CPU hotplug at the same time.
    1875             :          */
    1876             :         cpumask_clear(frozen_cpus);
    1877             : 
    1878             :         pr_info("Disabling non-boot CPUs ...\n");
    1879             :         for_each_online_cpu(cpu) {
    1880             :                 if (cpu == primary)
    1881             :                         continue;
    1882             : 
    1883             :                 if (pm_wakeup_pending()) {
    1884             :                         pr_info("Wakeup pending. Abort CPU freeze\n");
    1885             :                         error = -EBUSY;
    1886             :                         break;
    1887             :                 }
    1888             : 
    1889             :                 trace_suspend_resume(TPS("CPU_OFF"), cpu, true);
    1890             :                 error = _cpu_down(cpu, 1, CPUHP_OFFLINE);
    1891             :                 trace_suspend_resume(TPS("CPU_OFF"), cpu, false);
    1892             :                 if (!error)
    1893             :                         cpumask_set_cpu(cpu, frozen_cpus);
    1894             :                 else {
    1895             :                         pr_err("Error taking CPU%d down: %d\n", cpu, error);
    1896             :                         break;
    1897             :                 }
    1898             :         }
    1899             : 
    1900             :         if (!error)
    1901             :                 BUG_ON(num_online_cpus() > 1);
    1902             :         else
    1903             :                 pr_err("Non-boot CPUs are not disabled\n");
    1904             : 
    1905             :         /*
    1906             :          * Make sure the CPUs won't be enabled by someone else. We need to do
    1907             :          * this even in case of failure as all freeze_secondary_cpus() users are
    1908             :          * supposed to do thaw_secondary_cpus() on the failure path.
    1909             :          */
    1910             :         cpu_hotplug_disabled++;
    1911             : 
    1912             :         cpu_maps_update_done();
    1913             :         return error;
    1914             : }
    1915             : 
    1916             : void __weak arch_thaw_secondary_cpus_begin(void)
    1917             : {
    1918             : }
    1919             : 
    1920             : void __weak arch_thaw_secondary_cpus_end(void)
    1921             : {
    1922             : }
    1923             : 
    1924             : void thaw_secondary_cpus(void)
    1925             : {
    1926             :         int cpu, error;
    1927             : 
    1928             :         /* Allow everyone to use the CPU hotplug again */
    1929             :         cpu_maps_update_begin();
    1930             :         __cpu_hotplug_enable();
    1931             :         if (cpumask_empty(frozen_cpus))
    1932             :                 goto out;
    1933             : 
    1934             :         pr_info("Enabling non-boot CPUs ...\n");
    1935             : 
    1936             :         arch_thaw_secondary_cpus_begin();
    1937             : 
    1938             :         for_each_cpu(cpu, frozen_cpus) {
    1939             :                 trace_suspend_resume(TPS("CPU_ON"), cpu, true);
    1940             :                 error = _cpu_up(cpu, 1, CPUHP_ONLINE);
    1941             :                 trace_suspend_resume(TPS("CPU_ON"), cpu, false);
    1942             :                 if (!error) {
    1943             :                         pr_info("CPU%d is up\n", cpu);
    1944             :                         continue;
    1945             :                 }
    1946             :                 pr_warn("Error taking CPU%d up: %d\n", cpu, error);
    1947             :         }
    1948             : 
    1949             :         arch_thaw_secondary_cpus_end();
    1950             : 
    1951             :         cpumask_clear(frozen_cpus);
    1952             : out:
    1953             :         cpu_maps_update_done();
    1954             : }
    1955             : 
    1956             : static int __init alloc_frozen_cpus(void)
    1957             : {
    1958             :         if (!alloc_cpumask_var(&frozen_cpus, GFP_KERNEL|__GFP_ZERO))
    1959             :                 return -ENOMEM;
    1960             :         return 0;
    1961             : }
    1962             : core_initcall(alloc_frozen_cpus);
    1963             : 
    1964             : /*
    1965             :  * When callbacks for CPU hotplug notifications are being executed, we must
    1966             :  * ensure that the state of the system with respect to the tasks being frozen
    1967             :  * or not, as reported by the notification, remains unchanged *throughout the
    1968             :  * duration* of the execution of the callbacks.
    1969             :  * Hence we need to prevent the freezer from racing with regular CPU hotplug.
    1970             :  *
    1971             :  * This synchronization is implemented by mutually excluding regular CPU
    1972             :  * hotplug and Suspend/Hibernate call paths by hooking onto the Suspend/
    1973             :  * Hibernate notifications.
    1974             :  */
    1975             : static int
    1976             : cpu_hotplug_pm_callback(struct notifier_block *nb,
    1977             :                         unsigned long action, void *ptr)
    1978             : {
    1979             :         switch (action) {
    1980             : 
    1981             :         case PM_SUSPEND_PREPARE:
    1982             :         case PM_HIBERNATION_PREPARE:
    1983             :                 cpu_hotplug_disable();
    1984             :                 break;
    1985             : 
    1986             :         case PM_POST_SUSPEND:
    1987             :         case PM_POST_HIBERNATION:
    1988             :                 cpu_hotplug_enable();
    1989             :                 break;
    1990             : 
    1991             :         default:
    1992             :                 return NOTIFY_DONE;
    1993             :         }
    1994             : 
    1995             :         return NOTIFY_OK;
    1996             : }
    1997             : 
    1998             : 
    1999             : static int __init cpu_hotplug_pm_sync_init(void)
    2000             : {
    2001             :         /*
    2002             :  * cpu_hotplug_pm_callback has higher priority than the x86
    2003             :  * bsp_pm_callback, which depends on cpu_hotplug_pm_callback
    2004             :  * having disabled cpu hotplug to avoid hotplug races.
    2005             :          */
    2006             :         pm_notifier(cpu_hotplug_pm_callback, 0);
    2007             :         return 0;
    2008             : }
    2009             : core_initcall(cpu_hotplug_pm_sync_init);
    2010             : 
    2011             : #endif /* CONFIG_PM_SLEEP_SMP */
    2012             : 
    2013             : int __boot_cpu_id;
    2014             : 
    2015             : #endif /* CONFIG_SMP */
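
The table below covers only the fixed, compile-time states. Most subsystems attach to the state machine at runtime instead, via the cpuhotplug API from <linux/cpuhotplug.h>. A hedged sketch with invented demo_* callbacks:

    #include <linux/cpu.h>
    #include <linux/cpuhotplug.h>

    static int demo_online(unsigned int cpu)
    {
            /* Startup callback, invoked for each CPU reaching the state. */
            return 0;
    }

    static int demo_offline(unsigned int cpu)
    {
            /* Teardown callback, invoked on the way down. */
            return 0;
    }

    static int __init demo_hp_init(void)
    {
            int ret;

            /*
             * CPUHP_AP_ONLINE_DYN asks the core to allocate a free dynamic
             * state; on success the startup callback has already been run
             * for all currently online CPUs and the positive state number
             * is returned.
             */
            ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "demo:online",
                                    demo_online, demo_offline);
            return ret < 0 ? ret : 0;
    }
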
    2016             : 
    2017             : /* Boot processor state steps */
    2018             : static struct cpuhp_step cpuhp_hp_states[] = {
    2019             :         [CPUHP_OFFLINE] = {
    2020             :                 .name                   = "offline",
    2021             :                 .startup.single         = NULL,
    2022             :                 .teardown.single        = NULL,
    2023             :         },
    2024             : #ifdef CONFIG_SMP
    2025             :         [CPUHP_CREATE_THREADS] = {
    2026             :                 .name                   = "threads:prepare",
    2027             :                 .startup.single         = smpboot_create_threads,
    2028             :                 .teardown.single        = NULL,
    2029             :                 .cant_stop              = true,
    2030             :         },
    2031             :         [CPUHP_PERF_PREPARE] = {
    2032             :                 .name                   = "perf:prepare",
    2033             :                 .startup.single         = perf_event_init_cpu,
    2034             :                 .teardown.single        = perf_event_exit_cpu,
    2035             :         },
    2036             :         [CPUHP_RANDOM_PREPARE] = {
    2037             :                 .name                   = "random:prepare",
    2038             :                 .startup.single         = random_prepare_cpu,
    2039             :                 .teardown.single        = NULL,
    2040             :         },
    2041             :         [CPUHP_WORKQUEUE_PREP] = {
    2042             :                 .name                   = "workqueue:prepare",
    2043             :                 .startup.single         = workqueue_prepare_cpu,
    2044             :                 .teardown.single        = NULL,
    2045             :         },
    2046             :         [CPUHP_HRTIMERS_PREPARE] = {
    2047             :                 .name                   = "hrtimers:prepare",
    2048             :                 .startup.single         = hrtimers_prepare_cpu,
    2049             :                 .teardown.single        = hrtimers_dead_cpu,
    2050             :         },
    2051             :         [CPUHP_SMPCFD_PREPARE] = {
    2052             :                 .name                   = "smpcfd:prepare",
    2053             :                 .startup.single         = smpcfd_prepare_cpu,
    2054             :                 .teardown.single        = smpcfd_dead_cpu,
    2055             :         },
    2056             :         [CPUHP_RELAY_PREPARE] = {
    2057             :                 .name                   = "relay:prepare",
    2058             :                 .startup.single         = relay_prepare_cpu,
    2059             :                 .teardown.single        = NULL,
    2060             :         },
    2061             :         [CPUHP_SLAB_PREPARE] = {
    2062             :                 .name                   = "slab:prepare",
    2063             :                 .startup.single         = slab_prepare_cpu,
    2064             :                 .teardown.single        = slab_dead_cpu,
    2065             :         },
    2066             :         [CPUHP_RCUTREE_PREP] = {
    2067             :                 .name                   = "RCU/tree:prepare",
    2068             :                 .startup.single         = rcutree_prepare_cpu,
    2069             :                 .teardown.single        = rcutree_dead_cpu,
    2070             :         },
    2071             :         /*
    2072             :          * On the tear-down path, timers_dead_cpu() must be invoked
    2073             :          * before blk_mq_queue_reinit_notify() from notify_dead(),
    2074             :  * otherwise an RCU stall occurs.
    2075             :          */
    2076             :         [CPUHP_TIMERS_PREPARE] = {
    2077             :                 .name                   = "timers:prepare",
    2078             :                 .startup.single         = timers_prepare_cpu,
    2079             :                 .teardown.single        = timers_dead_cpu,
    2080             :         },
    2081             : 
    2082             : #ifdef CONFIG_HOTPLUG_SPLIT_STARTUP
    2083             :         /*
    2084             :          * Kicks the AP alive. AP will wait in cpuhp_ap_sync_alive() until
    2085             :          * the next step will release it.
    2086             :          */
    2087             :         [CPUHP_BP_KICK_AP] = {
    2088             :                 .name                   = "cpu:kick_ap",
    2089             :                 .startup.single         = cpuhp_kick_ap_alive,
    2090             :         },
    2091             : 
    2092             :         /*
    2093             :          * Waits for the AP to reach cpuhp_ap_sync_alive() and then
    2094             :          * releases it for the complete bringup.
    2095             :          */
    2096             :         [CPUHP_BRINGUP_CPU] = {
    2097             :                 .name                   = "cpu:bringup",
    2098             :                 .startup.single         = cpuhp_bringup_ap,
    2099             :                 .teardown.single        = finish_cpu,
    2100             :                 .cant_stop              = true,
    2101             :         },
    2102             : #else
    2103             :         /*
    2104             :          * All-in-one CPU bringup state which includes the kick alive.
    2105             :          */
    2106             :         [CPUHP_BRINGUP_CPU] = {
    2107             :                 .name                   = "cpu:bringup",
    2108             :                 .startup.single         = bringup_cpu,
    2109             :                 .teardown.single        = finish_cpu,
    2110             :                 .cant_stop              = true,
    2111             :         },
    2112             : #endif
    2113             :         /* Final state before CPU kills itself */
    2114             :         [CPUHP_AP_IDLE_DEAD] = {
    2115             :                 .name                   = "idle:dead",
    2116             :         },
    2117             :         /*
    2118             :          * Last state before CPU enters the idle loop to die. Transient state
    2119             :          * for synchronization.
    2120             :          */
    2121             :         [CPUHP_AP_OFFLINE] = {
    2122             :                 .name                   = "ap:offline",
    2123             :                 .cant_stop              = true,
    2124             :         },
    2125             :         /* First state is scheduler control. Interrupts are disabled */
    2126             :         [CPUHP_AP_SCHED_STARTING] = {
    2127             :                 .name                   = "sched:starting",
    2128             :                 .startup.single         = sched_cpu_starting,
    2129             :                 .teardown.single        = sched_cpu_dying,
    2130             :         },
    2131             :         [CPUHP_AP_RCUTREE_DYING] = {
    2132             :                 .name                   = "RCU/tree:dying",
    2133             :                 .startup.single         = NULL,
    2134             :                 .teardown.single        = rcutree_dying_cpu,
    2135             :         },
    2136             :         [CPUHP_AP_SMPCFD_DYING] = {
    2137             :                 .name                   = "smpcfd:dying",
    2138             :                 .startup.single         = NULL,
    2139             :                 .teardown.single        = smpcfd_dying_cpu,
    2140             :         },
    2141             :         /* Entry state on starting. Interrupts enabled from here on. Transient
    2142             :          * state for synchronization */
    2143             :         [CPUHP_AP_ONLINE] = {
    2144             :                 .name                   = "ap:online",
    2145             :         },
    2146             :         /*
    2147             :          * Handled on control processor until the plugged processor manages
    2148             :          * this itself.
    2149             :          */
    2150             :         [CPUHP_TEARDOWN_CPU] = {
    2151             :                 .name                   = "cpu:teardown",
    2152             :                 .startup.single         = NULL,
    2153             :                 .teardown.single        = takedown_cpu,
    2154             :                 .cant_stop              = true,
    2155             :         },
    2156             : 
    2157             :         [CPUHP_AP_SCHED_WAIT_EMPTY] = {
    2158             :                 .name                   = "sched:waitempty",
    2159             :                 .startup.single         = NULL,
    2160             :                 .teardown.single        = sched_cpu_wait_empty,
    2161             :         },
    2162             : 
    2163             :         /* Handle smpboot threads park/unpark */
    2164             :         [CPUHP_AP_SMPBOOT_THREADS] = {
    2165             :                 .name                   = "smpboot/threads:online",
    2166             :                 .startup.single         = smpboot_unpark_threads,
    2167             :                 .teardown.single        = smpboot_park_threads,
    2168             :         },
    2169             :         [CPUHP_AP_IRQ_AFFINITY_ONLINE] = {
    2170             :                 .name                   = "irq/affinity:online",
    2171             :                 .startup.single         = irq_affinity_online_cpu,
    2172             :                 .teardown.single        = NULL,
    2173             :         },
    2174             :         [CPUHP_AP_PERF_ONLINE] = {
    2175             :                 .name                   = "perf:online",
    2176             :                 .startup.single         = perf_event_init_cpu,
    2177             :                 .teardown.single        = perf_event_exit_cpu,
    2178             :         },
    2179             :         [CPUHP_AP_WATCHDOG_ONLINE] = {
    2180             :                 .name                   = "lockup_detector:online",
    2181             :                 .startup.single         = lockup_detector_online_cpu,
    2182             :                 .teardown.single        = lockup_detector_offline_cpu,
    2183             :         },
    2184             :         [CPUHP_AP_WORKQUEUE_ONLINE] = {
    2185             :                 .name                   = "workqueue:online",
    2186             :                 .startup.single         = workqueue_online_cpu,
    2187             :                 .teardown.single        = workqueue_offline_cpu,
    2188             :         },
    2189             :         [CPUHP_AP_RANDOM_ONLINE] = {
    2190             :                 .name                   = "random:online",
    2191             :                 .startup.single         = random_online_cpu,
    2192             :                 .teardown.single        = NULL,
    2193             :         },
    2194             :         [CPUHP_AP_RCUTREE_ONLINE] = {
    2195             :                 .name                   = "RCU/tree:online",
    2196             :                 .startup.single         = rcutree_online_cpu,
    2197             :                 .teardown.single        = rcutree_offline_cpu,
    2198             :         },
    2199             : #endif
    2200             :         /*
    2201             :          * The dynamically registered state space is here
    2202             :          */
    2203             : 
    2204             : #ifdef CONFIG_SMP
    2205             :         /* Last state is scheduler control setting the cpu active */
    2206             :         [CPUHP_AP_ACTIVE] = {
    2207             :                 .name                   = "sched:active",
    2208             :                 .startup.single         = sched_cpu_activate,
    2209             :                 .teardown.single        = sched_cpu_deactivate,
    2210             :         },
    2211             : #endif
    2212             : 
    2213             :         /* CPU is fully up and running. */
    2214             :         [CPUHP_ONLINE] = {
    2215             :                 .name                   = "online",
    2216             :                 .startup.single         = NULL,
    2217             :                 .teardown.single        = NULL,
    2218             :         },
    2219             : };
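
The table above is consumed in enum order: bringup invokes each populated startup.single callback ascending from the current state to the target, while teardown invokes teardown.single descending. A minimal conceptual sketch of the bringup direction, assuming the five-argument cpuhp_invoke_callback() seen later in this section (the kernel's actual walk, with rollback and multi-instance handling, is cpuhp_invoke_callback_range() earlier in this file; bringup_to_target() is an illustrative name, not a kernel function):

        static int bringup_to_target(unsigned int cpu, struct cpuhp_cpu_state *st,
                                     enum cpuhp_state target)
        {
                int ret;

                while (st->state < target) {
                        st->state++;
                        /* Invoke this step's startup callback, if any */
                        ret = cpuhp_invoke_callback(cpu, st->state, true, NULL, NULL);
                        if (ret)
                                return ret;     /* caller initiates rollback */
                }
                return 0;
        }
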
    2220             : 
    2221             : /* Sanity check for callbacks */
    2222             : static int cpuhp_cb_check(enum cpuhp_state state)
    2223             : {
    2224          18 :         if (state <= CPUHP_OFFLINE || state >= CPUHP_ONLINE)
    2225             :                 return -EINVAL;
    2226             :         return 0;
    2227             : }
    2228             : 
    2229             : /*
    2230             :  * Returns a free slot for dynamic state assignment in the requested range.
    2231             :  * The states are protected by the cpuhp_state_mutex and an empty slot is
    2232             :  * identified by having no name assigned.
    2233             :  */
    2234           5 : static int cpuhp_reserve_state(enum cpuhp_state state)
    2235             : {
    2236             :         enum cpuhp_state i, end;
    2237             :         struct cpuhp_step *step;
    2238             : 
    2239           5 :         switch (state) {
    2240             :         case CPUHP_AP_ONLINE_DYN:
    2241             :                 step = cpuhp_hp_states + CPUHP_AP_ONLINE_DYN;
    2242             :                 end = CPUHP_AP_ONLINE_DYN_END;
    2243             :                 break;
    2244             :         case CPUHP_BP_PREPARE_DYN:
    2245           1 :                 step = cpuhp_hp_states + CPUHP_BP_PREPARE_DYN;
    2246           1 :                 end = CPUHP_BP_PREPARE_DYN_END;
    2247           1 :                 break;
    2248             :         default:
    2249             :                 return -EINVAL;
    2250             :         }
    2251             : 
    2252          11 :         for (i = state; i <= end; i++, step++) {
    2253          11 :                 if (!step->name)
    2254             :                         return i;
    2255             :         }
    2256           0 :         WARN(1, "No more dynamic states available for CPU hotplug\n");
    2257           0 :         return -ENOSPC;
    2258             : }
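
For callers, this reservation is what makes a registration at CPUHP_AP_ONLINE_DYN return the concrete state that was handed out. A hedged sketch of typical driver usage, with my_online()/my_offline() as illustrative names (cpuhp_setup_state() is the public wrapper from <linux/cpuhotplug.h>):

        static enum cpuhp_state my_state;

        static int my_online(unsigned int cpu)  { /* per-CPU setup */   return 0; }
        static int my_offline(unsigned int cpu) { /* per-CPU cleanup */ return 0; }

        static int __init my_init(void)
        {
                int ret;

                /* A positive return value is the dynamically reserved state */
                ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "mydrv:online",
                                        my_online, my_offline);
                if (ret < 0)
                        return ret;
                my_state = ret;
                return 0;
        }
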
    2259             : 
    2260          18 : static int cpuhp_store_callbacks(enum cpuhp_state state, const char *name,
    2261             :                                  int (*startup)(unsigned int cpu),
    2262             :                                  int (*teardown)(unsigned int cpu),
    2263             :                                  bool multi_instance)
    2264             : {
    2265             :         /* (Un)Install the callbacks for further cpu hotplug operations */
    2266             :         struct cpuhp_step *sp;
    2267          19 :         int ret = 0;
    2268             : 
    2269             :         /*
    2270             :          * If name is NULL, then the state gets removed.
    2271             :          *
    2272             :          * CPUHP_AP_ONLINE_DYN and CPUHP_BP_PREPARE_DYN are handed out on
    2273             :          * the first allocation from these dynamic ranges, so the removal
    2274             :          * would trigger a new allocation and clear the wrong (already
    2275             :          * empty) state, leaving the callbacks of the to-be-cleared state
    2276             :          * dangling, which causes wreckage on the next hotplug operation.
    2277             :          */
    2278          36 :         if (name && (state == CPUHP_AP_ONLINE_DYN ||
    2279          18 :                      state == CPUHP_BP_PREPARE_DYN)) {
    2280           5 :                 ret = cpuhp_reserve_state(state);
    2281           5 :                 if (ret < 0)
    2282             :                         return ret;
    2283             :                 state = ret;
    2284             :         }
    2285          19 :         sp = cpuhp_get_step(state);
    2286          18 :         if (name && sp->name)
    2287             :                 return -EBUSY;
    2288             : 
    2289          19 :         sp->startup.single = startup;
    2290          19 :         sp->teardown.single = teardown;
    2291          19 :         sp->name = name;
    2292          19 :         sp->multi_instance = multi_instance;
    2293          19 :         INIT_HLIST_HEAD(&sp->list);
    2294          18 :         return ret;
    2295             : }
    2296             : 
    2297             : static void *cpuhp_get_teardown_cb(enum cpuhp_state state)
    2298             : {
    2299           0 :         return cpuhp_get_step(state)->teardown.single;
    2300             : }
    2301             : 
    2302             : /*
    2303             :  * Call the startup/teardown function for a step either on the AP or
    2304             :  * on the current CPU.
    2305             :  */
    2306           3 : static int cpuhp_issue_call(int cpu, enum cpuhp_state state, bool bringup,
    2307             :                             struct hlist_node *node)
    2308             : {
    2309           3 :         struct cpuhp_step *sp = cpuhp_get_step(state);
    2310             :         int ret;
    2311             : 
    2312             :         /*
    2313             :          * If there's nothing to do, we're done.
    2314             :          * Relies on the union for multi_instance.
    2315             :          */
    2316           6 :         if (cpuhp_step_empty(bringup, sp))
    2317             :                 return 0;
    2318             :         /*
    2319             :          * The non-AP-bound callbacks can fail on bringup. On teardown,
    2320             :          * e.g. module removal, we crash for now.
    2321             :          */
    2322             : #ifdef CONFIG_SMP
    2323             :         if (cpuhp_is_ap_state(state))
    2324             :                 ret = cpuhp_invoke_ap_callback(cpu, state, bringup, node);
    2325             :         else
    2326             :                 ret = cpuhp_invoke_callback(cpu, state, bringup, node, NULL);
    2327             : #else
    2328           3 :         ret = cpuhp_invoke_callback(cpu, state, bringup, node, NULL);
    2329             : #endif
    2330           3 :         BUG_ON(ret && !bringup);
    2331             :         return ret;
    2332             : }
    2333             : 
    2334             : /*
    2335             :  * Called from __cpuhp_setup_state on a recoverable failure.
    2336             :  *
    2337             :  * Note: The teardown callbacks for rollback are not allowed to fail!
    2338             :  */
    2339           1 : static void cpuhp_rollback_install(int failedcpu, enum cpuhp_state state,
    2340             :                                    struct hlist_node *node)
    2341             : {
    2342             :         int cpu;
    2343             : 
    2344             :         /* Roll back the already executed steps on the other cpus */
    2345           1 :         for_each_present_cpu(cpu) {
    2346           1 :                 struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
    2347           1 :                 int cpustate = st->state;
    2348             : 
    2349           1 :                 if (cpu >= failedcpu)
    2350             :                         break;
    2351             : 
    2352             :                 /* Did we invoke the startup call on that cpu ? */
    2353           0 :                 if (cpustate >= state)
    2354           0 :                         cpuhp_issue_call(cpu, state, false, node);
    2355             :         }
    2356           1 : }
    2357             : 
    2358           2 : int __cpuhp_state_add_instance_cpuslocked(enum cpuhp_state state,
    2359             :                                           struct hlist_node *node,
    2360             :                                           bool invoke)
    2361             : {
    2362             :         struct cpuhp_step *sp;
    2363             :         int cpu;
    2364             :         int ret;
    2365             : 
    2366           2 :         lockdep_assert_cpus_held();
    2367             : 
    2368           2 :         sp = cpuhp_get_step(state);
    2369           2 :         if (sp->multi_instance == false)
    2370             :                 return -EINVAL;
    2371             : 
    2372           2 :         mutex_lock(&cpuhp_state_mutex);
    2373             : 
    2374           2 :         if (!invoke || !sp->startup.multi)
    2375             :                 goto add_node;
    2376             : 
    2377             :         /*
    2378             :          * Try to call the startup callback for each present cpu
    2379             :          * depending on the hotplug state of the cpu.
    2380             :          */
    2381           0 :         for_each_present_cpu(cpu) {
    2382           0 :                 struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
    2383           0 :                 int cpustate = st->state;
    2384             : 
    2385           0 :                 if (cpustate < state)
    2386           0 :                         continue;
    2387             : 
    2388           0 :                 ret = cpuhp_issue_call(cpu, state, true, node);
    2389           0 :                 if (ret) {
    2390           0 :                         if (sp->teardown.multi)
    2391           0 :                                 cpuhp_rollback_install(cpu, state, node);
    2392             :                         goto unlock;
    2393             :                 }
    2394             :         }
    2395             : add_node:
    2396           2 :         ret = 0;
    2397           2 :         hlist_add_head(node, &sp->list);
    2398             : unlock:
    2399           2 :         mutex_unlock(&cpuhp_state_mutex);
    2400           2 :         return ret;
    2401             : }
    2402             : 
    2403           2 : int __cpuhp_state_add_instance(enum cpuhp_state state, struct hlist_node *node,
    2404             :                                bool invoke)
    2405             : {
    2406             :         int ret;
    2407             : 
    2408             :         cpus_read_lock();
    2409           2 :         ret = __cpuhp_state_add_instance_cpuslocked(state, node, invoke);
    2410             :         cpus_read_unlock();
    2411           2 :         return ret;
    2412             : }
    2413             : EXPORT_SYMBOL_GPL(__cpuhp_state_add_instance);
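
A hedged sketch of the multi-instance flow: cpuhp_setup_state_multi() installs the per-state callbacks once, then each object is attached through its embedded hlist_node. struct my_instance and the callback names are illustrative, not kernel APIs:

        struct my_instance {
                struct hlist_node node;
                /* per-instance data */
        };

        static enum cpuhp_state my_multi_state;

        static int my_inst_online(unsigned int cpu, struct hlist_node *node)
        {
                struct my_instance *inst = hlist_entry(node, struct my_instance, node);

                pr_debug("instance %p coming up on CPU %u\n", inst, cpu);
                return 0;
        }

        static int my_register(struct my_instance *inst)
        {
                /* Runs my_inst_online() on all CPUs already past the state */
                return cpuhp_state_add_instance(my_multi_state, &inst->node);
        }

where my_multi_state was obtained earlier via cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "mydrv:online", my_inst_online, NULL).
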
    2414             : 
    2415             : /**
    2416             :  * __cpuhp_setup_state_cpuslocked - Set up the callbacks for a hotplug machine state
    2417             :  * @state:              The state to setup
    2418             :  * @name:               Name of the step
    2419             :  * @invoke:             If true, the startup function is invoked for cpus where
    2420             :  *                      cpu state >= @state
    2421             :  * @startup:            startup callback function
    2422             :  * @teardown:           teardown callback function
    2423             :  * @multi_instance:     State is set up for multiple instances which get
    2424             :  *                      added afterwards.
    2425             :  *
    2426             :  * The caller needs to hold cpus read locked while calling this function.
    2427             :  * Return:
    2428             :  *   On success:
    2429             :  *      Positive state number if @state is CPUHP_AP_ONLINE_DYN;
    2430             :  *      0 for all other states
    2431             :  *   On failure: proper (negative) error code
    2432             :  */
    2433          18 : int __cpuhp_setup_state_cpuslocked(enum cpuhp_state state,
    2434             :                                    const char *name, bool invoke,
    2435             :                                    int (*startup)(unsigned int cpu),
    2436             :                                    int (*teardown)(unsigned int cpu),
    2437             :                                    bool multi_instance)
    2438             : {
    2439          18 :         int cpu, ret = 0;
    2440             :         bool dynstate;
    2441             : 
    2442          18 :         lockdep_assert_cpus_held();
    2443             : 
    2444          18 :         if (cpuhp_cb_check(state) || !name)
    2445             :                 return -EINVAL;
    2446             : 
    2447          18 :         mutex_lock(&cpuhp_state_mutex);
    2448             : 
    2449          18 :         ret = cpuhp_store_callbacks(state, name, startup, teardown,
    2450             :                                     multi_instance);
    2451             : 
    2452          18 :         dynstate = state == CPUHP_AP_ONLINE_DYN;
    2453          18 :         if (ret > 0 && dynstate) {
    2454           4 :                 state = ret;
    2455           4 :                 ret = 0;
    2456             :         }
    2457             : 
    2458          18 :         if (ret || !invoke || !startup)
    2459             :                 goto out;
    2460             : 
    2461             :         /*
    2462             :          * Try to call the startup callback for each present cpu
    2463             :          * depending on the hotplug state of the cpu.
    2464             :          */
    2465           2 :         for_each_present_cpu(cpu) {
    2466           3 :                 struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
    2467           3 :                 int cpustate = st->state;
    2468             : 
    2469           3 :                 if (cpustate < state)
    2470           0 :                         continue;
    2471             : 
    2472           3 :                 ret = cpuhp_issue_call(cpu, state, true, NULL);
    2473           3 :                 if (ret) {
    2474           1 :                         if (teardown)
    2475           1 :                                 cpuhp_rollback_install(cpu, state, NULL);
    2476             :                         cpuhp_store_callbacks(state, NULL, NULL, NULL, false);
    2477             :                         goto out;
    2478             :                 }
    2479             :         }
    2480             : out:
    2481          18 :         mutex_unlock(&cpuhp_state_mutex);
    2482             :         /*
    2483             :          * If the requested state is CPUHP_AP_ONLINE_DYN, return the
    2484             :          * dynamically allocated state in case of success.
    2485             :          */
    2486          18 :         if (!ret && dynstate)
    2487             :                 return state;
    2488          14 :         return ret;
    2489             : }
    2490             : EXPORT_SYMBOL(__cpuhp_setup_state_cpuslocked);
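
The _cpuslocked variant exists for contexts that already hold the CPU read lock, for instance because several operations must observe the same set of online CPUs. A minimal hedged sketch, reusing the illustrative my_online/my_offline from above:

        static int my_locked_init(void)
        {
                int ret;

                cpus_read_lock();
                ret = cpuhp_setup_state_cpuslocked(CPUHP_AP_ONLINE_DYN,
                                                   "mydrv:online",
                                                   my_online, my_offline);
                /* ... other work against the now-stable online mask ... */
                cpus_read_unlock();
                return ret < 0 ? ret : 0;
        }
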
    2491             : 
    2492          18 : int __cpuhp_setup_state(enum cpuhp_state state,
    2493             :                         const char *name, bool invoke,
    2494             :                         int (*startup)(unsigned int cpu),
    2495             :                         int (*teardown)(unsigned int cpu),
    2496             :                         bool multi_instance)
    2497             : {
    2498             :         int ret;
    2499             : 
    2500             :         cpus_read_lock();
    2501          18 :         ret = __cpuhp_setup_state_cpuslocked(state, name, invoke, startup,
    2502             :                                              teardown, multi_instance);
    2503             :         cpus_read_unlock();
    2504          18 :         return ret;
    2505             : }
    2506             : EXPORT_SYMBOL(__cpuhp_setup_state);
    2507             : 
    2508           0 : int __cpuhp_state_remove_instance(enum cpuhp_state state,
    2509             :                                   struct hlist_node *node, bool invoke)
    2510             : {
    2511           0 :         struct cpuhp_step *sp = cpuhp_get_step(state);
    2512             :         int cpu;
    2513             : 
    2514           0 :         BUG_ON(cpuhp_cb_check(state));
    2515             : 
    2516           0 :         if (!sp->multi_instance)
    2517             :                 return -EINVAL;
    2518             : 
    2519             :         cpus_read_lock();
    2520           0 :         mutex_lock(&cpuhp_state_mutex);
    2521             : 
    2522           0 :         if (!invoke || !cpuhp_get_teardown_cb(state))
    2523             :                 goto remove;
    2524             :         /*
    2525             :          * Call the teardown callback for each present cpu depending
    2526             :          * on the hotplug state of the cpu. This function is not
    2527             :          * allowed to fail currently!
    2528             :          */
    2529           0 :         for_each_present_cpu(cpu) {
    2530           0 :                 struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
    2531           0 :                 int cpustate = st->state;
    2532             : 
    2533           0 :                 if (cpustate >= state)
    2534           0 :                         cpuhp_issue_call(cpu, state, false, node);
    2535             :         }
    2536             : 
    2537             : remove:
    2538           0 :         hlist_del(node);
    2539           0 :         mutex_unlock(&cpuhp_state_mutex);
    2540             :         cpus_read_unlock();
    2541             : 
    2542           0 :         return 0;
    2543             : }
    2544             : EXPORT_SYMBOL_GPL(__cpuhp_state_remove_instance);
    2545             : 
    2546             : /**
    2547             :  * __cpuhp_remove_state_cpuslocked - Remove the callbacks for a hotplug machine state
    2548             :  * @state:      The state to remove
    2549             :  * @invoke:     If true, the teardown function is invoked for cpus where
    2550             :  *              cpu state >= @state
    2551             :  *
    2552             :  * The caller needs to hold cpus read locked while calling this function.
    2553             :  * The teardown callback is currently not allowed to fail. Think
    2554             :  * about module removal!
    2555             :  */
    2556           0 : void __cpuhp_remove_state_cpuslocked(enum cpuhp_state state, bool invoke)
    2557             : {
    2558           0 :         struct cpuhp_step *sp = cpuhp_get_step(state);
    2559             :         int cpu;
    2560             : 
    2561           0 :         BUG_ON(cpuhp_cb_check(state));
    2562             : 
    2563             :         lockdep_assert_cpus_held();
    2564             : 
    2565           0 :         mutex_lock(&cpuhp_state_mutex);
    2566           0 :         if (sp->multi_instance) {
    2567           0 :                 WARN(!hlist_empty(&sp->list),
    2568             :                      "Error: Removing state %d which has instances left.\n",
    2569             :                      state);
    2570             :                 goto remove;
    2571             :         }
    2572             : 
    2573           0 :         if (!invoke || !cpuhp_get_teardown_cb(state))
    2574             :                 goto remove;
    2575             : 
    2576             :         /*
    2577             :          * Call the teardown callback for each present cpu depending
    2578             :          * on the hotplug state of the cpu. This function is not
    2579             :          * allowed to fail currently!
    2580             :          */
    2581           0 :         for_each_present_cpu(cpu) {
    2582           0 :                 struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
    2583           0 :                 int cpustate = st->state;
    2584             : 
    2585           0 :                 if (cpustate >= state)
    2586           0 :                         cpuhp_issue_call(cpu, state, false, NULL);
    2587             :         }
    2588             : remove:
    2589           0 :         cpuhp_store_callbacks(state, NULL, NULL, NULL, false);
    2590           0 :         mutex_unlock(&cpuhp_state_mutex);
    2591           0 : }
    2592             : EXPORT_SYMBOL(__cpuhp_remove_state_cpuslocked);
    2593             : 
    2594           0 : void __cpuhp_remove_state(enum cpuhp_state state, bool invoke)
    2595             : {
    2596             :         cpus_read_lock();
    2597           0 :         __cpuhp_remove_state_cpuslocked(state, invoke);
    2598             :         cpus_read_unlock();
    2599           0 : }
    2600             : EXPORT_SYMBOL(__cpuhp_remove_state);
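
The usual pairing on the way out, as a hedged sketch: cpuhp_remove_state() is the invoke=true wrapper, so the teardown callback runs on every CPU at or beyond the state before the slot is cleared (my_state as in the earlier sketch):

        static void __exit my_exit(void)
        {
                /* Teardown must not fail here; think module removal */
                cpuhp_remove_state(my_state);
        }
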
    2601             : 
    2602             : #ifdef CONFIG_HOTPLUG_SMT
    2603             : static void cpuhp_offline_cpu_device(unsigned int cpu)
    2604             : {
    2605             :         struct device *dev = get_cpu_device(cpu);
    2606             : 
    2607             :         dev->offline = true;
    2608             :         /* Tell user space about the state change */
    2609             :         kobject_uevent(&dev->kobj, KOBJ_OFFLINE);
    2610             : }
    2611             : 
    2612             : static void cpuhp_online_cpu_device(unsigned int cpu)
    2613             : {
    2614             :         struct device *dev = get_cpu_device(cpu);
    2615             : 
    2616             :         dev->offline = false;
    2617             :         /* Tell user space about the state change */
    2618             :         kobject_uevent(&dev->kobj, KOBJ_ONLINE);
    2619             : }
    2620             : 
    2621             : int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval)
    2622             : {
    2623             :         int cpu, ret = 0;
    2624             : 
    2625             :         cpu_maps_update_begin();
    2626             :         for_each_online_cpu(cpu) {
    2627             :                 if (topology_is_primary_thread(cpu))
    2628             :                         continue;
    2629             :                 ret = cpu_down_maps_locked(cpu, CPUHP_OFFLINE);
    2630             :                 if (ret)
    2631             :                         break;
    2632             :                 /*
    2633             :                  * As this needs to hold the cpu maps lock it's impossible
    2634             :                  * to call device_offline() because that ends up calling
    2635             :                  * cpu_down() which takes the cpu maps lock. That lock
    2636             :                  * needs to be held as this might race against in-kernel
    2637             :                  * abusers of the hotplug machinery (thermal management).
    2638             :                  *
    2639             :                  * So nothing would update device:offline state. That would
    2640             :                  * leave the sysfs entry stale and prevent onlining after
    2641             :                  * smt control has been changed to 'off' again. This is
    2642             :                  * called under the sysfs hotplug lock, so it is properly
    2643             :                  * serialized against the regular offline usage.
    2644             :                  */
    2645             :                 cpuhp_offline_cpu_device(cpu);
    2646             :         }
    2647             :         if (!ret)
    2648             :                 cpu_smt_control = ctrlval;
    2649             :         cpu_maps_update_done();
    2650             :         return ret;
    2651             : }
    2652             : 
    2653             : int cpuhp_smt_enable(void)
    2654             : {
    2655             :         int cpu, ret = 0;
    2656             : 
    2657             :         cpu_maps_update_begin();
    2658             :         cpu_smt_control = CPU_SMT_ENABLED;
    2659             :         for_each_present_cpu(cpu) {
    2660             :                 /* Skip online CPUs and CPUs on offline nodes */
    2661             :                 if (cpu_online(cpu) || !node_online(cpu_to_node(cpu)))
    2662             :                         continue;
    2663             :                 ret = _cpu_up(cpu, 0, CPUHP_ONLINE);
    2664             :                 if (ret)
    2665             :                         break;
    2666             :                 /* See comment in cpuhp_smt_disable() */
    2667             :                 cpuhp_online_cpu_device(cpu);
    2668             :         }
    2669             :         cpu_maps_update_done();
    2670             :         return ret;
    2671             : }
    2672             : #endif
    2673             : 
    2674             : #if defined(CONFIG_SYSFS) && defined(CONFIG_HOTPLUG_CPU)
    2675             : static ssize_t state_show(struct device *dev,
    2676             :                           struct device_attribute *attr, char *buf)
    2677             : {
    2678             :         struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id);
    2679             : 
    2680             :         return sprintf(buf, "%d\n", st->state);
    2681             : }
    2682             : static DEVICE_ATTR_RO(state);
    2683             : 
    2684             : static ssize_t target_store(struct device *dev, struct device_attribute *attr,
    2685             :                             const char *buf, size_t count)
    2686             : {
    2687             :         struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id);
    2688             :         struct cpuhp_step *sp;
    2689             :         int target, ret;
    2690             : 
    2691             :         ret = kstrtoint(buf, 10, &target);
    2692             :         if (ret)
    2693             :                 return ret;
    2694             : 
    2695             : #ifdef CONFIG_CPU_HOTPLUG_STATE_CONTROL
    2696             :         if (target < CPUHP_OFFLINE || target > CPUHP_ONLINE)
    2697             :                 return -EINVAL;
    2698             : #else
    2699             :         if (target != CPUHP_OFFLINE && target != CPUHP_ONLINE)
    2700             :                 return -EINVAL;
    2701             : #endif
    2702             : 
    2703             :         ret = lock_device_hotplug_sysfs();
    2704             :         if (ret)
    2705             :                 return ret;
    2706             : 
    2707             :         mutex_lock(&cpuhp_state_mutex);
    2708             :         sp = cpuhp_get_step(target);
    2709             :         ret = !sp->name || sp->cant_stop ? -EINVAL : 0;
    2710             :         mutex_unlock(&cpuhp_state_mutex);
    2711             :         if (ret)
    2712             :                 goto out;
    2713             : 
    2714             :         if (st->state < target)
    2715             :                 ret = cpu_up(dev->id, target);
    2716             :         else if (st->state > target)
    2717             :                 ret = cpu_down(dev->id, target);
    2718             :         else if (WARN_ON(st->target != target))
    2719             :                 st->target = target;
    2720             : out:
    2721             :         unlock_device_hotplug();
    2722             :         return ret ? ret : count;
    2723             : }
    2724             : 
    2725             : static ssize_t target_show(struct device *dev,
    2726             :                            struct device_attribute *attr, char *buf)
    2727             : {
    2728             :         struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id);
    2729             : 
    2730             :         return sprintf(buf, "%d\n", st->target);
    2731             : }
    2732             : static DEVICE_ATTR_RW(target);
    2733             : 
    2734             : static ssize_t fail_store(struct device *dev, struct device_attribute *attr,
    2735             :                           const char *buf, size_t count)
    2736             : {
    2737             :         struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id);
    2738             :         struct cpuhp_step *sp;
    2739             :         int fail, ret;
    2740             : 
    2741             :         ret = kstrtoint(buf, 10, &fail);
    2742             :         if (ret)
    2743             :                 return ret;
    2744             : 
    2745             :         if (fail == CPUHP_INVALID) {
    2746             :                 st->fail = fail;
    2747             :                 return count;
    2748             :         }
    2749             : 
    2750             :         if (fail < CPUHP_OFFLINE || fail > CPUHP_ONLINE)
    2751             :                 return -EINVAL;
    2752             : 
    2753             :         /*
    2754             :          * Cannot fail STARTING/DYING callbacks.
    2755             :          */
    2756             :         if (cpuhp_is_atomic_state(fail))
    2757             :                 return -EINVAL;
    2758             : 
    2759             :         /*
    2760             :          * DEAD callbacks cannot fail...
    2761             :          * ... neither can CPUHP_BRINGUP_CPU during hotunplug. The latter
    2762             :          * triggers STARTING callbacks; a failure in this state would
    2763             :          * hinder rollback.
    2764             :          */
    2765             :         if (fail <= CPUHP_BRINGUP_CPU && st->state > CPUHP_BRINGUP_CPU)
    2766             :                 return -EINVAL;
    2767             : 
    2768             :         /*
    2769             :          * Cannot fail anything that doesn't have callbacks.
    2770             :          */
    2771             :         mutex_lock(&cpuhp_state_mutex);
    2772             :         sp = cpuhp_get_step(fail);
    2773             :         if (!sp->startup.single && !sp->teardown.single)
    2774             :                 ret = -EINVAL;
    2775             :         mutex_unlock(&cpuhp_state_mutex);
    2776             :         if (ret)
    2777             :                 return ret;
    2778             : 
    2779             :         st->fail = fail;
    2780             : 
    2781             :         return count;
    2782             : }
    2783             : 
    2784             : static ssize_t fail_show(struct device *dev,
    2785             :                          struct device_attribute *attr, char *buf)
    2786             : {
    2787             :         struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id);
    2788             : 
    2789             :         return sprintf(buf, "%d\n", st->fail);
    2790             : }
    2791             : 
    2792             : static DEVICE_ATTR_RW(fail);
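
The fail attribute enables fault injection from user space: write a state number, then trigger a hotplug transition, and the callback at that state reports failure, exercising the rollback path. A hypothetical userspace snippet; the state number 200 is purely illustrative and must name a populated state above CPUHP_BRINGUP_CPU on the running kernel (readable from /sys/devices/system/cpu/hotplug/states):

        #include <fcntl.h>
        #include <unistd.h>

        int main(void)
        {
                int fd = open("/sys/devices/system/cpu/cpu1/hotplug/fail", O_WRONLY);

                if (fd < 0)
                        return 1;
                write(fd, "200", 3);    /* fail the callbacks at state 200 */
                close(fd);

                fd = open("/sys/devices/system/cpu/cpu1/online", O_WRONLY);
                if (fd < 0)
                        return 1;
                write(fd, "0", 1);      /* offline attempt hits the injection */
                close(fd);
                return 0;
        }
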
    2793             : 
    2794             : static struct attribute *cpuhp_cpu_attrs[] = {
    2795             :         &dev_attr_state.attr,
    2796             :         &dev_attr_target.attr,
    2797             :         &dev_attr_fail.attr,
    2798             :         NULL
    2799             : };
    2800             : 
    2801             : static const struct attribute_group cpuhp_cpu_attr_group = {
    2802             :         .attrs = cpuhp_cpu_attrs,
    2803             :         .name = "hotplug",
    2805             : };
    2806             : 
    2807             : static ssize_t states_show(struct device *dev,
    2808             :                                  struct device_attribute *attr, char *buf)
    2809             : {
    2810             :         ssize_t cur, res = 0;
    2811             :         int i;
    2812             : 
    2813             :         mutex_lock(&cpuhp_state_mutex);
    2814             :         for (i = CPUHP_OFFLINE; i <= CPUHP_ONLINE; i++) {
    2815             :                 struct cpuhp_step *sp = cpuhp_get_step(i);
    2816             : 
    2817             :                 if (sp->name) {
    2818             :                         cur = sprintf(buf, "%3d: %s\n", i, sp->name);
    2819             :                         buf += cur;
    2820             :                         res += cur;
    2821             :                 }
    2822             :         }
    2823             :         mutex_unlock(&cpuhp_state_mutex);
    2824             :         return res;
    2825             : }
    2826             : static DEVICE_ATTR_RO(states);
    2827             : 
    2828             : static struct attribute *cpuhp_cpu_root_attrs[] = {
    2829             :         &dev_attr_states.attr,
    2830             :         NULL
    2831             : };
    2832             : 
    2833             : static const struct attribute_group cpuhp_cpu_root_attr_group = {
    2834             :         .attrs = cpuhp_cpu_root_attrs,
    2835             :         .name = "hotplug",
    2837             : };
    2838             : 
    2839             : #ifdef CONFIG_HOTPLUG_SMT
    2840             : 
    2841             : static ssize_t
    2842             : __store_smt_control(struct device *dev, struct device_attribute *attr,
    2843             :                     const char *buf, size_t count)
    2844             : {
    2845             :         int ctrlval, ret;
    2846             : 
    2847             :         if (sysfs_streq(buf, "on"))
    2848             :                 ctrlval = CPU_SMT_ENABLED;
    2849             :         else if (sysfs_streq(buf, "off"))
    2850             :                 ctrlval = CPU_SMT_DISABLED;
    2851             :         else if (sysfs_streq(buf, "forceoff"))
    2852             :                 ctrlval = CPU_SMT_FORCE_DISABLED;
    2853             :         else
    2854             :                 return -EINVAL;
    2855             : 
    2856             :         if (cpu_smt_control == CPU_SMT_FORCE_DISABLED)
    2857             :                 return -EPERM;
    2858             : 
    2859             :         if (cpu_smt_control == CPU_SMT_NOT_SUPPORTED)
    2860             :                 return -ENODEV;
    2861             : 
    2862             :         ret = lock_device_hotplug_sysfs();
    2863             :         if (ret)
    2864             :                 return ret;
    2865             : 
    2866             :         if (ctrlval != cpu_smt_control) {
    2867             :                 switch (ctrlval) {
    2868             :                 case CPU_SMT_ENABLED:
    2869             :                         ret = cpuhp_smt_enable();
    2870             :                         break;
    2871             :                 case CPU_SMT_DISABLED:
    2872             :                 case CPU_SMT_FORCE_DISABLED:
    2873             :                         ret = cpuhp_smt_disable(ctrlval);
    2874             :                         break;
    2875             :                 }
    2876             :         }
    2877             : 
    2878             :         unlock_device_hotplug();
    2879             :         return ret ? ret : count;
    2880             : }
    2881             : 
    2882             : #else /* !CONFIG_HOTPLUG_SMT */
    2883             : static ssize_t
    2884             : __store_smt_control(struct device *dev, struct device_attribute *attr,
    2885             :                     const char *buf, size_t count)
    2886             : {
    2887             :         return -ENODEV;
    2888             : }
    2889             : #endif /* CONFIG_HOTPLUG_SMT */
    2890             : 
    2891             : static const char *smt_states[] = {
    2892             :         [CPU_SMT_ENABLED]               = "on",
    2893             :         [CPU_SMT_DISABLED]              = "off",
    2894             :         [CPU_SMT_FORCE_DISABLED]        = "forceoff",
    2895             :         [CPU_SMT_NOT_SUPPORTED]         = "notsupported",
    2896             :         [CPU_SMT_NOT_IMPLEMENTED]       = "notimplemented",
    2897             : };
    2898             : 
    2899             : static ssize_t control_show(struct device *dev,
    2900             :                             struct device_attribute *attr, char *buf)
    2901             : {
    2902             :         const char *state = smt_states[cpu_smt_control];
    2903             : 
    2904             :         return snprintf(buf, PAGE_SIZE - 2, "%s\n", state);
    2905             : }
    2906             : 
    2907             : static ssize_t control_store(struct device *dev, struct device_attribute *attr,
    2908             :                              const char *buf, size_t count)
    2909             : {
    2910             :         return __store_smt_control(dev, attr, buf, count);
    2911             : }
    2912             : static DEVICE_ATTR_RW(control);
    2913             : 
    2914             : static ssize_t active_show(struct device *dev,
    2915             :                            struct device_attribute *attr, char *buf)
    2916             : {
    2917             :         return snprintf(buf, PAGE_SIZE - 2, "%d\n", sched_smt_active());
    2918             : }
    2919             : static DEVICE_ATTR_RO(active);
    2920             : 
    2921             : static struct attribute *cpuhp_smt_attrs[] = {
    2922             :         &dev_attr_control.attr,
    2923             :         &dev_attr_active.attr,
    2924             :         NULL
    2925             : };
    2926             : 
    2927             : static const struct attribute_group cpuhp_smt_attr_group = {
    2928             :         .attrs = cpuhp_smt_attrs,
    2929             :         .name = "smt",
    2931             : };
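
These attributes surface as /sys/devices/system/cpu/smt/control and .../smt/active. A hypothetical userspace snippet that disables SMT through the control file (accepted strings per __store_smt_control() above: "on", "off", "forceoff"):

        #include <fcntl.h>
        #include <unistd.h>

        int main(void)
        {
                int fd = open("/sys/devices/system/cpu/smt/control", O_WRONLY);

                if (fd < 0)
                        return 1;
                /* Fails with EPERM after "forceoff", ENODEV if unsupported */
                if (write(fd, "off", 3) < 0) {
                        close(fd);
                        return 1;
                }
                close(fd);
                return 0;
        }
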
    2932             : 
    2933             : static int __init cpu_smt_sysfs_init(void)
    2934             : {
    2935             :         struct device *dev_root;
    2936             :         int ret = -ENODEV;
    2937             : 
    2938             :         dev_root = bus_get_dev_root(&cpu_subsys);
    2939             :         if (dev_root) {
    2940             :                 ret = sysfs_create_group(&dev_root->kobj, &cpuhp_smt_attr_group);
    2941             :                 put_device(dev_root);
    2942             :         }
    2943             :         return ret;
    2944             : }
    2945             : 
    2946             : static int __init cpuhp_sysfs_init(void)
    2947             : {
    2948             :         struct device *dev_root;
    2949             :         int cpu, ret;
    2950             : 
    2951             :         ret = cpu_smt_sysfs_init();
    2952             :         if (ret)
    2953             :                 return ret;
    2954             : 
    2955             :         dev_root = bus_get_dev_root(&cpu_subsys);
    2956             :         if (dev_root) {
    2957             :                 ret = sysfs_create_group(&dev_root->kobj, &cpuhp_cpu_root_attr_group);
    2958             :                 put_device(dev_root);
    2959             :                 if (ret)
    2960             :                         return ret;
    2961             :         }
    2962             : 
    2963             :         for_each_possible_cpu(cpu) {
    2964             :                 struct device *dev = get_cpu_device(cpu);
    2965             : 
    2966             :                 if (!dev)
    2967             :                         continue;
    2968             :                 ret = sysfs_create_group(&dev->kobj, &cpuhp_cpu_attr_group);
    2969             :                 if (ret)
    2970             :                         return ret;
    2971             :         }
    2972             :         return 0;
    2973             : }
    2974             : device_initcall(cpuhp_sysfs_init);
    2975             : #endif /* CONFIG_SYSFS && CONFIG_HOTPLUG_CPU */
    2976             : 
    2977             : /*
    2978             :  * cpu_bit_bitmap[] is a special, "compressed" data structure that
    2979             :  * represents, for each shift count nr, the NR_CPUS-bit value 1<<nr.
    2980             :  *
    2981             :  * It is used by cpumask_of() to get a constant address to a CPU
    2982             :  * mask value that has a single bit set only.
    2983             :  */
    2984             : 
    2985             : /* cpu_bit_bitmap[0] is empty - so we can back into it */
    2986             : #define MASK_DECLARE_1(x)       [x+1][0] = (1UL << (x))
    2987             : #define MASK_DECLARE_2(x)       MASK_DECLARE_1(x), MASK_DECLARE_1(x+1)
    2988             : #define MASK_DECLARE_4(x)       MASK_DECLARE_2(x), MASK_DECLARE_2(x+2)
    2989             : #define MASK_DECLARE_8(x)       MASK_DECLARE_4(x), MASK_DECLARE_4(x+4)
    2990             : 
    2991             : const unsigned long cpu_bit_bitmap[BITS_PER_LONG+1][BITS_TO_LONGS(NR_CPUS)] = {
    2992             : 
    2993             :         MASK_DECLARE_8(0),      MASK_DECLARE_8(8),
    2994             :         MASK_DECLARE_8(16),     MASK_DECLARE_8(24),
    2995             : #if BITS_PER_LONG > 32
    2996             :         MASK_DECLARE_8(32),     MASK_DECLARE_8(40),
    2997             :         MASK_DECLARE_8(48),     MASK_DECLARE_8(56),
    2998             : #endif
    2999             : };
    3000             : EXPORT_SYMBOL_GPL(cpu_bit_bitmap);
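
The consumer of this table is cpumask_of()/get_cpu_mask() in <linux/cpumask.h>. Row 1+n has bit n set in its first word; the helper then steps the returned pointer backwards by cpu/BITS_PER_LONG words, so the tail words of the preceding rows (ending in the all-zero row 0) supply the zero words in front of the word carrying the bit. Roughly, quoted from memory and so to be treated as a sketch:

        static inline const struct cpumask *get_cpu_mask(unsigned int cpu)
        {
                const unsigned long *p = cpu_bit_bitmap[1 + cpu % BITS_PER_LONG];

                p -= cpu / BITS_PER_LONG;
                return to_cpumask(p);
        }

This is why the table has BITS_PER_LONG+1 rows rather than BITS_PER_LONG, and why row 0 must stay empty: "backing into" it is exactly the pointer decrement above.
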
    3001             : 
    3002             : const DECLARE_BITMAP(cpu_all_bits, NR_CPUS) = CPU_BITS_ALL;
    3003             : EXPORT_SYMBOL(cpu_all_bits);
    3004             : 
    3005             : #ifdef CONFIG_INIT_ALL_POSSIBLE
    3006             : struct cpumask __cpu_possible_mask __read_mostly
    3007             :         = {CPU_BITS_ALL};
    3008             : #else
    3009             : struct cpumask __cpu_possible_mask __read_mostly;
    3010             : #endif
    3011             : EXPORT_SYMBOL(__cpu_possible_mask);
    3012             : 
    3013             : struct cpumask __cpu_online_mask __read_mostly;
    3014             : EXPORT_SYMBOL(__cpu_online_mask);
    3015             : 
    3016             : struct cpumask __cpu_present_mask __read_mostly;
    3017             : EXPORT_SYMBOL(__cpu_present_mask);
    3018             : 
    3019             : struct cpumask __cpu_active_mask __read_mostly;
    3020             : EXPORT_SYMBOL(__cpu_active_mask);
    3021             : 
    3022             : struct cpumask __cpu_dying_mask __read_mostly;
    3023             : EXPORT_SYMBOL(__cpu_dying_mask);
    3024             : 
    3025             : atomic_t __num_online_cpus __read_mostly;
    3026             : EXPORT_SYMBOL(__num_online_cpus);
    3027             : 
    3028           0 : void init_cpu_present(const struct cpumask *src)
    3029             : {
    3030           0 :         cpumask_copy(&__cpu_present_mask, src);
    3031           0 : }
    3032             : 
    3033           0 : void init_cpu_possible(const struct cpumask *src)
    3034             : {
    3035           0 :         cpumask_copy(&__cpu_possible_mask, src);
    3036           0 : }
    3037             : 
    3038           0 : void init_cpu_online(const struct cpumask *src)
    3039             : {
    3040           0 :         cpumask_copy(&__cpu_online_mask, src);
    3041           0 : }
    3042             : 
    3043           0 : void set_cpu_online(unsigned int cpu, bool online)
    3044             : {
    3045             :         /*
    3046             :          * atomic_inc/dec() is required to handle the horrid abuse of this
    3047             :          * function by the reboot and kexec code which invoke it from
    3048             :          * IPI/NMI broadcasts when shutting down CPUs. Invocation from
    3049             :          * regular CPU hotplug is properly serialized.
    3050             :          *
    3051             :          * Note that the fact that __num_online_cpus is of type atomic_t
    3052             :          * does not protect readers which are not serialized against
    3053             :          * concurrent hotplug operations.
    3054             :          */
    3055           0 :         if (online) {
    3056           2 :                 if (!cpumask_test_and_set_cpu(cpu, &__cpu_online_mask))
    3057             :                         atomic_inc(&__num_online_cpus);
    3058             :         } else {
    3059           0 :                 if (cpumask_test_and_clear_cpu(cpu, &__cpu_online_mask))
    3060             :                         atomic_dec(&__num_online_cpus);
    3061             :         }
    3062           0 : }
    3063             : 
    3064             : /*
    3065             :  * Activate the first processor.
    3066             :  */
    3067           1 : void __init boot_cpu_init(void)
    3068             : {
    3069           1 :         int cpu = smp_processor_id();
    3070             : 
    3071             :         /* Mark the boot cpu "present", "online" etc. for the SMP and UP cases */
    3072           2 :         set_cpu_online(cpu, true);
    3073           2 :         set_cpu_active(cpu, true);
    3074           2 :         set_cpu_present(cpu, true);
    3075           2 :         set_cpu_possible(cpu, true);
    3076             : 
    3077             : #ifdef CONFIG_SMP
    3078             :         __boot_cpu_id = cpu;
    3079             : #endif
    3080           1 : }
    3081             : 
    3082             : /*
    3083             :  * Must be called _AFTER_ setting up the per_cpu areas
    3084             :  */
    3085           1 : void __init boot_cpu_hotplug_init(void)
    3086             : {
    3087             : #ifdef CONFIG_SMP
    3088             :         cpumask_set_cpu(smp_processor_id(), &cpus_booted_once_mask);
    3089             :         atomic_set(this_cpu_ptr(&cpuhp_state.ap_sync_state), SYNC_STATE_ONLINE);
    3090             : #endif
    3091           3 :         this_cpu_write(cpuhp_state.state, CPUHP_ONLINE);
    3092           3 :         this_cpu_write(cpuhp_state.target, CPUHP_ONLINE);
    3093           1 : }
    3094             : 
    3095             : /*
    3096             :  * These are used for a global "mitigations=" cmdline option for toggling
    3097             :  * optional CPU mitigations.
    3098             :  */
    3099             : enum cpu_mitigations {
    3100             :         CPU_MITIGATIONS_OFF,
    3101             :         CPU_MITIGATIONS_AUTO,
    3102             :         CPU_MITIGATIONS_AUTO_NOSMT,
    3103             : };
    3104             : 
    3105             : static enum cpu_mitigations cpu_mitigations __ro_after_init =
    3106             :         CPU_MITIGATIONS_AUTO;
    3107             : 
    3108           0 : static int __init mitigations_parse_cmdline(char *arg)
    3109             : {
    3110           0 :         if (!strcmp(arg, "off"))
    3111           0 :                 cpu_mitigations = CPU_MITIGATIONS_OFF;
    3112           0 :         else if (!strcmp(arg, "auto"))
    3113           0 :                 cpu_mitigations = CPU_MITIGATIONS_AUTO;
    3114           0 :         else if (!strcmp(arg, "auto,nosmt"))
    3115           0 :                 cpu_mitigations = CPU_MITIGATIONS_AUTO_NOSMT;
    3116             :         else
    3117           0 :                 pr_crit("Unsupported mitigations=%s, system may still be vulnerable\n",
    3118             :                         arg);
    3119             : 
    3120           0 :         return 0;
    3121             : }
    3122             : early_param("mitigations", mitigations_parse_cmdline);
    3123             : 
    3124             : /* mitigations=off */
    3125           0 : bool cpu_mitigations_off(void)
    3126             : {
    3127           0 :         return cpu_mitigations == CPU_MITIGATIONS_OFF;
    3128             : }
    3129             : EXPORT_SYMBOL_GPL(cpu_mitigations_off);
    3130             : 
    3131             : /* mitigations=auto,nosmt */
    3132           0 : bool cpu_mitigations_auto_nosmt(void)
    3133             : {
    3134           0 :         return cpu_mitigations == CPU_MITIGATIONS_AUTO_NOSMT;
    3135             : }
    3136             : EXPORT_SYMBOL_GPL(cpu_mitigations_auto_nosmt);
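
A hedged sketch of how these helpers are consumed by arch mitigation code (the real call sites live in, e.g., arch/x86/kernel/cpu/bugs.c; the surrounding function name here is illustrative):

        static void __init my_mitigation_select(void)
        {
                if (cpu_mitigations_off())
                        return;                 /* mitigations=off: leave disabled */

                if (cpu_mitigations_auto_nosmt())
                        cpu_smt_disable(false); /* "auto,nosmt": also turn SMT off */

                /* ... enable the mitigation ... */
        }
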

Generated by: LCOV version 1.14