LCOV - coverage.info - kernel/sched/sched.h

LCOV - code coverage report

Current view:	top level - kernel/sched - sched.h (source / functions)		Hit	Total	Coverage
Test:	coverage.info	Lines:	43	82	52.4 %
Date:	2023-07-19 18:55:55	Functions:	1	3	33.3 %

          Line data    Source code

       1             : /* SPDX-License-Identifier: GPL-2.0 */
       2             : /*
       3             :  * Scheduler internal types and methods:
       4             :  */
       5             : #ifndef _KERNEL_SCHED_SCHED_H
       6             : #define _KERNEL_SCHED_SCHED_H
       7             : 
       8             : #include <linux/sched/affinity.h>
       9             : #include <linux/sched/autogroup.h>
      10             : #include <linux/sched/cpufreq.h>
      11             : #include <linux/sched/deadline.h>
      12             : #include <linux/sched.h>
      13             : #include <linux/sched/loadavg.h>
      14             : #include <linux/sched/mm.h>
      15             : #include <linux/sched/rseq_api.h>
      16             : #include <linux/sched/signal.h>
      17             : #include <linux/sched/smt.h>
      18             : #include <linux/sched/stat.h>
      19             : #include <linux/sched/sysctl.h>
      20             : #include <linux/sched/task_flags.h>
      21             : #include <linux/sched/task.h>
      22             : #include <linux/sched/topology.h>
      23             : 
      24             : #include <linux/atomic.h>
      25             : #include <linux/bitmap.h>
      26             : #include <linux/bug.h>
      27             : #include <linux/capability.h>
      28             : #include <linux/cgroup_api.h>
      29             : #include <linux/cgroup.h>
      30             : #include <linux/context_tracking.h>
      31             : #include <linux/cpufreq.h>
      32             : #include <linux/cpumask_api.h>
      33             : #include <linux/ctype.h>
      34             : #include <linux/file.h>
      35             : #include <linux/fs_api.h>
      36             : #include <linux/hrtimer_api.h>
      37             : #include <linux/interrupt.h>
      38             : #include <linux/irq_work.h>
      39             : #include <linux/jiffies.h>
      40             : #include <linux/kref_api.h>
      41             : #include <linux/kthread.h>
      42             : #include <linux/ktime_api.h>
      43             : #include <linux/lockdep_api.h>
      44             : #include <linux/lockdep.h>
      45             : #include <linux/minmax.h>
      46             : #include <linux/mm.h>
      47             : #include <linux/module.h>
      48             : #include <linux/mutex_api.h>
      49             : #include <linux/plist.h>
      50             : #include <linux/poll.h>
      51             : #include <linux/proc_fs.h>
      52             : #include <linux/profile.h>
      53             : #include <linux/psi.h>
      54             : #include <linux/rcupdate.h>
      55             : #include <linux/seq_file.h>
      56             : #include <linux/seqlock.h>
      57             : #include <linux/softirq.h>
      58             : #include <linux/spinlock_api.h>
      59             : #include <linux/static_key.h>
      60             : #include <linux/stop_machine.h>
      61             : #include <linux/syscalls_api.h>
      62             : #include <linux/syscalls.h>
      63             : #include <linux/tick.h>
      64             : #include <linux/topology.h>
      65             : #include <linux/types.h>
      66             : #include <linux/u64_stats_sync_api.h>
      67             : #include <linux/uaccess.h>
      68             : #include <linux/wait_api.h>
      69             : #include <linux/wait_bit.h>
      70             : #include <linux/workqueue_api.h>
      71             : 
      72             : #include <trace/events/power.h>
      73             : #include <trace/events/sched.h>
      74             : 
      75             : #include "../workqueue_internal.h"
      76             : 
      77             : #ifdef CONFIG_CGROUP_SCHED
      78             : #include <linux/cgroup.h>
      79             : #include <linux/psi.h>
      80             : #endif
      81             : 
      82             : #ifdef CONFIG_SCHED_DEBUG
      83             : # include <linux/static_key.h>
      84             : #endif
      85             : 
      86             : #ifdef CONFIG_PARAVIRT
      87             : # include <asm/paravirt.h>
      88             : # include <asm/paravirt_api_clock.h>
      89             : #endif
      90             : 
      91             : #include "cpupri.h"
      92             : #include "cpudeadline.h"
      93             : 
      94             : #ifdef CONFIG_SCHED_DEBUG
      95             : # define SCHED_WARN_ON(x)      WARN_ONCE(x, #x)
      96             : #else
      97             : # define SCHED_WARN_ON(x)      ({ (void)(x), 0; })
      98             : #endif
      99             : 
     100             : struct rq;
     101             : struct cpuidle_state;
     102             : 
     103             : /* task_struct::on_rq states: */
     104             : #define TASK_ON_RQ_QUEUED       1
     105             : #define TASK_ON_RQ_MIGRATING    2
     106             : 
     107             : extern __read_mostly int scheduler_running;
     108             : 
     109             : extern unsigned long calc_load_update;
     110             : extern atomic_long_t calc_load_tasks;
     111             : 
     112             : extern unsigned int sysctl_sched_child_runs_first;
     113             : 
     114             : extern void calc_global_load_tick(struct rq *this_rq);
     115             : extern long calc_load_fold_active(struct rq *this_rq, long adjust);
     116             : 
     117             : extern void call_trace_sched_update_nr_running(struct rq *rq, int count);
     118             : 
     119             : extern unsigned int sysctl_sched_rt_period;
     120             : extern int sysctl_sched_rt_runtime;
     121             : extern int sched_rr_timeslice;
     122             : 
     123             : /*
     124             :  * Helpers for converting nanosecond timing to jiffy resolution
     125             :  */
     126             : #define NS_TO_JIFFIES(TIME)     ((unsigned long)(TIME) / (NSEC_PER_SEC / HZ))
     127             : 
     128             : /*
     129             :  * Increase resolution of nice-level calculations for 64-bit architectures.
     130             :  * The extra resolution improves shares distribution and load balancing of
     131             :  * low-weight task groups (e.g. nice +19 on an autogroup), deeper taskgroup
     132             :  * hierarchies, especially on larger systems. This is not a user-visible change
     133             :  * and does not change the user-interface for setting shares/weights.
     134             :  *
     135             :  * We increase resolution only if we have enough bits to allow this increased
     136             :  * resolution (i.e. 64-bit). The costs for increasing resolution when 32-bit
     137             :  * are pretty high and the returns do not justify the increased costs.
     138             :  *
     139             :  * Really only required when CONFIG_FAIR_GROUP_SCHED=y is also set, but to
     140             :  * increase coverage and consistency always enable it on 64-bit platforms.
     141             :  */
     142             : #ifdef CONFIG_64BIT
     143             : # define NICE_0_LOAD_SHIFT      (SCHED_FIXEDPOINT_SHIFT + SCHED_FIXEDPOINT_SHIFT)
     144             : # define scale_load(w)          ((w) << SCHED_FIXEDPOINT_SHIFT)
     145             : # define scale_load_down(w) \
     146             : ({ \
     147             :         unsigned long __w = (w); \
     148             :         if (__w) \
     149             :                 __w = max(2UL, __w >> SCHED_FIXEDPOINT_SHIFT); \
     150             :         __w; \
     151             : })
     152             : #else
     153             : # define NICE_0_LOAD_SHIFT      (SCHED_FIXEDPOINT_SHIFT)
     154             : # define scale_load(w)          (w)
     155             : # define scale_load_down(w)     (w)
     156             : #endif
     157             : 
     158             : /*
     159             :  * Task weight (visible to users) and its load (invisible to users) have
     160             :  * independent resolution, but they should be well calibrated. We use
     161             :  * scale_load() and scale_load_down() to convert between them. The
     162             :  * following must be true:
     163             :  *
     164             :  *  scale_load(sched_prio_to_weight[NICE_TO_PRIO(0)-MAX_RT_PRIO]) == NICE_0_LOAD
     165             :  *
     166             :  */
     167             : #define NICE_0_LOAD             (1L << NICE_0_LOAD_SHIFT)
     168             : 
     169             : /*
     170             :  * Single value that decides SCHED_DEADLINE internal math precision.
     171             :  * 10 -> just above 1us
     172             :  * 9  -> just above 0.5us
     173             :  */
     174             : #define DL_SCALE                10
     175             : 
     176             : /*
     177             :  * Single value that denotes runtime == period, i.e. unlimited time.
     178             :  */
     179             : #define RUNTIME_INF             ((u64)~0ULL)
     180             : 
     181             : static inline int idle_policy(int policy)
     182             : {
     183        6917 :         return policy == SCHED_IDLE;
     184             : }
     185             : static inline int fair_policy(int policy)
     186             : {
     187         760 :         return policy == SCHED_NORMAL || policy == SCHED_BATCH;
     188             : }
     189             : 
     190             : static inline int rt_policy(int policy)
     191             : {
     192         390 :         return policy == SCHED_FIFO || policy == SCHED_RR;
     193             : }
     194             : 
     195             : static inline int dl_policy(int policy)
     196             : {
     197             :         return policy == SCHED_DEADLINE;
     198             : }
     199             : static inline bool valid_policy(int policy)
     200             : {
     201         760 :         return idle_policy(policy) || fair_policy(policy) ||
     202         380 :                 rt_policy(policy) || dl_policy(policy);
     203             : }
     204             : 
     205             : static inline int task_has_idle_policy(struct task_struct *p)
     206             : {
     207       13840 :         return idle_policy(p->policy);
     208             : }
     209             : 
     210             : static inline int task_has_rt_policy(struct task_struct *p)
     211             : {
     212          10 :         return rt_policy(p->policy);
     213             : }
     214             : 
     215             : static inline int task_has_dl_policy(struct task_struct *p)
     216             : {
     217           5 :         return dl_policy(p->policy);
     218             : }
     219             : 
     220             : #define cap_scale(v, s) ((v)*(s) >> SCHED_CAPACITY_SHIFT)
     221             : 
     222             : static inline void update_avg(u64 *avg, u64 sample)
     223             : {
     224             :         s64 diff = sample - *avg;
     225             :         *avg += diff / 8;
     226             : }
     227             : 
     228             : /*
     229             :  * Shifting a value by an exponent greater *or equal* to the size of said value
     230             :  * is UB; cap at size-1.
     231             :  */
     232             : #define shr_bound(val, shift)                                                   \
     233             :         (val >> min_t(typeof(shift), shift, BITS_PER_TYPE(typeof(val)) - 1))
     234             : 
     235             : /*
     236             :  * !! For sched_setattr_nocheck() (kernel) only !!
     237             :  *
     238             :  * This is actually gross. :(
     239             :  *
     240             :  * It is used to make schedutil kworker(s) higher priority than SCHED_DEADLINE
     241             :  * tasks, but still be able to sleep. We need this on platforms that cannot
     242             :  * atomically change clock frequency. Remove once fast switching is
     243             :  * available on such platforms.
     244             :  *
     245             :  * SUGOV stands for SchedUtil GOVernor.
     246             :  */
     247             : #define SCHED_FLAG_SUGOV        0x10000000
     248             : 
     249             : #define SCHED_DL_FLAGS (SCHED_FLAG_RECLAIM | SCHED_FLAG_DL_OVERRUN | SCHED_FLAG_SUGOV)
     250             : 
     251             : static inline bool dl_entity_is_special(const struct sched_dl_entity *dl_se)
     252             : {
     253             : #ifdef CONFIG_CPU_FREQ_GOV_SCHEDUTIL
     254             :         return unlikely(dl_se->flags & SCHED_FLAG_SUGOV);
     255             : #else
     256             :         return false;
     257             : #endif
     258             : }
     259             : 
     260             : /*
     261             :  * Tells if entity @a should preempt entity @b.
     262             :  */
     263             : static inline bool dl_entity_preempt(const struct sched_dl_entity *a,
     264             :                                      const struct sched_dl_entity *b)
     265             : {
     266           0 :         return dl_entity_is_special(a) ||
     267           0 :                dl_time_before(a->deadline, b->deadline);
     268             : }
     269             : 
     270             : /*
     271             :  * This is the priority-queue data structure of the RT scheduling class:
     272             :  */
     273             : struct rt_prio_array {
     274             :         DECLARE_BITMAP(bitmap, MAX_RT_PRIO+1); /* include 1 bit for delimiter */
     275             :         struct list_head queue[MAX_RT_PRIO];
     276             : };
     277             : 
     278             : struct rt_bandwidth {
     279             :         /* nests inside the rq lock: */
     280             :         raw_spinlock_t          rt_runtime_lock;
     281             :         ktime_t                 rt_period;
     282             :         u64                     rt_runtime;
     283             :         struct hrtimer          rt_period_timer;
     284             :         unsigned int            rt_period_active;
     285             : };
     286             : 
     287             : void __dl_clear_params(struct task_struct *p);
     288             : 
     289             : struct dl_bandwidth {
     290             :         raw_spinlock_t          dl_runtime_lock;
     291             :         u64                     dl_runtime;
     292             :         u64                     dl_period;
     293             : };
     294             : 
     295             : static inline int dl_bandwidth_enabled(void)
     296             : {
     297             :         return sysctl_sched_rt_runtime >= 0;
     298             : }
     299             : 
     300             : /*
     301             :  * To keep the bandwidth of -deadline tasks under control
     302             :  * we need some place to:
     303             :  *  - store the maximum -deadline bandwidth of each CPU;
     304             :  *  - cache the fraction of bandwidth that is currently allocated in
     305             :  *    each root domain;
     306             :  *
     307             :  * This is all done in the data structure below. It is similar to the
     308             :  * one used for RT-throttling (rt_bandwidth), with the main difference
     309             :  * that, since here we are only interested in admission control, we
     310             :  * do not decrease any runtime while the group "executes", nor do we
     311             :  * need a timer to replenish it.
     312             :  *
     313             :  * With respect to SMP, bandwidth is given on a per root domain basis,
     314             :  * meaning that:
     315             :  *  - bw (< 100%) is the deadline bandwidth of each CPU;
     316             :  *  - total_bw is the currently allocated bandwidth in each root domain;
     317             :  */
     318             : struct dl_bw {
     319             :         raw_spinlock_t          lock;
     320             :         u64                     bw;
     321             :         u64                     total_bw;
     322             : };
     323             : 
     324             : extern void init_dl_bw(struct dl_bw *dl_b);
     325             : extern int  sched_dl_global_validate(void);
     326             : extern void sched_dl_do_global(void);
     327             : extern int  sched_dl_overflow(struct task_struct *p, int policy, const struct sched_attr *attr);
     328             : extern void __setparam_dl(struct task_struct *p, const struct sched_attr *attr);
     329             : extern void __getparam_dl(struct task_struct *p, struct sched_attr *attr);
     330             : extern bool __checkparam_dl(const struct sched_attr *attr);
     331             : extern bool dl_param_changed(struct task_struct *p, const struct sched_attr *attr);
     332             : extern int  dl_cpuset_cpumask_can_shrink(const struct cpumask *cur, const struct cpumask *trial);
     333             : extern int  dl_cpu_busy(int cpu, struct task_struct *p);
     334             : 
     335             : #ifdef CONFIG_CGROUP_SCHED
     336             : 
     337             : struct cfs_rq;
     338             : struct rt_rq;
     339             : 
     340             : extern struct list_head task_groups;
     341             : 
     342             : struct cfs_bandwidth {
     343             : #ifdef CONFIG_CFS_BANDWIDTH
     344             :         raw_spinlock_t          lock;
     345             :         ktime_t                 period;
     346             :         u64                     quota;
     347             :         u64                     runtime;
     348             :         u64                     burst;
     349             :         u64                     runtime_snap;
     350             :         s64                     hierarchical_quota;
     351             : 
     352             :         u8                      idle;
     353             :         u8                      period_active;
     354             :         u8                      slack_started;
     355             :         struct hrtimer          period_timer;
     356             :         struct hrtimer          slack_timer;
     357             :         struct list_head        throttled_cfs_rq;
     358             : 
     359             :         /* Statistics: */
     360             :         int                     nr_periods;
     361             :         int                     nr_throttled;
     362             :         int                     nr_burst;
     363             :         u64                     throttled_time;
     364             :         u64                     burst_time;
     365             : #endif
     366             : };
     367             : 
     368             : /* Task group related information */
     369             : struct task_group {
     370             :         struct cgroup_subsys_state css;
     371             : 
     372             : #ifdef CONFIG_FAIR_GROUP_SCHED
     373             :         /* schedulable entities of this group on each CPU */
     374             :         struct sched_entity     **se;
     375             :         /* runqueue "owned" by this group on each CPU */
     376             :         struct cfs_rq           **cfs_rq;
     377             :         unsigned long           shares;
     378             : 
     379             :         /* A positive value indicates that this is a SCHED_IDLE group. */
     380             :         int                     idle;
     381             : 
     382             : #ifdef  CONFIG_SMP
     383             :         /*
     384             :          * load_avg can be heavily contended at clock tick time, so put
     385             :          * it in its own cacheline separated from the fields above which
     386             :          * will also be accessed at each tick.
     387             :          */
     388             :         atomic_long_t           load_avg ____cacheline_aligned;
     389             : #endif
     390             : #endif
     391             : 
     392             : #ifdef CONFIG_RT_GROUP_SCHED
     393             :         struct sched_rt_entity  **rt_se;
     394             :         struct rt_rq            **rt_rq;
     395             : 
     396             :         struct rt_bandwidth     rt_bandwidth;
     397             : #endif
     398             : 
     399             :         struct rcu_head         rcu;
     400             :         struct list_head        list;
     401             : 
     402             :         struct task_group       *parent;
     403             :         struct list_head        siblings;
     404             :         struct list_head        children;
     405             : 
     406             : #ifdef CONFIG_SCHED_AUTOGROUP
     407             :         struct autogroup        *autogroup;
     408             : #endif
     409             : 
     410             :         struct cfs_bandwidth    cfs_bandwidth;
     411             : 
     412             : #ifdef CONFIG_UCLAMP_TASK_GROUP
     413             :         /* The two decimal precision [%] value requested from user-space */
     414             :         unsigned int            uclamp_pct[UCLAMP_CNT];
     415             :         /* Clamp values requested for a task group */
     416             :         struct uclamp_se        uclamp_req[UCLAMP_CNT];
     417             :         /* Effective clamp values used for a task group */
     418             :         struct uclamp_se        uclamp[UCLAMP_CNT];
     419             : #endif
     420             : 
     421             : };
     422             : 
     423             : #ifdef CONFIG_FAIR_GROUP_SCHED
     424             : #define ROOT_TASK_GROUP_LOAD    NICE_0_LOAD
     425             : 
     426             : /*
     427             :  * A weight of 0 or 1 can cause arithmetic problems.
     428             :  * The weight of a cfs_rq is the sum of the weights of the entities
     429             :  * queued on this cfs_rq, so the weight of an entity should not be
     430             :  * too large, and neither should the shares value of a task group.
     431             :  * (The default weight is 1024 - so there's no practical
     432             :  *  limitation from this.)
     433             :  */
     434             : #define MIN_SHARES              (1UL <<  1)
     435             : #define MAX_SHARES              (1UL << 18)
     436             : #endif
     437             : 
     438             : typedef int (*tg_visitor)(struct task_group *, void *);
     439             : 
     440             : extern int walk_tg_tree_from(struct task_group *from,
     441             :                              tg_visitor down, tg_visitor up, void *data);
     442             : 
     443             : /*
     444             :  * Iterate the full tree, calling @down when first entering a node and @up when
     445             :  * leaving it for the final time.
     446             :  *
     447             :  * Caller must hold rcu_lock or sufficient equivalent.
     448             :  */
     449             : static inline int walk_tg_tree(tg_visitor down, tg_visitor up, void *data)
     450             : {
     451             :         return walk_tg_tree_from(&root_task_group, down, up, data);
     452             : }
     453             : 
     454             : extern int tg_nop(struct task_group *tg, void *data);
     455             : 
     456             : extern void free_fair_sched_group(struct task_group *tg);
     457             : extern int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent);
     458             : extern void online_fair_sched_group(struct task_group *tg);
     459             : extern void unregister_fair_sched_group(struct task_group *tg);
     460             : extern void init_tg_cfs_entry(struct task_group *tg, struct cfs_rq *cfs_rq,
     461             :                         struct sched_entity *se, int cpu,
     462             :                         struct sched_entity *parent);
     463             : extern void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b);
     464             : 
     465             : extern void __refill_cfs_bandwidth_runtime(struct cfs_bandwidth *cfs_b);
     466             : extern void start_cfs_bandwidth(struct cfs_bandwidth *cfs_b);
     467             : extern void unthrottle_cfs_rq(struct cfs_rq *cfs_rq);
     468             : 
     469             : extern void init_tg_rt_entry(struct task_group *tg, struct rt_rq *rt_rq,
     470             :                 struct sched_rt_entity *rt_se, int cpu,
     471             :                 struct sched_rt_entity *parent);
     472             : extern int sched_group_set_rt_runtime(struct task_group *tg, long rt_runtime_us);
     473             : extern int sched_group_set_rt_period(struct task_group *tg, u64 rt_period_us);
     474             : extern long sched_group_rt_runtime(struct task_group *tg);
     475             : extern long sched_group_rt_period(struct task_group *tg);
     476             : extern int sched_rt_can_attach(struct task_group *tg, struct task_struct *tsk);
     477             : 
     478             : extern struct task_group *sched_create_group(struct task_group *parent);
     479             : extern void sched_online_group(struct task_group *tg,
     480             :                                struct task_group *parent);
     481             : extern void sched_destroy_group(struct task_group *tg);
     482             : extern void sched_release_group(struct task_group *tg);
     483             : 
     484             : extern void sched_move_task(struct task_struct *tsk);
     485             : 
     486             : #ifdef CONFIG_FAIR_GROUP_SCHED
     487             : extern int sched_group_set_shares(struct task_group *tg, unsigned long shares);
     488             : 
     489             : extern int sched_group_set_idle(struct task_group *tg, long idle);
     490             : 
     491             : #ifdef CONFIG_SMP
     492             : extern void set_task_rq_fair(struct sched_entity *se,
     493             :                              struct cfs_rq *prev, struct cfs_rq *next);
     494             : #else /* !CONFIG_SMP */
     495             : static inline void set_task_rq_fair(struct sched_entity *se,
     496             :                              struct cfs_rq *prev, struct cfs_rq *next) { }
     497             : #endif /* CONFIG_SMP */
     498             : #endif /* CONFIG_FAIR_GROUP_SCHED */
     499             : 
     500             : #else /* CONFIG_CGROUP_SCHED */
     501             : 
     502             : struct cfs_bandwidth { };
     503             : 
     504             : #endif  /* CONFIG_CGROUP_SCHED */
     505             : 
     506             : extern void unregister_rt_sched_group(struct task_group *tg);
     507             : extern void free_rt_sched_group(struct task_group *tg);
     508             : extern int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent);
     509             : 
     510             : /*
     511             :  * u64_u32_load/u64_u32_store
     512             :  *
     513             :  * Use a copy of a u64 value to protect against data races. This is only
     514             :  * applicable to 32-bit architectures.
     515             :  */
     516             : #ifdef CONFIG_64BIT
     517             : # define u64_u32_load_copy(var, copy)       var
     518             : # define u64_u32_store_copy(var, copy, val) (var = val)
     519             : #else
     520             : # define u64_u32_load_copy(var, copy)                                   \
     521             : ({                                                                      \
     522             :         u64 __val, __val_copy;                                          \
     523             :         do {                                                            \
     524             :                 __val_copy = copy;                                      \
     525             :                 /*                                                      \
     526             :                  * paired with u64_u32_store_copy(), ordering access    \
     527             :                  * to var and copy.                                     \
     528             :                  */                                                     \
     529             :                 smp_rmb();                                              \
     530             :                 __val = var;                                            \
     531             :         } while (__val != __val_copy);                                  \
     532             :         __val;                                                          \
     533             : })
     534             : # define u64_u32_store_copy(var, copy, val)                             \
     535             : do {                                                                    \
     536             :         typeof(val) __val = (val);                                      \
     537             :         var = __val;                                                    \
     538             :         /*                                                              \
     539             :          * paired with u64_u32_load_copy(), ordering access to var and  \
     540             :          * copy.                                                        \
     541             :          */                                                             \
     542             :         smp_wmb();                                                      \
     543             :         copy = __val;                                                   \
     544             : } while (0)
     545             : #endif
     546             : # define u64_u32_load(var)      u64_u32_load_copy(var, var##_copy)
     547             : # define u64_u32_store(var, val) u64_u32_store_copy(var, var##_copy, val)
     548             : 
     549             : /* CFS-related fields in a runqueue */
     550             : struct cfs_rq {
     551             :         struct load_weight      load;
     552             :         unsigned int            nr_running;
     553             :         unsigned int            h_nr_running;      /* SCHED_{NORMAL,BATCH,IDLE} */
     554             :         unsigned int            idle_nr_running;   /* SCHED_IDLE */
     555             :         unsigned int            idle_h_nr_running; /* SCHED_IDLE */
     556             : 
     557             :         u64                     exec_clock;
     558             :         u64                     min_vruntime;
     559             : #ifdef CONFIG_SCHED_CORE
     560             :         unsigned int            forceidle_seq;
     561             :         u64                     min_vruntime_fi;
     562             : #endif
     563             : 
     564             : #ifndef CONFIG_64BIT
     565             :         u64                     min_vruntime_copy;
     566             : #endif
     567             : 
     568             :         struct rb_root_cached   tasks_timeline;
     569             : 
     570             :         /*
     571             :          * 'curr' points to currently running entity on this cfs_rq.
     572             :          * It is set to NULL otherwise (i.e when none are currently running).
     573             :          */
     574             :         struct sched_entity     *curr;
     575             :         struct sched_entity     *next;
     576             :         struct sched_entity     *last;
     577             :         struct sched_entity     *skip;
     578             : 
     579             : #ifdef  CONFIG_SCHED_DEBUG
     580             :         unsigned int            nr_spread_over;
     581             : #endif
     582             : 
     583             : #ifdef CONFIG_SMP
     584             :         /*
     585             :          * CFS load tracking
     586             :          */
     587             :         struct sched_avg        avg;
     588             : #ifndef CONFIG_64BIT
     589             :         u64                     last_update_time_copy;
     590             : #endif
     591             :         struct {
     592             :                 raw_spinlock_t  lock ____cacheline_aligned;
     593             :                 int             nr;
     594             :                 unsigned long   load_avg;
     595             :                 unsigned long   util_avg;
     596             :                 unsigned long   runnable_avg;
     597             :         } removed;
     598             : 
     599             : #ifdef CONFIG_FAIR_GROUP_SCHED
     600             :         unsigned long           tg_load_avg_contrib;
     601             :         long                    propagate;
     602             :         long                    prop_runnable_sum;
     603             : 
     604             :         /*
     605             :          *   h_load = weight * f(tg)
     606             :          *
     607             :          * Where f(tg) is the recursive weight fraction assigned to
     608             :          * this group.
     609             :          */
     610             :         unsigned long           h_load;
     611             :         u64                     last_h_load_update;
     612             :         struct sched_entity     *h_load_next;
     613             : #endif /* CONFIG_FAIR_GROUP_SCHED */
     614             : #endif /* CONFIG_SMP */
     615             : 
     616             : #ifdef CONFIG_FAIR_GROUP_SCHED
     617             :         struct rq               *rq;    /* CPU runqueue to which this cfs_rq is attached */
     618             : 
     619             :         /*
     620             :          * leaf cfs_rqs are those that hold tasks (lowest schedulable entity in
     621             :          * a hierarchy). Non-leaf lrqs hold other higher schedulable entities
     622             :          * (like users, containers etc.)
     623             :          *
     624             :          * leaf_cfs_rq_list ties together list of leaf cfs_rq's in a CPU.
     625             :          * This list is used during load balance.
     626             :          */
     627             :         int                     on_list;
     628             :         struct list_head        leaf_cfs_rq_list;
     629             :         struct task_group       *tg;    /* group that "owns" this runqueue */
     630             : 
     631             :         /* Locally cached copy of our task_group's idle value */
     632             :         int                     idle;
     633             : 
     634             : #ifdef CONFIG_CFS_BANDWIDTH
     635             :         int                     runtime_enabled;
     636             :         s64                     runtime_remaining;
     637             : 
     638             :         u64                     throttled_pelt_idle;
     639             : #ifndef CONFIG_64BIT
     640             :         u64                     throttled_pelt_idle_copy;
     641             : #endif
     642             :         u64                     throttled_clock;
     643             :         u64                     throttled_clock_pelt;
     644             :         u64                     throttled_clock_pelt_time;
     645             :         int                     throttled;
     646             :         int                     throttle_count;
     647             :         struct list_head        throttled_list;
     648             : #ifdef CONFIG_SMP
     649             :         struct list_head        throttled_csd_list;
     650             : #endif
     651             : #endif /* CONFIG_CFS_BANDWIDTH */
     652             : #endif /* CONFIG_FAIR_GROUP_SCHED */
     653             : };
     654             : 
     655             : static inline int rt_bandwidth_enabled(void)
     656             : {
     657           0 :         return sysctl_sched_rt_runtime >= 0;
     658             : }
     659             : 
     660             : /* RT IPI pull logic requires IRQ_WORK */
     661             : #if defined(CONFIG_IRQ_WORK) && defined(CONFIG_SMP)
     662             : # define HAVE_RT_PUSH_IPI
     663             : #endif
     664             : 
     665             : /* Real-Time classes' related field in a runqueue: */
     666             : struct rt_rq {
     667             :         struct rt_prio_array    active;
     668             :         unsigned int            rt_nr_running;
     669             :         unsigned int            rr_nr_running;
     670             : #if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED
     671             :         struct {
     672             :                 int             curr; /* highest queued rt task prio */
     673             : #ifdef CONFIG_SMP
     674             :                 int             next; /* next highest */
     675             : #endif
     676             :         } highest_prio;
     677             : #endif
     678             : #ifdef CONFIG_SMP
     679             :         unsigned int            rt_nr_migratory;
     680             :         unsigned int            rt_nr_total;
     681             :         int                     overloaded;
     682             :         struct plist_head       pushable_tasks;
     683             : 
     684             : #endif /* CONFIG_SMP */
     685             :         int                     rt_queued;
     686             : 
     687             :         int                     rt_throttled;
     688             :         u64                     rt_time;
     689             :         u64                     rt_runtime;
     690             :         /* Nests inside the rq lock: */
     691             :         raw_spinlock_t          rt_runtime_lock;
     692             : 
     693             : #ifdef CONFIG_RT_GROUP_SCHED
     694             :         unsigned int            rt_nr_boosted;
     695             : 
     696             :         struct rq               *rq;
     697             :         struct task_group       *tg;
     698             : #endif
     699             : };
     700             : 
     701             : static inline bool rt_rq_is_runnable(struct rt_rq *rt_rq)
     702             : {
     703             :         return rt_rq->rt_queued && rt_rq->rt_nr_running;
     704             : }
     705             : 
     706             : /* Deadline class' related fields in a runqueue */
     707             : struct dl_rq {
     708             :         /* runqueue is an rbtree, ordered by deadline */
     709             :         struct rb_root_cached   root;
     710             : 
     711             :         unsigned int            dl_nr_running;
     712             : 
     713             : #ifdef CONFIG_SMP
     714             :         /*
     715             :          * Deadline values of the currently executing and the
     716             :          * earliest ready task on this rq. Caching these facilitates
     717             :          * the decision whether or not a ready but not running task
     718             :          * should migrate somewhere else.
     719             :          */
     720             :         struct {
     721             :                 u64             curr;
     722             :                 u64             next;
     723             :         } earliest_dl;
     724             : 
     725             :         unsigned int            dl_nr_migratory;
     726             :         int                     overloaded;
     727             : 
     728             :         /*
     729             :          * Tasks on this rq that can be pushed away. They are kept in
     730             :          * an rb-tree, ordered by tasks' deadlines, with caching
     731             :          * of the leftmost (earliest deadline) element.
     732             :          */
     733             :         struct rb_root_cached   pushable_dl_tasks_root;
     734             : #else
     735             :         struct dl_bw            dl_bw;
     736             : #endif
     737             :         /*
     738             :          * "Active utilization" for this runqueue: increased when a
     739             :          * task wakes up (becomes TASK_RUNNING) and decreased when a
     740             :          * task blocks
     741             :          */
     742             :         u64                     running_bw;
     743             : 
     744             :         /*
     745             :          * Utilization of the tasks "assigned" to this runqueue (including
     746             :          * the tasks that are in runqueue and the tasks that executed on this
     747             :          * CPU and blocked). Increased when a task moves to this runqueue, and
     748             :          * decreased when the task moves away (migrates, changes scheduling
     749             :          * policy, or terminates).
     750             :          * This is needed to compute the "inactive utilization" for the
     751             :          * runqueue (inactive utilization = this_bw - running_bw).
     752             :          */
     753             :         u64                     this_bw;
     754             :         u64                     extra_bw;
     755             : 
     756             :         /*
     757             :          * Inverse of the fraction of CPU utilization that can be reclaimed
     758             :          * by the GRUB algorithm.
     759             :          */
     760             :         u64                     bw_ratio;
     761             : };
     762             : 
     763             : #ifdef CONFIG_FAIR_GROUP_SCHED
     764             : /* An entity is a task if it doesn't "own" a runqueue */
     765             : #define entity_is_task(se)      (!se->my_q)
     766             : 
     767             : static inline void se_update_runnable(struct sched_entity *se)
     768             : {
     769             :         if (!entity_is_task(se))
     770             :                 se->runnable_weight = se->my_q->h_nr_running;
     771             : }
     772             : 
     773             : static inline long se_runnable(struct sched_entity *se)
     774             : {
     775             :         if (entity_is_task(se))
     776             :                 return !!se->on_rq;
     777             :         else
     778             :                 return se->runnable_weight;
     779             : }
     780             : 
     781             : #else
     782             : #define entity_is_task(se)      1
     783             : 
     784             : static inline void se_update_runnable(struct sched_entity *se) {}
     785             : 
     786             : static inline long se_runnable(struct sched_entity *se)
     787             : {
     788             :         return !!se->on_rq;
     789             : }
     790             : #endif
     791             : 
     792             : #ifdef CONFIG_SMP
     793             : /*
     794             :  * XXX we want to get rid of these helpers and use the full load resolution.
     795             :  */
     796             : static inline long se_weight(struct sched_entity *se)
     797             : {
     798             :         return scale_load_down(se->load.weight);
     799             : }
     800             : 
     801             : 
     802             : static inline bool sched_asym_prefer(int a, int b)
     803             : {
     804             :         return arch_asym_cpu_priority(a) > arch_asym_cpu_priority(b);
     805             : }
     806             : 
     807             : struct perf_domain {
     808             :         struct em_perf_domain *em_pd;
     809             :         struct perf_domain *next;
     810             :         struct rcu_head rcu;
     811             : };
     812             : 
     813             : /* Scheduling group status flags */
     814             : #define SG_OVERLOAD             0x1 /* More than one runnable task on a CPU. */
     815             : #define SG_OVERUTILIZED         0x2 /* One or more CPUs are over-utilized. */
     816             : 
     817             : /*
     818             :  * We add the notion of a root-domain which will be used to define per-domain
     819             :  * variables. Each exclusive cpuset essentially defines an island domain by
     820             :  * fully partitioning the member CPUs from any other cpuset. Whenever a new
     821             :  * exclusive cpuset is created, we also create and attach a new root-domain
     822             :  * object.
     823             :  *
     824             :  */
     825             : struct root_domain {
     826             :         atomic_t                refcount;
     827             :         atomic_t                rto_count;
     828             :         struct rcu_head         rcu;
     829             :         cpumask_var_t           span;
     830             :         cpumask_var_t           online;
     831             : 
     832             :         /*
     833             :          * Indicate pullable load on at least one CPU, e.g:
     834             :          * - More than one runnable task
     835             :          * - Running task is misfit
     836             :          */
     837             :         int                     overload;
     838             : 
     839             :         /* Indicate one or more cpus over-utilized (tipping point) */
     840             :         int                     overutilized;
     841             : 
     842             :         /*
     843             :          * The bit corresponding to a CPU gets set here if such CPU has more
     844             :          * than one runnable -deadline task (as it is below for RT tasks).
     845             :          */
     846             :         cpumask_var_t           dlo_mask;
     847             :         atomic_t                dlo_count;
     848             :         struct dl_bw            dl_bw;
     849             :         struct cpudl            cpudl;
     850             : 
     851             :         /*
     852             :          * Indicate whether a root_domain's dl_bw has been checked or
     853             :          * updated. It's monotonously increasing value.
     854             :          *
     855             :          * Also, some corner cases, like 'wrap around' is dangerous, but given
     856             :          * that u64 is 'big enough'. So that shouldn't be a concern.
     857             :          */
     858             :         u64 visit_gen;
     859             : 
     860             : #ifdef HAVE_RT_PUSH_IPI
     861             :         /*
     862             :          * For IPI pull requests, loop across the rto_mask.
     863             :          */
     864             :         struct irq_work         rto_push_work;
     865             :         raw_spinlock_t          rto_lock;
     866             :         /* These are only updated and read within rto_lock */
     867             :         int                     rto_loop;
     868             :         int                     rto_cpu;
     869             :         /* These atomics are updated outside of a lock */
     870             :         atomic_t                rto_loop_next;
     871             :         atomic_t                rto_loop_start;
     872             : #endif
     873             :         /*
     874             :          * The "RT overload" flag: it gets set if a CPU has more than
     875             :          * one runnable RT task.
     876             :          */
     877             :         cpumask_var_t           rto_mask;
     878             :         struct cpupri           cpupri;
     879             : 
     880             :         unsigned long           max_cpu_capacity;
     881             : 
     882             :         /*
     883             :          * NULL-terminated list of performance domains intersecting with the
     884             :          * CPUs of the rd. Protected by RCU.
     885             :          */
     886             :         struct perf_domain __rcu *pd;
     887             : };
     888             : 
     889             : extern void init_defrootdomain(void);
     890             : extern int sched_init_domains(const struct cpumask *cpu_map);
     891             : extern void rq_attach_root(struct rq *rq, struct root_domain *rd);
     892             : extern void sched_get_rd(struct root_domain *rd);
     893             : extern void sched_put_rd(struct root_domain *rd);
     894             : 
     895             : #ifdef HAVE_RT_PUSH_IPI
     896             : extern void rto_push_irq_work_func(struct irq_work *work);
     897             : #endif
     898             : #endif /* CONFIG_SMP */
     899             : 
     900             : #ifdef CONFIG_UCLAMP_TASK
     901             : /*
     902             :  * struct uclamp_bucket - Utilization clamp bucket
     903             :  * @value: utilization clamp value for tasks on this clamp bucket
     904             :  * @tasks: number of RUNNABLE tasks on this clamp bucket
     905             :  *
     906             :  * Keep track of how many tasks are RUNNABLE for a given utilization
     907             :  * clamp value.
     908             :  */
     909             : struct uclamp_bucket {
     910             :         unsigned long value : bits_per(SCHED_CAPACITY_SCALE);
     911             :         unsigned long tasks : BITS_PER_LONG - bits_per(SCHED_CAPACITY_SCALE);
     912             : };
     913             : 
     914             : /*
     915             :  * struct uclamp_rq - rq's utilization clamp
     916             :  * @value: currently active clamp values for a rq
     917             :  * @bucket: utilization clamp buckets affecting a rq
     918             :  *
     919             :  * Keep track of RUNNABLE tasks on a rq to aggregate their clamp values.
     920             :  * A clamp value is affecting a rq when there is at least one task RUNNABLE
     921             :  * (or actually running) with that value.
     922             :  *
     923             :  * There are up to UCLAMP_CNT possible different clamp values, currently there
     924             :  * are only two: minimum utilization and maximum utilization.
     925             :  *
     926             :  * All utilization clamping values are MAX aggregated, since:
     927             :  * - for util_min: we want to run the CPU at least at the max of the minimum
     928             :  *   utilization required by its currently RUNNABLE tasks.
     929             :  * - for util_max: we want to allow the CPU to run up to the max of the
     930             :  *   maximum utilization allowed by its currently RUNNABLE tasks.
     931             :  *
     932             :  * Since on each system we expect only a limited number of different
     933             :  * utilization clamp values (UCLAMP_BUCKETS), use a simple array to track
     934             :  * the metrics required to compute all the per-rq utilization clamp values.
     935             :  */
     936             : struct uclamp_rq {
     937             :         unsigned int value;
     938             :         struct uclamp_bucket bucket[UCLAMP_BUCKETS];
     939             : };
     940             : 
     941             : DECLARE_STATIC_KEY_FALSE(sched_uclamp_used);
     942             : #endif /* CONFIG_UCLAMP_TASK */
     943             : 
     944             : struct rq;
     945             : struct balance_callback {
     946             :         struct balance_callback *next;
     947             :         void (*func)(struct rq *rq);
     948             : };
     949             : 
     950             : /*
     951             :  * This is the main, per-CPU runqueue data structure.
     952             :  *
     953             :  * Locking rule: those places that want to lock multiple runqueues
     954             :  * (such as the load balancing or the thread migration code), lock
     955             :  * acquire operations must be ordered by ascending &runqueue.
     956             :  */
     957             : struct rq {
     958             :         /* runqueue lock: */
     959             :         raw_spinlock_t          __lock;
     960             : 
     961             :         /*
     962             :          * nr_running and cpu_load should be in the same cacheline because
     963             :          * remote CPUs use both these fields when doing load calculation.
     964             :          */
     965             :         unsigned int            nr_running;
     966             : #ifdef CONFIG_NUMA_BALANCING
     967             :         unsigned int            nr_numa_running;
     968             :         unsigned int            nr_preferred_running;
     969             :         unsigned int            numa_migrate_on;
     970             : #endif
     971             : #ifdef CONFIG_NO_HZ_COMMON
     972             : #ifdef CONFIG_SMP
     973             :         unsigned long           last_blocked_load_update_tick;
     974             :         unsigned int            has_blocked_load;
     975             :         call_single_data_t      nohz_csd;
     976             : #endif /* CONFIG_SMP */
     977             :         unsigned int            nohz_tick_stopped;
     978             :         atomic_t                nohz_flags;
     979             : #endif /* CONFIG_NO_HZ_COMMON */
     980             : 
     981             : #ifdef CONFIG_SMP
     982             :         unsigned int            ttwu_pending;
     983             : #endif
     984             :         u64                     nr_switches;
     985             : 
     986             : #ifdef CONFIG_UCLAMP_TASK
     987             :         /* Utilization clamp values based on CPU's RUNNABLE tasks */
     988             :         struct uclamp_rq        uclamp[UCLAMP_CNT] ____cacheline_aligned;
     989             :         unsigned int            uclamp_flags;
     990             : #define UCLAMP_FLAG_IDLE 0x01
     991             : #endif
     992             : 
     993             :         struct cfs_rq           cfs;
     994             :         struct rt_rq            rt;
     995             :         struct dl_rq            dl;
     996             : 
     997             : #ifdef CONFIG_FAIR_GROUP_SCHED
     998             :         /* list of leaf cfs_rq on this CPU: */
     999             :         struct list_head        leaf_cfs_rq_list;
    1000             :         struct list_head        *tmp_alone_branch;
    1001             : #endif /* CONFIG_FAIR_GROUP_SCHED */
    1002             : 
    1003             :         /*
    1004             :          * This is part of a global counter where only the total sum
    1005             :          * over all CPUs matters. A task can increase this counter on
    1006             :          * one CPU and if it got migrated afterwards it may decrease
    1007             :          * it on another CPU. Always updated under the runqueue lock:
    1008             :          */
    1009             :         unsigned int            nr_uninterruptible;
    1010             : 
    1011             :         struct task_struct __rcu        *curr;
    1012             :         struct task_struct      *idle;
    1013             :         struct task_struct      *stop;
    1014             :         unsigned long           next_balance;
    1015             :         struct mm_struct        *prev_mm;
    1016             : 
    1017             :         unsigned int            clock_update_flags;
    1018             :         u64                     clock;
    1019             :         /* Ensure that all clocks are in the same cache line */
    1020             :         u64                     clock_task ____cacheline_aligned;
    1021             :         u64                     clock_pelt;
    1022             :         unsigned long           lost_idle_time;
    1023             :         u64                     clock_pelt_idle;
    1024             :         u64                     clock_idle;
    1025             : #ifndef CONFIG_64BIT
    1026             :         u64                     clock_pelt_idle_copy;
    1027             :         u64                     clock_idle_copy;
    1028             : #endif
    1029             : 
    1030             :         atomic_t                nr_iowait;
    1031             : 
    1032             : #ifdef CONFIG_SCHED_DEBUG
    1033             :         u64 last_seen_need_resched_ns;
    1034             :         int ticks_without_resched;
    1035             : #endif
    1036             : 
    1037             : #ifdef CONFIG_MEMBARRIER
    1038             :         int membarrier_state;
    1039             : #endif
    1040             : 
    1041             : #ifdef CONFIG_SMP
    1042             :         struct root_domain              *rd;
    1043             :         struct sched_domain __rcu       *sd;
    1044             : 
    1045             :         unsigned long           cpu_capacity;
    1046             :         unsigned long           cpu_capacity_orig;
    1047             : 
    1048             :         struct balance_callback *balance_callback;
    1049             : 
    1050             :         unsigned char           nohz_idle_balance;
    1051             :         unsigned char           idle_balance;
    1052             : 
    1053             :         unsigned long           misfit_task_load;
    1054             : 
    1055             :         /* For active balancing */
    1056             :         int                     active_balance;
    1057             :         int                     push_cpu;
    1058             :         struct cpu_stop_work    active_balance_work;
    1059             : 
    1060             :         /* CPU of this runqueue: */
    1061             :         int                     cpu;
    1062             :         int                     online;
    1063             : 
    1064             :         struct list_head cfs_tasks;
    1065             : 
    1066             :         struct sched_avg        avg_rt;
    1067             :         struct sched_avg        avg_dl;
    1068             : #ifdef CONFIG_HAVE_SCHED_AVG_IRQ
    1069             :         struct sched_avg        avg_irq;
    1070             : #endif
    1071             : #ifdef CONFIG_SCHED_THERMAL_PRESSURE
    1072             :         struct sched_avg        avg_thermal;
    1073             : #endif
    1074             :         u64                     idle_stamp;
    1075             :         u64                     avg_idle;
    1076             : 
    1077             :         unsigned long           wake_stamp;
    1078             :         u64                     wake_avg_idle;
    1079             : 
    1080             :         /* This is used to determine avg_idle's max value */
    1081             :         u64                     max_idle_balance_cost;
    1082             : 
    1083             : #ifdef CONFIG_HOTPLUG_CPU
    1084             :         struct rcuwait          hotplug_wait;
    1085             : #endif
    1086             : #endif /* CONFIG_SMP */
    1087             : 
    1088             : #ifdef CONFIG_IRQ_TIME_ACCOUNTING
    1089             :         u64                     prev_irq_time;
    1090             : #endif
    1091             : #ifdef CONFIG_PARAVIRT
    1092             :         u64                     prev_steal_time;
    1093             : #endif
    1094             : #ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING
    1095             :         u64                     prev_steal_time_rq;
    1096             : #endif
    1097             : 
    1098             :         /* calc_load related fields */
    1099             :         unsigned long           calc_load_update;
    1100             :         long                    calc_load_active;
    1101             : 
    1102             : #ifdef CONFIG_SCHED_HRTICK
    1103             : #ifdef CONFIG_SMP
    1104             :         call_single_data_t      hrtick_csd;
    1105             : #endif
    1106             :         struct hrtimer          hrtick_timer;
    1107             :         ktime_t                 hrtick_time;
    1108             : #endif
    1109             : 
    1110             : #ifdef CONFIG_SCHEDSTATS
    1111             :         /* latency stats */
    1112             :         struct sched_info       rq_sched_info;
    1113             :         unsigned long long      rq_cpu_time;
    1114             :         /* could above be rq->cfs_rq.exec_clock + rq->rt_rq.rt_runtime ? */
    1115             : 
    1116             :         /* sys_sched_yield() stats */
    1117             :         unsigned int            yld_count;
    1118             : 
    1119             :         /* schedule() stats */
    1120             :         unsigned int            sched_count;
    1121             :         unsigned int            sched_goidle;
    1122             : 
    1123             :         /* try_to_wake_up() stats */
    1124             :         unsigned int            ttwu_count;
    1125             :         unsigned int            ttwu_local;
    1126             : #endif
    1127             : 
    1128             : #ifdef CONFIG_CPU_IDLE
    1129             :         /* Must be inspected within a rcu lock section */
    1130             :         struct cpuidle_state    *idle_state;
    1131             : #endif
    1132             : 
    1133             : #ifdef CONFIG_SMP
    1134             :         unsigned int            nr_pinned;
    1135             : #endif
    1136             :         unsigned int            push_busy;
    1137             :         struct cpu_stop_work    push_work;
    1138             : 
    1139             : #ifdef CONFIG_SCHED_CORE
    1140             :         /* per rq */
    1141             :         struct rq               *core;
    1142             :         struct task_struct      *core_pick;
    1143             :         unsigned int            core_enabled;
    1144             :         unsigned int            core_sched_seq;
    1145             :         struct rb_root          core_tree;
    1146             : 
    1147             :         /* shared state -- careful with sched_core_cpu_deactivate() */
    1148             :         unsigned int            core_task_seq;
    1149             :         unsigned int            core_pick_seq;
    1150             :         unsigned long           core_cookie;
    1151             :         unsigned int            core_forceidle_count;
    1152             :         unsigned int            core_forceidle_seq;
    1153             :         unsigned int            core_forceidle_occupation;
    1154             :         u64                     core_forceidle_start;
    1155             : #endif
    1156             : 
    1157             :         /* Scratch cpumask to be temporarily used under rq_lock */
    1158             :         cpumask_var_t           scratch_mask;
    1159             : 
    1160             : #if defined(CONFIG_CFS_BANDWIDTH) && defined(CONFIG_SMP)
    1161             :         call_single_data_t      cfsb_csd;
    1162             :         struct list_head        cfsb_csd_list;
    1163             : #endif
    1164             : };
    1165             : 
    1166             : #ifdef CONFIG_FAIR_GROUP_SCHED
    1167             : 
    1168             : /* CPU runqueue to which this cfs_rq is attached (read from the cfs_rq->rq back-pointer) */
    1169             : static inline struct rq *rq_of(struct cfs_rq *cfs_rq)
    1170             : {
    1171             :         return cfs_rq->rq;
    1172             : }
    1173             : 
    1174             : #else
    1175             : /* Here the cfs_rq is the 'cfs' member embedded in struct rq: recover the enclosing rq */
    1176             : static inline struct rq *rq_of(struct cfs_rq *cfs_rq)
    1177             : {
    1178       23546 :         return container_of(cfs_rq, struct rq, cfs);
    1179             : }
    1180             : #endif
    1181             : /* CPU number of @rq: rq->cpu on CONFIG_SMP builds, constant 0 otherwise */
    1182             : static inline int cpu_of(struct rq *rq)
    1183             : {
    1184             : #ifdef CONFIG_SMP
    1185             :         return rq->cpu;
    1186             : #else
    1187             :         return 0;
    1188             : #endif
    1189             : }
    1190             : 
    1191             : #define MDF_PUSH        0x01
    1192             : /* True if p->migration_disabled is non-zero; always false without CONFIG_SMP */
    1193             : static inline bool is_migration_disabled(struct task_struct *p)
    1194             : {
    1195             : #ifdef CONFIG_SMP
    1196             :         return p->migration_disabled;
    1197             : #else
    1198             :         return false;
    1199             : #endif
    1200             : }
    1201             : 
    1202             : DECLARE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
    1203             : /* Accessors built on the per-CPU 'runqueues' variable */
    1204             : #define cpu_rq(cpu)             (&per_cpu(runqueues, (cpu)))    /* runqueue of @cpu */
    1205             : #define this_rq()               this_cpu_ptr(&runqueues)        /* runqueue of the current CPU */
    1206             : #define task_rq(p)              cpu_rq(task_cpu(p))             /* runqueue of the CPU given by task_cpu(@p) */
    1207             : #define cpu_curr(cpu)           (cpu_rq(cpu)->curr)             /* ->curr of @cpu's runqueue */
    1208             : #define raw_rq()                raw_cpu_ptr(&runqueues)         /* as this_rq(), via raw_cpu_ptr() */
    1209             : 
    1210             : struct sched_group;
    1211             : #ifdef CONFIG_SCHED_CORE
    1212             : static inline struct cpumask *sched_group_span(struct sched_group *sg);
    1213             : 
    1214             : DECLARE_STATIC_KEY_FALSE(__sched_core_enabled);
    1215             : /* True if the __sched_core_enabled static key is on and rq->core_enabled is set */
    1216             : static inline bool sched_core_enabled(struct rq *rq)
    1217             : {
    1218             :         return static_branch_unlikely(&__sched_core_enabled) && rq->core_enabled;
    1219             : }
    1220             : /* True if the global __sched_core_enabled static key is off (no per-rq check) */
    1221             : static inline bool sched_core_disabled(void)
    1222             : {
    1223             :         return !static_branch_unlikely(&__sched_core_enabled);
    1224             : }
    1225             : 
    1226             : /*
    1227             :  * Lock used for @rq: rq->core->__lock if sched_core_enabled(rq), else rq->__lock.
    1228             :  * Be careful; not for general use. The result isn't stable unless you hold a relevant rq->__lock.
    1229             :  */
    1230             : static inline raw_spinlock_t *rq_lockp(struct rq *rq)
    1231             : {
    1232             :         if (sched_core_enabled(rq))
    1233             :                 return &rq->core->__lock;
    1234             : 
    1235             :         return &rq->__lock;
    1236             : }
    1237             : /* Like rq_lockp(), but tests only rq->core_enabled, without the static-key check */
    1238             : static inline raw_spinlock_t *__rq_lockp(struct rq *rq)
    1239             : {
    1240             :         if (rq->core_enabled)
    1241             :                 return &rq->core->__lock;
    1242             : 
    1243             :         return &rq->__lock;
    1244             : }
    1245             : 
    1246             : bool cfs_prio_less(const struct task_struct *a, const struct task_struct *b,
    1247             :                         bool fi);
    1248             : 
    1249             : /*
    1250             :  * Helpers to check whether the CPU's core cookie matches the task's cookie
    1251             :  * when core scheduling is enabled.
    1252             :  * As a special case, the task's cookie always matches the CPU's core cookie
    1253             :  * if the CPU is in an idle core (handled in sched_core_cookie_match(), not here).
    1254             :  */
    1255             : static inline bool sched_cpu_cookie_match(struct rq *rq, struct task_struct *p)
    1256             : {
    1257             :         /* Ignore cookie match if core scheduler is not enabled on the CPU. */
    1258             :         if (!sched_core_enabled(rq))
    1259             :                 return true;
    1260             : 
    1261             :         return rq->core->core_cookie == p->core_cookie;
    1262             : }
    1263             : /* Cookie check that also succeeds when every CPU in cpu_smt_mask() of @rq's CPU is idle */
    1264             : static inline bool sched_core_cookie_match(struct rq *rq, struct task_struct *p)
    1265             : {
    1266             :         bool idle_core = true;
    1267             :         int cpu;
    1268             : 
    1269             :         /* Ignore cookie match if core scheduler is not enabled on the CPU. */
    1270             :         if (!sched_core_enabled(rq))
    1271             :                 return true;
    1272             : 
    1273             :         for_each_cpu(cpu, cpu_smt_mask(cpu_of(rq))) {
    1274             :                 if (!available_idle_cpu(cpu)) {
    1275             :                         idle_core = false;      /* one busy sibling is enough */
    1276             :                         break;
    1277             :                 }
    1278             :         }
    1279             : 
    1280             :         /*
    1281             :          * A CPU in an idle core is always the best choice for tasks with
    1282             :          * cookies.
    1283             :          */
    1284             :         return idle_core || rq->core->core_cookie == p->core_cookie;
    1285             : }
    1286             : /* True if any CPU in both @group's span and p->cpus_ptr passes sched_core_cookie_match() */
    1287             : static inline bool sched_group_cookie_match(struct rq *rq,
    1288             :                                             struct task_struct *p,
    1289             :                                             struct sched_group *group)
    1290             : {
    1291             :         int cpu;
    1292             : 
    1293             :         /* Ignore cookie match if core scheduler is not enabled on the CPU. */
    1294             :         if (!sched_core_enabled(rq))
    1295             :                 return true;
    1296             : 
    1297             :         for_each_cpu_and(cpu, sched_group_span(group), p->cpus_ptr) {
    1298             :                 if (sched_core_cookie_match(cpu_rq(cpu), p))
    1299             :                         return true;
    1300             :         }
    1301             :         return false;   /* no candidate CPU matched */
    1302             : }
    1303             : /* True if p->core_node is not an empty rb node (presumably: @p is linked in a core_tree) */
    1304             : static inline bool sched_core_enqueued(struct task_struct *p)
    1305             : {
    1306             :         return !RB_EMPTY_NODE(&p->core_node);
    1307             : }
    1308             : 
    1309             : extern void sched_core_enqueue(struct rq *rq, struct task_struct *p);
    1310             : extern void sched_core_dequeue(struct rq *rq, struct task_struct *p, int flags);
    1311             : 
    1312             : extern void sched_core_get(void);
    1313             : extern void sched_core_put(void);
    1314             : 
    1315             : #else /* !CONFIG_SCHED_CORE */
    1316             : /* Core scheduling compiled out: predicates are constants and both lock helpers return rq->__lock */
    1317             : static inline bool sched_core_enabled(struct rq *rq)
    1318             : {
    1319             :         return false;
    1320             : }
    1321             : 
    1322             : static inline bool sched_core_disabled(void)
    1323             : {
    1324             :         return true;
    1325             : }
    1326             : 
    1327             : static inline raw_spinlock_t *rq_lockp(struct rq *rq)
    1328             : {
    1329             :         return &rq->__lock;
    1330             : }
    1331             : 
    1332             : static inline raw_spinlock_t *__rq_lockp(struct rq *rq)
    1333             : {
    1334             :         return &rq->__lock;
    1335             : }
    1336             : 
    1337             : static inline bool sched_cpu_cookie_match(struct rq *rq, struct task_struct *p)
    1338             : {
    1339             :         return true;    /* no cookies to compare, so everything matches */
    1340             : }
    1341             : 
    1342             : static inline bool sched_core_cookie_match(struct rq *rq, struct task_struct *p)
    1343             : {
    1344             :         return true;
    1345             : }
    1346             : 
    1347             : static inline bool sched_group_cookie_match(struct rq *rq,
    1348             :                                             struct task_struct *p,
    1349             :                                             struct sched_group *group)
    1350             : {
    1351             :         return true;
    1352             : }
    1353             : #endif /* CONFIG_SCHED_CORE */
    1354             : /* Lockdep assertion that the lock returned by __rq_lockp(@rq) is held */
    1355             : static inline void lockdep_assert_rq_held(struct rq *rq)
    1356             : {
    1357       30869 :         lockdep_assert_held(__rq_lockp(rq));
    1358             : }
    1359             : 
    1360             : extern void raw_spin_rq_lock_nested(struct rq *rq, int subclass);
    1361             : extern bool raw_spin_rq_trylock(struct rq *rq);
    1362             : extern void raw_spin_rq_unlock(struct rq *rq);
    1363             : /* Lock @rq via raw_spin_rq_lock_nested() with subclass 0 */
    1364             : static inline void raw_spin_rq_lock(struct rq *rq)
    1365             : {
    1366        8666 :         raw_spin_rq_lock_nested(rq, 0);
    1367             : }
    1368             : /* Disable local interrupts, then lock @rq */
    1369             : static inline void raw_spin_rq_lock_irq(struct rq *rq)
    1370             : {
    1371           0 :         local_irq_disable();
    1372           0 :         raw_spin_rq_lock(rq);
    1373             : }
    1374             : /* Unlock @rq, then enable local interrupts (reverse order of raw_spin_rq_lock_irq()) */
    1375             : static inline void raw_spin_rq_unlock_irq(struct rq *rq)
    1376             : {
    1377        2513 :         raw_spin_rq_unlock(rq);
    1378             :         local_irq_enable();
    1379             : }
    1380             : /* local_irq_save(), then lock @rq; returns the saved flags for the matching irqrestore */
    1381             : static inline unsigned long _raw_spin_rq_lock_irqsave(struct rq *rq)
    1382             : {
    1383             :         unsigned long flags;
    1384           0 :         local_irq_save(flags);
    1385           0 :         raw_spin_rq_lock(rq);
    1386             :         return flags;
    1387             : }
    1388             : /* Unlock @rq, then local_irq_restore() the caller-supplied @flags */
    1389             : static inline void raw_spin_rq_unlock_irqrestore(struct rq *rq, unsigned long flags)
    1390             : {
    1391           0 :         raw_spin_rq_unlock(rq);
    1392           0 :         local_irq_restore(flags);
    1393             : }
    1394             : /* Statement-form wrapper: assigns the flags returned by _raw_spin_rq_lock_irqsave() to @flags */
    1395             : #define raw_spin_rq_lock_irqsave(rq, flags)     \
    1396             : do {                                            \
    1397             :         flags = _raw_spin_rq_lock_irqsave(rq);  \
    1398             : } while (0)
    1399             : 
    1400             : #ifdef CONFIG_SCHED_SMT
    1401             : extern void __update_idle_core(struct rq *rq);
    1402             : /* Call __update_idle_core() only while the sched_smt_present static key is enabled */
    1403             : static inline void update_idle_core(struct rq *rq)
    1404             : {
    1405             :         if (static_branch_unlikely(&sched_smt_present))
    1406             :                 __update_idle_core(rq);
    1407             : }
    1408             : 
    1409             : #else
    1410             : static inline void update_idle_core(struct rq *rq) { }  /* no-op without CONFIG_SCHED_SMT */
    1411             : #endif
    1412             : 
    1413             : #ifdef CONFIG_FAIR_GROUP_SCHED
    1414             : static inline struct task_struct *task_of(struct sched_entity *se)      /* task embedding @se */
    1415             : {
    1416             :         SCHED_WARN_ON(!entity_is_task(se));     /* warn if @se fails entity_is_task() */
    1417             :         return container_of(se, struct task_struct, se);       /* @se is task_struct's 'se' member */
    1418             : }
    1419             : /* cfs_rq recorded in @p's own sched entity (p->se.cfs_rq) */
    1420             : static inline struct cfs_rq *task_cfs_rq(struct task_struct *p)
    1421             : {
    1422             :         return p->se.cfs_rq;
    1423             : }
    1424             : 
    1425             : /* runqueue on which this entity is (to be) queued, as recorded in se->cfs_rq */
    1426             : static inline struct cfs_rq *cfs_rq_of(const struct sched_entity *se)
    1427             : {
    1428             :         return se->cfs_rq;
    1429             : }
    1430             : 
    1431             : /* runqueue "owned" by this group, i.e. the entity's grp->my_q */
    1432             : static inline struct cfs_rq *group_cfs_rq(struct sched_entity *grp)
    1433             : {
    1434             :         return grp->my_q;
    1435             : }
    1436             : 
    1437             : #else
    1438             : /* Map a sched_entity back to the task_struct that embeds it as 'se' (no entity_is_task() check here) */
    1439             : #define task_of(_se)    container_of(_se, struct task_struct, se)
    1440             : /* Here a task's cfs_rq is the 'cfs' member of the runqueue given by task_rq(@p) */
    1441             : static inline struct cfs_rq *task_cfs_rq(const struct task_struct *p)
    1442             : {
    1443        2821 :         return &task_rq(p)->cfs;
    1444             : }
    1445             : /* Resolve @se to its task, then return the 'cfs' member of that task's runqueue */
    1446             : static inline struct cfs_rq *cfs_rq_of(const struct sched_entity *se)
    1447             : {
    1448       19161 :         const struct task_struct *p = task_of(se);
    1449       19161 :         struct rq *rq = task_rq(p);
    1450             : 
    1451             :         return &rq->cfs;
    1452             : }
    1453             : 
    1454             : /* runqueue "owned" by this group: always NULL without CONFIG_FAIR_GROUP_SCHED */
    1455             : static inline struct cfs_rq *group_cfs_rq(struct sched_entity *grp)
    1456             : {
    1457             :         return NULL;
    1458             : }
    1459             : #endif
    1460             : 
    1461             : extern void update_rq_clock(struct rq *rq);
    1462             : 
    1463             : /*
    1464             :  * rq::clock_update_flags bits
    1465             :  *
    1466             :  * %RQCF_REQ_SKIP - will request skipping of clock update on the next
    1467             :  *  call to __schedule(). This is an optimisation to avoid
    1468             :  *  neighbouring rq clock updates.
    1469             :  *
    1470             :  * %RQCF_ACT_SKIP - is set from inside of __schedule() when skipping is
    1471             :  *  in effect and calls to update_rq_clock() are being ignored.
    1472             :  *
    1473             :  * %RQCF_UPDATED - is a debug flag that indicates whether a call has been
    1474             :  *  made to update_rq_clock() since the last time rq::lock was pinned.
    1475             :  *
    1476             :  * If inside of __schedule(), clock_update_flags will have been
    1477             :  * shifted left (a left shift is a cheap operation for the fast path
    1478             :  * to promote %RQCF_REQ_SKIP to %RQCF_ACT_SKIP), so you must use,
    1479             :  *
    1480             :  *      if (rq->clock_update_flags >= RQCF_UPDATED)
    1481             :  *
    1482             :  * to check if %RQCF_UPDATED is set. It'll never be shifted more than
    1483             :  * one position though, because the next rq_unpin_lock() will shift it
    1484             :  * back.
    1485             :  */
    1486             : #define RQCF_REQ_SKIP           0x01
    1487             : #define RQCF_ACT_SKIP           0x02
    1488             : #define RQCF_UPDATED            0x04
    1489             : /* Warn if clock_update_flags is below RQCF_ACT_SKIP, i.e. neither ACT_SKIP nor UPDATED is set */
    1490             : static inline void assert_clock_updated(struct rq *rq)
    1491             : {
    1492             :         /*
    1493             :          * The only reason for not seeing a clock update since the
    1494             :          * last rq_pin_lock() is if we're currently skipping updates.
    1495             :          */
    1496             :         SCHED_WARN_ON(rq->clock_update_flags < RQCF_ACT_SKIP);
    1497             : }
    1498             : /* Return rq->clock after asserting the rq lock is held and the clock was updated */
    1499             : static inline u64 rq_clock(struct rq *rq)
    1500             : {
    1501           0 :         lockdep_assert_rq_held(rq);
    1502           0 :         assert_clock_updated(rq);
    1503             : 
    1504             :         return rq->clock;
    1505             : }
    1506             : /* Return rq->clock_task, with the same lock and clock-update assertions as rq_clock() */
    1507             : static inline u64 rq_clock_task(struct rq *rq)
    1508             : {
    1509       17817 :         lockdep_assert_rq_held(rq);
    1510       17817 :         assert_clock_updated(rq);
    1511             : 
    1512             :         return rq->clock_task;
    1513             : }
    1514             : 
    1515             : /**
    1516             :  * By default the decay is the default pelt decay period.
    1517             :  * The decay shift can change the decay period in
    1518             :  * multiples of 32.
    1519             :  *  Decay shift         Decay period(ms)
    1520             :  *      0                       32
    1521             :  *      1                       64
    1522             :  *      2                       128
    1523             :  *      3                       256
    1524             :  *      4                       512
    1525             :  */
    1526             : extern int sched_thermal_decay_shift;
    1527             : /* rq_clock_task() right-shifted by sched_thermal_decay_shift */
    1528             : static inline u64 rq_clock_thermal(struct rq *rq)
    1529             : {
    1530        2943 :         return rq_clock_task(rq) >> sched_thermal_decay_shift;
    1531             : }
    1532             : /* Set RQCF_REQ_SKIP in rq->clock_update_flags; the rq lock must be held */
    1533             : static inline void rq_clock_skip_update(struct rq *rq)
    1534             : {
    1535        1252 :         lockdep_assert_rq_held(rq);
    1536        1252 :         rq->clock_update_flags |= RQCF_REQ_SKIP;
    1537             : }
    1538             : 
    1539             : /*
    1540             :  * Clear a pending RQCF_REQ_SKIP (rq lock must be held). See rt task
    1541             :  * throttling, which is the only time a skip request is canceled.
    1542             :  */
    1543             : static inline void rq_clock_cancel_skipupdate(struct rq *rq)
    1544             : {
    1545           0 :         lockdep_assert_rq_held(rq);
    1546           0 :         rq->clock_update_flags &= ~RQCF_REQ_SKIP;
    1547             : }
    1548             : /* State carried across one rq lock section: IRQ flags, lockdep pin cookie, debug clock flags */
    1549             : struct rq_flags {
    1550             :         unsigned long flags;            /* IRQ state used by the irqsave/irqrestore lock helpers */
    1551             :         struct pin_cookie cookie;       /* value returned by lockdep_pin_lock() in rq_pin_lock() */
    1552             : #ifdef CONFIG_SCHED_DEBUG
    1553             :         /*
    1554             :          * A copy of (rq::clock_update_flags & RQCF_UPDATED) for the
    1555             :          * current pin context is stashed here in case it needs to be
    1556             :          * restored in rq_repin_lock().
    1557             :          */
    1558             :         unsigned int clock_update_flags;
    1559             : #endif
    1560             : };
    1561             : 
    1562             : extern struct balance_callback balance_push_callback;
    1563             : 
    1564             : /*
    1565             :  * Lockdep annotation that avoids accidental unlocks; it's like a
    1566             :  * sticky/continuous lockdep_assert_held().
    1567             :  *
    1568             :  * This avoids code that has access to 'struct rq *rq' (basically everything in
    1569             :  * the scheduler) from accidentally unlocking the rq if they do not also have a
    1570             :  * copy of the (on-stack) 'struct rq_flags rf'.
    1571             :  *
    1572             :  * Also see Documentation/locking/lockdep-design.rst.
    1573             :  */
    1574             : static inline void rq_pin_lock(struct rq *rq, struct rq_flags *rf)
    1575             : {
    1576             :         rf->cookie = lockdep_pin_lock(__rq_lockp(rq));  /* cookie is needed to unpin/repin later */
    1577             : 
    1578             : #ifdef CONFIG_SCHED_DEBUG
    1579             :         rq->clock_update_flags &= (RQCF_REQ_SKIP|RQCF_ACT_SKIP);       /* drop RQCF_UPDATED for the new pin context */
    1580             :         rf->clock_update_flags = 0;     /* nothing stashed yet */
    1581             : #ifdef CONFIG_SMP
    1582             :         SCHED_WARN_ON(rq->balance_callback && rq->balance_callback != &balance_push_callback); /* only balance_push_callback may be queued here */
    1583             : #endif
    1584             : #endif
    1585             : }
    1586             : /* Drop the lockdep pin taken with @rf's cookie; debug builds first stash RQCF_UPDATED in @rf */
    1587             : static inline void rq_unpin_lock(struct rq *rq, struct rq_flags *rf)
    1588             : {
    1589             : #ifdef CONFIG_SCHED_DEBUG
    1590             :         if (rq->clock_update_flags > RQCF_ACT_SKIP)     /* some bit above the two skip bits is set */
    1591             :                 rf->clock_update_flags = RQCF_UPDATED;
    1592             : #endif
    1593             : 
    1594        6152 :         lockdep_unpin_lock(__rq_lockp(rq), rf->cookie);
    1595             : }
    1596             : /* Re-pin the rq lock with @rf's existing cookie and OR the stashed clock flags back into @rq */
    1597             : static inline void rq_repin_lock(struct rq *rq, struct rq_flags *rf)
    1598             : {
    1599             :         lockdep_repin_lock(__rq_lockp(rq), rf->cookie);
    1600             : 
    1601             : #ifdef CONFIG_SCHED_DEBUG
    1602             :         /*
    1603             :          * Restore the value we stashed in @rf for this pin context.
    1604             :          */
    1605             :         rq->clock_update_flags |= rf->clock_update_flags;
    1606             : #endif
    1607             : }
    1608             : 
    1609             : struct rq *__task_rq_lock(struct task_struct *p, struct rq_flags *rf)
    1610             :         __acquires(rq->lock);
    1611             : 
    1612             : struct rq *task_rq_lock(struct task_struct *p, struct rq_flags *rf)
    1613             :         __acquires(p->pi_lock)
    1614             :         __acquires(rq->lock);
    1615             : /* Unpin, then release, the rq lock; p->pi_lock and IRQ state are not touched */
    1616             : static inline void __task_rq_unlock(struct rq *rq, struct rq_flags *rf)
    1617             :         __releases(rq->lock)
    1618             : {
    1619           0 :         rq_unpin_lock(rq, rf);
    1620           0 :         raw_spin_rq_unlock(rq);
    1621             : }
    1622             : /* Unpin and release the rq lock, then release p->pi_lock restoring IRQ state from rf->flags */
    1623             : static inline void
    1624             : task_rq_unlock(struct rq *rq, struct task_struct *p, struct rq_flags *rf)
    1625             :         __releases(rq->lock)
    1626             :         __releases(p->pi_lock)
    1627             : {
    1628        1534 :         rq_unpin_lock(rq, rf);
    1629         767 :         raw_spin_rq_unlock(rq);
    1630        1534 :         raw_spin_unlock_irqrestore(&p->pi_lock, rf->flags);
    1631             : }
    1632             : /* Lock @rq with IRQs saved into rf->flags and disabled, then pin the lock */
    1633             : static inline void
    1634             : rq_lock_irqsave(struct rq *rq, struct rq_flags *rf)
    1635             :         __acquires(rq->lock)
    1636             : {
    1637             :         raw_spin_rq_lock_irqsave(rq, rf->flags);
    1638             :         rq_pin_lock(rq, rf);
    1639             : }
    1640             : /* Disable IRQs and lock @rq, then pin the lock; rf->flags is not written */
    1641             : static inline void
    1642             : rq_lock_irq(struct rq *rq, struct rq_flags *rf)
    1643             :         __acquires(rq->lock)
    1644             : {
    1645             :         raw_spin_rq_lock_irq(rq);
    1646             :         rq_pin_lock(rq, rf);
    1647             : }
    1648             : /* Lock @rq without changing IRQ state, then pin the lock */
    1649             : static inline void
    1650             : rq_lock(struct rq *rq, struct rq_flags *rf)
    1651             :         __acquires(rq->lock)
    1652             : {
    1653        7898 :         raw_spin_rq_lock(rq);
    1654        7898 :         rq_pin_lock(rq, rf);
    1655             : }
    1656             : /* Unpin the lock, then unlock @rq and restore IRQ state from rf->flags */
    1657             : static inline void
    1658             : rq_unlock_irqrestore(struct rq *rq, struct rq_flags *rf)
    1659             :         __releases(rq->lock)
    1660             : {
    1661             :         rq_unpin_lock(rq, rf);
    1662             :         raw_spin_rq_unlock_irqrestore(rq, rf->flags);
    1663             : }
    1664             : /* Unpin the lock, then unlock @rq and enable IRQs */
    1665             : static inline void
    1666             : rq_unlock_irq(struct rq *rq, struct rq_flags *rf)
    1667             :         __releases(rq->lock)
    1668             : {
    1669           0 :         rq_unpin_lock(rq, rf);
    1670           0 :         raw_spin_rq_unlock_irq(rq);
    1671             : }
    1672             : /* Unpin the lock, then unlock @rq; IRQ state is left unchanged */
    1673             : static inline void
    1674             : rq_unlock(struct rq *rq, struct rq_flags *rf)
    1675             :         __releases(rq->lock)
    1676             : {
    1677       10388 :         rq_unpin_lock(rq, rf);
    1678        5385 :         raw_spin_rq_unlock(rq);
    1679             : }
    1680             : /* Disable IRQs, then lock and pin the current CPU's runqueue; returns that runqueue */
    1681             : static inline struct rq *
    1682             : this_rq_lock_irq(struct rq_flags *rf)
    1683             :         __acquires(rq->lock)
    1684             : {
    1685             :         struct rq *rq;
    1686             : 
    1687             :         local_irq_disable();    /* done before this_rq() is evaluated */
    1688           0 :         rq = this_rq();
    1689           0 :         rq_lock(rq, rf);
    1690             :         return rq;
    1691             : }
    1692             : 
    1693             : #ifdef CONFIG_NUMA
    1694             : enum numa_topology_type {
    1695             :         NUMA_DIRECT,
    1696             :         NUMA_GLUELESS_MESH,
    1697             :         NUMA_BACKPLANE,
    1698             : };
    1699             : extern enum numa_topology_type sched_numa_topology_type;
    1700             : extern int sched_max_numa_distance;
    1701             : extern bool find_numa_distance(int distance);
    1702             : extern void sched_init_numa(int offline_node);
    1703             : extern void sched_update_numa(int cpu, bool online);
    1704             : extern void sched_domains_numa_masks_set(unsigned int cpu);
    1705             : extern void sched_domains_numa_masks_clear(unsigned int cpu);
    1706             : extern int sched_numa_find_closest(const struct cpumask *cpus, int cpu);
    1707             : #else /* !CONFIG_NUMA: empty stubs */
    1708             : static inline void sched_init_numa(int offline_node) { }
    1709             : static inline void sched_update_numa(int cpu, bool online) { }
    1710             : static inline void sched_domains_numa_masks_set(unsigned int cpu) { }
    1711             : static inline void sched_domains_numa_masks_clear(unsigned int cpu) { }
    1712             : static inline int sched_numa_find_closest(const struct cpumask *cpus, int cpu)
    1713             : {
    1714             :         return nr_cpu_ids;      /* constant here; presumably the "no CPU found" value - confirm with callers */
    1715             : }
    1716             : #endif /* CONFIG_NUMA */
    1717             : 
    1718             : #ifdef CONFIG_NUMA_BALANCING
    1719             : /* The regions in numa_faults array from task_struct */
    1720             : enum numa_faults_stats {
    1721             :         NUMA_MEM = 0,
    1722             :         NUMA_CPU,
    1723             :         NUMA_MEMBUF,
    1724             :         NUMA_CPUBUF
    1725             : };
    1726             : extern void sched_setnuma(struct task_struct *p, int node);
    1727             : extern int migrate_task_to(struct task_struct *p, int cpu);
    1728             : extern int migrate_swap(struct task_struct *p, struct task_struct *t,
    1729             :                         int cpu, int scpu);
    1730             : extern void init_numa_balancing(unsigned long clone_flags, struct task_struct *p);
    1731             : #else /* !CONFIG_NUMA_BALANCING: only init_numa_balancing() gets a stub */
    1732             : static inline void
    1733             : init_numa_balancing(unsigned long clone_flags, struct task_struct *p)
    1734             : {
    1735             : }
    1736             : #endif /* CONFIG_NUMA_BALANCING */
    1737             : 
    1738             : #ifdef CONFIG_SMP
    1739             : /* Link @head, with head->func set to @func, at the front of rq->balance_callback; rq lock must be held */
    1740             : static inline void
    1741             : queue_balance_callback(struct rq *rq,
    1742             :                        struct balance_callback *head,
    1743             :                        void (*func)(struct rq *rq))
    1744             : {
    1745             :         lockdep_assert_rq_held(rq);
    1746             : 
    1747             :         /*
    1748             :          * Don't (re)queue an already queued item; nor queue anything when
    1749             :          * balance_push() is active, see the comment with
    1750             :          * balance_push_callback.
    1751             :          */
    1752             :         if (unlikely(head->next || rq->balance_callback == &balance_push_callback))
    1753             :                 return;
    1754             : 
    1755             :         head->func = func;
    1756             :         head->next = rq->balance_callback;      /* previous list head becomes our successor */
    1757             :         rq->balance_callback = head;
    1758             : }
    1759             : /* rcu_dereference_check() whose extra condition is that sched_domains_mutex is held */
    1760             : #define rcu_dereference_check_sched_domain(p) \
    1761             :         rcu_dereference_check((p), \
    1762             :                               lockdep_is_held(&sched_domains_mutex))
    1763             : 
    1764             : /*
    1765             :  * The domain tree (rq->sd) is protected by RCU's quiescent state transition.
    1766             :  * See destroy_sched_domains: call_rcu for details.
    1767             :  * for_each_domain() starts at cpu_rq(cpu)->sd and follows ->parent until NULL.
    1768             :  * The domain tree of any CPU may only be accessed from within
    1769             :  * preempt-disabled sections.
    1770             :  */
    1771             : #define for_each_domain(cpu, __sd) \
    1772             :         for (__sd = rcu_dereference_check_sched_domain(cpu_rq(cpu)->sd); \
    1773             :                         __sd; __sd = __sd->parent)
    1774             : 
    1775             : /**
    1776             :  * highest_flag_domain - Return highest sched_domain containing flag.
    1777             :  * @cpu:        The CPU whose highest level of sched domain is to
    1778             :  *              be returned.
    1779             :  * @flag:       The flag to check for the highest sched_domain
    1780             :  *              for the given CPU.
    1781             :  *
    1782             :  * Returns the last domain, walking up from the lowest, before one lacking @flag; NULL if none.
    1783             :  */
    1784             : static inline struct sched_domain *highest_flag_domain(int cpu, int flag)
    1785             : {
    1786             :         struct sched_domain *sd, *hsd = NULL;
    1787             : 
    1788             :         for_each_domain(cpu, sd) {
    1789             :                 if (!(sd->flags & flag))
    1790             :                         break;  /* stop at the first level without @flag */
    1791             :                 hsd = sd;
    1792             :         }
    1793             : 
    1794             :         return hsd;
    1795             : }
    1796             : 
    1797             : static inline struct sched_domain *lowest_flag_domain(int cpu, int flag)
    1798             : {
    1799             :         struct sched_domain *sd;
    1800             : 
    1801             :         for_each_domain(cpu, sd) {      /* cpu_rq(cpu)->sd, then ->parent upwards */
    1802             :                 if (sd->flags & flag)
    1803             :                         break;          /* first level with @flag set */
    1804             :         }
    1805             : 
    1806             :         return sd;                      /* NULL when no level had @flag set */
    1807             : }
    1808             : 
    1809             : DECLARE_PER_CPU(struct sched_domain __rcu *, sd_llc);
    1810             : DECLARE_PER_CPU(int, sd_llc_size);
    1811             : DECLARE_PER_CPU(int, sd_llc_id);
    1812             : DECLARE_PER_CPU(struct sched_domain_shared __rcu *, sd_llc_shared);
    1813             : DECLARE_PER_CPU(struct sched_domain __rcu *, sd_numa);
    1814             : DECLARE_PER_CPU(struct sched_domain __rcu *, sd_asym_packing);
    1815             : DECLARE_PER_CPU(struct sched_domain __rcu *, sd_asym_cpucapacity);
    1816             : extern struct static_key_false sched_asym_cpucapacity;
    1817             : 
    1818             : static __always_inline bool sched_asym_cpucap_active(void)
    1819             : {
    1820             :         return static_branch_unlikely(&sched_asym_cpucapacity);
    1821             : }
    1822             : 
    1823             : struct sched_group_capacity {
    1824             :         atomic_t                ref;
    1825             :         /*
    1826             :          * CPU capacity of this group, SCHED_CAPACITY_SCALE being max capacity
    1827             :          * for a single CPU.
    1828             :          */
    1829             :         unsigned long           capacity;
    1830             :         unsigned long           min_capacity;           /* Min per-CPU capacity in group */
    1831             :         unsigned long           max_capacity;           /* Max per-CPU capacity in group */
    1832             :         unsigned long           next_update;
    1833             :         int                     imbalance;              /* XXX unrelated to capacity but shared group state */
    1834             : 
    1835             : #ifdef CONFIG_SCHED_DEBUG
    1836             :         int                     id;
    1837             : #endif
    1838             : 
    1839             :         unsigned long           cpumask[];              /* Balance mask */
    1840             : };
    1841             : 
    1842             : struct sched_group {
    1843             :         struct sched_group      *next;                  /* Must be a circular list */
    1844             :         atomic_t                ref;
    1845             : 
    1846             :         unsigned int            group_weight;
    1847             :         struct sched_group_capacity *sgc;
    1848             :         int                     asym_prefer_cpu;        /* CPU of highest priority in group */
    1849             :         int                     flags;
    1850             : 
    1851             :         /*
    1852             :          * The CPUs this group covers.
    1853             :          *
    1854             :          * NOTE: this field is variable length. (Allocated dynamically
    1855             :          * by attaching extra space to the end of the structure,
    1856             :          * depending on how many CPUs the kernel has booted up with)
    1857             :          */
    1858             :         unsigned long           cpumask[];
    1859             : };
    1860             : 
    1861             : static inline struct cpumask *sched_group_span(struct sched_group *sg)
    1862             : {
    1863             :         return to_cpumask(sg->cpumask);
    1864             : }
    1865             : 
    1866             : /*
    1867             :  * See build_balance_mask().
    1868             :  */
    1869             : static inline struct cpumask *group_balance_mask(struct sched_group *sg)
    1870             : {
    1871             :         return to_cpumask(sg->sgc->cpumask);
    1872             : }
    1873             : 
    1874             : extern int group_balance_cpu(struct sched_group *sg);
    1875             : 
    1876             : #ifdef CONFIG_SCHED_DEBUG
    1877             : void update_sched_domain_debugfs(void);
    1878             : void dirty_sched_domain_sysctl(int cpu);
    1879             : #else
    1880             : static inline void update_sched_domain_debugfs(void)
    1881             : {
    1882             : }
    1883             : static inline void dirty_sched_domain_sysctl(int cpu)
    1884             : {
    1885             : }
    1886             : #endif
    1887             : 
    1888             : extern int sched_update_scaling(void);
    1889             : 
    1890             : static inline const struct cpumask *task_user_cpus(struct task_struct *p)
    1891             : {
    1892             :         if (!p->user_cpus_ptr)
    1893             :                 return cpu_possible_mask; /* &init_task.cpus_mask */
    1894             :         return p->user_cpus_ptr;
    1895             : }
    1896             : #endif /* CONFIG_SMP */
    1897             : 
    1898             : #include "stats.h"
    1899             : 
    1900             : #if defined(CONFIG_SCHED_CORE) && defined(CONFIG_SCHEDSTATS)
    1901             : 
    1902             : extern void __sched_core_account_forceidle(struct rq *rq);
    1903             : 
    1904             : static inline void sched_core_account_forceidle(struct rq *rq)
    1905             : {
    1906             :         if (schedstat_enabled())
    1907             :                 __sched_core_account_forceidle(rq);
    1908             : }
    1909             : 
    1910             : extern void __sched_core_tick(struct rq *rq);
    1911             : 
    1912             : static inline void sched_core_tick(struct rq *rq)
    1913             : {
    1914             :         if (sched_core_enabled(rq) && schedstat_enabled())
    1915             :                 __sched_core_tick(rq);
    1916             : }
    1917             : 
    1918             : #else
    1919             : 
    1920             : static inline void sched_core_account_forceidle(struct rq *rq) {}
    1921             : 
    1922             : static inline void sched_core_tick(struct rq *rq) {}
    1923             : 
    1924             : #endif /* CONFIG_SCHED_CORE && CONFIG_SCHEDSTATS */
    1925             : 
    1926             : #ifdef CONFIG_CGROUP_SCHED
    1927             : 
    1928             : /*
    1929             :  * Return the group to which this task belongs.
    1930             :  *
    1931             :  * We cannot use task_css() and friends because the cgroup subsystem
    1932             :  * changes that value before the cgroup_subsys::attach() method is called,
    1933             :  * therefore we cannot pin it and might observe the wrong value.
    1934             :  *
    1935             :  * The same is true for autogroup's p->signal->autogroup->tg, the autogroup
    1936             :  * core changes this before calling sched_move_task().
    1937             :  *
    1938             :  * Instead we use a 'copy' which is updated from sched_move_task() while
    1939             :  * holding both task_struct::pi_lock and rq::lock.
    1940             :  */
    1941             : static inline struct task_group *task_group(struct task_struct *p)
    1942             : {
    1943             :         return p->sched_task_group;
    1944             : }
    1945             : 
    1946             : /* Change a task's cfs_rq and parent entity if it moves across CPUs/groups */
    1947             : static inline void set_task_rq(struct task_struct *p, unsigned int cpu)
    1948             : {
    1949             : #if defined(CONFIG_FAIR_GROUP_SCHED) || defined(CONFIG_RT_GROUP_SCHED)
    1950             :         struct task_group *tg = task_group(p);
    1951             : #endif
    1952             : 
    1953             : #ifdef CONFIG_FAIR_GROUP_SCHED
    1954             :         set_task_rq_fair(&p->se, p->se.cfs_rq, tg->cfs_rq[cpu]);
    1955             :         p->se.cfs_rq = tg->cfs_rq[cpu];
    1956             :         p->se.parent = tg->se[cpu];
    1957             :         p->se.depth = tg->se[cpu] ? tg->se[cpu]->depth + 1 : 0;
    1958             : #endif
    1959             : 
    1960             : #ifdef CONFIG_RT_GROUP_SCHED
    1961             :         p->rt.rt_rq  = tg->rt_rq[cpu];
    1962             :         p->rt.parent = tg->rt_se[cpu];
    1963             : #endif
    1964             : }
    1965             : 
    1966             : #else /* CONFIG_CGROUP_SCHED */
    1967             : 
    1968             : static inline void set_task_rq(struct task_struct *p, unsigned int cpu) { }
    1969             : static inline struct task_group *task_group(struct task_struct *p)
    1970             : {
    1971             :         return NULL;
    1972             : }
    1973             : 
    1974             : #endif /* CONFIG_CGROUP_SCHED */
    1975             : 
    1976             : static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu)
    1977             : {
    1978         383 :         set_task_rq(p, cpu);
    1979             : #ifdef CONFIG_SMP
    1980             :         /*
    1981             :          * After ->cpu is set up to a new value, task_rq_lock(p, ...) can be
    1982             :          * successfully executed on another CPU. We must ensure that updates of
    1983             :          * per-task data have been completed by this moment.
    1984             :          */
    1985             :         smp_wmb();
    1986             :         WRITE_ONCE(task_thread_info(p)->cpu, cpu);
    1987             :         p->wake_cpu = cpu;
    1988             : #endif
    1989             : }
    1990             : 
    1991             : /*
    1992             :  * Tunables that become constants when CONFIG_SCHED_DEBUG is off:
    1993             :  */
    1994             : #ifdef CONFIG_SCHED_DEBUG
    1995             : # define const_debug __read_mostly
    1996             : #else
    1997             : # define const_debug const
    1998             : #endif
    1999             : 
    2000             : #define SCHED_FEAT(name, enabled)       \
    2001             :         __SCHED_FEAT_##name ,
    2002             : 
    2003             : enum {
    2004             : #include "features.h"
    2005             :         __SCHED_FEAT_NR,
    2006             : };
    2007             : 
    2008             : #undef SCHED_FEAT
    2009             : 
    2010             : #ifdef CONFIG_SCHED_DEBUG
    2011             : 
    2012             : /*
    2013             :  * To support run-time toggling of sched features, all the translation units
    2014             :  * (but core.c) reference the sysctl_sched_features defined in core.c.
    2015             :  */
    2016             : extern const_debug unsigned int sysctl_sched_features;
    2017             : 
    2018             : #ifdef CONFIG_JUMP_LABEL
    2019             : #define SCHED_FEAT(name, enabled)                                       \
    2020             : static __always_inline bool static_branch_##name(struct static_key *key) \
    2021             : {                                                                       \
    2022             :         return static_key_##enabled(key);                               \
    2023             : }
    2024             : 
    2025             : #include "features.h"
    2026             : #undef SCHED_FEAT
    2027             : 
    2028             : extern struct static_key sched_feat_keys[__SCHED_FEAT_NR];
    2029             : #define sched_feat(x) (static_branch_##x(&sched_feat_keys[__SCHED_FEAT_##x]))
    2030             : 
    2031             : #else /* !CONFIG_JUMP_LABEL */
    2032             : 
    2033             : #define sched_feat(x) (sysctl_sched_features & (1UL << __SCHED_FEAT_##x))
    2034             : 
    2035             : #endif /* CONFIG_JUMP_LABEL */
    2036             : 
    2037             : #else /* !SCHED_DEBUG */
    2038             : 
    2039             : /*
    2040             :  * Each translation unit has its own copy of sysctl_sched_features to allow
    2041             :  * constant propagation at compile time and compiler optimization based on
    2042             :  * the feature defaults.
    2043             :  */
    2044             : #define SCHED_FEAT(name, enabled)       \
    2045             :         (1UL << __SCHED_FEAT_##name) * enabled |
    2046             : static const_debug __maybe_unused unsigned int sysctl_sched_features =
    2047             : #include "features.h"
    2048             :         0;
    2049             : #undef SCHED_FEAT
    2050             : 
    2051             : #define sched_feat(x) !!(sysctl_sched_features & (1UL << __SCHED_FEAT_##x))
    2052             : 
    2053             : #endif /* SCHED_DEBUG */
    2054             : 
    2055             : extern struct static_key_false sched_numa_balancing;
    2056             : extern struct static_key_false sched_schedstats;
    2057             : 
    2058             : static inline u64 global_rt_period(void)
    2059             : {
    2060           4 :         return (u64)sysctl_sched_rt_period * NSEC_PER_USEC;
    2061             : }
    2062             : 
    2063             : static inline u64 global_rt_runtime(void)
    2064             : {
    2065           4 :         if (sysctl_sched_rt_runtime < 0)
    2066             :                 return RUNTIME_INF;     /* negative sysctl value is not scaled */
    2067             : 
    2068           4 :         return (u64)sysctl_sched_rt_runtime * NSEC_PER_USEC;    /* widened to u64 before the multiply; presumably usecs -> nsecs */
    2069             : }
    2070             : 
    2071             : static inline int task_current(struct rq *rq, struct task_struct *p)
    2072             : {
    2073             :         return rq->curr == p;
    2074             : }
    2075             : 
    2076             : static inline int task_on_cpu(struct rq *rq, struct task_struct *p)
    2077             : {
    2078             : #ifdef CONFIG_SMP
    2079             :         return p->on_cpu;               /* SMP: per-task on_cpu field; @rq is not consulted */
    2080             : #else
    2081           0 :         return task_current(rq, p);     /* !SMP: non-zero iff p is rq->curr */
    2082             : #endif
    2083             : }
    2084             : 
    2085             : static inline int task_on_rq_queued(struct task_struct *p)
    2086             : {
    2087           0 :         return p->on_rq == TASK_ON_RQ_QUEUED;
    2088             : }
    2089             : 
    2090             : static inline int task_on_rq_migrating(struct task_struct *p)
    2091             : {
    2092        5653 :         return READ_ONCE(p->on_rq) == TASK_ON_RQ_MIGRATING;
    2093             : }
    2094             : 
    2095             : /* Wake flags. The first three directly map to some SD flag value */
    2096             : #define WF_EXEC     0x02 /* Wakeup after exec; maps to SD_BALANCE_EXEC */
    2097             : #define WF_FORK     0x04 /* Wakeup after fork; maps to SD_BALANCE_FORK */
    2098             : #define WF_TTWU     0x08 /* Wakeup;            maps to SD_BALANCE_WAKE */
    2099             : 
    2100             : #define WF_SYNC     0x10 /* Waker goes to sleep after wakeup */
    2101             : #define WF_MIGRATED 0x20 /* Internal use, task got migrated */
    2102             : 
    2103             : #ifdef CONFIG_SMP
    2104             : static_assert(WF_EXEC == SD_BALANCE_EXEC);
    2105             : static_assert(WF_FORK == SD_BALANCE_FORK);
    2106             : static_assert(WF_TTWU == SD_BALANCE_WAKE);
    2107             : #endif
    2108             : 
    2109             : /*
    2110             :  * To aid in avoiding the subversion of "niceness" due to uneven distribution
    2111             :  * of tasks with abnormal "nice" values across CPUs the contribution that
    2112             :  * each task makes to its run queue's load is weighted according to its
    2113             :  * scheduling class and "nice" value. For SCHED_NORMAL tasks this is just a
    2114             :  * scaled version of the new time slice allocation that they receive on time
    2115             :  * slice expiry etc.
    2116             :  */
    2117             : 
    2118             : #define WEIGHT_IDLEPRIO         3
    2119             : #define WMULT_IDLEPRIO          1431655765
    2120             : 
    2121             : extern const int                sched_prio_to_weight[40];
    2122             : extern const u32                sched_prio_to_wmult[40];
    2123             : 
    2124             : /*
    2125             :  * {de,en}queue flags:
    2126             :  *
    2127             :  * DEQUEUE_SLEEP  - task is no longer runnable
    2128             :  * ENQUEUE_WAKEUP - task just became runnable
    2129             :  *
    2130             :  * SAVE/RESTORE - an otherwise spurious dequeue/enqueue, done to ensure tasks
    2131             :  *                are in a known state which allows modification. Such pairs
    2132             :  *                should preserve as much state as possible.
    2133             :  *
    2134             :  * MOVE - paired with SAVE/RESTORE, explicitly does not preserve the location
    2135             :  *        in the runqueue.
    2136             :  *
    2137             :  * ENQUEUE_HEAD      - place at front of runqueue (tail if not specified)
    2138             :  * ENQUEUE_REPLENISH - CBS (replenish runtime and postpone deadline)
    2139             :  * ENQUEUE_MIGRATED  - the task was migrated during wakeup
    2140             :  *
    2141             :  */
    2142             : 
    2143             : #define DEQUEUE_SLEEP           0x01
    2144             : #define DEQUEUE_SAVE            0x02 /* Matches ENQUEUE_RESTORE */
    2145             : #define DEQUEUE_MOVE            0x04 /* Matches ENQUEUE_MOVE */
    2146             : #define DEQUEUE_NOCLOCK         0x08 /* Matches ENQUEUE_NOCLOCK */
    2147             : 
    2148             : #define ENQUEUE_WAKEUP          0x01
    2149             : #define ENQUEUE_RESTORE         0x02
    2150             : #define ENQUEUE_MOVE            0x04
    2151             : #define ENQUEUE_NOCLOCK         0x08
    2152             : 
    2153             : #define ENQUEUE_HEAD            0x10
    2154             : #define ENQUEUE_REPLENISH       0x20
    2155             : #ifdef CONFIG_SMP
    2156             : #define ENQUEUE_MIGRATED        0x40
    2157             : #else
    2158             : #define ENQUEUE_MIGRATED        0x00
    2159             : #endif
    2160             : 
    2161             : #define RETRY_TASK              ((void *)-1UL)
    2162             : 
    2163             : struct affinity_context {
    2164             :         const struct cpumask *new_mask;
    2165             :         struct cpumask *user_mask;
    2166             :         unsigned int flags;
    2167             : };
    2168             : 
    2169             : struct sched_class {
    2170             : 
    2171             : #ifdef CONFIG_UCLAMP_TASK
    2172             :         int uclamp_enabled;
    2173             : #endif
    2174             : 
    2175             :         void (*enqueue_task) (struct rq *rq, struct task_struct *p, int flags);
    2176             :         void (*dequeue_task) (struct rq *rq, struct task_struct *p, int flags);
    2177             :         void (*yield_task)   (struct rq *rq);
    2178             :         bool (*yield_to_task)(struct rq *rq, struct task_struct *p);
    2179             : 
    2180             :         void (*check_preempt_curr)(struct rq *rq, struct task_struct *p, int flags);
    2181             : 
    2182             :         struct task_struct *(*pick_next_task)(struct rq *rq);
    2183             : 
    2184             :         void (*put_prev_task)(struct rq *rq, struct task_struct *p);
    2185             :         void (*set_next_task)(struct rq *rq, struct task_struct *p, bool first);
    2186             : 
    2187             : #ifdef CONFIG_SMP
    2188             :         int (*balance)(struct rq *rq, struct task_struct *prev, struct rq_flags *rf);
    2189             :         int  (*select_task_rq)(struct task_struct *p, int task_cpu, int flags);
    2190             : 
    2191             :         struct task_struct * (*pick_task)(struct rq *rq);
    2192             : 
    2193             :         void (*migrate_task_rq)(struct task_struct *p, int new_cpu);
    2194             : 
    2195             :         void (*task_woken)(struct rq *this_rq, struct task_struct *task);
    2196             : 
    2197             :         void (*set_cpus_allowed)(struct task_struct *p, struct affinity_context *ctx);
    2198             : 
    2199             :         void (*rq_online)(struct rq *rq);
    2200             :         void (*rq_offline)(struct rq *rq);
    2201             : 
    2202             :         struct rq *(*find_lock_rq)(struct task_struct *p, struct rq *rq);
    2203             : #endif
    2204             : 
    2205             :         void (*task_tick)(struct rq *rq, struct task_struct *p, int queued);
    2206             :         void (*task_fork)(struct task_struct *p);
    2207             :         void (*task_dead)(struct task_struct *p);
    2208             : 
    2209             :         /*
    2210             :          * The switched_from() call is allowed to drop rq->lock, therefore we
    2211             :          * cannot assume the switched_from/switched_to pair is serialized by
    2212             :          * rq->lock. They are however serialized by p->pi_lock.
    2213             :          */
    2214             :         void (*switched_from)(struct rq *this_rq, struct task_struct *task);
    2215             :         void (*switched_to)  (struct rq *this_rq, struct task_struct *task);
    2216             :         void (*prio_changed) (struct rq *this_rq, struct task_struct *task,
    2217             :                               int oldprio);
    2218             : 
    2219             :         unsigned int (*get_rr_interval)(struct rq *rq,
    2220             :                                         struct task_struct *task);
    2221             : 
    2222             :         void (*update_curr)(struct rq *rq);
    2223             : 
    2224             : #ifdef CONFIG_FAIR_GROUP_SCHED
    2225             :         void (*task_change_group)(struct task_struct *p);
    2226             : #endif
    2227             : 
    2228             : #ifdef CONFIG_SCHED_CORE
    2229             :         int (*task_is_throttled)(struct task_struct *p, int cpu);
    2230             : #endif
    2231             : };
    2232             : 
    2233        2517 : static inline void put_prev_task(struct rq *rq, struct task_struct *prev)
    2234             : {
    2235        2517 :         WARN_ON_ONCE(rq->curr != prev);        /* @prev is expected to be rq->curr */
    2236        2517 :         prev->sched_class->put_prev_task(rq, prev);    /* dispatch through prev's own class */
    2237        2517 : }
    2238             : 
    2239             : static inline void set_next_task(struct rq *rq, struct task_struct *next)
    2240             : {
    2241           4 :         next->sched_class->set_next_task(rq, next, false);
    2242             : }
    2243             : 
    2244             : 
    2245             : /*
    2246             :  * Helper to define a sched_class instance; each one is placed in a separate
    2247             :  * section which is ordered by the linker script:
    2248             :  *
    2249             :  *   include/asm-generic/vmlinux.lds.h
    2250             :  *
    2251             :  * *CAREFUL* they are laid out in *REVERSE* order!!!
    2252             :  *
    2253             :  * Also enforce alignment on the instance, not the type, to guarantee layout.
    2254             :  */
    2255             : #define DEFINE_SCHED_CLASS(name) \
    2256             : const struct sched_class name##_sched_class \
    2257             :         __aligned(__alignof__(struct sched_class)) \
    2258             :         __section("__" #name "_sched_class")
    2259             : 
    2260             : /* Defined in include/asm-generic/vmlinux.lds.h */
    2261             : extern struct sched_class __sched_class_highest[];
    2262             : extern struct sched_class __sched_class_lowest[];
    2263             : 
    2264             : #define for_class_range(class, _from, _to) \
    2265             :         for (class = (_from); class < (_to); class++)
    2266             : 
    2267             : #define for_each_class(class) \
    2268             :         for_class_range(class, __sched_class_highest, __sched_class_lowest)
    2269             : 
    2270             : #define sched_class_above(_a, _b)       ((_a) < (_b))
    2271             : 
    2272             : extern const struct sched_class stop_sched_class;
    2273             : extern const struct sched_class dl_sched_class;
    2274             : extern const struct sched_class rt_sched_class;
    2275             : extern const struct sched_class fair_sched_class;
    2276             : extern const struct sched_class idle_sched_class;
    2277             : 
    2278             : static inline bool sched_stop_runnable(struct rq *rq)
    2279             : {
    2280             :         return rq->stop && task_on_rq_queued(rq->stop);
    2281             : }
    2282             : 
    2283             : static inline bool sched_dl_runnable(struct rq *rq)
    2284             : {
    2285             :         return rq->dl.dl_nr_running > 0;
    2286             : }
    2287             : 
    2288             : static inline bool sched_rt_runnable(struct rq *rq)
    2289             : {
    2290             :         return rq->rt.rt_queued > 0;
    2291             : }
    2292             : 
    2293             : static inline bool sched_fair_runnable(struct rq *rq)
    2294             : {
    2295             :         return rq->cfs.nr_running > 0;
    2296             : }
    2297             : 
    2298             : extern struct task_struct *pick_next_task_fair(struct rq *rq, struct task_struct *prev, struct rq_flags *rf);
    2299             : extern struct task_struct *pick_next_task_idle(struct rq *rq);
    2300             : 
    2301             : #define SCA_CHECK               0x01
    2302             : #define SCA_MIGRATE_DISABLE     0x02
    2303             : #define SCA_MIGRATE_ENABLE      0x04
    2304             : #define SCA_USER                0x08
    2305             : 
    2306             : #ifdef CONFIG_SMP
    2307             : 
    2308             : extern void update_group_capacity(struct sched_domain *sd, int cpu);
    2309             : 
    2310             : extern void trigger_load_balance(struct rq *rq);
    2311             : 
    2312             : extern void set_cpus_allowed_common(struct task_struct *p, struct affinity_context *ctx);
    2313             : 
    2314             : static inline struct task_struct *get_push_task(struct rq *rq)
    2315             : {
    2316             :         struct task_struct *p = rq->curr;       /* the only candidate is rq->curr */
    2317             : 
    2318             :         lockdep_assert_rq_held(rq);
    2319             : 
    2320             :         if (rq->push_busy)                      /* set below by an earlier successful call */
    2321             :                 return NULL;
    2322             : 
    2323             :         if (p->nr_cpus_allowed == 1)            /* p may run on a single CPU only */
    2324             :                 return NULL;
    2325             : 
    2326             :         if (p->migration_disabled)              /* p currently has migration disabled */
    2327             :                 return NULL;
    2328             : 
    2329             :         rq->push_busy = true;                   /* later calls return NULL until this is cleared elsewhere */
    2330             :         return get_task_struct(p);              /* NOTE(review): presumably takes a reference on p for the caller - confirm */
    2331             : }
    2332             : 
    2333             : extern int push_cpu_stop(void *arg);
    2334             : 
    2335             : #endif
    2336             : 
    2337             : #ifdef CONFIG_CPU_IDLE
    2338             : static inline void idle_set_state(struct rq *rq,
    2339             :                                   struct cpuidle_state *idle_state)
    2340             : {
    2341             :         rq->idle_state = idle_state;
    2342             : }
    2343             : 
    2344             : static inline struct cpuidle_state *idle_get_state(struct rq *rq)
    2345             : {
    2346             :         SCHED_WARN_ON(!rcu_read_lock_held());   /* callers are expected to be in an RCU read-side section */
    2347             : 
    2348             :         return rq->idle_state;                  /* whatever idle_set_state() last stored for this rq */
    2349             : }
    2350             : #else
    2351             : static inline void idle_set_state(struct rq *rq,
    2352             :                                   struct cpuidle_state *idle_state)
    2353             : {
    2354             : }
    2355             : 
    2356             : static inline struct cpuidle_state *idle_get_state(struct rq *rq)
    2357             : {
    2358             :         return NULL;
    2359             : }
    2360             : #endif
    2361             : 
    2362             : extern void schedule_idle(void);
    2363             : 
    2364             : extern void sysrq_sched_debug_show(void);
    2365             : extern void sched_init_granularity(void);
    2366             : extern void update_max_interval(void);
    2367             : 
    2368             : extern void init_sched_dl_class(void);
    2369             : extern void init_sched_rt_class(void);
    2370             : extern void init_sched_fair_class(void);
    2371             : 
    2372             : extern void reweight_task(struct task_struct *p, int prio);
    2373             : 
    2374             : extern void resched_curr(struct rq *rq);
    2375             : extern void resched_cpu(int cpu);
    2376             : 
    2377             : extern struct rt_bandwidth def_rt_bandwidth;
    2378             : extern void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime);
    2379             : extern bool sched_rt_bandwidth_account(struct rt_rq *rt_rq);
    2380             : 
    2381             : extern void init_dl_bandwidth(struct dl_bandwidth *dl_b, u64 period, u64 runtime);
    2382             : extern void init_dl_task_timer(struct sched_dl_entity *dl_se);
    2383             : extern void init_dl_inactive_task_timer(struct sched_dl_entity *dl_se);
    2384             : 
    2385             : #define BW_SHIFT                20
    2386             : #define BW_UNIT                 (1 << BW_SHIFT)
    2387             : #define RATIO_SHIFT             8
    2388             : #define MAX_BW_BITS             (64 - BW_SHIFT)
    2389             : #define MAX_BW                  ((1ULL << MAX_BW_BITS) - 1)
    2390             : unsigned long to_ratio(u64 period, u64 runtime);
    2391             : 
    2392             : extern void init_entity_runnable_average(struct sched_entity *se);
    2393             : extern void post_init_entity_util_avg(struct task_struct *p);
    2394             : 
    2395             : #ifdef CONFIG_NO_HZ_FULL
    2396             : extern bool sched_can_stop_tick(struct rq *rq);
    2397             : extern int __init sched_tick_offload_init(void);
    2398             : 
    2399             : /*
    2400             :  * Tick may be needed by tasks in the runqueue depending on their policy and
    2401             :  * requirements. If tick is needed, let's send the target an IPI to kick it out of
    2402             :  * nohz mode if necessary.
    2403             :  */
    2404             : static inline void sched_update_tick_dependency(struct rq *rq)
    2405             : {
    2406             :         int cpu = cpu_of(rq);
    2407             : 
    2408             :         if (!tick_nohz_full_cpu(cpu))
    2409             :                 return;
    2410             : 
    2411             :         if (sched_can_stop_tick(rq))
    2412             :                 tick_nohz_dep_clear_cpu(cpu, TICK_DEP_BIT_SCHED);
    2413             :         else
    2414             :                 tick_nohz_dep_set_cpu(cpu, TICK_DEP_BIT_SCHED);
    2415             : }
    2416             : #else
    2417             : static inline int sched_tick_offload_init(void) { return 0; }
    2418             : static inline void sched_update_tick_dependency(struct rq *rq) { }
    2419             : #endif
    2420             : 
    2421             : static inline void add_nr_running(struct rq *rq, unsigned count)
    2422             : {
    2423        2446 :         unsigned prev_nr = rq->nr_running;
    2424             : 
    2425        2446 :         rq->nr_running = prev_nr + count;
    2426             :         if (trace_sched_update_nr_running_tp_enabled()) {
    2427             :                 call_trace_sched_update_nr_running(rq, count);
    2428             :         }
    2429             : 
    2430             : #ifdef CONFIG_SMP
    2431             :         if (prev_nr < 2 && rq->nr_running >= 2) {
    2432             :                 if (!READ_ONCE(rq->rd->overload))
    2433             :                         WRITE_ONCE(rq->rd->overload, 1);
    2434             :         }
    2435             : #endif
    2436             : 
    2437        2446 :         sched_update_tick_dependency(rq);
    2438             : }
    2439             : 
    2440             : static inline void sub_nr_running(struct rq *rq, unsigned count)
    2441             : {
    2442        2444 :         rq->nr_running -= count;
    2443             :         if (trace_sched_update_nr_running_tp_enabled()) {
    2444             :                 call_trace_sched_update_nr_running(rq, -count);
    2445             :         }
    2446             : 
    2447             :         /* Check if we still need preemption */
    2448        2444 :         sched_update_tick_dependency(rq);
    2449             : }
    2450             : 
    2451             : extern void activate_task(struct rq *rq, struct task_struct *p, int flags);
    2452             : extern void deactivate_task(struct rq *rq, struct task_struct *p, int flags);
    2453             : 
    2454             : extern void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags);
    2455             : 
    2456             : #ifdef CONFIG_PREEMPT_RT
    2457             : #define SCHED_NR_MIGRATE_BREAK 8
    2458             : #else
    2459             : #define SCHED_NR_MIGRATE_BREAK 32
    2460             : #endif
    2461             : 
    2462             : extern const_debug unsigned int sysctl_sched_nr_migrate;
    2463             : extern const_debug unsigned int sysctl_sched_migration_cost;
    2464             : 
    2465             : #ifdef CONFIG_SCHED_DEBUG
    2466             : extern unsigned int sysctl_sched_latency;
    2467             : extern unsigned int sysctl_sched_min_granularity;
    2468             : extern unsigned int sysctl_sched_idle_min_granularity;
    2469             : extern unsigned int sysctl_sched_wakeup_granularity;
    2470             : extern int sysctl_resched_latency_warn_ms;
    2471             : extern int sysctl_resched_latency_warn_once;
    2472             : 
    2473             : extern unsigned int sysctl_sched_tunable_scaling;
    2474             : 
    2475             : extern unsigned int sysctl_numa_balancing_scan_delay;
    2476             : extern unsigned int sysctl_numa_balancing_scan_period_min;
    2477             : extern unsigned int sysctl_numa_balancing_scan_period_max;
    2478             : extern unsigned int sysctl_numa_balancing_scan_size;
    2479             : extern unsigned int sysctl_numa_balancing_hot_threshold;
    2480             : #endif
    2481             : 
    2482             : #ifdef CONFIG_SCHED_HRTICK
    2483             : 
    2484             : /*
    2485             :  * Use hrtick when:
    2486             :  *  - enabled by features
    2487             :  *  - hrtimer is actually high res
    2488             :  */
    2489             : static inline int hrtick_enabled(struct rq *rq)
    2490             : {
    2491             :         if (!cpu_active(cpu_of(rq)))
    2492             :                 return 0;
    2493             :         return hrtimer_is_hres_active(&rq->hrtick_timer);
    2494             : }
    2495             : 
    2496             : static inline int hrtick_enabled_fair(struct rq *rq)
    2497             : {
    2498             :         if (!sched_feat(HRTICK))
    2499             :                 return 0;
    2500             :         return hrtick_enabled(rq);
    2501             : }
    2502             : 
    2503             : static inline int hrtick_enabled_dl(struct rq *rq)
    2504             : {
    2505             :         if (!sched_feat(HRTICK_DL))
    2506             :                 return 0;
    2507             :         return hrtick_enabled(rq);
    2508             : }
    2509             : 
    2510             : void hrtick_start(struct rq *rq, u64 delay);
    2511             : 
    2512             : #else
    2513             : 
    2514             : static inline int hrtick_enabled_fair(struct rq *rq)
    2515             : {
    2516             :         return 0;
    2517             : }
    2518             : 
    2519             : static inline int hrtick_enabled_dl(struct rq *rq)
    2520             : {
    2521             :         return 0;
    2522             : }
    2523             : 
    2524             : static inline int hrtick_enabled(struct rq *rq)
    2525             : {
    2526             :         return 0;
    2527             : }
    2528             : 
    2529             : #endif /* CONFIG_SCHED_HRTICK */
    2530             : 
    2531             : #ifndef arch_scale_freq_tick
    2532             : static __always_inline
    2533             : void arch_scale_freq_tick(void)
    2534             : {
    2535             : }
    2536             : #endif
    2537             : 
    2538             : #ifndef arch_scale_freq_capacity
    2539             : /**
    2540             :  * arch_scale_freq_capacity - get the frequency scale factor of a given CPU.
    2541             :  * @cpu: the CPU in question.
    2542             :  *
    2543             :  * Return: the frequency scale factor normalized against SCHED_CAPACITY_SCALE, i.e.
    2544             :  *
    2545             :  *     f_curr
    2546             :  *     ------ * SCHED_CAPACITY_SCALE
    2547             :  *     f_max
    2548             :  */
    2549             : static __always_inline
    2550             : unsigned long arch_scale_freq_capacity(int cpu)
    2551             : {
    2552             :         return SCHED_CAPACITY_SCALE;
    2553             : }
    2554             : #endif
    2555             : 
    2556             : #ifdef CONFIG_SCHED_DEBUG
    2557             : /*
    2558             :  * In double_lock_balance()/double_rq_lock(), we use raw_spin_rq_lock() to
    2559             :  * acquire rq lock instead of rq_lock(). So at the end of these two functions
    2560             :  * we need to call double_rq_clock_clear_update() to clear RQCF_UPDATED of
    2561             :  * rq->clock_update_flags to avoid the WARN_DOUBLE_CLOCK warning.
    2562             :  */
    2563             : static inline void double_rq_clock_clear_update(struct rq *rq1, struct rq *rq2)
    2564             : {
    2565             :         rq1->clock_update_flags &= (RQCF_REQ_SKIP|RQCF_ACT_SKIP);
    2566             :         /* rq1 == rq2 for !CONFIG_SMP, so just clear RQCF_UPDATED once. */
    2567             : #ifdef CONFIG_SMP
    2568             :         rq2->clock_update_flags &= (RQCF_REQ_SKIP|RQCF_ACT_SKIP);
    2569             : #endif
    2570             : }
    2571             : #else
    2572             : static inline void double_rq_clock_clear_update(struct rq *rq1, struct rq *rq2) {}
    2573             : #endif
    2574             : 
    2575             : #ifdef CONFIG_SMP
    2576             : 
    2577             : static inline bool rq_order_less(struct rq *rq1, struct rq *rq2)
    2578             : {
    2579             : #ifdef CONFIG_SCHED_CORE
    2580             :         /*
    2581             :          * In order to not have {0,2},{1,3} turn into an AB-BA,
    2582             :          * order by core-id first and cpu-id second.
    2583             :          *
    2584             :          * Notably:
    2585             :          *
    2586             :          *      double_rq_lock(0,3); will take core-0, core-1 lock
    2587             :          *      double_rq_lock(1,2); will take core-1, core-0 lock
    2588             :          *
    2589             :          * when only cpu-id is considered.
    2590             :          */
    2591             :         if (rq1->core->cpu < rq2->core->cpu)
    2592             :                 return true;
    2593             :         if (rq1->core->cpu > rq2->core->cpu)
    2594             :                 return false;
    2595             : 
    2596             :         /*
    2597             :          * __sched_core_flip() relies on SMT having cpu-id lock order.
    2598             :          */
    2599             : #endif
    2600             :         return rq1->cpu < rq2->cpu;
    2601             : }
    2602             : 
    2603             : extern void double_rq_lock(struct rq *rq1, struct rq *rq2);
    2604             : 
    2605             : #ifdef CONFIG_PREEMPTION
    2606             : 
    2607             : /*
    2608             :  * fair double_lock_balance: Safely acquires both rq->locks in a fair
    2609             :  * way at the expense of forcing extra atomic operations in all
    2610             :  * invocations.  This assures that the double_lock is acquired using the
    2611             :  * same underlying policy as the spinlock_t on this architecture, which
    2612             :  * reduces latency compared to the unfair variant below.  However, it
    2613             :  * also adds more overhead and therefore may reduce throughput.
    2614             :  */
    2615             : static inline int _double_lock_balance(struct rq *this_rq, struct rq *busiest)
    2616             :         __releases(this_rq->lock)
    2617             :         __acquires(busiest->lock)
    2618             :         __acquires(this_rq->lock)
    2619             : {
    2620             :         raw_spin_rq_unlock(this_rq);
    2621             :         double_rq_lock(this_rq, busiest);
    2622             : 
    2623             :         return 1;
    2624             : }
    2625             : 
    2626             : #else
    2627             : /*
    2628             :  * Unfair double_lock_balance: Optimizes throughput at the expense of
    2629             :  * latency by eliminating extra atomic operations when the locks are
    2630             :  * already in proper order on entry.  This favors lower CPU-ids and will
    2631             :  * grant the double lock to lower CPUs over higher ids under contention,
    2632             :  * regardless of entry order into the function.
    2633             :  */
    2634             : static inline int _double_lock_balance(struct rq *this_rq, struct rq *busiest)
    2635             :         __releases(this_rq->lock)
    2636             :         __acquires(busiest->lock)
    2637             :         __acquires(this_rq->lock)
    2638             : {
    2639             :         if (__rq_lockp(this_rq) == __rq_lockp(busiest) ||
    2640             :             likely(raw_spin_rq_trylock(busiest))) {
    2641             :                 double_rq_clock_clear_update(this_rq, busiest);
    2642             :                 return 0;
    2643             :         }
    2644             : 
    2645             :         if (rq_order_less(this_rq, busiest)) {
    2646             :                 raw_spin_rq_lock_nested(busiest, SINGLE_DEPTH_NESTING);
    2647             :                 double_rq_clock_clear_update(this_rq, busiest);
    2648             :                 return 0;
    2649             :         }
    2650             : 
    2651             :         raw_spin_rq_unlock(this_rq);
    2652             :         double_rq_lock(this_rq, busiest);
    2653             : 
    2654             :         return 1;
    2655             : }
    2656             : 
    2657             : #endif /* CONFIG_PREEMPTION */
    2658             : 
    2659             : /*
    2660             :  * double_lock_balance - lock the busiest runqueue, this_rq is locked already.
    2661             :  */
    2662             : static inline int double_lock_balance(struct rq *this_rq, struct rq *busiest)
    2663             : {
    2664             :         lockdep_assert_irqs_disabled();
    2665             : 
    2666             :         return _double_lock_balance(this_rq, busiest);
    2667             : }
    2668             : 
    2669             : static inline void double_unlock_balance(struct rq *this_rq, struct rq *busiest)
    2670             :         __releases(busiest->lock)
    2671             : {
    2672             :         if (__rq_lockp(this_rq) != __rq_lockp(busiest))
    2673             :                 raw_spin_rq_unlock(busiest);
    2674             :         lock_set_subclass(&__rq_lockp(this_rq)->dep_map, 0, _RET_IP_);
    2675             : }
    2676             : 
    2677             : static inline void double_lock(spinlock_t *l1, spinlock_t *l2)
    2678             : {
    2679             :         if (l1 > l2)
    2680             :                 swap(l1, l2);
    2681             : 
    2682             :         spin_lock(l1);
    2683             :         spin_lock_nested(l2, SINGLE_DEPTH_NESTING);
    2684             : }
    2685             : 
    2686             : static inline void double_lock_irq(spinlock_t *l1, spinlock_t *l2)
    2687             : {
    2688             :         if (l1 > l2)
    2689             :                 swap(l1, l2);
    2690             : 
    2691             :         spin_lock_irq(l1);
    2692             :         spin_lock_nested(l2, SINGLE_DEPTH_NESTING);
    2693             : }
    2694             : 
    2695             : static inline void double_raw_lock(raw_spinlock_t *l1, raw_spinlock_t *l2)
    2696             : {
    2697             :         if (l1 > l2)
    2698             :                 swap(l1, l2);
    2699             : 
    2700             :         raw_spin_lock(l1);
    2701             :         raw_spin_lock_nested(l2, SINGLE_DEPTH_NESTING);
    2702             : }
    2703             : 
    2704             : /*
    2705             :  * double_rq_unlock - safely unlock two runqueues
    2706             :  *
    2707             :  * Note this does not restore interrupts like task_rq_unlock,
    2708             :  * you need to do so manually after calling.
    2709             :  */
    2710             : static inline void double_rq_unlock(struct rq *rq1, struct rq *rq2)
    2711             :         __releases(rq1->lock)
    2712             :         __releases(rq2->lock)
    2713             : {
    2714             :         if (__rq_lockp(rq1) != __rq_lockp(rq2))
    2715             :                 raw_spin_rq_unlock(rq2);
    2716             :         else
    2717             :                 __release(rq2->lock);
    2718             :         raw_spin_rq_unlock(rq1);
    2719             : }
    2720             : 
    2721             : extern void set_rq_online (struct rq *rq);
    2722             : extern void set_rq_offline(struct rq *rq);
    2723             : extern bool sched_smp_initialized;
    2724             : 
    2725             : #else /* CONFIG_SMP */
    2726             : 
    2727             : /*
    2728             :  * double_rq_lock - safely lock two runqueues
    2729             :  *
    2730             :  * Note this does not disable interrupts like task_rq_lock,
    2731             :  * you need to do so manually before calling.
    2732             :  */
    2733           0 : static inline void double_rq_lock(struct rq *rq1, struct rq *rq2)
    2734             :         __acquires(rq1->lock)
    2735             :         __acquires(rq2->lock)
    2736             : {
    2737           0 :         WARN_ON_ONCE(!irqs_disabled());
    2738           0 :         WARN_ON_ONCE(rq1 != rq2);
    2739           0 :         raw_spin_rq_lock(rq1);
    2740             :         __acquire(rq2->lock);        /* Fake it out ;) */
    2741           0 :         double_rq_clock_clear_update(rq1, rq2);
    2742           0 : }
    2743             : 
    2744             : /*
    2745             :  * double_rq_unlock - safely unlock two runqueues
    2746             :  *
    2747             :  * Note this does not restore interrupts like task_rq_unlock,
    2748             :  * you need to do so manually after calling.
    2749             :  */
    2750           0 : static inline void double_rq_unlock(struct rq *rq1, struct rq *rq2)
    2751             :         __releases(rq1->lock)
    2752             :         __releases(rq2->lock)
    2753             : {
    2754           0 :         WARN_ON_ONCE(rq1 != rq2);
    2755           0 :         raw_spin_rq_unlock(rq1);
    2756             :         __release(rq2->lock);
    2757           0 : }
    2758             : 
    2759             : #endif
    2760             : 
    2761             : extern struct sched_entity *__pick_first_entity(struct cfs_rq *cfs_rq);
    2762             : extern struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq);
    2763             : 
    2764             : #ifdef  CONFIG_SCHED_DEBUG
    2765             : extern bool sched_debug_verbose;
    2766             : 
    2767             : extern void print_cfs_stats(struct seq_file *m, int cpu);
    2768             : extern void print_rt_stats(struct seq_file *m, int cpu);
    2769             : extern void print_dl_stats(struct seq_file *m, int cpu);
    2770             : extern void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq);
    2771             : extern void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq);
    2772             : extern void print_dl_rq(struct seq_file *m, int cpu, struct dl_rq *dl_rq);
    2773             : 
    2774             : extern void resched_latency_warn(int cpu, u64 latency);
    2775             : #ifdef CONFIG_NUMA_BALANCING
    2776             : extern void
    2777             : show_numa_stats(struct task_struct *p, struct seq_file *m);
    2778             : extern void
    2779             : print_numa_stats(struct seq_file *m, int node, unsigned long tsf,
    2780             :         unsigned long tpf, unsigned long gsf, unsigned long gpf);
    2781             : #endif /* CONFIG_NUMA_BALANCING */
    2782             : #else
    2783             : static inline void resched_latency_warn(int cpu, u64 latency) {}
    2784             : #endif /* CONFIG_SCHED_DEBUG */
    2785             : 
    2786             : extern void init_cfs_rq(struct cfs_rq *cfs_rq);
    2787             : extern void init_rt_rq(struct rt_rq *rt_rq);
    2788             : extern void init_dl_rq(struct dl_rq *dl_rq);
    2789             : 
    2790             : extern void cfs_bandwidth_usage_inc(void);
    2791             : extern void cfs_bandwidth_usage_dec(void);
    2792             : 
    2793             : #ifdef CONFIG_NO_HZ_COMMON
    2794             : #define NOHZ_BALANCE_KICK_BIT   0
    2795             : #define NOHZ_STATS_KICK_BIT     1
    2796             : #define NOHZ_NEWILB_KICK_BIT    2
    2797             : #define NOHZ_NEXT_KICK_BIT      3
    2798             : 
    2799             : /* Run rebalance_domains() */
    2800             : #define NOHZ_BALANCE_KICK       BIT(NOHZ_BALANCE_KICK_BIT)
    2801             : /* Update blocked load */
    2802             : #define NOHZ_STATS_KICK         BIT(NOHZ_STATS_KICK_BIT)
    2803             : /* Update blocked load when entering idle */
    2804             : #define NOHZ_NEWILB_KICK        BIT(NOHZ_NEWILB_KICK_BIT)
    2805             : /* Update nohz.next_balance */
    2806             : #define NOHZ_NEXT_KICK          BIT(NOHZ_NEXT_KICK_BIT)
    2807             : 
    2808             : #define NOHZ_KICK_MASK  (NOHZ_BALANCE_KICK | NOHZ_STATS_KICK | NOHZ_NEXT_KICK)
    2809             : 
    2810             : #define nohz_flags(cpu) (&cpu_rq(cpu)->nohz_flags)
    2811             : 
    2812             : extern void nohz_balance_exit_idle(struct rq *rq);
    2813             : #else
    2814             : static inline void nohz_balance_exit_idle(struct rq *rq) { }
    2815             : #endif
    2816             : 
    2817             : #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
    2818             : extern void nohz_run_idle_balance(int cpu);
    2819             : #else
    2820             : static inline void nohz_run_idle_balance(int cpu) { }
    2821             : #endif
    2822             : 
    2823             : #ifdef CONFIG_IRQ_TIME_ACCOUNTING
    2824             : struct irqtime {
    2825             :         u64                     total;
    2826             :         u64                     tick_delta;
    2827             :         u64                     irq_start_time;
    2828             :         struct u64_stats_sync   sync;
    2829             : };
    2830             : 
    2831             : DECLARE_PER_CPU(struct irqtime, cpu_irqtime);
    2832             : 
    2833             : /*
    2834             :  * Returns the irqtime minus the softirq time computed by ksoftirqd.
    2835             :  * Otherwise ksoftirqd's sum_exec_runtime would have its own runtime
    2836             :  * subtracted from it and would never move forward.
    2837             :  */
    2838             : static inline u64 irq_time_read(int cpu)
    2839             : {
    2840             :         struct irqtime *irqtime = &per_cpu(cpu_irqtime, cpu);
    2841             :         unsigned int seq;
    2842             :         u64 total;
    2843             : 
    2844             :         do {
    2845             :                 seq = __u64_stats_fetch_begin(&irqtime->sync);
    2846             :                 total = irqtime->total;
    2847             :         } while (__u64_stats_fetch_retry(&irqtime->sync, seq));
    2848             : 
    2849             :         return total;
    2850             : }
    2851             : #endif /* CONFIG_IRQ_TIME_ACCOUNTING */
    2852             : 
    2853             : #ifdef CONFIG_CPU_FREQ
    2854             : DECLARE_PER_CPU(struct update_util_data __rcu *, cpufreq_update_util_data);
    2855             : 
    2856             : /**
    2857             :  * cpufreq_update_util - Take a note about CPU utilization changes.
    2858             :  * @rq: Runqueue to carry out the update for.
    2859             :  * @flags: Update reason flags.
    2860             :  *
    2861             :  * This function is called by the scheduler on the CPU whose utilization is
    2862             :  * being updated.
    2863             :  *
    2864             :  * It can only be called from RCU-sched read-side critical sections.
    2865             :  *
    2866             :  * The way cpufreq is currently arranged requires it to evaluate the CPU
    2867             :  * performance state (frequency/voltage) on a regular basis to prevent it from
    2868             :  * being stuck in a completely inadequate performance level for too long.
    2869             :  * That is not guaranteed to happen if the updates are only triggered from CFS
    2870             :  * and DL, though, because they may not be coming in if only RT tasks are
    2871             :  * active all the time (or there are RT tasks only).
    2872             :  *
    2873             :  * As a workaround for that issue, this function is called periodically by the
    2874             :  * RT sched class to trigger extra cpufreq updates to prevent it from stalling,
    2875             :  * but that really is a band-aid.  Going forward it should be replaced with
    2876             :  * solutions targeted more specifically at RT tasks.
    2877             :  */
    2878             : static inline void cpufreq_update_util(struct rq *rq, unsigned int flags)
    2879             : {
    2880             :         struct update_util_data *data;
    2881             : 
    2882             :         data = rcu_dereference_sched(*per_cpu_ptr(&cpufreq_update_util_data,
    2883             :                                                   cpu_of(rq)));
    2884             :         if (data)
    2885             :                 data->func(data, rq_clock(rq), flags);
    2886             : }
    2887             : #else
    2888             : static inline void cpufreq_update_util(struct rq *rq, unsigned int flags) {}
    2889             : #endif /* CONFIG_CPU_FREQ */
    2890             : 
    2891             : #ifdef arch_scale_freq_capacity
    2892             : # ifndef arch_scale_freq_invariant
    2893             : #  define arch_scale_freq_invariant()   true
    2894             : # endif
    2895             : #else
    2896             : # define arch_scale_freq_invariant()    false
    2897             : #endif
    2898             : 
    2899             : #ifdef CONFIG_SMP
    2900             : static inline unsigned long capacity_orig_of(int cpu)
    2901             : {
    2902             :         return cpu_rq(cpu)->cpu_capacity_orig;
    2903             : }
    2904             : 
    2905             : /**
    2906             :  * enum cpu_util_type - CPU utilization type
    2907             :  * @FREQUENCY_UTIL:     Utilization used to select frequency
    2908             :  * @ENERGY_UTIL:        Utilization used during energy calculation
    2909             :  *
    2910             :  * The utilization signals of all scheduling classes (CFS/RT/DL) and IRQ time
    2911             :  * need to be aggregated differently depending on the usage made of them. This
    2912             :  * enum is used within effective_cpu_util() to differentiate the types of
    2913             :  * utilization expected by the callers, and adjust the aggregation accordingly.
    2914             :  */
    2915             : enum cpu_util_type {
    2916             :         FREQUENCY_UTIL,
    2917             :         ENERGY_UTIL,
    2918             : };
    2919             : 
    2920             : unsigned long effective_cpu_util(int cpu, unsigned long util_cfs,
    2921             :                                  enum cpu_util_type type,
    2922             :                                  struct task_struct *p);
    2923             : 
    2924             : /*
    2925             :  * Verify the fitness of task @p to run on @cpu taking into account the
    2926             :  * CPU original capacity and the runtime/deadline ratio of the task.
    2927             :  *
    2928             :  * The function will return true if the original capacity of @cpu is
    2929             :  * greater than or equal to task's deadline density right shifted by
    2930             :  * (BW_SHIFT - SCHED_CAPACITY_SHIFT) and false otherwise.
    2931             :  */
    2932             : static inline bool dl_task_fits_capacity(struct task_struct *p, int cpu)
    2933             : {
    2934             :         unsigned long cap = arch_scale_cpu_capacity(cpu);
    2935             : 
    2936             :         return cap >= p->dl.dl_density >> (BW_SHIFT - SCHED_CAPACITY_SHIFT);
    2937             : }
    2938             : 
    2939             : static inline unsigned long cpu_bw_dl(struct rq *rq)
    2940             : {
    2941             :         return (rq->dl.running_bw * SCHED_CAPACITY_SCALE) >> BW_SHIFT;
    2942             : }
    2943             : 
    2944             : static inline unsigned long cpu_util_dl(struct rq *rq)
    2945             : {
    2946             :         return READ_ONCE(rq->avg_dl.util_avg);
    2947             : }
    2948             : 
    2949             : /**
    2950             :  * cpu_util_cfs() - Estimates the amount of CPU capacity used by CFS tasks.
    2951             :  * @cpu: the CPU to get the utilization for.
    2952             :  *
    2953             :  * The unit of the return value must be the same as the one of CPU capacity
    2954             :  * so that CPU utilization can be compared with CPU capacity.
    2955             :  *
    2956             :  * CPU utilization is the sum of running time of runnable tasks plus the
    2957             :  * recent utilization of currently non-runnable tasks on that CPU.
    2958             :  * It represents the amount of CPU capacity currently used by CFS tasks in
    2959             :  * the range [0..max CPU capacity] with max CPU capacity being the CPU
    2960             :  * capacity at f_max.
    2961             :  *
    2962             :  * The estimated CPU utilization is defined as the maximum between CPU
    2963             :  * utilization and sum of the estimated utilization of the currently
    2964             :  * runnable tasks on that CPU. It preserves a utilization "snapshot" of
    2965             :  * previously-executed tasks, which helps better deduce how busy a CPU will
    2966             :  * be when a long-sleeping task wakes up. The contribution to CPU utilization
    2967             :  * of such a task would be significantly decayed at this point of time.
    2968             :  *
    2969             :  * CPU utilization can be higher than the current CPU capacity
    2970             :  * (f_curr/f_max * max CPU capacity) or even the max CPU capacity because
    2971             :  * of rounding errors as well as task migrations or wakeups of new tasks.
    2972             :  * CPU utilization has to be capped to fit into the [0..max CPU capacity]
    2973             :  * range. Otherwise a group of CPUs (CPU0 util = 121% + CPU1 util = 80%)
    2974             :  * could be seen as over-utilized even though CPU1 has 20% of spare CPU
    2975             :  * capacity. CPU utilization is allowed to overshoot current CPU capacity
    2976             :  * though since this is useful for predicting the CPU capacity required
    2977             :  * after task migrations (scheduler-driven DVFS).
    2978             :  *
    2979             :  * Return: (Estimated) utilization for the specified CPU.
    2980             :  */
    2981             : static inline unsigned long cpu_util_cfs(int cpu)
    2982             : {
    2983             :         struct cfs_rq *cfs_rq;
    2984             :         unsigned long util;
    2985             : 
    2986             :         cfs_rq = &cpu_rq(cpu)->cfs;
    2987             :         util = READ_ONCE(cfs_rq->avg.util_avg);         /* one load of the running average */
    2988             : 
    2989             :         if (sched_feat(UTIL_EST)) {                     /* also consider the enqueued estimate */
    2990             :                 util = max_t(unsigned long, util,
    2991             :                              READ_ONCE(cfs_rq->avg.util_est.enqueued));
    2992             :         }
    2993             : 
    2994             :         return min(util, capacity_orig_of(cpu));        /* never exceed capacity_orig_of(@cpu) */
    2995             : }
    2996             : 
    2997             : static inline unsigned long cpu_util_rt(struct rq *rq)
    2998             : {       /* Utilization average kept in rq->avg_rt, loaded once with READ_ONCE(). */
    2999             :         return READ_ONCE(rq->avg_rt.util_avg);
    3000             : }
    3001             : #endif
    3002             : 
    3003             : #ifdef CONFIG_UCLAMP_TASK
    3004             : unsigned long uclamp_eff_value(struct task_struct *p, enum uclamp_id clamp_id);
    3005             : 
    3006             : static inline unsigned long uclamp_rq_get(struct rq *rq,
    3007             :                                           enum uclamp_id clamp_id)
    3008             : {       /* Current rq-level clamp value for @clamp_id, loaded with READ_ONCE(). */
    3009             :         return READ_ONCE(rq->uclamp[clamp_id].value);
    3010             : }
    3011             : 
    3012             : static inline void uclamp_rq_set(struct rq *rq, enum uclamp_id clamp_id,
    3013             :                                  unsigned int value)
    3014             : {       /* Store @value as the rq-level clamp for @clamp_id with WRITE_ONCE(). */
    3015             :         WRITE_ONCE(rq->uclamp[clamp_id].value, value);
    3016             : }
    3017             : 
    3018             : static inline bool uclamp_rq_is_idle(struct rq *rq)
    3019             : {       /* True when UCLAMP_FLAG_IDLE is set in rq->uclamp_flags. */
    3020             :         return rq->uclamp_flags & UCLAMP_FLAG_IDLE;
    3021             : }
    3022             : 
    3023             : /**
    3024             :  * uclamp_rq_util_with - clamp @util with @rq and @p effective uclamp values.
    3025             :  * @rq:         The rq to clamp against. Must not be NULL.
    3026             :  * @util:       The util value to clamp.
    3027             :  * @p:          The task to clamp against. Can be NULL if you want to clamp
    3028             :  *              against @rq only.
    3029             :  *
    3030             :  * Clamps the passed @util to the max(@rq, @p) effective uclamp values.
    3031             :  *
    3032             :  * If sched_uclamp_used static key is disabled, then just return the util
    3033             :  * without any clamping since uclamp aggregation at the rq level in the fast
    3034             :  * path is disabled, rendering this operation a NOP.
    3035             :  *
    3036             :  * Use uclamp_eff_value() if you don't care about uclamp values at rq level. It
    3037             :  * will return the correct effective uclamp value of the task even if the
    3038             :  * static key is disabled.
                     :  *
                     :  * Return: @util clamped to [min, max]. When the resulting clamps are
                     :  * inverted or equal (min >= max), the min clamp itself is returned.
    3039             :  */
    3040             : static __always_inline
    3041             : unsigned long uclamp_rq_util_with(struct rq *rq, unsigned long util,
    3042             :                                   struct task_struct *p)
    3043             : {
    3044             :         unsigned long min_util = 0;
    3045             :         unsigned long max_util = 0;
    3046             : 
    3047             :         if (!static_branch_likely(&sched_uclamp_used))
    3048             :                 return util;            /* uclamp not in use: pass @util through */
    3049             : 
    3050             :         if (p) {
    3051             :                 min_util = uclamp_eff_value(p, UCLAMP_MIN);
    3052             :                 max_util = uclamp_eff_value(p, UCLAMP_MAX);
    3053             : 
    3054             :                 /*
    3055             :                  * Ignore last runnable task's max clamp, as this task will
    3056             :                  * reset it. Similarly, no need to read the rq's min clamp.
    3057             :                  */
    3058             :                 if (uclamp_rq_is_idle(rq))
    3059             :                         goto out;
    3060             :         }
    3061             : 
    3062             :         min_util = max_t(unsigned long, min_util, uclamp_rq_get(rq, UCLAMP_MIN));      /* max-aggregate with the rq */
    3063             :         max_util = max_t(unsigned long, max_util, uclamp_rq_get(rq, UCLAMP_MAX));
    3064             : out:
    3065             :         /*
    3066             :          * Since CPU's {min,max}_util clamps are MAX aggregated considering
    3067             :          * RUNNABLE tasks with _different_ clamps, we can end up with an
    3068             :          * inversion. Fix it now when the clamps are applied.
    3069             :          */
    3070             :         if (unlikely(min_util >= max_util))
    3071             :                 return min_util;
    3072             : 
    3073             :         return clamp(util, min_util, max_util);
    3074             : }
    3075             : 
    3076             : /* Is the rq being capped/throttled by uclamp_max? */
    3077             : static inline bool uclamp_rq_is_capped(struct rq *rq)
    3078             : {
    3079             :         unsigned long rq_util;
    3080             :         unsigned long max_util;
    3081             : 
    3082             :         if (!static_branch_likely(&sched_uclamp_used))
    3083             :                 return false;           /* uclamp not in use: nothing can be capped */
    3084             : 
    3085             :         rq_util = cpu_util_cfs(cpu_of(rq)) + cpu_util_rt(rq);   /* CFS + RT utilization of this rq */
    3086             :         max_util = uclamp_rq_get(rq, UCLAMP_MAX);               /* rq-level max clamp via the accessor */
    3087             : 
    3088             :         return max_util != SCHED_CAPACITY_SCALE && rq_util >= max_util; /* a full-scale clamp caps nothing */
    3089             : }
    3090             : 
    3091             : /*
    3092             :  * When uclamp is compiled in, the aggregation at rq level is 'turned off'
    3093             :  * by default in the fast path and only gets turned on once userspace performs
    3094             :  * an operation that requires it.
    3095             :  *
    3096             :  * Returns true if userspace has opted in to uclamp, i.e. aggregation at rq
    3097             :  * level is active.
    3098             :  */
    3099             : static inline bool uclamp_is_used(void)
    3100             : {
    3101             :         return static_branch_likely(&sched_uclamp_used);        /* state of the sched_uclamp_used static key */
    3102             : }
    3103             : #else /* CONFIG_UCLAMP_TASK */
    3104             : static inline unsigned long uclamp_eff_value(struct task_struct *p,
    3105             :                                              enum uclamp_id clamp_id)
    3106             : {       /* !CONFIG_UCLAMP_TASK stub: UCLAMP_MIN yields 0, any other id yields SCHED_CAPACITY_SCALE. */
    3107             :         if (clamp_id == UCLAMP_MIN)
    3108             :                 return 0;
    3109             : 
    3110             :         return SCHED_CAPACITY_SCALE;
    3111             : }
    3112             : 
    3113             : static inline
    3114             : unsigned long uclamp_rq_util_with(struct rq *rq, unsigned long util,
    3115             :                                   struct task_struct *p)
    3116             : {       /* !CONFIG_UCLAMP_TASK stub: @util is returned unclamped. */
    3117             :         return util;
    3118             : }
    3119             : 
    3120             : static inline bool uclamp_rq_is_capped(struct rq *rq) { return false; } /* !CONFIG_UCLAMP_TASK stub: never capped */
    3121             : 
    3122             : static inline bool uclamp_is_used(void)
    3123             : {       /* !CONFIG_UCLAMP_TASK stub: uclamp is never in use. */
    3124             :         return false;
    3125             : }
    3126             : 
    3127             : static inline unsigned long uclamp_rq_get(struct rq *rq,
    3128             :                                           enum uclamp_id clamp_id)
    3129             : {       /* !CONFIG_UCLAMP_TASK stub: UCLAMP_MIN yields 0, any other id yields SCHED_CAPACITY_SCALE. */
    3130             :         if (clamp_id == UCLAMP_MIN)
    3131             :                 return 0;
    3132             : 
    3133             :         return SCHED_CAPACITY_SCALE;
    3134             : }
    3135             : 
    3136             : static inline void uclamp_rq_set(struct rq *rq, enum uclamp_id clamp_id,
    3137             :                                  unsigned int value)
    3138             : {       /* !CONFIG_UCLAMP_TASK stub: nothing is stored. */
    3139             : }
    3140             : 
    3141             : static inline bool uclamp_rq_is_idle(struct rq *rq)
    3142             : {       /* !CONFIG_UCLAMP_TASK stub: always reports false. */
    3143             :         return false;
    3144             : }
    3145             : #endif /* CONFIG_UCLAMP_TASK */
    3146             : 
    3147             : #ifdef CONFIG_HAVE_SCHED_AVG_IRQ
    3148             : static inline unsigned long cpu_util_irq(struct rq *rq)
    3149             : {       /* Utilization average kept in rq->avg_irq; loaded with READ_ONCE() like the other util_avg accessors. */
    3150             :         return READ_ONCE(rq->avg_irq.util_avg);
    3151             : }
    3152             : 
    3153             : static inline
    3154             : unsigned long scale_irq_capacity(unsigned long util, unsigned long irq, unsigned long max)
    3155             : {       /*
                     :          * Scale @util by the share of @max that is left once @irq is
                     :          * subtracted: util * (max - irq) / max, using integer division.
                     :          * NOTE(review): assumes irq <= max and max != 0; nothing here
                     :          * checks it, so confirm the callers guarantee both.
                     :          */
    3156             :         util *= (max - irq);
    3157             :         util /= max;
    3158             : 
    3159             :         return util;
    3160             : 
    3161             : }
    3162             : #else
    3163             : static inline unsigned long cpu_util_irq(struct rq *rq)
    3164             : {       /* !CONFIG_HAVE_SCHED_AVG_IRQ stub: IRQ utilization is reported as 0. */
    3165             :         return 0;
    3166             : }
    3167             : 
    3168             : static inline
    3169             : unsigned long scale_irq_capacity(unsigned long util, unsigned long irq, unsigned long max)
    3170             : {       /* !CONFIG_HAVE_SCHED_AVG_IRQ stub: @util is returned unscaled. */
    3171             :         return util;
    3172             : }
    3173             : #endif
    3174             : 
    3175             : #if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL)
    3176             : 
    3177             : #define perf_domain_span(pd) (to_cpumask(((pd)->em_pd->cpus)))
    3178             : 
    3179             : DECLARE_STATIC_KEY_FALSE(sched_energy_present);
    3180             : 
    3181             : static inline bool sched_energy_enabled(void)
    3182             : {       /* True when the sched_energy_present static key is enabled. */
    3183             :         return static_branch_unlikely(&sched_energy_present);
    3184             : }
    3185             : 
    3186             : #else /* ! (CONFIG_ENERGY_MODEL && CONFIG_CPU_FREQ_GOV_SCHEDUTIL) */
    3187             : 
    3188             : #define perf_domain_span(pd) NULL
    3189             : static inline bool sched_energy_enabled(void) { return false; } /* stub when CONFIG_ENERGY_MODEL or CONFIG_CPU_FREQ_GOV_SCHEDUTIL is off */
    3190             : 
    3191             : #endif /* CONFIG_ENERGY_MODEL && CONFIG_CPU_FREQ_GOV_SCHEDUTIL */
    3192             : 
    3193             : #ifdef CONFIG_MEMBARRIER
    3194             : /*
    3195             :  * The scheduler provides memory barriers required by membarrier between:
    3196             :  * - prior user-space memory accesses and store to rq->membarrier_state,
    3197             :  * - store to rq->membarrier_state and following user-space memory accesses.
    3198             :  * In the same way it provides those guarantees around store to rq->curr.
                     :  *
                     :  * membarrier_switch_mm() copies @next_mm's membarrier_state into
                     :  * rq->membarrier_state. It does nothing when the mm is unchanged or the
                     :  * rq already holds the same value.
    3199             :  */
    3200             : static inline void membarrier_switch_mm(struct rq *rq,
    3201             :                                         struct mm_struct *prev_mm,
    3202             :                                         struct mm_struct *next_mm)
    3203             : {
    3204             :         int membarrier_state;
    3205             : 
    3206           0 :         if (prev_mm == next_mm)         /* same mm: nothing to propagate */
    3207             :                 return;
    3208             : 
    3209           0 :         membarrier_state = atomic_read(&next_mm->membarrier_state);
    3210           0 :         if (READ_ONCE(rq->membarrier_state) == membarrier_state)       /* already up to date: skip the store */
    3211             :                 return;
    3212             : 
    3213           0 :         WRITE_ONCE(rq->membarrier_state, membarrier_state);
    3214             : }
    3215             : #else
    3216             : static inline void membarrier_switch_mm(struct rq *rq,
    3217             :                                         struct mm_struct *prev_mm,
    3218             :                                         struct mm_struct *next_mm)
    3219             : {       /* !CONFIG_MEMBARRIER stub: no per-rq membarrier state to update. */
    3220             : }
    3221             : #endif
    3222             : 
    3223             : #ifdef CONFIG_SMP
    3224             : static inline bool is_per_cpu_kthread(struct task_struct *p)
    3225             : {       /* True only for a kernel thread (PF_KTHREAD) whose nr_cpus_allowed is exactly 1. */
    3226             :         if (!(p->flags & PF_KTHREAD))
    3227             :                 return false;
    3228             : 
    3229             :         if (p->nr_cpus_allowed != 1)
    3230             :                 return false;
    3231             : 
    3232             :         return true;
    3233             : }
    3234             : #endif
    3235             : 
    3236             : extern void swake_up_all_locked(struct swait_queue_head *q);
    3237             : extern void __prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait);
    3238             : 
    3239             : #ifdef CONFIG_PREEMPT_DYNAMIC
    3240             : extern int preempt_dynamic_mode;
    3241             : extern int sched_dynamic_mode(const char *str);
    3242             : extern void sched_dynamic_update(int mode);
    3243             : #endif
    3244             : 
    3245             : static inline void update_current_exec_runtime(struct task_struct *curr,
    3246             :                                                 u64 now, u64 delta_exec)
    3247             : {       /*
                     :          * Charge @delta_exec to @curr: add it to se.sum_exec_runtime and
                     :          * pass it to account_group_exec_runtime() and
                     :          * cgroup_account_cputime(). se.exec_start is reset to @now.
                     :          */
    3248           0 :         curr->se.sum_exec_runtime += delta_exec;
    3249           0 :         account_group_exec_runtime(curr, delta_exec);
    3250             : 
    3251           0 :         curr->se.exec_start = now;
    3252           0 :         cgroup_account_cputime(curr, delta_exec);
    3253             : }
    3254             : 
    3255             : #ifdef CONFIG_SCHED_MM_CID
    3256             : 
    3257             : #define SCHED_MM_CID_PERIOD_NS  (100ULL * 1000000)      /* 100ms */
    3258             : #define MM_CID_SCAN_DELAY       100                     /* 100ms */
    3259             : 
    3260             : extern raw_spinlock_t cid_lock;         /* held by __mm_cid_get() on its locked allocation path */
    3261             : extern int use_cid_lock;                /* non-zero: __mm_cid_get() allocates cids under cid_lock */
    3262             : 
    3263             : extern void sched_mm_cid_migrate_from(struct task_struct *t);
    3264             : extern void sched_mm_cid_migrate_to(struct rq *dst_rq, struct task_struct *t);
    3265             : extern void task_tick_mm_cid(struct rq *rq, struct task_struct *curr);
    3266             : extern void init_sched_mm_cid(struct task_struct *t);
    3267             : 
    3268             : static inline void __mm_cid_put(struct mm_struct *mm, int cid)
    3269             : {       /* Clear @cid in @mm's cid mask; a negative @cid is ignored. */
    3270             :         if (cid < 0)
    3271             :                 return;
    3272             :         cpumask_clear_cpu(cid, mm_cidmask(mm));
    3273             : }
    3274             : 
    3275             : /*
    3276             :  * The per-mm/cpu cid can have the MM_CID_LAZY_PUT flag set or transition to
    3277             :  * the MM_CID_UNSET state without holding the rq lock, but the rq lock needs to
    3278             :  * be held to transition to other states.
    3279             :  *
    3280             :  * State transitions synchronized with cmpxchg or try_cmpxchg need to be
    3281             :  * consistent across cpus, which prevents use of this_cpu_cmpxchg.
                     :  *
                     :  * mm_cid_put_lazy() completes a pending lazy put for @t->mm on this CPU:
                     :  * if the per-cpu cid carries the lazy-put flag and the try_cmpxchg() to
                     :  * MM_CID_UNSET succeeds, the cid (flag cleared) is released in the mm's
                     :  * cid mask. Otherwise it does nothing. Interrupts must be disabled.
    3282             :  */
    3283             : static inline void mm_cid_put_lazy(struct task_struct *t)
    3284             : {
    3285             :         struct mm_struct *mm = t->mm;
    3286             :         struct mm_cid __percpu *pcpu_cid = mm->pcpu_cid;
    3287             :         int cid;
    3288             : 
    3289             :         lockdep_assert_irqs_disabled();
    3290             :         cid = __this_cpu_read(pcpu_cid->cid);
    3291             :         if (!mm_cid_is_lazy_put(cid) ||
    3292             :             !try_cmpxchg(&this_cpu_ptr(pcpu_cid)->cid, &cid, MM_CID_UNSET))
    3293             :                 return;         /* no lazy put pending, or the slot changed under us */
    3294             :         __mm_cid_put(mm, mm_cid_clear_lazy_put(cid));
    3295             : }
    3296             : 
    3297             : static inline int mm_cid_pcpu_unset(struct mm_struct *mm)
    3298             : {       /*
                     :          * Move this CPU's cid slot for @mm to MM_CID_UNSET with cmpxchg().
                     :          * Returns MM_CID_UNSET if the slot was already unset, otherwise the
                     :          * value that was replaced (valid or lazy-put). Interrupts must be
                     :          * disabled.
                     :          */
    3299             :         struct mm_cid __percpu *pcpu_cid = mm->pcpu_cid;
    3300             :         int cid, res;
    3301             : 
    3302             :         lockdep_assert_irqs_disabled();
    3303             :         cid = __this_cpu_read(pcpu_cid->cid);
    3304             :         for (;;) {
    3305             :                 if (mm_cid_is_unset(cid))
    3306             :                         return MM_CID_UNSET;
    3307             :                 /*
    3308             :                  * Attempt transition from valid or lazy-put to unset.
    3309             :                  */
    3310             :                 res = cmpxchg(&this_cpu_ptr(pcpu_cid)->cid, cid, MM_CID_UNSET);
    3311             :                 if (res == cid)
    3312             :                         break;
    3313             :                 cid = res;      /* slot changed meanwhile: retry with the value observed */
    3314             :         }
    3315             :         return cid;
    3316             : }
    3317             : 
    3318             : static inline void mm_cid_put(struct mm_struct *mm)
    3319             : {       /* Unset this CPU's cid slot for @mm and release the cid in the mask, unless it was already unset. */
    3320             :         int cid;
    3321             : 
    3322             :         lockdep_assert_irqs_disabled();
    3323             :         cid = mm_cid_pcpu_unset(mm);
    3324             :         if (cid == MM_CID_UNSET)
    3325             :                 return;
    3326             :         __mm_cid_put(mm, mm_cid_clear_lazy_put(cid));   /* strip the lazy-put flag before clearing the bit */
    3327             : }
    3328             : 
    3329             : static inline int __mm_cid_try_get(struct mm_struct *mm)
    3330             : {       /*
                     :          * Make one attempt to claim the first free cid in @mm's cid mask.
                     :          * Returns the claimed cid, or -1 when the bit was found already set
                     :          * by the test-and-set.
                     :          */
    3331             :         struct cpumask *cpumask;
    3332             :         int cid;
    3333             : 
    3334             :         cpumask = mm_cidmask(mm);
    3335             :         /*
    3336             :          * Retry finding first zero bit if the mask is temporarily
    3337             :          * filled. This only happens during concurrent remote-clear
    3338             :          * which owns a cid without holding a rq lock.
    3339             :          */
    3340             :         for (;;) {
    3341             :                 cid = cpumask_first_zero(cpumask);
    3342             :                 if (cid < nr_cpu_ids)
    3343             :                         break;
    3344             :                 cpu_relax();
    3345             :         }
    3346             :         if (cpumask_test_and_set_cpu(cid, cpumask))
    3347             :                 return -1;
    3348             :         return cid;
    3349             : }
    3350             : 
    3351             : /*
    3352             :  * Save a snapshot of the current runqueue time of this cpu
    3353             :  * with the per-cpu cid value, so that one can estimate how recently it was used.
                     :  * The rq lock must be held.
    3354             :  */
    3355             : static inline void mm_cid_snapshot_time(struct rq *rq, struct mm_struct *mm)
    3356             : {
    3357             :         struct mm_cid *pcpu_cid = per_cpu_ptr(mm->pcpu_cid, cpu_of(rq));       /* slot of @rq's CPU */
    3358             : 
    3359             :         lockdep_assert_rq_held(rq);
    3360             :         WRITE_ONCE(pcpu_cid->time, rq->clock);
    3361             : }
    3362             : 
    3363             : static inline int __mm_cid_get(struct rq *rq, struct mm_struct *mm)
    3364             : {       /*
                     :          * Allocate a cid for @mm and record the rq time for it through
                     :          * mm_cid_snapshot_time(). Returns the allocated cid.
                     :          */
    3365             :         int cid;
    3366             : 
    3367             :         /*
    3368             :          * All allocations (even those using the cid_lock) are lock-free. If
    3369             :          * use_cid_lock is set, hold the cid_lock to perform cid allocation to
    3370             :          * guarantee forward progress.
    3371             :          */
    3372             :         if (!READ_ONCE(use_cid_lock)) {
    3373             :                 cid = __mm_cid_try_get(mm);
    3374             :                 if (cid >= 0)
    3375             :                         goto end;       /* got one without ever taking cid_lock */
    3376             :                 raw_spin_lock(&cid_lock);
    3377             :         } else {
    3378             :                 raw_spin_lock(&cid_lock);
    3379             :                 cid = __mm_cid_try_get(mm);
    3380             :                 if (cid >= 0)
    3381             :                         goto unlock;
    3382             :         }
    3383             : 
    3384             :         /*
    3385             :          * cid concurrently allocated. Retry while forcing following
    3386             :          * allocations to use the cid_lock to ensure forward progress.
    3387             :          */
    3388             :         WRITE_ONCE(use_cid_lock, 1);
    3389             :         /*
    3390             :          * Set use_cid_lock before allocation. Only care about program order
    3391             :          * because this is only required for forward progress.
    3392             :          */
    3393             :         barrier();
    3394             :         /*
    3395             :          * Retry until it succeeds. It is guaranteed to eventually succeed once
    3396             :          * all incoming allocations observe the use_cid_lock flag set.
    3397             :          */
    3398             :         do {
    3399             :                 cid = __mm_cid_try_get(mm);
    3400             :                 cpu_relax();
    3401             :         } while (cid < 0);
    3402             :         /*
    3403             :          * Allocate before clearing use_cid_lock. Only care about
    3404             :          * program order because this is for forward progress.
    3405             :          */
    3406             :         barrier();
    3407             :         WRITE_ONCE(use_cid_lock, 0);
    3408             : unlock:
    3409             :         raw_spin_unlock(&cid_lock);
    3410             : end:
    3411             :         mm_cid_snapshot_time(rq, mm);
    3412             :         return cid;
    3413             : }
    3414             : 
    3415             : static inline int mm_cid_get(struct rq *rq, struct mm_struct *mm)
    3416             : {       /*
                     :          * Return the cid this CPU uses for @mm, allocating one through
                     :          * __mm_cid_get() when the per-cpu slot holds no valid cid. The rq
                     :          * lock must be held.
                     :          */
    3417             :         struct mm_cid __percpu *pcpu_cid = mm->pcpu_cid;
    3419             :         int cid;
    3420             : 
    3421             :         lockdep_assert_rq_held(rq);
    3423             :         cid = __this_cpu_read(pcpu_cid->cid);
    3424             :         if (mm_cid_is_valid(cid)) {
    3425             :                 mm_cid_snapshot_time(rq, mm);   /* reuse the cid already held; only refresh its time */
    3426             :                 return cid;
    3427             :         }
    3428             :         if (mm_cid_is_lazy_put(cid)) {
    3429             :                 if (try_cmpxchg(&this_cpu_ptr(pcpu_cid)->cid, &cid, MM_CID_UNSET))     /* finish the pending lazy put */
    3430             :                         __mm_cid_put(mm, mm_cid_clear_lazy_put(cid));
    3431             :         }
    3432             :         cid = __mm_cid_get(rq, mm);
    3433             :         __this_cpu_write(pcpu_cid->cid, cid);   /* record the new cid in this CPU's slot */
    3434             :         return cid;
    3435             : }
    3436             : 
    3437             : static inline void switch_mm_cid(struct rq *rq,
    3438             :                                  struct task_struct *prev,
    3439             :                                  struct task_struct *next)
    3440             : {
    3441             :         /*
                     :          * Handles the mm_cid side of switching from @prev to @next on @rq:
                     :          * issue the barrier the transition needs, lazily release @prev's
                     :          * cid, then obtain a cid for @next.
                     :          *
    3442             :          * Provide a memory barrier between rq->curr store and load of
    3443             :          * {prev,next}->mm->pcpu_cid[cpu] on rq->curr->mm transition.
    3444             :          *
    3445             :          * Should be adapted if context_switch() is modified.
    3446             :          */
    3447             :         if (!next->mm) {                                // to kernel
    3448             :                 /*
    3449             :                  * user -> kernel transition does not guarantee a barrier, but
    3450             :                  * we can use the fact that it performs an atomic operation in
    3451             :                  * mmgrab().
    3452             :                  */
    3453             :                 if (prev->mm)                           // from user
    3454             :                         smp_mb__after_mmgrab();
    3455             :                 /*
    3456             :                  * kernel -> kernel transition does not change rq->curr->mm
    3457             :                  * state. It stays NULL.
    3458             :                  */
    3459             :         } else {                                        // to user
    3460             :                 /*
    3461             :                  * kernel -> user transition does not provide a barrier
    3462             :                  * between rq->curr store and load of {prev,next}->mm->pcpu_cid[cpu].
    3463             :                  * Provide it here.
    3464             :                  */
    3465             :                 if (!prev->mm)                          // from kernel
    3466             :                         smp_mb();
    3467             :                 /*
    3468             :                  * user -> user transition guarantees a memory barrier through
    3469             :                  * switch_mm() when current->mm changes. If current->mm is
    3470             :                  * unchanged, no barrier is needed.
    3471             :                  */
    3472             :         }
    3473             :         if (prev->mm_cid_active) {
    3474             :                 mm_cid_snapshot_time(rq, prev->mm);
    3475             :                 mm_cid_put_lazy(prev);
    3476             :                 prev->mm_cid = -1;              /* @prev no longer records a cid */
    3477             :         }
    3478             :         if (next->mm_cid_active)
    3479             :                 next->last_mm_cid = next->mm_cid = mm_cid_get(rq, next->mm);   /* both fields get the same cid */
    3480             : }
    3481             : 
    3482             : #else
    3483             : static inline void switch_mm_cid(struct rq *rq, struct task_struct *prev, struct task_struct *next) { } /* !CONFIG_SCHED_MM_CID: no-op */
    3484             : static inline void sched_mm_cid_migrate_from(struct task_struct *t) { } /* !CONFIG_SCHED_MM_CID: no-op */
    3485             : static inline void sched_mm_cid_migrate_to(struct rq *dst_rq, struct task_struct *t) { } /* !CONFIG_SCHED_MM_CID: no-op */
    3486             : static inline void task_tick_mm_cid(struct rq *rq, struct task_struct *curr) { } /* !CONFIG_SCHED_MM_CID: no-op */
    3487             : static inline void init_sched_mm_cid(struct task_struct *t) { } /* !CONFIG_SCHED_MM_CID: no-op */
    3488             : #endif
    3489             : 
    3490             : #endif /* _KERNEL_SCHED_SCHED_H */

Generated by: LCOV version 1.14