LCOV - code coverage report
Current view: top level - kernel/sched - sched.h (source / functions) Hit Total Coverage
Test: coverage.info Lines: 50 83 60.2 %
Date: 2023-08-24 13:40:31 Functions: 1 3 33.3 %

          Line data    Source code
       1             : /* SPDX-License-Identifier: GPL-2.0 */
       2             : /*
       3             :  * Scheduler internal types and methods:
       4             :  */
       5             : #ifndef _KERNEL_SCHED_SCHED_H
       6             : #define _KERNEL_SCHED_SCHED_H
       7             : 
       8             : #include <linux/sched/affinity.h>
       9             : #include <linux/sched/autogroup.h>
      10             : #include <linux/sched/cpufreq.h>
      11             : #include <linux/sched/deadline.h>
      12             : #include <linux/sched.h>
      13             : #include <linux/sched/loadavg.h>
      14             : #include <linux/sched/mm.h>
      15             : #include <linux/sched/rseq_api.h>
      16             : #include <linux/sched/signal.h>
      17             : #include <linux/sched/smt.h>
      18             : #include <linux/sched/stat.h>
      19             : #include <linux/sched/sysctl.h>
      20             : #include <linux/sched/task_flags.h>
      21             : #include <linux/sched/task.h>
      22             : #include <linux/sched/topology.h>
      23             : 
      24             : #include <linux/atomic.h>
      25             : #include <linux/bitmap.h>
      26             : #include <linux/bug.h>
      27             : #include <linux/capability.h>
      28             : #include <linux/cgroup_api.h>
      29             : #include <linux/cgroup.h>
      30             : #include <linux/context_tracking.h>
      31             : #include <linux/cpufreq.h>
      32             : #include <linux/cpumask_api.h>
      33             : #include <linux/ctype.h>
      34             : #include <linux/file.h>
      35             : #include <linux/fs_api.h>
      36             : #include <linux/hrtimer_api.h>
      37             : #include <linux/interrupt.h>
      38             : #include <linux/irq_work.h>
      39             : #include <linux/jiffies.h>
      40             : #include <linux/kref_api.h>
      41             : #include <linux/kthread.h>
      42             : #include <linux/ktime_api.h>
      43             : #include <linux/lockdep_api.h>
      44             : #include <linux/lockdep.h>
      45             : #include <linux/minmax.h>
      46             : #include <linux/mm.h>
      47             : #include <linux/module.h>
      48             : #include <linux/mutex_api.h>
      49             : #include <linux/plist.h>
      50             : #include <linux/poll.h>
      51             : #include <linux/proc_fs.h>
      52             : #include <linux/profile.h>
      53             : #include <linux/psi.h>
      54             : #include <linux/rcupdate.h>
      55             : #include <linux/seq_file.h>
      56             : #include <linux/seqlock.h>
      57             : #include <linux/softirq.h>
      58             : #include <linux/spinlock_api.h>
      59             : #include <linux/static_key.h>
      60             : #include <linux/stop_machine.h>
      61             : #include <linux/syscalls_api.h>
      62             : #include <linux/syscalls.h>
      63             : #include <linux/tick.h>
      64             : #include <linux/topology.h>
      65             : #include <linux/types.h>
      66             : #include <linux/u64_stats_sync_api.h>
      67             : #include <linux/uaccess.h>
      68             : #include <linux/wait_api.h>
      69             : #include <linux/wait_bit.h>
      70             : #include <linux/workqueue_api.h>
      71             : 
      72             : #include <trace/events/power.h>
      73             : #include <trace/events/sched.h>
      74             : 
      75             : #include "../workqueue_internal.h"
      76             : 
      77             : #ifdef CONFIG_CGROUP_SCHED
      78             : #include <linux/cgroup.h>
      79             : #include <linux/psi.h>
      80             : #endif
      81             : 
      82             : #ifdef CONFIG_SCHED_DEBUG
      83             : # include <linux/static_key.h>
      84             : #endif
      85             : 
      86             : #ifdef CONFIG_PARAVIRT
      87             : # include <asm/paravirt.h>
      88             : # include <asm/paravirt_api_clock.h>
      89             : #endif
      90             : 
      91             : #include "cpupri.h"
      92             : #include "cpudeadline.h"
      93             : 
      94             : #ifdef CONFIG_SCHED_DEBUG
      95             : # define SCHED_WARN_ON(x)      WARN_ONCE(x, #x)
      96             : #else
      97             : # define SCHED_WARN_ON(x)      ({ (void)(x), 0; })
      98             : #endif
      99             : 
     100             : struct rq;
     101             : struct cpuidle_state;
     102             : 
     103             : /* task_struct::on_rq states: */
     104             : #define TASK_ON_RQ_QUEUED       1
     105             : #define TASK_ON_RQ_MIGRATING    2
     106             : 
     107             : extern __read_mostly int scheduler_running;
     108             : 
     109             : extern unsigned long calc_load_update;
     110             : extern atomic_long_t calc_load_tasks;
     111             : 
     112             : extern unsigned int sysctl_sched_child_runs_first;
     113             : 
     114             : extern void calc_global_load_tick(struct rq *this_rq);
     115             : extern long calc_load_fold_active(struct rq *this_rq, long adjust);
     116             : 
     117             : extern void call_trace_sched_update_nr_running(struct rq *rq, int count);
     118             : 
     119             : extern unsigned int sysctl_sched_rt_period;
     120             : extern int sysctl_sched_rt_runtime;
     121             : extern int sched_rr_timeslice;
     122             : 
     123             : /*
     124             :  * Helpers for converting nanosecond timing to jiffy resolution
     125             :  */
     126             : #define NS_TO_JIFFIES(TIME)     ((unsigned long)(TIME) / (NSEC_PER_SEC / HZ))
     127             : 
     128             : /*
     129             :  * Increase resolution of nice-level calculations for 64-bit architectures.
     130             :  * The extra resolution improves shares distribution and load balancing of
     131             :  * low-weight task groups (eg. nice +19 on an autogroup), deeper taskgroup
     132             :  * hierarchies, especially on larger systems. This is not a user-visible change
     133             :  * and does not change the user-interface for setting shares/weights.
     134             :  *
     135             :  * We increase resolution only if we have enough bits to allow this increased
     136             :  * resolution (i.e. 64-bit). The costs of increasing resolution on 32-bit
     137             :  * are pretty high and the returns do not justify the increased costs.
     138             :  *
     139             :  * Really only required when CONFIG_FAIR_GROUP_SCHED=y is also set, but to
     140             :  * increase coverage and consistency always enable it on 64-bit platforms.
     141             :  */
     142             : #ifdef CONFIG_64BIT
     143             : # define NICE_0_LOAD_SHIFT      (SCHED_FIXEDPOINT_SHIFT + SCHED_FIXEDPOINT_SHIFT)
     144             : # define scale_load(w)          ((w) << SCHED_FIXEDPOINT_SHIFT)
     145             : # define scale_load_down(w) \
     146             : ({ \
     147             :         unsigned long __w = (w); \
     148             :         if (__w) \
     149             :                 __w = max(2UL, __w >> SCHED_FIXEDPOINT_SHIFT); \
     150             :         __w; \
     151             : })
     152             : #else
     153             : # define NICE_0_LOAD_SHIFT      (SCHED_FIXEDPOINT_SHIFT)
     154             : # define scale_load(w)          (w)
     155             : # define scale_load_down(w)     (w)
     156             : #endif
     157             : 
     158             : /*
     159             :  * Task weight (visible to users) and its load (invisible to users) have
     160             :  * independent resolution, but they should be well calibrated. We use
     161             :  * scale_load(w) and scale_load_down(w) to convert between them. The
     162             :  * following must be true:
     163             :  *
     164             :  *  scale_load(sched_prio_to_weight[NICE_TO_PRIO(0)-MAX_RT_PRIO]) == NICE_0_LOAD
     165             :  *
     166             :  */
     167             : #define NICE_0_LOAD             (1L << NICE_0_LOAD_SHIFT)
     168             : 
     169             : /*
     170             :  * Single value that decides SCHED_DEADLINE internal math precision.
     171             :  * 10 -> just above 1us
     172             :  * 9  -> just above 0.5us
     173             :  */
     174             : #define DL_SCALE                10
     175             : 
     176             : /*
     177             :  * Single value that denotes runtime == period, i.e. unlimited time.
     178             :  */
     179             : #define RUNTIME_INF             ((u64)~0ULL)
     180             : 
     181             : static inline int idle_policy(int policy)
     182             : {
     183        2930 :         return policy == SCHED_IDLE;
     184             : }
     185             : static inline int fair_policy(int policy)
     186             : {
     187         346 :         return policy == SCHED_NORMAL || policy == SCHED_BATCH;
     188             : }
     189             : 
     190             : static inline int rt_policy(int policy)
     191             : {
     192         183 :         return policy == SCHED_FIFO || policy == SCHED_RR;
     193             : }
     194             : 
     195             : static inline int dl_policy(int policy)
     196             : {
     197             :         return policy == SCHED_DEADLINE;
     198             : }
     199             : static inline bool valid_policy(int policy)
     200             : {
     201         346 :         return idle_policy(policy) || fair_policy(policy) ||
     202         173 :                 rt_policy(policy) || dl_policy(policy);
     203             : }
     204             : 
     205             : static inline int task_has_idle_policy(struct task_struct *p)
     206             : {
     207        5866 :         return idle_policy(p->policy);
     208             : }
     209             : 
     210             : static inline int task_has_rt_policy(struct task_struct *p)
     211             : {
     212          10 :         return rt_policy(p->policy);
     213             : }
     214             : 
     215             : static inline int task_has_dl_policy(struct task_struct *p)
     216             : {
     217           5 :         return dl_policy(p->policy);
     218             : }
     219             : 
     220             : #define cap_scale(v, s) ((v)*(s) >> SCHED_CAPACITY_SHIFT)
     221             : 
     222             : static inline void update_avg(u64 *avg, u64 sample)
     223             : {
     224             :         s64 diff = sample - *avg;
     225             :         *avg += diff / 8;
     226             : }
     227             : 
     228             : /*
     229             :  * Shifting a value by an amount greater than *or equal to* its width in bits
     230             :  * is UB; cap the shift at width-1.
     231             :  */
     232             : #define shr_bound(val, shift)                                                   \
     233             :         (val >> min_t(typeof(shift), shift, BITS_PER_TYPE(typeof(val)) - 1))
     234             : 
     235             : /*
     236             :  * !! For sched_setattr_nocheck() (kernel) only !!
     237             :  *
     238             :  * This is actually gross. :(
     239             :  *
     240             :  * It is used to make schedutil kworker(s) higher priority than SCHED_DEADLINE
     241             :  * tasks, but still be able to sleep. We need this on platforms that cannot
     242             :  * atomically change clock frequency. Remove once fast switching is
     243             :  * available on such platforms.
     244             :  *
     245             :  * SUGOV stands for SchedUtil GOVernor.
     246             :  */
     247             : #define SCHED_FLAG_SUGOV        0x10000000
     248             : 
     249             : #define SCHED_DL_FLAGS (SCHED_FLAG_RECLAIM | SCHED_FLAG_DL_OVERRUN | SCHED_FLAG_SUGOV)
     250             : 
     251             : static inline bool dl_entity_is_special(const struct sched_dl_entity *dl_se)
     252             : {
     253             : #ifdef CONFIG_CPU_FREQ_GOV_SCHEDUTIL
     254             :         return unlikely(dl_se->flags & SCHED_FLAG_SUGOV);
     255             : #else
     256             :         return false;
     257             : #endif
     258             : }
     259             : 
     260             : /*
     261             :  * Tells if entity @a should preempt entity @b.
     262             :  */
     263             : static inline bool dl_entity_preempt(const struct sched_dl_entity *a,
     264             :                                      const struct sched_dl_entity *b)
     265             : {
     266           0 :         return dl_entity_is_special(a) ||
     267           0 :                dl_time_before(a->deadline, b->deadline);
     268             : }
     269             : 
     270             : /*
     271             :  * This is the priority-queue data structure of the RT scheduling class:
     272             :  */
     273             : struct rt_prio_array {
     274             :         DECLARE_BITMAP(bitmap, MAX_RT_PRIO+1); /* include 1 bit for delimiter */
     275             :         struct list_head queue[MAX_RT_PRIO];
     276             : };
     277             : 
     278             : struct rt_bandwidth {
     279             :         /* nests inside the rq lock: */
     280             :         raw_spinlock_t          rt_runtime_lock;
     281             :         ktime_t                 rt_period;
     282             :         u64                     rt_runtime;
     283             :         struct hrtimer          rt_period_timer;
     284             :         unsigned int            rt_period_active;
     285             : };
     286             : 
     287             : void __dl_clear_params(struct task_struct *p);
     288             : 
     289             : static inline int dl_bandwidth_enabled(void)
     290             : {
     291             :         return sysctl_sched_rt_runtime >= 0;
     292             : }
     293             : 
     294             : /*
     295             :  * To keep the bandwidth of -deadline tasks under control
     296             :  * we need some place to:
     297             :  *  - store the maximum -deadline bandwidth of each CPU;
     298             :  *  - cache the fraction of bandwidth that is currently allocated in
     299             :  *    each root domain.
     300             :  *
     301             :  * This is all done in the data structure below. It is similar to the
     302             :  * one used for RT-throttling (rt_bandwidth), with the main difference
     303             :  * that, since here we are only interested in admission control, we
     304             :  * do not decrease any runtime while the group "executes", nor do we
     305             :  * need a timer to replenish it.
     306             :  *
     307             :  * With respect to SMP, bandwidth is given on a per root domain basis,
     308             :  * meaning that:
     309             :  *  - bw (< 100%) is the deadline bandwidth of each CPU;
     310             :  *  - total_bw is the currently allocated bandwidth in each root domain;
     311             :  */
     312             : struct dl_bw {
     313             :         raw_spinlock_t          lock;
     314             :         u64                     bw;
     315             :         u64                     total_bw;
     316             : };
     317             : 
     318             : extern void init_dl_bw(struct dl_bw *dl_b);
     319             : extern int  sched_dl_global_validate(void);
     320             : extern void sched_dl_do_global(void);
     321             : extern int  sched_dl_overflow(struct task_struct *p, int policy, const struct sched_attr *attr);
     322             : extern void __setparam_dl(struct task_struct *p, const struct sched_attr *attr);
     323             : extern void __getparam_dl(struct task_struct *p, struct sched_attr *attr);
     324             : extern bool __checkparam_dl(const struct sched_attr *attr);
     325             : extern bool dl_param_changed(struct task_struct *p, const struct sched_attr *attr);
     326             : extern int  dl_cpuset_cpumask_can_shrink(const struct cpumask *cur, const struct cpumask *trial);
     327             : extern int  dl_bw_check_overflow(int cpu);
     328             : 
     329             : #ifdef CONFIG_CGROUP_SCHED
     330             : 
     331             : struct cfs_rq;
     332             : struct rt_rq;
     333             : 
     334             : extern struct list_head task_groups;
     335             : 
     336             : struct cfs_bandwidth {
     337             : #ifdef CONFIG_CFS_BANDWIDTH
     338             :         raw_spinlock_t          lock;
     339             :         ktime_t                 period;
     340             :         u64                     quota;
     341             :         u64                     runtime;
     342             :         u64                     burst;
     343             :         u64                     runtime_snap;
     344             :         s64                     hierarchical_quota;
     345             : 
     346             :         u8                      idle;
     347             :         u8                      period_active;
     348             :         u8                      slack_started;
     349             :         struct hrtimer          period_timer;
     350             :         struct hrtimer          slack_timer;
     351             :         struct list_head        throttled_cfs_rq;
     352             : 
     353             :         /* Statistics: */
     354             :         int                     nr_periods;
     355             :         int                     nr_throttled;
     356             :         int                     nr_burst;
     357             :         u64                     throttled_time;
     358             :         u64                     burst_time;
     359             : #endif
     360             : };
     361             : 
     362             : /* Task group related information */
     363             : struct task_group {
     364             :         struct cgroup_subsys_state css;
     365             : 
     366             : #ifdef CONFIG_FAIR_GROUP_SCHED
     367             :         /* schedulable entities of this group on each CPU */
     368             :         struct sched_entity     **se;
     369             :         /* runqueue "owned" by this group on each CPU */
     370             :         struct cfs_rq           **cfs_rq;
     371             :         unsigned long           shares;
     372             : 
     373             :         /* A positive value indicates that this is a SCHED_IDLE group. */
     374             :         int                     idle;
     375             : 
     376             : #ifdef  CONFIG_SMP
     377             :         /*
     378             :          * load_avg can be heavily contended at clock tick time, so put
     379             :          * it in its own cacheline separated from the fields above which
     380             :          * will also be accessed at each tick.
     381             :          */
     382             :         atomic_long_t           load_avg ____cacheline_aligned;
     383             : #endif
     384             : #endif
     385             : 
     386             : #ifdef CONFIG_RT_GROUP_SCHED
     387             :         struct sched_rt_entity  **rt_se;
     388             :         struct rt_rq            **rt_rq;
     389             : 
     390             :         struct rt_bandwidth     rt_bandwidth;
     391             : #endif
     392             : 
     393             :         struct rcu_head         rcu;
     394             :         struct list_head        list;
     395             : 
     396             :         struct task_group       *parent;
     397             :         struct list_head        siblings;
     398             :         struct list_head        children;
     399             : 
     400             : #ifdef CONFIG_SCHED_AUTOGROUP
     401             :         struct autogroup        *autogroup;
     402             : #endif
     403             : 
     404             :         struct cfs_bandwidth    cfs_bandwidth;
     405             : 
     406             : #ifdef CONFIG_UCLAMP_TASK_GROUP
     407             :         /* The two decimal precision [%] value requested from user-space */
     408             :         unsigned int            uclamp_pct[UCLAMP_CNT];
     409             :         /* Clamp values requested for a task group */
     410             :         struct uclamp_se        uclamp_req[UCLAMP_CNT];
     411             :         /* Effective clamp values used for a task group */
     412             :         struct uclamp_se        uclamp[UCLAMP_CNT];
     413             : #endif
     414             : 
     415             : };
     416             : 
     417             : #ifdef CONFIG_FAIR_GROUP_SCHED
     418             : #define ROOT_TASK_GROUP_LOAD    NICE_0_LOAD
     419             : 
     420             : /*
     421             :  * A weight of 0 or 1 can cause arithmetic problems.
     422             :  * The weight of a cfs_rq is the sum of the weights of the entities
     423             :  * queued on that cfs_rq, so the weight of an entity should not be
     424             :  * too large, and neither should the shares value of a task group.
     425             :  * (The default weight is 1024 - so there's no practical
     426             :  *  limitation from this.)
     427             :  */
     428             : #define MIN_SHARES              (1UL <<  1)
     429             : #define MAX_SHARES              (1UL << 18)
     430             : #endif
     431             : 
     432             : typedef int (*tg_visitor)(struct task_group *, void *);
     433             : 
     434             : extern int walk_tg_tree_from(struct task_group *from,
     435             :                              tg_visitor down, tg_visitor up, void *data);
     436             : 
     437             : /*
     438             :  * Iterate the full tree, calling @down when first entering a node and @up when
     439             :  * leaving it for the final time.
     440             :  *
     441             :  * Caller must hold rcu_lock or sufficient equivalent.
     442             :  */
     443             : static inline int walk_tg_tree(tg_visitor down, tg_visitor up, void *data)
     444             : {
     445             :         return walk_tg_tree_from(&root_task_group, down, up, data);
     446             : }
     447             : 
     448             : extern int tg_nop(struct task_group *tg, void *data);
     449             : 
     450             : extern void free_fair_sched_group(struct task_group *tg);
     451             : extern int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent);
     452             : extern void online_fair_sched_group(struct task_group *tg);
     453             : extern void unregister_fair_sched_group(struct task_group *tg);
     454             : extern void init_tg_cfs_entry(struct task_group *tg, struct cfs_rq *cfs_rq,
     455             :                         struct sched_entity *se, int cpu,
     456             :                         struct sched_entity *parent);
     457             : extern void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b);
     458             : 
     459             : extern void __refill_cfs_bandwidth_runtime(struct cfs_bandwidth *cfs_b);
     460             : extern void start_cfs_bandwidth(struct cfs_bandwidth *cfs_b);
     461             : extern void unthrottle_cfs_rq(struct cfs_rq *cfs_rq);
     462             : 
     463             : extern void init_tg_rt_entry(struct task_group *tg, struct rt_rq *rt_rq,
     464             :                 struct sched_rt_entity *rt_se, int cpu,
     465             :                 struct sched_rt_entity *parent);
     466             : extern int sched_group_set_rt_runtime(struct task_group *tg, long rt_runtime_us);
     467             : extern int sched_group_set_rt_period(struct task_group *tg, u64 rt_period_us);
     468             : extern long sched_group_rt_runtime(struct task_group *tg);
     469             : extern long sched_group_rt_period(struct task_group *tg);
     470             : extern int sched_rt_can_attach(struct task_group *tg, struct task_struct *tsk);
     471             : 
     472             : extern struct task_group *sched_create_group(struct task_group *parent);
     473             : extern void sched_online_group(struct task_group *tg,
     474             :                                struct task_group *parent);
     475             : extern void sched_destroy_group(struct task_group *tg);
     476             : extern void sched_release_group(struct task_group *tg);
     477             : 
     478             : extern void sched_move_task(struct task_struct *tsk);
     479             : 
     480             : #ifdef CONFIG_FAIR_GROUP_SCHED
     481             : extern int sched_group_set_shares(struct task_group *tg, unsigned long shares);
     482             : 
     483             : extern int sched_group_set_idle(struct task_group *tg, long idle);
     484             : 
     485             : #ifdef CONFIG_SMP
     486             : extern void set_task_rq_fair(struct sched_entity *se,
     487             :                              struct cfs_rq *prev, struct cfs_rq *next);
     488             : #else /* !CONFIG_SMP */
     489             : static inline void set_task_rq_fair(struct sched_entity *se,
     490             :                              struct cfs_rq *prev, struct cfs_rq *next) { }
     491             : #endif /* CONFIG_SMP */
     492             : #endif /* CONFIG_FAIR_GROUP_SCHED */
     493             : 
     494             : #else /* CONFIG_CGROUP_SCHED */
     495             : 
     496             : struct cfs_bandwidth { };
     497             : 
     498             : #endif  /* CONFIG_CGROUP_SCHED */
     499             : 
     500             : extern void unregister_rt_sched_group(struct task_group *tg);
     501             : extern void free_rt_sched_group(struct task_group *tg);
     502             : extern int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent);
     503             : 
     504             : /*
     505             :  * u64_u32_load/u64_u32_store
     506             :  *
     507             :  * Use a copy of a u64 value to protect against data races. This is only
     508             :  * applicable to 32-bit architectures.
     509             :  */
     510             : #ifdef CONFIG_64BIT
     511             : # define u64_u32_load_copy(var, copy)       var
     512             : # define u64_u32_store_copy(var, copy, val) (var = val)
     513             : #else
     514             : # define u64_u32_load_copy(var, copy)                                   \
     515             : ({                                                                      \
     516             :         u64 __val, __val_copy;                                          \
     517             :         do {                                                            \
     518             :                 __val_copy = copy;                                      \
     519             :                 /*                                                      \
     520             :                  * paired with u64_u32_store_copy(), ordering access    \
     521             :                  * to var and copy.                                     \
     522             :                  */                                                     \
     523             :                 smp_rmb();                                              \
     524             :                 __val = var;                                            \
     525             :         } while (__val != __val_copy);                                  \
     526             :         __val;                                                          \
     527             : })
     528             : # define u64_u32_store_copy(var, copy, val)                             \
     529             : do {                                                                    \
     530             :         typeof(val) __val = (val);                                      \
     531             :         var = __val;                                                    \
     532             :         /*                                                              \
     533             :          * paired with u64_u32_load_copy(), ordering access to var and  \
     534             :          * copy.                                                        \
     535             :          */                                                             \
     536             :         smp_wmb();                                                      \
     537             :         copy = __val;                                                   \
     538             : } while (0)
     539             : #endif
     540             : # define u64_u32_load(var)      u64_u32_load_copy(var, var##_copy)
     541             : # define u64_u32_store(var, val) u64_u32_store_copy(var, var##_copy, val)
     542             : 
     543             : /* CFS-related fields in a runqueue */
     544             : struct cfs_rq {
     545             :         struct load_weight      load;
     546             :         unsigned int            nr_running;
     547             :         unsigned int            h_nr_running;      /* SCHED_{NORMAL,BATCH,IDLE} */
     548             :         unsigned int            idle_nr_running;   /* SCHED_IDLE */
     549             :         unsigned int            idle_h_nr_running; /* SCHED_IDLE */
     550             : 
     551             :         u64                     exec_clock;
     552             :         u64                     min_vruntime;
     553             : #ifdef CONFIG_SCHED_CORE
     554             :         unsigned int            forceidle_seq;
     555             :         u64                     min_vruntime_fi;
     556             : #endif /* CONFIG_SCHED_CORE */
     557             : 
     558             : #ifndef CONFIG_64BIT
     559             :         u64                     min_vruntime_copy; /* presumably the '_copy' companion u64_u32_load()/u64_u32_store() pair with min_vruntime */
     560             : #endif /* !CONFIG_64BIT */
     561             : 
     562             :         struct rb_root_cached   tasks_timeline;
     563             : 
     564             :         /*
     565             :          * 'curr' points to the currently running entity on this cfs_rq.
     566             :          * It is set to NULL otherwise (i.e. when none is currently running).
     567             :          */
     568             :         struct sched_entity     *curr;
     569             :         struct sched_entity     *next;
     570             :         struct sched_entity     *last;
     571             :         struct sched_entity     *skip;
     572             : 
     573             : #ifdef  CONFIG_SCHED_DEBUG
     574             :         unsigned int            nr_spread_over;
     575             : #endif /* CONFIG_SCHED_DEBUG */
     576             : 
     577             : #ifdef CONFIG_SMP
     578             :         /*
     579             :          * CFS load tracking
     580             :          */
     581             :         struct sched_avg        avg;
     582             : #ifndef CONFIG_64BIT
     583             :         u64                     last_update_time_copy;
     584             : #endif /* !CONFIG_64BIT */
     585             :         struct {
     586             :                 raw_spinlock_t  lock ____cacheline_aligned;
     587             :                 int             nr;
     588             :                 unsigned long   load_avg;
     589             :                 unsigned long   util_avg;
     590             :                 unsigned long   runnable_avg;
     591             :         } removed;
     592             : 
     593             : #ifdef CONFIG_FAIR_GROUP_SCHED
     594             :         unsigned long           tg_load_avg_contrib;
     595             :         long                    propagate;
     596             :         long                    prop_runnable_sum;
     597             : 
     598             :         /*
     599             :          *   h_load = weight * f(tg)
     600             :          *
     601             :          * Where f(tg) is the recursive weight fraction assigned to
     602             :          * this group.
     603             :          */
     604             :         unsigned long           h_load;
     605             :         u64                     last_h_load_update;
     606             :         struct sched_entity     *h_load_next;
     607             : #endif /* CONFIG_FAIR_GROUP_SCHED */
     608             : #endif /* CONFIG_SMP */
     609             : 
     610             : #ifdef CONFIG_FAIR_GROUP_SCHED
     611             :         struct rq               *rq;    /* CPU runqueue to which this cfs_rq is attached */
     612             : 
     613             :         /*
     614             :          * Leaf cfs_rqs are those that hold tasks (lowest schedulable entity in
     615             :          * a hierarchy). Non-leaf lrqs hold other higher schedulable entities
     616             :          * (like users, containers etc.)
     617             :          *
     618             :          * leaf_cfs_rq_list ties together the list of leaf cfs_rqs on a CPU.
     619             :          * This list is used during load balance.
     620             :          */
     621             :         int                     on_list;
     622             :         struct list_head        leaf_cfs_rq_list;
     623             :         struct task_group       *tg;    /* group that "owns" this runqueue */
     624             : 
     625             :         /* Locally cached copy of our task_group's idle value */
     626             :         int                     idle;
     627             : 
     628             : #ifdef CONFIG_CFS_BANDWIDTH
     629             :         int                     runtime_enabled;
     630             :         s64                     runtime_remaining;
     631             : 
     632             :         u64                     throttled_pelt_idle;
     633             : #ifndef CONFIG_64BIT
     634             :         u64                     throttled_pelt_idle_copy;
     635             : #endif /* !CONFIG_64BIT */
     636             :         u64                     throttled_clock;
     637             :         u64                     throttled_clock_pelt;
     638             :         u64                     throttled_clock_pelt_time;
     639             :         int                     throttled;
     640             :         int                     throttle_count;
     641             :         struct list_head        throttled_list;
     642             : #ifdef CONFIG_SMP
     643             :         struct list_head        throttled_csd_list;
     644             : #endif /* CONFIG_SMP */
     645             : #endif /* CONFIG_CFS_BANDWIDTH */
     646             : #endif /* CONFIG_FAIR_GROUP_SCHED */
     647             : };
     648             : 
     649             : static inline int rt_bandwidth_enabled(void)
     650             : {
     651           0 :         return sysctl_sched_rt_runtime >= 0; /* non-zero iff sysctl_sched_rt_runtime is not negative */
     652             : }
     653             : 
     654             : /* RT IPI pull logic requires IRQ_WORK; it is only enabled together with SMP */
     655             : #if defined(CONFIG_IRQ_WORK) && defined(CONFIG_SMP)
     656             : # define HAVE_RT_PUSH_IPI
     657             : #endif /* CONFIG_IRQ_WORK && CONFIG_SMP */
     658             : 
     659             : /* Real-time class related fields in a runqueue: */
     660             : struct rt_rq {
     661             :         struct rt_prio_array    active;
     662             :         unsigned int            rt_nr_running;
     663             :         unsigned int            rr_nr_running;
     664             : #if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED
     665             :         struct {
     666             :                 int             curr; /* highest queued rt task prio */
     667             : #ifdef CONFIG_SMP
     668             :                 int             next; /* next highest */
     669             : #endif /* CONFIG_SMP */
     670             :         } highest_prio;
     671             : #endif /* CONFIG_SMP || CONFIG_RT_GROUP_SCHED */
     672             : #ifdef CONFIG_SMP
     673             :         unsigned int            rt_nr_migratory;
     674             :         unsigned int            rt_nr_total;
     675             :         int                     overloaded;
     676             :         struct plist_head       pushable_tasks;
     677             : 
     678             : #endif /* CONFIG_SMP */
     679             :         int                     rt_queued;
     680             : 
     681             :         int                     rt_throttled;
     682             :         u64                     rt_time;
     683             :         u64                     rt_runtime;
     684             :         /* Nests inside the rq lock: */
     685             :         raw_spinlock_t          rt_runtime_lock;
     686             : 
     687             : #ifdef CONFIG_RT_GROUP_SCHED
     688             :         unsigned int            rt_nr_boosted;
     689             : 
     690             :         struct rq               *rq;
     691             :         struct task_group       *tg;
     692             : #endif /* CONFIG_RT_GROUP_SCHED */
     693             : };
     694             : 
     695             : static inline bool rt_rq_is_runnable(struct rt_rq *rt_rq)
     696             : {
     697             :         return rt_rq->rt_queued && rt_rq->rt_nr_running; /* true only if rt_queued and rt_nr_running are both non-zero */
     698             : }
     699             : 
     700             : /* Deadline class related fields in a runqueue */
     701             : struct dl_rq {
     702             :         /* runqueue is an rbtree, ordered by deadline */
     703             :         struct rb_root_cached   root;
     704             : 
     705             :         unsigned int            dl_nr_running;
     706             : 
     707             : #ifdef CONFIG_SMP
     708             :         /*
     709             :          * Deadline values of the currently executing and the
     710             :          * earliest ready task on this rq. Caching these facilitates
     711             :          * the decision whether or not a ready but not running task
     712             :          * should migrate somewhere else.
     713             :          */
     714             :         struct {
     715             :                 u64             curr;
     716             :                 u64             next;
     717             :         } earliest_dl;
     718             : 
     719             :         unsigned int            dl_nr_migratory;
     720             :         int                     overloaded;
     721             : 
     722             :         /*
     723             :          * Tasks on this rq that can be pushed away. They are kept in
     724             :          * an rb-tree, ordered by tasks' deadlines, with caching
     725             :          * of the leftmost (earliest deadline) element.
     726             :          */
     727             :         struct rb_root_cached   pushable_dl_tasks_root;
     728             : #else /* !CONFIG_SMP */
     729             :         struct dl_bw            dl_bw;
     730             : #endif /* CONFIG_SMP */
     731             :         /*
     732             :          * "Active utilization" for this runqueue: increased when a
     733             :          * task wakes up (becomes TASK_RUNNING) and decreased when a
     734             :          * task blocks.
     735             :          */
     736             :         u64                     running_bw;
     737             : 
     738             :         /*
     739             :          * Utilization of the tasks "assigned" to this runqueue (including
     740             :          * the tasks that are in runqueue and the tasks that executed on this
     741             :          * CPU and blocked). Increased when a task moves to this runqueue, and
     742             :          * decreased when the task moves away (migrates, changes scheduling
     743             :          * policy, or terminates).
     744             :          * This is needed to compute the "inactive utilization" for the
     745             :          * runqueue (inactive utilization = this_bw - running_bw).
     746             :          */
     747             :         u64                     this_bw;
     748             :         u64                     extra_bw;
     749             : 
     750             :         /*
     751             :          * Maximum available bandwidth for reclaiming by SCHED_FLAG_RECLAIM
     752             :          * tasks of this rq. Used in calculation of reclaimable bandwidth (GRUB).
     753             :          */
     754             :         u64                     max_bw;
     755             : 
     756             :         /*
     757             :          * Inverse of the fraction of CPU utilization that can be reclaimed
     758             :          * by the GRUB algorithm.
     759             :          */
     760             :         u64                     bw_ratio;
     761             : };
     762             : 
     763             : #ifdef CONFIG_FAIR_GROUP_SCHED
     764             : /* An entity is a task if it doesn't "own" a runqueue, i.e. its my_q is NULL */
     765             : #define entity_is_task(se)      (!(se)->my_q)
     766             : 
     767             : static inline void se_update_runnable(struct sched_entity *se)
     768             : {
     769             :         if (!entity_is_task(se)) /* only entities that own a runqueue (my_q set) are updated */
     770             :                 se->runnable_weight = se->my_q->h_nr_running; /* copy h_nr_running of the owned runqueue */
     771             : }
     772             : 
     773             : static inline long se_runnable(struct sched_entity *se)
     774             : {
     775             :         if (entity_is_task(se))
     776             :                 return !!se->on_rq; /* task: 1 if on_rq is non-zero, else 0 */
     777             :         else
     778             :                 return se->runnable_weight; /* entity owning a runqueue: its stored runnable_weight */
     779             : }
     780             : 
     781             : #else
     782             : #define entity_is_task(se)      1 /* without CONFIG_FAIR_GROUP_SCHED every entity counts as a task */
     783             : 
     784             : static inline void se_update_runnable(struct sched_entity *se) {} /* no-op in this configuration */
     785             : 
     786             : static inline long se_runnable(struct sched_entity *se)
     787             : {
     788             :         return !!se->on_rq; /* 1 if on_rq is non-zero, else 0 */
     789             : }
     790             : #endif
     791             : 
     792             : #ifdef CONFIG_SMP
     793             : /*
     794             :  * XXX we want to get rid of these helpers and use the full load resolution.
     795             :  */
     796             : static inline long se_weight(struct sched_entity *se)
     797             : {
     798             :         return scale_load_down(se->load.weight); /* the entity's load weight passed through scale_load_down() */
     799             : }
     800             : 
     801             : 
     802             : static inline bool sched_asym_prefer(int a, int b) /* NOTE(review): a and b are presumably CPU ids - confirm */
     803             : {
     804             :         return arch_asym_cpu_priority(a) > arch_asym_cpu_priority(b); /* true iff a's priority is strictly greater than b's */
     805             : }
     806             : 
     807             : struct perf_domain {
     808             :         struct em_perf_domain *em_pd; /* NOTE(review): presumably the energy-model performance domain - confirm */
     809             :         struct perf_domain *next; /* next entry; root_domain::pd documents the list as NULL-terminated */
     810             :         struct rcu_head rcu; /* NOTE(review): presumably used for RCU-deferred release - confirm */
     811             : };
     812             : 
     813             : /* Scheduling group status flags (one bit each) */
     814             : #define SG_OVERLOAD             0x1 /* More than one runnable task on a CPU. */
     815             : #define SG_OVERUTILIZED         0x2 /* One or more CPUs are over-utilized. */
     816             : 
     817             : /*
     818             :  * We add the notion of a root-domain which will be used to define per-domain
     819             :  * variables. Each exclusive cpuset essentially defines an island domain by
     820             :  * fully partitioning the member CPUs from any other cpuset. Whenever a new
     821             :  * exclusive cpuset is created, we also create and attach a new root-domain
     822             :  * object.
     823             :  *
     824             :  */
     825             : struct root_domain {
     826             :         atomic_t                refcount;
     827             :         atomic_t                rto_count;
     828             :         struct rcu_head         rcu;
     829             :         cpumask_var_t           span;
     830             :         cpumask_var_t           online;
     831             : 
     832             :         /*
     833             :          * Indicate pullable load on at least one CPU, e.g:
     834             :          * - More than one runnable task
     835             :          * - Running task is misfit
     836             :          */
     837             :         int                     overload;
     838             : 
     839             :         /* Indicate one or more CPUs over-utilized (tipping point) */
     840             :         int                     overutilized;
     841             : 
     842             :         /*
     843             :          * The bit corresponding to a CPU gets set here if such CPU has more
     844             :          * than one runnable -deadline task (as it is below for RT tasks).
     845             :          */
     846             :         cpumask_var_t           dlo_mask;
     847             :         atomic_t                dlo_count;
     848             :         struct dl_bw            dl_bw;
     849             :         struct cpudl            cpudl;
     850             : 
     851             :         /*
     852             :          * Indicate whether a root_domain's dl_bw has been checked or
     853             :          * updated. It's a monotonically increasing value.
     854             :          *
     855             :          * Corner cases like 'wrap around' would be dangerous, but u64 is
     856             :          * 'big enough' that this shouldn't be a concern.
     857             :          */
     858             :         u64 visit_gen;
     859             : 
     860             : #ifdef HAVE_RT_PUSH_IPI
     861             :         /*
     862             :          * For IPI pull requests, loop across the rto_mask.
     863             :          */
     864             :         struct irq_work         rto_push_work;
     865             :         raw_spinlock_t          rto_lock;
     866             :         /* These are only updated and read within rto_lock */
     867             :         int                     rto_loop;
     868             :         int                     rto_cpu;
     869             :         /* These atomics are updated outside of a lock */
     870             :         atomic_t                rto_loop_next;
     871             :         atomic_t                rto_loop_start;
     872             : #endif /* HAVE_RT_PUSH_IPI */
     873             :         /*
     874             :          * The "RT overload" flag: it gets set if a CPU has more than
     875             :          * one runnable RT task.
     876             :          */
     877             :         cpumask_var_t           rto_mask;
     878             :         struct cpupri           cpupri;
     879             : 
     880             :         unsigned long           max_cpu_capacity;
     881             : 
     882             :         /*
     883             :          * NULL-terminated list of performance domains intersecting with the
     884             :          * CPUs of the rd. Protected by RCU.
     885             :          */
     886             :         struct perf_domain __rcu *pd;
     887             : };
     888             : 
     889             : extern void init_defrootdomain(void);
     890             : extern int sched_init_domains(const struct cpumask *cpu_map);
     891             : extern void rq_attach_root(struct rq *rq, struct root_domain *rd);
     892             : extern void sched_get_rd(struct root_domain *rd);
     893             : extern void sched_put_rd(struct root_domain *rd);
     894             : 
     895             : #ifdef HAVE_RT_PUSH_IPI
     896             : extern void rto_push_irq_work_func(struct irq_work *work);
     897             : #endif
     898             : #endif /* CONFIG_SMP */
     899             : 
     900             : #ifdef CONFIG_UCLAMP_TASK
     901             : /*
     902             :  * struct uclamp_bucket - Utilization clamp bucket
     903             :  * @value: utilization clamp value for tasks on this clamp bucket
     904             :  * @tasks: number of RUNNABLE tasks on this clamp bucket
     905             :  *
     906             :  * Keep track of how many tasks are RUNNABLE for a given utilization
     907             :  * clamp value. Both fields are bit-fields of one unsigned long.
     908             :  */
     909             : struct uclamp_bucket {
     910             :         unsigned long value : bits_per(SCHED_CAPACITY_SCALE); /* width: bits_per(SCHED_CAPACITY_SCALE) */
     911             :         unsigned long tasks : BITS_PER_LONG - bits_per(SCHED_CAPACITY_SCALE); /* width: the remaining bits of a long */
     912             : };
     913             : 
     914             : /*
     915             :  * struct uclamp_rq - rq's utilization clamp
     916             :  * @value: currently active clamp values for a rq
     917             :  * @bucket: utilization clamp buckets affecting a rq
     918             :  *
     919             :  * Keep track of RUNNABLE tasks on a rq to aggregate their clamp values.
     920             :  * A clamp value is affecting a rq when there is at least one task RUNNABLE
     921             :  * (or actually running) with that value.
     922             :  *
     923             :  * There are up to UCLAMP_CNT possible different clamp values; currently there
     924             :  * are only two: minimum utilization and maximum utilization.
     925             :  *
     926             :  * All utilization clamping values are MAX aggregated, since:
     927             :  * - for util_min: we want to run the CPU at least at the max of the minimum
     928             :  *   utilization required by its currently RUNNABLE tasks.
     929             :  * - for util_max: we want to allow the CPU to run up to the max of the
     930             :  *   maximum utilization allowed by its currently RUNNABLE tasks.
     931             :  *
     932             :  * Since on each system we expect only a limited number of different
     933             :  * utilization clamp values (UCLAMP_BUCKETS), use a simple array to track
     934             :  * the metrics required to compute all the per-rq utilization clamp values.
     935             :  */
     936             : struct uclamp_rq {
     937             :         unsigned int value;
     938             :         struct uclamp_bucket bucket[UCLAMP_BUCKETS]; /* UCLAMP_BUCKETS entries */
     939             : };
     940             : 
     941             : DECLARE_STATIC_KEY_FALSE(sched_uclamp_used);
     942             : #endif /* CONFIG_UCLAMP_TASK */
     943             : 
     944             : struct rq; /* forward declaration; struct rq itself is defined further down in this file */
     945             : struct balance_callback {
     946             :         struct balance_callback *next; /* pointer to another callback; NOTE(review): presumably a singly linked list - confirm */
     947             :         void (*func)(struct rq *rq); /* callback function; takes a runqueue pointer */
     948             : };
     949             : 
     950             : /*
     951             :  * This is the main, per-CPU runqueue data structure.
     952             :  *
     953             :  * Locking rule: those places that want to lock multiple runqueues
     954             :  * (such as the load balancing or the thread migration code), lock
     955             :  * acquire operations must be ordered by ascending &runqueue.
     956             :  */
     957             : struct rq {
     958             :         /* runqueue lock: */
     959             :         raw_spinlock_t          __lock;
     960             : 
     961             :         /*
     962             :          * nr_running and cpu_load should be in the same cacheline because
     963             :          * remote CPUs use both these fields when doing load calculation.
     964             :          * NOTE(review): this struct has no cpu_load member; the remark above looks stale. */
     965             :         unsigned int            nr_running;
     966             : #ifdef CONFIG_NUMA_BALANCING
     967             :         unsigned int            nr_numa_running;
     968             :         unsigned int            nr_preferred_running;
     969             :         unsigned int            numa_migrate_on;
     970             : #endif /* CONFIG_NUMA_BALANCING */
     971             : #ifdef CONFIG_NO_HZ_COMMON
     972             : #ifdef CONFIG_SMP
     973             :         unsigned long           last_blocked_load_update_tick;
     974             :         unsigned int            has_blocked_load;
     975             :         call_single_data_t      nohz_csd;
     976             : #endif /* CONFIG_SMP */
     977             :         unsigned int            nohz_tick_stopped;
     978             :         atomic_t                nohz_flags;
     979             : #endif /* CONFIG_NO_HZ_COMMON */
     980             : 
     981             : #ifdef CONFIG_SMP
     982             :         unsigned int            ttwu_pending;
     983             : #endif /* CONFIG_SMP */
     984             :         u64                     nr_switches;
     985             : 
     986             : #ifdef CONFIG_UCLAMP_TASK
     987             :         /* Utilization clamp values based on CPU's RUNNABLE tasks */
     988             :         struct uclamp_rq        uclamp[UCLAMP_CNT] ____cacheline_aligned;
     989             :         unsigned int            uclamp_flags;
     990             : #define UCLAMP_FLAG_IDLE 0x01
     991             : #endif /* CONFIG_UCLAMP_TASK */
     992             : 
     993             :         struct cfs_rq           cfs;
     994             :         struct rt_rq            rt;
     995             :         struct dl_rq            dl;
     996             : 
     997             : #ifdef CONFIG_FAIR_GROUP_SCHED
     998             :         /* list of leaf cfs_rq on this CPU: */
     999             :         struct list_head        leaf_cfs_rq_list;
    1000             :         struct list_head        *tmp_alone_branch;
    1001             : #endif /* CONFIG_FAIR_GROUP_SCHED */
    1002             : 
    1003             :         /*
    1004             :          * This is part of a global counter where only the total sum
    1005             :          * over all CPUs matters. A task can increase this counter on
    1006             :          * one CPU and if it got migrated afterwards it may decrease
    1007             :          * it on another CPU. Always updated under the runqueue lock:
    1008             :          */
    1009             :         unsigned int            nr_uninterruptible;
    1010             : 
    1011             :         struct task_struct __rcu        *curr;
    1012             :         struct task_struct      *idle;
    1013             :         struct task_struct      *stop;
    1014             :         unsigned long           next_balance;
    1015             :         struct mm_struct        *prev_mm;
    1016             : 
    1017             :         unsigned int            clock_update_flags;
    1018             :         u64                     clock;
    1019             :         /* Ensure that all clocks are in the same cache line */
    1020             :         u64                     clock_task ____cacheline_aligned;
    1021             :         u64                     clock_pelt;
    1022             :         unsigned long           lost_idle_time;
    1023             :         u64                     clock_pelt_idle;
    1024             :         u64                     clock_idle;
    1025             : #ifndef CONFIG_64BIT
    1026             :         u64                     clock_pelt_idle_copy;
    1027             :         u64                     clock_idle_copy;
    1028             : #endif /* !CONFIG_64BIT */
    1029             : 
    1030             :         atomic_t                nr_iowait;
    1031             : 
    1032             : #ifdef CONFIG_SCHED_DEBUG
    1033             :         u64 last_seen_need_resched_ns;
    1034             :         int ticks_without_resched;
    1035             : #endif /* CONFIG_SCHED_DEBUG */
    1036             : 
    1037             : #ifdef CONFIG_MEMBARRIER
    1038             :         int membarrier_state;
    1039             : #endif /* CONFIG_MEMBARRIER */
    1040             : 
    1041             : #ifdef CONFIG_SMP
    1042             :         struct root_domain              *rd;
    1043             :         struct sched_domain __rcu       *sd;
    1044             : 
    1045             :         unsigned long           cpu_capacity;
    1046             :         unsigned long           cpu_capacity_orig;
    1047             : 
    1048             :         struct balance_callback *balance_callback;
    1049             : 
    1050             :         unsigned char           nohz_idle_balance;
    1051             :         unsigned char           idle_balance;
    1052             : 
    1053             :         unsigned long           misfit_task_load;
    1054             : 
    1055             :         /* For active balancing */
    1056             :         int                     active_balance;
    1057             :         int                     push_cpu;
    1058             :         struct cpu_stop_work    active_balance_work;
    1059             : 
    1060             :         /* CPU of this runqueue: */
    1061             :         int                     cpu;
    1062             :         int                     online;
    1063             : 
    1064             :         struct list_head cfs_tasks;
    1065             : 
    1066             :         struct sched_avg        avg_rt;
    1067             :         struct sched_avg        avg_dl;
    1068             : #ifdef CONFIG_HAVE_SCHED_AVG_IRQ
    1069             :         struct sched_avg        avg_irq;
    1070             : #endif /* CONFIG_HAVE_SCHED_AVG_IRQ */
    1071             : #ifdef CONFIG_SCHED_THERMAL_PRESSURE
    1072             :         struct sched_avg        avg_thermal;
    1073             : #endif /* CONFIG_SCHED_THERMAL_PRESSURE */
    1074             :         u64                     idle_stamp;
    1075             :         u64                     avg_idle;
    1076             : 
    1077             :         unsigned long           wake_stamp;
    1078             :         u64                     wake_avg_idle;
    1079             : 
    1080             :         /* This is used to determine avg_idle's max value */
    1081             :         u64                     max_idle_balance_cost;
    1082             : 
    1083             : #ifdef CONFIG_HOTPLUG_CPU
    1084             :         struct rcuwait          hotplug_wait;
    1085             : #endif /* CONFIG_HOTPLUG_CPU */
    1086             : #endif /* CONFIG_SMP */
    1087             : 
    1088             : #ifdef CONFIG_IRQ_TIME_ACCOUNTING
    1089             :         u64                     prev_irq_time;
    1090             : #endif /* CONFIG_IRQ_TIME_ACCOUNTING */
    1091             : #ifdef CONFIG_PARAVIRT
    1092             :         u64                     prev_steal_time;
    1093             : #endif /* CONFIG_PARAVIRT */
    1094             : #ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING
    1095             :         u64                     prev_steal_time_rq;
    1096             : #endif /* CONFIG_PARAVIRT_TIME_ACCOUNTING */
    1097             : 
    1098             :         /* calc_load related fields */
    1099             :         unsigned long           calc_load_update;
    1100             :         long                    calc_load_active;
    1101             : 
    1102             : #ifdef CONFIG_SCHED_HRTICK
    1103             : #ifdef CONFIG_SMP
    1104             :         call_single_data_t      hrtick_csd;
    1105             : #endif /* CONFIG_SMP */
    1106             :         struct hrtimer          hrtick_timer;
    1107             :         ktime_t                 hrtick_time;
    1108             : #endif /* CONFIG_SCHED_HRTICK */
    1109             : 
    1110             : #ifdef CONFIG_SCHEDSTATS
    1111             :         /* latency stats */
    1112             :         struct sched_info       rq_sched_info;
    1113             :         unsigned long long      rq_cpu_time;
    1114             :         /* could above be rq->cfs.exec_clock + rq->rt.rt_runtime ? */
    1115             : 
    1116             :         /* sys_sched_yield() stats */
    1117             :         unsigned int            yld_count;
    1118             : 
    1119             :         /* schedule() stats */
    1120             :         unsigned int            sched_count;
    1121             :         unsigned int            sched_goidle;
    1122             : 
    1123             :         /* try_to_wake_up() stats */
    1124             :         unsigned int            ttwu_count;
    1125             :         unsigned int            ttwu_local;
    1126             : #endif /* CONFIG_SCHEDSTATS */
    1127             : 
    1128             : #ifdef CONFIG_CPU_IDLE
    1129             :         /* Must be inspected within an RCU lock section */
    1130             :         struct cpuidle_state    *idle_state;
    1131             : #endif /* CONFIG_CPU_IDLE */
    1132             : 
    1133             : #ifdef CONFIG_SMP
    1134             :         unsigned int            nr_pinned;
    1135             : #endif /* CONFIG_SMP */
    1136             :         unsigned int            push_busy;
    1137             :         struct cpu_stop_work    push_work;
    1138             : 
    1139             : #ifdef CONFIG_SCHED_CORE
    1140             :         /* per rq */
    1141             :         struct rq               *core;
    1142             :         struct task_struct      *core_pick;
    1143             :         unsigned int            core_enabled;
    1144             :         unsigned int            core_sched_seq;
    1145             :         struct rb_root          core_tree;
    1146             : 
    1147             :         /* shared state -- careful with sched_core_cpu_deactivate() */
    1148             :         unsigned int            core_task_seq;
    1149             :         unsigned int            core_pick_seq;
    1150             :         unsigned long           core_cookie;
    1151             :         unsigned int            core_forceidle_count;
    1152             :         unsigned int            core_forceidle_seq;
    1153             :         unsigned int            core_forceidle_occupation;
    1154             :         u64                     core_forceidle_start;
    1155             : #endif /* CONFIG_SCHED_CORE */
    1156             : 
    1157             :         /* Scratch cpumask to be temporarily used under rq_lock */
    1158             :         cpumask_var_t           scratch_mask;
    1159             : 
    1160             : #if defined(CONFIG_CFS_BANDWIDTH) && defined(CONFIG_SMP)
    1161             :         call_single_data_t      cfsb_csd;
    1162             :         struct list_head        cfsb_csd_list;
    1163             : #endif /* CONFIG_CFS_BANDWIDTH && CONFIG_SMP */
    1164             : };
    1165             : 
    1166             : #ifdef CONFIG_FAIR_GROUP_SCHED
    1167             : 
    1168             : /* CPU runqueue to which this cfs_rq is attached */
    1169             : static inline struct rq *rq_of(struct cfs_rq *cfs_rq)
    1170             : {
    1171             :         return cfs_rq->rq;
    1172             : }
    1173             : 
    1174             : #else
    1175             : 
    1176             : static inline struct rq *rq_of(struct cfs_rq *cfs_rq)
    1177             : {
    1178        7240 :         return container_of(cfs_rq, struct rq, cfs);
    1179             : }
    1180             : #endif
    1181             : 
    1182             : static inline int cpu_of(struct rq *rq)
    1183             : {
    1184             : #ifdef CONFIG_SMP
    1185             :         return rq->cpu;
    1186             : #else
    1187             :         return 0;
    1188             : #endif
    1189             : }
    1190             : 
    1191             : #define MDF_PUSH        0x01
    1192             : 
    1193             : static inline bool is_migration_disabled(struct task_struct *p)
    1194             : {
    1195             : #ifdef CONFIG_SMP
    1196             :         return p->migration_disabled;
    1197             : #else
    1198             :         return false;
    1199             : #endif
    1200             : }
    1201             : 
    1202             : DECLARE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
    1203             : 
    1204             : #define cpu_rq(cpu)             (&per_cpu(runqueues, (cpu)))
    1205             : #define this_rq()               this_cpu_ptr(&runqueues)
    1206             : #define task_rq(p)              cpu_rq(task_cpu(p))
    1207             : #define cpu_curr(cpu)           (cpu_rq(cpu)->curr)
    1208             : #define raw_rq()                raw_cpu_ptr(&runqueues)
    1209             : 
    1210             : struct sched_group;
    1211             : #ifdef CONFIG_SCHED_CORE
    1212             : static inline struct cpumask *sched_group_span(struct sched_group *sg);
    1213             : 
    1214             : DECLARE_STATIC_KEY_FALSE(__sched_core_enabled);
    1215             : 
    1216             : static inline bool sched_core_enabled(struct rq *rq)
    1217             : {
    1218             :         return static_branch_unlikely(&__sched_core_enabled) && rq->core_enabled;
    1219             : }
    1220             : 
    1221             : static inline bool sched_core_disabled(void)
    1222             : {
    1223             :         return !static_branch_unlikely(&__sched_core_enabled);
    1224             : }
    1225             : 
    1226             : /*
    1227             :  * Be careful with this function; not for general use. The return value isn't
    1228             :  * stable unless you actually hold a relevant rq->__lock.
    1229             :  */
    1230             : static inline raw_spinlock_t *rq_lockp(struct rq *rq)
    1231             : {
    1232             :         if (sched_core_enabled(rq))
    1233             :                 return &rq->core->__lock;
    1234             : 
    1235             :         return &rq->__lock;
    1236             : }
    1237             : 
    1238             : static inline raw_spinlock_t *__rq_lockp(struct rq *rq)
    1239             : {
    1240             :         if (rq->core_enabled)
    1241             :                 return &rq->core->__lock;
    1242             : 
    1243             :         return &rq->__lock;
    1244             : }
    1245             : 
    1246             : bool cfs_prio_less(const struct task_struct *a, const struct task_struct *b,
    1247             :                         bool fi);
    1248             : 
    1249             : /*
    1250             :  * Helpers to check if the CPU's core cookie matches with the task's cookie
    1251             :  * when core scheduling is enabled.
    1252             :  * A special case is that the task's cookie always matches with CPU's core
    1253             :  * cookie if the CPU is in an idle core.
    1254             :  */
    1255             : static inline bool sched_cpu_cookie_match(struct rq *rq, struct task_struct *p)
    1256             : {
    1257             :         /* Ignore cookie match if core scheduler is not enabled on the CPU. */
    1258             :         if (!sched_core_enabled(rq))
    1259             :                 return true;
    1260             : 
    1261             :         return rq->core->core_cookie == p->core_cookie;
    1262             : }
    1263             : 
    1264             : static inline bool sched_core_cookie_match(struct rq *rq, struct task_struct *p)
    1265             : {
    1266             :         bool idle_core = true;
    1267             :         int cpu;
    1268             : 
    1269             :         /* Ignore cookie match if core scheduler is not enabled on the CPU. */
    1270             :         if (!sched_core_enabled(rq))
    1271             :                 return true;
    1272             : 
    1273             :         for_each_cpu(cpu, cpu_smt_mask(cpu_of(rq))) {
    1274             :                 if (!available_idle_cpu(cpu)) {
    1275             :                         idle_core = false;
    1276             :                         break;
    1277             :                 }
    1278             :         }
    1279             : 
    1280             :         /*
    1281             :          * A CPU in an idle core is always the best choice for tasks with
    1282             :          * cookies.
    1283             :          */
    1284             :         return idle_core || rq->core->core_cookie == p->core_cookie;
    1285             : }
    1286             : 
    1287             : static inline bool sched_group_cookie_match(struct rq *rq,
    1288             :                                             struct task_struct *p,
    1289             :                                             struct sched_group *group)
    1290             : {
    1291             :         int cpu;
    1292             : 
    1293             :         /* Ignore cookie match if core scheduler is not enabled on the CPU. */
    1294             :         if (!sched_core_enabled(rq))
    1295             :                 return true;
    1296             : 
    1297             :         for_each_cpu_and(cpu, sched_group_span(group), p->cpus_ptr) {
    1298             :                 if (sched_core_cookie_match(cpu_rq(cpu), p))
    1299             :                         return true;
    1300             :         }
    1301             :         return false;
    1302             : }
    1303             : 
    1304             : static inline bool sched_core_enqueued(struct task_struct *p)
    1305             : {
    1306             :         return !RB_EMPTY_NODE(&p->core_node);
    1307             : }
    1308             : 
    1309             : extern void sched_core_enqueue(struct rq *rq, struct task_struct *p);
    1310             : extern void sched_core_dequeue(struct rq *rq, struct task_struct *p, int flags);
    1311             : 
    1312             : extern void sched_core_get(void);
    1313             : extern void sched_core_put(void);
    1314             : 
    1315             : #else /* !CONFIG_SCHED_CORE */
    1316             : 
    1317             : static inline bool sched_core_enabled(struct rq *rq)
    1318             : {
    1319             :         return false;
    1320             : }
    1321             : 
    1322             : static inline bool sched_core_disabled(void)
    1323             : {
    1324             :         return true;
    1325             : }
    1326             : 
    1327             : static inline raw_spinlock_t *rq_lockp(struct rq *rq)
    1328             : {
    1329             :         return &rq->__lock;
    1330             : }
    1331             : 
    1332             : static inline raw_spinlock_t *__rq_lockp(struct rq *rq)
    1333             : {
    1334             :         return &rq->__lock;
    1335             : }
    1336             : 
    1337             : static inline bool sched_cpu_cookie_match(struct rq *rq, struct task_struct *p)
    1338             : {
    1339             :         return true;
    1340             : }
    1341             : 
    1342             : static inline bool sched_core_cookie_match(struct rq *rq, struct task_struct *p)
    1343             : {
    1344             :         return true;
    1345             : }
    1346             : 
    1347             : static inline bool sched_group_cookie_match(struct rq *rq,
    1348             :                                             struct task_struct *p,
    1349             :                                             struct sched_group *group)
    1350             : {
    1351             :         return true;
    1352             : }
    1353             : #endif /* CONFIG_SCHED_CORE */
    1354             : 
    1355             : static inline void lockdep_assert_rq_held(struct rq *rq)
    1356             : {
    1357        9137 :         lockdep_assert_held(__rq_lockp(rq));
    1358             : }
    1359             : 
    1360             : extern void raw_spin_rq_lock_nested(struct rq *rq, int subclass);
    1361             : extern bool raw_spin_rq_trylock(struct rq *rq);
    1362             : extern void raw_spin_rq_unlock(struct rq *rq);
    1363             : 
    1364             : static inline void raw_spin_rq_lock(struct rq *rq)
    1365             : {
    1366        2450 :         raw_spin_rq_lock_nested(rq, 0);
    1367             : }
    1368             : 
    1369             : static inline void raw_spin_rq_lock_irq(struct rq *rq)
    1370             : {
    1371           0 :         local_irq_disable();
    1372           0 :         raw_spin_rq_lock(rq);
    1373             : }
    1374             : 
    1375             : static inline void raw_spin_rq_unlock_irq(struct rq *rq)
    1376             : {
    1377        1032 :         raw_spin_rq_unlock(rq);
    1378             :         local_irq_enable();
    1379             : }
    1380             : 
    1381             : static inline unsigned long _raw_spin_rq_lock_irqsave(struct rq *rq)
    1382             : {
    1383             :         unsigned long flags;
    1384          16 :         local_irq_save(flags);
    1385          16 :         raw_spin_rq_lock(rq);
    1386             :         return flags;
    1387             : }
    1388             : 
    1389             : static inline void raw_spin_rq_unlock_irqrestore(struct rq *rq, unsigned long flags)
    1390             : {
    1391          16 :         raw_spin_rq_unlock(rq);
    1392          32 :         local_irq_restore(flags);
    1393             : }
    1394             : 
    1395             : #define raw_spin_rq_lock_irqsave(rq, flags)     \
    1396             : do {                                            \
    1397             :         flags = _raw_spin_rq_lock_irqsave(rq);  \
    1398             : } while (0)
    1399             : 
    1400             : #ifdef CONFIG_SCHED_SMT
    1401             : extern void __update_idle_core(struct rq *rq);
    1402             : 
    1403             : static inline void update_idle_core(struct rq *rq)
    1404             : {
    1405             :         if (static_branch_unlikely(&sched_smt_present))
    1406             :                 __update_idle_core(rq);
    1407             : }
    1408             : 
    1409             : #else
    1410             : static inline void update_idle_core(struct rq *rq) { }
    1411             : #endif
    1412             : 
    1413             : #ifdef CONFIG_FAIR_GROUP_SCHED
    1414             : static inline struct task_struct *task_of(struct sched_entity *se)
    1415             : {
    1416             :         SCHED_WARN_ON(!entity_is_task(se));
    1417             :         return container_of(se, struct task_struct, se);
    1418             : }
    1419             : 
    1420             : static inline struct cfs_rq *task_cfs_rq(struct task_struct *p)
    1421             : {
    1422             :         return p->se.cfs_rq;
    1423             : }
    1424             : 
    1425             : /* runqueue on which this entity is (to be) queued */
    1426             : static inline struct cfs_rq *cfs_rq_of(const struct sched_entity *se)
    1427             : {
    1428             :         return se->cfs_rq;
    1429             : }
    1430             : 
    1431             : /* runqueue "owned" by this group */
    1432             : static inline struct cfs_rq *group_cfs_rq(struct sched_entity *grp)
    1433             : {
    1434             :         return grp->my_q;
    1435             : }
    1436             : 
    1437             : #else
    1438             : 
    1439             : #define task_of(_se)    container_of(_se, struct task_struct, se)
    1440             : 
    1441             : static inline struct cfs_rq *task_cfs_rq(const struct task_struct *p)
    1442             : {
    1443        1203 :         return &task_rq(p)->cfs;
    1444             : }
    1445             : 
    1446             : static inline struct cfs_rq *cfs_rq_of(const struct sched_entity *se)
    1447             : {
    1448        6705 :         const struct task_struct *p = task_of(se);
    1449        6705 :         struct rq *rq = task_rq(p);
    1450             : 
    1451             :         return &rq->cfs;
    1452             : }
    1453             : 
    1454             : /* runqueue "owned" by this group */
    1455             : static inline struct cfs_rq *group_cfs_rq(struct sched_entity *grp)
    1456             : {
    1457             :         return NULL;
    1458             : }
    1459             : #endif
    1460             : 
    1461             : extern void update_rq_clock(struct rq *rq);
    1462             : 
    1463             : /*
    1464             :  * rq::clock_update_flags bits
    1465             :  *
    1466             :  * %RQCF_REQ_SKIP - will request skipping of clock update on the next
    1467             :  *  call to __schedule(). This is an optimisation to avoid
    1468             :  *  neighbouring rq clock updates.
    1469             :  *
    1470             :  * %RQCF_ACT_SKIP - is set from inside of __schedule() when skipping is
    1471             :  *  in effect and calls to update_rq_clock() are being ignored.
    1472             :  *
    1473             :  * %RQCF_UPDATED - is a debug flag that indicates whether a call has been
    1474             :  *  made to update_rq_clock() since the last time rq::lock was pinned.
    1475             :  *
    1476             :  * If inside of __schedule(), clock_update_flags will have been
    1477             :  * shifted left (a left shift is a cheap operation for the fast path
    1478             :  * to promote %RQCF_REQ_SKIP to %RQCF_ACT_SKIP), so you must use,
    1479             :  *
    1480             :  *      if (rq->clock_update_flags >= RQCF_UPDATED)
    1481             :  *
    1482             :  * to check if %RQCF_UPDATED is set. It'll never be shifted more than
    1483             :  * one position though, because the next rq_unpin_lock() will shift it
    1484             :  * back.
    1485             :  */
    1486             : #define RQCF_REQ_SKIP           0x01
    1487             : #define RQCF_ACT_SKIP           0x02
    1488             : #define RQCF_UPDATED            0x04
    1489             : 
    1490             : static inline void assert_clock_updated(struct rq *rq)
    1491             : {
    1492             :         /*
    1493             :          * The only reason for not seeing a clock update since the
    1494             :          * last rq_pin_lock() is if we're currently skipping updates.
    1495             :          */
    1496             :         SCHED_WARN_ON(rq->clock_update_flags < RQCF_ACT_SKIP);
    1497             : }
    1498             : 
    1499             : static inline u64 rq_clock(struct rq *rq)
    1500             : {
    1501           0 :         lockdep_assert_rq_held(rq);
    1502           0 :         assert_clock_updated(rq);
    1503             : 
    1504             :         return rq->clock;
    1505             : }
    1506             : 
    1507             : static inline u64 rq_clock_task(struct rq *rq)
    1508             : {
    1509        4999 :         lockdep_assert_rq_held(rq);
    1510        4999 :         assert_clock_updated(rq);
    1511             : 
    1512             :         return rq->clock_task;
    1513             : }
    1514             : 
    1515             : /**
    1516             :  * By default the decay period is the default PELT decay period (32 ms).
    1517             :  * Each increment of the decay shift doubles the decay period,
    1518             :  * i.e. decay period = 32 ms << decay shift:
    1519             :  *  Decay shift         Decay period(ms)
    1520             :  *      0                       32
    1521             :  *      1                       64
    1522             :  *      2                       128
    1523             :  *      3                       256
    1524             :  *      4                       512
    1525             :  */
    1526             : extern int sched_thermal_decay_shift;
    1527             : 
    1528             : static inline u64 rq_clock_thermal(struct rq *rq)
    1529             : {
    1530           5 :         return rq_clock_task(rq) >> sched_thermal_decay_shift;
    1531             : }
    1532             : 
    1533             : static inline void rq_clock_skip_update(struct rq *rq)
    1534             : {
    1535         505 :         lockdep_assert_rq_held(rq);
    1536         505 :         rq->clock_update_flags |= RQCF_REQ_SKIP;
    1537             : }
    1538             : 
    1539             : /*
    1540             :  * See rt task throttling, which is the only time a skip
    1541             :  * request is canceled.
    1542             :  */
    1543             : static inline void rq_clock_cancel_skipupdate(struct rq *rq)
    1544             : {
    1545           0 :         lockdep_assert_rq_held(rq);
    1546           0 :         rq->clock_update_flags &= ~RQCF_REQ_SKIP;
    1547             : }
    1548             : 
    1549             : /*
    1550             :  * During cpu offlining and rq wide unthrottling, we can trigger
    1551             :  * an update_rq_clock() for several cfs and rt runqueues (Typically
    1552             :  * when using list_for_each_entry_*).
    1553             :  * rq_clock_start_loop_update() can be called after updating the clock
    1554             :  * once and before iterating over the list to prevent multiple updates.
    1555             :  * After the iterative traversal, we need to call rq_clock_stop_loop_update()
    1556             :  * to clear RQCF_ACT_SKIP of rq->clock_update_flags.
    1557             :  */
    1558             : static inline void rq_clock_start_loop_update(struct rq *rq)
    1559             : {
    1560             :         lockdep_assert_rq_held(rq);
    1561             :         SCHED_WARN_ON(rq->clock_update_flags & RQCF_ACT_SKIP);
    1562             :         rq->clock_update_flags |= RQCF_ACT_SKIP;
    1563             : }
    1564             : 
    1565             : static inline void rq_clock_stop_loop_update(struct rq *rq)
    1566             : {
    1567             :         lockdep_assert_rq_held(rq);
    1568             :         rq->clock_update_flags &= ~RQCF_ACT_SKIP;
    1569             : }
    1570             : 
    1571             : struct rq_flags {
    1572             :         unsigned long flags;
    1573             :         struct pin_cookie cookie;
    1574             : #ifdef CONFIG_SCHED_DEBUG
    1575             :         /*
    1576             :          * A copy of (rq::clock_update_flags & RQCF_UPDATED) for the
    1577             :          * current pin context is stashed here in case it needs to be
    1578             :          * restored in rq_repin_lock().
    1579             :          */
    1580             :         unsigned int clock_update_flags;
    1581             : #endif
    1582             : };
    1583             : 
    1584             : extern struct balance_callback balance_push_callback;
    1585             : 
    1586             : /*
    1587             :  * Lockdep annotation that avoids accidental unlocks; it's like a
    1588             :  * sticky/continuous lockdep_assert_held().
    1589             :  *
    1590             :  * This avoids code that has access to 'struct rq *rq' (basically everything in
    1591             :  * the scheduler) from accidentally unlocking the rq if they do not also have a
    1592             :  * copy of the (on-stack) 'struct rq_flags rf'.
    1593             :  *
    1594             :  * Also see Documentation/locking/lockdep-design.rst.
    1595             :  */
    1596             : static inline void rq_pin_lock(struct rq *rq, struct rq_flags *rf)
    1597             : {
    1598             :         rf->cookie = lockdep_pin_lock(__rq_lockp(rq));
    1599             : 
    1600             : #ifdef CONFIG_SCHED_DEBUG
    1601             :         rq->clock_update_flags &= (RQCF_REQ_SKIP|RQCF_ACT_SKIP);
    1602             :         rf->clock_update_flags = 0;
    1603             : #ifdef CONFIG_SMP
    1604             :         SCHED_WARN_ON(rq->balance_callback && rq->balance_callback != &balance_push_callback);
    1605             : #endif
    1606             : #endif
    1607             : }
    1608             : 
    1609             : static inline void rq_unpin_lock(struct rq *rq, struct rq_flags *rf)
    1610             : {
    1611             : #ifdef CONFIG_SCHED_DEBUG
    1612             :         if (rq->clock_update_flags > RQCF_ACT_SKIP)
    1613             :                 rf->clock_update_flags = RQCF_UPDATED;
    1614             : #endif
    1615             : 
    1616        1402 :         lockdep_unpin_lock(__rq_lockp(rq), rf->cookie);
    1617             : }
    1618             : 
    1619             : static inline void rq_repin_lock(struct rq *rq, struct rq_flags *rf)
    1620             : {
    1621             :         lockdep_repin_lock(__rq_lockp(rq), rf->cookie);
    1622             : 
    1623             : #ifdef CONFIG_SCHED_DEBUG
    1624             :         /*
    1625             :          * Restore the value we stashed in @rf for this pin context.
    1626             :          */
    1627             :         rq->clock_update_flags |= rf->clock_update_flags;
    1628             : #endif
    1629             : }
    1630             : 
    1631             : struct rq *__task_rq_lock(struct task_struct *p, struct rq_flags *rf)
    1632             :         __acquires(rq->lock);
    1633             : 
    1634             : struct rq *task_rq_lock(struct task_struct *p, struct rq_flags *rf)
    1635             :         __acquires(p->pi_lock)
    1636             :         __acquires(rq->lock);
    1637             : 
    1638             : static inline void __task_rq_unlock(struct rq *rq, struct rq_flags *rf)
    1639             :         __releases(rq->lock)
    1640             : {
    1641           0 :         rq_unpin_lock(rq, rf);
    1642           0 :         raw_spin_rq_unlock(rq);
    1643             : }
    1644             : 
    1645             : static inline void
    1646             : task_rq_unlock(struct rq *rq, struct task_struct *p, struct rq_flags *rf)
    1647             :         __releases(rq->lock)
    1648             :         __releases(p->pi_lock)
    1649             : {
    1650         730 :         rq_unpin_lock(rq, rf);
    1651         365 :         raw_spin_rq_unlock(rq);
    1652         730 :         raw_spin_unlock_irqrestore(&p->pi_lock, rf->flags);
    1653             : }
    1654             : 
    1655             : static inline void
    1656             : rq_lock_irqsave(struct rq *rq, struct rq_flags *rf)
    1657             :         __acquires(rq->lock)
    1658             : {
    1659             :         raw_spin_rq_lock_irqsave(rq, rf->flags);
    1660             :         rq_pin_lock(rq, rf);
    1661             : }
    1662             : 
    1663             : static inline void
    1664             : rq_lock_irq(struct rq *rq, struct rq_flags *rf)
    1665             :         __acquires(rq->lock)
    1666             : {
    1667             :         raw_spin_rq_lock_irq(rq);
    1668             :         rq_pin_lock(rq, rf);
    1669             : }
    1670             : 
    1671             : static inline void
    1672             : rq_lock(struct rq *rq, struct rq_flags *rf)
    1673             :         __acquires(rq->lock)
    1674             : {
    1675        2068 :         raw_spin_rq_lock(rq);
    1676        2068 :         rq_pin_lock(rq, rf);
    1677             : }
    1678             : 
    1679             : static inline void
    1680             : rq_unlock_irqrestore(struct rq *rq, struct rq_flags *rf)
    1681             :         __releases(rq->lock)
    1682             : {
    1683             :         rq_unpin_lock(rq, rf);
    1684             :         raw_spin_rq_unlock_irqrestore(rq, rf->flags);
    1685             : }
    1686             : 
    1687             : static inline void
    1688             : rq_unlock_irq(struct rq *rq, struct rq_flags *rf)
    1689             :         __releases(rq->lock)
    1690             : {
    1691           0 :         rq_unpin_lock(rq, rf);
    1692           0 :         raw_spin_rq_unlock_irq(rq);
    1693             : }
    1694             : 
    1695             : static inline void
    1696             : rq_unlock(struct rq *rq, struct rq_flags *rf)
    1697             :         __releases(rq->lock)
    1698             : {
    1699        1897 :         rq_unpin_lock(rq, rf);
    1700        1036 :         raw_spin_rq_unlock(rq);
    1701             : }
    1702             : 
    1703             : static inline struct rq *
    1704             : this_rq_lock_irq(struct rq_flags *rf)
    1705             :         __acquires(rq->lock)
    1706             : {
    1707             :         struct rq *rq;
    1708             : 
    1709             :         local_irq_disable();
    1710           0 :         rq = this_rq();
    1711           0 :         rq_lock(rq, rf);
    1712             :         return rq;
    1713             : }
    1714             : 
    1715             : #ifdef CONFIG_NUMA
    1716             : enum numa_topology_type {
    1717             :         NUMA_DIRECT,
    1718             :         NUMA_GLUELESS_MESH,
    1719             :         NUMA_BACKPLANE,
    1720             : };
    1721             : extern enum numa_topology_type sched_numa_topology_type;
    1722             : extern int sched_max_numa_distance;
    1723             : extern bool find_numa_distance(int distance);
    1724             : extern void sched_init_numa(int offline_node);
    1725             : extern void sched_update_numa(int cpu, bool online);
    1726             : extern void sched_domains_numa_masks_set(unsigned int cpu);
    1727             : extern void sched_domains_numa_masks_clear(unsigned int cpu);
    1728             : extern int sched_numa_find_closest(const struct cpumask *cpus, int cpu);
    1729             : #else
    1730             : static inline void sched_init_numa(int offline_node) { }
    1731             : static inline void sched_update_numa(int cpu, bool online) { }
    1732             : static inline void sched_domains_numa_masks_set(unsigned int cpu) { }
    1733             : static inline void sched_domains_numa_masks_clear(unsigned int cpu) { }
    1734             : static inline int sched_numa_find_closest(const struct cpumask *cpus, int cpu)
    1735             : {
    1736             :         return nr_cpu_ids;
    1737             : }
    1738             : #endif
    1739             : 
    1740             : #ifdef CONFIG_NUMA_BALANCING
    1741             : /* The regions in numa_faults array from task_struct */
    1742             : enum numa_faults_stats {
    1743             :         NUMA_MEM = 0,
    1744             :         NUMA_CPU,
    1745             :         NUMA_MEMBUF,
    1746             :         NUMA_CPUBUF
    1747             : };
    1748             : extern void sched_setnuma(struct task_struct *p, int node);
    1749             : extern int migrate_task_to(struct task_struct *p, int cpu);
    1750             : extern int migrate_swap(struct task_struct *p, struct task_struct *t,
    1751             :                         int cpu, int scpu);
    1752             : extern void init_numa_balancing(unsigned long clone_flags, struct task_struct *p);
    1753             : #else
    1754             : static inline void
    1755             : init_numa_balancing(unsigned long clone_flags, struct task_struct *p)
    1756             : {
    1757             : }
    1758             : #endif /* CONFIG_NUMA_BALANCING */
    1759             : 
    1760             : #ifdef CONFIG_SMP
    1761             : 
    1762             : static inline void
    1763             : queue_balance_callback(struct rq *rq,
    1764             :                        struct balance_callback *head,
    1765             :                        void (*func)(struct rq *rq))
    1766             : {
    1767             :         lockdep_assert_rq_held(rq);
    1768             : 
    1769             :         /*
    1770             :          * Don't (re)queue an already queued item; nor queue anything when
    1771             :          * balance_push() is active, see the comment with
    1772             :          * balance_push_callback.
    1773             :          */
    1774             :         if (unlikely(head->next || rq->balance_callback == &balance_push_callback))
    1775             :                 return;
    1776             : 
    1777             :         head->func = func;
    1778             :         head->next = rq->balance_callback;
    1779             :         rq->balance_callback = head;
    1780             : }
    1781             : 
    1782             : #define rcu_dereference_check_sched_domain(p) \
    1783             :         rcu_dereference_check((p), \
    1784             :                               lockdep_is_held(&sched_domains_mutex))
    1785             : 
    1786             : /*
    1787             :  * The domain tree (rq->sd) is protected by RCU's quiescent state transition.
    1788             :  * See destroy_sched_domains: call_rcu for details.
    1789             :  *
    1790             :  * The domain tree of any CPU may only be accessed from within
    1791             :  * preempt-disabled sections.
    1792             :  */
    1793             : #define for_each_domain(cpu, __sd) \
    1794             :         for (__sd = rcu_dereference_check_sched_domain(cpu_rq(cpu)->sd); \
    1795             :                         __sd; __sd = __sd->parent)
    1796             : 
    1797             : /* A mask of all the SD flags that have the SDF_SHARED_CHILD metaflag */
    1798             : #define SD_FLAG(name, mflags) (name * !!((mflags) & SDF_SHARED_CHILD)) |
    1799             : static const unsigned int SD_SHARED_CHILD_MASK =
    1800             : #include <linux/sched/sd_flags.h>
    1801             : 0;
    1802             : #undef SD_FLAG
    1803             : 
    1804             : /**
    1805             :  * highest_flag_domain - Return highest sched_domain containing flag.
    1806             :  * @cpu:        The CPU whose highest level of sched domain is to
    1807             :  *              be returned.
    1808             :  * @flag:       The flag to check for the highest sched_domain
    1809             :  *              for the given CPU.
    1810             :  *
    1811             :  * Returns the highest sched_domain of a CPU which contains @flag. If @flag has
    1812             :  * the SDF_SHARED_CHILD metaflag, all the children domains also have @flag.
    1813             :  */
    1814             : static inline struct sched_domain *highest_flag_domain(int cpu, int flag)
    1815             : {
    1816             :         struct sched_domain *sd, *hsd = NULL;
    1817             : 
    1818             :         for_each_domain(cpu, sd) {
    1819             :                 if (sd->flags & flag) {
    1820             :                         hsd = sd;
    1821             :                         continue;
    1822             :                 }
    1823             : 
    1824             :                 /*
    1825             :                  * Stop the search if @flag is known to be shared at lower
    1826             :                  * levels. It will not be found further up.
    1827             :                  */
    1828             :                 if (flag & SD_SHARED_CHILD_MASK)
    1829             :                         break;
    1830             :         }
    1831             : 
    1832             :         return hsd;
    1833             : }
    1834             : 
    1835             : static inline struct sched_domain *lowest_flag_domain(int cpu, int flag)
    1836             : {
    1837             :         struct sched_domain *sd;
    1838             : 
    1839             :         for_each_domain(cpu, sd) {
    1840             :                 if (sd->flags & flag)
    1841             :                         break;
    1842             :         }
    1843             : 
    1844             :         return sd;
    1845             : }
    1846             : 
    1847             : DECLARE_PER_CPU(struct sched_domain __rcu *, sd_llc);
    1848             : DECLARE_PER_CPU(int, sd_llc_size);
    1849             : DECLARE_PER_CPU(int, sd_llc_id);
    1850             : DECLARE_PER_CPU(struct sched_domain_shared __rcu *, sd_llc_shared);
    1851             : DECLARE_PER_CPU(struct sched_domain __rcu *, sd_numa);
    1852             : DECLARE_PER_CPU(struct sched_domain __rcu *, sd_asym_packing);
    1853             : DECLARE_PER_CPU(struct sched_domain __rcu *, sd_asym_cpucapacity);
    1854             : extern struct static_key_false sched_asym_cpucapacity;
    1855             : 
    1856             : static __always_inline bool sched_asym_cpucap_active(void)
    1857             : {
    1858             :         return static_branch_unlikely(&sched_asym_cpucapacity);
    1859             : }
    1860             : 
    1861             : struct sched_group_capacity {
    1862             :         atomic_t                ref;
    1863             :         /*
    1864             :          * CPU capacity of this group, SCHED_CAPACITY_SCALE being max capacity
    1865             :          * for a single CPU.
    1866             :          */
    1867             :         unsigned long           capacity;
    1868             :         unsigned long           min_capacity;           /* Min per-CPU capacity in group */
    1869             :         unsigned long           max_capacity;           /* Max per-CPU capacity in group */
    1870             :         unsigned long           next_update;
    1871             :         int                     imbalance;              /* XXX unrelated to capacity but shared group state */
    1872             : 
    1873             : #ifdef CONFIG_SCHED_DEBUG
    1874             :         int                     id;
    1875             : #endif
    1876             : 
    1877             :         unsigned long           cpumask[];              /* Balance mask */
    1878             : };
    1879             : 
    1880             : struct sched_group {
    1881             :         struct sched_group      *next;                  /* Must be a circular list */
    1882             :         atomic_t                ref;
    1883             : 
    1884             :         unsigned int            group_weight;
    1885             :         struct sched_group_capacity *sgc;
    1886             :         int                     asym_prefer_cpu;        /* CPU of highest priority in group */
    1887             :         int                     flags;
    1888             : 
    1889             :         /*
    1890             :          * The CPUs this group covers.
    1891             :          *
    1892             :          * NOTE: this field is variable length. (Allocated dynamically
    1893             :          * by attaching extra space to the end of the structure,
    1894             :          * depending on how many CPUs the kernel has booted up with)
    1895             :          */
    1896             :         unsigned long           cpumask[];
    1897             : };
    1898             : 
    1899             : static inline struct cpumask *sched_group_span(struct sched_group *sg)
    1900             : {
    1901             :         return to_cpumask(sg->cpumask);
    1902             : }
    1903             : 
    1904             : /*
    1905             :  * See build_balance_mask().
    1906             :  */
    1907             : static inline struct cpumask *group_balance_mask(struct sched_group *sg)
    1908             : {
    1909             :         return to_cpumask(sg->sgc->cpumask);
    1910             : }
    1911             : 
    1912             : extern int group_balance_cpu(struct sched_group *sg);
    1913             : 
    1914             : #ifdef CONFIG_SCHED_DEBUG
    1915             : void update_sched_domain_debugfs(void);
    1916             : void dirty_sched_domain_sysctl(int cpu);
    1917             : #else
    1918             : static inline void update_sched_domain_debugfs(void)
    1919             : {
    1920             : }
    1921             : static inline void dirty_sched_domain_sysctl(int cpu)
    1922             : {
    1923             : }
    1924             : #endif
    1925             : 
    1926             : extern int sched_update_scaling(void);
    1927             : 
    1928             : static inline const struct cpumask *task_user_cpus(struct task_struct *p)
    1929             : {
    1930             :         if (!p->user_cpus_ptr)
    1931             :                 return cpu_possible_mask; /* &init_task.cpus_mask */
    1932             :         return p->user_cpus_ptr;
    1933             : }
    1934             : #endif /* CONFIG_SMP */
    1935             : 
    1936             : #include "stats.h"
    1937             : 
    1938             : #if defined(CONFIG_SCHED_CORE) && defined(CONFIG_SCHEDSTATS)
    1939             : 
    1940             : extern void __sched_core_account_forceidle(struct rq *rq);
    1941             : 
    1942             : static inline void sched_core_account_forceidle(struct rq *rq)
    1943             : {
    1944             :         if (schedstat_enabled())
    1945             :                 __sched_core_account_forceidle(rq);
    1946             : }
    1947             : 
    1948             : extern void __sched_core_tick(struct rq *rq);
    1949             : 
    1950             : static inline void sched_core_tick(struct rq *rq)
    1951             : {
    1952             :         if (sched_core_enabled(rq) && schedstat_enabled())
    1953             :                 __sched_core_tick(rq);
    1954             : }
    1955             : 
    1956             : #else
    1957             : 
    1958             : static inline void sched_core_account_forceidle(struct rq *rq) {}
    1959             : 
    1960             : static inline void sched_core_tick(struct rq *rq) {}
    1961             : 
    1962             : #endif /* CONFIG_SCHED_CORE && CONFIG_SCHEDSTATS */
    1963             : 
    1964             : #ifdef CONFIG_CGROUP_SCHED
    1965             : 
    1966             : /*
    1967             :  * Return the group to which this task belongs.
    1968             :  *
    1969             :  * We cannot use task_css() and friends because the cgroup subsystem
    1970             :  * changes that value before the cgroup_subsys::attach() method is called,
    1971             :  * therefore we cannot pin it and might observe the wrong value.
    1972             :  *
    1973             :  * The same is true for autogroup's p->signal->autogroup->tg, the autogroup
    1974             :  * core changes this before calling sched_move_task().
    1975             :  *
    1976             :  * Instead we use a 'copy' which is updated from sched_move_task() while
    1977             :  * holding both task_struct::pi_lock and rq::lock.
    1978             :  */
    1979             : static inline struct task_group *task_group(struct task_struct *p)
    1980             : {
    1981             :         return p->sched_task_group;
    1982             : }
    1983             : 
    1984             : /* Change a task's cfs_rq and parent entity if it moves across CPUs/groups */
    1985             : static inline void set_task_rq(struct task_struct *p, unsigned int cpu)
    1986             : {
    1987             : #if defined(CONFIG_FAIR_GROUP_SCHED) || defined(CONFIG_RT_GROUP_SCHED)
    1988             :         struct task_group *tg = task_group(p);
    1989             : #endif
    1990             : 
    1991             : #ifdef CONFIG_FAIR_GROUP_SCHED
    1992             :         set_task_rq_fair(&p->se, p->se.cfs_rq, tg->cfs_rq[cpu]);
    1993             :         p->se.cfs_rq = tg->cfs_rq[cpu];
    1994             :         p->se.parent = tg->se[cpu];
    1995             :         p->se.depth = tg->se[cpu] ? tg->se[cpu]->depth + 1 : 0;
    1996             : #endif
    1997             : 
    1998             : #ifdef CONFIG_RT_GROUP_SCHED
    1999             :         p->rt.rt_rq  = tg->rt_rq[cpu];
    2000             :         p->rt.parent = tg->rt_se[cpu];
    2001             : #endif
    2002             : }
    2003             : 
    2004             : #else /* CONFIG_CGROUP_SCHED */
    2005             : 
    2006             : static inline void set_task_rq(struct task_struct *p, unsigned int cpu) { }
    2007             : static inline struct task_group *task_group(struct task_struct *p)
    2008             : {
    2009             :         return NULL;
    2010             : }
    2011             : 
    2012             : #endif /* CONFIG_CGROUP_SCHED */
    2013             : 
    2014             : static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu)
    2015             : {
    2016         176 :         set_task_rq(p, cpu);
    2017             : #ifdef CONFIG_SMP
    2018             :         /*
    2019             :          * After ->cpu is set up to a new value, task_rq_lock(p, ...) can be
    2020             :          * successfully executed on another CPU. We must ensure that updates of
    2021             :          * per-task data have been completed by this moment.
    2022             :          */
    2023             :         smp_wmb();
    2024             :         WRITE_ONCE(task_thread_info(p)->cpu, cpu);
    2025             :         p->wake_cpu = cpu;
    2026             : #endif
    2027             : }
    2028             : 
    2029             : /*
    2030             :  * Tunables that become constants when CONFIG_SCHED_DEBUG is off:
    2031             :  */
    2032             : #ifdef CONFIG_SCHED_DEBUG
    2033             : # define const_debug __read_mostly
    2034             : #else
    2035             : # define const_debug const
    2036             : #endif
    2037             : 
    2038             : #define SCHED_FEAT(name, enabled)       \
    2039             :         __SCHED_FEAT_##name ,
    2040             : 
    2041             : enum {
    2042             : #include "features.h"
    2043             :         __SCHED_FEAT_NR,
    2044             : };
    2045             : 
    2046             : #undef SCHED_FEAT
    2047             : 
    2048             : #ifdef CONFIG_SCHED_DEBUG
    2049             : 
    2050             : /*
    2051             :  * To support run-time toggling of sched features, all the translation units
    2052             :  * (but core.c) reference the sysctl_sched_features defined in core.c.
    2053             :  */
    2054             : extern const_debug unsigned int sysctl_sched_features;
    2055             : 
    2056             : #ifdef CONFIG_JUMP_LABEL
    2057             : #define SCHED_FEAT(name, enabled)                                       \
    2058             : static __always_inline bool static_branch_##name(struct static_key *key) \
    2059             : {                                                                       \
    2060             :         return static_key_##enabled(key);                               \
    2061             : }
    2062             : 
    2063             : #include "features.h"
    2064             : #undef SCHED_FEAT
    2065             : 
    2066             : extern struct static_key sched_feat_keys[__SCHED_FEAT_NR];
    2067             : #define sched_feat(x) (static_branch_##x(&sched_feat_keys[__SCHED_FEAT_##x]))
    2068             : 
    2069             : #else /* !CONFIG_JUMP_LABEL */
    2070             : 
    2071             : #define sched_feat(x) (sysctl_sched_features & (1UL << __SCHED_FEAT_##x))
    2072             : 
    2073             : #endif /* CONFIG_JUMP_LABEL */
    2074             : 
    2075             : #else /* !SCHED_DEBUG */
    2076             : 
    2077             : /*
    2078             :  * Each translation unit has its own copy of sysctl_sched_features to allow
    2079             :  * constant propagation at compile time and compiler optimization based on
    2080             :  * the features' default values.
    2081             :  */
    2082             : #define SCHED_FEAT(name, enabled)       \
    2083             :         (1UL << __SCHED_FEAT_##name) * enabled |
    2084             : static const_debug __maybe_unused unsigned int sysctl_sched_features =
    2085             : #include "features.h"
    2086             :         0;
    2087             : #undef SCHED_FEAT
    2088             : 
    2089             : #define sched_feat(x) !!(sysctl_sched_features & (1UL << __SCHED_FEAT_##x))
    2090             : 
    2091             : #endif /* SCHED_DEBUG */
    2092             : 
    2093             : extern struct static_key_false sched_numa_balancing;
    2094             : extern struct static_key_false sched_schedstats;
    2095             : 
    2096             : static inline u64 global_rt_period(void)
    2097             : {
    2098           4 :         return (u64)sysctl_sched_rt_period * NSEC_PER_USEC;
    2099             : }
    2100             : 
    2101             : static inline u64 global_rt_runtime(void)
    2102             : {
    2103           4 :         if (sysctl_sched_rt_runtime < 0)
    2104             :                 return RUNTIME_INF;
    2105             : 
    2106           4 :         return (u64)sysctl_sched_rt_runtime * NSEC_PER_USEC;
    2107             : }
    2108             : 
    2109             : static inline int task_current(struct rq *rq, struct task_struct *p)
    2110             : {
    2111          12 :         return rq->curr == p;
    2112             : }
    2113             : 
    2114             : static inline int task_on_cpu(struct rq *rq, struct task_struct *p)
    2115             : {
    2116             : #ifdef CONFIG_SMP
    2117             :         return p->on_cpu;
    2118             : #else
    2119          36 :         return task_current(rq, p);
    2120             : #endif
    2121             : }
    2122             : 
    2123             : static inline int task_on_rq_queued(struct task_struct *p)
    2124             : {
    2125          12 :         return p->on_rq == TASK_ON_RQ_QUEUED;
    2126             : }
    2127             : 
    2128             : static inline int task_on_rq_migrating(struct task_struct *p)
    2129             : {
    2130        2429 :         return READ_ONCE(p->on_rq) == TASK_ON_RQ_MIGRATING;
    2131             : }
    2132             : 
    2133             : /* Wake flags. The first three directly map to some SD flag value */
    2134             : #define WF_EXEC     0x02 /* Wakeup after exec; maps to SD_BALANCE_EXEC */
    2135             : #define WF_FORK     0x04 /* Wakeup after fork; maps to SD_BALANCE_FORK */
    2136             : #define WF_TTWU     0x08 /* Wakeup;            maps to SD_BALANCE_WAKE */
    2137             : 
    2138             : #define WF_SYNC     0x10 /* Waker goes to sleep after wakeup */
    2139             : #define WF_MIGRATED 0x20 /* Internal use, task got migrated */
    2140             : 
    2141             : #ifdef CONFIG_SMP
    2142             : static_assert(WF_EXEC == SD_BALANCE_EXEC);
    2143             : static_assert(WF_FORK == SD_BALANCE_FORK);
    2144             : static_assert(WF_TTWU == SD_BALANCE_WAKE);
    2145             : #endif
    2146             : 
    2147             : /*
    2148             :  * To aid in avoiding the subversion of "niceness" due to uneven distribution
    2149             :  * of tasks with abnormal "nice" values across CPUs, the contribution that
    2150             :  * each task makes to its run queue's load is weighted according to its
    2151             :  * scheduling class and "nice" value. For SCHED_NORMAL tasks this is just a
    2152             :  * scaled version of the new time slice allocation that they receive on time
    2153             :  * slice expiry etc.
    2154             :  */
    2155             : 
    2156             : #define WEIGHT_IDLEPRIO         3
    2157             : #define WMULT_IDLEPRIO          1431655765
    2158             : 
    2159             : extern const int                sched_prio_to_weight[40];
    2160             : extern const u32                sched_prio_to_wmult[40];
    2161             : 
    2162             : /*
    2163             :  * {de,en}queue flags:
    2164             :  *
    2165             :  * DEQUEUE_SLEEP  - task is no longer runnable
    2166             :  * ENQUEUE_WAKEUP - task just became runnable
    2167             :  *
    2168             :  * SAVE/RESTORE - an otherwise spurious dequeue/enqueue, done to ensure tasks
    2169             :  *                are in a known state which allows modification. Such pairs
    2170             :  *                should preserve as much state as possible.
    2171             :  *
    2172             :  * MOVE - paired with SAVE/RESTORE, explicitly does not preserve the location
    2173             :  *        in the runqueue.
    2174             :  *
    2175             :  * ENQUEUE_HEAD      - place at front of runqueue (tail if not specified)
    2176             :  * ENQUEUE_REPLENISH - CBS (replenish runtime and postpone deadline)
    2177             :  * ENQUEUE_MIGRATED  - the task was migrated during wakeup
    2178             :  *
    2179             :  */
    2180             : 
    2181             : #define DEQUEUE_SLEEP           0x01
    2182             : #define DEQUEUE_SAVE            0x02 /* Matches ENQUEUE_RESTORE */
    2183             : #define DEQUEUE_MOVE            0x04 /* Matches ENQUEUE_MOVE */
    2184             : #define DEQUEUE_NOCLOCK         0x08 /* Matches ENQUEUE_NOCLOCK */
    2185             : 
    2186             : #define ENQUEUE_WAKEUP          0x01
    2187             : #define ENQUEUE_RESTORE         0x02
    2188             : #define ENQUEUE_MOVE            0x04
    2189             : #define ENQUEUE_NOCLOCK         0x08
    2190             : 
    2191             : #define ENQUEUE_HEAD            0x10
    2192             : #define ENQUEUE_REPLENISH       0x20
    2193             : #ifdef CONFIG_SMP
    2194             : #define ENQUEUE_MIGRATED        0x40
    2195             : #else
    2196             : #define ENQUEUE_MIGRATED        0x00
    2197             : #endif
    2198             : 
    2199             : #define RETRY_TASK              ((void *)-1UL)
    2200             : 
    2201             : struct affinity_context {
    2202             :         const struct cpumask *new_mask;
    2203             :         struct cpumask *user_mask;
    2204             :         unsigned int flags;
    2205             : };
    2206             : 
    2207             : struct sched_class {
    2208             : 
    2209             : #ifdef CONFIG_UCLAMP_TASK
    2210             :         int uclamp_enabled;
    2211             : #endif
    2212             : 
    2213             :         void (*enqueue_task) (struct rq *rq, struct task_struct *p, int flags);
    2214             :         void (*dequeue_task) (struct rq *rq, struct task_struct *p, int flags);
    2215             :         void (*yield_task)   (struct rq *rq);
    2216             :         bool (*yield_to_task)(struct rq *rq, struct task_struct *p);
    2217             : 
    2218             :         void (*check_preempt_curr)(struct rq *rq, struct task_struct *p, int flags);
    2219             : 
    2220             :         struct task_struct *(*pick_next_task)(struct rq *rq);
    2221             : 
    2222             :         void (*put_prev_task)(struct rq *rq, struct task_struct *p);
    2223             :         void (*set_next_task)(struct rq *rq, struct task_struct *p, bool first);
    2224             : 
    2225             : #ifdef CONFIG_SMP
    2226             :         int (*balance)(struct rq *rq, struct task_struct *prev, struct rq_flags *rf);
    2227             :         int  (*select_task_rq)(struct task_struct *p, int task_cpu, int flags);
    2228             : 
    2229             :         struct task_struct * (*pick_task)(struct rq *rq);
    2230             : 
    2231             :         void (*migrate_task_rq)(struct task_struct *p, int new_cpu);
    2232             : 
    2233             :         void (*task_woken)(struct rq *this_rq, struct task_struct *task);
    2234             : 
    2235             :         void (*set_cpus_allowed)(struct task_struct *p, struct affinity_context *ctx);
    2236             : 
    2237             :         void (*rq_online)(struct rq *rq);
    2238             :         void (*rq_offline)(struct rq *rq);
    2239             : 
    2240             :         struct rq *(*find_lock_rq)(struct task_struct *p, struct rq *rq);
    2241             : #endif
    2242             : 
    2243             :         void (*task_tick)(struct rq *rq, struct task_struct *p, int queued);
    2244             :         void (*task_fork)(struct task_struct *p);
    2245             :         void (*task_dead)(struct task_struct *p);
    2246             : 
    2247             :         /*
    2248             :          * The switched_from() call is allowed to drop rq->lock, therefore we
    2249             :          * cannot assume the switched_from/switched_to pair is serialized by
    2250             :          * rq->lock. They are however serialized by p->pi_lock.
    2251             :          */
    2252             :         void (*switched_from)(struct rq *this_rq, struct task_struct *task);
    2253             :         void (*switched_to)  (struct rq *this_rq, struct task_struct *task);
    2254             :         void (*prio_changed) (struct rq *this_rq, struct task_struct *task,
    2255             :                               int oldprio);
    2256             : 
    2257             :         unsigned int (*get_rr_interval)(struct rq *rq,
    2258             :                                         struct task_struct *task);
    2259             : 
    2260             :         void (*update_curr)(struct rq *rq);
    2261             : 
    2262             : #ifdef CONFIG_FAIR_GROUP_SCHED
    2263             :         void (*task_change_group)(struct task_struct *p);
    2264             : #endif
    2265             : 
    2266             : #ifdef CONFIG_SCHED_CORE
    2267             :         int (*task_is_throttled)(struct task_struct *p, int cpu);
    2268             : #endif
    2269             : };
    2270             : 
    2271        1036 : static inline void put_prev_task(struct rq *rq, struct task_struct *prev)
    2272             : {
    2273        1036 :         WARN_ON_ONCE(rq->curr != prev);
    2274        1036 :         prev->sched_class->put_prev_task(rq, prev);
    2275        1036 : }
    2276             : 
    2277             : static inline void set_next_task(struct rq *rq, struct task_struct *next)
    2278             : {
    2279           4 :         next->sched_class->set_next_task(rq, next, false);
    2280             : }
    2281             : 
    2282             : 
    2283             : /*
    2284             :  * Helper to define a sched_class instance; each one is placed in a separate
    2285             :  * section which is ordered by the linker script:
    2286             :  *
    2287             :  *   include/asm-generic/vmlinux.lds.h
    2288             :  *
    2289             :  * *CAREFUL* they are laid out in *REVERSE* order!!!
    2290             :  *
    2291             :  * Also enforce alignment on the instance, not the type, to guarantee layout.
    2292             :  */
    2293             : #define DEFINE_SCHED_CLASS(name) \
    2294             : const struct sched_class name##_sched_class \
    2295             :         __aligned(__alignof__(struct sched_class)) \
    2296             :         __section("__" #name "_sched_class")
    2297             : 
    2298             : /* Defined in include/asm-generic/vmlinux.lds.h */
    2299             : extern struct sched_class __sched_class_highest[];
    2300             : extern struct sched_class __sched_class_lowest[];
    2301             : 
    2302             : #define for_class_range(class, _from, _to) \
    2303             :         for (class = (_from); class < (_to); class++)
    2304             : 
    2305             : #define for_each_class(class) \
    2306             :         for_class_range(class, __sched_class_highest, __sched_class_lowest)
    2307             : 
    2308             : #define sched_class_above(_a, _b)       ((_a) < (_b))
    2309             : 
    2310             : extern const struct sched_class stop_sched_class;
    2311             : extern const struct sched_class dl_sched_class;
    2312             : extern const struct sched_class rt_sched_class;
    2313             : extern const struct sched_class fair_sched_class;
    2314             : extern const struct sched_class idle_sched_class;
    2315             : 
    2316             : static inline bool sched_stop_runnable(struct rq *rq)
    2317             : {
    2318             :         return rq->stop && task_on_rq_queued(rq->stop);
    2319             : }
    2320             : 
    2321             : static inline bool sched_dl_runnable(struct rq *rq)
    2322             : {
    2323             :         return rq->dl.dl_nr_running > 0;
    2324             : }
    2325             : 
    2326             : static inline bool sched_rt_runnable(struct rq *rq)
    2327             : {
    2328             :         return rq->rt.rt_queued > 0;
    2329             : }
    2330             : 
    2331             : static inline bool sched_fair_runnable(struct rq *rq)
    2332             : {
    2333             :         return rq->cfs.nr_running > 0;
    2334             : }
    2335             : 
    2336             : extern struct task_struct *pick_next_task_fair(struct rq *rq, struct task_struct *prev, struct rq_flags *rf);
    2337             : extern struct task_struct *pick_next_task_idle(struct rq *rq);
    2338             : 
    2339             : #define SCA_CHECK               0x01
    2340             : #define SCA_MIGRATE_DISABLE     0x02
    2341             : #define SCA_MIGRATE_ENABLE      0x04
    2342             : #define SCA_USER                0x08
    2343             : 
    2344             : #ifdef CONFIG_SMP
    2345             : 
    2346             : extern void update_group_capacity(struct sched_domain *sd, int cpu);
    2347             : 
    2348             : extern void trigger_load_balance(struct rq *rq);
    2349             : 
    2350             : extern void set_cpus_allowed_common(struct task_struct *p, struct affinity_context *ctx);
    2351             : 
    2352             : static inline struct task_struct *get_push_task(struct rq *rq)
    2353             : {
    2354             :         struct task_struct *p = rq->curr;
    2355             : 
    2356             :         lockdep_assert_rq_held(rq);
    2357             : 
    2358             :         if (rq->push_busy)
    2359             :                 return NULL;
    2360             : 
    2361             :         if (p->nr_cpus_allowed == 1)
    2362             :                 return NULL;
    2363             : 
    2364             :         if (p->migration_disabled)
    2365             :                 return NULL;
    2366             : 
    2367             :         rq->push_busy = true;
    2368             :         return get_task_struct(p);
    2369             : }
    2370             : 
    2371             : extern int push_cpu_stop(void *arg);
    2372             : 
    2373             : #endif
    2374             : 
    2375             : #ifdef CONFIG_CPU_IDLE
    2376             : static inline void idle_set_state(struct rq *rq,
    2377             :                                   struct cpuidle_state *idle_state)
    2378             : {
    2379             :         rq->idle_state = idle_state;
    2380             : }
    2381             : 
    2382             : static inline struct cpuidle_state *idle_get_state(struct rq *rq)
    2383             : {
    2384             :         SCHED_WARN_ON(!rcu_read_lock_held());
    2385             : 
    2386             :         return rq->idle_state;
    2387             : }
    2388             : #else
    2389             : static inline void idle_set_state(struct rq *rq,
    2390             :                                   struct cpuidle_state *idle_state)
    2391             : {
    2392             : }
    2393             : 
    2394             : static inline struct cpuidle_state *idle_get_state(struct rq *rq)
    2395             : {
    2396             :         return NULL;
    2397             : }
    2398             : #endif
    2399             : 
    2400             : extern void schedule_idle(void);
    2401             : 
    2402             : extern void sysrq_sched_debug_show(void);
    2403             : extern void sched_init_granularity(void);
    2404             : extern void update_max_interval(void);
    2405             : 
    2406             : extern void init_sched_dl_class(void);
    2407             : extern void init_sched_rt_class(void);
    2408             : extern void init_sched_fair_class(void);
    2409             : 
    2410             : extern void reweight_task(struct task_struct *p, int prio);
    2411             : 
    2412             : extern void resched_curr(struct rq *rq);
    2413             : extern void resched_cpu(int cpu);
    2414             : 
    2415             : extern struct rt_bandwidth def_rt_bandwidth;
    2416             : extern void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime);
    2417             : extern bool sched_rt_bandwidth_account(struct rt_rq *rt_rq);
    2418             : 
    2419             : extern void init_dl_task_timer(struct sched_dl_entity *dl_se);
    2420             : extern void init_dl_inactive_task_timer(struct sched_dl_entity *dl_se);
    2421             : 
    2422             : #define BW_SHIFT                20
    2423             : #define BW_UNIT                 (1 << BW_SHIFT)
    2424             : #define RATIO_SHIFT             8
    2425             : #define MAX_BW_BITS             (64 - BW_SHIFT)
    2426             : #define MAX_BW                  ((1ULL << MAX_BW_BITS) - 1)
    2427             : unsigned long to_ratio(u64 period, u64 runtime);
    2428             : 
    2429             : extern void init_entity_runnable_average(struct sched_entity *se);
    2430             : extern void post_init_entity_util_avg(struct task_struct *p);
    2431             : 
    2432             : #ifdef CONFIG_NO_HZ_FULL
    2433             : extern bool sched_can_stop_tick(struct rq *rq);
    2434             : extern int __init sched_tick_offload_init(void);
    2435             : 
    2436             : /*
    2437             :  * Tick may be needed by tasks in the runqueue depending on their policy and
    2438             :  * requirements. If tick is needed, let's send the target an IPI to kick it out of
    2439             :  * nohz mode if necessary.
    2440             :  */
    2441             : static inline void sched_update_tick_dependency(struct rq *rq)
    2442             : {
    2443             :         int cpu = cpu_of(rq);
    2444             : 
    2445             :         if (!tick_nohz_full_cpu(cpu))
    2446             :                 return;
    2447             : 
    2448             :         if (sched_can_stop_tick(rq))
    2449             :                 tick_nohz_dep_clear_cpu(cpu, TICK_DEP_BIT_SCHED);
    2450             :         else
    2451             :                 tick_nohz_dep_set_cpu(cpu, TICK_DEP_BIT_SCHED);
    2452             : }
    2453             : #else
    2454             : static inline int sched_tick_offload_init(void) { return 0; }
    2455             : static inline void sched_update_tick_dependency(struct rq *rq) { }
    2456             : #endif
    2457             : 
    2458             : static inline void add_nr_running(struct rq *rq, unsigned count)
    2459             : {
    2460        1035 :         unsigned prev_nr = rq->nr_running;
    2461             : 
    2462        1035 :         rq->nr_running = prev_nr + count;
    2463             :         if (trace_sched_update_nr_running_tp_enabled()) {
    2464             :                 call_trace_sched_update_nr_running(rq, count);
    2465             :         }
    2466             : 
    2467             : #ifdef CONFIG_SMP
    2468             :         if (prev_nr < 2 && rq->nr_running >= 2) {
    2469             :                 if (!READ_ONCE(rq->rd->overload))
    2470             :                         WRITE_ONCE(rq->rd->overload, 1);
    2471             :         }
    2472             : #endif
    2473             : 
    2474        1035 :         sched_update_tick_dependency(rq);
    2475             : }
    2476             : 
    2477             : static inline void sub_nr_running(struct rq *rq, unsigned count)
    2478             : {
    2479        1033 :         rq->nr_running -= count;
    2480             :         if (trace_sched_update_nr_running_tp_enabled()) {
    2481             :                 call_trace_sched_update_nr_running(rq, -count);
    2482             :         }
    2483             : 
    2484             :         /* Check if we still need preemption */
    2485        1033 :         sched_update_tick_dependency(rq);
    2486             : }
    2487             : 
    2488             : extern void activate_task(struct rq *rq, struct task_struct *p, int flags);
    2489             : extern void deactivate_task(struct rq *rq, struct task_struct *p, int flags);
    2490             : 
    2491             : extern void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags);
    2492             : 
    2493             : #ifdef CONFIG_PREEMPT_RT
    2494             : #define SCHED_NR_MIGRATE_BREAK 8
    2495             : #else
    2496             : #define SCHED_NR_MIGRATE_BREAK 32
    2497             : #endif
    2498             : 
    2499             : extern const_debug unsigned int sysctl_sched_nr_migrate;
    2500             : extern const_debug unsigned int sysctl_sched_migration_cost;
    2501             : 
    2502             : #ifdef CONFIG_SCHED_DEBUG
    2503             : extern unsigned int sysctl_sched_latency;
    2504             : extern unsigned int sysctl_sched_min_granularity;
    2505             : extern unsigned int sysctl_sched_idle_min_granularity;
    2506             : extern unsigned int sysctl_sched_wakeup_granularity;
    2507             : extern int sysctl_resched_latency_warn_ms;
    2508             : extern int sysctl_resched_latency_warn_once;
    2509             : 
    2510             : extern unsigned int sysctl_sched_tunable_scaling;
    2511             : 
    2512             : extern unsigned int sysctl_numa_balancing_scan_delay;
    2513             : extern unsigned int sysctl_numa_balancing_scan_period_min;
    2514             : extern unsigned int sysctl_numa_balancing_scan_period_max;
    2515             : extern unsigned int sysctl_numa_balancing_scan_size;
    2516             : extern unsigned int sysctl_numa_balancing_hot_threshold;
    2517             : #endif
    2518             : 
    2519             : #ifdef CONFIG_SCHED_HRTICK
    2520             : 
    2521             : /*
    2522             :  * Use hrtick when:
    2523             :  *  - enabled by features
    2524             :  *  - hrtimer is actually high res
    2525             :  */
    2526             : static inline int hrtick_enabled(struct rq *rq)
    2527             : {
    2528             :         if (!cpu_active(cpu_of(rq)))
    2529             :                 return 0;
    2530             :         return hrtimer_is_hres_active(&rq->hrtick_timer);
    2531             : }
    2532             : 
    2533             : static inline int hrtick_enabled_fair(struct rq *rq)
    2534             : {
    2535             :         if (!sched_feat(HRTICK))
    2536             :                 return 0;
    2537             :         return hrtick_enabled(rq);
    2538             : }
    2539             : 
    2540             : static inline int hrtick_enabled_dl(struct rq *rq)
    2541             : {
    2542             :         if (!sched_feat(HRTICK_DL))
    2543             :                 return 0;
    2544             :         return hrtick_enabled(rq);
    2545             : }
    2546             : 
    2547             : void hrtick_start(struct rq *rq, u64 delay);
    2548             : 
    2549             : #else
    2550             : 
    2551             : static inline int hrtick_enabled_fair(struct rq *rq)
    2552             : {
    2553             :         return 0;
    2554             : }
    2555             : 
    2556             : static inline int hrtick_enabled_dl(struct rq *rq)
    2557             : {
    2558             :         return 0;
    2559             : }
    2560             : 
    2561             : static inline int hrtick_enabled(struct rq *rq)
    2562             : {
    2563             :         return 0;
    2564             : }
    2565             : 
    2566             : #endif /* CONFIG_SCHED_HRTICK */
    2567             : 
    2568             : #ifndef arch_scale_freq_tick
    2569             : static __always_inline
    2570             : void arch_scale_freq_tick(void)
    2571             : {
    2572             : }
    2573             : #endif
    2574             : 
    2575             : #ifndef arch_scale_freq_capacity
    2576             : /**
    2577             :  * arch_scale_freq_capacity - get the frequency scale factor of a given CPU.
    2578             :  * @cpu: the CPU in question.
    2579             :  *
    2580             :  * Return: the frequency scale factor normalized against SCHED_CAPACITY_SCALE, i.e.
    2581             :  *
    2582             :  *     f_curr
    2583             :  *     ------ * SCHED_CAPACITY_SCALE
    2584             :  *     f_max
    2585             :  */
    2586             : static __always_inline
    2587             : unsigned long arch_scale_freq_capacity(int cpu)
    2588             : {
    2589             :         return SCHED_CAPACITY_SCALE;
    2590             : }
    2591             : #endif
    2592             : 
    2593             : #ifdef CONFIG_SCHED_DEBUG
    2594             : /*
    2595             :  * In double_lock_balance()/double_rq_lock(), we use raw_spin_rq_lock() to
    2596             :  * acquire rq lock instead of rq_lock(). So at the end of these two functions
    2597             :  * we need to call double_rq_clock_clear_update() to clear RQCF_UPDATED of
    2598             :  * rq->clock_update_flags to avoid the WARN_DOUBLE_CLOCK warning.
    2599             :  */
    2600             : static inline void double_rq_clock_clear_update(struct rq *rq1, struct rq *rq2)
    2601             : {
    2602             :         rq1->clock_update_flags &= (RQCF_REQ_SKIP|RQCF_ACT_SKIP);
    2603             :         /* rq1 == rq2 for !CONFIG_SMP, so just clear RQCF_UPDATED once. */
    2604             : #ifdef CONFIG_SMP
    2605             :         rq2->clock_update_flags &= (RQCF_REQ_SKIP|RQCF_ACT_SKIP);
    2606             : #endif
    2607             : }
    2608             : #else
    2609             : static inline void double_rq_clock_clear_update(struct rq *rq1, struct rq *rq2) {}
    2610             : #endif
    2611             : 
    2612             : #ifdef CONFIG_SMP
    2613             : 
    2614             : static inline bool rq_order_less(struct rq *rq1, struct rq *rq2)
    2615             : {
    2616             : #ifdef CONFIG_SCHED_CORE
    2617             :         /*
    2618             :          * In order to not have {0,2},{1,3} turn into an AB-BA,
    2619             :          * order by core-id first and cpu-id second.
    2620             :          *
    2621             :          * Notably:
    2622             :          *
    2623             :          *      double_rq_lock(0,3); will take core-0, core-1 lock
    2624             :          *      double_rq_lock(1,2); will take core-1, core-0 lock
    2625             :          *
    2626             :          * when only cpu-id is considered.
    2627             :          */
    2628             :         if (rq1->core->cpu < rq2->core->cpu)
    2629             :                 return true;
    2630             :         if (rq1->core->cpu > rq2->core->cpu)
    2631             :                 return false;
    2632             : 
    2633             :         /*
    2634             :          * __sched_core_flip() relies on SMT having cpu-id lock order.
    2635             :          */
    2636             : #endif
    2637             :         return rq1->cpu < rq2->cpu;
    2638             : }
    2639             : 
    2640             : extern void double_rq_lock(struct rq *rq1, struct rq *rq2);
    2641             : 
    2642             : #ifdef CONFIG_PREEMPTION
    2643             : 
    2644             : /*
    2645             :  * fair double_lock_balance: Safely acquires both rq->locks in a fair
    2646             :  * way at the expense of forcing extra atomic operations in all
    2647             :  * invocations.  This assures that the double_lock is acquired using the
    2648             :  * same underlying policy as the spinlock_t on this architecture, which
    2649             :  * reduces latency compared to the unfair variant below.  However, it
    2650             :  * also adds more overhead and therefore may reduce throughput.
    2651             :  */
    2652             : static inline int _double_lock_balance(struct rq *this_rq, struct rq *busiest)
    2653             :         __releases(this_rq->lock)
    2654             :         __acquires(busiest->lock)
    2655             :         __acquires(this_rq->lock)
    2656             : {
    2657             :         raw_spin_rq_unlock(this_rq);
    2658             :         double_rq_lock(this_rq, busiest);
    2659             : 
    2660             :         return 1;
    2661             : }
    2662             : 
    2663             : #else
    2664             : /*
    2665             :  * Unfair double_lock_balance: Optimizes throughput at the expense of
    2666             :  * latency by eliminating extra atomic operations when the locks are
    2667             :  * already in proper order on entry.  This favors lower CPU-ids and will
    2668             :  * grant the double lock to lower CPUs over higher ids under contention,
    2669             :  * regardless of entry order into the function.
    2670             :  */
    2671             : static inline int _double_lock_balance(struct rq *this_rq, struct rq *busiest)
    2672             :         __releases(this_rq->lock)
    2673             :         __acquires(busiest->lock)
    2674             :         __acquires(this_rq->lock)
    2675             : {
    2676             :         if (__rq_lockp(this_rq) == __rq_lockp(busiest) ||
    2677             :             likely(raw_spin_rq_trylock(busiest))) {
    2678             :                 double_rq_clock_clear_update(this_rq, busiest);
    2679             :                 return 0;
    2680             :         }
    2681             : 
    2682             :         if (rq_order_less(this_rq, busiest)) {
    2683             :                 raw_spin_rq_lock_nested(busiest, SINGLE_DEPTH_NESTING);
    2684             :                 double_rq_clock_clear_update(this_rq, busiest);
    2685             :                 return 0;
    2686             :         }
    2687             : 
    2688             :         raw_spin_rq_unlock(this_rq);
    2689             :         double_rq_lock(this_rq, busiest);
    2690             : 
    2691             :         return 1;
    2692             : }
    2693             : 
    2694             : #endif /* CONFIG_PREEMPTION */
    2695             : 
    2696             : /*
    2697             :  * double_lock_balance - lock the busiest runqueue, this_rq is locked already.
    2698             :  */
    2699             : static inline int double_lock_balance(struct rq *this_rq, struct rq *busiest)
    2700             : {
    2701             :         lockdep_assert_irqs_disabled();
    2702             : 
    2703             :         return _double_lock_balance(this_rq, busiest);
    2704             : }
    2705             : 
    2706             : static inline void double_unlock_balance(struct rq *this_rq, struct rq *busiest)
    2707             :         __releases(busiest->lock)
    2708             : {
    2709             :         if (__rq_lockp(this_rq) != __rq_lockp(busiest))
    2710             :                 raw_spin_rq_unlock(busiest);
    2711             :         lock_set_subclass(&__rq_lockp(this_rq)->dep_map, 0, _RET_IP_);
    2712             : }
    2713             : 
    2714             : static inline void double_lock(spinlock_t *l1, spinlock_t *l2)
    2715             : {
    2716             :         if (l1 > l2)
    2717             :                 swap(l1, l2);
    2718             : 
    2719             :         spin_lock(l1);
    2720             :         spin_lock_nested(l2, SINGLE_DEPTH_NESTING);
    2721             : }
    2722             : 
    2723             : static inline void double_lock_irq(spinlock_t *l1, spinlock_t *l2)
    2724             : {
    2725             :         if (l1 > l2)
    2726             :                 swap(l1, l2);
    2727             : 
    2728             :         spin_lock_irq(l1);
    2729             :         spin_lock_nested(l2, SINGLE_DEPTH_NESTING);
    2730             : }
    2731             : 
    2732             : static inline void double_raw_lock(raw_spinlock_t *l1, raw_spinlock_t *l2)
    2733             : {
    2734             :         if (l1 > l2)
    2735             :                 swap(l1, l2);
    2736             : 
    2737             :         raw_spin_lock(l1);
    2738             :         raw_spin_lock_nested(l2, SINGLE_DEPTH_NESTING);
    2739             : }
    2740             : 
    2741             : /*
    2742             :  * double_rq_unlock - safely unlock two runqueues
    2743             :  *
    2744             :  * Note this does not restore interrupts like task_rq_unlock,
    2745             :  * you need to do so manually after calling.
    2746             :  */
    2747             : static inline void double_rq_unlock(struct rq *rq1, struct rq *rq2)
    2748             :         __releases(rq1->lock)
    2749             :         __releases(rq2->lock)
    2750             : {
    2751             :         if (__rq_lockp(rq1) != __rq_lockp(rq2))
    2752             :                 raw_spin_rq_unlock(rq2);
    2753             :         else
    2754             :                 __release(rq2->lock);
    2755             :         raw_spin_rq_unlock(rq1);
    2756             : }
    2757             : 
    2758             : extern void set_rq_online (struct rq *rq);
    2759             : extern void set_rq_offline(struct rq *rq);
    2760             : extern bool sched_smp_initialized;
    2761             : 
    2762             : #else /* CONFIG_SMP */
    2763             : 
    2764             : /*
    2765             :  * double_rq_lock - safely lock two runqueues
    2766             :  *
    2767             :  * Note this does not disable interrupts like task_rq_lock,
    2768             :  * you need to do so manually before calling.
    2769             :  */
    2770           0 : static inline void double_rq_lock(struct rq *rq1, struct rq *rq2)
    2771             :         __acquires(rq1->lock)
    2772             :         __acquires(rq2->lock)
    2773             : {
    2774           0 :         WARN_ON_ONCE(!irqs_disabled());
    2775           0 :         WARN_ON_ONCE(rq1 != rq2);
    2776           0 :         raw_spin_rq_lock(rq1);
    2777             :         __acquire(rq2->lock);        /* Fake it out ;) */
    2778           0 :         double_rq_clock_clear_update(rq1, rq2);
    2779           0 : }
    2780             : 
    2781             : /*
    2782             :  * double_rq_unlock - safely unlock two runqueues
    2783             :  *
    2784             :  * Note this does not restore interrupts like task_rq_unlock,
    2785             :  * you need to do so manually after calling.
    2786             :  */
    2787           0 : static inline void double_rq_unlock(struct rq *rq1, struct rq *rq2)
    2788             :         __releases(rq1->lock)
    2789             :         __releases(rq2->lock)
    2790             : {
    2791           0 :         WARN_ON_ONCE(rq1 != rq2);
    2792           0 :         raw_spin_rq_unlock(rq1);
    2793             :         __release(rq2->lock);
    2794           0 : }
    2795             : 
    2796             : #endif
    2797             : 
    2798             : extern struct sched_entity *__pick_first_entity(struct cfs_rq *cfs_rq);
    2799             : extern struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq);
    2800             : 
    2801             : #ifdef  CONFIG_SCHED_DEBUG
    2802             : extern bool sched_debug_verbose;
    2803             : 
    2804             : extern void print_cfs_stats(struct seq_file *m, int cpu);
    2805             : extern void print_rt_stats(struct seq_file *m, int cpu);
    2806             : extern void print_dl_stats(struct seq_file *m, int cpu);
    2807             : extern void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq);
    2808             : extern void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq);
    2809             : extern void print_dl_rq(struct seq_file *m, int cpu, struct dl_rq *dl_rq);
    2810             : 
    2811             : extern void resched_latency_warn(int cpu, u64 latency);
    2812             : #ifdef CONFIG_NUMA_BALANCING
    2813             : extern void
    2814             : show_numa_stats(struct task_struct *p, struct seq_file *m);
    2815             : extern void
    2816             : print_numa_stats(struct seq_file *m, int node, unsigned long tsf,
    2817             :         unsigned long tpf, unsigned long gsf, unsigned long gpf);
    2818             : #endif /* CONFIG_NUMA_BALANCING */
    2819             : #else
    2820             : static inline void resched_latency_warn(int cpu, u64 latency) {}
    2821             : #endif /* CONFIG_SCHED_DEBUG */
    2822             : 
    2823             : extern void init_cfs_rq(struct cfs_rq *cfs_rq);
    2824             : extern void init_rt_rq(struct rt_rq *rt_rq);
    2825             : extern void init_dl_rq(struct dl_rq *dl_rq);
    2826             : 
    2827             : extern void cfs_bandwidth_usage_inc(void);
    2828             : extern void cfs_bandwidth_usage_dec(void);
    2829             : 
    2830             : #ifdef CONFIG_NO_HZ_COMMON
    2831             : #define NOHZ_BALANCE_KICK_BIT   0
    2832             : #define NOHZ_STATS_KICK_BIT     1
    2833             : #define NOHZ_NEWILB_KICK_BIT    2
    2834             : #define NOHZ_NEXT_KICK_BIT      3
    2835             : 
    2836             : /* Run rebalance_domains() */
    2837             : #define NOHZ_BALANCE_KICK       BIT(NOHZ_BALANCE_KICK_BIT)
    2838             : /* Update blocked load */
    2839             : #define NOHZ_STATS_KICK         BIT(NOHZ_STATS_KICK_BIT)
    2840             : /* Update blocked load when entering idle */
    2841             : #define NOHZ_NEWILB_KICK        BIT(NOHZ_NEWILB_KICK_BIT)
    2842             : /* Update nohz.next_balance */
    2843             : #define NOHZ_NEXT_KICK          BIT(NOHZ_NEXT_KICK_BIT)
    2844             : 
    2845             : #define NOHZ_KICK_MASK  (NOHZ_BALANCE_KICK | NOHZ_STATS_KICK | NOHZ_NEXT_KICK)
    2846             : 
    2847             : #define nohz_flags(cpu) (&cpu_rq(cpu)->nohz_flags)
    2848             : 
    2849             : extern void nohz_balance_exit_idle(struct rq *rq);
    2850             : #else
    2851             : static inline void nohz_balance_exit_idle(struct rq *rq) { }
    2852             : #endif
    2853             : 
    2854             : #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
    2855             : extern void nohz_run_idle_balance(int cpu);
    2856             : #else
    2857             : static inline void nohz_run_idle_balance(int cpu) { }
    2858             : #endif
    2859             : 
    2860             : #ifdef CONFIG_IRQ_TIME_ACCOUNTING
    2861             : struct irqtime {
    2862             :         u64                     total;
    2863             :         u64                     tick_delta;
    2864             :         u64                     irq_start_time;
    2865             :         struct u64_stats_sync   sync;
    2866             : };
    2867             : 
    2868             : DECLARE_PER_CPU(struct irqtime, cpu_irqtime);
    2869             : 
    2870             : /*
    2871             :  * Returns the irqtime minus the softirq time computed by ksoftirqd.
    2872             :  * Otherwise ksoftirqd's sum_exec_runtime would have its own runtime
    2873             :  * subtracted from it and would never move forward.
    2874             :  */
    2875             : static inline u64 irq_time_read(int cpu)
    2876             : {
    2877             :         struct irqtime *irqtime = &per_cpu(cpu_irqtime, cpu);
    2878             :         unsigned int seq;
    2879             :         u64 total;
    2880             : 
    2881             :         do {
    2882             :                 seq = __u64_stats_fetch_begin(&irqtime->sync);
    2883             :                 total = irqtime->total;
    2884             :         } while (__u64_stats_fetch_retry(&irqtime->sync, seq));
    2885             : 
    2886             :         return total;
    2887             : }
    2888             : #endif /* CONFIG_IRQ_TIME_ACCOUNTING */
    2889             : 
    2890             : #ifdef CONFIG_CPU_FREQ
    2891             : DECLARE_PER_CPU(struct update_util_data __rcu *, cpufreq_update_util_data);
    2892             : 
    2893             : /**
    2894             :  * cpufreq_update_util - Take a note about CPU utilization changes.
    2895             :  * @rq: Runqueue to carry out the update for.
    2896             :  * @flags: Update reason flags.
    2897             :  *
    2898             :  * This function is called by the scheduler on the CPU whose utilization is
    2899             :  * being updated.
    2900             :  *
    2901             :  * It can only be called from RCU-sched read-side critical sections.
    2902             :  *
    2903             :  * The way cpufreq is currently arranged requires it to evaluate the CPU
    2904             :  * performance state (frequency/voltage) on a regular basis to prevent it from
    2905             :  * being stuck in a completely inadequate performance level for too long.
    2906             :  * That is not guaranteed to happen if the updates are only triggered from CFS
    2907             :  * and DL, though, because they may not be coming in if only RT tasks are
    2908             :  * active all the time (or there are RT tasks only).
    2909             :  *
    2910             :  * As a workaround for that issue, this function is called periodically by the
    2911             :  * RT sched class to trigger extra cpufreq updates to prevent it from stalling,
    2912             :  * but that really is a band-aid.  Going forward it should be replaced with
    2913             :  * solutions targeted more specifically at RT tasks.
    2914             :  */
    2915             : static inline void cpufreq_update_util(struct rq *rq, unsigned int flags)
    2916             : {
    2917             :         struct update_util_data *data;
    2918             : 
    2919             :         data = rcu_dereference_sched(*per_cpu_ptr(&cpufreq_update_util_data,
    2920             :                                                   cpu_of(rq)));
    2921             :         if (data)
    2922             :                 data->func(data, rq_clock(rq), flags);
    2923             : }
    2924             : #else
    2925             : static inline void cpufreq_update_util(struct rq *rq, unsigned int flags) {}
    2926             : #endif /* CONFIG_CPU_FREQ */
    2927             : 
    2928             : #ifdef arch_scale_freq_capacity
    2929             : # ifndef arch_scale_freq_invariant
    2930             : #  define arch_scale_freq_invariant()   true
    2931             : # endif
    2932             : #else
    2933             : # define arch_scale_freq_invariant()    false
    2934             : #endif
    2935             : 
    2936             : #ifdef CONFIG_SMP
    2937             : static inline unsigned long capacity_orig_of(int cpu)
    2938             : {
    2939             :         return cpu_rq(cpu)->cpu_capacity_orig;
    2940             : }
    2941             : 
    2942             : /**
    2943             :  * enum cpu_util_type - CPU utilization type
    2944             :  * @FREQUENCY_UTIL:     Utilization used to select frequency
    2945             :  * @ENERGY_UTIL:        Utilization used during energy calculation
    2946             :  *
    2947             :  * The utilization signals of all scheduling classes (CFS/RT/DL) and IRQ time
    2948             :  * need to be aggregated differently depending on the usage made of them. This
    2949             :  * enum is used within effective_cpu_util() to differentiate the types of
    2950             :  * utilization expected by the callers, and adjust the aggregation accordingly.
    2951             :  */
    2952             : enum cpu_util_type {
    2953             :         FREQUENCY_UTIL,
    2954             :         ENERGY_UTIL,
    2955             : };
    2956             : 
    2957             : unsigned long effective_cpu_util(int cpu, unsigned long util_cfs,
    2958             :                                  enum cpu_util_type type,
    2959             :                                  struct task_struct *p);
    2960             : 
    2961             : /*
    2962             :  * Verify the fitness of task @p to run on @cpu taking into account the
    2963             :  * CPU original capacity and the runtime/deadline ratio of the task.
    2964             :  *
    2965             :  * The function will return true if the original capacity of @cpu is
    2966             :  * greater than or equal to task's deadline density right shifted by
    2967             :  * (BW_SHIFT - SCHED_CAPACITY_SHIFT) and false otherwise.
    2968             :  */
    2969             : static inline bool dl_task_fits_capacity(struct task_struct *p, int cpu)
    2970             : {
    2971             :         unsigned long cap = arch_scale_cpu_capacity(cpu);
    2972             : 
    2973             :         return cap >= p->dl.dl_density >> (BW_SHIFT - SCHED_CAPACITY_SHIFT);
    2974             : }
    2975             : 
    2976             : static inline unsigned long cpu_bw_dl(struct rq *rq)
    2977             : {
    2978             :         return (rq->dl.running_bw * SCHED_CAPACITY_SCALE) >> BW_SHIFT;
    2979             : }
    2980             : 
    2981             : static inline unsigned long cpu_util_dl(struct rq *rq) /* returns rq->avg_dl.util_avg */
    2982             : {
    2983             :         return READ_ONCE(rq->avg_dl.util_avg); /* single marked load; NOTE(review): presumably because the field may be updated concurrently - confirm */
    2984             : }
    2985             : 
    2986             : 
    2987             : extern unsigned long cpu_util_cfs(int cpu);       /* declaration only; used below by uclamp_rq_is_capped() */
    2988             : extern unsigned long cpu_util_cfs_boost(int cpu); /* declaration only; NOTE(review): semantics of "boost" are defined with the implementation, not here */
    2989             : 
    2990             : static inline unsigned long cpu_util_rt(struct rq *rq) /* returns rq->avg_rt.util_avg */
    2991             : {
    2992             :         return READ_ONCE(rq->avg_rt.util_avg); /* single marked load, same pattern as cpu_util_dl() */
    2993             : }
    2994             : #endif
    2995             : 
    2996             : #ifdef CONFIG_UCLAMP_TASK
    2997             : unsigned long uclamp_eff_value(struct task_struct *p, enum uclamp_id clamp_id); /* effective @clamp_id clamp of task @p; prototype only under CONFIG_UCLAMP_TASK */
    2998             : 
    2999             : static inline unsigned long uclamp_rq_get(struct rq *rq, /* read accessor for the rq's stored clamp value */
    3000             :                                           enum uclamp_id clamp_id) /* index into rq->uclamp[] */
    3001             : {
    3002             :         return READ_ONCE(rq->uclamp[clamp_id].value); /* marked load, paired with the WRITE_ONCE() in uclamp_rq_set() */
    3003             : }
    3004             : 
    3005             : static inline void uclamp_rq_set(struct rq *rq, enum uclamp_id clamp_id, /* write accessor for the rq's stored clamp value */
    3006             :                                  unsigned int value)
    3007             : {
    3008             :         WRITE_ONCE(rq->uclamp[clamp_id].value, value); /* marked store, paired with the READ_ONCE() in uclamp_rq_get() */
    3009             : }
    3010             : 
    3011             : static inline bool uclamp_rq_is_idle(struct rq *rq) /* true when UCLAMP_FLAG_IDLE is set in rq->uclamp_flags */
    3012             : {
    3013             :         return rq->uclamp_flags & UCLAMP_FLAG_IDLE; /* masked bit converts to bool: non-zero becomes true */
    3014             : }
    3015             : 
    3016             : /**
    3017             :  * uclamp_rq_util_with - clamp @util with @rq and @p effective uclamp values.
    3018             :  * @rq:         The rq to clamp against. Must not be NULL.
    3019             :  * @util:       The util value to clamp.
    3020             :  * @p:          The task to clamp against. Can be NULL if you want to clamp
    3021             :  *              against @rq only.
    3022             :  *
    3023             :  * Clamps the passed @util to the max(@rq, @p) effective uclamp values.
    3024             :  *
    3025             :  * If sched_uclamp_used static key is disabled, then just return the util
    3026             :  * without any clamping since uclamp aggregation at the rq level in the fast
    3027             :  * path is disabled, rendering this operation a NOP.
    3028             :  *
    3029             :  * Use uclamp_eff_value() if you don't care about uclamp values at rq level. It
    3030             :  * will return the correct effective uclamp value of the task even if the
    3031             :  * static key is disabled.
    3032             :  */
    3033             : static __always_inline
    3034             : unsigned long uclamp_rq_util_with(struct rq *rq, unsigned long util,
    3035             :                                   struct task_struct *p)
    3036             : {
    3037             :         unsigned long min_util = 0; /* lower bound; stays 0 unless a task or rq clamp raises it */
    3038             :         unsigned long max_util = 0; /* upper bound; always overwritten or max()-ed below before it is used */
    3039             : 
    3040             :         if (!static_branch_likely(&sched_uclamp_used)) /* static key off: hand @util back untouched */
    3041             :                 return util;
    3042             : 
    3043             :         if (p) { /* start from the task's own effective clamps */
    3044             :                 min_util = uclamp_eff_value(p, UCLAMP_MIN);
    3045             :                 max_util = uclamp_eff_value(p, UCLAMP_MAX);
    3046             : 
    3047             :                 /*
    3048             :                  * Ignore last runnable task's max clamp, as this task will
    3049             :                  * reset it. Similarly, no need to read the rq's min clamp.
    3050             :                  */
    3051             :                 if (uclamp_rq_is_idle(rq))
    3052             :                         goto out; /* skip the rq values entirely; only the task's clamps apply */
    3053             :         }
    3054             : 
    3055             :         min_util = max_t(unsigned long, min_util, uclamp_rq_get(rq, UCLAMP_MIN)); /* larger of task and rq minimum */
    3056             :         max_util = max_t(unsigned long, max_util, uclamp_rq_get(rq, UCLAMP_MAX)); /* larger of task and rq maximum */
    3057             : out:
    3058             :         /*
    3059             :          * Since CPU's {min,max}_util clamps are MAX aggregated considering
    3060             :          * RUNNABLE tasks with _different_ clamps, we can end up with an
    3061             :          * inversion. Fix it now when the clamps are applied.
    3062             :          */
    3063             :         if (unlikely(min_util >= max_util)) /* inverted or equal bounds: min_util wins and @util is ignored */
    3064             :                 return min_util;
    3065             : 
    3066             :         return clamp(util, min_util, max_util); /* here min_util < max_util is guaranteed */
    3067             : }
    3068             : 
    3069             : /* Is the rq being capped/throttled by uclamp_max? */
    3070             : static inline bool uclamp_rq_is_capped(struct rq *rq) /* true when the rq's CFS+RT utilization has reached a non-default UCLAMP_MAX */
    3071             : {
    3072             :         unsigned long rq_util;
    3073             :         unsigned long max_util;
    3074             : 
    3075             :         if (!static_branch_likely(&sched_uclamp_used)) /* static key off: never report capping */
    3076             :                 return false;
    3077             : 
    3078             :         rq_util = cpu_util_cfs(cpu_of(rq)) + cpu_util_rt(rq); /* combined CFS and RT utilization of this rq's CPU */
    3079             :         max_util = uclamp_rq_get(rq, UCLAMP_MAX); /* use the rq accessor rather than open-coding the READ_ONCE() */
    3080             : 
    3081             :         return max_util != SCHED_CAPACITY_SCALE && rq_util >= max_util; /* a max clamp equal to SCHED_CAPACITY_SCALE does not count as capping */
    3082             : }
    3083             : 
    3084             : /*
    3085             :  * When uclamp is compiled in, the aggregation at rq level is 'turned off'
    3086             :  * by default in the fast path and only gets turned on once userspace performs
    3087             :  * an operation that requires it.
    3088             :  *
    3089             :  * Returns true if userspace opted-in to use uclamp and aggregation at rq level
    3090             :  * hence is active.
    3091             :  */
    3092             : static inline bool uclamp_is_used(void)
    3093             : {
    3094             :         return static_branch_likely(&sched_uclamp_used); /* same static key that gates uclamp_rq_util_with() and uclamp_rq_is_capped() */
    3095             : }
    3096             : #else /* CONFIG_UCLAMP_TASK */
    3097             : static inline unsigned long uclamp_eff_value(struct task_struct *p, /* !CONFIG_UCLAMP_TASK stub: @p is not consulted */
    3098             :                                              enum uclamp_id clamp_id)
    3099             : {
    3100             :         if (clamp_id == UCLAMP_MIN) /* minimum clamp is reported as 0 */
    3101             :                 return 0;
    3102             : 
    3103             :         return SCHED_CAPACITY_SCALE; /* every other clamp id is reported as SCHED_CAPACITY_SCALE */
    3104             : }
    3105             : 
    3106             : static inline
    3107             : unsigned long uclamp_rq_util_with(struct rq *rq, unsigned long util, /* !CONFIG_UCLAMP_TASK stub: @rq and @p are ignored */
    3108             :                                   struct task_struct *p)
    3109             : {
    3110             :         return util; /* no clamping: @util is returned unchanged */
    3111             : }
    3112             : 
    3113             : static inline bool uclamp_rq_is_capped(struct rq *rq) { return false; } /* !CONFIG_UCLAMP_TASK stub: an rq is never reported as capped */
    3114             : 
    3115             : static inline bool uclamp_is_used(void) /* !CONFIG_UCLAMP_TASK stub */
    3116             : {
    3117             :         return false; /* constant: uclamp is compiled out in this branch */
    3118             : }
    3119             : 
    3120             : static inline unsigned long uclamp_rq_get(struct rq *rq, /* !CONFIG_UCLAMP_TASK stub: @rq is not consulted */
    3121             :                                           enum uclamp_id clamp_id)
    3122             : {
    3123             :         if (clamp_id == UCLAMP_MIN) /* minimum clamp is reported as 0 */
    3124             :                 return 0;
    3125             : 
    3126             :         return SCHED_CAPACITY_SCALE; /* same constants as the uclamp_eff_value() stub in this branch */
    3127             : }
    3128             : 
    3129             : static inline void uclamp_rq_set(struct rq *rq, enum uclamp_id clamp_id, /* !CONFIG_UCLAMP_TASK stub: intentionally empty, @value is discarded */
    3130             :                                  unsigned int value)
    3131             : {
    3132             : }
    3133             : 
    3134             : static inline bool uclamp_rq_is_idle(struct rq *rq) /* !CONFIG_UCLAMP_TASK stub */
    3135             : {
    3136             :         return false; /* constant: @rq is not inspected */
    3137             : }
    3138             : #endif /* CONFIG_UCLAMP_TASK */
    3139             : 
    3140             : #ifdef CONFIG_HAVE_SCHED_AVG_IRQ
    3141             : static inline unsigned long cpu_util_irq(struct rq *rq) /* returns rq->avg_irq.util_avg */
    3142             : {
    3143             :         return READ_ONCE(rq->avg_irq.util_avg); /* marked load, matching the cpu_util_dl()/cpu_util_rt() accessors for the other PELT signals */
    3144             : }
    3145             : 
    3146             : static inline
    3147             : unsigned long scale_irq_capacity(unsigned long util, unsigned long irq, unsigned long max) /* returns util * (max - irq) / max */
    3148             : {
    3149             :         util *= (max - irq); /* unsigned arithmetic; assumes irq <= max - not checked here, TODO confirm at call sites */
    3150             :         util /= max;         /* assumes max != 0 - not checked here */
    3151             : 
    3152             :         return util;
    3153             : 
    3154             : }
    3155             : #else
    3156             : static inline unsigned long cpu_util_irq(struct rq *rq) /* !CONFIG_HAVE_SCHED_AVG_IRQ stub */
    3157             : {
    3158             :         return 0; /* no IRQ utilization is reported in this configuration */
    3159             : }
    3160             : 
    3161             : static inline
    3162             : unsigned long scale_irq_capacity(unsigned long util, unsigned long irq, unsigned long max) /* !CONFIG_HAVE_SCHED_AVG_IRQ stub: @irq and @max are ignored */
    3163             : {
    3164             :         return util; /* no scaling: @util is returned unchanged */
    3165             : }
    3166             : #endif
    3167             : 
    3168             : #if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL)
    3169             : 
    3170             : #define perf_domain_span(pd) (to_cpumask(((pd)->em_pd->cpus))) /* cpumask view of (pd)->em_pd->cpus */
    3171             : 
    3172             : DECLARE_STATIC_KEY_FALSE(sched_energy_present); /* declaration of the static key tested by sched_energy_enabled() */
    3173             : 
    3174             : static inline bool sched_energy_enabled(void) /* reports the state of the sched_energy_present static key */
    3175             : {
    3176             :         return static_branch_unlikely(&sched_energy_present); /* "unlikely" variant: the disabled case is treated as the expected one */
    3177             : }
    3178             : 
    3179             : #else /* ! (CONFIG_ENERGY_MODEL && CONFIG_CPU_FREQ_GOV_SCHEDUTIL) */
    3180             : 
    3181             : #define perf_domain_span(pd) NULL /* fallback when CONFIG_ENERGY_MODEL and CONFIG_CPU_FREQ_GOV_SCHEDUTIL are not both set: @pd is not evaluated */
    3182             : static inline bool sched_energy_enabled(void) { return false; } /* fallback stub: always false in this configuration */
    3183             : 
    3184             : #endif /* CONFIG_ENERGY_MODEL && CONFIG_CPU_FREQ_GOV_SCHEDUTIL */
    3185             : 
    3186             : #ifdef CONFIG_MEMBARRIER
    3187             : /*
    3188             :  * The scheduler provides memory barriers required by membarrier between:
    3189             :  * - prior user-space memory accesses and store to rq->membarrier_state,
    3190             :  * - store to rq->membarrier_state and following user-space memory accesses.
    3191             :  * In the same way it provides those guarantees around store to rq->curr.
    3192             :  */
    3193             : static inline void membarrier_switch_mm(struct rq *rq, /* copies next_mm's membarrier_state into rq->membarrier_state when it differs */
    3194             :                                         struct mm_struct *prev_mm,
    3195             :                                         struct mm_struct *next_mm)
    3196             : {
    3197             :         int membarrier_state;
    3198             : 
    3199           0 :         if (prev_mm == next_mm) /* same mm on both sides: nothing to update */
    3200             :                 return;
    3201             : 
    3202           0 :         membarrier_state = atomic_read(&next_mm->membarrier_state); /* assumes next_mm is non-NULL whenever it differs from prev_mm - TODO confirm against the caller */
    3203           0 :         if (READ_ONCE(rq->membarrier_state) == membarrier_state) /* rq already holds this value: skip the store */
    3204             :                 return;
    3205             : 
    3206           0 :         WRITE_ONCE(rq->membarrier_state, membarrier_state);
    3207             : }
    3208             : #else
    3209             : static inline void membarrier_switch_mm(struct rq *rq, /* !CONFIG_MEMBARRIER stub: intentionally empty */
    3210             :                                         struct mm_struct *prev_mm,
    3211             :                                         struct mm_struct *next_mm)
    3212             : {
    3213             : }
    3214             : #endif
    3215             : 
    3216             : #ifdef CONFIG_SMP
    3217             : static inline bool is_per_cpu_kthread(struct task_struct *p) /* true only for a PF_KTHREAD task whose nr_cpus_allowed is exactly 1 */
    3218             : {
    3219             :         if (!(p->flags & PF_KTHREAD)) /* not flagged as a kernel thread */
    3220             :                 return false;
    3221             : 
    3222             :         if (p->nr_cpus_allowed != 1) /* allowed on more (or fewer) than one CPU */
    3223             :                 return false;
    3224             : 
    3225             :         return true;
    3226             : }
    3227             : #endif
    3228             : 
    3229             : extern void swake_up_all_locked(struct swait_queue_head *q); /* declaration only; NOTE(review): the "_locked" suffix suggests the caller already holds the queue lock - confirm at the definition */
    3230             : extern void __prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait); /* declaration only; defined outside this header */
    3231             : 
    3232             : #ifdef CONFIG_PREEMPT_DYNAMIC
    3233             : extern int preempt_dynamic_mode;                /* declared only when CONFIG_PREEMPT_DYNAMIC is set */
    3234             : extern int sched_dynamic_mode(const char *str); /* takes a string, returns an int; NOTE(review): presumably parses a mode name into a mode value - confirm at the definition */
    3235             : extern void sched_dynamic_update(int mode);     /* takes an int mode; defined outside this header */
    3236             : #endif
    3237             : 
    3238             : static inline void update_current_exec_runtime(struct task_struct *curr, /* charges @delta_exec to @curr and restarts its exec_start at @now */
    3239             :                                                 u64 now, u64 delta_exec)
    3240             : {
    3241           0 :         curr->se.sum_exec_runtime += delta_exec; /* per-entity running total */
    3242           0 :         account_group_exec_runtime(curr, delta_exec); /* same delta forwarded to group accounting */
    3243             : 
    3244           0 :         curr->se.exec_start = now; /* next delta will be measured from @now */
    3245           0 :         cgroup_account_cputime(curr, delta_exec); /* same delta forwarded to cgroup accounting */
    3246             : }
    3247             : 
    3248             : #ifdef CONFIG_SCHED_MM_CID
    3249             : 
    3250             : #define SCHED_MM_CID_PERIOD_NS  (100ULL * 1000000)      /* 100ms, expressed in nanoseconds */
    3251             : #define MM_CID_SCAN_DELAY       100                     /* 100ms */
    3252             : 
    3253             : extern raw_spinlock_t cid_lock; /* taken by __mm_cid_get() on its locked allocation paths */
    3254             : extern int use_cid_lock;        /* flag accessed with READ_ONCE()/WRITE_ONCE() in __mm_cid_get() */
    3255             : 
    3256             : extern void sched_mm_cid_migrate_from(struct task_struct *t); /* declarations only; empty stubs exist for !CONFIG_SCHED_MM_CID */
    3257             : extern void sched_mm_cid_migrate_to(struct rq *dst_rq, struct task_struct *t);
    3258             : extern void task_tick_mm_cid(struct rq *rq, struct task_struct *curr);
    3259             : extern void init_sched_mm_cid(struct task_struct *t);
    3260             : 
    3261             : static inline void __mm_cid_put(struct mm_struct *mm, int cid) /* clears bit @cid in @mm's cid mask; negative @cid is a no-op */
    3262             : {
    3263             :         if (cid < 0) /* nothing was allocated */
    3264             :                 return;
    3265             :         cpumask_clear_cpu(cid, mm_cidmask(mm)); /* the cid becomes available to __mm_cid_try_get() again */
    3266             : }
    3267             : 
    3268             : /*
    3269             :  * The per-mm/cpu cid can have the MM_CID_LAZY_PUT flag set or transition to
    3270             :  * the MM_CID_UNSET state without holding the rq lock, but the rq lock needs to
    3271             :  * be held to transition to other states.
    3272             :  *
    3273             :  * State transitions synchronized with cmpxchg or try_cmpxchg need to be
    3274             :  * consistent across cpus, which prevents use of this_cpu_cmpxchg.
    3275             :  */
    3276             : static inline void mm_cid_put_lazy(struct task_struct *t) /* releases this CPU's cid for t->mm, but only if it carries the lazy-put flag */
    3277             : {
    3278             :         struct mm_struct *mm = t->mm; /* assumes t->mm is non-NULL - not checked here, TODO confirm against callers */
    3279             :         struct mm_cid __percpu *pcpu_cid = mm->pcpu_cid;
    3280             :         int cid;
    3281             : 
    3282             :         lockdep_assert_irqs_disabled();
    3283             :         cid = __this_cpu_read(pcpu_cid->cid);
    3284             :         if (!mm_cid_is_lazy_put(cid) || /* not lazy-put: leave it alone */
    3285             :             !try_cmpxchg(&this_cpu_ptr(pcpu_cid)->cid, &cid, MM_CID_UNSET)) /* value changed under us: the exchange failed, do not release */
    3286             :                 return;
    3287             :         __mm_cid_put(mm, mm_cid_clear_lazy_put(cid)); /* strip the flag to recover the cid bit to clear */
    3288             : }
    3289             : 
    3290             : static inline int mm_cid_pcpu_unset(struct mm_struct *mm) /* sets this CPU's per-mm cid to MM_CID_UNSET; returns the value it replaced, or MM_CID_UNSET if it was already unset */
    3291             : {
    3292             :         struct mm_cid __percpu *pcpu_cid = mm->pcpu_cid;
    3293             :         int cid, res;
    3294             : 
    3295             :         lockdep_assert_irqs_disabled();
    3296             :         cid = __this_cpu_read(pcpu_cid->cid);
    3297             :         for (;;) {
    3298             :                 if (mm_cid_is_unset(cid)) /* already unset (initially or after a retry): nothing to hand back */
    3299             :                         return MM_CID_UNSET;
    3300             :                 /*
    3301             :                  * Attempt transition from valid or lazy-put to unset.
    3302             :                  */
    3303             :                 res = cmpxchg(&this_cpu_ptr(pcpu_cid)->cid, cid, MM_CID_UNSET); /* res is the value found in memory */
    3304             :                 if (res == cid) /* exchange succeeded */
    3305             :                         break;
    3306             :                 cid = res; /* lost a race: retry with the value just observed */
    3307             :         }
    3308             :         return cid; /* may still carry the lazy-put flag; mm_cid_put() strips it */
    3309             : }
    3310             : 
    3311             : static inline void mm_cid_put(struct mm_struct *mm) /* unsets this CPU's per-mm cid and clears its bit in @mm's cid mask */
    3312             : {
    3313             :         int cid;
    3314             : 
    3315             :         lockdep_assert_irqs_disabled();
    3316             :         cid = mm_cid_pcpu_unset(mm);
    3317             :         if (cid == MM_CID_UNSET) /* nothing was set on this CPU */
    3318             :                 return;
    3319             :         __mm_cid_put(mm, mm_cid_clear_lazy_put(cid)); /* drop the lazy-put flag (if any) before clearing the mask bit */
    3320             : }
    3321             : 
    3322             : static inline int __mm_cid_try_get(struct mm_struct *mm) /* one allocation attempt: returns the claimed cid, or -1 if another allocator took it first */
    3323             : {
    3324             :         struct cpumask *cpumask;
    3325             :         int cid;
    3326             : 
    3327             :         cpumask = mm_cidmask(mm);
    3328             :         /*
    3329             :          * Retry finding first zero bit if the mask is temporarily
    3330             :          * filled. This only happens during concurrent remote-clear
    3331             :          * which owns a cid without holding a rq lock.
    3332             :          */
    3333             :         for (;;) {
    3334             :                 cid = cpumask_first_zero(cpumask);
    3335             :                 if (cid < nr_cpu_ids) /* found a clear bit within range */
    3336             :                         break;
    3337             :                 cpu_relax();
    3338             :         }
    3339             :         if (cpumask_test_and_set_cpu(cid, cpumask)) /* bit was already set: someone claimed it between the scan and here */
    3340             :                 return -1;
    3341             :         return cid;
    3342             : }
    3343             : 
    3344             : /*
    3345             :  * Save a snapshot of the current runqueue time of this cpu
    3346             :  * with the per-cpu cid value, allowing to estimate how recently it was used.
    3347             :  */
    3348             : static inline void mm_cid_snapshot_time(struct rq *rq, struct mm_struct *mm)
    3349             : {
    3350             :         struct mm_cid *pcpu_cid = per_cpu_ptr(mm->pcpu_cid, cpu_of(rq)); /* per-cpu slot of @rq's CPU, not necessarily the executing CPU */
    3351             : 
    3352             :         lockdep_assert_rq_held(rq);
    3353             :         WRITE_ONCE(pcpu_cid->time, rq->clock); /* marked store of the rq clock value */
    3354             : }
    3355             : 
    3356             : static inline int __mm_cid_get(struct rq *rq, struct mm_struct *mm) /* allocates a cid for @mm, timestamps it via mm_cid_snapshot_time() and returns it */
    3357             : {
    3358             :         int cid;
    3359             : 
    3360             :         /*
    3361             :          * All allocations (even those using the cid_lock) are lock-free. If
    3362             :          * use_cid_lock is set, hold the cid_lock to perform cid allocation to
    3363             :          * guarantee forward progress.
    3364             :          */
    3365             :         if (!READ_ONCE(use_cid_lock)) {
    3366             :                 cid = __mm_cid_try_get(mm); /* fast path: one attempt without cid_lock */
    3367             :                 if (cid >= 0)
    3368             :                         goto end; /* lock was never taken, so bypass the unlock label */
    3369             :                 raw_spin_lock(&cid_lock); /* attempt failed: take the lock and fall through to the retry loop */
    3370             :         } else {
    3371             :                 raw_spin_lock(&cid_lock);
    3372             :                 cid = __mm_cid_try_get(mm); /* one attempt under cid_lock */
    3373             :                 if (cid >= 0)
    3374             :                         goto unlock;
    3375             :         }
    3376             : 
    3377             :         /*
    3378             :          * cid concurrently allocated. Retry while forcing following
    3379             :          * allocations to use the cid_lock to ensure forward progress.
    3380             :          */
    3381             :         WRITE_ONCE(use_cid_lock, 1); /* cid_lock is held on every path that reaches this point */
    3382             :         /*
    3383             :          * Set use_cid_lock before allocation. Only care about program order
    3384             :          * because this is only required for forward progress.
    3385             :          */
    3386             :         barrier();
    3387             :         /*
    3388             :          * Retry until it succeeds. It is guaranteed to eventually succeed once
    3389             :          * all newcoming allocations observe the use_cid_lock flag set.
    3390             :          */
    3391             :         do {
    3392             :                 cid = __mm_cid_try_get(mm);
    3393             :                 cpu_relax(); /* runs after every attempt, including the successful one */
    3394             :         } while (cid < 0);
    3395             :         /*
    3396             :          * Allocate before clearing use_cid_lock. Only care about
    3397             :          * program order because this is for forward progress.
    3398             :          */
    3399             :         barrier();
    3400             :         WRITE_ONCE(use_cid_lock, 0);
    3401             : unlock:
    3402             :         raw_spin_unlock(&cid_lock);
    3403             : end:
    3404             :         mm_cid_snapshot_time(rq, mm); /* common exit for all three paths */
    3405             :         return cid;
    3406             : }
    3407             : 
    3408             : static inline int mm_cid_get(struct rq *rq, struct mm_struct *mm) /* returns this CPU's cid for @mm, reusing a valid cached one or allocating a fresh one */
    3409             : {
    3410             :         struct mm_cid __percpu *pcpu_cid = mm->pcpu_cid;
    3411             :         struct cpumask *cpumask; /* NOTE(review): assigned below but never read in this function */
    3412             :         int cid;
    3413             : 
    3414             :         lockdep_assert_rq_held(rq);
    3415             :         cpumask = mm_cidmask(mm);
    3416             :         cid = __this_cpu_read(pcpu_cid->cid);
    3417             :         if (mm_cid_is_valid(cid)) { /* cached cid is usable: refresh its timestamp and reuse it */
    3418             :                 mm_cid_snapshot_time(rq, mm);
    3419             :                 return cid;
    3420             :         }
    3421             :         if (mm_cid_is_lazy_put(cid)) { /* cached cid is flagged lazy-put: try to release it before allocating */
    3422             :                 if (try_cmpxchg(&this_cpu_ptr(pcpu_cid)->cid, &cid, MM_CID_UNSET))
    3423             :                         __mm_cid_put(mm, mm_cid_clear_lazy_put(cid)); /* release only if our exchange is the one that unset it */
    3424             :         }
    3425             :         cid = __mm_cid_get(rq, mm); /* allocation path also records the snapshot time */
    3426             :         __this_cpu_write(pcpu_cid->cid, cid); /* cache the new cid in this CPU's slot */
    3427             :         return cid;
    3428             : }
    3429             : 
    3430             : static inline void switch_mm_cid(struct rq *rq, /* issues the barrier needed for the prev->next mm transition, then hands cids over between @prev and @next */
    3431             :                                  struct task_struct *prev,
    3432             :                                  struct task_struct *next)
    3433             : {
    3434             :         /*
    3435             :          * Provide a memory barrier between rq->curr store and load of
    3436             :          * {prev,next}->mm->pcpu_cid[cpu] on rq->curr->mm transition.
    3437             :          *
    3438             :          * Should be adapted if context_switch() is modified.
    3439             :          */
    3440             :         if (!next->mm) {                                // to kernel
    3441             :                 /*
    3442             :                  * user -> kernel transition does not guarantee a barrier, but
    3443             :                  * we can use the fact that it performs an atomic operation in
    3444             :                  * mmgrab().
    3445             :                  */
    3446             :                 if (prev->mm)                           // from user
    3447             :                         smp_mb__after_mmgrab();
    3448             :                 /*
    3449             :                  * kernel -> kernel transition does not change rq->curr->mm
    3450             :                  * state. It stays NULL.
    3451             :                  */
    3452             :         } else {                                        // to user
    3453             :                 /*
    3454             :                  * kernel -> user transition does not provide a barrier
    3455             :                  * between rq->curr store and load of {prev,next}->mm->pcpu_cid[cpu].
    3456             :                  * Provide it here.
    3457             :                  */
    3458             :                 if (!prev->mm)                          // from kernel
    3459             :                         smp_mb();
    3460             :                 /*
    3461             :                  * user -> user transition guarantees a memory barrier through
    3462             :                  * switch_mm() when current->mm changes. If current->mm is
    3463             :                  * unchanged, no barrier is needed.
    3464             :                  */
    3465             :         }
    3466             :         if (prev->mm_cid_active) { /* outgoing task: timestamp, lazy-release, then mark its mm_cid as -1 */
    3467             :                 mm_cid_snapshot_time(rq, prev->mm);
    3468             :                 mm_cid_put_lazy(prev); /* only releases when the per-cpu cid carries the lazy-put flag */
    3469             :                 prev->mm_cid = -1;
    3470             :         }
    3471             :         if (next->mm_cid_active) /* incoming task: obtain a cid and record it in both mm_cid and last_mm_cid */
    3472             :                 next->last_mm_cid = next->mm_cid = mm_cid_get(rq, next->mm);
    3473             : }
    3474             : 
    3475             : #else
    3476             : static inline void switch_mm_cid(struct rq *rq, struct task_struct *prev, struct task_struct *next) { } /* !CONFIG_SCHED_MM_CID: the five functions below and this one are empty stubs so callers need no #ifdef */
    3477             : static inline void sched_mm_cid_migrate_from(struct task_struct *t) { }
    3478             : static inline void sched_mm_cid_migrate_to(struct rq *dst_rq, struct task_struct *t) { }
    3479             : static inline void task_tick_mm_cid(struct rq *rq, struct task_struct *curr) { }
    3480             : static inline void init_sched_mm_cid(struct task_struct *t) { }
    3481             : #endif
    3482             : 
    3483             : #endif /* _KERNEL_SCHED_SCHED_H */

Generated by: LCOV version 1.14