LCOV - coverage.info - kernel/sched/deadline.c

LCOV - code coverage report

Current view:	top level - kernel/sched - deadline.c (source / functions)		Hit	Total	Coverage
Test:	coverage.info	Lines:	45	540	8.3 %
Date:	2023-04-06 08:38:28	Functions:	7	50	14.0 %

          Line data    Source code

       1             : // SPDX-License-Identifier: GPL-2.0
       2             : /*
       3             :  * Deadline Scheduling Class (SCHED_DEADLINE)
       4             :  *
       5             :  * Earliest Deadline First (EDF) + Constant Bandwidth Server (CBS).
       6             :  *
       7             :  * Tasks that periodically execute their instances for less than their
       8             :  * runtime won't miss any of their deadlines.
       9             :  * Tasks that are not periodic or sporadic or that try to execute more
      10             :  * than their reserved bandwidth will be slowed down (and may potentially
      11             :  * miss some of their deadlines), and won't affect any other task.
      12             :  *
      13             :  * Copyright (C) 2012 Dario Faggioli <raistlin@linux.it>,
      14             :  *                    Juri Lelli <juri.lelli@gmail.com>,
      15             :  *                    Michael Trimarchi <michael@amarulasolutions.com>,
      16             :  *                    Fabio Checconi <fchecconi@gmail.com>
      17             :  */
      18             : 
      19             : /*
      20             :  * Default limits for DL period; on the top end we guard against small util
      21             :  * tasks still getting ridiculously long effective runtimes, on the bottom end we
      22             :  * guard against timer DoS.
      23             :  */
      24             : static unsigned int sysctl_sched_dl_period_max = 1 << 22; /* ~4 seconds */
      25             : static unsigned int sysctl_sched_dl_period_min = 100;     /* 100 us */
      26             : #ifdef CONFIG_SYSCTL
      27             : static struct ctl_table sched_dl_sysctls[] = {
      28             :         {
      29             :                 .procname       = "sched_deadline_period_max_us",
      30             :                 .data           = &sysctl_sched_dl_period_max,
      31             :                 .maxlen         = sizeof(unsigned int),
      32             :                 .mode           = 0644,
      33             :                 .proc_handler   = proc_douintvec_minmax,
      34             :                 .extra1         = (void *)&sysctl_sched_dl_period_min,
      35             :         },
      36             :         {
      37             :                 .procname       = "sched_deadline_period_min_us",
      38             :                 .data           = &sysctl_sched_dl_period_min,
      39             :                 .maxlen         = sizeof(unsigned int),
      40             :                 .mode           = 0644,
      41             :                 .proc_handler   = proc_douintvec_minmax,
      42             :                 .extra2         = (void *)&sysctl_sched_dl_period_max,
      43             :         },
      44             :         {}
      45             : };
      46             : 
      47           1 : static int __init sched_dl_sysctl_init(void)
      48             : {
      49           1 :         register_sysctl_init("kernel", sched_dl_sysctls);
      50           1 :         return 0;
      51             : }
      52             : late_initcall(sched_dl_sysctl_init);
      53             : #endif
      54             : 
      55             : static inline struct task_struct *dl_task_of(struct sched_dl_entity *dl_se)
      56             : {
      57           0 :         return container_of(dl_se, struct task_struct, dl);
      58             : }
      59             : 
      60             : static inline struct rq *rq_of_dl_rq(struct dl_rq *dl_rq)
      61             : {
      62           0 :         return container_of(dl_rq, struct rq, dl);
      63             : }
      64             : 
      65             : static inline struct dl_rq *dl_rq_of_se(struct sched_dl_entity *dl_se)
      66             : {
      67           0 :         struct task_struct *p = dl_task_of(dl_se);
      68           0 :         struct rq *rq = task_rq(p);
      69             : 
      70             :         return &rq->dl;
      71             : }
      72             : 
      73             : static inline int on_dl_rq(struct sched_dl_entity *dl_se)
      74             : {
      75           0 :         return !RB_EMPTY_NODE(&dl_se->rb_node);
      76             : }
      77             : 
      78             : #ifdef CONFIG_RT_MUTEXES
      79             : static inline struct sched_dl_entity *pi_of(struct sched_dl_entity *dl_se)
      80             : {
      81             :         return dl_se->pi_se;
      82             : }
      83             : 
      84             : static inline bool is_dl_boosted(struct sched_dl_entity *dl_se)
      85             : {
      86           0 :         return pi_of(dl_se) != dl_se;
      87             : }
      88             : #else
      89             : static inline struct sched_dl_entity *pi_of(struct sched_dl_entity *dl_se)
      90             : {
      91             :         return dl_se;
      92             : }
      93             : 
      94             : static inline bool is_dl_boosted(struct sched_dl_entity *dl_se)
      95             : {
      96             :         return false;
      97             : }
      98             : #endif
      99             : 
     100             : #ifdef CONFIG_SMP
     101             : static inline struct dl_bw *dl_bw_of(int i)
     102             : {
     103             :         RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held(),
     104             :                          "sched RCU must be held");
     105             :         return &cpu_rq(i)->rd->dl_bw;
     106             : }
     107             : 
     108             : static inline int dl_bw_cpus(int i)
     109             : {
     110             :         struct root_domain *rd = cpu_rq(i)->rd;
     111             :         int cpus;
     112             : 
     113             :         RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held(),
     114             :                          "sched RCU must be held");
     115             : 
     116             :         if (cpumask_subset(rd->span, cpu_active_mask))
     117             :                 return cpumask_weight(rd->span);
     118             : 
     119             :         cpus = 0;
     120             : 
     121             :         for_each_cpu_and(i, rd->span, cpu_active_mask)
     122             :                 cpus++;
     123             : 
     124             :         return cpus;
     125             : }
     126             : 
     127             : static inline unsigned long __dl_bw_capacity(const struct cpumask *mask)
     128             : {
     129             :         unsigned long cap = 0;
     130             :         int i;
     131             : 
     132             :         for_each_cpu_and(i, mask, cpu_active_mask)
     133             :                 cap += capacity_orig_of(i);
     134             : 
     135             :         return cap;
     136             : }
     137             : 
     138             : /*
     139             :  * XXX Fix: If 'rq->rd == def_root_domain' perform AC against capacity
     140             :  * of the CPU the task is running on rather rd's \Sum CPU capacity.
     141             :  */
     142             : static inline unsigned long dl_bw_capacity(int i)
     143             : {
     144             :         if (!sched_asym_cpucap_active() &&
     145             :             capacity_orig_of(i) == SCHED_CAPACITY_SCALE) {
     146             :                 return dl_bw_cpus(i) << SCHED_CAPACITY_SHIFT;
     147             :         } else {
     148             :                 RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held(),
     149             :                                  "sched RCU must be held");
     150             : 
     151             :                 return __dl_bw_capacity(cpu_rq(i)->rd->span);
     152             :         }
     153             : }
     154             : 
     155             : static inline bool dl_bw_visited(int cpu, u64 gen)
     156             : {
     157             :         struct root_domain *rd = cpu_rq(cpu)->rd;
     158             : 
     159             :         if (rd->visit_gen == gen)
     160             :                 return true;
     161             : 
     162             :         rd->visit_gen = gen;
     163             :         return false;
     164             : }
     165             : 
     166             : static inline
     167             : void __dl_update(struct dl_bw *dl_b, s64 bw)
     168             : {
     169             :         struct root_domain *rd = container_of(dl_b, struct root_domain, dl_bw);
     170             :         int i;
     171             : 
     172             :         RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held(),
     173             :                          "sched RCU must be held");
     174             :         for_each_cpu_and(i, rd->span, cpu_active_mask) {
     175             :                 struct rq *rq = cpu_rq(i);
     176             : 
     177             :                 rq->dl.extra_bw += bw;
     178             :         }
     179             : }
     180             : #else
     181             : static inline struct dl_bw *dl_bw_of(int i)
     182             : {
     183           0 :         return &cpu_rq(i)->dl.dl_bw;
     184             : }
     185             : 
     186             : static inline int dl_bw_cpus(int i)
     187             : {
     188             :         return 1;
     189             : }
     190             : 
     191             : static inline unsigned long dl_bw_capacity(int i)
     192             : {
     193             :         return SCHED_CAPACITY_SCALE;
     194             : }
     195             : 
     196             : static inline bool dl_bw_visited(int cpu, u64 gen)
     197             : {
     198             :         return false;
     199             : }
     200             : 
     201             : static inline
     202             : void __dl_update(struct dl_bw *dl_b, s64 bw)
     203             : {
     204           0 :         struct dl_rq *dl = container_of(dl_b, struct dl_rq, dl_bw);
     205             : 
     206           0 :         dl->extra_bw += bw;
     207             : }
     208             : #endif
     209             : 
     210             : static inline
     211             : void __dl_sub(struct dl_bw *dl_b, u64 tsk_bw, int cpus)
     212             : {
     213           0 :         dl_b->total_bw -= tsk_bw;
     214           0 :         __dl_update(dl_b, (s32)tsk_bw / cpus);
     215             : }
     216             : 
     217             : static inline
     218             : void __dl_add(struct dl_bw *dl_b, u64 tsk_bw, int cpus)
     219             : {
     220           0 :         dl_b->total_bw += tsk_bw;
     221           0 :         __dl_update(dl_b, -((s32)tsk_bw / cpus));
     222             : }
     223             : 
     224             : static inline bool
     225             : __dl_overflow(struct dl_bw *dl_b, unsigned long cap, u64 old_bw, u64 new_bw)
     226             : {
     227           0 :         return dl_b->bw != -1 &&
     228           0 :                cap_scale(dl_b->bw, cap) < dl_b->total_bw - old_bw + new_bw;
     229             : }
     230             : 
     231             : static inline
     232           0 : void __add_running_bw(u64 dl_bw, struct dl_rq *dl_rq)
     233             : {
     234           0 :         u64 old = dl_rq->running_bw;
     235             : 
     236           0 :         lockdep_assert_rq_held(rq_of_dl_rq(dl_rq));
     237           0 :         dl_rq->running_bw += dl_bw;
     238           0 :         SCHED_WARN_ON(dl_rq->running_bw < old); /* overflow */
     239           0 :         SCHED_WARN_ON(dl_rq->running_bw > dl_rq->this_bw);
     240             :         /* kick cpufreq (see the comment in kernel/sched/sched.h). */
     241           0 :         cpufreq_update_util(rq_of_dl_rq(dl_rq), 0);
     242           0 : }
     243             : 
     244             : static inline
     245           0 : void __sub_running_bw(u64 dl_bw, struct dl_rq *dl_rq)
     246             : {
     247           0 :         u64 old = dl_rq->running_bw;
     248             : 
     249           0 :         lockdep_assert_rq_held(rq_of_dl_rq(dl_rq));
     250           0 :         dl_rq->running_bw -= dl_bw;
     251           0 :         SCHED_WARN_ON(dl_rq->running_bw > old); /* underflow */
     252           0 :         if (dl_rq->running_bw > old)
     253           0 :                 dl_rq->running_bw = 0;
     254             :         /* kick cpufreq (see the comment in kernel/sched/sched.h). */
     255           0 :         cpufreq_update_util(rq_of_dl_rq(dl_rq), 0);
     256           0 : }
     257             : 
     258             : static inline
     259           0 : void __add_rq_bw(u64 dl_bw, struct dl_rq *dl_rq)
     260             : {
     261           0 :         u64 old = dl_rq->this_bw;
     262             : 
     263           0 :         lockdep_assert_rq_held(rq_of_dl_rq(dl_rq));
     264           0 :         dl_rq->this_bw += dl_bw;
     265           0 :         SCHED_WARN_ON(dl_rq->this_bw < old); /* overflow */
     266           0 : }
     267             : 
     268             : static inline
     269           0 : void __sub_rq_bw(u64 dl_bw, struct dl_rq *dl_rq)
     270             : {
     271           0 :         u64 old = dl_rq->this_bw;
     272             : 
     273           0 :         lockdep_assert_rq_held(rq_of_dl_rq(dl_rq));
     274           0 :         dl_rq->this_bw -= dl_bw;
     275           0 :         SCHED_WARN_ON(dl_rq->this_bw > old); /* underflow */
     276           0 :         if (dl_rq->this_bw > old)
     277           0 :                 dl_rq->this_bw = 0;
     278           0 :         SCHED_WARN_ON(dl_rq->running_bw > dl_rq->this_bw);
     279           0 : }
     280             : 
     281             : static inline
     282             : void add_rq_bw(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
     283             : {
     284           0 :         if (!dl_entity_is_special(dl_se))
     285           0 :                 __add_rq_bw(dl_se->dl_bw, dl_rq);
     286             : }
     287             : 
     288             : static inline
     289             : void sub_rq_bw(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
     290             : {
     291           0 :         if (!dl_entity_is_special(dl_se))
     292           0 :                 __sub_rq_bw(dl_se->dl_bw, dl_rq);
     293             : }
     294             : 
     295             : static inline
     296             : void add_running_bw(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
     297             : {
     298           0 :         if (!dl_entity_is_special(dl_se))
     299           0 :                 __add_running_bw(dl_se->dl_bw, dl_rq);
     300             : }
     301             : 
     302             : static inline
     303             : void sub_running_bw(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
     304             : {
     305           0 :         if (!dl_entity_is_special(dl_se))
     306           0 :                 __sub_running_bw(dl_se->dl_bw, dl_rq);
     307             : }
     308             : 
     309           0 : static void dl_change_utilization(struct task_struct *p, u64 new_bw)
     310             : {
     311             :         struct rq *rq;
     312             : 
     313           0 :         WARN_ON_ONCE(p->dl.flags & SCHED_FLAG_SUGOV);
     314             : 
     315           0 :         if (task_on_rq_queued(p))
     316             :                 return;
     317             : 
     318           0 :         rq = task_rq(p);
     319           0 :         if (p->dl.dl_non_contending) {
     320           0 :                 sub_running_bw(&p->dl, &rq->dl);
     321           0 :                 p->dl.dl_non_contending = 0;
     322             :                 /*
     323             :                  * If the timer handler is currently running and the
     324             :                  * timer cannot be canceled, inactive_task_timer()
     325             :                  * will see that dl_non_contending is not set, and
     326             :                  * will not touch the rq's active utilization,
     327             :                  * so we are still safe.
     328             :                  */
     329           0 :                 if (hrtimer_try_to_cancel(&p->dl.inactive_timer) == 1)
     330           0 :                         put_task_struct(p);
     331             :         }
     332           0 :         __sub_rq_bw(p->dl.dl_bw, &rq->dl);
     333           0 :         __add_rq_bw(new_bw, &rq->dl);
     334             : }
     335             : 
     336             : /*
     337             :  * The utilization of a task cannot be immediately removed from
     338             :  * the rq active utilization (running_bw) when the task blocks.
     339             :  * Instead, we have to wait for the so called "0-lag time".
     340             :  *
     341             :  * If a task blocks before the "0-lag time", a timer (the inactive
     342             :  * timer) is armed, and running_bw is decreased when the timer
     343             :  * fires.
     344             :  *
     345             :  * If the task wakes up again before the inactive timer fires,
     346             :  * the timer is canceled, whereas if the task wakes up after the
     347             :  * inactive timer fired (and running_bw has been decreased) the
     348             :  * task's utilization has to be added to running_bw again.
     349             :  * A flag in the deadline scheduling entity (dl_non_contending)
     350             :  * is used to avoid race conditions between the inactive timer handler
     351             :  * and task wakeups.
     352             :  *
     353             :  * The following diagram shows how running_bw is updated. A task is
     354             :  * "ACTIVE" when its utilization contributes to running_bw; an
     355             :  * "ACTIVE contending" task is in the TASK_RUNNING state, while an
     356             :  * "ACTIVE non contending" task is a blocked task for which the "0-lag time"
     357             :  * has not passed yet. An "INACTIVE" task is a task for which the "0-lag"
     358             :  * time already passed, which does not contribute to running_bw anymore.
     359             :  *                              +------------------+
     360             :  *             wakeup           |    ACTIVE        |
     361             :  *          +------------------>+   contending     |
     362             :  *          | add_running_bw    |                  |
     363             :  *          |                   +----+------+------+
     364             :  *          |                        |      ^
     365             :  *          |                dequeue |      |
     366             :  * +--------+-------+                |      |
     367             :  * |                |   t >= 0-lag   |      | wakeup
     368             :  * |    INACTIVE    |<---------------+      |
     369             :  * |                | sub_running_bw |      |
     370             :  * +--------+-------+                |      |
     371             :  *          ^                        |      |
     372             :  *          |              t < 0-lag |      |
     373             :  *          |                        |      |
     374             :  *          |                        V      |
     375             :  *          |                   +----+------+------+
     376             :  *          | sub_running_bw    |    ACTIVE        |
     377             :  *          +-------------------+                  |
     378             :  *            inactive timer    |  non contending  |
     379             :  *            fired             +------------------+
     380             :  *
     381             :  * The task_non_contending() function is invoked when a task
     382             :  * blocks, and checks if the 0-lag time already passed or
     383             :  * not (in the first case, it directly updates running_bw;
     384             :  * in the second case, it arms the inactive timer).
     385             :  *
     386             :  * The task_contending() function is invoked when a task wakes
     387             :  * up, and checks if the task is still in the "ACTIVE non contending"
     388             :  * state or not (in the second case, it updates running_bw).
     389             :  */
     390           0 : static void task_non_contending(struct task_struct *p)
     391             : {
     392           0 :         struct sched_dl_entity *dl_se = &p->dl;
     393           0 :         struct hrtimer *timer = &dl_se->inactive_timer;
     394           0 :         struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
     395           0 :         struct rq *rq = rq_of_dl_rq(dl_rq);
     396             :         s64 zerolag_time;
     397             : 
     398             :         /*
     399             :          * If this is a non-deadline task that has been boosted,
     400             :          * do nothing
     401             :          */
     402           0 :         if (dl_se->dl_runtime == 0)
     403             :                 return;
     404             : 
     405           0 :         if (dl_entity_is_special(dl_se))
     406             :                 return;
     407             : 
     408           0 :         WARN_ON(dl_se->dl_non_contending);
     409             : 
     410           0 :         zerolag_time = dl_se->deadline -
     411           0 :                  div64_long((dl_se->runtime * dl_se->dl_period),
     412             :                         dl_se->dl_runtime);
     413             : 
     414             :         /*
     415             :          * Using relative times instead of the absolute "0-lag time"
     416             :          * allows us to simplify the code
     417             :          */
     418           0 :         zerolag_time -= rq_clock(rq);
     419             : 
     420             :         /*
     421             :          * If the "0-lag time" already passed, decrease the active
     422             :          * utilization now, instead of starting a timer
     423             :          */
     424           0 :         if ((zerolag_time < 0) || hrtimer_active(&dl_se->inactive_timer)) {
     425           0 :                 if (dl_task(p))
     426           0 :                         sub_running_bw(dl_se, dl_rq);
     427           0 :                 if (!dl_task(p) || READ_ONCE(p->__state) == TASK_DEAD) {
     428           0 :                         struct dl_bw *dl_b = dl_bw_of(task_cpu(p));
     429             : 
     430           0 :                         if (READ_ONCE(p->__state) == TASK_DEAD)
     431           0 :                                 sub_rq_bw(&p->dl, &rq->dl);
     432           0 :                         raw_spin_lock(&dl_b->lock);
     433           0 :                         __dl_sub(dl_b, p->dl.dl_bw, dl_bw_cpus(task_cpu(p)));
     434           0 :                         raw_spin_unlock(&dl_b->lock);
     435             :                         __dl_clear_params(p);
     436             :                 }
     437             : 
     438             :                 return;
     439             :         }
     440             : 
     441           0 :         dl_se->dl_non_contending = 1;
     442           0 :         get_task_struct(p);
     443           0 :         hrtimer_start(timer, ns_to_ktime(zerolag_time), HRTIMER_MODE_REL_HARD);
     444             : }
     445             : 
     446           0 : static void task_contending(struct sched_dl_entity *dl_se, int flags)
     447             : {
     448           0 :         struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
     449             : 
     450             :         /*
     451             :          * If this is a non-deadline task that has been boosted,
     452             :          * do nothing
     453             :          */
     454           0 :         if (dl_se->dl_runtime == 0)
     455             :                 return;
     456             : 
     457             :         if (flags & ENQUEUE_MIGRATED)
     458             :                 add_rq_bw(dl_se, dl_rq);
     459             : 
     460           0 :         if (dl_se->dl_non_contending) {
     461           0 :                 dl_se->dl_non_contending = 0;
     462             :                 /*
     463             :                  * If the timer handler is currently running and the
     464             :                  * timer cannot be canceled, inactive_task_timer()
     465             :                  * will see that dl_non_contending is not set, and
     466             :                  * will not touch the rq's active utilization,
     467             :                  * so we are still safe.
     468             :                  */
     469           0 :                 if (hrtimer_try_to_cancel(&dl_se->inactive_timer) == 1)
     470           0 :                         put_task_struct(dl_task_of(dl_se));
     471             :         } else {
     472             :                 /*
     473             :                  * Since "dl_non_contending" is not set, the
     474             :                  * task's utilization has already been removed from
     475             :                  * active utilization (either when the task blocked,
     476             :                  * or when the "inactive timer" fired).
     477             :                  * So, add it back.
     478             :                  */
     479           0 :                 add_running_bw(dl_se, dl_rq);
     480             :         }
     481             : }
     482             : 
     483             : static inline int is_leftmost(struct task_struct *p, struct dl_rq *dl_rq)
     484             : {
     485           0 :         struct sched_dl_entity *dl_se = &p->dl;
     486             : 
     487             :         return rb_first_cached(&dl_rq->root) == &dl_se->rb_node;
     488             : }
     489             : 
     490             : static void init_dl_rq_bw_ratio(struct dl_rq *dl_rq);
     491             : 
     492           0 : void init_dl_bandwidth(struct dl_bandwidth *dl_b, u64 period, u64 runtime)
     493             : {
     494             :         raw_spin_lock_init(&dl_b->dl_runtime_lock);
     495           0 :         dl_b->dl_period = period;
     496           0 :         dl_b->dl_runtime = runtime;
     497           0 : }
     498             : 
     499           1 : void init_dl_bw(struct dl_bw *dl_b)
     500             : {
     501             :         raw_spin_lock_init(&dl_b->lock);
     502           1 :         if (global_rt_runtime() == RUNTIME_INF)
     503           0 :                 dl_b->bw = -1;
     504             :         else
     505           1 :                 dl_b->bw = to_ratio(global_rt_period(), global_rt_runtime());
     506           1 :         dl_b->total_bw = 0;
     507           1 : }
     508             : 
     509           1 : void init_dl_rq(struct dl_rq *dl_rq)
     510             : {
     511           1 :         dl_rq->root = RB_ROOT_CACHED;
     512             : 
     513             : #ifdef CONFIG_SMP
     514             :         /* zero means no -deadline tasks */
     515             :         dl_rq->earliest_dl.curr = dl_rq->earliest_dl.next = 0;
     516             : 
     517             :         dl_rq->dl_nr_migratory = 0;
     518             :         dl_rq->overloaded = 0;
     519             :         dl_rq->pushable_dl_tasks_root = RB_ROOT_CACHED;
     520             : #else
     521           1 :         init_dl_bw(&dl_rq->dl_bw);
     522             : #endif
     523             : 
     524           1 :         dl_rq->running_bw = 0;
     525           1 :         dl_rq->this_bw = 0;
     526           1 :         init_dl_rq_bw_ratio(dl_rq);
     527           1 : }
     528             : 
     529             : #ifdef CONFIG_SMP
     530             : 
     531             : static inline int dl_overloaded(struct rq *rq)
     532             : {
     533             :         return atomic_read(&rq->rd->dlo_count);
     534             : }
     535             : 
     536             : static inline void dl_set_overload(struct rq *rq)
     537             : {
     538             :         if (!rq->online)
     539             :                 return;
     540             : 
     541             :         cpumask_set_cpu(rq->cpu, rq->rd->dlo_mask);
     542             :         /*
     543             :          * Must be visible before the overload count is
     544             :          * set (as in sched_rt.c).
     545             :          *
     546             :          * Matched by the barrier in pull_dl_task().
     547             :          */
     548             :         smp_wmb();
     549             :         atomic_inc(&rq->rd->dlo_count);
     550             : }
     551             : 
     552             : static inline void dl_clear_overload(struct rq *rq)
     553             : {
     554             :         if (!rq->online)
     555             :                 return;
     556             : 
     557             :         atomic_dec(&rq->rd->dlo_count);
     558             :         cpumask_clear_cpu(rq->cpu, rq->rd->dlo_mask);
     559             : }
     560             : 
     561             : static void update_dl_migration(struct dl_rq *dl_rq)
     562             : {
     563             :         if (dl_rq->dl_nr_migratory && dl_rq->dl_nr_running > 1) {
     564             :                 if (!dl_rq->overloaded) {
     565             :                         dl_set_overload(rq_of_dl_rq(dl_rq));
     566             :                         dl_rq->overloaded = 1;
     567             :                 }
     568             :         } else if (dl_rq->overloaded) {
     569             :                 dl_clear_overload(rq_of_dl_rq(dl_rq));
     570             :                 dl_rq->overloaded = 0;
     571             :         }
     572             : }
     573             : 
     574             : static void inc_dl_migration(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
     575             : {
     576             :         struct task_struct *p = dl_task_of(dl_se);
     577             : 
     578             :         if (p->nr_cpus_allowed > 1)
     579             :                 dl_rq->dl_nr_migratory++;
     580             : 
     581             :         update_dl_migration(dl_rq);
     582             : }
     583             : 
     584             : static void dec_dl_migration(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
     585             : {
     586             :         struct task_struct *p = dl_task_of(dl_se);
     587             : 
     588             :         if (p->nr_cpus_allowed > 1)
     589             :                 dl_rq->dl_nr_migratory--;
     590             : 
     591             :         update_dl_migration(dl_rq);
     592             : }
     593             : 
     594             : #define __node_2_pdl(node) \
     595             :         rb_entry((node), struct task_struct, pushable_dl_tasks)
     596             : 
     597             : static inline bool __pushable_less(struct rb_node *a, const struct rb_node *b)
     598             : {
     599             :         return dl_entity_preempt(&__node_2_pdl(a)->dl, &__node_2_pdl(b)->dl);
     600             : }
     601             : 
     602             : /*
     603             :  * The list of pushable -deadline tasks is not a plist, like in
     604             :  * sched_rt.c, it is an rb-tree with tasks ordered by deadline.
     605             :  */
     606             : static void enqueue_pushable_dl_task(struct rq *rq, struct task_struct *p)
     607             : {
     608             :         struct rb_node *leftmost;
     609             : 
     610             :         WARN_ON_ONCE(!RB_EMPTY_NODE(&p->pushable_dl_tasks));
     611             : 
     612             :         leftmost = rb_add_cached(&p->pushable_dl_tasks,
     613             :                                  &rq->dl.pushable_dl_tasks_root,
     614             :                                  __pushable_less);
     615             :         if (leftmost)
     616             :                 rq->dl.earliest_dl.next = p->dl.deadline;
     617             : }
     618             : 
     619             : static void dequeue_pushable_dl_task(struct rq *rq, struct task_struct *p)
     620             : {
     621             :         struct dl_rq *dl_rq = &rq->dl;
     622             :         struct rb_root_cached *root = &dl_rq->pushable_dl_tasks_root;
     623             :         struct rb_node *leftmost;
     624             : 
     625             :         if (RB_EMPTY_NODE(&p->pushable_dl_tasks))
     626             :                 return;
     627             : 
     628             :         leftmost = rb_erase_cached(&p->pushable_dl_tasks, root);
     629             :         if (leftmost)
     630             :                 dl_rq->earliest_dl.next = __node_2_pdl(leftmost)->dl.deadline;
     631             : 
     632             :         RB_CLEAR_NODE(&p->pushable_dl_tasks);
     633             : }
     634             : 
     635             : static inline int has_pushable_dl_tasks(struct rq *rq)
     636             : {
     637             :         return !RB_EMPTY_ROOT(&rq->dl.pushable_dl_tasks_root.rb_root);
     638             : }
     639             : 
     640             : static int push_dl_task(struct rq *rq);
     641             : 
     642             : static inline bool need_pull_dl_task(struct rq *rq, struct task_struct *prev)
     643             : {
     644             :         return rq->online && dl_task(prev);
     645             : }
     646             : 
     647             : static DEFINE_PER_CPU(struct balance_callback, dl_push_head);
     648             : static DEFINE_PER_CPU(struct balance_callback, dl_pull_head);
     649             : 
     650             : static void push_dl_tasks(struct rq *);
     651             : static void pull_dl_task(struct rq *);
     652             : 
     653             : static inline void deadline_queue_push_tasks(struct rq *rq)
     654             : {
     655             :         if (!has_pushable_dl_tasks(rq))
     656             :                 return;
     657             : 
     658             :         queue_balance_callback(rq, &per_cpu(dl_push_head, rq->cpu), push_dl_tasks);
     659             : }
     660             : 
     661             : static inline void deadline_queue_pull_task(struct rq *rq)
     662             : {
     663             :         queue_balance_callback(rq, &per_cpu(dl_pull_head, rq->cpu), pull_dl_task);
     664             : }
     665             : 
     666             : static struct rq *find_lock_later_rq(struct task_struct *task, struct rq *rq);
     667             : 
     668             : static struct rq *dl_task_offline_migration(struct rq *rq, struct task_struct *p)
     669             : {
     670             :         struct rq *later_rq = NULL;
     671             :         struct dl_bw *dl_b;
     672             : 
     673             :         later_rq = find_lock_later_rq(p, rq);
     674             :         if (!later_rq) {
     675             :                 int cpu;
     676             : 
     677             :                 /*
     678             :                  * If we cannot preempt any rq, fall back to pick any
     679             :                  * online CPU:
     680             :                  */
     681             :                 cpu = cpumask_any_and(cpu_active_mask, p->cpus_ptr);
     682             :                 if (cpu >= nr_cpu_ids) {
     683             :                         /*
     684             :                          * Failed to find any suitable CPU.
     685             :                          * The task will never come back!
     686             :                          */
     687             :                         WARN_ON_ONCE(dl_bandwidth_enabled());
     688             : 
     689             :                         /*
     690             :                          * If admission control is disabled we
     691             :                          * try a little harder to let the task
     692             :                          * run.
     693             :                          */
     694             :                         cpu = cpumask_any(cpu_active_mask);
     695             :                 }
     696             :                 later_rq = cpu_rq(cpu);
     697             :                 double_lock_balance(rq, later_rq);
     698             :         }
     699             : 
     700             :         if (p->dl.dl_non_contending || p->dl.dl_throttled) {
     701             :                 /*
     702             :                  * Inactive timer is armed (or callback is running, but
     703             :                  * waiting for us to release rq locks). In any case, when it
     704             :                  * will fire (or continue), it will see running_bw of this
     705             :                  * task migrated to later_rq (and correctly handle it).
     706             :                  */
     707             :                 sub_running_bw(&p->dl, &rq->dl);
     708             :                 sub_rq_bw(&p->dl, &rq->dl);
     709             : 
     710             :                 add_rq_bw(&p->dl, &later_rq->dl);
     711             :                 add_running_bw(&p->dl, &later_rq->dl);
     712             :         } else {
     713             :                 sub_rq_bw(&p->dl, &rq->dl);
     714             :                 add_rq_bw(&p->dl, &later_rq->dl);
     715             :         }
     716             : 
     717             :         /*
     718             :          * And we finally need to fixup root_domain(s) bandwidth accounting,
     719             :          * since p is still hanging out in the old (now moved to default) root
     720             :          * domain.
     721             :          */
     722             :         dl_b = &rq->rd->dl_bw;
     723             :         raw_spin_lock(&dl_b->lock);
     724             :         __dl_sub(dl_b, p->dl.dl_bw, cpumask_weight(rq->rd->span));
     725             :         raw_spin_unlock(&dl_b->lock);
     726             : 
     727             :         dl_b = &later_rq->rd->dl_bw;
     728             :         raw_spin_lock(&dl_b->lock);
     729             :         __dl_add(dl_b, p->dl.dl_bw, cpumask_weight(later_rq->rd->span));
     730             :         raw_spin_unlock(&dl_b->lock);
     731             : 
     732             :         set_task_cpu(p, later_rq->cpu);
     733             :         double_unlock_balance(later_rq, rq);
     734             : 
     735             :         return later_rq;
     736             : }
     737             : 
     738             : #else
     739             : 
     740             : static inline
     741             : void enqueue_pushable_dl_task(struct rq *rq, struct task_struct *p)
     742             : {
     743             : }
     744             : 
     745             : static inline
     746             : void dequeue_pushable_dl_task(struct rq *rq, struct task_struct *p)
     747             : {
     748             : }
     749             : 
     750             : static inline
     751             : void inc_dl_migration(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
     752             : {
     753             : }
     754             : 
     755             : static inline
     756             : void dec_dl_migration(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
     757             : {
     758             : }
     759             : 
     760             : static inline void deadline_queue_push_tasks(struct rq *rq)
     761             : {
     762             : }
     763             : 
     764             : static inline void deadline_queue_pull_task(struct rq *rq)
     765             : {
     766             : }
     767             : #endif /* CONFIG_SMP */
     768             : 
     769             : static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags);
     770             : static void __dequeue_task_dl(struct rq *rq, struct task_struct *p, int flags);
     771             : static void check_preempt_curr_dl(struct rq *rq, struct task_struct *p, int flags);
     772             : 
     773             : static inline void replenish_dl_new_period(struct sched_dl_entity *dl_se,
     774             :                                             struct rq *rq)
     775             : {
     776             :         /* for non-boosted task, pi_of(dl_se) == dl_se */
     777           0 :         dl_se->deadline = rq_clock(rq) + pi_of(dl_se)->dl_deadline;
     778           0 :         dl_se->runtime = pi_of(dl_se)->dl_runtime;
     779             : }
     780             : 
     781             : /*
     782             :  * We are being explicitly informed that a new instance is starting,
     783             :  * and this means that:
     784             :  *  - the absolute deadline of the entity has to be placed at
     785             :  *    current time + relative deadline;
     786             :  *  - the runtime of the entity has to be set to the maximum value.
     787             :  *
     788             :  * The capability of specifying such event is useful whenever a -deadline
     789             :  * entity wants to (try to!) synchronize its behaviour with the scheduler's
     790             :  * one, and to (try to!) reconcile itself with its own scheduling
     791             :  * parameters.
     792             :  */
     793           0 : static inline void setup_new_dl_entity(struct sched_dl_entity *dl_se)
     794             : {
     795           0 :         struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
     796           0 :         struct rq *rq = rq_of_dl_rq(dl_rq);
     797             : 
     798           0 :         WARN_ON(is_dl_boosted(dl_se));
     799           0 :         WARN_ON(dl_time_before(rq_clock(rq), dl_se->deadline));
     800             : 
     801             :         /*
     802             :          * We are racing with the deadline timer. So, do nothing because
     803             :          * the deadline timer handler will take care of properly recharging
     804             :          * the runtime and postponing the deadline
     805             :          */
     806           0 :         if (dl_se->dl_throttled)
     807             :                 return;
     808             : 
     809             :         /*
     810             :          * We use the regular wall clock time to set deadlines in the
     811             :          * future; in fact, we must consider execution overheads (time
     812             :          * spent on hardirq context, etc.).
     813             :          */
     814             :         replenish_dl_new_period(dl_se, rq);
     815             : }
     816             : 
     817             : /*
     818             :  * Pure Earliest Deadline First (EDF) scheduling does not deal with the
     819             :  * possibility of an entity lasting more than what it declared, and thus
     820             :  * exhausting its runtime.
     821             :  *
     822             :  * Here we are interested in making runtime overrun possible, but we do
     823             :  * not want an entity which is misbehaving to affect the scheduling of all
     824             :  * other entities.
     825             :  * Therefore, a budgeting strategy called Constant Bandwidth Server (CBS)
     826             :  * is used, in order to confine each entity within its own bandwidth.
     827             :  *
     828             :  * This function deals exactly with that, and ensures that when the runtime
     829             :  * of an entity is replenished, its deadline is also postponed. That ensures
     830             :  * the overrunning entity can't interfere with other entities in the system and
     831             :  * can't make them miss their deadlines. Reasons why this kind of overrun
     832             :  * could happen are, typically, an entity voluntarily trying to overcome its
     833             :  * runtime, or it just underestimated it during sched_setattr().
     834             :  */
     835           0 : static void replenish_dl_entity(struct sched_dl_entity *dl_se)
     836             : {
     837           0 :         struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
     838           0 :         struct rq *rq = rq_of_dl_rq(dl_rq);
     839             : 
     840           0 :         WARN_ON_ONCE(pi_of(dl_se)->dl_runtime <= 0);
     841             : 
     842             :         /*
     843             :          * This could be the case for a !-dl task that is boosted.
     844             :          * Just go with full inherited parameters.
     845             :          */
     846           0 :         if (dl_se->dl_deadline == 0)
     847             :                 replenish_dl_new_period(dl_se, rq);
     848             : 
     849           0 :         if (dl_se->dl_yielded && dl_se->runtime > 0)
     850           0 :                 dl_se->runtime = 0;
     851             : 
     852             :         /*
     853             :          * We keep moving the deadline away until we get some
     854             :          * available runtime for the entity. This ensures correct
     855             :          * handling of situations where the runtime overrun is
     856             :          * arbitrary large.
     857             :          */
     858           0 :         while (dl_se->runtime <= 0) {
     859           0 :                 dl_se->deadline += pi_of(dl_se)->dl_period;
     860           0 :                 dl_se->runtime += pi_of(dl_se)->dl_runtime;
     861             :         }
     862             : 
     863             :         /*
     864             :          * At this point, the deadline really should be "in
     865             :          * the future" with respect to rq->clock. If it's
     866             :          * not, we are, for some reason, lagging too much!
     867             :          * Anyway, after having warned userspace about that,
     868             :          * we still try to keep the things running by
     869             :          * resetting the deadline and the budget of the
     870             :          * entity.
     871             :          */
     872           0 :         if (dl_time_before(dl_se->deadline, rq_clock(rq))) {
     873           0 :                 printk_deferred_once("sched: DL replenish lagged too much\n");
     874             :                 replenish_dl_new_period(dl_se, rq);
     875             :         }
     876             : 
     877           0 :         if (dl_se->dl_yielded)
     878           0 :                 dl_se->dl_yielded = 0;
     879           0 :         if (dl_se->dl_throttled)
     880           0 :                 dl_se->dl_throttled = 0;
     881           0 : }
     882             : 
     883             : /*
     884             :  * Here we check if --at time t-- an entity (which is probably being
     885             :  * [re]activated or, in general, enqueued) can use its remaining runtime
     886             :  * and its current deadline _without_ exceeding the bandwidth it is
     887             :  * assigned (function returns true if it can't). We are in fact applying
     888             :  * one of the CBS rules: when a task wakes up, if the residual runtime
     889             :  * over residual deadline fits within the allocated bandwidth, then we
     890             :  * can keep the current (absolute) deadline and residual budget without
     891             :  * disrupting the schedulability of the system. Otherwise, we should
     892             :  * refill the runtime and set the deadline a period in the future,
     893             :  * because keeping the current (absolute) deadline of the task would
     894             :  * result in breaking guarantees promised to other tasks (refer to
     895             :  * Documentation/scheduler/sched-deadline.rst for more information).
     896             :  *
     897             :  * This function returns true if:
     898             :  *
     899             :  *   runtime / (deadline - t) > dl_runtime / dl_deadline ,
     900             :  *
     901             :  * IOW we can't recycle current parameters.
     902             :  *
     903             :  * Notice that the bandwidth check is done against the deadline. For
     904             :  * tasks with deadline equal to period this is the same as using
     905             :  * dl_period instead of dl_deadline in the equation above.
     906             :  */
     907             : static bool dl_entity_overflow(struct sched_dl_entity *dl_se, u64 t)
     908             : {
     909             :         u64 left, right;
     910             : 
     911             :         /*
     912             :          * left and right are the two sides of the equation above,
     913             :          * after a bit of shuffling to use multiplications instead
     914             :          * of divisions.
     915             :          *
     916             :          * Note that none of the time values involved in the two
     917             :          * multiplications are absolute: dl_deadline and dl_runtime
     918             :          * are the relative deadline and the maximum runtime of each
     919             :          * instance, runtime is the runtime left for the last instance
     920             :          * and (deadline - t), since t is rq->clock, is the time left
     921             :          * to the (absolute) deadline. Even if overflowing the u64 type
     922             :          * is very unlikely to occur in both cases, here we scale down
     923             :          * as we want to avoid that risk at all. Scaling down by 10
     924             :          * means that we reduce granularity to 1us. We are fine with it,
     925             :          * since this is only a true/false check and, anyway, thinking
     926             :          * of anything below microseconds resolution is actually fiction
     927             :          * (but still we want to give the user that illusion >;).
     928             :          */
     929           0 :         left = (pi_of(dl_se)->dl_deadline >> DL_SCALE) * (dl_se->runtime >> DL_SCALE);
     930           0 :         right = ((dl_se->deadline - t) >> DL_SCALE) *
     931           0 :                 (pi_of(dl_se)->dl_runtime >> DL_SCALE);
     932             : 
     933           0 :         return dl_time_before(right, left);
     934             : }
     935             : 
     936             : /*
     937             :  * Revised wakeup rule [1]: For self-suspending tasks, rather than
     938             :  * re-initializing the task's runtime and deadline, the revised wakeup
     939             :  * rule adjusts the task's runtime so that the task does not overrun
     940             :  * its density.
     941             :  *
     942             :  * Reasoning: a task may overrun the density if:
     943             :  *    runtime / (deadline - t) > dl_runtime / dl_deadline
     944             :  *
     945             :  * Therefore, runtime can be adjusted to:
     946             :  *     runtime = (dl_runtime / dl_deadline) * (deadline - t)
     947             :  *
     948             :  * In such way that runtime will be equal to the maximum density
     949             :  * the task can use without breaking any rule.
     950             :  *
     951             :  * [1] Luca Abeni, Giuseppe Lipari, and Juri Lelli. 2015. Constant
     952             :  * bandwidth server revisited. SIGBED Rev. 11, 4 (January 2015), 19-24.
     953             :  */
     954             : static void
     955           0 : update_dl_revised_wakeup(struct sched_dl_entity *dl_se, struct rq *rq)
     956             : {
     957           0 :         u64 laxity = dl_se->deadline - rq_clock(rq);
     958             : 
     959             :         /*
     960             :          * If the task has deadline < period, and the deadline is in the past,
     961             :          * it should already be throttled before this check.
     962             :          *
     963             :          * See update_dl_entity() comments for further details.
     964             :          */
     965           0 :         WARN_ON(dl_time_before(dl_se->deadline, rq_clock(rq)));
     966             : 
     967           0 :         dl_se->runtime = (dl_se->dl_density * laxity) >> BW_SHIFT;
     968           0 : }
     969             : 
     970             : /*
     971             :  * Regarding the deadline, a task with implicit deadline has a relative
     972             :  * deadline == relative period. A task with constrained deadline has a
     973             :  * relative deadline <= relative period.
     974             :  *
     975             :  * We support constrained deadline tasks. However, there are some restrictions
     976             :  * applied only for tasks which do not have an implicit deadline. See
     977             :  * update_dl_entity() to know more about such restrictions.
     978             :  *
     979             :  * The dl_is_implicit() returns true if the task has an implicit deadline.
     980             :  */
     981             : static inline bool dl_is_implicit(struct sched_dl_entity *dl_se)
     982             : {
     983             :         return dl_se->dl_deadline == dl_se->dl_period;
     984             : }
     985             : 
     986             : /*
     987             :  * When a deadline entity is placed in the runqueue, its runtime and deadline
     988             :  * might need to be updated. This is done by a CBS wake up rule. There are two
     989             :  * different rules: 1) the original CBS; and 2) the Revisited CBS.
     990             :  *
     991             :  * When the task is starting a new period, the Original CBS is used. In this
     992             :  * case, the runtime is replenished and a new absolute deadline is set.
     993             :  *
     994             :  * When a task is queued before the beginning of the next period, using the
     995             :  * remaining runtime and deadline could make the entity overflow, see
     996             :  * dl_entity_overflow() to find more about runtime overflow. When such a case
     997             :  * is detected, the runtime and deadline need to be updated.
     998             :  *
     999             :  * If the task has an implicit deadline, i.e., deadline == period, the Original
    1000             :  * CBS is applied. The runtime is replenished and a new absolute deadline is
    1001             :  * set, as in the previous cases.
    1002             :  *
    1003             :  * However, the Original CBS does not work properly for tasks with
    1004             :  * deadline < period, which are said to have a constrained deadline. By
    1005             :  * applying the Original CBS, a constrained deadline task would be able to run
    1006             :  * runtime/deadline in a period. With deadline < period, the task would
    1007             :  * overrun the runtime/period allowed bandwidth, breaking the admission test.
    1008             :  *
    1009             :  * In order to prevent this misbehavior, the Revisited CBS is used for
    1010             :  * constrained deadline tasks when a runtime overflow is detected. In the
    1011             :  * Revisited CBS, rather than replenishing & setting a new absolute deadline,
    1012             :  * the remaining runtime of the task is reduced to avoid runtime overflow.
    1013             :  * Please refer to the comments on the update_dl_revised_wakeup() function to
    1014             :  * find more about the Revised CBS rule.
    1015             :  */
    1016           0 : static void update_dl_entity(struct sched_dl_entity *dl_se)
    1017             : {
    1018           0 :         struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
    1019           0 :         struct rq *rq = rq_of_dl_rq(dl_rq);
    1020             : 
    1021           0 :         if (dl_time_before(dl_se->deadline, rq_clock(rq)) ||
    1022           0 :             dl_entity_overflow(dl_se, rq_clock(rq))) {
    1023             : 
    1024           0 :                 if (unlikely(!dl_is_implicit(dl_se) &&
    1025             :                              !dl_time_before(dl_se->deadline, rq_clock(rq)) &&
    1026             :                              !is_dl_boosted(dl_se))) {
    1027           0 :                         update_dl_revised_wakeup(dl_se, rq);
    1028           0 :                         return;
    1029             :                 }
    1030             : 
    1031             :                 replenish_dl_new_period(dl_se, rq);
    1032             :         }
    1033             : }
    1034             : 
    1035             : static inline u64 dl_next_period(struct sched_dl_entity *dl_se)
    1036             : {
    1037           0 :         return dl_se->deadline - dl_se->dl_deadline + dl_se->dl_period;
    1038             : }
    1039             : 
    1040             : /*
    1041             :  * If the entity depleted all its runtime, and if we want it to sleep
    1042             :  * while waiting for some new execution time to become available, we
    1043             :  * set the bandwidth replenishment timer to the replenishment instant
    1044             :  * and try to activate it.
    1045             :  *
    1046             :  * Notice that it is important for the caller to know if the timer
    1047             :  * actually started or not (i.e., the replenishment instant is in
    1048             :  * the future or in the past).
    1049             :  */
    1050           0 : static int start_dl_timer(struct task_struct *p)
    1051             : {
    1052           0 :         struct sched_dl_entity *dl_se = &p->dl;
    1053           0 :         struct hrtimer *timer = &dl_se->dl_timer;
    1054           0 :         struct rq *rq = task_rq(p);
    1055             :         ktime_t now, act;
    1056             :         s64 delta;
    1057             : 
    1058           0 :         lockdep_assert_rq_held(rq);
    1059             : 
    1060             :         /*
    1061             :          * We want the timer to fire at the deadline, but considering
    1062             :          * that it is actually coming from rq->clock and not from
    1063             :          * hrtimer's time base reading.
    1064             :          */
    1065           0 :         act = ns_to_ktime(dl_next_period(dl_se));
    1066           0 :         now = hrtimer_cb_get_time(timer);
    1067           0 :         delta = ktime_to_ns(now) - rq_clock(rq);
    1068           0 :         act = ktime_add_ns(act, delta);
    1069             : 
    1070             :         /*
    1071             :          * If the expiry time already passed, e.g., because the value
    1072             :          * chosen as the deadline is too small, don't even try to
    1073             :          * start the timer in the past!
    1074             :          */
    1075           0 :         if (ktime_us_delta(act, now) < 0)
    1076             :                 return 0;
    1077             : 
    1078             :         /*
    1079             :          * !enqueued will guarantee another callback; even if one is already in
    1080             :          * progress. This ensures a balanced {get,put}_task_struct().
    1081             :          *
    1082             :          * The race against __run_timer() clearing the enqueued state is
    1083             :          * harmless because we're holding task_rq()->lock, therefore the timer
    1084             :          * expiring after we've done the check will wait on its task_rq_lock()
    1085             :          * and observe our state.
    1086             :          */
    1087           0 :         if (!hrtimer_is_queued(timer)) {
    1088           0 :                 get_task_struct(p);
    1089             :                 hrtimer_start(timer, act, HRTIMER_MODE_ABS_HARD);
    1090             :         }
    1091             : 
    1092             :         return 1;
    1093             : }
    1094             : 
    1095             : /*
    1096             :  * This is the bandwidth enforcement timer callback. If here, we know
    1097             :  * a task is not on its dl_rq, since the fact that the timer was running
    1098             :  * means the task is throttled and needs a runtime replenishment.
    1099             :  *
    1100             :  * However, what we actually do depends on whether the task is active
    1101             :  * (it is on its rq) or has been removed from there by a call to
    1102             :  * dequeue_task_dl(). In the former case we must issue the runtime
    1103             :  * replenishment and add the task back to the dl_rq; in the latter, we just
    1104             :  * do nothing but clearing dl_throttled, so that runtime and deadline
    1105             :  * updating (and the queueing back to dl_rq) will be done by the
    1106             :  * next call to enqueue_task_dl().
    1107             :  */
    1108           0 : static enum hrtimer_restart dl_task_timer(struct hrtimer *timer)
    1109             : {
    1110           0 :         struct sched_dl_entity *dl_se = container_of(timer,
    1111             :                                                      struct sched_dl_entity,
    1112             :                                                      dl_timer);
    1113           0 :         struct task_struct *p = dl_task_of(dl_se);
    1114             :         struct rq_flags rf;
    1115             :         struct rq *rq;
    1116             : 
    1117           0 :         rq = task_rq_lock(p, &rf);
    1118             : 
    1119             :         /*
    1120             :          * The task might have changed its scheduling policy to something
    1121             :          * different than SCHED_DEADLINE (through switched_from_dl()).
    1122             :          */
    1123           0 :         if (!dl_task(p))
    1124             :                 goto unlock;
    1125             : 
    1126             :         /*
    1127             :          * The task might have been boosted by someone else and might be in the
    1128             :          * boosting/deboosting path, its not throttled.
    1129             :          */
    1130           0 :         if (is_dl_boosted(dl_se))
    1131             :                 goto unlock;
    1132             : 
    1133             :         /*
    1134             :          * Spurious timer due to start_dl_timer() race; or we already received
    1135             :          * a replenishment from rt_mutex_setprio().
    1136             :          */
    1137           0 :         if (!dl_se->dl_throttled)
    1138             :                 goto unlock;
    1139             : 
    1140             :         sched_clock_tick();
    1141           0 :         update_rq_clock(rq);
    1142             : 
    1143             :         /*
    1144             :          * If the throttle happened during sched-out; like:
    1145             :          *
    1146             :          *   schedule()
    1147             :          *     deactivate_task()
    1148             :          *       dequeue_task_dl()
    1149             :          *         update_curr_dl()
    1150             :          *           start_dl_timer()
    1151             :          *         __dequeue_task_dl()
    1152             :          *     prev->on_rq = 0;
    1153             :          *
    1154             :          * We can be both throttled and !queued. Replenish the counter
    1155             :          * but do not enqueue -- wait for our wakeup to do that.
    1156             :          */
    1157           0 :         if (!task_on_rq_queued(p)) {
    1158           0 :                 replenish_dl_entity(dl_se);
    1159           0 :                 goto unlock;
    1160             :         }
    1161             : 
    1162             : #ifdef CONFIG_SMP
    1163             :         if (unlikely(!rq->online)) {
    1164             :                 /*
    1165             :                  * If the runqueue is no longer available, migrate the
    1166             :                  * task elsewhere. This necessarily changes rq.
    1167             :                  */
    1168             :                 lockdep_unpin_lock(__rq_lockp(rq), rf.cookie);
    1169             :                 rq = dl_task_offline_migration(rq, p);
    1170             :                 rf.cookie = lockdep_pin_lock(__rq_lockp(rq));
    1171             :                 update_rq_clock(rq);
    1172             : 
    1173             :                 /*
    1174             :                  * Now that the task has been migrated to the new RQ and we
    1175             :                  * have that locked, proceed as normal and enqueue the task
    1176             :                  * there.
    1177             :                  */
    1178             :         }
    1179             : #endif
    1180             : 
    1181           0 :         enqueue_task_dl(rq, p, ENQUEUE_REPLENISH);
    1182           0 :         if (dl_task(rq->curr))
    1183             :                 check_preempt_curr_dl(rq, p, 0);
    1184             :         else
    1185           0 :                 resched_curr(rq);
    1186             : 
    1187             : #ifdef CONFIG_SMP
    1188             :         /*
    1189             :          * Queueing this task back might have overloaded rq, check if we need
    1190             :          * to kick someone away.
    1191             :          */
    1192             :         if (has_pushable_dl_tasks(rq)) {
    1193             :                 /*
    1194             :                  * Nothing relies on rq->lock after this, so its safe to drop
    1195             :                  * rq->lock.
    1196             :                  */
    1197             :                 rq_unpin_lock(rq, &rf);
    1198             :                 push_dl_task(rq);
    1199             :                 rq_repin_lock(rq, &rf);
    1200             :         }
    1201             : #endif
    1202             : 
    1203             : unlock:
    1204           0 :         task_rq_unlock(rq, p, &rf);
    1205             : 
    1206             :         /*
    1207             :          * This can free the task_struct, including this hrtimer, do not touch
    1208             :          * anything related to that after this.
    1209             :          */
    1210           0 :         put_task_struct(p);
    1211             : 
    1212           0 :         return HRTIMER_NORESTART;
    1213             : }
    1214             : 
    1215         349 : void init_dl_task_timer(struct sched_dl_entity *dl_se)
    1216             : {
                               /*
                                * Initialise dl_se->dl_timer on CLOCK_MONOTONIC with mode
                                * HRTIMER_MODE_REL_HARD and install dl_task_timer() as its
                                * callback. The timer is only set up here, not started.
                                */
    1217         349 :         struct hrtimer *timer = &dl_se->dl_timer;
    1218             : 
    1219         349 :         hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD);
    1220         349 :         timer->function = dl_task_timer;
    1221         349 : }
    1222             : 
    1223             : /*
    1224             :  * During the activation, CBS checks if it can reuse the current task's
    1225             :  * runtime and period. If the deadline of the task is in the past, CBS
    1226             :  * cannot use the runtime, and so it replenishes the task. This rule
    1227             :  * works fine for implicit deadline tasks (deadline == period), and the
    1228             :  * CBS was designed for implicit deadline tasks. However, a task with
    1229             :  * constrained deadline (deadline < period) might be awakened after the
    1230             :  * deadline, but before the next period. In this case, replenishing the
    1231             :  * task would allow it to run for runtime / deadline. As in this case
    1232             :  * deadline < period, CBS enables a task to run for more than the
    1233             :  * runtime / period. In a very loaded system, this can cause a domino
    1234             :  * effect, making other tasks miss their deadlines.
    1235             :  *
    1236             :  * To avoid this problem, in the activation of a constrained deadline
    1237             :  * task after the deadline but before the next period, throttle the
    1238             :  * task and set the replenishing timer to the beginning of the next period,
    1239             :  * unless it is boosted.
                     :  *
                     :  * Throttling here means: dl_throttled is set and any positive remaining
                     :  * runtime is zeroed. Nothing is modified when the entity is boosted or
                     :  * when start_dl_timer() returns 0.
    1240             :  */
    1241           0 : static inline void dl_check_constrained_dl(struct sched_dl_entity *dl_se)
    1242             : {
    1243           0 :         struct task_struct *p = dl_task_of(dl_se);
    1244           0 :         struct rq *rq = rq_of_dl_rq(dl_rq_of_se(dl_se));
    1245             : 
    1246           0 :         if (dl_time_before(dl_se->deadline, rq_clock(rq)) &&
    1247           0 :             dl_time_before(rq_clock(rq), dl_next_period(dl_se))) {
    1248           0 :                 if (unlikely(is_dl_boosted(dl_se) || !start_dl_timer(p)))
    1249             :                         return;
    1250           0 :                 dl_se->dl_throttled = 1;
    1251           0 :                 if (dl_se->runtime > 0)
    1252           0 :                         dl_se->runtime = 0;
    1253             :         }
    1254             : }
    1255             : 
    1256             : static
    1257             : int dl_runtime_exceeded(struct sched_dl_entity *dl_se)
    1258             : {
                               /* Non-zero when the entity has no runtime left (runtime <= 0). */
    1259             :         return (dl_se->runtime <= 0);
    1260             : }
    1261             : 
    1262             : /*
    1263             :  * This function implements the GRUB accounting rule:
    1264             :  * according to the GRUB reclaiming algorithm, the runtime is
    1265             :  * not decreased as "dq = -dt", but as
    1266             :  * "dq = -max{u / Umax, (1 - Uinact - Uextra)} dt",
    1267             :  * where u is the utilization of the task, Umax is the maximum reclaimable
    1268             :  * utilization, Uinact is the (per-runqueue) inactive utilization, computed
    1269             :  * as the difference between the "total runqueue utilization" and the
    1270             :  * runqueue active utilization, and Uextra is the (per runqueue) extra
    1271             :  * reclaimable utilization.
    1272             :  * Since rq->dl.running_bw and rq->dl.this_bw contain utilizations
    1273             :  * multiplied by 2^BW_SHIFT, the result has to be shifted right by
    1274             :  * BW_SHIFT.
    1275             :  * Since rq->dl.bw_ratio contains 1 / Umax multiplied by 2^RATIO_SHIFT,
    1276             :  * dl_bw is multiplied by rq->dl.bw_ratio and shifted right by RATIO_SHIFT.
    1277             :  * Since delta is a 64 bit variable, to have an overflow its value
    1278             :  * should be larger than 2^(64 - 20 - 8), which is more than 64 seconds.
    1279             :  * So, overflow is not an issue here.
                     :  *
                     :  * Returns delta scaled by the chosen factor: (delta * u_act) >> BW_SHIFT.
    1280             :  */
    1281             : static u64 grub_reclaim(u64 delta, struct rq *rq, struct sched_dl_entity *dl_se)
    1282             : {
    1283           0 :         u64 u_inact = rq->dl.this_bw - rq->dl.running_bw; /* Utot - Uact */
    1284             :         u64 u_act;
    1285           0 :         u64 u_act_min = (dl_se->dl_bw * rq->dl.bw_ratio) >> RATIO_SHIFT;
    1286             : 
    1287             :         /*
    1288             :          * Instead of computing max{u * bw_ratio, (1 - u_inact - u_extra)},
    1289             :          * we compare u_inact + rq->dl.extra_bw with
    1290             :          * 1 - (u * rq->dl.bw_ratio >> RATIO_SHIFT), because
    1291             :          * u_inact + rq->dl.extra_bw can be larger than
    1292             :          * 1 (so, 1 - u_inact - rq->dl.extra_bw would be negative,
    1293             :          * leading to wrong results).
    1294             :          */
    1295           0 :         if (u_inact + rq->dl.extra_bw > BW_UNIT - u_act_min)
    1296             :                 u_act = u_act_min;
    1297             :         else
    1298           0 :                 u_act = BW_UNIT - u_inact - rq->dl.extra_bw;
    1299             : 
    1300           0 :         return (delta * u_act) >> BW_SHIFT;
    1301             : }
    1302             : 
    1303             : /*
    1304             :  * Update the current task's runtime statistics (provided it is still
    1305             :  * a -deadline task and has not been removed from the dl_rq).
                     :  *
                     :  * The elapsed task-clock time since se.exec_start is scaled (through
                     :  * grub_reclaim() for SCHED_FLAG_RECLAIM entities, by frequency and CPU
                     :  * capacity otherwise) and subtracted from dl_se->runtime. If the runtime
                     :  * is then exhausted, or the task yielded, the task is marked throttled
                     :  * and dequeued; it is put straight back with ENQUEUE_REPLENISH only when
                     :  * it is boosted or start_dl_timer() returns 0.
    1306             :  */
    1307           0 : static void update_curr_dl(struct rq *rq)
    1308             : {
    1309           0 :         struct task_struct *curr = rq->curr;
    1310           0 :         struct sched_dl_entity *dl_se = &curr->dl;
    1311             :         u64 delta_exec, scaled_delta_exec;
    1312           0 :         int cpu = cpu_of(rq);
    1313             :         u64 now;
    1314             : 
    1315           0 :         if (!dl_task(curr) || !on_dl_rq(dl_se))
    1316             :                 return;
    1317             : 
    1318             :         /*
    1319             :          * Consumed budget is computed considering the time as
    1320             :          * observed by schedulable tasks (excluding time spent
    1321             :          * in hardirq context, etc.). Deadlines are instead
    1322             :          * computed using hard walltime. This seems to be the more
    1323             :          * natural solution, but the full ramifications of this
    1324             :          * approach need further study.
    1325             :          */
    1326           0 :         now = rq_clock_task(rq);
    1327           0 :         delta_exec = now - curr->se.exec_start;
                               /*
                                * No positive elapsed time: nothing to account. A pending yield
                                * still has to go through the throttle path below.
                                */
    1328           0 :         if (unlikely((s64)delta_exec <= 0)) {
    1329           0 :                 if (unlikely(dl_se->dl_yielded))
    1330             :                         goto throttle;
    1331             :                 return;
    1332             :         }
    1333             : 
    1334             :         schedstat_set(curr->stats.exec_max,
    1335             :                       max(curr->stats.exec_max, delta_exec));
    1336             : 
    1337           0 :         trace_sched_stat_runtime(curr, delta_exec, 0);
    1338             : 
    1339           0 :         update_current_exec_runtime(curr, now, delta_exec);
    1340             : 
    1341           0 :         if (dl_entity_is_special(dl_se))
    1342             :                 return;
    1343             : 
    1344             :         /*
    1345             :          * For tasks that participate in GRUB, we implement GRUB-PA: the
    1346             :          * spare reclaimed bandwidth is used to clock down frequency.
    1347             :          *
    1348             :          * For the others, we still need to scale reservation parameters
    1349             :          * according to current frequency and CPU maximum capacity.
    1350             :          */
    1351           0 :         if (unlikely(dl_se->flags & SCHED_FLAG_RECLAIM)) {
    1352           0 :                 scaled_delta_exec = grub_reclaim(delta_exec,
    1353             :                                                  rq,
    1354             :                                                  &curr->dl);
    1355             :         } else {
    1356           0 :                 unsigned long scale_freq = arch_scale_freq_capacity(cpu);
    1357           0 :                 unsigned long scale_cpu = arch_scale_cpu_capacity(cpu);
    1358             : 
    1359           0 :                 scaled_delta_exec = cap_scale(delta_exec, scale_freq);
    1360           0 :                 scaled_delta_exec = cap_scale(scaled_delta_exec, scale_cpu);
    1361             :         }
    1362             : 
    1363           0 :         dl_se->runtime -= scaled_delta_exec;
    1364             : 
    1365             : throttle:
    1366           0 :         if (dl_runtime_exceeded(dl_se) || dl_se->dl_yielded) {
    1367           0 :                 dl_se->dl_throttled = 1;
    1368             : 
    1369             :                 /* If requested, inform the user about runtime overruns. */
    1370           0 :                 if (dl_runtime_exceeded(dl_se) &&
    1371           0 :                     (dl_se->flags & SCHED_FLAG_DL_OVERRUN))
    1372           0 :                         dl_se->dl_overrun = 1;
    1373             : 
    1374           0 :                 __dequeue_task_dl(rq, curr, 0);
    1375           0 :                 if (unlikely(is_dl_boosted(dl_se) || !start_dl_timer(curr)))
    1376           0 :                         enqueue_task_dl(rq, curr, ENQUEUE_REPLENISH);
    1377             : 
    1378           0 :                 if (!is_leftmost(curr, &rq->dl))
    1379           0 :                         resched_curr(rq);
    1380             :         }
    1381             : 
    1382             :         /*
    1383             :          * Because -- for now -- we share the rt bandwidth, we need to
    1384             :          * account our runtime there too, otherwise actual rt tasks
    1385             :          * would be able to exceed the shared quota.
    1386             :          *
    1387             :          * Account to the root rt group for now.
    1388             :          *
    1389             :          * The solution we're working towards is having the RT groups scheduled
    1390             :          * using deadline servers -- however there's a few nasties to figure
    1391             :          * out before that can happen.
    1392             :          */
    1393           0 :         if (rt_bandwidth_enabled()) {
    1394           0 :                 struct rt_rq *rt_rq = &rq->rt;
    1395             : 
    1396           0 :                 raw_spin_lock(&rt_rq->rt_runtime_lock);
    1397             :                 /*
    1398             :                  * We'll let actual RT tasks worry about the overflow here, we
    1399             :                  * have our own CBS to keep us in line; only account when RT
    1400             :                  * bandwidth is relevant.
    1401             :                  */
    1402           0 :                 if (sched_rt_bandwidth_account(rt_rq))
    1403           0 :                         rt_rq->rt_time += delta_exec;
    1404           0 :                 raw_spin_unlock(&rt_rq->rt_runtime_lock);
    1405             :         }
    1406             : }
    1407             : 
    1408           0 : static enum hrtimer_restart inactive_task_timer(struct hrtimer *timer)
    1409             : {
                               /*
                                * Callback of dl_se->inactive_timer. Runs with the task's rq
                                * locked; always ends with put_task_struct(p) and returns
                                * HRTIMER_NORESTART.
                                */
    1410           0 :         struct sched_dl_entity *dl_se = container_of(timer,
    1411             :                                                      struct sched_dl_entity,
    1412             :                                                      inactive_timer);
    1413           0 :         struct task_struct *p = dl_task_of(dl_se);
    1414             :         struct rq_flags rf;
    1415             :         struct rq *rq;
    1416             : 
    1417           0 :         rq = task_rq_lock(p, &rf);
    1418             : 
    1419             :         sched_clock_tick();
    1420           0 :         update_rq_clock(rq);
    1421             : 
                               /*
                                * The task is no longer a deadline task, or it is dead: give its
                                * bandwidth back to the dl_bw of its CPU and clear its deadline
                                * parameters.
                                */
    1422           0 :         if (!dl_task(p) || READ_ONCE(p->__state) == TASK_DEAD) {
    1423           0 :                 struct dl_bw *dl_b = dl_bw_of(task_cpu(p));
    1424             : 
                                       /*
                                        * A dead task that is still flagged non-contending also
                                        * has its bandwidth removed from the dl_rq's running and
                                        * rq bandwidth.
                                        */
    1425           0 :                 if (READ_ONCE(p->__state) == TASK_DEAD && dl_se->dl_non_contending) {
    1426           0 :                         sub_running_bw(&p->dl, dl_rq_of_se(&p->dl));
    1427           0 :                         sub_rq_bw(&p->dl, dl_rq_of_se(&p->dl));
    1428           0 :                         dl_se->dl_non_contending = 0;
    1429             :                 }
    1430             : 
    1431           0 :                 raw_spin_lock(&dl_b->lock);
    1432           0 :                 __dl_sub(dl_b, p->dl.dl_bw, dl_bw_cpus(task_cpu(p)));
    1433           0 :                 raw_spin_unlock(&dl_b->lock);
    1434             :                 __dl_clear_params(p);
    1435             : 
    1436             :                 goto unlock;
    1437             :         }
                               /* Flag already cleared elsewhere: nothing left to do. */
    1438           0 :         if (dl_se->dl_non_contending == 0)
    1439             :                 goto unlock;
    1440             : 
                               /* Still non-contending: drop its running bandwidth from this rq. */
    1441           0 :         sub_running_bw(dl_se, &rq->dl);
    1442           0 :         dl_se->dl_non_contending = 0;
    1443             : unlock:
    1444           0 :         task_rq_unlock(rq, p, &rf);
    1445           0 :         put_task_struct(p);
    1446             : 
    1447           0 :         return HRTIMER_NORESTART;
    1448             : }
    1449             : 
    1450         349 : void init_dl_inactive_task_timer(struct sched_dl_entity *dl_se)
    1451             : {
                               /*
                                * Initialise dl_se->inactive_timer on CLOCK_MONOTONIC with mode
                                * HRTIMER_MODE_REL_HARD and install inactive_task_timer() as its
                                * callback. The timer is only set up here, not started.
                                */
    1452         349 :         struct hrtimer *timer = &dl_se->inactive_timer;
    1453             : 
    1454         349 :         hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD);
    1455         349 :         timer->function = inactive_task_timer;
    1456         349 : }
    1457             : 
    1458             : #define __node_2_dle(node) \
    1459             :         rb_entry((node), struct sched_dl_entity, rb_node) /* rb_node -> enclosing sched_dl_entity */
    1460             : 
    1461             : #ifdef CONFIG_SMP
    1462             : 
    1463             : static void inc_dl_deadline(struct dl_rq *dl_rq, u64 deadline)
    1464             : {
                               /*
                                * Record @deadline as this dl_rq's earliest deadline when none is
                                * recorded yet (earliest_dl.curr == 0) or when it is earlier than
                                * the recorded one. In the "none recorded" case cpupri is also set
                                * to CPUPRI_HIGHER; cpudl is updated on every such change.
                                */
    1465             :         struct rq *rq = rq_of_dl_rq(dl_rq);
    1466             : 
    1467             :         if (dl_rq->earliest_dl.curr == 0 ||
    1468             :             dl_time_before(deadline, dl_rq->earliest_dl.curr)) {
    1469             :                 if (dl_rq->earliest_dl.curr == 0)
    1470             :                         cpupri_set(&rq->rd->cpupri, rq->cpu, CPUPRI_HIGHER);
    1471             :                 dl_rq->earliest_dl.curr = deadline;
    1472             :                 cpudl_set(&rq->rd->cpudl, rq->cpu, deadline);
    1473             :         }
    1474             : }
    1475             : 
    1476             : static void dec_dl_deadline(struct dl_rq *dl_rq, u64 deadline)
    1477             : {
    1478             :         struct rq *rq = rq_of_dl_rq(dl_rq);
    1479             : 
    1480             :         /*
    1481             :          * Since we may have removed our earliest (and/or next earliest)
    1482             :          * task we must recompute them.
                     :          *
                     :          * Note that the @deadline argument is not read here: the new value
                     :          * is taken from the tree (or reset to 0 when no task is left).
    1483             :          */
    1484             :         if (!dl_rq->dl_nr_running) {
                                       /*
                                        * No deadline task left: reset both cached deadlines, clear
                                        * this CPU from cpudl and set cpupri from the rt highest
                                        * priority of this rq.
                                        */
    1485             :                 dl_rq->earliest_dl.curr = 0;
    1486             :                 dl_rq->earliest_dl.next = 0;
    1487             :                 cpudl_clear(&rq->rd->cpudl, rq->cpu);
    1488             :                 cpupri_set(&rq->rd->cpupri, rq->cpu, rq->rt.highest_prio.curr);
    1489             :         } else {
                                       /* The cached first node of the tree gives the new earliest deadline. */
    1490             :                 struct rb_node *leftmost = rb_first_cached(&dl_rq->root);
    1491             :                 struct sched_dl_entity *entry = __node_2_dle(leftmost);
    1492             : 
    1493             :                 dl_rq->earliest_dl.curr = entry->deadline;
    1494             :                 cpudl_set(&rq->rd->cpudl, rq->cpu, entry->deadline);
    1495             :         }
    1496             : }
    1497             : 
    1498             : #else
    1499             : 
    1500             : static inline void inc_dl_deadline(struct dl_rq *dl_rq, u64 deadline) {} /* no-op when CONFIG_SMP is not set */
    1501             : static inline void dec_dl_deadline(struct dl_rq *dl_rq, u64 deadline) {} /* no-op when CONFIG_SMP is not set */
    1502             : 
    1503             : #endif /* CONFIG_SMP */
    1504             : 
    1505             : static inline
    1506           0 : void inc_dl_tasks(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
    1507             : {
                               /*
                                * Accounting for one entity being added to @dl_rq: bump
                                * dl_nr_running and the rq's nr_running, then update the
                                * earliest-deadline and migration bookkeeping. Warns if the
                                * task's prio is not a deadline priority.
                                */
    1508           0 :         int prio = dl_task_of(dl_se)->prio;
    1509           0 :         u64 deadline = dl_se->deadline;
    1510             : 
    1511           0 :         WARN_ON(!dl_prio(prio));
    1512           0 :         dl_rq->dl_nr_running++;
    1513           0 :         add_nr_running(rq_of_dl_rq(dl_rq), 1);
    1514             : 
    1515           0 :         inc_dl_deadline(dl_rq, deadline);
    1516           0 :         inc_dl_migration(dl_se, dl_rq);
    1517           0 : }
    1518             : 
    1519             : static inline
    1520           0 : void dec_dl_tasks(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
    1521             : {
                               /*
                                * Accounting for one entity being removed from @dl_rq: the mirror
                                * image of inc_dl_tasks(). Additionally warns if dl_nr_running is
                                * already zero before the decrement.
                                */
    1522           0 :         int prio = dl_task_of(dl_se)->prio;
    1523             : 
    1524           0 :         WARN_ON(!dl_prio(prio));
    1525           0 :         WARN_ON(!dl_rq->dl_nr_running);
    1526           0 :         dl_rq->dl_nr_running--;
    1527           0 :         sub_nr_running(rq_of_dl_rq(dl_rq), 1);
    1528             : 
    1529           0 :         dec_dl_deadline(dl_rq, dl_se->deadline);
    1530           0 :         dec_dl_migration(dl_se, dl_rq);
    1531           0 : }
    1532             : 
    1533             : static inline bool __dl_less(struct rb_node *a, const struct rb_node *b)
    1534             : {
                               /*
                                * Ordering predicate passed to rb_add_cached(): true when the
                                * entity at @a has a deadline before the one at @b, as decided
                                * by dl_time_before().
                                */
    1535           0 :         return dl_time_before(__node_2_dle(a)->deadline, __node_2_dle(b)->deadline);
    1536             : }
    1537             : 
    1538             : static inline struct sched_statistics *
    1539             : __schedstats_from_dl_se(struct sched_dl_entity *dl_se)
    1540             : {
                               /* The statistics live in the task_struct that embeds @dl_se. */
    1541             :         return &dl_task_of(dl_se)->stats;
    1542             : }
    1543             : 
    1544             : static inline void
    1545             : update_stats_wait_start_dl(struct dl_rq *dl_rq, struct sched_dl_entity *dl_se)
    1546             : {
                               /*
                                * Forward to __update_stats_wait_start() with the rq, task and
                                * statistics derived from @dl_rq / @dl_se. Does nothing unless
                                * schedstat_enabled().
                                */
    1547             :         struct sched_statistics *stats;
    1548             : 
    1549             :         if (!schedstat_enabled())
    1550             :                 return;
    1551             : 
    1552             :         stats = __schedstats_from_dl_se(dl_se);
    1553             :         __update_stats_wait_start(rq_of_dl_rq(dl_rq), dl_task_of(dl_se), stats);
    1554             : }
    1555             : 
    1556             : static inline void
    1557             : update_stats_wait_end_dl(struct dl_rq *dl_rq, struct sched_dl_entity *dl_se)
    1558             : {
                               /*
                                * Forward to __update_stats_wait_end() with the rq, task and
                                * statistics derived from @dl_rq / @dl_se. Does nothing unless
                                * schedstat_enabled().
                                */
    1559             :         struct sched_statistics *stats;
    1560             : 
    1561             :         if (!schedstat_enabled())
    1562             :                 return;
    1563             : 
    1564             :         stats = __schedstats_from_dl_se(dl_se);
    1565             :         __update_stats_wait_end(rq_of_dl_rq(dl_rq), dl_task_of(dl_se), stats);
    1566             : }
    1567             : 
    1568             : static inline void
    1569             : update_stats_enqueue_sleeper_dl(struct dl_rq *dl_rq, struct sched_dl_entity *dl_se)
    1570             : {
                               /*
                                * Forward to __update_stats_enqueue_sleeper() with the rq, task
                                * and statistics derived from @dl_rq / @dl_se. Does nothing
                                * unless schedstat_enabled().
                                */
    1571             :         struct sched_statistics *stats;
    1572             : 
    1573             :         if (!schedstat_enabled())
    1574             :                 return;
    1575             : 
    1576             :         stats = __schedstats_from_dl_se(dl_se);
    1577             :         __update_stats_enqueue_sleeper(rq_of_dl_rq(dl_rq), dl_task_of(dl_se), stats);
    1578             : }
    1579             : 
    1580             : static inline void
    1581             : update_stats_enqueue_dl(struct dl_rq *dl_rq, struct sched_dl_entity *dl_se,
    1582             :                         int flags)
    1583             : {
                               /*
                                * Enqueue-time statistics hook: only wakeups (ENQUEUE_WAKEUP set
                                * in @flags) are recorded, through the sleeper helper. Does
                                * nothing unless schedstat_enabled().
                                */
    1584             :         if (!schedstat_enabled())
    1585             :                 return;
    1586             : 
    1587             :         if (flags & ENQUEUE_WAKEUP)
    1588             :                 update_stats_enqueue_sleeper_dl(dl_rq, dl_se);
    1589             : }
    1590             : 
    1591             : static inline void
    1592             : update_stats_dequeue_dl(struct dl_rq *dl_rq, struct sched_dl_entity *dl_se,
    1593             :                         int flags)
    1594             : {
                               /*
                                * Dequeue-time statistics hook. When the dequeue is a sleep
                                * (DEQUEUE_SLEEP set in @flags), stamp the rq clock into
                                * sleep_start and/or block_start depending on which of
                                * TASK_INTERRUPTIBLE / TASK_UNINTERRUPTIBLE is set in the task
                                * state. Does nothing unless schedstat_enabled().
                                */
    1595           0 :         struct task_struct *p = dl_task_of(dl_se);
    1596             : 
    1597             :         if (!schedstat_enabled())
    1598             :                 return;
    1599             : 
    1600             :         if ((flags & DEQUEUE_SLEEP)) {
    1601             :                 unsigned int state;
    1602             : 
                                       /* Read the state once so both tests see the same value. */
    1603             :                 state = READ_ONCE(p->__state);
    1604             :                 if (state & TASK_INTERRUPTIBLE)
    1605             :                         __schedstat_set(p->stats.sleep_start,
    1606             :                                         rq_clock(rq_of_dl_rq(dl_rq)));
    1607             : 
    1608             :                 if (state & TASK_UNINTERRUPTIBLE)
    1609             :                         __schedstat_set(p->stats.block_start,
    1610             :                                         rq_clock(rq_of_dl_rq(dl_rq)));
    1611             :         }
    1612             : }
    1613             : 
    1614           0 : static void __enqueue_dl_entity(struct sched_dl_entity *dl_se)
    1615             : {
                               /*
                                * Insert @dl_se into its dl_rq's rb-tree, ordered by __dl_less(),
                                * and update the per-rq task accounting. Warns (once) if the
                                * entity's rb_node is not empty, i.e. it already looks queued.
                                */
    1616           0 :         struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
    1617             : 
    1618           0 :         WARN_ON_ONCE(!RB_EMPTY_NODE(&dl_se->rb_node));
    1619             : 
    1620           0 :         rb_add_cached(&dl_se->rb_node, &dl_rq->root, __dl_less);
    1621             : 
    1622           0 :         inc_dl_tasks(dl_se, dl_rq);
    1623           0 : }
    1624             : 
    1625           0 : static void __dequeue_dl_entity(struct sched_dl_entity *dl_se)
    1626             : {
                               /*
                                * Remove @dl_se from its dl_rq's rb-tree and update the per-rq
                                * task accounting. Silently returns if the entity's rb_node is
                                * empty, so calling this on an unqueued entity is harmless.
                                */
    1627           0 :         struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
    1628             : 
    1629           0 :         if (RB_EMPTY_NODE(&dl_se->rb_node))
    1630             :                 return;
    1631             : 
    1632           0 :         rb_erase_cached(&dl_se->rb_node, &dl_rq->root);
    1633             : 
                               /* Mark the node empty again so the check above works next time. */
    1634           0 :         RB_CLEAR_NODE(&dl_se->rb_node);
    1635             : 
    1636           0 :         dec_dl_tasks(dl_se, dl_rq);
    1637             : }
    1638             : 
    1639             : static void
    1640           0 : enqueue_dl_entity(struct sched_dl_entity *dl_se, int flags)
    1641             : {
                               /*
                                * Queue @dl_se on its dl_rq after adjusting its parameters as
                                * selected by @flags; the flag tests below are mutually exclusive
                                * and checked in the order WAKEUP, REPLENISH, RESTORE. Warns
                                * (once) if the entity is already on the dl_rq.
                                */
    1642           0 :         WARN_ON_ONCE(on_dl_rq(dl_se));
    1643             : 
    1644           0 :         update_stats_enqueue_dl(dl_rq_of_se(dl_se), dl_se, flags);
    1645             : 
    1646             :         /*
    1647             :          * If this is a wakeup or a new instance, the scheduling
    1648             :          * parameters of the task might need updating. Otherwise,
    1649             :          * we want a replenishment of its runtime.
                     :          *
                     :          * ENQUEUE_RESTORE only triggers setup_new_dl_entity() when the
                     :          * current deadline is already before the rq clock.
    1650             :          */
    1651           0 :         if (flags & ENQUEUE_WAKEUP) {
    1652           0 :                 task_contending(dl_se, flags);
    1653           0 :                 update_dl_entity(dl_se);
    1654           0 :         } else if (flags & ENQUEUE_REPLENISH) {
    1655           0 :                 replenish_dl_entity(dl_se);
    1656           0 :         } else if ((flags & ENQUEUE_RESTORE) &&
    1657           0 :                   dl_time_before(dl_se->deadline,
    1658             :                                  rq_clock(rq_of_dl_rq(dl_rq_of_se(dl_se))))) {
    1659           0 :                 setup_new_dl_entity(dl_se);
    1660             :         }
    1661             : 
    1662           0 :         __enqueue_dl_entity(dl_se);
    1663           0 : }
    1664             : 
    1665             : static void dequeue_dl_entity(struct sched_dl_entity *dl_se)
    1666             : {
                               /* Thin wrapper: all the work is done by __dequeue_dl_entity(). */
    1667           0 :         __dequeue_dl_entity(dl_se);
    1668             : }
    1669             : 
    1670           0 : static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags)
    1671             : {
    1672           0 :         if (is_dl_boosted(&p->dl)) {
    1673             :                 /*
    1674             :                  * Because of delays in the detection of the overrun of a
    1675             :                  * thread's runtime, it might be the case that a thread
    1676             :                  * goes to sleep in a rt mutex with negative runtime. As
    1677             :                  * a consequence, the thread will be throttled.
    1678             :                  *
    1679             :                  * While waiting for the mutex, this thread can also be
    1680             :                  * boosted via PI, resulting in a thread that is throttled
    1681             :                  * and boosted at the same time.
    1682             :                  *
    1683             :                  * In this case, the boost overrides the throttle.
    1684             :                  */
    1685           0 :                 if (p->dl.dl_throttled) {
    1686             :                         /*
    1687             :                          * The replenish timer needs to be canceled. No
    1688             :                          * problem if it fires concurrently: boosted threads
    1689             :                          * are ignored in dl_task_timer().
    1690             :                          */
    1691           0 :                         hrtimer_try_to_cancel(&p->dl.dl_timer);
    1692           0 :                         p->dl.dl_throttled = 0;
    1693             :                 }
    1694           0 :         } else if (!dl_prio(p->normal_prio)) {
    1695             :                 /*
    1696             :                  * Special case in which we have a !SCHED_DEADLINE task that is going
    1697             :                  * to be deboosted, but exceeds its runtime while doing so. No point in
    1698             :                  * replenishing it, as it's going to return to its original
    1699             :                  * scheduling class after this. If it has been throttled, we need to
    1700             :                  * clear the flag, otherwise the task may wake up as throttled after
    1701             :                  * being boosted again with no means to replenish the runtime and clear
    1702             :                  * the throttle.
    1703             :                  */
    1704           0 :                 p->dl.dl_throttled = 0;
    1705           0 :                 if (!(flags & ENQUEUE_REPLENISH))
    1706           0 :                         printk_deferred_once("sched: DL de-boosted task PID %d: REPLENISH flag missing\n",
    1707             :                                              task_pid_nr(p));
    1708             : 
    1709             :                 return;
    1710             :         }
    1711             : 
    1712             :         /*
    1713             :          * Check if a constrained deadline task was activated
    1714             :          * after the deadline but before the next period.
    1715             :          * If that is the case, the task will be throttled and
    1716             :          * the replenishment timer will be set to the next period.
    1717             :          */
    1718           0 :         if (!p->dl.dl_throttled && !dl_is_implicit(&p->dl))
    1719           0 :                 dl_check_constrained_dl(&p->dl);
    1720             : 
    1721           0 :         if (p->on_rq == TASK_ON_RQ_MIGRATING || flags & ENQUEUE_RESTORE) {
    1722           0 :                 add_rq_bw(&p->dl, &rq->dl);
    1723           0 :                 add_running_bw(&p->dl, &rq->dl);
    1724             :         }
    1725             : 
    1726             :         /*
    1727             :          * If p is throttled, we do not enqueue it. In fact, if it exhausted
    1728             :          * its budget it needs a replenishment and, since it now is on
    1729             :          * its rq, the bandwidth timer callback (which clearly has not
    1730             :          * run yet) will take care of this.
    1731             :          * However, the active utilization does not depend on the fact
    1732             :          * that the task is on the runqueue or not (but depends on the
    1733             :          * task's state - in GRUB parlance, "inactive" vs "active contending").
    1734             :          * In other words, even if a task is throttled its utilization must
    1735             :          * be counted in the active utilization; hence, we need to call
    1736             :          * add_running_bw().
    1737             :          */
    1738           0 :         if (p->dl.dl_throttled && !(flags & ENQUEUE_REPLENISH)) {
    1739           0 :                 if (flags & ENQUEUE_WAKEUP)
    1740           0 :                         task_contending(&p->dl, flags);
    1741             : 
    1742             :                 return;
    1743             :         }
    1744             : 
    1745             :         check_schedstat_required();
    1746           0 :         update_stats_wait_start_dl(dl_rq_of_se(&p->dl), &p->dl);
    1747             : 
    1748           0 :         enqueue_dl_entity(&p->dl, flags);
    1749             : 
    1750           0 :         if (!task_current(rq, p) && p->nr_cpus_allowed > 1)
    1751             :                 enqueue_pushable_dl_task(rq, p);
    1752             : }
    1753             : 
    1754             : static void __dequeue_task_dl(struct rq *rq, struct task_struct *p, int flags)
    1755             : {
    1756           0 :         update_stats_dequeue_dl(&rq->dl, &p->dl, flags);
    1757           0 :         dequeue_dl_entity(&p->dl);
    1758           0 :         dequeue_pushable_dl_task(rq, p);
    1759             : }
    1760             : 
    1761           0 : static void dequeue_task_dl(struct rq *rq, struct task_struct *p, int flags)
    1762             : {
    1763           0 :         update_curr_dl(rq);
    1764           0 :         __dequeue_task_dl(rq, p, flags);
    1765             : 
    1766           0 :         if (p->on_rq == TASK_ON_RQ_MIGRATING || flags & DEQUEUE_SAVE) {
    1767           0 :                 sub_running_bw(&p->dl, &rq->dl);
    1768           0 :                 sub_rq_bw(&p->dl, &rq->dl);
    1769             :         }
    1770             : 
    1771             :         /*
    1772             :          * This check allows us to start the inactive timer (or to immediately
    1773             :          * decrease the active utilization, if needed) in two cases:
    1774             :          * when the task blocks and when it is terminating
    1775             :          * (p->__state == TASK_DEAD). We can handle the two cases in the same
    1776             :          * way, because from GRUB's point of view the same thing is happening
    1777             :          * (the task moves from "active contending" to "active non contending"
    1778             :          * or "inactive").
    1779             :          */
    1780           0 :         if (flags & DEQUEUE_SLEEP)
    1781           0 :                 task_non_contending(p);
    1782           0 : }
    1783             : 
    1784             : /*
    1785             :  * Yield task semantic for -deadline tasks is:
    1786             :  *
    1787             :  *   get off from the CPU until our next instance, with
    1788             :  *   a new runtime. This is of little use now, since we
    1789             :  *   don't have a bandwidth reclaiming mechanism. Anyway,
    1790             :  *   bandwidth reclaiming is planned for the future, and
    1791             :  *   yield_task_dl will indicate that some spare budget
    1792             :  *   is available for other task instances to use it.
    1793             :  */
    1794           0 : static void yield_task_dl(struct rq *rq)
    1795             : {
    1796             :         /*
    1797             :          * We make the task go to sleep until its current deadline by
    1798             :          * forcing its runtime to zero. This way, update_curr_dl() stops
    1799             :          * it and the bandwidth timer will wake it up and will give it
    1800             :          * new scheduling parameters (thanks to dl_yielded=1).
    1801             :          */
    1802           0 :         rq->curr->dl.dl_yielded = 1;
    1803             : 
    1804           0 :         update_rq_clock(rq);
    1805           0 :         update_curr_dl(rq);
    1806             :         /*
    1807             :          * Tell update_rq_clock() that we've just updated,
    1808             :          * so we don't do microscopic update in schedule()
    1809             :          * and double the fastpath cost.
    1810             :          */
    1811           0 :         rq_clock_skip_update(rq);
    1812           0 : }
    1813             : 
    1814             : #ifdef CONFIG_SMP
    1815             : 
    1816             : static inline bool dl_task_is_earliest_deadline(struct task_struct *p,
    1817             :                                                  struct rq *rq)
    1818             : {
    1819             :         return (!rq->dl.dl_nr_running ||
    1820             :                 dl_time_before(p->dl.deadline,
    1821             :                                rq->dl.earliest_dl.curr));
    1822             : }
    1823             : 
    1824             : static int find_later_rq(struct task_struct *task);
    1825             : 
    1826             : static int
    1827             : select_task_rq_dl(struct task_struct *p, int cpu, int flags)
    1828             : {
    1829             :         struct task_struct *curr;
    1830             :         bool select_rq;
    1831             :         struct rq *rq;
    1832             : 
    1833             :         if (!(flags & WF_TTWU))
    1834             :                 goto out;
    1835             : 
    1836             :         rq = cpu_rq(cpu);
    1837             : 
    1838             :         rcu_read_lock();
    1839             :         curr = READ_ONCE(rq->curr); /* unlocked access */
    1840             : 
    1841             :         /*
    1842             :          * If we are dealing with a -deadline task, we must
    1843             :          * decide where to wake it up.
    1844             :          * If it has a later deadline and the current task
    1845             :          * on this rq can't move (provided the waking task
    1846             :          * can!) we prefer to send it somewhere else. On the
    1847             :          * other hand, if it has a shorter deadline, we
    1848             :          * try to make it stay here, it might be important.
    1849             :          */
    1850             :         select_rq = unlikely(dl_task(curr)) &&
    1851             :                     (curr->nr_cpus_allowed < 2 ||
    1852             :                      !dl_entity_preempt(&p->dl, &curr->dl)) &&
    1853             :                     p->nr_cpus_allowed > 1;
    1854             : 
    1855             :         /*
    1856             :          * Take the capacity of the CPU into account to
    1857             :          * ensure it fits the requirement of the task.
    1858             :          */
    1859             :         if (sched_asym_cpucap_active())
    1860             :                 select_rq |= !dl_task_fits_capacity(p, cpu);
    1861             : 
    1862             :         if (select_rq) {
    1863             :                 int target = find_later_rq(p);
    1864             : 
    1865             :                 if (target != -1 &&
    1866             :                     dl_task_is_earliest_deadline(p, cpu_rq(target)))
    1867             :                         cpu = target;
    1868             :         }
    1869             :         rcu_read_unlock();
    1870             : 
    1871             : out:
    1872             :         return cpu;
    1873             : }
    1874             : 
    1875             : static void migrate_task_rq_dl(struct task_struct *p, int new_cpu __maybe_unused)
    1876             : {
    1877             :         struct rq_flags rf;
    1878             :         struct rq *rq;
    1879             : 
    1880             :         if (READ_ONCE(p->__state) != TASK_WAKING)
    1881             :                 return;
    1882             : 
    1883             :         rq = task_rq(p);
    1884             :         /*
    1885             :          * Since p->__state == TASK_WAKING, set_task_cpu() has been called
    1886             :          * from try_to_wake_up(). Hence, p->pi_lock is locked, but
    1887             :          * rq->lock is not, so lock it.
    1888             :          */
    1889             :         rq_lock(rq, &rf);
    1890             :         if (p->dl.dl_non_contending) {
    1891             :                 update_rq_clock(rq);
    1892             :                 sub_running_bw(&p->dl, &rq->dl);
    1893             :                 p->dl.dl_non_contending = 0;
    1894             :                 /*
    1895             :                  * If the timer handler is currently running and the
    1896             :                  * timer cannot be canceled, inactive_task_timer()
    1897             :                  * will see that dl_non_contending is not set, and
    1898             :                  * will not touch the rq's active utilization,
    1899             :                  * so we are still safe.
    1900             :                  */
    1901             :                 if (hrtimer_try_to_cancel(&p->dl.inactive_timer) == 1)
    1902             :                         put_task_struct(p);
    1903             :         }
    1904             :         sub_rq_bw(&p->dl, &rq->dl);
    1905             :         rq_unlock(rq, &rf);
    1906             : }
    1907             : 
    1908             : static void check_preempt_equal_dl(struct rq *rq, struct task_struct *p)
    1909             : {
    1910             :         /*
    1911             :          * Current can't be migrated, useless to reschedule,
    1912             :          * let's hope p can move out.
    1913             :          */
    1914             :         if (rq->curr->nr_cpus_allowed == 1 ||
    1915             :             !cpudl_find(&rq->rd->cpudl, rq->curr, NULL))
    1916             :                 return;
    1917             : 
    1918             :         /*
    1919             :          * p is migratable, so let's not schedule it and
    1920             :          * see if it is pushed or pulled somewhere else.
    1921             :          */
    1922             :         if (p->nr_cpus_allowed != 1 &&
    1923             :             cpudl_find(&rq->rd->cpudl, p, NULL))
    1924             :                 return;
    1925             : 
    1926             :         resched_curr(rq);
    1927             : }
    1928             : 
    1929             : static int balance_dl(struct rq *rq, struct task_struct *p, struct rq_flags *rf)
    1930             : {
    1931             :         if (!on_dl_rq(&p->dl) && need_pull_dl_task(rq, p)) {
    1932             :                 /*
    1933             :                  * This is OK, because current is on_cpu, which avoids it being
    1934             :                  * picked for load-balance and preemption/IRQs are still
    1935             :                  * disabled avoiding further scheduler activity on it and we've
    1936             :                  * not yet started the picking loop.
    1937             :                  */
    1938             :                 rq_unpin_lock(rq, rf);
    1939             :                 pull_dl_task(rq);
    1940             :                 rq_repin_lock(rq, rf);
    1941             :         }
    1942             : 
    1943             :         return sched_stop_runnable(rq) || sched_dl_runnable(rq);
    1944             : }
    1945             : #endif /* CONFIG_SMP */
    1946             : 
    1947             : /*
    1948             :  * Only called when both the current and waking task are -deadline
    1949             :  * tasks.
    1950             :  */
    1951           0 : static void check_preempt_curr_dl(struct rq *rq, struct task_struct *p,
    1952             :                                   int flags)
    1953             : {
    1954           0 :         if (dl_entity_preempt(&p->dl, &rq->curr->dl)) {
    1955           0 :                 resched_curr(rq);
    1956           0 :                 return;
    1957             :         }
    1958             : 
    1959             : #ifdef CONFIG_SMP
    1960             :         /*
    1961             :          * In the unlikely case current and p have the same deadline
    1962             :          * let us try to decide what's the best thing to do...
    1963             :          */
    1964             :         if ((p->dl.deadline == rq->curr->dl.deadline) &&
    1965             :             !test_tsk_need_resched(rq->curr))
    1966             :                 check_preempt_equal_dl(rq, p);
    1967             : #endif /* CONFIG_SMP */
    1968             : }
    1969             : 
    1970             : #ifdef CONFIG_SCHED_HRTICK
    1971             : static void start_hrtick_dl(struct rq *rq, struct task_struct *p)
    1972             : {
    1973             :         hrtick_start(rq, p->dl.runtime);
    1974             : }
    1975             : #else /* !CONFIG_SCHED_HRTICK */
    1976             : static void start_hrtick_dl(struct rq *rq, struct task_struct *p)
    1977             : {
    1978             : }
    1979             : #endif
    1980             : 
    1981           0 : static void set_next_task_dl(struct rq *rq, struct task_struct *p, bool first)
    1982             : {
    1983           0 :         struct sched_dl_entity *dl_se = &p->dl;
    1984           0 :         struct dl_rq *dl_rq = &rq->dl;
    1985             : 
    1986           0 :         p->se.exec_start = rq_clock_task(rq);
    1987           0 :         if (on_dl_rq(&p->dl))
    1988             :                 update_stats_wait_end_dl(dl_rq, dl_se);
    1989             : 
    1990             :         /* You can't push away the running task */
    1991           0 :         dequeue_pushable_dl_task(rq, p);
    1992             : 
    1993           0 :         if (!first)
    1994             :                 return;
    1995             : 
    1996           0 :         if (hrtick_enabled_dl(rq))
    1997             :                 start_hrtick_dl(rq, p);
    1998             : 
    1999           0 :         if (rq->curr->sched_class != &dl_sched_class)
    2000           0 :                 update_dl_rq_load_avg(rq_clock_pelt(rq), rq, 0);
    2001             : 
    2002             :         deadline_queue_push_tasks(rq);
    2003             : }
    2004             : 
    2005             : static struct sched_dl_entity *pick_next_dl_entity(struct dl_rq *dl_rq)
    2006             : {
    2007           0 :         struct rb_node *left = rb_first_cached(&dl_rq->root);
    2008             : 
    2009           0 :         if (!left)
    2010             :                 return NULL;
    2011             : 
    2012           0 :         return __node_2_dle(left);
    2013             : }
    2014             : 
    2015           0 : static struct task_struct *pick_task_dl(struct rq *rq)
    2016             : {
    2017             :         struct sched_dl_entity *dl_se;
    2018           0 :         struct dl_rq *dl_rq = &rq->dl;
    2019             :         struct task_struct *p;
    2020             : 
    2021           0 :         if (!sched_dl_runnable(rq))
    2022             :                 return NULL;
    2023             : 
    2024           0 :         dl_se = pick_next_dl_entity(dl_rq);
    2025           0 :         WARN_ON_ONCE(!dl_se);
    2026           0 :         p = dl_task_of(dl_se);
    2027             : 
    2028           0 :         return p;
    2029             : }
    2030             : 
    2031           0 : static struct task_struct *pick_next_task_dl(struct rq *rq)
    2032             : {
    2033             :         struct task_struct *p;
    2034             : 
    2035           0 :         p = pick_task_dl(rq);
    2036           0 :         if (p)
    2037           0 :                 set_next_task_dl(rq, p, true);
    2038             : 
    2039           0 :         return p;
    2040             : }
    2041             : 
    2042           0 : static void put_prev_task_dl(struct rq *rq, struct task_struct *p)
    2043             : {
    2044           0 :         struct sched_dl_entity *dl_se = &p->dl;
    2045           0 :         struct dl_rq *dl_rq = &rq->dl;
    2046             : 
    2047           0 :         if (on_dl_rq(&p->dl))
    2048             :                 update_stats_wait_start_dl(dl_rq, dl_se);
    2049             : 
    2050           0 :         update_curr_dl(rq);
    2051             : 
    2052           0 :         update_dl_rq_load_avg(rq_clock_pelt(rq), rq, 1);
    2053           0 :         if (on_dl_rq(&p->dl) && p->nr_cpus_allowed > 1)
    2054             :                 enqueue_pushable_dl_task(rq, p);
    2055           0 : }
    2056             : 
    2057             : /*
    2058             :  * scheduler tick hitting a task of our scheduling class.
    2059             :  *
    2060             :  * NOTE: This function can be called remotely by the tick offload that
    2061             :  * goes along with full dynticks. Therefore no local assumption can be made
    2062             :  * and everything must be accessed through the @rq and @p passed in as
    2063             :  * parameters.
    2064             :  */
    2065           0 : static void task_tick_dl(struct rq *rq, struct task_struct *p, int queued)
    2066             : {
    2067           0 :         update_curr_dl(rq);
    2068             : 
    2069           0 :         update_dl_rq_load_avg(rq_clock_pelt(rq), rq, 1);
    2070             :         /*
    2071             :          * Even when we have runtime, update_curr_dl() might have resulted in us
    2072             :          * not being the leftmost task anymore. In that case NEED_RESCHED will
    2073             :          * be set and schedule() will start a new hrtick for the next task.
    2074             :          */
    2075           0 :         if (hrtick_enabled_dl(rq) && queued && p->dl.runtime > 0 &&
    2076             :             is_leftmost(p, &rq->dl))
    2077             :                 start_hrtick_dl(rq, p);
    2078           0 : }
    2079             : 
    2080           0 : static void task_fork_dl(struct task_struct *p)
    2081             : {
    2082             :         /*
    2083             :          * SCHED_DEADLINE tasks cannot fork and this is achieved through
    2084             :          * sched_fork()
    2085             :          */
    2086           0 : }
    2087             : 
    2088             : #ifdef CONFIG_SMP
    2089             : 
    2090             : /* Only try algorithms three times */
    2091             : #define DL_MAX_TRIES 3
    2092             : 
    2093             : static int pick_dl_task(struct rq *rq, struct task_struct *p, int cpu)
    2094             : {
    2095             :         if (!task_on_cpu(rq, p) &&
    2096             :             cpumask_test_cpu(cpu, &p->cpus_mask))
    2097             :                 return 1;
    2098             :         return 0;
    2099             : }
    2100             : 
    2101             : /*
    2102             :  * Return the rq's earliest pushable task that is suitable to be executed
    2103             :  * on the given CPU, or NULL if there is none.
    2104             :  */
    2105             : static struct task_struct *pick_earliest_pushable_dl_task(struct rq *rq, int cpu)
    2106             : {
    2107             :         struct task_struct *p = NULL;
    2108             :         struct rb_node *next_node;
    2109             : 
    2110             :         if (!has_pushable_dl_tasks(rq))
    2111             :                 return NULL;
    2112             : 
    2113             :         next_node = rb_first_cached(&rq->dl.pushable_dl_tasks_root);
    2114             : 
    2115             : next_node:
    2116             :         if (next_node) {
    2117             :                 p = __node_2_pdl(next_node);
    2118             : 
    2119             :                 if (pick_dl_task(rq, p, cpu))
    2120             :                         return p;
    2121             : 
    2122             :                 next_node = rb_next(next_node);
    2123             :                 goto next_node;
    2124             :         }
    2125             : 
    2126             :         return NULL;
    2127             : }
    2128             : 
    2129             : static DEFINE_PER_CPU(cpumask_var_t, local_cpu_mask_dl);
    2130             : 
    2131             : static int find_later_rq(struct task_struct *task)
    2132             : {
    2133             :         struct sched_domain *sd;
    2134             :         struct cpumask *later_mask = this_cpu_cpumask_var_ptr(local_cpu_mask_dl);
    2135             :         int this_cpu = smp_processor_id();
    2136             :         int cpu = task_cpu(task);
    2137             : 
    2138             :         /* Make sure the mask is initialized first */
    2139             :         if (unlikely(!later_mask))
    2140             :                 return -1;
    2141             : 
    2142             :         if (task->nr_cpus_allowed == 1)
    2143             :                 return -1;
    2144             : 
    2145             :         /*
    2146             :          * We have to consider system topology and task affinity
    2147             :          * first, then we can look for a suitable CPU.
    2148             :          */
    2149             :         if (!cpudl_find(&task_rq(task)->rd->cpudl, task, later_mask))
    2150             :                 return -1;
    2151             : 
    2152             :         /*
    2153             :          * If we are here, some targets have been found, including
    2154             :          * the most suitable which is, among the runqueues where the
    2155             :          * current tasks have later deadlines than the task's one, the
    2156             :          * rq with the latest possible one.
    2157             :          *
    2158             :          * Now we check how well this matches with task's
    2159             :          * affinity and system topology.
    2160             :          *
    2161             :          * The last CPU where the task ran is our first
    2162             :          * guess, since it is most likely cache-hot there.
    2163             :          */
    2164             :         if (cpumask_test_cpu(cpu, later_mask))
    2165             :                 return cpu;
    2166             :         /*
    2167             :          * Check if this_cpu is to be skipped (i.e., it is
    2168             :          * not in the mask) or not.
    2169             :          */
    2170             :         if (!cpumask_test_cpu(this_cpu, later_mask))
    2171             :                 this_cpu = -1;
    2172             : 
    2173             :         rcu_read_lock();
    2174             :         for_each_domain(cpu, sd) {
    2175             :                 if (sd->flags & SD_WAKE_AFFINE) {
    2176             :                         int best_cpu;
    2177             : 
    2178             :                         /*
    2179             :                          * If possible, preempting this_cpu is
    2180             :                          * cheaper than migrating.
    2181             :                          */
    2182             :                         if (this_cpu != -1 &&
    2183             :                             cpumask_test_cpu(this_cpu, sched_domain_span(sd))) {
    2184             :                                 rcu_read_unlock();
    2185             :                                 return this_cpu;
    2186             :                         }
    2187             : 
    2188             :                         best_cpu = cpumask_any_and_distribute(later_mask,
    2189             :                                                               sched_domain_span(sd));
    2190             :                         /*
    2191             :                          * Last chance: if a CPU being in both later_mask
    2192             :                          * and current sd span is valid, that becomes our
    2193             :                          * choice. Of course, the latest possible CPU is
    2194             :                          * already under consideration through later_mask.
    2195             :                          */
    2196             :                         if (best_cpu < nr_cpu_ids) {
    2197             :                                 rcu_read_unlock();
    2198             :                                 return best_cpu;
    2199             :                         }
    2200             :                 }
    2201             :         }
    2202             :         rcu_read_unlock();
    2203             : 
    2204             :         /*
    2205             :          * At this point, all our guesses failed, we just return
    2206             :          * 'something', and let the caller sort the things out.
    2207             :          */
    2208             :         if (this_cpu != -1)
    2209             :                 return this_cpu;
    2210             : 
    2211             :         cpu = cpumask_any_distribute(later_mask);
    2212             :         if (cpu < nr_cpu_ids)
    2213             :                 return cpu;
    2214             : 
    2215             :         return -1;
    2216             : }
    2217             : 
    2218             : /* Locks the rq it finds */
    2219             : static struct rq *find_lock_later_rq(struct task_struct *task, struct rq *rq)
    2220             : {
    2221             :         struct rq *later_rq = NULL;
    2222             :         int tries;
    2223             :         int cpu;
    2224             : 
    2225             :         for (tries = 0; tries < DL_MAX_TRIES; tries++) {
    2226             :                 cpu = find_later_rq(task);
    2227             : 
    2228             :                 if ((cpu == -1) || (cpu == rq->cpu))
    2229             :                         break;
    2230             : 
    2231             :                 later_rq = cpu_rq(cpu);
    2232             : 
    2233             :                 if (!dl_task_is_earliest_deadline(task, later_rq)) {
    2234             :                         /*
    2235             :                          * Target rq has tasks of equal or earlier deadline,
    2236             :                          * retrying does not release any lock and is unlikely
    2237             :                          * to yield a different result.
    2238             :                          */
    2239             :                         later_rq = NULL;
    2240             :                         break;
    2241             :                 }
    2242             : 
    2243             :                 /* Retry if something changed. */
    2244             :                 if (double_lock_balance(rq, later_rq)) {
    2245             :                         if (unlikely(task_rq(task) != rq ||
    2246             :                                      !cpumask_test_cpu(later_rq->cpu, &task->cpus_mask) ||
    2247             :                                      task_on_cpu(rq, task) ||
    2248             :                                      !dl_task(task) ||
    2249             :                                      !task_on_rq_queued(task))) {
    2250             :                                 double_unlock_balance(rq, later_rq);
    2251             :                                 later_rq = NULL;
    2252             :                                 break;
    2253             :                         }
    2254             :                 }
    2255             : 
    2256             :                 /*
    2257             :                  * If the rq we found has no -deadline task, or
    2258             :                  * its earliest one has a later deadline than our
    2259             :                  * task, the rq is a good one.
    2260             :                  */
    2261             :                 if (dl_task_is_earliest_deadline(task, later_rq))
    2262             :                         break;
    2263             : 
    2264             :                 /* Otherwise we try again. */
    2265             :                 double_unlock_balance(rq, later_rq);
    2266             :                 later_rq = NULL;
    2267             :         }
    2268             : 
    2269             :         return later_rq;
    2270             : }
    2271             : 
    2272             : static struct task_struct *pick_next_pushable_dl_task(struct rq *rq)
    2273             : {
                               /*
                                * Return the first cached node (rb_first_cached()) of rq's
                                * pushable -deadline tree, or NULL when has_pushable_dl_tasks()
                                * reports that there is none.
                                *
                                * The WARN_ON_ONCE() checks below only flag an unexpected pick;
                                * p is returned whether or not any of them fires.
                                */
    2274             :         struct task_struct *p;
    2275             : 
    2276             :         if (!has_pushable_dl_tasks(rq))
    2277             :                 return NULL;
    2278             : 
    2279             :         p = __node_2_pdl(rb_first_cached(&rq->dl.pushable_dl_tasks_root));
    2280             : 
                               /* A pushable task is expected to be on this CPU, not current, and allowed on more than one CPU. */
    2281             :         WARN_ON_ONCE(rq->cpu != task_cpu(p));
    2282             :         WARN_ON_ONCE(task_current(rq, p));
    2283             :         WARN_ON_ONCE(p->nr_cpus_allowed <= 1);
    2284             : 
                               /* It is also expected to be queued and to be a -deadline task. */
    2285             :         WARN_ON_ONCE(!task_on_rq_queued(p));
    2286             :         WARN_ON_ONCE(!dl_task(p));
    2287             : 
    2288             :         return p;
    2289             : }
    2290             : 
    2291             : /*
    2292             :  * See if the non running -deadline tasks on this rq
    2293             :  * can be sent to some other CPU where they can preempt
    2294             :  * and start executing.
                        *
                        * Returns 1 when a task was moved to another runqueue and 0 in every
                        * other case: rq not overloaded, nothing pushable, a local reschedule
                        * was requested instead, the candidate has migration disabled, or
                        * find_lock_later_rq() found no destination.
    2295             :  */
    2296             : static int push_dl_task(struct rq *rq)
    2297             : {
    2298             :         struct task_struct *next_task;
    2299             :         struct rq *later_rq;
    2300             :         int ret = 0;
    2301             : 
    2302             :         if (!rq->dl.overloaded)
    2303             :                 return 0;
    2304             : 
    2305             :         next_task = pick_next_pushable_dl_task(rq);
    2306             :         if (!next_task)
    2307             :                 return 0;
    2308             : 
    2309             : retry:
    2310             :         /*
    2311             :          * If next_task preempts rq->curr, and rq->curr
    2312             :          * can move away, it makes sense to just reschedule
    2313             :          * without going further in pushing next_task.
    2314             :          */
    2315             :         if (dl_task(rq->curr) &&
    2316             :             dl_time_before(next_task->dl.deadline, rq->curr->dl.deadline) &&
    2317             :             rq->curr->nr_cpus_allowed > 1) {
    2318             :                 resched_curr(rq);
    2319             :                 return 0;
    2320             :         }
    2321             : 
                               /* These early returns happen before a reference is taken, so no put is needed. */
    2322             :         if (is_migration_disabled(next_task))
    2323             :                 return 0;
    2324             : 
    2325             :         if (WARN_ON(next_task == rq->curr))
    2326             :                 return 0;
    2327             : 
    2328             :         /* We might release rq lock */
    2329             :         get_task_struct(next_task);
    2330             : 
    2331             :         /* Will lock the rq it'll find */
    2332             :         later_rq = find_lock_later_rq(next_task, rq);
    2333             :         if (!later_rq) {
    2334             :                 struct task_struct *task;
    2335             : 
    2336             :                 /*
    2337             :                  * We must check all this again, since
    2338             :                  * find_lock_later_rq releases rq->lock and it is
    2339             :                  * then possible that next_task has migrated.
    2340             :                  */
    2341             :                 task = pick_next_pushable_dl_task(rq);
    2342             :                 if (task == next_task) {
    2343             :                         /*
    2344             :                          * The task is still there. We don't try
    2345             :                          * again, some other CPU will pull it when ready.
    2346             :                          */
    2347             :                         goto out;
    2348             :                 }
    2349             : 
    2350             :                 if (!task)
    2351             :                         /* No more tasks */
    2352             :                         goto out;
    2353             : 
                                       /*
                                        * A different task is now first in line: drop the reference
                                        * on the old candidate here; the new one gets its own
                                        * reference after the checks at the retry label.
                                        */
    2354             :                 put_task_struct(next_task);
    2355             :                 next_task = task;
    2356             :                 goto retry;
    2357             :         }
    2358             : 
                               /* Move next_task from rq to later_rq and ask later_rq to reschedule. */
    2359             :         deactivate_task(rq, next_task, 0);
    2360             :         set_task_cpu(next_task, later_rq->cpu);
    2361             :         activate_task(later_rq, next_task, 0);
    2362             :         ret = 1;
    2363             : 
    2364             :         resched_curr(later_rq);
    2365             : 
    2366             :         double_unlock_balance(rq, later_rq);
    2367             : 
    2368             : out:
                               /* Pairs with the get_task_struct() taken before find_lock_later_rq(). */
    2369             :         put_task_struct(next_task);
    2370             : 
    2371             :         return ret;
    2372             : }
    2373             : 
    2374             : static void push_dl_tasks(struct rq *rq)
    2375             : {
                               /* Keep calling push_dl_task() until one call moves nothing. */
    2376             :         /* push_dl_task() will return true if it moved a -deadline task */
    2377             :         while (push_dl_task(rq))
    2378             :                 ;
    2379             : }
    2380             : 
    2381             : static void pull_dl_task(struct rq *this_rq)
    2382             : {
                               /*
                                * Walk the CPUs set in this_rq->rd->dlo_mask and move onto this_rq
                                * any task picked by pick_earliest_pushable_dl_task() that passes
                                * the checks below. A reschedule of this_rq is requested at the end
                                * if at least one task was moved.
                                */
    2383             :         int this_cpu = this_rq->cpu, cpu;
    2384             :         struct task_struct *p, *push_task;
    2385             :         bool resched = false;
    2386             :         struct rq *src_rq;
                               /* Deadline of the last task pulled so far; stays LONG_MAX until one is pulled. */
    2387             :         u64 dmin = LONG_MAX;
    2388             : 
    2389             :         if (likely(!dl_overloaded(this_rq)))
    2390             :                 return;
    2391             : 
    2392             :         /*
    2393             :          * Match the barrier from dl_set_overloaded; this guarantees that if we
    2394             :          * see overloaded we must also see the dlo_mask bit.
    2395             :          */
    2396             :         smp_rmb();
    2397             : 
    2398             :         for_each_cpu(cpu, this_rq->rd->dlo_mask) {
    2399             :                 if (this_cpu == cpu)
    2400             :                         continue;
    2401             : 
    2402             :                 src_rq = cpu_rq(cpu);
    2403             : 
    2404             :                 /*
    2405             :                  * It looks racy, and it is! However, as in sched_rt.c,
    2406             :                  * we are fine with this.
    2407             :                  */
    2408             :                 if (this_rq->dl.dl_nr_running &&
    2409             :                     dl_time_before(this_rq->dl.earliest_dl.curr,
    2410             :                                    src_rq->dl.earliest_dl.next))
    2411             :                         continue;
    2412             : 
    2413             :                 /* Might drop this_rq->lock */
    2414             :                 push_task = NULL;
    2415             :                 double_lock_balance(this_rq, src_rq);
    2416             : 
    2417             :                 /*
    2418             :                  * If there are no more pullable tasks on the
    2419             :                  * rq, we're done with it.
    2420             :                  */
    2421             :                 if (src_rq->dl.dl_nr_running <= 1)
    2422             :                         goto skip;
    2423             : 
    2424             :                 p = pick_earliest_pushable_dl_task(src_rq, this_cpu);
    2425             : 
    2426             :                 /*
    2427             :                  * We found a task to be pulled if:
    2428             :                  *  - it preempts our current (if there's one),
    2429             :                  *  - it will preempt the last one we pulled (if any).
    2430             :                  */
    2431             :                 if (p && dl_time_before(p->dl.deadline, dmin) &&
    2432             :                     dl_task_is_earliest_deadline(p, this_rq)) {
    2433             :                         WARN_ON(p == src_rq->curr);
    2434             :                         WARN_ON(!task_on_rq_queued(p));
    2435             : 
    2436             :                         /*
    2437             :                          * Then we pull iff p has actually an earlier
    2438             :                          * deadline than the current task of its runqueue.
                                                *
                                                * NOTE(review): the check below skips the pull when
                                                * p's deadline is earlier than src_rq->curr's, which
                                                * reads as the opposite of the sentence above.
                                                * Confirm which of the two is intended.
    2439             :                          */
    2440             :                         if (dl_time_before(p->dl.deadline,
    2441             :                                            src_rq->curr->dl.deadline))
    2442             :                                 goto skip;
    2443             : 
    2444             :                         if (is_migration_disabled(p)) {
                                                       /* p is not moved here; push_task is handed to stop_one_cpu_nowait() after the unlock below. */
    2445             :                                 push_task = get_push_task(src_rq);
    2446             :                         } else {
    2447             :                                 deactivate_task(src_rq, p, 0);
    2448             :                                 set_task_cpu(p, this_cpu);
    2449             :                                 activate_task(this_rq, p, 0);
    2450             :                                 dmin = p->dl.deadline;
    2451             :                                 resched = true;
    2452             :                         }
    2453             : 
    2454             :                         /* Is there any other task even earlier? */
    2455             :                 }
    2456             : skip:
    2457             :                 double_unlock_balance(this_rq, src_rq);
    2458             : 
    2459             :                 if (push_task) {
                                               /*
                                                * Queue push_cpu_stop for push_task on src_rq->cpu;
                                                * this_rq's lock is released around the call and
                                                * re-taken before the loop continues.
                                                */
    2460             :                         raw_spin_rq_unlock(this_rq);
    2461             :                         stop_one_cpu_nowait(src_rq->cpu, push_cpu_stop,
    2462             :                                             push_task, &src_rq->push_work);
    2463             :                         raw_spin_rq_lock(this_rq);
    2464             :                 }
    2465             :         }
    2466             : 
    2467             :         if (resched)
    2468             :                 resched_curr(this_rq);
    2469             : }
    2470             : 
    2471             : /*
    2472             :  * Since the task is not running and a reschedule is not going to happen
    2473             :  * anytime soon on its runqueue, we try pushing it away now.
                        *
                        * The push is attempted only when all of the following hold:
                        *  - task_on_cpu(rq, p) is false,
                        *  - test_tsk_need_resched(rq->curr) is false,
                        *  - p is allowed on more than one CPU,
                        *  - rq->curr is a -deadline task that is either allowed on fewer than
                        *    two CPUs or is not preempted by p according to dl_entity_preempt().
    2474             :  */
    2475             : static void task_woken_dl(struct rq *rq, struct task_struct *p)
    2476             : {
    2477             :         if (!task_on_cpu(rq, p) &&
    2478             :             !test_tsk_need_resched(rq->curr) &&
    2479             :             p->nr_cpus_allowed > 1 &&
    2480             :             dl_task(rq->curr) &&
    2481             :             (rq->curr->nr_cpus_allowed < 2 ||
    2482             :              !dl_entity_preempt(&p->dl, &rq->curr->dl))) {
    2483             :                 push_dl_tasks(rq);
    2484             :         }
    2485             : }
    2486             : 
    2487             : static void set_cpus_allowed_dl(struct task_struct *p,
    2488             :                                 struct affinity_context *ctx)
    2489             : {
                               /*
                                * Affinity change for -deadline task p. When ctx->new_mask shares
                                * no CPU with the span of the root domain of p's current rq, p's
                                * bandwidth (p->dl.dl_bw) is first subtracted from that domain's
                                * dl_bw via __dl_sub(). set_cpus_allowed_common() is then called
                                * in every case.
                                */
    2490             :         struct root_domain *src_rd;
    2491             :         struct rq *rq;
    2492             : 
    2493             :         WARN_ON_ONCE(!dl_task(p));
    2494             : 
    2495             :         rq = task_rq(p);
    2496             :         src_rd = rq->rd;
    2497             :         /*
    2498             :          * Migrating a SCHED_DEADLINE task between exclusive
    2499             :          * cpusets (different root_domains) entails a bandwidth
    2500             :          * update. We already made space for us in the destination
    2501             :          * domain (see cpuset_can_attach()).
    2502             :          */
    2503             :         if (!cpumask_intersects(src_rd->span, ctx->new_mask)) {
    2504             :                 struct dl_bw *src_dl_b;
    2505             : 
    2506             :                 src_dl_b = dl_bw_of(cpu_of(rq));
    2507             :                 /*
    2508             :                  * We now free resources of the root_domain we are migrating
    2509             :                  * off. In the worst case, sched_setattr() may temporarily fail
    2510             :                  * until we complete the update.
    2511             :                  */
    2512             :                 raw_spin_lock(&src_dl_b->lock);
    2513             :                 __dl_sub(src_dl_b, p->dl.dl_bw, dl_bw_cpus(task_cpu(p)));
    2514             :                 raw_spin_unlock(&src_dl_b->lock);
    2515             :         }
    2516             : 
    2517             :         set_cpus_allowed_common(p, ctx);
    2518             : }
    2519             : 
    2520             : /* Assumes rq->lock is held */
    2521             : static void rq_online_dl(struct rq *rq)
    2522             : {
                               /*
                                * Installed as the class's .rq_online hook. Calls dl_set_overload()
                                * if rq is flagged overloaded, then cpudl_set_freecpu() for this
                                * CPU, and, when -deadline tasks are running here, cpudl_set() with
                                * the rq's earliest current deadline.
                                */
    2523             :         if (rq->dl.overloaded)
    2524             :                 dl_set_overload(rq);
    2525             : 
    2526             :         cpudl_set_freecpu(&rq->rd->cpudl, rq->cpu);
    2527             :         if (rq->dl.dl_nr_running > 0)
    2528             :                 cpudl_set(&rq->rd->cpudl, rq->cpu, rq->dl.earliest_dl.curr);
    2529             : }
    2530             : 
    2531             : /* Assumes rq->lock is held */
    2532             : static void rq_offline_dl(struct rq *rq)
    2533             : {
                               /*
                                * Installed as the class's .rq_offline hook. Calls
                                * dl_clear_overload() if rq is flagged overloaded, then removes
                                * this CPU from the root domain's cpudl via cpudl_clear() and
                                * cpudl_clear_freecpu().
                                */
    2534             :         if (rq->dl.overloaded)
    2535             :                 dl_clear_overload(rq);
    2536             : 
    2537             :         cpudl_clear(&rq->rd->cpudl, rq->cpu);
    2538             :         cpudl_clear_freecpu(&rq->rd->cpudl, rq->cpu);
    2539             : }
    2540             : 
    2541             : void __init init_sched_dl_class(void)
    2542             : {
                               /*
                                * Allocate a zeroed local_cpu_mask_dl cpumask for every possible
                                * CPU, on that CPU's node.
                                *
                                * NOTE(review): the result of zalloc_cpumask_var_node() is not
                                * checked here; confirm that users of local_cpu_mask_dl cope
                                * with a failed allocation.
                                */
    2543             :         unsigned int i;
    2544             : 
    2545             :         for_each_possible_cpu(i)
    2546             :                 zalloc_cpumask_var_node(&per_cpu(local_cpu_mask_dl, i),
    2547             :                                         GFP_KERNEL, cpu_to_node(i));
    2548             : }
    2549             : 
    2550             : void dl_add_task_root_domain(struct task_struct *p)
    2551             : {
                               /*
                                * Account p's bandwidth (p->dl.dl_bw) in the dl_bw of the root
                                * domain of p's runqueue by calling __dl_add() with the number of
                                * CPUs in rd->span. Does nothing if p is not a -deadline task.
                                *
                                * Lock order used here: p->pi_lock (IRQs saved), then the rq lock
                                * via __task_rq_lock(), then dl_b->lock.
                                */
    2552             :         struct rq_flags rf;
    2553             :         struct rq *rq;
    2554             :         struct dl_bw *dl_b;
    2555             : 
    2556             :         raw_spin_lock_irqsave(&p->pi_lock, rf.flags);
    2557             :         if (!dl_task(p)) {
    2558             :                 raw_spin_unlock_irqrestore(&p->pi_lock, rf.flags);
    2559             :                 return;
    2560             :         }
    2561             : 
    2562             :         rq = __task_rq_lock(p, &rf);
    2563             : 
    2564             :         dl_b = &rq->rd->dl_bw;
    2565             :         raw_spin_lock(&dl_b->lock);
    2566             : 
    2567             :         __dl_add(dl_b, p->dl.dl_bw, cpumask_weight(rq->rd->span));
    2568             : 
    2569             :         raw_spin_unlock(&dl_b->lock);
    2570             : 
                               /* Presumably releases both the rq lock and p->pi_lock taken above; confirm in task_rq_unlock(). */
    2571             :         task_rq_unlock(rq, p, &rf);
    2572             : }
    2573             : 
    2574             : void dl_clear_root_domain(struct root_domain *rd)
    2575             : {
                               /* Reset rd's allocated -deadline bandwidth (dl_bw.total_bw) to 0 under dl_bw.lock with IRQs saved. */
    2576             :         unsigned long flags;
    2577             : 
    2578             :         raw_spin_lock_irqsave(&rd->dl_bw.lock, flags);
    2579             :         rd->dl_bw.total_bw = 0;
    2580             :         raw_spin_unlock_irqrestore(&rd->dl_bw.lock, flags);
    2581             : }
    2582             : 
    2583             : #endif /* CONFIG_SMP */
    2584             : 
    2585           0 : static void switched_from_dl(struct rq *rq, struct task_struct *p)
    2586             : {
                               /*
                                * Installed as the class's .switched_from hook: bandwidth
                                * bookkeeping for a task p that is leaving -deadline, followed by
                                * an optional pull request for rq.
                                */
    2587             :         /*
    2588             :          * task_non_contending() can start the "inactive timer" (if the 0-lag
    2589             :          * time is in the future). If the task switches back to dl before
    2590             :          * the "inactive timer" fires, it can continue to consume its current
    2591             :          * runtime using its current deadline. If it stays outside of
    2592             :          * SCHED_DEADLINE until the 0-lag time passes, inactive_task_timer()
    2593             :          * will reset the task parameters.
    2594             :          */
    2595           0 :         if (task_on_rq_queued(p) && p->dl.dl_runtime)
    2596           0 :                 task_non_contending(p);
    2597             : 
    2598           0 :         if (!task_on_rq_queued(p)) {
    2599             :                 /*
    2600             :                  * Inactive timer is armed. However, p is leaving DEADLINE and
    2601             :                  * might migrate away from this rq while continuing to run on
    2602             :                  * some other class. We need to remove its contribution from
    2603             :                  * this rq running_bw now, or sub_rq_bw (below) will complain.
    2604             :                  */
    2605           0 :                 if (p->dl.dl_non_contending)
    2606           0 :                         sub_running_bw(&p->dl, &rq->dl);
    2607           0 :                 sub_rq_bw(&p->dl, &rq->dl);
    2608             :         }
    2609             : 
    2610             :         /*
    2611             :          * We cannot use inactive_task_timer() to invoke sub_running_bw()
    2612             :          * at the 0-lag time, because the task could have been migrated
    2613             :          * while SCHED_OTHER in the meanwhile.
    2614             :          */
    2615           0 :         if (p->dl.dl_non_contending)
    2616           0 :                 p->dl.dl_non_contending = 0;
    2617             : 
    2618             :         /*
    2619             :          * Since this might be the only -deadline task on the rq,
    2620             :          * this is the right place to try to pull some other one
    2621             :          * from an overloaded CPU, if any.
                                *
                                * The pull is only queued when p is still queued and rq has no
                                * -deadline task left running.
    2622             :          */
    2623           0 :         if (!task_on_rq_queued(p) || rq->dl.dl_nr_running)
    2624             :                 return;
    2625             : 
    2626             :         deadline_queue_pull_task(rq);
    2627             : }
    2628             : 
    2629             : /*
    2630             :  * When switching to -deadline, we may overload the rq, then
    2631             :  * we try to push someone off, if possible.
    2632             :  */
    2633           0 : static void switched_to_dl(struct rq *rq, struct task_struct *p)
    2634             : {
                               /*
                                * A return of 1 means a pending inactive timer was cancelled.
                                * NOTE(review): the put_task_struct() presumably drops the
                                * reference taken when that timer was armed; confirm at the
                                * place where the timer is started.
                                */
    2635           0 :         if (hrtimer_try_to_cancel(&p->dl.inactive_timer) == 1)
    2636           0 :                 put_task_struct(p);
    2637             : 
    2638             :         /* If p is not queued we will update its parameters at next wakeup. */
    2639           0 :         if (!task_on_rq_queued(p)) {
    2640           0 :                 add_rq_bw(&p->dl, &rq->dl);
    2641             : 
    2642             :                 return;
    2643             :         }
    2644             : 
    2645           0 :         if (rq->curr != p) {
    2646             : #ifdef CONFIG_SMP
                                       /* p can run elsewhere and rq is overloaded: queue a push. */
    2647             :                 if (p->nr_cpus_allowed > 1 && rq->dl.overloaded)
    2648             :                         deadline_queue_push_tasks(rq);
    2649             : #endif
                                       /* A -deadline curr goes through the preemption check; any other curr is simply rescheduled. */
    2650           0 :                 if (dl_task(rq->curr))
    2651             :                         check_preempt_curr_dl(rq, p, 0);
    2652             :                 else
    2653           0 :                         resched_curr(rq);
    2654             :         } else {
                                       /* p is the task currently running on rq. */
    2655           0 :                 update_dl_rq_load_avg(rq_clock_pelt(rq), rq, 0);
    2656             :         }
    2657             : }
    2658             : 
    2659             : /*
    2660             :  * If the scheduling parameters of a -deadline task changed,
    2661             :  * a push or pull operation might be needed.
                        *
                        * Nothing is done when p is not queued. The oldprio argument is not
                        * used by this function.
    2662             :  */
    2663           0 : static void prio_changed_dl(struct rq *rq, struct task_struct *p,
    2664             :                             int oldprio)
    2665             : {
    2666           0 :         if (!task_on_rq_queued(p))
    2667             :                 return;
    2668             : 
    2669             : #ifdef CONFIG_SMP
    2670             :         /*
    2671             :          * This might be too much, but unfortunately
    2672             :          * we don't have the old deadline value, and
    2673             :          * we can't argue if the task is increasing
    2674             :          * or lowering its prio, so...
    2675             :          */
    2676             :         if (!rq->dl.overloaded)
    2677             :                 deadline_queue_pull_task(rq);
    2678             : 
    2679             :         if (task_current(rq, p)) {
    2680             :                 /*
    2681             :                  * If we now have an earlier deadline task than p,
    2682             :                  * then reschedule, provided p is still on this
    2683             :                  * runqueue.
    2684             :                  */
    2685             :                 if (dl_time_before(rq->dl.earliest_dl.curr, p->dl.deadline))
    2686             :                         resched_curr(rq);
    2687             :         } else {
    2688             :                 /*
    2689             :                  * Current may not be deadline in case p was throttled but we
    2690             :                  * have just replenished it (e.g. rt_mutex_setprio()).
    2691             :                  *
    2692             :                  * Otherwise, if p was given an earlier deadline, reschedule.
    2693             :                  */
    2694             :                 if (!dl_task(rq->curr) ||
    2695             :                     dl_time_before(p->dl.deadline, rq->curr->dl.deadline))
    2696             :                         resched_curr(rq);
    2697             :         }
    2698             : #else
    2699             :         /*
    2700             :          * We don't know if p has an earlier or later deadline, so let's blindly
    2701             :          * set a (maybe not needed) rescheduling point.
    2702             :          */
    2703           0 :         resched_curr(rq);
    2704             : #endif
    2705             : }
    2706             : 
    2707             : DEFINE_SCHED_CLASS(dl) = {
                               /*
                                * Method table of the -deadline class. The balancing, CPU
                                * selection, migration, affinity and rq online/offline hooks are
                                * only installed when CONFIG_SMP is set.
                                */
    2708             : 
    2709             :         .enqueue_task           = enqueue_task_dl,
    2710             :         .dequeue_task           = dequeue_task_dl,
    2711             :         .yield_task             = yield_task_dl,
    2712             : 
    2713             :         .check_preempt_curr     = check_preempt_curr_dl,
    2714             : 
    2715             :         .pick_next_task         = pick_next_task_dl,
    2716             :         .put_prev_task          = put_prev_task_dl,
    2717             :         .set_next_task          = set_next_task_dl,
    2718             : 
    2719             : #ifdef CONFIG_SMP
    2720             :         .balance                = balance_dl,
    2721             :         .pick_task              = pick_task_dl,
    2722             :         .select_task_rq         = select_task_rq_dl,
    2723             :         .migrate_task_rq        = migrate_task_rq_dl,
    2724             :         .set_cpus_allowed       = set_cpus_allowed_dl,
    2725             :         .rq_online              = rq_online_dl,
    2726             :         .rq_offline             = rq_offline_dl,
    2727             :         .task_woken             = task_woken_dl,
    2728             :         .find_lock_rq           = find_lock_later_rq,
    2729             : #endif
    2730             : 
    2731             :         .task_tick              = task_tick_dl,
    2732             :         .task_fork              = task_fork_dl,
    2733             : 
    2734             :         .prio_changed           = prio_changed_dl,
    2735             :         .switched_from          = switched_from_dl,
    2736             :         .switched_to            = switched_to_dl,
    2737             : 
    2738             :         .update_curr            = update_curr_dl,
    2739             : };
    2740             : 
    2741             : /* Used for dl_bw check and update, used under sched_rt_handler()::mutex */
                       /* Incremented once per sched_dl_global_validate()/sched_dl_do_global() pass; the new value is handed to dl_bw_visited(). */
    2742             : static u64 dl_generation;
    2743             : 
    2744           0 : int sched_dl_global_validate(void)
    2745             : {
                               /*
                                * Check the bandwidth derived from the global RT period/runtime
                                * against what is already allocated.
                                *
                                * Returns 0 when the new value is acceptable everywhere, or -EBUSY
                                * as soon as one dl_bw has total_bw above new_bw * cpus.
                                */
    2746           0 :         u64 runtime = global_rt_runtime();
    2747           0 :         u64 period = global_rt_period();
    2748           0 :         u64 new_bw = to_ratio(period, runtime);
    2749           0 :         u64 gen = ++dl_generation;
    2750             :         struct dl_bw *dl_b;
    2751           0 :         int cpu, cpus, ret = 0;
    2752             :         unsigned long flags;
    2753             : 
    2754             :         /*
    2755             :          * Here we want to check the bandwidth not being set to some
    2756             :          * value smaller than the currently allocated bandwidth in
    2757             :          * any of the root_domains.
    2758             :          */
    2759           0 :         for_each_possible_cpu(cpu) {
    2760             :                 rcu_read_lock_sched();
    2761             : 
                                       /* Skip CPUs that dl_bw_visited() reports as already handled for this generation. */
    2762           0 :                 if (dl_bw_visited(cpu, gen))
    2763             :                         goto next;
    2764             : 
    2765           0 :                 dl_b = dl_bw_of(cpu);
    2766           0 :                 cpus = dl_bw_cpus(cpu);
    2767             : 
    2768           0 :                 raw_spin_lock_irqsave(&dl_b->lock, flags);
    2769           0 :                 if (new_bw * cpus < dl_b->total_bw)
    2770           0 :                         ret = -EBUSY;
    2771           0 :                 raw_spin_unlock_irqrestore(&dl_b->lock, flags);
    2772             : 
    2773             : next:
    2774             :                 rcu_read_unlock_sched();
    2775             : 
                                       /* Stop at the first failure; the RCU read section has already been left. */
    2776           0 :                 if (ret)
    2777             :                         break;
    2778             :         }
    2779             : 
    2780           0 :         return ret;
    2781             : }
    2782             : 
    2783           1 : static void init_dl_rq_bw_ratio(struct dl_rq *dl_rq)
    2784             : {
                               /*
                                * Set dl_rq->bw_ratio and dl_rq->extra_bw from the global RT
                                * runtime/period.
                                *
                                * With RUNTIME_INF both get a single bit at their own shift
                                * (presumably the fixed-point value 1 in each scale; confirm
                                * against the RATIO_SHIFT/BW_SHIFT definitions).
                                *
                                * Otherwise note the argument order: bw_ratio uses
                                * to_ratio(runtime, period), scaled down by
                                * (BW_SHIFT - RATIO_SHIFT), while extra_bw uses
                                * to_ratio(period, runtime).
                                */
    2785           1 :         if (global_rt_runtime() == RUNTIME_INF) {
    2786           0 :                 dl_rq->bw_ratio = 1 << RATIO_SHIFT;
    2787           0 :                 dl_rq->extra_bw = 1 << BW_SHIFT;
    2788             :         } else {
    2789           2 :                 dl_rq->bw_ratio = to_ratio(global_rt_runtime(),
    2790           1 :                           global_rt_period()) >> (BW_SHIFT - RATIO_SHIFT);
    2791           1 :                 dl_rq->extra_bw = to_ratio(global_rt_period(),
    2792             :                                                     global_rt_runtime());
    2793             :         }
    2794           1 : }
    2795             : 
    2796           0 : void sched_dl_do_global(void)
    2797             : {
                               /*
                                * Store the bandwidth derived from the global RT period/runtime
                                * into each dl_bw not yet visited in this generation, then
                                * refresh the per-rq ratios of the CPU that was processed.
                                */
                               /* new_bw keeps the all-ones value (u64)-1 when the global runtime is RUNTIME_INF. */
    2798           0 :         u64 new_bw = -1;
    2799           0 :         u64 gen = ++dl_generation;
    2800             :         struct dl_bw *dl_b;
    2801             :         int cpu;
    2802             :         unsigned long flags;
    2803             : 
    2804           0 :         if (global_rt_runtime() != RUNTIME_INF)
    2805           0 :                 new_bw = to_ratio(global_rt_period(), global_rt_runtime());
    2806             : 
    2807           0 :         for_each_possible_cpu(cpu) {
    2808             :                 rcu_read_lock_sched();
    2809             : 
    2810           0 :                 if (dl_bw_visited(cpu, gen)) {
    2811             :                         rcu_read_unlock_sched();
    2812             :                         continue;
    2813             :                 }
    2814             : 
    2815           0 :                 dl_b = dl_bw_of(cpu);
    2816             : 
    2817           0 :                 raw_spin_lock_irqsave(&dl_b->lock, flags);
    2818           0 :                 dl_b->bw = new_bw;
    2819           0 :                 raw_spin_unlock_irqrestore(&dl_b->lock, flags);
    2820             : 
    2821             :                 rcu_read_unlock_sched();
                                       /*
                                        * NOTE(review): CPUs skipped by the dl_bw_visited() check
                                        * above never reach this call, so their dl_rq
                                        * bw_ratio/extra_bw are not refreshed here. Confirm that
                                        * this is intended.
                                        */
    2822           0 :                 init_dl_rq_bw_ratio(&cpu_rq(cpu)->dl);
    2823             :         }
    2824           0 : }
    2825             : 
    2826             : /*
    2827             :  * We must be sure that accepting a new task (or allowing changing the
    2828             :  * parameters of an existing one) is consistent with the bandwidth
    2829             :  * constraints. If yes, this function also accordingly updates the currently
    2830             :  * allocated bandwidth to reflect the new situation.
    2831             :  *
    2832             :  * This function is called while holding p's rq->lock.
    2833             :  */
    2834           0 : int sched_dl_overflow(struct task_struct *p, int policy,
    2835             :                       const struct sched_attr *attr)
    2836             : {
    2837           0 :         u64 period = attr->sched_period ?: attr->sched_deadline;
    2838           0 :         u64 runtime = attr->sched_runtime;
    2839           0 :         u64 new_bw = dl_policy(policy) ? to_ratio(period, runtime) : 0;
    2840           0 :         int cpus, err = -1, cpu = task_cpu(p);
    2841           0 :         struct dl_bw *dl_b = dl_bw_of(cpu);
    2842             :         unsigned long cap;
    2843             : 
    2844           0 :         if (attr->sched_flags & SCHED_FLAG_SUGOV)
    2845             :                 return 0;
    2846             : 
    2847             :         /* !deadline task may carry old deadline bandwidth */
    2848           0 :         if (new_bw == p->dl.dl_bw && task_has_dl_policy(p))
    2849             :                 return 0;
    2850             : 
    2851             :         /*
    2852             :          * Either if a task, enters, leave, or stays -deadline but changes
    2853             :          * its parameters, we may need to update accordingly the total
    2854             :          * allocated bandwidth of the container.
    2855             :          */
    2856           0 :         raw_spin_lock(&dl_b->lock);
    2857           0 :         cpus = dl_bw_cpus(cpu);
    2858           0 :         cap = dl_bw_capacity(cpu);
    2859             : 
    2860           0 :         if (dl_policy(policy) && !task_has_dl_policy(p) &&
    2861           0 :             !__dl_overflow(dl_b, cap, 0, new_bw)) {
    2862           0 :                 if (hrtimer_active(&p->dl.inactive_timer))
    2863           0 :                         __dl_sub(dl_b, p->dl.dl_bw, cpus);
    2864           0 :                 __dl_add(dl_b, new_bw, cpus);
    2865           0 :                 err = 0;
    2866           0 :         } else if (dl_policy(policy) && task_has_dl_policy(p) &&
    2867           0 :                    !__dl_overflow(dl_b, cap, p->dl.dl_bw, new_bw)) {
    2868             :                 /*
    2869             :                  * XXX this is slightly incorrect: when the task
    2870             :                  * utilization decreases, we should delay the total
    2871             :                  * utilization change until the task's 0-lag point.
    2872             :                  * But this would require to set the task's "inactive
    2873             :                  * timer" when the task is not inactive.
    2874             :                  */
    2875           0 :                 __dl_sub(dl_b, p->dl.dl_bw, cpus);
    2876           0 :                 __dl_add(dl_b, new_bw, cpus);
    2877           0 :                 dl_change_utilization(p, new_bw);
    2878           0 :                 err = 0;
    2879           0 :         } else if (!dl_policy(policy) && task_has_dl_policy(p)) {
    2880             :                 /*
    2881             :                  * Do not decrease the total deadline utilization here,
    2882             :                  * switched_from_dl() will take care to do it at the correct
    2883             :                  * (0-lag) time.
    2884             :                  */
    2885           0 :                 err = 0;
    2886             :         }
    2887           0 :         raw_spin_unlock(&dl_b->lock);
    2888             : 
    2889           0 :         return err;
    2890             : }
    2891             : 
    2892             : /*
    2893             :  * This function initializes the sched_dl_entity of a newly becoming
    2894             :  * SCHED_DEADLINE task.
    2895             :  *
    2896             :  * Only the static values are considered here, the actual runtime and the
    2897             :  * absolute deadline will be properly calculated when the task is enqueued
    2898             :  * for the first time with its new policy.
    2899             :  */
    2900           0 : void __setparam_dl(struct task_struct *p, const struct sched_attr *attr)
    2901             : {
    2902           0 :         struct sched_dl_entity *dl_se = &p->dl;
    2903             : 
    2904           0 :         dl_se->dl_runtime = attr->sched_runtime;
    2905           0 :         dl_se->dl_deadline = attr->sched_deadline;
    2906           0 :         dl_se->dl_period = attr->sched_period ?: dl_se->dl_deadline;
    2907           0 :         dl_se->flags = attr->sched_flags & SCHED_DL_FLAGS;
    2908           0 :         dl_se->dl_bw = to_ratio(dl_se->dl_period, dl_se->dl_runtime);
    2909           0 :         dl_se->dl_density = to_ratio(dl_se->dl_deadline, dl_se->dl_runtime);
    2910           0 : }
    2911             : 
    2912           0 : void __getparam_dl(struct task_struct *p, struct sched_attr *attr)
    2913             : {
    2914           0 :         struct sched_dl_entity *dl_se = &p->dl;
    2915             : 
    2916           0 :         attr->sched_priority = p->rt_priority;
    2917           0 :         attr->sched_runtime = dl_se->dl_runtime;
    2918           0 :         attr->sched_deadline = dl_se->dl_deadline;
    2919           0 :         attr->sched_period = dl_se->dl_period;
    2920           0 :         attr->sched_flags &= ~SCHED_DL_FLAGS;
    2921           0 :         attr->sched_flags |= dl_se->flags;
    2922           0 : }
    2923             : 
    2924             : /*
    2925             :  * This function validates the new parameters of a -deadline task.
    2926             :  * We ask for the deadline not being zero, and greater or equal
    2927             :  * than the runtime, as well as the period of being zero or
    2928             :  * greater than deadline. Furthermore, we have to be sure that
    2929             :  * user parameters are above the internal resolution of 1us (we
    2930             :  * check sched_runtime only since it is always the smaller one) and
    2931             :  * below 2^63 ns (we have to check both sched_deadline and
    2932             :  * sched_period, as the latter can be zero).
    2933             :  */
    2934           0 : bool __checkparam_dl(const struct sched_attr *attr)
    2935             : {
    2936             :         u64 period, max, min;
    2937             : 
    2938             :         /* special dl tasks don't actually use any parameter */
    2939           0 :         if (attr->sched_flags & SCHED_FLAG_SUGOV)
    2940             :                 return true;
    2941             : 
    2942             :         /* deadline != 0 */
    2943           0 :         if (attr->sched_deadline == 0)
    2944             :                 return false;
    2945             : 
    2946             :         /*
    2947             :          * Since we truncate DL_SCALE bits, make sure we're at least
    2948             :          * that big.
    2949             :          */
    2950           0 :         if (attr->sched_runtime < (1ULL << DL_SCALE))
    2951             :                 return false;
    2952             : 
    2953             :         /*
    2954             :          * Since we use the MSB for wrap-around and sign issues, make
    2955             :          * sure it's not set (mind that period can be equal to zero).
    2956             :          */
    2957           0 :         if (attr->sched_deadline & (1ULL << 63) ||
    2958           0 :             attr->sched_period & (1ULL << 63))
    2959             :                 return false;
    2960             : 
    2961           0 :         period = attr->sched_period;
    2962           0 :         if (!period)
    2963           0 :                 period = attr->sched_deadline;
    2964             : 
    2965             :         /* runtime <= deadline <= period (if period != 0) */
    2966           0 :         if (period < attr->sched_deadline ||
    2967             :             attr->sched_deadline < attr->sched_runtime)
    2968             :                 return false;
    2969             : 
    2970           0 :         max = (u64)READ_ONCE(sysctl_sched_dl_period_max) * NSEC_PER_USEC;
    2971           0 :         min = (u64)READ_ONCE(sysctl_sched_dl_period_min) * NSEC_PER_USEC;
    2972             : 
    2973           0 :         if (period < min || period > max)
    2974             :                 return false;
    2975             : 
    2976           0 :         return true;
    2977             : }
    2978             : 
    2979             : /*
    2980             :  * This function clears the sched_dl_entity static params.
    2981             :  */
    2982         349 : void __dl_clear_params(struct task_struct *p)
    2983             : {
    2984         349 :         struct sched_dl_entity *dl_se = &p->dl;
    2985             : 
    2986         349 :         dl_se->dl_runtime            = 0;
    2987         349 :         dl_se->dl_deadline           = 0;
    2988         349 :         dl_se->dl_period             = 0;
    2989         349 :         dl_se->flags                 = 0;
    2990         349 :         dl_se->dl_bw                 = 0;
    2991         349 :         dl_se->dl_density            = 0;
    2992             : 
    2993         349 :         dl_se->dl_throttled          = 0;
    2994         349 :         dl_se->dl_yielded            = 0;
    2995         349 :         dl_se->dl_non_contending     = 0;
    2996         349 :         dl_se->dl_overrun            = 0;
    2997             : 
    2998             : #ifdef CONFIG_RT_MUTEXES
    2999         349 :         dl_se->pi_se                 = dl_se;
    3000             : #endif
    3001         349 : }
    3002             : 
    3003           0 : bool dl_param_changed(struct task_struct *p, const struct sched_attr *attr)
    3004             : {
    3005           0 :         struct sched_dl_entity *dl_se = &p->dl;
    3006             : 
    3007           0 :         if (dl_se->dl_runtime != attr->sched_runtime ||
    3008           0 :             dl_se->dl_deadline != attr->sched_deadline ||
    3009           0 :             dl_se->dl_period != attr->sched_period ||
    3010           0 :             dl_se->flags != (attr->sched_flags & SCHED_DL_FLAGS))
    3011             :                 return true;
    3012             : 
    3013           0 :         return false;
    3014             : }
    3015             : 
    3016             : #ifdef CONFIG_SMP
    3017             : int dl_cpuset_cpumask_can_shrink(const struct cpumask *cur,
    3018             :                                  const struct cpumask *trial)
    3019             : {
    3020             :         unsigned long flags, cap;
    3021             :         struct dl_bw *cur_dl_b;
    3022             :         int ret = 1;
    3023             : 
    3024             :         rcu_read_lock_sched();
    3025             :         cur_dl_b = dl_bw_of(cpumask_any(cur));
    3026             :         cap = __dl_bw_capacity(trial);
    3027             :         raw_spin_lock_irqsave(&cur_dl_b->lock, flags);
    3028             :         if (__dl_overflow(cur_dl_b, cap, 0, 0))
    3029             :                 ret = 0;
    3030             :         raw_spin_unlock_irqrestore(&cur_dl_b->lock, flags);
    3031             :         rcu_read_unlock_sched();
    3032             : 
    3033             :         return ret;
    3034             : }
    3035             : 
    3036             : int dl_cpu_busy(int cpu, struct task_struct *p)
    3037             : {
    3038             :         unsigned long flags, cap;
    3039             :         struct dl_bw *dl_b;
    3040             :         bool overflow;
    3041             : 
    3042             :         rcu_read_lock_sched();
    3043             :         dl_b = dl_bw_of(cpu);
    3044             :         raw_spin_lock_irqsave(&dl_b->lock, flags);
    3045             :         cap = dl_bw_capacity(cpu);
    3046             :         overflow = __dl_overflow(dl_b, cap, 0, p ? p->dl.dl_bw : 0);
    3047             : 
    3048             :         if (!overflow && p) {
    3049             :                 /*
    3050             :                  * We reserve space for this task in the destination
    3051             :                  * root_domain, as we can't fail after this point.
    3052             :                  * We will free resources in the source root_domain
    3053             :                  * later on (see set_cpus_allowed_dl()).
    3054             :                  */
    3055             :                 __dl_add(dl_b, p->dl.dl_bw, dl_bw_cpus(cpu));
    3056             :         }
    3057             : 
    3058             :         raw_spin_unlock_irqrestore(&dl_b->lock, flags);
    3059             :         rcu_read_unlock_sched();
    3060             : 
    3061             :         return overflow ? -EBUSY : 0;
    3062             : }
    3063             : #endif
    3064             : 
    3065             : #ifdef CONFIG_SCHED_DEBUG
    3066           0 : void print_dl_stats(struct seq_file *m, int cpu)
    3067             : {
    3068           0 :         print_dl_rq(m, cpu, &cpu_rq(cpu)->dl);
    3069           0 : }
    3070             : #endif /* CONFIG_SCHED_DEBUG */

Generated by: LCOV version 1.14