Line data Source code
1 : // SPDX-License-Identifier: GPL-2.0
2 : /*
3 : * Deadline Scheduling Class (SCHED_DEADLINE)
4 : *
5 : * Earliest Deadline First (EDF) + Constant Bandwidth Server (CBS).
6 : *
7 : * Tasks that periodically execute their instances for less than their
8 : * runtime won't miss any of their deadlines.
9 : * Tasks that are not periodic or sporadic, or that try to execute more
10 : * than their reserved bandwidth, will be slowed down (and may potentially
11 : * miss some of their deadlines), and won't affect any other task.
12 : *
13 : * Copyright (C) 2012 Dario Faggioli <raistlin@linux.it>,
14 : * Juri Lelli <juri.lelli@gmail.com>,
15 : * Michael Trimarchi <michael@amarulasolutions.com>,
16 : * Fabio Checconi <fchecconi@gmail.com>
17 : */
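/*
 * Illustrative example (not part of the original source): a task that asks,
 * via sched_setattr(), for sched_runtime = 10ms and sched_deadline =
 * sched_period = 100ms reserves 10% of a CPU. As long as each instance runs
 * for at most 10ms per 100ms period it will meet its deadlines; if it tries
 * to run longer, the CBS throttles it until the next replenishment instead
 * of letting it disturb other reservations.
 */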
18 :
19 : #include <linux/cpuset.h>
20 :
21 : /*
22 : * Default limits for DL period; on the top end we guard against small util
23 : * tasks still getting ridiculously long effective runtimes; on the bottom end we
24 : * guard against timer DoS.
25 : */
26 : static unsigned int sysctl_sched_dl_period_max = 1 << 22; /* ~4 seconds */
27 : static unsigned int sysctl_sched_dl_period_min = 100; /* 100 us */
28 : #ifdef CONFIG_SYSCTL
29 : static struct ctl_table sched_dl_sysctls[] = {
30 : {
31 : .procname = "sched_deadline_period_max_us",
32 : .data = &sysctl_sched_dl_period_max,
33 : .maxlen = sizeof(unsigned int),
34 : .mode = 0644,
35 : .proc_handler = proc_douintvec_minmax,
36 : .extra1 = (void *)&sysctl_sched_dl_period_min,
37 : },
38 : {
39 : .procname = "sched_deadline_period_min_us",
40 : .data = &sysctl_sched_dl_period_min,
41 : .maxlen = sizeof(unsigned int),
42 : .mode = 0644,
43 : .proc_handler = proc_douintvec_minmax,
44 : .extra2 = (void *)&sysctl_sched_dl_period_max,
45 : },
46 : {}
47 : };
48 :
49 1 : static int __init sched_dl_sysctl_init(void)
50 : {
51 1 : register_sysctl_init("kernel", sched_dl_sysctls);
52 1 : return 0;
53 : }
54 : late_initcall(sched_dl_sysctl_init);
55 : #endif
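/*
 * Illustrative usage (not part of the original source): both limits are
 * runtime tunables, e.g.:
 *
 *   sysctl -w kernel.sched_deadline_period_max_us=2000000
 *   sysctl -w kernel.sched_deadline_period_min_us=200
 *
 * sched_setattr() requests whose period falls outside [min, max] are
 * rejected at admission time.
 */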
56 :
57 : static inline struct task_struct *dl_task_of(struct sched_dl_entity *dl_se)
58 : {
59 0 : return container_of(dl_se, struct task_struct, dl);
60 : }
61 :
62 : static inline struct rq *rq_of_dl_rq(struct dl_rq *dl_rq)
63 : {
64 0 : return container_of(dl_rq, struct rq, dl);
65 : }
66 :
67 : static inline struct dl_rq *dl_rq_of_se(struct sched_dl_entity *dl_se)
68 : {
69 0 : struct task_struct *p = dl_task_of(dl_se);
70 0 : struct rq *rq = task_rq(p);
71 :
72 : return &rq->dl;
73 : }
74 :
75 : static inline int on_dl_rq(struct sched_dl_entity *dl_se)
76 : {
77 0 : return !RB_EMPTY_NODE(&dl_se->rb_node);
78 : }
79 :
80 : #ifdef CONFIG_RT_MUTEXES
81 : static inline struct sched_dl_entity *pi_of(struct sched_dl_entity *dl_se)
82 : {
83 : return dl_se->pi_se;
84 : }
85 :
86 : static inline bool is_dl_boosted(struct sched_dl_entity *dl_se)
87 : {
88 0 : return pi_of(dl_se) != dl_se;
89 : }
90 : #else
91 : static inline struct sched_dl_entity *pi_of(struct sched_dl_entity *dl_se)
92 : {
93 : return dl_se;
94 : }
95 :
96 : static inline bool is_dl_boosted(struct sched_dl_entity *dl_se)
97 : {
98 : return false;
99 : }
100 : #endif
101 :
102 : #ifdef CONFIG_SMP
103 : static inline struct dl_bw *dl_bw_of(int i)
104 : {
105 : RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held(),
106 : "sched RCU must be held");
107 : return &cpu_rq(i)->rd->dl_bw;
108 : }
109 :
110 : static inline int dl_bw_cpus(int i)
111 : {
112 : struct root_domain *rd = cpu_rq(i)->rd;
113 : int cpus;
114 :
115 : RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held(),
116 : "sched RCU must be held");
117 :
118 : if (cpumask_subset(rd->span, cpu_active_mask))
119 : return cpumask_weight(rd->span);
120 :
121 : cpus = 0;
122 :
123 : for_each_cpu_and(i, rd->span, cpu_active_mask)
124 : cpus++;
125 :
126 : return cpus;
127 : }
128 :
129 : static inline unsigned long __dl_bw_capacity(const struct cpumask *mask)
130 : {
131 : unsigned long cap = 0;
132 : int i;
133 :
134 : for_each_cpu_and(i, mask, cpu_active_mask)
135 : cap += capacity_orig_of(i);
136 :
137 : return cap;
138 : }
139 :
140 : /*
141 : * XXX Fix: If 'rq->rd == def_root_domain' perform AC against capacity
142 : * of the CPU the task is running on rather than rd's \Sum CPU capacity.
143 : */
144 : static inline unsigned long dl_bw_capacity(int i)
145 : {
146 : if (!sched_asym_cpucap_active() &&
147 : capacity_orig_of(i) == SCHED_CAPACITY_SCALE) {
148 : return dl_bw_cpus(i) << SCHED_CAPACITY_SHIFT;
149 : } else {
150 : RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held(),
151 : "sched RCU must be held");
152 :
153 : return __dl_bw_capacity(cpu_rq(i)->rd->span);
154 : }
155 : }
156 :
157 : static inline bool dl_bw_visited(int cpu, u64 gen)
158 : {
159 : struct root_domain *rd = cpu_rq(cpu)->rd;
160 :
161 : if (rd->visit_gen == gen)
162 : return true;
163 :
164 : rd->visit_gen = gen;
165 : return false;
166 : }
167 :
168 : static inline
169 : void __dl_update(struct dl_bw *dl_b, s64 bw)
170 : {
171 : struct root_domain *rd = container_of(dl_b, struct root_domain, dl_bw);
172 : int i;
173 :
174 : RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held(),
175 : "sched RCU must be held");
176 : for_each_cpu_and(i, rd->span, cpu_active_mask) {
177 : struct rq *rq = cpu_rq(i);
178 :
179 : rq->dl.extra_bw += bw;
180 : }
181 : }
182 : #else
183 : static inline struct dl_bw *dl_bw_of(int i)
184 : {
185 0 : return &cpu_rq(i)->dl.dl_bw;
186 : }
187 :
188 : static inline int dl_bw_cpus(int i)
189 : {
190 : return 1;
191 : }
192 :
193 : static inline unsigned long dl_bw_capacity(int i)
194 : {
195 : return SCHED_CAPACITY_SCALE;
196 : }
197 :
198 : static inline bool dl_bw_visited(int cpu, u64 gen)
199 : {
200 : return false;
201 : }
202 :
203 : static inline
204 : void __dl_update(struct dl_bw *dl_b, s64 bw)
205 : {
206 0 : struct dl_rq *dl = container_of(dl_b, struct dl_rq, dl_bw);
207 :
208 0 : dl->extra_bw += bw;
209 : }
210 : #endif
211 :
212 : static inline
213 : void __dl_sub(struct dl_bw *dl_b, u64 tsk_bw, int cpus)
214 : {
215 0 : dl_b->total_bw -= tsk_bw;
216 0 : __dl_update(dl_b, (s32)tsk_bw / cpus);
217 : }
218 :
219 : static inline
220 : void __dl_add(struct dl_bw *dl_b, u64 tsk_bw, int cpus)
221 : {
222 0 : dl_b->total_bw += tsk_bw;
223 0 : __dl_update(dl_b, -((s32)tsk_bw / cpus));
224 : }
225 :
226 : static inline bool
227 : __dl_overflow(struct dl_bw *dl_b, unsigned long cap, u64 old_bw, u64 new_bw)
228 : {
229 0 : return dl_b->bw != -1 &&
230 0 : cap_scale(dl_b->bw, cap) < dl_b->total_bw - old_bw + new_bw;
231 : }
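/*
 * Worked example (illustrative, not from the original source): with the
 * default dl_b->bw of 0.95 (scaled by 2^BW_SHIFT) and cap == 2048 (two
 * CPUs at SCHED_CAPACITY_SCALE each), at most 0.95 * 2 == 1.9 "CPUs worth"
 * of deadline bandwidth may be admitted. If total_bw is already 1.5 and a
 * new task asks for new_bw == 0.5 (old_bw == 0), then 1.5 - 0 + 0.5 == 2.0
 * exceeds 1.9 and __dl_overflow() returns true, so the request is rejected.
 */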
232 :
233 : static inline
234 : void __add_running_bw(u64 dl_bw, struct dl_rq *dl_rq)
235 : {
236 0 : u64 old = dl_rq->running_bw;
237 :
238 0 : lockdep_assert_rq_held(rq_of_dl_rq(dl_rq));
239 0 : dl_rq->running_bw += dl_bw;
240 : SCHED_WARN_ON(dl_rq->running_bw < old); /* overflow */
241 0 : SCHED_WARN_ON(dl_rq->running_bw > dl_rq->this_bw);
242 : /* kick cpufreq (see the comment in kernel/sched/sched.h). */
243 0 : cpufreq_update_util(rq_of_dl_rq(dl_rq), 0);
244 : }
245 :
246 : static inline
247 : void __sub_running_bw(u64 dl_bw, struct dl_rq *dl_rq)
248 : {
249 0 : u64 old = dl_rq->running_bw;
250 :
251 0 : lockdep_assert_rq_held(rq_of_dl_rq(dl_rq));
252 0 : dl_rq->running_bw -= dl_bw;
253 : SCHED_WARN_ON(dl_rq->running_bw > old); /* underflow */
254 0 : if (dl_rq->running_bw > old)
255 0 : dl_rq->running_bw = 0;
256 : /* kick cpufreq (see the comment in kernel/sched/sched.h). */
257 : cpufreq_update_util(rq_of_dl_rq(dl_rq), 0);
258 : }
259 :
260 : static inline
261 : void __add_rq_bw(u64 dl_bw, struct dl_rq *dl_rq)
262 : {
263 0 : u64 old = dl_rq->this_bw;
264 :
265 0 : lockdep_assert_rq_held(rq_of_dl_rq(dl_rq));
266 0 : dl_rq->this_bw += dl_bw;
267 : SCHED_WARN_ON(dl_rq->this_bw < old); /* overflow */
268 : }
269 :
270 : static inline
271 : void __sub_rq_bw(u64 dl_bw, struct dl_rq *dl_rq)
272 : {
273 0 : u64 old = dl_rq->this_bw;
274 :
275 0 : lockdep_assert_rq_held(rq_of_dl_rq(dl_rq));
276 0 : dl_rq->this_bw -= dl_bw;
277 : SCHED_WARN_ON(dl_rq->this_bw > old); /* underflow */
278 0 : if (dl_rq->this_bw > old)
279 0 : dl_rq->this_bw = 0;
280 : SCHED_WARN_ON(dl_rq->running_bw > dl_rq->this_bw);
281 : }
282 :
283 : static inline
284 : void add_rq_bw(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
285 : {
286 0 : if (!dl_entity_is_special(dl_se))
287 0 : __add_rq_bw(dl_se->dl_bw, dl_rq);
288 : }
289 :
290 : static inline
291 : void sub_rq_bw(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
292 : {
293 0 : if (!dl_entity_is_special(dl_se))
294 0 : __sub_rq_bw(dl_se->dl_bw, dl_rq);
295 : }
296 :
297 : static inline
298 : void add_running_bw(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
299 : {
300 0 : if (!dl_entity_is_special(dl_se))
301 0 : __add_running_bw(dl_se->dl_bw, dl_rq);
302 : }
303 :
304 : static inline
305 : void sub_running_bw(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
306 : {
307 0 : if (!dl_entity_is_special(dl_se))
308 0 : __sub_running_bw(dl_se->dl_bw, dl_rq);
309 : }
310 :
311 0 : static void dl_change_utilization(struct task_struct *p, u64 new_bw)
312 : {
313 : struct rq *rq;
314 :
315 0 : WARN_ON_ONCE(p->dl.flags & SCHED_FLAG_SUGOV);
316 :
317 0 : if (task_on_rq_queued(p))
318 : return;
319 :
320 0 : rq = task_rq(p);
321 0 : if (p->dl.dl_non_contending) {
322 0 : sub_running_bw(&p->dl, &rq->dl);
323 0 : p->dl.dl_non_contending = 0;
324 : /*
325 : * If the timer handler is currently running and the
326 : * timer cannot be canceled, inactive_task_timer()
327 : * will see that dl_non_contending is not set, and
328 : * will not touch the rq's active utilization,
329 : * so we are still safe.
330 : */
331 0 : if (hrtimer_try_to_cancel(&p->dl.inactive_timer) == 1)
332 0 : put_task_struct(p);
333 : }
334 0 : __sub_rq_bw(p->dl.dl_bw, &rq->dl);
335 0 : __add_rq_bw(new_bw, &rq->dl);
336 : }
337 :
338 : /*
339 : * The utilization of a task cannot be immediately removed from
340 : * the rq active utilization (running_bw) when the task blocks.
341 : * Instead, we have to wait for the so-called "0-lag time".
342 : *
343 : * If a task blocks before the "0-lag time", a timer (the inactive
344 : * timer) is armed, and running_bw is decreased when the timer
345 : * fires.
346 : *
347 : * If the task wakes up again before the inactive timer fires,
348 : * the timer is canceled, whereas if the task wakes up after the
349 : * inactive timer fired (and running_bw has been decreased) the
350 : * task's utilization has to be added to running_bw again.
351 : * A flag in the deadline scheduling entity (dl_non_contending)
352 : * is used to avoid race conditions between the inactive timer handler
353 : * and task wakeups.
354 : *
355 : * The following diagram shows how running_bw is updated. A task is
356 : * "ACTIVE" when its utilization contributes to running_bw; an
357 : * "ACTIVE contending" task is in the TASK_RUNNING state, while an
358 : * "ACTIVE non contending" task is a blocked task for which the "0-lag time"
359 : * has not passed yet. An "INACTIVE" task is a task for which the "0-lag"
360 : * time has already passed, and which does not contribute to running_bw anymore.
361 : * +------------------+
362 : * wakeup | ACTIVE |
363 : * +------------------>+ contending |
364 : * | add_running_bw | |
365 : * | +----+------+------+
366 : * | | ^
367 : * | dequeue | |
368 : * +--------+-------+ | |
369 : * | | t >= 0-lag | | wakeup
370 : * | INACTIVE |<---------------+ |
371 : * | | sub_running_bw | |
372 : * +--------+-------+ | |
373 : * ^ | |
374 : * | t < 0-lag | |
375 : * | | |
376 : * | V |
377 : * | +----+------+------+
378 : * | sub_running_bw | ACTIVE |
379 : * +-------------------+ |
380 : * inactive timer | non contending |
381 : * fired +------------------+
382 : *
383 : * The task_non_contending() function is invoked when a task
384 : * blocks, and checks if the 0-lag time already passed or
385 : * not (in the first case, it directly updates running_bw;
386 : * in the second case, it arms the inactive timer).
387 : *
388 : * The task_contending() function is invoked when a task wakes
389 : * up, and checks if the task is still in the "ACTIVE non contending"
390 : * state or not (in the second case, it updates running_bw).
391 : */
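/*
 * Worked 0-lag example (illustrative, not from the original source): a task
 * with dl_runtime = 10ms and dl_period = 100ms blocks with 2ms of runtime
 * left. Its 0-lag time is
 *
 *   deadline - runtime * dl_period / dl_runtime = deadline - 2 * 100/10 ms
 *                                               = deadline - 20ms.
 *
 * If the task blocks earlier than 20ms before its deadline, the inactive
 * timer is armed for that instant; if it blocks later, the 0-lag time has
 * already passed and running_bw is decreased immediately.
 */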
392 0 : static void task_non_contending(struct task_struct *p)
393 : {
394 0 : struct sched_dl_entity *dl_se = &p->dl;
395 0 : struct hrtimer *timer = &dl_se->inactive_timer;
396 0 : struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
397 0 : struct rq *rq = rq_of_dl_rq(dl_rq);
398 : s64 zerolag_time;
399 :
400 : /*
401 : * If this is a non-deadline task that has been boosted,
402 : * do nothing
403 : */
404 0 : if (dl_se->dl_runtime == 0)
405 : return;
406 :
407 0 : if (dl_entity_is_special(dl_se))
408 : return;
409 :
410 0 : WARN_ON(dl_se->dl_non_contending);
411 :
412 0 : zerolag_time = dl_se->deadline -
413 0 : div64_long((dl_se->runtime * dl_se->dl_period),
414 : dl_se->dl_runtime);
415 :
416 : /*
417 : * Using relative times instead of the absolute "0-lag time"
418 : * allows us to simplify the code
419 : */
420 0 : zerolag_time -= rq_clock(rq);
421 :
422 : /*
423 : * If the "0-lag time" already passed, decrease the active
424 : * utilization now, instead of starting a timer
425 : */
426 0 : if ((zerolag_time < 0) || hrtimer_active(&dl_se->inactive_timer)) {
427 0 : if (dl_task(p))
428 0 : sub_running_bw(dl_se, dl_rq);
429 0 : if (!dl_task(p) || READ_ONCE(p->__state) == TASK_DEAD) {
430 0 : struct dl_bw *dl_b = dl_bw_of(task_cpu(p));
431 :
432 0 : if (READ_ONCE(p->__state) == TASK_DEAD)
433 0 : sub_rq_bw(&p->dl, &rq->dl);
434 0 : raw_spin_lock(&dl_b->lock);
435 0 : __dl_sub(dl_b, p->dl.dl_bw, dl_bw_cpus(task_cpu(p)));
436 0 : raw_spin_unlock(&dl_b->lock);
437 : __dl_clear_params(p);
438 : }
439 :
440 : return;
441 : }
442 :
443 0 : dl_se->dl_non_contending = 1;
444 0 : get_task_struct(p);
445 0 : hrtimer_start(timer, ns_to_ktime(zerolag_time), HRTIMER_MODE_REL_HARD);
446 : }
447 :
448 0 : static void task_contending(struct sched_dl_entity *dl_se, int flags)
449 : {
450 0 : struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
451 :
452 : /*
453 : * If this is a non-deadline task that has been boosted,
454 : * do nothing
455 : */
456 0 : if (dl_se->dl_runtime == 0)
457 : return;
458 :
459 : if (flags & ENQUEUE_MIGRATED)
460 : add_rq_bw(dl_se, dl_rq);
461 :
462 0 : if (dl_se->dl_non_contending) {
463 0 : dl_se->dl_non_contending = 0;
464 : /*
465 : * If the timer handler is currently running and the
466 : * timer cannot be canceled, inactive_task_timer()
467 : * will see that dl_non_contending is not set, and
468 : * will not touch the rq's active utilization,
469 : * so we are still safe.
470 : */
471 0 : if (hrtimer_try_to_cancel(&dl_se->inactive_timer) == 1)
472 0 : put_task_struct(dl_task_of(dl_se));
473 : } else {
474 : /*
475 : * Since "dl_non_contending" is not set, the
476 : * task's utilization has already been removed from
477 : * active utilization (either when the task blocked,
478 : * or when the "inactive timer" fired).
479 : * So, add it back.
480 : */
481 0 : add_running_bw(dl_se, dl_rq);
482 : }
483 : }
484 :
485 : static inline int is_leftmost(struct task_struct *p, struct dl_rq *dl_rq)
486 : {
487 0 : struct sched_dl_entity *dl_se = &p->dl;
488 :
489 : return rb_first_cached(&dl_rq->root) == &dl_se->rb_node;
490 : }
491 :
492 : static void init_dl_rq_bw_ratio(struct dl_rq *dl_rq);
493 :
494 1 : void init_dl_bw(struct dl_bw *dl_b)
495 : {
496 : raw_spin_lock_init(&dl_b->lock);
497 1 : if (global_rt_runtime() == RUNTIME_INF)
498 0 : dl_b->bw = -1;
499 : else
500 1 : dl_b->bw = to_ratio(global_rt_period(), global_rt_runtime());
501 1 : dl_b->total_bw = 0;
502 1 : }
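/*
 * Illustrative example (not part of the original source): with the usual
 * defaults of sched_rt_period_us = 1000000 and sched_rt_runtime_us = 950000,
 * to_ratio() yields dl_b->bw ~= 0.95 << BW_SHIFT, i.e. deadline tasks may
 * reserve at most 95% of each CPU; RUNTIME_INF (bw == -1) disables the
 * admission test entirely.
 */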
503 :
504 1 : void init_dl_rq(struct dl_rq *dl_rq)
505 : {
506 1 : dl_rq->root = RB_ROOT_CACHED;
507 :
508 : #ifdef CONFIG_SMP
509 : /* zero means no -deadline tasks */
510 : dl_rq->earliest_dl.curr = dl_rq->earliest_dl.next = 0;
511 :
512 : dl_rq->dl_nr_migratory = 0;
513 : dl_rq->overloaded = 0;
514 : dl_rq->pushable_dl_tasks_root = RB_ROOT_CACHED;
515 : #else
516 1 : init_dl_bw(&dl_rq->dl_bw);
517 : #endif
518 :
519 1 : dl_rq->running_bw = 0;
520 1 : dl_rq->this_bw = 0;
521 1 : init_dl_rq_bw_ratio(dl_rq);
522 1 : }
523 :
524 : #ifdef CONFIG_SMP
525 :
526 : static inline int dl_overloaded(struct rq *rq)
527 : {
528 : return atomic_read(&rq->rd->dlo_count);
529 : }
530 :
531 : static inline void dl_set_overload(struct rq *rq)
532 : {
533 : if (!rq->online)
534 : return;
535 :
536 : cpumask_set_cpu(rq->cpu, rq->rd->dlo_mask);
537 : /*
538 : * Must be visible before the overload count is
539 : * set (as in sched_rt.c).
540 : *
541 : * Matched by the barrier in pull_dl_task().
542 : */
543 : smp_wmb();
544 : atomic_inc(&rq->rd->dlo_count);
545 : }
546 :
547 : static inline void dl_clear_overload(struct rq *rq)
548 : {
549 : if (!rq->online)
550 : return;
551 :
552 : atomic_dec(&rq->rd->dlo_count);
553 : cpumask_clear_cpu(rq->cpu, rq->rd->dlo_mask);
554 : }
555 :
556 : static void update_dl_migration(struct dl_rq *dl_rq)
557 : {
558 : if (dl_rq->dl_nr_migratory && dl_rq->dl_nr_running > 1) {
559 : if (!dl_rq->overloaded) {
560 : dl_set_overload(rq_of_dl_rq(dl_rq));
561 : dl_rq->overloaded = 1;
562 : }
563 : } else if (dl_rq->overloaded) {
564 : dl_clear_overload(rq_of_dl_rq(dl_rq));
565 : dl_rq->overloaded = 0;
566 : }
567 : }
568 :
569 : static void inc_dl_migration(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
570 : {
571 : struct task_struct *p = dl_task_of(dl_se);
572 :
573 : if (p->nr_cpus_allowed > 1)
574 : dl_rq->dl_nr_migratory++;
575 :
576 : update_dl_migration(dl_rq);
577 : }
578 :
579 : static void dec_dl_migration(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
580 : {
581 : struct task_struct *p = dl_task_of(dl_se);
582 :
583 : if (p->nr_cpus_allowed > 1)
584 : dl_rq->dl_nr_migratory--;
585 :
586 : update_dl_migration(dl_rq);
587 : }
588 :
589 : #define __node_2_pdl(node) \
590 : rb_entry((node), struct task_struct, pushable_dl_tasks)
591 :
592 : static inline bool __pushable_less(struct rb_node *a, const struct rb_node *b)
593 : {
594 : return dl_entity_preempt(&__node_2_pdl(a)->dl, &__node_2_pdl(b)->dl);
595 : }
596 :
597 : /*
598 : * The list of pushable -deadline tasks is not a plist, like in
599 : * sched_rt.c; it is an rb-tree with tasks ordered by deadline.
600 : */
601 : static void enqueue_pushable_dl_task(struct rq *rq, struct task_struct *p)
602 : {
603 : struct rb_node *leftmost;
604 :
605 : WARN_ON_ONCE(!RB_EMPTY_NODE(&p->pushable_dl_tasks));
606 :
607 : leftmost = rb_add_cached(&p->pushable_dl_tasks,
608 : &rq->dl.pushable_dl_tasks_root,
609 : __pushable_less);
610 : if (leftmost)
611 : rq->dl.earliest_dl.next = p->dl.deadline;
612 : }
613 :
614 : static void dequeue_pushable_dl_task(struct rq *rq, struct task_struct *p)
615 : {
616 : struct dl_rq *dl_rq = &rq->dl;
617 : struct rb_root_cached *root = &dl_rq->pushable_dl_tasks_root;
618 : struct rb_node *leftmost;
619 :
620 : if (RB_EMPTY_NODE(&p->pushable_dl_tasks))
621 : return;
622 :
623 : leftmost = rb_erase_cached(&p->pushable_dl_tasks, root);
624 : if (leftmost)
625 : dl_rq->earliest_dl.next = __node_2_pdl(leftmost)->dl.deadline;
626 :
627 : RB_CLEAR_NODE(&p->pushable_dl_tasks);
628 : }
629 :
630 : static inline int has_pushable_dl_tasks(struct rq *rq)
631 : {
632 : return !RB_EMPTY_ROOT(&rq->dl.pushable_dl_tasks_root.rb_root);
633 : }
634 :
635 : static int push_dl_task(struct rq *rq);
636 :
637 : static inline bool need_pull_dl_task(struct rq *rq, struct task_struct *prev)
638 : {
639 : return rq->online && dl_task(prev);
640 : }
641 :
642 : static DEFINE_PER_CPU(struct balance_callback, dl_push_head);
643 : static DEFINE_PER_CPU(struct balance_callback, dl_pull_head);
644 :
645 : static void push_dl_tasks(struct rq *);
646 : static void pull_dl_task(struct rq *);
647 :
648 : static inline void deadline_queue_push_tasks(struct rq *rq)
649 : {
650 : if (!has_pushable_dl_tasks(rq))
651 : return;
652 :
653 : queue_balance_callback(rq, &per_cpu(dl_push_head, rq->cpu), push_dl_tasks);
654 : }
655 :
656 : static inline void deadline_queue_pull_task(struct rq *rq)
657 : {
658 : queue_balance_callback(rq, &per_cpu(dl_pull_head, rq->cpu), pull_dl_task);
659 : }
660 :
661 : static struct rq *find_lock_later_rq(struct task_struct *task, struct rq *rq);
662 :
663 : static struct rq *dl_task_offline_migration(struct rq *rq, struct task_struct *p)
664 : {
665 : struct rq *later_rq = NULL;
666 : struct dl_bw *dl_b;
667 :
668 : later_rq = find_lock_later_rq(p, rq);
669 : if (!later_rq) {
670 : int cpu;
671 :
672 : /*
673 : * If we cannot preempt any rq, fall back to pick any
674 : * online CPU:
675 : */
676 : cpu = cpumask_any_and(cpu_active_mask, p->cpus_ptr);
677 : if (cpu >= nr_cpu_ids) {
678 : /*
679 : * Failed to find any suitable CPU.
680 : * The task will never come back!
681 : */
682 : WARN_ON_ONCE(dl_bandwidth_enabled());
683 :
684 : /*
685 : * If admission control is disabled we
686 : * try a little harder to let the task
687 : * run.
688 : */
689 : cpu = cpumask_any(cpu_active_mask);
690 : }
691 : later_rq = cpu_rq(cpu);
692 : double_lock_balance(rq, later_rq);
693 : }
694 :
695 : if (p->dl.dl_non_contending || p->dl.dl_throttled) {
696 : /*
697 : * Inactive timer is armed (or callback is running, but
698 : * waiting for us to release rq locks). In any case, when it
699 : * fires (or continues), it will see the running_bw of this
700 : * task migrated to later_rq (and correctly handle it).
701 : */
702 : sub_running_bw(&p->dl, &rq->dl);
703 : sub_rq_bw(&p->dl, &rq->dl);
704 :
705 : add_rq_bw(&p->dl, &later_rq->dl);
706 : add_running_bw(&p->dl, &later_rq->dl);
707 : } else {
708 : sub_rq_bw(&p->dl, &rq->dl);
709 : add_rq_bw(&p->dl, &later_rq->dl);
710 : }
711 :
712 : /*
713 : * And we finally need to fixup root_domain(s) bandwidth accounting,
714 : * since p is still hanging out in the old (now moved to default) root
715 : * domain.
716 : */
717 : dl_b = &rq->rd->dl_bw;
718 : raw_spin_lock(&dl_b->lock);
719 : __dl_sub(dl_b, p->dl.dl_bw, cpumask_weight(rq->rd->span));
720 : raw_spin_unlock(&dl_b->lock);
721 :
722 : dl_b = &later_rq->rd->dl_bw;
723 : raw_spin_lock(&dl_b->lock);
724 : __dl_add(dl_b, p->dl.dl_bw, cpumask_weight(later_rq->rd->span));
725 : raw_spin_unlock(&dl_b->lock);
726 :
727 : set_task_cpu(p, later_rq->cpu);
728 : double_unlock_balance(later_rq, rq);
729 :
730 : return later_rq;
731 : }
732 :
733 : #else
734 :
735 : static inline
736 : void enqueue_pushable_dl_task(struct rq *rq, struct task_struct *p)
737 : {
738 : }
739 :
740 : static inline
741 : void dequeue_pushable_dl_task(struct rq *rq, struct task_struct *p)
742 : {
743 : }
744 :
745 : static inline
746 : void inc_dl_migration(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
747 : {
748 : }
749 :
750 : static inline
751 : void dec_dl_migration(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
752 : {
753 : }
754 :
755 : static inline void deadline_queue_push_tasks(struct rq *rq)
756 : {
757 : }
758 :
759 : static inline void deadline_queue_pull_task(struct rq *rq)
760 : {
761 : }
762 : #endif /* CONFIG_SMP */
763 :
764 : static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags);
765 : static void __dequeue_task_dl(struct rq *rq, struct task_struct *p, int flags);
766 : static void check_preempt_curr_dl(struct rq *rq, struct task_struct *p, int flags);
767 :
768 : static inline void replenish_dl_new_period(struct sched_dl_entity *dl_se,
769 : struct rq *rq)
770 : {
771 : /* for non-boosted task, pi_of(dl_se) == dl_se */
772 0 : dl_se->deadline = rq_clock(rq) + pi_of(dl_se)->dl_deadline;
773 0 : dl_se->runtime = pi_of(dl_se)->dl_runtime;
774 : }
775 :
776 : /*
777 : * We are being explicitly informed that a new instance is starting,
778 : * and this means that:
779 : * - the absolute deadline of the entity has to be placed at
780 : * current time + relative deadline;
781 : * - the runtime of the entity has to be set to the maximum value.
782 : *
783 : * The capability of specifying such an event is useful whenever a -deadline
784 : * entity wants to (try to!) synchronize its behaviour with that of the
785 : * scheduler, and to (try to!) reconcile itself with its own scheduling
786 : * parameters.
787 : */
788 0 : static inline void setup_new_dl_entity(struct sched_dl_entity *dl_se)
789 : {
790 0 : struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
791 0 : struct rq *rq = rq_of_dl_rq(dl_rq);
792 :
793 0 : WARN_ON(is_dl_boosted(dl_se));
794 0 : WARN_ON(dl_time_before(rq_clock(rq), dl_se->deadline));
795 :
796 : /*
797 : * We are racing with the deadline timer. So, do nothing because
798 : * the deadline timer handler will take care of properly recharging
799 : * the runtime and postponing the deadline
800 : */
801 0 : if (dl_se->dl_throttled)
802 : return;
803 :
804 : /*
805 : * We use the regular wall clock time to set deadlines in the
806 : * future; in fact, we must consider execution overheads (time
807 : * spent on hardirq context, etc.).
808 : */
809 : replenish_dl_new_period(dl_se, rq);
810 : }
811 :
812 : /*
813 : * Pure Earliest Deadline First (EDF) scheduling does not deal with the
814 : * possibility of an entity lasting more than what it declared, and thus
815 : * exhausting its runtime.
816 : *
817 : * Here we are interested in making runtime overrun possible, but we do
818 : * not want an entity which is misbehaving to affect the scheduling of all
819 : * other entities.
820 : * Therefore, a budgeting strategy called Constant Bandwidth Server (CBS)
821 : * is used, in order to confine each entity within its own bandwidth.
822 : *
823 : * This function deals exactly with that, and ensures that when the runtime
824 : * of an entity is replenished, its deadline is also postponed. That ensures
825 : * the overrunning entity can't interfere with other entities in the system and
826 : * can't make them miss their deadlines. Reasons why this kind of overrun
827 : * could typically happen are an entity voluntarily trying to exceed its
828 : * runtime, or simply having underestimated it during sched_setattr().
829 : */
830 0 : static void replenish_dl_entity(struct sched_dl_entity *dl_se)
831 : {
832 0 : struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
833 0 : struct rq *rq = rq_of_dl_rq(dl_rq);
834 :
835 0 : WARN_ON_ONCE(pi_of(dl_se)->dl_runtime <= 0);
836 :
837 : /*
838 : * This could be the case for a !-dl task that is boosted.
839 : * Just go with full inherited parameters.
840 : */
841 0 : if (dl_se->dl_deadline == 0)
842 : replenish_dl_new_period(dl_se, rq);
843 :
844 0 : if (dl_se->dl_yielded && dl_se->runtime > 0)
845 0 : dl_se->runtime = 0;
846 :
847 : /*
848 : * We keep moving the deadline away until we get some
849 : * available runtime for the entity. This ensures correct
850 : * handling of situations where the runtime overrun is
851 : * arbitrarily large.
852 : */
853 0 : while (dl_se->runtime <= 0) {
854 0 : dl_se->deadline += pi_of(dl_se)->dl_period;
855 0 : dl_se->runtime += pi_of(dl_se)->dl_runtime;
856 : }
857 :
858 : /*
859 : * At this point, the deadline really should be "in
860 : * the future" with respect to rq->clock. If it's
861 : * not, we are, for some reason, lagging too much!
862 : * Anyway, after having warned userspace about that,
863 : * we still try to keep things running by
864 : * resetting the deadline and the budget of the
865 : * entity.
866 : */
867 0 : if (dl_time_before(dl_se->deadline, rq_clock(rq))) {
868 0 : printk_deferred_once("sched: DL replenish lagged too much\n");
869 : replenish_dl_new_period(dl_se, rq);
870 : }
871 :
872 0 : if (dl_se->dl_yielded)
873 0 : dl_se->dl_yielded = 0;
874 0 : if (dl_se->dl_throttled)
875 0 : dl_se->dl_throttled = 0;
876 0 : }
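/*
 * Worked example (illustrative, not from the original source): a throttled
 * entity with dl_runtime = 10ms, dl_period = 100ms and runtime == -5ms
 * (a 5ms overrun) needs a single loop iteration:
 *
 *   deadline += 100ms;  runtime += 10ms;   => runtime == 5ms
 *
 * The next instance starts with only the 5ms that were not already consumed
 * and a deadline a full period away, keeping the entity within its 10%
 * bandwidth.
 */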
877 :
878 : /*
879 : * Here we check if --at time t-- an entity (which is probably being
880 : * [re]activated or, in general, enqueued) can use its remaining runtime
881 : * and its current deadline _without_ exceeding the bandwidth it is
882 : * assigned (function returns true if it can't). We are in fact applying
883 : * one of the CBS rules: when a task wakes up, if the residual runtime
884 : * over residual deadline fits within the allocated bandwidth, then we
885 : * can keep the current (absolute) deadline and residual budget without
886 : * disrupting the schedulability of the system. Otherwise, we should
887 : * refill the runtime and set the deadline a period in the future,
888 : * because keeping the current (absolute) deadline of the task would
889 : * result in breaking guarantees promised to other tasks (refer to
890 : * Documentation/scheduler/sched-deadline.rst for more information).
891 : *
892 : * This function returns true if:
893 : *
894 : * runtime / (deadline - t) > dl_runtime / dl_deadline ,
895 : *
896 : * IOW we can't recycle current parameters.
897 : *
898 : * Notice that the bandwidth check is done against the deadline. For
899 : * a task with deadline equal to period this is the same as using
900 : * dl_period instead of dl_deadline in the equation above.
901 : */
902 : static bool dl_entity_overflow(struct sched_dl_entity *dl_se, u64 t)
903 : {
904 : u64 left, right;
905 :
906 : /*
907 : * left and right are the two sides of the equation above,
908 : * after a bit of shuffling to use multiplications instead
909 : * of divisions.
910 : *
911 : * Note that none of the time values involved in the two
912 : * multiplications are absolute: dl_deadline and dl_runtime
913 : * are the relative deadline and the maximum runtime of each
914 : * instance, runtime is the runtime left for the last instance
915 : * and (deadline - t), since t is rq->clock, is the time left
916 : * to the (absolute) deadline. Even if overflowing the u64 type
917 : * is very unlikely to occur in both cases, here we scale down
918 : * as we want to avoid that risk at all. Scaling down by 10
919 : * means that we reduce granularity to 1us. We are fine with it,
920 : * since this is only a true/false check and, anyway, thinking
921 : * of anything below microseconds resolution is actually fiction
922 : * (but still we want to give the user that illusion >;).
923 : */
924 0 : left = (pi_of(dl_se)->dl_deadline >> DL_SCALE) * (dl_se->runtime >> DL_SCALE);
925 0 : right = ((dl_se->deadline - t) >> DL_SCALE) *
926 0 : (pi_of(dl_se)->dl_runtime >> DL_SCALE);
927 :
928 0 : return dl_time_before(right, left);
929 : }
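/*
 * Worked example (illustrative, not from the original source): dl_runtime =
 * 10ms, dl_deadline = 100ms, and at wakeup the task still has runtime = 8ms
 * with (deadline - t) = 50ms. Then 8/50 = 0.16 > 10/100 = 0.10, i.e.
 * left = 100ms * 8ms is larger than right = 50ms * 10ms, so the function
 * returns true: reusing the old deadline would exceed the reserved bandwidth
 * and the parameters are replenished instead.
 */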
930 :
931 : /*
932 : * Revised wakeup rule [1]: For self-suspending tasks, rather than
933 : * re-initializing the task's runtime and deadline, the revised wakeup
934 : * rule adjusts the task's runtime to prevent the task from overrunning
935 : * its density.
936 : *
937 : * Reasoning: a task may overrun the density if:
938 : * runtime / (deadline - t) > dl_runtime / dl_deadline
939 : *
940 : * Therefore, runtime can be adjusted to:
941 : * runtime = (dl_runtime / dl_deadline) * (deadline - t)
942 : *
943 : * This way, the runtime will be equal to the maximum density
944 : * the task can use without breaking any rule.
945 : *
946 : * [1] Luca Abeni, Giuseppe Lipari, and Juri Lelli. 2015. Constant
947 : * bandwidth server revisited. SIGBED Rev. 11, 4 (January 2015), 19-24.
948 : */
949 : static void
950 0 : update_dl_revised_wakeup(struct sched_dl_entity *dl_se, struct rq *rq)
951 : {
952 0 : u64 laxity = dl_se->deadline - rq_clock(rq);
953 :
954 : /*
955 : * If the task has deadline < period, and the deadline is in the past,
956 : * it should already be throttled before this check.
957 : *
958 : * See update_dl_entity() comments for further details.
959 : */
960 0 : WARN_ON(dl_time_before(dl_se->deadline, rq_clock(rq)));
961 :
962 0 : dl_se->runtime = (dl_se->dl_density * laxity) >> BW_SHIFT;
963 0 : }
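/*
 * Worked example (illustrative, not from the original source): with
 * dl_runtime = 10ms and dl_deadline = 100ms, dl_density is 0.1 (scaled by
 * 2^BW_SHIFT). If the task wakes up with laxity = 50ms to its absolute
 * deadline, its runtime is clipped to 0.1 * 50ms = 5ms, so it can never
 * exceed the declared density over the remaining laxity.
 */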
964 :
965 : /*
966 : * Regarding the deadline, a task with implicit deadline has a relative
967 : * deadline == relative period. A task with constrained deadline has a
968 : * relative deadline <= relative period.
969 : *
970 : * We support constrained deadline tasks. However, there are some restrictions
971 : * applied only for tasks which do not have an implicit deadline. See
972 : * update_dl_entity() to know more about such restrictions.
973 : *
974 : * The dl_is_implicit() returns true if the task has an implicit deadline.
975 : */
976 : static inline bool dl_is_implicit(struct sched_dl_entity *dl_se)
977 : {
978 : return dl_se->dl_deadline == dl_se->dl_period;
979 : }
980 :
981 : /*
982 : * When a deadline entity is placed in the runqueue, its runtime and deadline
983 : * might need to be updated. This is done by a CBS wake up rule. There are two
984 : * different rules: 1) the original CBS; and 2) the Revisited CBS.
985 : *
986 : * When the task is starting a new period, the Original CBS is used. In this
987 : * case, the runtime is replenished and a new absolute deadline is set.
988 : *
989 : * When a task is queued before the beginning of the next period, using the
990 : * remaining runtime and deadline could cause the entity to overflow; see
991 : * dl_entity_overflow() to find out more about runtime overflow. When such a case
992 : * is detected, the runtime and deadline need to be updated.
993 : *
994 : * If the task has an implicit deadline, i.e., deadline == period, the Original
995 : * CBS is applied. The runtime is replenished and a new absolute deadline is
996 : * set, as in the previous cases.
997 : *
998 : * However, the Original CBS does not work properly for tasks with
999 : * deadline < period, which are said to have a constrained deadline. By
1000 : * applying the Original CBS, a constrained deadline task would be able to run
1001 : * runtime/deadline in a period. With deadline < period, the task would
1002 : * overrun the runtime/period allowed bandwidth, breaking the admission test.
1003 : *
1004 : * In order to prevent this misbehavior, the Revisited CBS is used for
1005 : * constrained deadline tasks when a runtime overflow is detected. In the
1006 : * Revisited CBS, rather than replenishing & setting a new absolute deadline,
1007 : * the remaining runtime of the task is reduced to avoid runtime overflow.
1008 : * Please refer to the comments of the update_dl_revised_wakeup() function
1009 : * to find out more about the Revised CBS rule.
1010 : */
1011 0 : static void update_dl_entity(struct sched_dl_entity *dl_se)
1012 : {
1013 0 : struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
1014 0 : struct rq *rq = rq_of_dl_rq(dl_rq);
1015 :
1016 0 : if (dl_time_before(dl_se->deadline, rq_clock(rq)) ||
1017 0 : dl_entity_overflow(dl_se, rq_clock(rq))) {
1018 :
1019 0 : if (unlikely(!dl_is_implicit(dl_se) &&
1020 : !dl_time_before(dl_se->deadline, rq_clock(rq)) &&
1021 : !is_dl_boosted(dl_se))) {
1022 0 : update_dl_revised_wakeup(dl_se, rq);
1023 0 : return;
1024 : }
1025 :
1026 : replenish_dl_new_period(dl_se, rq);
1027 : }
1028 : }
1029 :
1030 : static inline u64 dl_next_period(struct sched_dl_entity *dl_se)
1031 : {
1032 0 : return dl_se->deadline - dl_se->dl_deadline + dl_se->dl_period;
1033 : }
1034 :
1035 : /*
1036 : * If the entity has depleted all its runtime, and if we want it to sleep
1037 : * while waiting for some new execution time to become available, we
1038 : * set the bandwidth replenishment timer to the replenishment instant
1039 : * and try to activate it.
1040 : *
1041 : * Notice that it is important for the caller to know if the timer
1042 : * actually started or not (i.e., the replenishment instant is in
1043 : * the future or in the past).
1044 : */
1045 0 : static int start_dl_timer(struct task_struct *p)
1046 : {
1047 0 : struct sched_dl_entity *dl_se = &p->dl;
1048 0 : struct hrtimer *timer = &dl_se->dl_timer;
1049 0 : struct rq *rq = task_rq(p);
1050 : ktime_t now, act;
1051 : s64 delta;
1052 :
1053 0 : lockdep_assert_rq_held(rq);
1054 :
1055 : /*
1056 : * We want the timer to fire at the deadline, but considering
1057 : * that it is actually coming from rq->clock and not from
1058 : * hrtimer's time base reading.
1059 : */
1060 0 : act = ns_to_ktime(dl_next_period(dl_se));
1061 0 : now = hrtimer_cb_get_time(timer);
1062 0 : delta = ktime_to_ns(now) - rq_clock(rq);
1063 0 : act = ktime_add_ns(act, delta);
1064 :
1065 : /*
1066 : * If the expiry time already passed, e.g., because the value
1067 : * chosen as the deadline is too small, don't even try to
1068 : * start the timer in the past!
1069 : */
1070 0 : if (ktime_us_delta(act, now) < 0)
1071 : return 0;
1072 :
1073 : /*
1074 : * !enqueued will guarantee another callback; even if one is already in
1075 : * progress. This ensures a balanced {get,put}_task_struct().
1076 : *
1077 : * The race against __run_timer() clearing the enqueued state is
1078 : * harmless because we're holding task_rq()->lock, therefore the timer
1079 : * expiring after we've done the check will wait on its task_rq_lock()
1080 : * and observe our state.
1081 : */
1082 0 : if (!hrtimer_is_queued(timer)) {
1083 0 : get_task_struct(p);
1084 : hrtimer_start(timer, act, HRTIMER_MODE_ABS_HARD);
1085 : }
1086 :
1087 : return 1;
1088 : }
1089 :
1090 : /*
1091 : * This is the bandwidth enforcement timer callback. If here, we know
1092 : * a task is not on its dl_rq, since the fact that the timer was running
1093 : * means the task is throttled and needs a runtime replenishment.
1094 : *
1095 : * However, what we actually do depends on whether the task is active
1096 : * (i.e., it is on its rq) or has been removed from there by a call to
1097 : * dequeue_task_dl(). In the former case we must issue the runtime
1098 : * replenishment and add the task back to the dl_rq; in the latter, we just
1099 : * do nothing but clearing dl_throttled, so that runtime and deadline
1100 : * updating (and the queueing back to dl_rq) will be done by the
1101 : * next call to enqueue_task_dl().
1102 : */
1103 0 : static enum hrtimer_restart dl_task_timer(struct hrtimer *timer)
1104 : {
1105 0 : struct sched_dl_entity *dl_se = container_of(timer,
1106 : struct sched_dl_entity,
1107 : dl_timer);
1108 0 : struct task_struct *p = dl_task_of(dl_se);
1109 : struct rq_flags rf;
1110 : struct rq *rq;
1111 :
1112 0 : rq = task_rq_lock(p, &rf);
1113 :
1114 : /*
1115 : * The task might have changed its scheduling policy to something
1116 : * different than SCHED_DEADLINE (through switched_from_dl()).
1117 : */
1118 0 : if (!dl_task(p))
1119 : goto unlock;
1120 :
1121 : /*
1122 : * The task might have been boosted by someone else and might be in the
1123 : * boosting/deboosting path; in that case it's not throttled.
1124 : */
1125 0 : if (is_dl_boosted(dl_se))
1126 : goto unlock;
1127 :
1128 : /*
1129 : * Spurious timer due to start_dl_timer() race; or we already received
1130 : * a replenishment from rt_mutex_setprio().
1131 : */
1132 0 : if (!dl_se->dl_throttled)
1133 : goto unlock;
1134 :
1135 : sched_clock_tick();
1136 0 : update_rq_clock(rq);
1137 :
1138 : /*
1139 : * If the throttle happened during sched-out; like:
1140 : *
1141 : * schedule()
1142 : * deactivate_task()
1143 : * dequeue_task_dl()
1144 : * update_curr_dl()
1145 : * start_dl_timer()
1146 : * __dequeue_task_dl()
1147 : * prev->on_rq = 0;
1148 : *
1149 : * We can be both throttled and !queued. Replenish the counter
1150 : * but do not enqueue -- wait for our wakeup to do that.
1151 : */
1152 0 : if (!task_on_rq_queued(p)) {
1153 0 : replenish_dl_entity(dl_se);
1154 0 : goto unlock;
1155 : }
1156 :
1157 : #ifdef CONFIG_SMP
1158 : if (unlikely(!rq->online)) {
1159 : /*
1160 : * If the runqueue is no longer available, migrate the
1161 : * task elsewhere. This necessarily changes rq.
1162 : */
1163 : lockdep_unpin_lock(__rq_lockp(rq), rf.cookie);
1164 : rq = dl_task_offline_migration(rq, p);
1165 : rf.cookie = lockdep_pin_lock(__rq_lockp(rq));
1166 : update_rq_clock(rq);
1167 :
1168 : /*
1169 : * Now that the task has been migrated to the new RQ and we
1170 : * have that locked, proceed as normal and enqueue the task
1171 : * there.
1172 : */
1173 : }
1174 : #endif
1175 :
1176 0 : enqueue_task_dl(rq, p, ENQUEUE_REPLENISH);
1177 0 : if (dl_task(rq->curr))
1178 : check_preempt_curr_dl(rq, p, 0);
1179 : else
1180 0 : resched_curr(rq);
1181 :
1182 : #ifdef CONFIG_SMP
1183 : /*
1184 : * Queueing this task back might have overloaded rq, check if we need
1185 : * to kick someone away.
1186 : */
1187 : if (has_pushable_dl_tasks(rq)) {
1188 : /*
1189 : * Nothing relies on rq->lock after this, so it's safe to drop
1190 : * rq->lock.
1191 : */
1192 : rq_unpin_lock(rq, &rf);
1193 : push_dl_task(rq);
1194 : rq_repin_lock(rq, &rf);
1195 : }
1196 : #endif
1197 :
1198 : unlock:
1199 0 : task_rq_unlock(rq, p, &rf);
1200 :
1201 : /*
1202 : * This can free the task_struct, including this hrtimer; do not touch
1203 : * anything related to that after this.
1204 : */
1205 0 : put_task_struct(p);
1206 :
1207 0 : return HRTIMER_NORESTART;
1208 : }
1209 :
1210 176 : void init_dl_task_timer(struct sched_dl_entity *dl_se)
1211 : {
1212 176 : struct hrtimer *timer = &dl_se->dl_timer;
1213 :
1214 176 : hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD);
1215 176 : timer->function = dl_task_timer;
1216 176 : }
1217 :
1218 : /*
1219 : * During the activation, CBS checks if it can reuse the current task's
1220 : * runtime and period. If the deadline of the task is in the past, CBS
1221 : * cannot use the runtime, and so it replenishes the task. This rule
1222 : * works fine for implicit deadline tasks (deadline == period), and the
1223 : * CBS was designed for implicit deadline tasks. However, a task with
1224 : * constrained deadline (deadline < period) might be awakened after the
1225 : * deadline, but before the next period. In this case, replenishing the
1226 : * task would allow it to run for runtime / deadline. As in this case
1227 : * deadline < period, CBS enables a task to run for more than the
1228 : * runtime / period. In a very loaded system, this can cause a domino
1229 : * effect, making other tasks miss their deadlines.
1230 : *
1231 : * To avoid this problem, in the activation of a constrained deadline
1232 : * task after the deadline but before the next period, throttle the
1233 : * task and set the replenishing timer to the beginning of the next period,
1234 : * unless it is boosted.
1235 : */
1236 0 : static inline void dl_check_constrained_dl(struct sched_dl_entity *dl_se)
1237 : {
1238 0 : struct task_struct *p = dl_task_of(dl_se);
1239 0 : struct rq *rq = rq_of_dl_rq(dl_rq_of_se(dl_se));
1240 :
1241 0 : if (dl_time_before(dl_se->deadline, rq_clock(rq)) &&
1242 0 : dl_time_before(rq_clock(rq), dl_next_period(dl_se))) {
1243 0 : if (unlikely(is_dl_boosted(dl_se) || !start_dl_timer(p)))
1244 : return;
1245 0 : dl_se->dl_throttled = 1;
1246 0 : if (dl_se->runtime > 0)
1247 0 : dl_se->runtime = 0;
1248 : }
1249 : }
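/*
 * Worked example (illustrative, not from the original source): a constrained
 * task with dl_runtime = 10ms, dl_deadline = 30ms, dl_period = 100ms missed
 * its absolute deadline at t = 130ms and wakes up at t = 140ms. Since
 * 130ms < 140ms < dl_next_period() == 200ms, the task is throttled with
 * runtime forced to 0 and the replenishment timer fires at 200ms; simply
 * replenishing it at wakeup would let it run runtime/deadline (33%) instead
 * of the admitted runtime/period (10%).
 */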
1250 :
1251 : static
1252 : int dl_runtime_exceeded(struct sched_dl_entity *dl_se)
1253 : {
1254 : return (dl_se->runtime <= 0);
1255 : }
1256 :
1257 : /*
1258 : * This function implements the GRUB accounting rule. According to the
1259 : * GRUB reclaiming algorithm, the runtime is not decreased as "dq = -dt",
1260 : * but as "dq = -(max{u, (Umax - Uinact - Uextra)} / Umax) dt",
1261 : * where u is the utilization of the task, Umax is the maximum reclaimable
1262 : * utilization, Uinact is the (per-runqueue) inactive utilization, computed
1263 : * as the difference between the "total runqueue utilization" and the
1264 : * "runqueue active utilization", and Uextra is the (per runqueue) extra
1265 : * reclaimable utilization.
1266 : * Since rq->dl.running_bw and rq->dl.this_bw contain utilizations multiplied
1267 : * by 2^BW_SHIFT, the result has to be shifted right by BW_SHIFT.
1268 : * Since rq->dl.bw_ratio contains 1 / Umax multiplied by 2^RATIO_SHIFT, dl_bw
1269 : * is multiplied by rq->dl.bw_ratio and shifted right by RATIO_SHIFT.
1270 : * Since delta is a 64 bit variable, to have an overflow its value should be
1271 : * larger than 2^(64 - 20 - 8), which is more than 64 seconds. So, overflow is
1272 : * not an issue here.
1273 : */
1274 : static u64 grub_reclaim(u64 delta, struct rq *rq, struct sched_dl_entity *dl_se)
1275 : {
1276 : u64 u_act;
1277 0 : u64 u_inact = rq->dl.this_bw - rq->dl.running_bw; /* Utot - Uact */
1278 :
1279 : /*
1280 : * Instead of computing max{u, (u_max - u_inact - u_extra)}, we
1281 : * compare u_inact + u_extra with u_max - u, because u_inact + u_extra
1282 : * can be larger than u_max. So, u_max - u_inact - u_extra would be
1283 : * negative leading to wrong results.
1284 : */
1285 0 : if (u_inact + rq->dl.extra_bw > rq->dl.max_bw - dl_se->dl_bw)
1286 : u_act = dl_se->dl_bw;
1287 : else
1288 0 : u_act = rq->dl.max_bw - u_inact - rq->dl.extra_bw;
1289 :
1290 0 : u_act = (u_act * rq->dl.bw_ratio) >> RATIO_SHIFT;
1291 0 : return (delta * u_act) >> BW_SHIFT;
1292 : }
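/*
 * Worked example (illustrative, not from the original source, assuming
 * Umax = 1 so that bw_ratio == 1 << RATIO_SHIFT): on a runqueue with
 * max_bw = 1.0, this_bw = 0.5, running_bw = 0.4 and extra_bw = 0.3 (all
 * scaled by 2^BW_SHIFT), u_inact = 0.1. For a task with dl_bw = 0.2,
 * u_inact + extra_bw = 0.4 does not exceed max_bw - dl_bw = 0.8, so
 * u_act = 1.0 - 0.1 - 0.3 = 0.6 and running for delta = 1ms of wall clock
 * consumes only 0.6ms of runtime, reclaiming the unused bandwidth.
 */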
1293 :
1294 : /*
1295 : * Update the current task's runtime statistics (provided it is still
1296 : * a -deadline task and has not been removed from the dl_rq).
1297 : */
1298 0 : static void update_curr_dl(struct rq *rq)
1299 : {
1300 0 : struct task_struct *curr = rq->curr;
1301 0 : struct sched_dl_entity *dl_se = &curr->dl;
1302 : u64 delta_exec, scaled_delta_exec;
1303 0 : int cpu = cpu_of(rq);
1304 : u64 now;
1305 :
1306 0 : if (!dl_task(curr) || !on_dl_rq(dl_se))
1307 : return;
1308 :
1309 : /*
1310 : * Consumed budget is computed considering the time as
1311 : * observed by schedulable tasks (excluding time spent
1312 : * in hardirq context, etc.). Deadlines are instead
1313 : * computed using hard walltime. This seems to be the more
1314 : * natural solution, but the full ramifications of this
1315 : * approach need further study.
1316 : */
1317 0 : now = rq_clock_task(rq);
1318 0 : delta_exec = now - curr->se.exec_start;
1319 0 : if (unlikely((s64)delta_exec <= 0)) {
1320 0 : if (unlikely(dl_se->dl_yielded))
1321 : goto throttle;
1322 : return;
1323 : }
1324 :
1325 : schedstat_set(curr->stats.exec_max,
1326 : max(curr->stats.exec_max, delta_exec));
1327 :
1328 0 : trace_sched_stat_runtime(curr, delta_exec, 0);
1329 :
1330 0 : update_current_exec_runtime(curr, now, delta_exec);
1331 :
1332 0 : if (dl_entity_is_special(dl_se))
1333 : return;
1334 :
1335 : /*
1336 : * For tasks that participate in GRUB, we implement GRUB-PA: the
1337 : * spare reclaimed bandwidth is used to clock down frequency.
1338 : *
1339 : * For the others, we still need to scale reservation parameters
1340 : * according to current frequency and CPU maximum capacity.
1341 : */
1342 0 : if (unlikely(dl_se->flags & SCHED_FLAG_RECLAIM)) {
1343 0 : scaled_delta_exec = grub_reclaim(delta_exec,
1344 : rq,
1345 : &curr->dl);
1346 : } else {
1347 0 : unsigned long scale_freq = arch_scale_freq_capacity(cpu);
1348 0 : unsigned long scale_cpu = arch_scale_cpu_capacity(cpu);
1349 :
1350 0 : scaled_delta_exec = cap_scale(delta_exec, scale_freq);
1351 0 : scaled_delta_exec = cap_scale(scaled_delta_exec, scale_cpu);
1352 : }
1353 :
1354 0 : dl_se->runtime -= scaled_delta_exec;
1355 :
1356 : throttle:
1357 0 : if (dl_runtime_exceeded(dl_se) || dl_se->dl_yielded) {
1358 0 : dl_se->dl_throttled = 1;
1359 :
1360 : /* If requested, inform the user about runtime overruns. */
1361 0 : if (dl_runtime_exceeded(dl_se) &&
1362 0 : (dl_se->flags & SCHED_FLAG_DL_OVERRUN))
1363 0 : dl_se->dl_overrun = 1;
1364 :
1365 0 : __dequeue_task_dl(rq, curr, 0);
1366 0 : if (unlikely(is_dl_boosted(dl_se) || !start_dl_timer(curr)))
1367 0 : enqueue_task_dl(rq, curr, ENQUEUE_REPLENISH);
1368 :
1369 0 : if (!is_leftmost(curr, &rq->dl))
1370 0 : resched_curr(rq);
1371 : }
1372 :
1373 : /*
1374 : * Because -- for now -- we share the rt bandwidth, we need to
1375 : * account our runtime there too, otherwise actual rt tasks
1376 : * would be able to exceed the shared quota.
1377 : *
1378 : * Account to the root rt group for now.
1379 : *
1380 : * The solution we're working towards is having the RT groups scheduled
1381 : * using deadline servers -- however there's a few nasties to figure
1382 : * out before that can happen.
1383 : */
1384 0 : if (rt_bandwidth_enabled()) {
1385 0 : struct rt_rq *rt_rq = &rq->rt;
1386 :
1387 0 : raw_spin_lock(&rt_rq->rt_runtime_lock);
1388 : /*
1389 : * We'll let actual RT tasks worry about the overflow here; we
1390 : * have our own CBS to keep us in line. Only account when RT
1391 : * bandwidth is relevant.
1392 : */
1393 0 : if (sched_rt_bandwidth_account(rt_rq))
1394 0 : rt_rq->rt_time += delta_exec;
1395 0 : raw_spin_unlock(&rt_rq->rt_runtime_lock);
1396 : }
1397 : }
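/*
 * Worked example (illustrative, not from the original source): for a
 * non-RECLAIM task running at half the maximum frequency (scale_freq = 512)
 * on a CPU with half the maximum capacity (scale_cpu = 512, out of
 * SCHED_CAPACITY_SCALE = 1024), 1ms of wall-clock execution is charged as
 * 1ms * 512/1024 * 512/1024 = 0.25ms of runtime, i.e. the reservation is
 * accounted in units of work done rather than raw time.
 */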
1398 :
1399 0 : static enum hrtimer_restart inactive_task_timer(struct hrtimer *timer)
1400 : {
1401 0 : struct sched_dl_entity *dl_se = container_of(timer,
1402 : struct sched_dl_entity,
1403 : inactive_timer);
1404 0 : struct task_struct *p = dl_task_of(dl_se);
1405 : struct rq_flags rf;
1406 : struct rq *rq;
1407 :
1408 0 : rq = task_rq_lock(p, &rf);
1409 :
1410 : sched_clock_tick();
1411 0 : update_rq_clock(rq);
1412 :
1413 0 : if (!dl_task(p) || READ_ONCE(p->__state) == TASK_DEAD) {
1414 0 : struct dl_bw *dl_b = dl_bw_of(task_cpu(p));
1415 :
1416 0 : if (READ_ONCE(p->__state) == TASK_DEAD && dl_se->dl_non_contending) {
1417 0 : sub_running_bw(&p->dl, dl_rq_of_se(&p->dl));
1418 0 : sub_rq_bw(&p->dl, dl_rq_of_se(&p->dl));
1419 0 : dl_se->dl_non_contending = 0;
1420 : }
1421 :
1422 0 : raw_spin_lock(&dl_b->lock);
1423 0 : __dl_sub(dl_b, p->dl.dl_bw, dl_bw_cpus(task_cpu(p)));
1424 0 : raw_spin_unlock(&dl_b->lock);
1425 : __dl_clear_params(p);
1426 :
1427 : goto unlock;
1428 : }
1429 0 : if (dl_se->dl_non_contending == 0)
1430 : goto unlock;
1431 :
1432 0 : sub_running_bw(dl_se, &rq->dl);
1433 0 : dl_se->dl_non_contending = 0;
1434 : unlock:
1435 0 : task_rq_unlock(rq, p, &rf);
1436 0 : put_task_struct(p);
1437 :
1438 0 : return HRTIMER_NORESTART;
1439 : }
1440 :
1441 176 : void init_dl_inactive_task_timer(struct sched_dl_entity *dl_se)
1442 : {
1443 176 : struct hrtimer *timer = &dl_se->inactive_timer;
1444 :
1445 176 : hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD);
1446 176 : timer->function = inactive_task_timer;
1447 176 : }
1448 :
1449 : #define __node_2_dle(node) \
1450 : rb_entry((node), struct sched_dl_entity, rb_node)
1451 :
1452 : #ifdef CONFIG_SMP
1453 :
1454 : static void inc_dl_deadline(struct dl_rq *dl_rq, u64 deadline)
1455 : {
1456 : struct rq *rq = rq_of_dl_rq(dl_rq);
1457 :
1458 : if (dl_rq->earliest_dl.curr == 0 ||
1459 : dl_time_before(deadline, dl_rq->earliest_dl.curr)) {
1460 : if (dl_rq->earliest_dl.curr == 0)
1461 : cpupri_set(&rq->rd->cpupri, rq->cpu, CPUPRI_HIGHER);
1462 : dl_rq->earliest_dl.curr = deadline;
1463 : cpudl_set(&rq->rd->cpudl, rq->cpu, deadline);
1464 : }
1465 : }
1466 :
1467 : static void dec_dl_deadline(struct dl_rq *dl_rq, u64 deadline)
1468 : {
1469 : struct rq *rq = rq_of_dl_rq(dl_rq);
1470 :
1471 : /*
1472 : * Since we may have removed our earliest (and/or next earliest)
1473 : * task we must recompute them.
1474 : */
1475 : if (!dl_rq->dl_nr_running) {
1476 : dl_rq->earliest_dl.curr = 0;
1477 : dl_rq->earliest_dl.next = 0;
1478 : cpudl_clear(&rq->rd->cpudl, rq->cpu);
1479 : cpupri_set(&rq->rd->cpupri, rq->cpu, rq->rt.highest_prio.curr);
1480 : } else {
1481 : struct rb_node *leftmost = rb_first_cached(&dl_rq->root);
1482 : struct sched_dl_entity *entry = __node_2_dle(leftmost);
1483 :
1484 : dl_rq->earliest_dl.curr = entry->deadline;
1485 : cpudl_set(&rq->rd->cpudl, rq->cpu, entry->deadline);
1486 : }
1487 : }
1488 :
1489 : #else
1490 :
1491 : static inline void inc_dl_deadline(struct dl_rq *dl_rq, u64 deadline) {}
1492 : static inline void dec_dl_deadline(struct dl_rq *dl_rq, u64 deadline) {}
1493 :
1494 : #endif /* CONFIG_SMP */
1495 :
1496 : static inline
1497 0 : void inc_dl_tasks(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
1498 : {
1499 0 : int prio = dl_task_of(dl_se)->prio;
1500 0 : u64 deadline = dl_se->deadline;
1501 :
1502 0 : WARN_ON(!dl_prio(prio));
1503 0 : dl_rq->dl_nr_running++;
1504 0 : add_nr_running(rq_of_dl_rq(dl_rq), 1);
1505 :
1506 0 : inc_dl_deadline(dl_rq, deadline);
1507 0 : inc_dl_migration(dl_se, dl_rq);
1508 0 : }
1509 :
1510 : static inline
1511 0 : void dec_dl_tasks(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
1512 : {
1513 0 : int prio = dl_task_of(dl_se)->prio;
1514 :
1515 0 : WARN_ON(!dl_prio(prio));
1516 0 : WARN_ON(!dl_rq->dl_nr_running);
1517 0 : dl_rq->dl_nr_running--;
1518 0 : sub_nr_running(rq_of_dl_rq(dl_rq), 1);
1519 :
1520 0 : dec_dl_deadline(dl_rq, dl_se->deadline);
1521 0 : dec_dl_migration(dl_se, dl_rq);
1522 0 : }
1523 :
1524 : static inline bool __dl_less(struct rb_node *a, const struct rb_node *b)
1525 : {
1526 0 : return dl_time_before(__node_2_dle(a)->deadline, __node_2_dle(b)->deadline);
1527 : }
1528 :
1529 : static inline struct sched_statistics *
1530 : __schedstats_from_dl_se(struct sched_dl_entity *dl_se)
1531 : {
1532 : return &dl_task_of(dl_se)->stats;
1533 : }
1534 :
1535 : static inline void
1536 : update_stats_wait_start_dl(struct dl_rq *dl_rq, struct sched_dl_entity *dl_se)
1537 : {
1538 : struct sched_statistics *stats;
1539 :
1540 : if (!schedstat_enabled())
1541 : return;
1542 :
1543 : stats = __schedstats_from_dl_se(dl_se);
1544 : __update_stats_wait_start(rq_of_dl_rq(dl_rq), dl_task_of(dl_se), stats);
1545 : }
1546 :
1547 : static inline void
1548 : update_stats_wait_end_dl(struct dl_rq *dl_rq, struct sched_dl_entity *dl_se)
1549 : {
1550 : struct sched_statistics *stats;
1551 :
1552 : if (!schedstat_enabled())
1553 : return;
1554 :
1555 : stats = __schedstats_from_dl_se(dl_se);
1556 : __update_stats_wait_end(rq_of_dl_rq(dl_rq), dl_task_of(dl_se), stats);
1557 : }
1558 :
1559 : static inline void
1560 : update_stats_enqueue_sleeper_dl(struct dl_rq *dl_rq, struct sched_dl_entity *dl_se)
1561 : {
1562 : struct sched_statistics *stats;
1563 :
1564 : if (!schedstat_enabled())
1565 : return;
1566 :
1567 : stats = __schedstats_from_dl_se(dl_se);
1568 : __update_stats_enqueue_sleeper(rq_of_dl_rq(dl_rq), dl_task_of(dl_se), stats);
1569 : }
1570 :
1571 : static inline void
1572 : update_stats_enqueue_dl(struct dl_rq *dl_rq, struct sched_dl_entity *dl_se,
1573 : int flags)
1574 : {
1575 : if (!schedstat_enabled())
1576 : return;
1577 :
1578 : if (flags & ENQUEUE_WAKEUP)
1579 : update_stats_enqueue_sleeper_dl(dl_rq, dl_se);
1580 : }
1581 :
1582 : static inline void
1583 : update_stats_dequeue_dl(struct dl_rq *dl_rq, struct sched_dl_entity *dl_se,
1584 : int flags)
1585 : {
1586 0 : struct task_struct *p = dl_task_of(dl_se);
1587 :
1588 : if (!schedstat_enabled())
1589 : return;
1590 :
1591 : if ((flags & DEQUEUE_SLEEP)) {
1592 : unsigned int state;
1593 :
1594 : state = READ_ONCE(p->__state);
1595 : if (state & TASK_INTERRUPTIBLE)
1596 : __schedstat_set(p->stats.sleep_start,
1597 : rq_clock(rq_of_dl_rq(dl_rq)));
1598 :
1599 : if (state & TASK_UNINTERRUPTIBLE)
1600 : __schedstat_set(p->stats.block_start,
1601 : rq_clock(rq_of_dl_rq(dl_rq)));
1602 : }
1603 : }
1604 :
1605 0 : static void __enqueue_dl_entity(struct sched_dl_entity *dl_se)
1606 : {
1607 0 : struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
1608 :
1609 0 : WARN_ON_ONCE(!RB_EMPTY_NODE(&dl_se->rb_node));
1610 :
1611 0 : rb_add_cached(&dl_se->rb_node, &dl_rq->root, __dl_less);
1612 :
1613 0 : inc_dl_tasks(dl_se, dl_rq);
1614 0 : }
1615 :
1616 0 : static void __dequeue_dl_entity(struct sched_dl_entity *dl_se)
1617 : {
1618 0 : struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
1619 :
1620 0 : if (RB_EMPTY_NODE(&dl_se->rb_node))
1621 : return;
1622 :
1623 0 : rb_erase_cached(&dl_se->rb_node, &dl_rq->root);
1624 :
1625 0 : RB_CLEAR_NODE(&dl_se->rb_node);
1626 :
1627 0 : dec_dl_tasks(dl_se, dl_rq);
1628 : }
1629 :
1630 : static void
1631 0 : enqueue_dl_entity(struct sched_dl_entity *dl_se, int flags)
1632 : {
1633 0 : WARN_ON_ONCE(on_dl_rq(dl_se));
1634 :
1635 0 : update_stats_enqueue_dl(dl_rq_of_se(dl_se), dl_se, flags);
1636 :
1637 : /*
1638 : * If this is a wakeup or a new instance, the scheduling
1639 : * parameters of the task might need updating. Otherwise,
1640 : * we want a replenishment of its runtime.
1641 : */
1642 0 : if (flags & ENQUEUE_WAKEUP) {
1643 0 : task_contending(dl_se, flags);
1644 0 : update_dl_entity(dl_se);
1645 0 : } else if (flags & ENQUEUE_REPLENISH) {
1646 0 : replenish_dl_entity(dl_se);
1647 0 : } else if ((flags & ENQUEUE_RESTORE) &&
1648 0 : dl_time_before(dl_se->deadline,
1649 : rq_clock(rq_of_dl_rq(dl_rq_of_se(dl_se))))) {
1650 0 : setup_new_dl_entity(dl_se);
1651 : }
1652 :
1653 0 : __enqueue_dl_entity(dl_se);
1654 0 : }
1655 :
1656 : static void dequeue_dl_entity(struct sched_dl_entity *dl_se)
1657 : {
1658 0 : __dequeue_dl_entity(dl_se);
1659 : }
1660 :
1661 0 : static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags)
1662 : {
1663 0 : if (is_dl_boosted(&p->dl)) {
1664 : /*
1665 : * Because of delays in the detection of the overrun of a
1666 : * thread's runtime, it might be the case that a thread
1667 : * goes to sleep in a rt mutex with negative runtime. As
1668 : * a consequence, the thread will be throttled.
1669 : *
1670 : * While waiting for the mutex, this thread can also be
1671 : * boosted via PI, resulting in a thread that is throttled
1672 : * and boosted at the same time.
1673 : *
1674 : * In this case, the boost overrides the throttle.
1675 : */
1676 0 : if (p->dl.dl_throttled) {
1677 : /*
1678 : * The replenish timer needs to be canceled. No
1679 : * problem if it fires concurrently: boosted threads
1680 : * are ignored in dl_task_timer().
1681 : */
1682 0 : hrtimer_try_to_cancel(&p->dl.dl_timer);
1683 0 : p->dl.dl_throttled = 0;
1684 : }
1685 0 : } else if (!dl_prio(p->normal_prio)) {
1686 : /*
1687 : * Special case in which we have a !SCHED_DEADLINE task that is going
1688 : * to be deboosted, but exceeds its runtime while doing so. No point in
1689 : * replenishing it, as it's going to return to its original
1690 : * scheduling class after this. If it has been throttled, we need to
1691 : * clear the flag, otherwise the task may wake up as throttled after
1692 : * being boosted again with no means to replenish the runtime and clear
1693 : * the throttle.
1694 : */
1695 0 : p->dl.dl_throttled = 0;
1696 0 : if (!(flags & ENQUEUE_REPLENISH))
1697 0 : printk_deferred_once("sched: DL de-boosted task PID %d: REPLENISH flag missing\n",
1698 : task_pid_nr(p));
1699 :
1700 : return;
1701 : }
1702 :
1703 : /*
1704 : * Check if a constrained deadline task was activated
1705 : * after the deadline but before the next period.
1706 : * If that is the case, the task will be throttled and
1707 : * the replenishment timer will be set to the next period.
1708 : */
1709 0 : if (!p->dl.dl_throttled && !dl_is_implicit(&p->dl))
1710 0 : dl_check_constrained_dl(&p->dl);
1711 :
1712 0 : if (p->on_rq == TASK_ON_RQ_MIGRATING || flags & ENQUEUE_RESTORE) {
1713 0 : add_rq_bw(&p->dl, &rq->dl);
1714 0 : add_running_bw(&p->dl, &rq->dl);
1715 : }
1716 :
1717 : /*
1718 : * If p is throttled, we do not enqueue it. In fact, if it exhausted
1719 : * its budget it needs a replenishment and, since it now is on
1720 : * its rq, the bandwidth timer callback (which clearly has not
1721 : * run yet) will take care of this.
1722 : * However, the active utilization does not depend on the fact
1723 : * that the task is on the runqueue or not (but depends on the
1724 : * task's state - in GRUB parlance, "inactive" vs "active contending").
1725 : * In other words, even if a task is throttled its utilization must
1726 : * be counted in the active utilization; hence, we need to call
1727 : * add_running_bw().
1728 : */
1729 0 : if (p->dl.dl_throttled && !(flags & ENQUEUE_REPLENISH)) {
1730 0 : if (flags & ENQUEUE_WAKEUP)
1731 0 : task_contending(&p->dl, flags);
1732 :
1733 : return;
1734 : }
1735 :
1736 : check_schedstat_required();
1737 0 : update_stats_wait_start_dl(dl_rq_of_se(&p->dl), &p->dl);
1738 :
1739 0 : enqueue_dl_entity(&p->dl, flags);
1740 :
1741 0 : if (!task_current(rq, p) && p->nr_cpus_allowed > 1)
1742 : enqueue_pushable_dl_task(rq, p);
1743 : }
1744 :
1745 : static void __dequeue_task_dl(struct rq *rq, struct task_struct *p, int flags)
1746 : {
1747 0 : update_stats_dequeue_dl(&rq->dl, &p->dl, flags);
1748 0 : dequeue_dl_entity(&p->dl);
1749 0 : dequeue_pushable_dl_task(rq, p);
1750 : }
1751 :
1752 0 : static void dequeue_task_dl(struct rq *rq, struct task_struct *p, int flags)
1753 : {
1754 0 : update_curr_dl(rq);
1755 0 : __dequeue_task_dl(rq, p, flags);
1756 :
1757 0 : if (p->on_rq == TASK_ON_RQ_MIGRATING || flags & DEQUEUE_SAVE) {
1758 0 : sub_running_bw(&p->dl, &rq->dl);
1759 0 : sub_rq_bw(&p->dl, &rq->dl);
1760 : }
1761 :
1762 : /*
1763 : * This check allows us to start the inactive timer (or to immediately
1764 : * decrease the active utilization, if needed) in two cases:
1765 : * when the task blocks and when it is terminating
1766 : * (p->state == TASK_DEAD). We can handle the two cases in the same
1767 : * way, because from GRUB's point of view the same thing is happening
1768 : * (the task moves from "active contending" to "active non contending"
1769 : * or "inactive")
1770 : */
1771 0 : if (flags & DEQUEUE_SLEEP)
1772 0 : task_non_contending(p);
1773 0 : }
1774 :
1775 : /*
1776 : * Yield task semantic for -deadline tasks is:
1777 : *
1778 : * get off from the CPU until our next instance, with
1779 : * a new runtime. This is of little use now, since we
1780 : * don't have a bandwidth reclaiming mechanism. Anyway,
1781 : * bandwidth reclaiming is planned for the future, and
1782 : * yield_task_dl will indicate that some spare budget
1783 : * is available for other task instances to use it.
1784 : */
1785 0 : static void yield_task_dl(struct rq *rq)
1786 : {
1787 : /*
1788 : * We make the task go to sleep until its current deadline by
1789 : * forcing its runtime to zero. This way, update_curr_dl() stops
1790 : * it and the bandwidth timer will wake it up and will give it
1791 : * new scheduling parameters (thanks to dl_yielded=1).
1792 : */
1793 0 : rq->curr->dl.dl_yielded = 1;
1794 :
1795 0 : update_rq_clock(rq);
1796 0 : update_curr_dl(rq);
1797 : /*
1798 : * Tell update_rq_clock() that we've just updated,
1799 : * so we don't do microscopic update in schedule()
1800 : * and double the fastpath cost.
1801 : */
1802 0 : rq_clock_skip_update(rq);
1803 0 : }
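/*
 * Illustrative sketch (not part of the kernel source): how a userspace
 * SCHED_DEADLINE task is expected to use the yield semantic above. A
 * periodic worker that finishes its per-instance work early can call
 * sched_yield() to give up the remainder of its runtime until the next
 * period, where it wakes with a replenished budget:
 *
 *	#include <sched.h>
 *
 *	for (;;) {
 *		do_instance_work();	// hypothetical per-instance job
 *		sched_yield();		// throttle until the next period,
 *					// then run with fresh runtime
 *	}
 *
 * do_instance_work() is a placeholder; only sched_yield() is a real API.
 */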
1804 :
1805 : #ifdef CONFIG_SMP
1806 :
1807 : static inline bool dl_task_is_earliest_deadline(struct task_struct *p,
1808 : struct rq *rq)
1809 : {
1810 : return (!rq->dl.dl_nr_running ||
1811 : dl_time_before(p->dl.deadline,
1812 : rq->dl.earliest_dl.curr));
1813 : }
1814 :
1815 : static int find_later_rq(struct task_struct *task);
1816 :
1817 : static int
1818 : select_task_rq_dl(struct task_struct *p, int cpu, int flags)
1819 : {
1820 : struct task_struct *curr;
1821 : bool select_rq;
1822 : struct rq *rq;
1823 :
1824 : if (!(flags & WF_TTWU))
1825 : goto out;
1826 :
1827 : rq = cpu_rq(cpu);
1828 :
1829 : rcu_read_lock();
1830 : curr = READ_ONCE(rq->curr); /* unlocked access */
1831 :
1832 : /*
1833 : * If we are dealing with a -deadline task, we must
1834 : * decide where to wake it up.
1835 : * If it has a later deadline and the current task
1836 : * on this rq can't move (provided the waking task
1837 : * can!) we prefer to send it somewhere else. On the
1838 : * other hand, if it has a shorter deadline, we
1839 : * try to make it stay here, it might be important.
1840 : */
1841 : select_rq = unlikely(dl_task(curr)) &&
1842 : (curr->nr_cpus_allowed < 2 ||
1843 : !dl_entity_preempt(&p->dl, &curr->dl)) &&
1844 : p->nr_cpus_allowed > 1;
1845 :
1846 : /*
1847 : * Take the capacity of the CPU into account to
1848 : * ensure it fits the requirement of the task.
1849 : */
1850 : if (sched_asym_cpucap_active())
1851 : select_rq |= !dl_task_fits_capacity(p, cpu);
1852 :
1853 : if (select_rq) {
1854 : int target = find_later_rq(p);
1855 :
1856 : if (target != -1 &&
1857 : dl_task_is_earliest_deadline(p, cpu_rq(target)))
1858 : cpu = target;
1859 : }
1860 : rcu_read_unlock();
1861 :
1862 : out:
1863 : return cpu;
1864 : }
1865 :
1866 : static void migrate_task_rq_dl(struct task_struct *p, int new_cpu __maybe_unused)
1867 : {
1868 : struct rq_flags rf;
1869 : struct rq *rq;
1870 :
1871 : if (READ_ONCE(p->__state) != TASK_WAKING)
1872 : return;
1873 :
1874 : rq = task_rq(p);
1875 : /*
1876 : * Since p->state == TASK_WAKING, set_task_cpu() has been called
1877 : * from try_to_wake_up(). Hence, p->pi_lock is locked, but
1878 : * rq->lock is not... So, lock it
1879 : */
1880 : rq_lock(rq, &rf);
1881 : if (p->dl.dl_non_contending) {
1882 : update_rq_clock(rq);
1883 : sub_running_bw(&p->dl, &rq->dl);
1884 : p->dl.dl_non_contending = 0;
1885 : /*
1886 : * If the timer handler is currently running and the
1887 : * timer cannot be canceled, inactive_task_timer()
1888 : * will see that dl_not_contending is not set, and
1889 : * will not touch the rq's active utilization,
1890 : * so we are still safe.
1891 : */
1892 : if (hrtimer_try_to_cancel(&p->dl.inactive_timer) == 1)
1893 : put_task_struct(p);
1894 : }
1895 : sub_rq_bw(&p->dl, &rq->dl);
1896 : rq_unlock(rq, &rf);
1897 : }
1898 :
1899 : static void check_preempt_equal_dl(struct rq *rq, struct task_struct *p)
1900 : {
1901 : /*
1902 : * Current can't be migrated, useless to reschedule,
1903 : * let's hope p can move out.
1904 : */
1905 : if (rq->curr->nr_cpus_allowed == 1 ||
1906 : !cpudl_find(&rq->rd->cpudl, rq->curr, NULL))
1907 : return;
1908 :
1909 : /*
1910 : * p is migratable, so let's not schedule it and
1911 : * see if it is pushed or pulled somewhere else.
1912 : */
1913 : if (p->nr_cpus_allowed != 1 &&
1914 : cpudl_find(&rq->rd->cpudl, p, NULL))
1915 : return;
1916 :
1917 : resched_curr(rq);
1918 : }
1919 :
1920 : static int balance_dl(struct rq *rq, struct task_struct *p, struct rq_flags *rf)
1921 : {
1922 : if (!on_dl_rq(&p->dl) && need_pull_dl_task(rq, p)) {
1923 : /*
1924 : * This is OK, because current is on_cpu, which avoids it being
1925 : * picked for load-balance and preemption/IRQs are still
1926 : * disabled avoiding further scheduler activity on it and we've
1927 : * not yet started the picking loop.
1928 : */
1929 : rq_unpin_lock(rq, rf);
1930 : pull_dl_task(rq);
1931 : rq_repin_lock(rq, rf);
1932 : }
1933 :
1934 : return sched_stop_runnable(rq) || sched_dl_runnable(rq);
1935 : }
1936 : #endif /* CONFIG_SMP */
1937 :
1938 : /*
1939 : * Only called when both the current and waking task are -deadline
1940 : * tasks.
1941 : */
1942 0 : static void check_preempt_curr_dl(struct rq *rq, struct task_struct *p,
1943 : int flags)
1944 : {
1945 0 : if (dl_entity_preempt(&p->dl, &rq->curr->dl)) {
1946 0 : resched_curr(rq);
1947 0 : return;
1948 : }
1949 :
1950 : #ifdef CONFIG_SMP
1951 : /*
1952 : * In the unlikely case current and p have the same deadline
1953 : * let us try to decide what's the best thing to do...
1954 : */
1955 : if ((p->dl.deadline == rq->curr->dl.deadline) &&
1956 : !test_tsk_need_resched(rq->curr))
1957 : check_preempt_equal_dl(rq, p);
1958 : #endif /* CONFIG_SMP */
1959 : }
1960 :
1961 : #ifdef CONFIG_SCHED_HRTICK
1962 : static void start_hrtick_dl(struct rq *rq, struct task_struct *p)
1963 : {
1964 : hrtick_start(rq, p->dl.runtime);
1965 : }
1966 : #else /* !CONFIG_SCHED_HRTICK */
1967 : static void start_hrtick_dl(struct rq *rq, struct task_struct *p)
1968 : {
1969 : }
1970 : #endif
1971 :
1972 0 : static void set_next_task_dl(struct rq *rq, struct task_struct *p, bool first)
1973 : {
1974 0 : struct sched_dl_entity *dl_se = &p->dl;
1975 0 : struct dl_rq *dl_rq = &rq->dl;
1976 :
1977 0 : p->se.exec_start = rq_clock_task(rq);
1978 0 : if (on_dl_rq(&p->dl))
1979 : update_stats_wait_end_dl(dl_rq, dl_se);
1980 :
1981 : /* You can't push away the running task */
1982 0 : dequeue_pushable_dl_task(rq, p);
1983 :
1984 : if (!first)
1985 : return;
1986 :
1987 : if (hrtick_enabled_dl(rq))
1988 : start_hrtick_dl(rq, p);
1989 :
1990 : if (rq->curr->sched_class != &dl_sched_class)
1991 : update_dl_rq_load_avg(rq_clock_pelt(rq), rq, 0);
1992 :
1993 : deadline_queue_push_tasks(rq);
1994 : }
1995 :
1996 : static struct sched_dl_entity *pick_next_dl_entity(struct dl_rq *dl_rq)
1997 : {
1998 0 : struct rb_node *left = rb_first_cached(&dl_rq->root);
1999 :
2000 0 : if (!left)
2001 : return NULL;
2002 :
2003 0 : return __node_2_dle(left);
2004 : }
2005 :
2006 0 : static struct task_struct *pick_task_dl(struct rq *rq)
2007 : {
2008 : struct sched_dl_entity *dl_se;
2009 0 : struct dl_rq *dl_rq = &rq->dl;
2010 : struct task_struct *p;
2011 :
2012 0 : if (!sched_dl_runnable(rq))
2013 : return NULL;
2014 :
2015 0 : dl_se = pick_next_dl_entity(dl_rq);
2016 0 : WARN_ON_ONCE(!dl_se);
2017 0 : p = dl_task_of(dl_se);
2018 :
2019 0 : return p;
2020 : }
2021 :
2022 0 : static struct task_struct *pick_next_task_dl(struct rq *rq)
2023 : {
2024 : struct task_struct *p;
2025 :
2026 0 : p = pick_task_dl(rq);
2027 0 : if (p)
2028 : set_next_task_dl(rq, p, true);
2029 :
2030 0 : return p;
2031 : }
2032 :
2033 0 : static void put_prev_task_dl(struct rq *rq, struct task_struct *p)
2034 : {
2035 0 : struct sched_dl_entity *dl_se = &p->dl;
2036 0 : struct dl_rq *dl_rq = &rq->dl;
2037 :
2038 0 : if (on_dl_rq(&p->dl))
2039 : update_stats_wait_start_dl(dl_rq, dl_se);
2040 :
2041 0 : update_curr_dl(rq);
2042 :
2043 0 : update_dl_rq_load_avg(rq_clock_pelt(rq), rq, 1);
2044 0 : if (on_dl_rq(&p->dl) && p->nr_cpus_allowed > 1)
2045 : enqueue_pushable_dl_task(rq, p);
2046 0 : }
2047 :
2048 : /*
2049 : * scheduler tick hitting a task of our scheduling class.
2050 : *
2051 : * NOTE: This function can be called remotely by the tick offload that
2052 : * goes along full dynticks. Therefore no local assumption can be made
2053 : * and everything must be accessed through the @rq and @curr passed in
2054 : * parameters.
2055 : */
2056 0 : static void task_tick_dl(struct rq *rq, struct task_struct *p, int queued)
2057 : {
2058 0 : update_curr_dl(rq);
2059 :
2060 0 : update_dl_rq_load_avg(rq_clock_pelt(rq), rq, 1);
2061 : /*
2062 : * Even when we have runtime, update_curr_dl() might have resulted in us
2063 : * not being the leftmost task anymore. In that case NEED_RESCHED will
2064 : * be set and schedule() will start a new hrtick for the next task.
2065 : */
2066 0 : if (hrtick_enabled_dl(rq) && queued && p->dl.runtime > 0 &&
2067 : is_leftmost(p, &rq->dl))
2068 : start_hrtick_dl(rq, p);
2069 0 : }
2070 :
2071 0 : static void task_fork_dl(struct task_struct *p)
2072 : {
2073 : /*
2074 : * SCHED_DEADLINE tasks cannot fork and this is achieved through
2075 : * sched_fork()
2076 : */
2077 0 : }
2078 :
2079 : #ifdef CONFIG_SMP
2080 :
2081 : /* Only try algorithms three times */
2082 : #define DL_MAX_TRIES 3
2083 :
2084 : static int pick_dl_task(struct rq *rq, struct task_struct *p, int cpu)
2085 : {
2086 : if (!task_on_cpu(rq, p) &&
2087 : cpumask_test_cpu(cpu, &p->cpus_mask))
2088 : return 1;
2089 : return 0;
2090 : }
2091 :
2092 : /*
2093 : * Return the earliest pushable rq's task, which is suitable to be executed
2094 : * on the CPU, NULL otherwise:
2095 : */
2096 : static struct task_struct *pick_earliest_pushable_dl_task(struct rq *rq, int cpu)
2097 : {
2098 : struct task_struct *p = NULL;
2099 : struct rb_node *next_node;
2100 :
2101 : if (!has_pushable_dl_tasks(rq))
2102 : return NULL;
2103 :
2104 : next_node = rb_first_cached(&rq->dl.pushable_dl_tasks_root);
2105 :
2106 : next_node:
2107 : if (next_node) {
2108 : p = __node_2_pdl(next_node);
2109 :
2110 : if (pick_dl_task(rq, p, cpu))
2111 : return p;
2112 :
2113 : next_node = rb_next(next_node);
2114 : goto next_node;
2115 : }
2116 :
2117 : return NULL;
2118 : }
2119 :
2120 : static DEFINE_PER_CPU(cpumask_var_t, local_cpu_mask_dl);
2121 :
2122 : static int find_later_rq(struct task_struct *task)
2123 : {
2124 : struct sched_domain *sd;
2125 : struct cpumask *later_mask = this_cpu_cpumask_var_ptr(local_cpu_mask_dl);
2126 : int this_cpu = smp_processor_id();
2127 : int cpu = task_cpu(task);
2128 :
2129 : /* Make sure the mask is initialized first */
2130 : if (unlikely(!later_mask))
2131 : return -1;
2132 :
2133 : if (task->nr_cpus_allowed == 1)
2134 : return -1;
2135 :
2136 : /*
2137 : * We have to consider system topology and task affinity
2138 : * first, then we can look for a suitable CPU.
2139 : */
2140 : if (!cpudl_find(&task_rq(task)->rd->cpudl, task, later_mask))
2141 : return -1;
2142 :
2143 : /*
2144 : * If we are here, some targets have been found, including
2145 : * the most suitable which is, among the runqueues where the
2146 : * current tasks have later deadlines than the task's one, the
2147 : * rq with the latest possible one.
2148 : *
2149 : * Now we check how well this matches with task's
2150 : * affinity and system topology.
2151 : *
2152 : * The last CPU where the task ran is our first
2153 : * guess, since it is most likely cache-hot there.
2154 : */
2155 : if (cpumask_test_cpu(cpu, later_mask))
2156 : return cpu;
2157 : /*
2158 : * Check if this_cpu is to be skipped (i.e., it is
2159 : * not in the mask) or not.
2160 : */
2161 : if (!cpumask_test_cpu(this_cpu, later_mask))
2162 : this_cpu = -1;
2163 :
2164 : rcu_read_lock();
2165 : for_each_domain(cpu, sd) {
2166 : if (sd->flags & SD_WAKE_AFFINE) {
2167 : int best_cpu;
2168 :
2169 : /*
2170 : * If possible, preempting this_cpu is
2171 : * cheaper than migrating.
2172 : */
2173 : if (this_cpu != -1 &&
2174 : cpumask_test_cpu(this_cpu, sched_domain_span(sd))) {
2175 : rcu_read_unlock();
2176 : return this_cpu;
2177 : }
2178 :
2179 : best_cpu = cpumask_any_and_distribute(later_mask,
2180 : sched_domain_span(sd));
2181 : /*
2182 : * Last chance: if a CPU being in both later_mask
2183 : * and current sd span is valid, that becomes our
2184 : * choice. Of course, the latest possible CPU is
2185 : * already under consideration through later_mask.
2186 : */
2187 : if (best_cpu < nr_cpu_ids) {
2188 : rcu_read_unlock();
2189 : return best_cpu;
2190 : }
2191 : }
2192 : }
2193 : rcu_read_unlock();
2194 :
2195 : /*
2196 : * At this point, all our guesses failed, we just return
2197 : * 'something', and let the caller sort the things out.
2198 : */
2199 : if (this_cpu != -1)
2200 : return this_cpu;
2201 :
2202 : cpu = cpumask_any_distribute(later_mask);
2203 : if (cpu < nr_cpu_ids)
2204 : return cpu;
2205 :
2206 : return -1;
2207 : }
2208 :
2209 : /* Locks the rq it finds */
2210 : static struct rq *find_lock_later_rq(struct task_struct *task, struct rq *rq)
2211 : {
2212 : struct rq *later_rq = NULL;
2213 : int tries;
2214 : int cpu;
2215 :
2216 : for (tries = 0; tries < DL_MAX_TRIES; tries++) {
2217 : cpu = find_later_rq(task);
2218 :
2219 : if ((cpu == -1) || (cpu == rq->cpu))
2220 : break;
2221 :
2222 : later_rq = cpu_rq(cpu);
2223 :
2224 : if (!dl_task_is_earliest_deadline(task, later_rq)) {
2225 : /*
2226 : * Target rq has tasks of equal or earlier deadline,
2227 : * retrying does not release any lock and is unlikely
2228 : * to yield a different result.
2229 : */
2230 : later_rq = NULL;
2231 : break;
2232 : }
2233 :
2234 : /* Retry if something changed. */
2235 : if (double_lock_balance(rq, later_rq)) {
2236 : if (unlikely(task_rq(task) != rq ||
2237 : !cpumask_test_cpu(later_rq->cpu, &task->cpus_mask) ||
2238 : task_on_cpu(rq, task) ||
2239 : !dl_task(task) ||
2240 : is_migration_disabled(task) ||
2241 : !task_on_rq_queued(task))) {
2242 : double_unlock_balance(rq, later_rq);
2243 : later_rq = NULL;
2244 : break;
2245 : }
2246 : }
2247 :
2248 : /*
2249 : * If the rq we found has no -deadline task, or
2250 : * its earliest one has a later deadline than our
2251 : * task, the rq is a good one.
2252 : */
2253 : if (dl_task_is_earliest_deadline(task, later_rq))
2254 : break;
2255 :
2256 : /* Otherwise we try again. */
2257 : double_unlock_balance(rq, later_rq);
2258 : later_rq = NULL;
2259 : }
2260 :
2261 : return later_rq;
2262 : }
2263 :
2264 : static struct task_struct *pick_next_pushable_dl_task(struct rq *rq)
2265 : {
2266 : struct task_struct *p;
2267 :
2268 : if (!has_pushable_dl_tasks(rq))
2269 : return NULL;
2270 :
2271 : p = __node_2_pdl(rb_first_cached(&rq->dl.pushable_dl_tasks_root));
2272 :
2273 : WARN_ON_ONCE(rq->cpu != task_cpu(p));
2274 : WARN_ON_ONCE(task_current(rq, p));
2275 : WARN_ON_ONCE(p->nr_cpus_allowed <= 1);
2276 :
2277 : WARN_ON_ONCE(!task_on_rq_queued(p));
2278 : WARN_ON_ONCE(!dl_task(p));
2279 :
2280 : return p;
2281 : }
2282 :
2283 : /*
2284 : * See if the non running -deadline tasks on this rq
2285 : * can be sent to some other CPU where they can preempt
2286 : * and start executing.
2287 : */
2288 : static int push_dl_task(struct rq *rq)
2289 : {
2290 : struct task_struct *next_task;
2291 : struct rq *later_rq;
2292 : int ret = 0;
2293 :
2294 : if (!rq->dl.overloaded)
2295 : return 0;
2296 :
2297 : next_task = pick_next_pushable_dl_task(rq);
2298 : if (!next_task)
2299 : return 0;
2300 :
2301 : retry:
2302 : /*
2303 : * If next_task preempts rq->curr, and rq->curr
2304 : * can move away, it makes sense to just reschedule
2305 : * without going further in pushing next_task.
2306 : */
2307 : if (dl_task(rq->curr) &&
2308 : dl_time_before(next_task->dl.deadline, rq->curr->dl.deadline) &&
2309 : rq->curr->nr_cpus_allowed > 1) {
2310 : resched_curr(rq);
2311 : return 0;
2312 : }
2313 :
2314 : if (is_migration_disabled(next_task))
2315 : return 0;
2316 :
2317 : if (WARN_ON(next_task == rq->curr))
2318 : return 0;
2319 :
2320 : /* We might release rq lock */
2321 : get_task_struct(next_task);
2322 :
2323 : /* Will lock the rq it'll find */
2324 : later_rq = find_lock_later_rq(next_task, rq);
2325 : if (!later_rq) {
2326 : struct task_struct *task;
2327 :
2328 : /*
2329 : * We must check all this again, since
2330 : * find_lock_later_rq releases rq->lock and it is
2331 : * then possible that next_task has migrated.
2332 : */
2333 : task = pick_next_pushable_dl_task(rq);
2334 : if (task == next_task) {
2335 : /*
2336 : * The task is still there. We don't try
2337 : * again, some other CPU will pull it when ready.
2338 : */
2339 : goto out;
2340 : }
2341 :
2342 : if (!task)
2343 : /* No more tasks */
2344 : goto out;
2345 :
2346 : put_task_struct(next_task);
2347 : next_task = task;
2348 : goto retry;
2349 : }
2350 :
2351 : deactivate_task(rq, next_task, 0);
2352 : set_task_cpu(next_task, later_rq->cpu);
2353 : activate_task(later_rq, next_task, 0);
2354 : ret = 1;
2355 :
2356 : resched_curr(later_rq);
2357 :
2358 : double_unlock_balance(rq, later_rq);
2359 :
2360 : out:
2361 : put_task_struct(next_task);
2362 :
2363 : return ret;
2364 : }
2365 :
2366 : static void push_dl_tasks(struct rq *rq)
2367 : {
2368 : /* push_dl_task() will return true if it moved a -deadline task */
2369 : while (push_dl_task(rq))
2370 : ;
2371 : }
2372 :
2373 : static void pull_dl_task(struct rq *this_rq)
2374 : {
2375 : int this_cpu = this_rq->cpu, cpu;
2376 : struct task_struct *p, *push_task;
2377 : bool resched = false;
2378 : struct rq *src_rq;
2379 : u64 dmin = LONG_MAX;
2380 :
2381 : if (likely(!dl_overloaded(this_rq)))
2382 : return;
2383 :
2384 : /*
2385 : * Match the barrier from dl_set_overloaded; this guarantees that if we
2386 : * see overloaded we must also see the dlo_mask bit.
2387 : */
2388 : smp_rmb();
2389 :
2390 : for_each_cpu(cpu, this_rq->rd->dlo_mask) {
2391 : if (this_cpu == cpu)
2392 : continue;
2393 :
2394 : src_rq = cpu_rq(cpu);
2395 :
2396 : /*
2397 : * It looks racy, and it is! However, as in sched_rt.c,
2398 : * we are fine with this.
2399 : */
2400 : if (this_rq->dl.dl_nr_running &&
2401 : dl_time_before(this_rq->dl.earliest_dl.curr,
2402 : src_rq->dl.earliest_dl.next))
2403 : continue;
2404 :
2405 : /* Might drop this_rq->lock */
2406 : push_task = NULL;
2407 : double_lock_balance(this_rq, src_rq);
2408 :
2409 : /*
2410 : * If there are no more pullable tasks on the
2411 : * rq, we're done with it.
2412 : */
2413 : if (src_rq->dl.dl_nr_running <= 1)
2414 : goto skip;
2415 :
2416 : p = pick_earliest_pushable_dl_task(src_rq, this_cpu);
2417 :
2418 : /*
2419 : * We found a task to be pulled if:
2420 : * - it preempts our current (if there's one),
2421 : * - it will preempt the last one we pulled (if any).
2422 : */
2423 : if (p && dl_time_before(p->dl.deadline, dmin) &&
2424 : dl_task_is_earliest_deadline(p, this_rq)) {
2425 : WARN_ON(p == src_rq->curr);
2426 : WARN_ON(!task_on_rq_queued(p));
2427 :
2428 : /*
2429 : * Then we pull iff p has actually an earlier
2430 : * deadline than the current task of its runqueue.
2431 : */
2432 : if (dl_time_before(p->dl.deadline,
2433 : src_rq->curr->dl.deadline))
2434 : goto skip;
2435 :
2436 : if (is_migration_disabled(p)) {
2437 : push_task = get_push_task(src_rq);
2438 : } else {
2439 : deactivate_task(src_rq, p, 0);
2440 : set_task_cpu(p, this_cpu);
2441 : activate_task(this_rq, p, 0);
2442 : dmin = p->dl.deadline;
2443 : resched = true;
2444 : }
2445 :
2446 : /* Is there any other task even earlier? */
2447 : }
2448 : skip:
2449 : double_unlock_balance(this_rq, src_rq);
2450 :
2451 : if (push_task) {
2452 : raw_spin_rq_unlock(this_rq);
2453 : stop_one_cpu_nowait(src_rq->cpu, push_cpu_stop,
2454 : push_task, &src_rq->push_work);
2455 : raw_spin_rq_lock(this_rq);
2456 : }
2457 : }
2458 :
2459 : if (resched)
2460 : resched_curr(this_rq);
2461 : }
2462 :
2463 : /*
2464 : * Since the task is not running and a reschedule is not going to happen
2465 : * anytime soon on its runqueue, we try pushing it away now.
2466 : */
2467 : static void task_woken_dl(struct rq *rq, struct task_struct *p)
2468 : {
2469 : if (!task_on_cpu(rq, p) &&
2470 : !test_tsk_need_resched(rq->curr) &&
2471 : p->nr_cpus_allowed > 1 &&
2472 : dl_task(rq->curr) &&
2473 : (rq->curr->nr_cpus_allowed < 2 ||
2474 : !dl_entity_preempt(&p->dl, &rq->curr->dl))) {
2475 : push_dl_tasks(rq);
2476 : }
2477 : }
2478 :
2479 : static void set_cpus_allowed_dl(struct task_struct *p,
2480 : struct affinity_context *ctx)
2481 : {
2482 : struct root_domain *src_rd;
2483 : struct rq *rq;
2484 :
2485 : WARN_ON_ONCE(!dl_task(p));
2486 :
2487 : rq = task_rq(p);
2488 : src_rd = rq->rd;
2489 : /*
2490 : * Migrating a SCHED_DEADLINE task between exclusive
2491 : * cpusets (different root_domains) entails a bandwidth
2492 : * update. We already made space for us in the destination
2493 : * domain (see cpuset_can_attach()).
2494 : */
2495 : if (!cpumask_intersects(src_rd->span, ctx->new_mask)) {
2496 : struct dl_bw *src_dl_b;
2497 :
2498 : src_dl_b = dl_bw_of(cpu_of(rq));
2499 : /*
2500 : * We now free resources of the root_domain we are migrating
2501 : * off. In the worst case, sched_setattr() may temporary fail
2502 : * until we complete the update.
2503 : */
2504 : raw_spin_lock(&src_dl_b->lock);
2505 : __dl_sub(src_dl_b, p->dl.dl_bw, dl_bw_cpus(task_cpu(p)));
2506 : raw_spin_unlock(&src_dl_b->lock);
2507 : }
2508 :
2509 : set_cpus_allowed_common(p, ctx);
2510 : }
2511 :
2512 : /* Assumes rq->lock is held */
2513 : static void rq_online_dl(struct rq *rq)
2514 : {
2515 : if (rq->dl.overloaded)
2516 : dl_set_overload(rq);
2517 :
2518 : cpudl_set_freecpu(&rq->rd->cpudl, rq->cpu);
2519 : if (rq->dl.dl_nr_running > 0)
2520 : cpudl_set(&rq->rd->cpudl, rq->cpu, rq->dl.earliest_dl.curr);
2521 : }
2522 :
2523 : /* Assumes rq->lock is held */
2524 : static void rq_offline_dl(struct rq *rq)
2525 : {
2526 : if (rq->dl.overloaded)
2527 : dl_clear_overload(rq);
2528 :
2529 : cpudl_clear(&rq->rd->cpudl, rq->cpu);
2530 : cpudl_clear_freecpu(&rq->rd->cpudl, rq->cpu);
2531 : }
2532 :
2533 : void __init init_sched_dl_class(void)
2534 : {
2535 : unsigned int i;
2536 :
2537 : for_each_possible_cpu(i)
2538 : zalloc_cpumask_var_node(&per_cpu(local_cpu_mask_dl, i),
2539 : GFP_KERNEL, cpu_to_node(i));
2540 : }
2541 :
2542 : void dl_add_task_root_domain(struct task_struct *p)
2543 : {
2544 : struct rq_flags rf;
2545 : struct rq *rq;
2546 : struct dl_bw *dl_b;
2547 :
2548 : raw_spin_lock_irqsave(&p->pi_lock, rf.flags);
2549 : if (!dl_task(p)) {
2550 : raw_spin_unlock_irqrestore(&p->pi_lock, rf.flags);
2551 : return;
2552 : }
2553 :
2554 : rq = __task_rq_lock(p, &rf);
2555 :
2556 : dl_b = &rq->rd->dl_bw;
2557 : raw_spin_lock(&dl_b->lock);
2558 :
2559 : __dl_add(dl_b, p->dl.dl_bw, cpumask_weight(rq->rd->span));
2560 :
2561 : raw_spin_unlock(&dl_b->lock);
2562 :
2563 : task_rq_unlock(rq, p, &rf);
2564 : }
2565 :
2566 : void dl_clear_root_domain(struct root_domain *rd)
2567 : {
2568 : unsigned long flags;
2569 :
2570 : raw_spin_lock_irqsave(&rd->dl_bw.lock, flags);
2571 : rd->dl_bw.total_bw = 0;
2572 : raw_spin_unlock_irqrestore(&rd->dl_bw.lock, flags);
2573 : }
2574 :
2575 : #endif /* CONFIG_SMP */
2576 :
2577 0 : static void switched_from_dl(struct rq *rq, struct task_struct *p)
2578 : {
2579 : /*
2580 : * task_non_contending() can start the "inactive timer" (if the 0-lag
2581 : * time is in the future). If the task switches back to dl before
2582 : * the "inactive timer" fires, it can continue to consume its current
2583 : * runtime using its current deadline. If it stays outside of
2584 : * SCHED_DEADLINE until the 0-lag time passes, inactive_task_timer()
2585 : * will reset the task parameters.
2586 : */
2587 0 : if (task_on_rq_queued(p) && p->dl.dl_runtime)
2588 0 : task_non_contending(p);
2589 :
2590 : /*
2591 : * In case a task is setscheduled out from SCHED_DEADLINE we need to
2592 : * keep track of that on its cpuset (for correct bandwidth tracking).
2593 : */
2594 0 : dec_dl_tasks_cs(p);
2595 :
2596 0 : if (!task_on_rq_queued(p)) {
2597 : /*
2598 : * Inactive timer is armed. However, p is leaving DEADLINE and
2599 : * might migrate away from this rq while continuing to run on
2600 : * some other class. We need to remove its contribution from
2601 : * this rq running_bw now, or sub_rq_bw (below) will complain.
2602 : */
2603 0 : if (p->dl.dl_non_contending)
2604 0 : sub_running_bw(&p->dl, &rq->dl);
2605 0 : sub_rq_bw(&p->dl, &rq->dl);
2606 : }
2607 :
2608 : /*
2609 : * We cannot use inactive_task_timer() to invoke sub_running_bw()
2610 : * at the 0-lag time, because the task could have been migrated
2611 : * while SCHED_OTHER in the meanwhile.
2612 : */
2613 0 : if (p->dl.dl_non_contending)
2614 0 : p->dl.dl_non_contending = 0;
2615 :
2616 : /*
2617 : * Since this might be the only -deadline task on the rq,
2618 : * this is the right place to try to pull some other one
2619 : * from an overloaded CPU, if any.
2620 : */
2621 0 : if (!task_on_rq_queued(p) || rq->dl.dl_nr_running)
2622 : return;
2623 :
2624 : deadline_queue_pull_task(rq);
2625 : }
2626 :
2627 : /*
2628 : * When switching to -deadline, we may overload the rq, then
2629 : * we try to push someone off, if possible.
2630 : */
2631 0 : static void switched_to_dl(struct rq *rq, struct task_struct *p)
2632 : {
2633 0 : if (hrtimer_try_to_cancel(&p->dl.inactive_timer) == 1)
2634 0 : put_task_struct(p);
2635 :
2636 : /*
2637 : * In case a task is setscheduled to SCHED_DEADLINE we need to keep
2638 : * track of that on its cpuset (for correct bandwidth tracking).
2639 : */
2640 0 : inc_dl_tasks_cs(p);
2641 :
2642 : /* If p is not queued we will update its parameters at next wakeup. */
2643 0 : if (!task_on_rq_queued(p)) {
2644 0 : add_rq_bw(&p->dl, &rq->dl);
2645 :
2646 : return;
2647 : }
2648 :
2649 0 : if (rq->curr != p) {
2650 : #ifdef CONFIG_SMP
2651 : if (p->nr_cpus_allowed > 1 && rq->dl.overloaded)
2652 : deadline_queue_push_tasks(rq);
2653 : #endif
2654 0 : if (dl_task(rq->curr))
2655 : check_preempt_curr_dl(rq, p, 0);
2656 : else
2657 0 : resched_curr(rq);
2658 : } else {
2659 : update_dl_rq_load_avg(rq_clock_pelt(rq), rq, 0);
2660 : }
2661 : }
2662 :
2663 : /*
2664 : * If the scheduling parameters of a -deadline task changed,
2665 : * a push or pull operation might be needed.
2666 : */
2667 0 : static void prio_changed_dl(struct rq *rq, struct task_struct *p,
2668 : int oldprio)
2669 : {
2670 0 : if (!task_on_rq_queued(p))
2671 : return;
2672 :
2673 : #ifdef CONFIG_SMP
2674 : /*
2675 : * This might be too much, but unfortunately
2676 : * we don't have the old deadline value, and
2677 : * we can't argue if the task is increasing
2678 : * or lowering its prio, so...
2679 : */
2680 : if (!rq->dl.overloaded)
2681 : deadline_queue_pull_task(rq);
2682 :
2683 : if (task_current(rq, p)) {
2684 : /*
2685 : * If we now have an earlier deadline task than p,
2686 : * then reschedule, provided p is still on this
2687 : * runqueue.
2688 : */
2689 : if (dl_time_before(rq->dl.earliest_dl.curr, p->dl.deadline))
2690 : resched_curr(rq);
2691 : } else {
2692 : /*
2693 : * Current may not be deadline in case p was throttled but we
2694 : * have just replenished it (e.g. rt_mutex_setprio()).
2695 : *
2696 : * Otherwise, if p was given an earlier deadline, reschedule.
2697 : */
2698 : if (!dl_task(rq->curr) ||
2699 : dl_time_before(p->dl.deadline, rq->curr->dl.deadline))
2700 : resched_curr(rq);
2701 : }
2702 : #else
2703 : /*
2704 : * We don't know if p has an earlier or later deadline, so let's blindly
2705 : * set a (maybe not needed) rescheduling point.
2706 : */
2707 0 : resched_curr(rq);
2708 : #endif
2709 : }
2710 :
2711 : #ifdef CONFIG_SCHED_CORE
2712 : static int task_is_throttled_dl(struct task_struct *p, int cpu)
2713 : {
2714 : return p->dl.dl_throttled;
2715 : }
2716 : #endif
2717 :
2718 : DEFINE_SCHED_CLASS(dl) = {
2719 :
2720 : .enqueue_task = enqueue_task_dl,
2721 : .dequeue_task = dequeue_task_dl,
2722 : .yield_task = yield_task_dl,
2723 :
2724 : .check_preempt_curr = check_preempt_curr_dl,
2725 :
2726 : .pick_next_task = pick_next_task_dl,
2727 : .put_prev_task = put_prev_task_dl,
2728 : .set_next_task = set_next_task_dl,
2729 :
2730 : #ifdef CONFIG_SMP
2731 : .balance = balance_dl,
2732 : .pick_task = pick_task_dl,
2733 : .select_task_rq = select_task_rq_dl,
2734 : .migrate_task_rq = migrate_task_rq_dl,
2735 : .set_cpus_allowed = set_cpus_allowed_dl,
2736 : .rq_online = rq_online_dl,
2737 : .rq_offline = rq_offline_dl,
2738 : .task_woken = task_woken_dl,
2739 : .find_lock_rq = find_lock_later_rq,
2740 : #endif
2741 :
2742 : .task_tick = task_tick_dl,
2743 : .task_fork = task_fork_dl,
2744 :
2745 : .prio_changed = prio_changed_dl,
2746 : .switched_from = switched_from_dl,
2747 : .switched_to = switched_to_dl,
2748 :
2749 : .update_curr = update_curr_dl,
2750 : #ifdef CONFIG_SCHED_CORE
2751 : .task_is_throttled = task_is_throttled_dl,
2752 : #endif
2753 : };
2754 :
2755 : /* Used for dl_bw check and update, used under sched_rt_handler()::mutex */
2756 : static u64 dl_generation;
2757 :
2758 0 : int sched_dl_global_validate(void)
2759 : {
2760 0 : u64 runtime = global_rt_runtime();
2761 0 : u64 period = global_rt_period();
2762 0 : u64 new_bw = to_ratio(period, runtime);
2763 0 : u64 gen = ++dl_generation;
2764 : struct dl_bw *dl_b;
2765 0 : int cpu, cpus, ret = 0;
2766 : unsigned long flags;
2767 :
2768 : /*
2769 : * Here we want to check that the bandwidth is not being set to a
2770 : * value smaller than the currently allocated bandwidth in
2771 : * any of the root_domains.
2772 : */
2773 0 : for_each_possible_cpu(cpu) {
2774 : rcu_read_lock_sched();
2775 :
2776 0 : if (dl_bw_visited(cpu, gen))
2777 : goto next;
2778 :
2779 0 : dl_b = dl_bw_of(cpu);
2780 0 : cpus = dl_bw_cpus(cpu);
2781 :
2782 0 : raw_spin_lock_irqsave(&dl_b->lock, flags);
2783 0 : if (new_bw * cpus < dl_b->total_bw)
2784 0 : ret = -EBUSY;
2785 0 : raw_spin_unlock_irqrestore(&dl_b->lock, flags);
2786 :
2787 : next:
2788 : rcu_read_unlock_sched();
2789 :
2790 0 : if (ret)
2791 : break;
2792 : }
2793 :
2794 0 : return ret;
2795 : }
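/*
 * Worked example of the check above (numbers are hypothetical): lowering
 * sched_rt_runtime_us to 100000 with sched_rt_period_us = 1000000 gives
 * new_bw = 0.1 << BW_SHIFT. On a root_domain spanning 4 CPUs whose
 * admitted -deadline tasks already sum to total_bw = 0.5 << BW_SHIFT,
 * new_bw * cpus = 0.4 << BW_SHIFT < total_bw, so the sysctl write is
 * rejected with -EBUSY.
 */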
2796 :
2797 1 : static void init_dl_rq_bw_ratio(struct dl_rq *dl_rq)
2798 : {
2799 1 : if (global_rt_runtime() == RUNTIME_INF) {
2800 0 : dl_rq->bw_ratio = 1 << RATIO_SHIFT;
2801 0 : dl_rq->max_bw = dl_rq->extra_bw = 1 << BW_SHIFT;
2802 : } else {
2803 2 : dl_rq->bw_ratio = to_ratio(global_rt_runtime(),
2804 1 : global_rt_period()) >> (BW_SHIFT - RATIO_SHIFT);
2805 1 : dl_rq->max_bw = dl_rq->extra_bw =
2806 1 : to_ratio(global_rt_period(), global_rt_runtime());
2807 : }
2808 1 : }
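/*
 * Worked example (illustrative only), assuming the default sysctls
 * sched_rt_runtime_us = 950000 and sched_rt_period_us = 1000000, with
 * BW_SHIFT = 20 and RATIO_SHIFT = 8:
 *
 *	max_bw = extra_bw = to_ratio(1s, 950ms)
 *	                  = 0.95 << BW_SHIFT            ~= 996147
 *	bw_ratio = to_ratio(950ms, 1s) >> (BW_SHIFT - RATIO_SHIFT)
 *	         = (1 / 0.95) << RATIO_SHIFT            ~= 269
 *
 * i.e. at most ~95% of CPU time is available to -deadline tasks, and
 * bw_ratio is the inverse of that fraction, used by the GRUB runtime
 * reclaiming code to scale utilizations back to the RT-usable share.
 */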
2809 :
2810 0 : void sched_dl_do_global(void)
2811 : {
2812 0 : u64 new_bw = -1;
2813 0 : u64 gen = ++dl_generation;
2814 : struct dl_bw *dl_b;
2815 : int cpu;
2816 : unsigned long flags;
2817 :
2818 0 : if (global_rt_runtime() != RUNTIME_INF)
2819 0 : new_bw = to_ratio(global_rt_period(), global_rt_runtime());
2820 :
2821 0 : for_each_possible_cpu(cpu) {
2822 : rcu_read_lock_sched();
2823 :
2824 0 : if (dl_bw_visited(cpu, gen)) {
2825 : rcu_read_unlock_sched();
2826 : continue;
2827 : }
2828 :
2829 0 : dl_b = dl_bw_of(cpu);
2830 :
2831 0 : raw_spin_lock_irqsave(&dl_b->lock, flags);
2832 0 : dl_b->bw = new_bw;
2833 0 : raw_spin_unlock_irqrestore(&dl_b->lock, flags);
2834 :
2835 : rcu_read_unlock_sched();
2836 0 : init_dl_rq_bw_ratio(&cpu_rq(cpu)->dl);
2837 : }
2838 0 : }
2839 :
2840 : /*
2841 : * We must be sure that accepting a new task (or allowing changing the
2842 : * parameters of an existing one) is consistent with the bandwidth
2843 : * constraints. If yes, this function also accordingly updates the currently
2844 : * allocated bandwidth to reflect the new situation.
2845 : *
2846 : * This function is called while holding p's rq->lock.
2847 : */
2848 0 : int sched_dl_overflow(struct task_struct *p, int policy,
2849 : const struct sched_attr *attr)
2850 : {
2851 0 : u64 period = attr->sched_period ?: attr->sched_deadline;
2852 0 : u64 runtime = attr->sched_runtime;
2853 0 : u64 new_bw = dl_policy(policy) ? to_ratio(period, runtime) : 0;
2854 0 : int cpus, err = -1, cpu = task_cpu(p);
2855 0 : struct dl_bw *dl_b = dl_bw_of(cpu);
2856 : unsigned long cap;
2857 :
2858 0 : if (attr->sched_flags & SCHED_FLAG_SUGOV)
2859 : return 0;
2860 :
2861 : /* !deadline task may carry old deadline bandwidth */
2862 0 : if (new_bw == p->dl.dl_bw && task_has_dl_policy(p))
2863 : return 0;
2864 :
2865 : /*
2866 : * Whether a task enters, leaves, or stays -deadline but changes
2867 : * its parameters, we may need to update the total allocated
2868 : * bandwidth of the container accordingly.
2869 : */
2870 0 : raw_spin_lock(&dl_b->lock);
2871 0 : cpus = dl_bw_cpus(cpu);
2872 0 : cap = dl_bw_capacity(cpu);
2873 :
2874 0 : if (dl_policy(policy) && !task_has_dl_policy(p) &&
2875 0 : !__dl_overflow(dl_b, cap, 0, new_bw)) {
2876 0 : if (hrtimer_active(&p->dl.inactive_timer))
2877 0 : __dl_sub(dl_b, p->dl.dl_bw, cpus);
2878 0 : __dl_add(dl_b, new_bw, cpus);
2879 0 : err = 0;
2880 0 : } else if (dl_policy(policy) && task_has_dl_policy(p) &&
2881 0 : !__dl_overflow(dl_b, cap, p->dl.dl_bw, new_bw)) {
2882 : /*
2883 : * XXX this is slightly incorrect: when the task
2884 : * utilization decreases, we should delay the total
2885 : * utilization change until the task's 0-lag point.
2886 : * But this would require to set the task's "inactive
2887 : * timer" when the task is not inactive.
2888 : */
2889 0 : __dl_sub(dl_b, p->dl.dl_bw, cpus);
2890 0 : __dl_add(dl_b, new_bw, cpus);
2891 0 : dl_change_utilization(p, new_bw);
2892 0 : err = 0;
2893 0 : } else if (!dl_policy(policy) && task_has_dl_policy(p)) {
2894 : /*
2895 : * Do not decrease the total deadline utilization here,
2896 : * switched_from_dl() will take care to do it at the correct
2897 : * (0-lag) time.
2898 : */
2899 0 : err = 0;
2900 : }
2901 0 : raw_spin_unlock(&dl_b->lock);
2902 :
2903 0 : return err;
2904 : }
2905 :
2906 : /*
2907 : * This function initializes the sched_dl_entity of a task that is
2908 : * becoming SCHED_DEADLINE.
2909 : *
2910 : * Only the static values are considered here, the actual runtime and the
2911 : * absolute deadline will be properly calculated when the task is enqueued
2912 : * for the first time with its new policy.
2913 : */
2914 0 : void __setparam_dl(struct task_struct *p, const struct sched_attr *attr)
2915 : {
2916 0 : struct sched_dl_entity *dl_se = &p->dl;
2917 :
2918 0 : dl_se->dl_runtime = attr->sched_runtime;
2919 0 : dl_se->dl_deadline = attr->sched_deadline;
2920 0 : dl_se->dl_period = attr->sched_period ?: dl_se->dl_deadline;
2921 0 : dl_se->flags = attr->sched_flags & SCHED_DL_FLAGS;
2922 0 : dl_se->dl_bw = to_ratio(dl_se->dl_period, dl_se->dl_runtime);
2923 0 : dl_se->dl_density = to_ratio(dl_se->dl_deadline, dl_se->dl_runtime);
2924 0 : }
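/*
 * Illustrative example (hypothetical parameters): a task asking for
 * sched_runtime = 10ms, sched_deadline = 30ms, sched_period = 100ms
 * ends up with (BW_SHIFT = 20, i.e. 1.0 == 1 << 20):
 *
 *	dl_bw      = to_ratio(100ms, 10ms) = (10/100) << BW_SHIFT ~= 104857
 *	dl_density = to_ratio(30ms, 10ms)  = (10/30)  << BW_SHIFT ~= 349525
 *
 * dl_bw is what per-root_domain admission control sums up, while
 * dl_density (runtime/deadline) feeds the revised CBS wakeup check.
 */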
2925 :
2926 0 : void __getparam_dl(struct task_struct *p, struct sched_attr *attr)
2927 : {
2928 0 : struct sched_dl_entity *dl_se = &p->dl;
2929 :
2930 0 : attr->sched_priority = p->rt_priority;
2931 0 : attr->sched_runtime = dl_se->dl_runtime;
2932 0 : attr->sched_deadline = dl_se->dl_deadline;
2933 0 : attr->sched_period = dl_se->dl_period;
2934 0 : attr->sched_flags &= ~SCHED_DL_FLAGS;
2935 0 : attr->sched_flags |= dl_se->flags;
2936 0 : }
2937 :
2938 : /*
2939 : * This function validates the new parameters of a -deadline task.
2940 : * We require the deadline to be non-zero and greater than or equal
2941 : * to the runtime, and the period to be either zero or greater than
2942 : * or equal to the deadline. Furthermore, we have to be sure that
2943 : * user parameters are above the internal resolution of 1us (we
2944 : * check sched_runtime only since it is always the smaller one) and
2945 : * below 2^63 ns (we have to check both sched_deadline and
2946 : * sched_period, as the latter can be zero).
2947 : */
2948 0 : bool __checkparam_dl(const struct sched_attr *attr)
2949 : {
2950 : u64 period, max, min;
2951 :
2952 : /* special dl tasks don't actually use any parameter */
2953 0 : if (attr->sched_flags & SCHED_FLAG_SUGOV)
2954 : return true;
2955 :
2956 : /* deadline != 0 */
2957 0 : if (attr->sched_deadline == 0)
2958 : return false;
2959 :
2960 : /*
2961 : * Since we truncate DL_SCALE bits, make sure we're at least
2962 : * that big.
2963 : */
2964 0 : if (attr->sched_runtime < (1ULL << DL_SCALE))
2965 : return false;
2966 :
2967 : /*
2968 : * Since we use the MSB for wrap-around and sign issues, make
2969 : * sure it's not set (mind that period can be equal to zero).
2970 : */
2971 0 : if (attr->sched_deadline & (1ULL << 63) ||
2972 0 : attr->sched_period & (1ULL << 63))
2973 : return false;
2974 :
2975 0 : period = attr->sched_period;
2976 0 : if (!period)
2977 0 : period = attr->sched_deadline;
2978 :
2979 : /* runtime <= deadline <= period (if period != 0) */
2980 0 : if (period < attr->sched_deadline ||
2981 : attr->sched_deadline < attr->sched_runtime)
2982 : return false;
2983 :
2984 0 : max = (u64)READ_ONCE(sysctl_sched_dl_period_max) * NSEC_PER_USEC;
2985 0 : min = (u64)READ_ONCE(sysctl_sched_dl_period_min) * NSEC_PER_USEC;
2986 :
2987 0 : if (period < min || period > max)
2988 : return false;
2989 :
2990 0 : return true;
2991 : }
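/*
 * Illustrative userspace sketch (not kernel code): parameters that pass
 * the checks above, i.e. runtime <= deadline <= period, runtime above the
 * 1us resolution, and the period within the sched_deadline_period_{min,max}_us
 * limits. sched_setattr() has no glibc wrapper, so it goes through syscall():
 *
 *	#include <linux/sched.h>
 *	#include <linux/sched/types.h>
 *	#include <stdio.h>
 *	#include <sys/syscall.h>
 *	#include <unistd.h>
 *
 *	struct sched_attr attr = {
 *		.size           = sizeof(attr),
 *		.sched_policy   = SCHED_DEADLINE,
 *		.sched_runtime  =  5 * 1000 * 1000,	//  5 ms, in ns
 *		.sched_deadline = 10 * 1000 * 1000,	// 10 ms
 *		.sched_period   = 20 * 1000 * 1000,	// 20 ms
 *	};
 *
 *	if (syscall(SYS_sched_setattr, 0, &attr, 0))
 *		perror("sched_setattr");	// e.g. -EBUSY on admission failure
 *
 * All times are in nanoseconds; pid 0 means the calling thread.
 */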
2992 :
2993 : /*
2994 : * This function clears the sched_dl_entity static params.
2995 : */
2996 176 : void __dl_clear_params(struct task_struct *p)
2997 : {
2998 176 : struct sched_dl_entity *dl_se = &p->dl;
2999 :
3000 176 : dl_se->dl_runtime = 0;
3001 176 : dl_se->dl_deadline = 0;
3002 176 : dl_se->dl_period = 0;
3003 176 : dl_se->flags = 0;
3004 176 : dl_se->dl_bw = 0;
3005 176 : dl_se->dl_density = 0;
3006 :
3007 176 : dl_se->dl_throttled = 0;
3008 176 : dl_se->dl_yielded = 0;
3009 176 : dl_se->dl_non_contending = 0;
3010 176 : dl_se->dl_overrun = 0;
3011 :
3012 : #ifdef CONFIG_RT_MUTEXES
3013 176 : dl_se->pi_se = dl_se;
3014 : #endif
3015 176 : }
3016 :
3017 0 : bool dl_param_changed(struct task_struct *p, const struct sched_attr *attr)
3018 : {
3019 0 : struct sched_dl_entity *dl_se = &p->dl;
3020 :
3021 0 : if (dl_se->dl_runtime != attr->sched_runtime ||
3022 0 : dl_se->dl_deadline != attr->sched_deadline ||
3023 0 : dl_se->dl_period != attr->sched_period ||
3024 0 : dl_se->flags != (attr->sched_flags & SCHED_DL_FLAGS))
3025 : return true;
3026 :
3027 0 : return false;
3028 : }
3029 :
3030 : #ifdef CONFIG_SMP
3031 : int dl_cpuset_cpumask_can_shrink(const struct cpumask *cur,
3032 : const struct cpumask *trial)
3033 : {
3034 : unsigned long flags, cap;
3035 : struct dl_bw *cur_dl_b;
3036 : int ret = 1;
3037 :
3038 : rcu_read_lock_sched();
3039 : cur_dl_b = dl_bw_of(cpumask_any(cur));
3040 : cap = __dl_bw_capacity(trial);
3041 : raw_spin_lock_irqsave(&cur_dl_b->lock, flags);
3042 : if (__dl_overflow(cur_dl_b, cap, 0, 0))
3043 : ret = 0;
3044 : raw_spin_unlock_irqrestore(&cur_dl_b->lock, flags);
3045 : rcu_read_unlock_sched();
3046 :
3047 : return ret;
3048 : }
3049 :
3050 : enum dl_bw_request {
3051 : dl_bw_req_check_overflow = 0,
3052 : dl_bw_req_alloc,
3053 : dl_bw_req_free
3054 : };
3055 :
3056 : static int dl_bw_manage(enum dl_bw_request req, int cpu, u64 dl_bw)
3057 : {
3058 : unsigned long flags;
3059 : struct dl_bw *dl_b;
3060 : bool overflow = 0;
3061 :
3062 : rcu_read_lock_sched();
3063 : dl_b = dl_bw_of(cpu);
3064 : raw_spin_lock_irqsave(&dl_b->lock, flags);
3065 :
3066 : if (req == dl_bw_req_free) {
3067 : __dl_sub(dl_b, dl_bw, dl_bw_cpus(cpu));
3068 : } else {
3069 : unsigned long cap = dl_bw_capacity(cpu);
3070 :
3071 : overflow = __dl_overflow(dl_b, cap, 0, dl_bw);
3072 :
3073 : if (req == dl_bw_req_alloc && !overflow) {
3074 : /*
3075 : * We reserve space in the destination
3076 : * root_domain, as we can't fail after this point.
3077 : * We will free resources in the source root_domain
3078 : * later on (see set_cpus_allowed_dl()).
3079 : */
3080 : __dl_add(dl_b, dl_bw, dl_bw_cpus(cpu));
3081 : }
3082 : }
3083 :
3084 : raw_spin_unlock_irqrestore(&dl_b->lock, flags);
3085 : rcu_read_unlock_sched();
3086 :
3087 : return overflow ? -EBUSY : 0;
3088 : }
3089 :
3090 : int dl_bw_check_overflow(int cpu)
3091 : {
3092 : return dl_bw_manage(dl_bw_req_check_overflow, cpu, 0);
3093 : }
3094 :
3095 : int dl_bw_alloc(int cpu, u64 dl_bw)
3096 : {
3097 : return dl_bw_manage(dl_bw_req_alloc, cpu, dl_bw);
3098 : }
3099 :
3100 : void dl_bw_free(int cpu, u64 dl_bw)
3101 : {
3102 : dl_bw_manage(dl_bw_req_free, cpu, dl_bw);
3103 : }
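/*
 * Usage sketch (hedged; the actual call sites live outside this file):
 * a caller that wants to account a -deadline task's bandwidth on @cpu's
 * root_domain first reserves it, and gives it back if a later step fails:
 *
 *	if (dl_bw_alloc(cpu, p->dl.dl_bw))
 *		return -EBUSY;			// no room on the destination
 *	...
 *	dl_bw_free(cpu, p->dl.dl_bw);		// error/undo path
 */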
3104 : #endif
3105 :
3106 : #ifdef CONFIG_SCHED_DEBUG
3107 : void print_dl_stats(struct seq_file *m, int cpu)
3108 : {
3109 : print_dl_rq(m, cpu, &cpu_rq(cpu)->dl);
3110 : }
3111 : #endif /* CONFIG_SCHED_DEBUG */
|