LCOV - code coverage report
Current view: top level - kernel/sched - rt.c (source / functions)
Test: coverage.info
Date: 2023-07-19 18:55:55
Coverage:   Lines: 20 of 383 (5.2 %)   Functions: 3 of 39 (7.7 %)

          Line data    Source code
       1             : // SPDX-License-Identifier: GPL-2.0
       2             : /*
       3             :  * Real-Time Scheduling Class (mapped to the SCHED_FIFO and SCHED_RR
       4             :  * policies)
       5             :  */
       6             : 
       7             : int sched_rr_timeslice = RR_TIMESLICE;
       8             : /* More than 4 hours if BW_SHIFT equals 20. */
       9             : static const u64 max_rt_runtime = MAX_BW;
      10             : 
      11             : static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun);
      12             : 
      13             : struct rt_bandwidth def_rt_bandwidth;
      14             : 
      15             : /*
       16             :  * Period over which we measure RT task CPU usage, in microseconds (us).
      17             :  * default: 1s
      18             :  */
      19             : unsigned int sysctl_sched_rt_period = 1000000;
      20             : 
      21             : /*
       22             :  * Part of the period during which we allow RT tasks to run, in microseconds (us).
      23             :  * default: 0.95s
      24             :  */
      25             : int sysctl_sched_rt_runtime = 950000;
      26             : 
      27             : #ifdef CONFIG_SYSCTL
      28             : static int sysctl_sched_rr_timeslice = (MSEC_PER_SEC / HZ) * RR_TIMESLICE;
      29             : static int sched_rt_handler(struct ctl_table *table, int write, void *buffer,
      30             :                 size_t *lenp, loff_t *ppos);
      31             : static int sched_rr_handler(struct ctl_table *table, int write, void *buffer,
      32             :                 size_t *lenp, loff_t *ppos);
      33             : static struct ctl_table sched_rt_sysctls[] = {
      34             :         {
      35             :                 .procname       = "sched_rt_period_us",
      36             :                 .data           = &sysctl_sched_rt_period,
      37             :                 .maxlen         = sizeof(unsigned int),
      38             :                 .mode           = 0644,
      39             :                 .proc_handler   = sched_rt_handler,
      40             :         },
      41             :         {
      42             :                 .procname       = "sched_rt_runtime_us",
      43             :                 .data           = &sysctl_sched_rt_runtime,
      44             :                 .maxlen         = sizeof(int),
      45             :                 .mode           = 0644,
      46             :                 .proc_handler   = sched_rt_handler,
      47             :         },
      48             :         {
      49             :                 .procname       = "sched_rr_timeslice_ms",
      50             :                 .data           = &sysctl_sched_rr_timeslice,
      51             :                 .maxlen         = sizeof(int),
      52             :                 .mode           = 0644,
      53             :                 .proc_handler   = sched_rr_handler,
      54             :         },
      55             :         {}
      56             : };
      57             : 
      58           1 : static int __init sched_rt_sysctl_init(void)
      59             : {
      60           1 :         register_sysctl_init("kernel", sched_rt_sysctls);
      61           1 :         return 0;
      62             : }
      63             : late_initcall(sched_rt_sysctl_init);
      64             : #endif
      65             : 
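The table above registers three knobs under /proc/sys/kernel/. With the defaults shown earlier, RT tasks may consume at most 950000 of every 1000000 microseconds, i.e. 95% of each CPU, and writing -1 to sched_rt_runtime_us removes the limit (RUNTIME_INF). A minimal userspace sketch for inspecting the knobs, for illustration only (assumes procfs is mounted at /proc; not part of rt.c and not covered by this report):

#include <stdio.h>

int main(void)
{
	long period_us = 0, runtime_us = 0;
	FILE *f;

	if ((f = fopen("/proc/sys/kernel/sched_rt_period_us", "r"))) {
		if (fscanf(f, "%ld", &period_us) != 1)
			period_us = 0;
		fclose(f);
	}
	if ((f = fopen("/proc/sys/kernel/sched_rt_runtime_us", "r"))) {
		if (fscanf(f, "%ld", &runtime_us) != 1)
			runtime_us = 0;
		fclose(f);
	}

	if (runtime_us < 0)		/* -1: RT throttling disabled */
		printf("RT throttling disabled\n");
	else if (period_us > 0)
		printf("RT tasks limited to %.1f%% of each CPU\n",
		       100.0 * runtime_us / period_us);
	return 0;
}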
      66           0 : static enum hrtimer_restart sched_rt_period_timer(struct hrtimer *timer)
      67             : {
      68           0 :         struct rt_bandwidth *rt_b =
      69           0 :                 container_of(timer, struct rt_bandwidth, rt_period_timer);
      70           0 :         int idle = 0;
      71             :         int overrun;
      72             : 
      73           0 :         raw_spin_lock(&rt_b->rt_runtime_lock);
      74             :         for (;;) {
      75           0 :                 overrun = hrtimer_forward_now(timer, rt_b->rt_period);
      76           0 :                 if (!overrun)
      77             :                         break;
      78             : 
      79           0 :                 raw_spin_unlock(&rt_b->rt_runtime_lock);
      80           0 :                 idle = do_sched_rt_period_timer(rt_b, overrun);
      81           0 :                 raw_spin_lock(&rt_b->rt_runtime_lock);
      82             :         }
      83           0 :         if (idle)
      84           0 :                 rt_b->rt_period_active = 0;
      85           0 :         raw_spin_unlock(&rt_b->rt_runtime_lock);
      86             : 
      87           0 :         return idle ? HRTIMER_NORESTART : HRTIMER_RESTART;
      88             : }
      89             : 
      90           1 : void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime)
      91             : {
      92           1 :         rt_b->rt_period = ns_to_ktime(period);
      93           1 :         rt_b->rt_runtime = runtime;
      94             : 
      95             :         raw_spin_lock_init(&rt_b->rt_runtime_lock);
      96             : 
      97           1 :         hrtimer_init(&rt_b->rt_period_timer, CLOCK_MONOTONIC,
      98             :                      HRTIMER_MODE_REL_HARD);
      99           1 :         rt_b->rt_period_timer.function = sched_rt_period_timer;
     100           1 : }
     101             : 
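Note that init_rt_bandwidth() takes the period and runtime in nanoseconds, while the sysctls above are in microseconds. A minimal sketch of the conversion the scheduler core is expected to perform when seeding def_rt_bandwidth (the helper names below are hypothetical, shown only to illustrate the unit handling):

static inline u64 example_rt_period_ns(void)
{
	return (u64)sysctl_sched_rt_period * NSEC_PER_USEC;	/* default: 1s */
}

static inline u64 example_rt_runtime_ns(void)
{
	if (sysctl_sched_rt_runtime < 0)
		return RUNTIME_INF;				/* no limit */
	return (u64)sysctl_sched_rt_runtime * NSEC_PER_USEC;	/* default: 0.95s */
}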
     102           0 : static inline void do_start_rt_bandwidth(struct rt_bandwidth *rt_b)
     103             : {
     104           0 :         raw_spin_lock(&rt_b->rt_runtime_lock);
     105           0 :         if (!rt_b->rt_period_active) {
     106           0 :                 rt_b->rt_period_active = 1;
     107             :                 /*
      108             :  * SCHED_DEADLINE updates the bandwidth, as a runaway
     109             :                  * RT task with a DL task could hog a CPU. But DL does
     110             :                  * not reset the period. If a deadline task was running
     111             :                  * without an RT task running, it can cause RT tasks to
     112             :                  * throttle when they start up. Kick the timer right away
     113             :                  * to update the period.
     114             :                  */
     115           0 :                 hrtimer_forward_now(&rt_b->rt_period_timer, ns_to_ktime(0));
     116           0 :                 hrtimer_start_expires(&rt_b->rt_period_timer,
     117             :                                       HRTIMER_MODE_ABS_PINNED_HARD);
     118             :         }
     119           0 :         raw_spin_unlock(&rt_b->rt_runtime_lock);
     120           0 : }
     121             : 
     122             : static void start_rt_bandwidth(struct rt_bandwidth *rt_b)
     123             : {
     124           0 :         if (!rt_bandwidth_enabled() || rt_b->rt_runtime == RUNTIME_INF)
     125             :                 return;
     126             : 
     127           0 :         do_start_rt_bandwidth(rt_b);
     128             : }
     129             : 
     130           1 : void init_rt_rq(struct rt_rq *rt_rq)
     131             : {
     132             :         struct rt_prio_array *array;
     133             :         int i;
     134             : 
     135           1 :         array = &rt_rq->active;
     136         101 :         for (i = 0; i < MAX_RT_PRIO; i++) {
     137         200 :                 INIT_LIST_HEAD(array->queue + i);
     138         200 :                 __clear_bit(i, array->bitmap);
     139             :         }
     140             :         /* delimiter for bitsearch: */
     141           2 :         __set_bit(MAX_RT_PRIO, array->bitmap);
     142             : 
     143             : #if defined CONFIG_SMP
     144             :         rt_rq->highest_prio.curr = MAX_RT_PRIO-1;
     145             :         rt_rq->highest_prio.next = MAX_RT_PRIO-1;
     146             :         rt_rq->rt_nr_migratory = 0;
     147             :         rt_rq->overloaded = 0;
     148             :         plist_head_init(&rt_rq->pushable_tasks);
     149             : #endif /* CONFIG_SMP */
      150             :         /* We start in dequeued state, because no RT tasks are queued */
     151           1 :         rt_rq->rt_queued = 0;
     152             : 
     153           1 :         rt_rq->rt_time = 0;
     154           1 :         rt_rq->rt_throttled = 0;
     155           1 :         rt_rq->rt_runtime = 0;
     156             :         raw_spin_lock_init(&rt_rq->rt_runtime_lock);
     157           1 : }
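The bit set at index MAX_RT_PRIO above is a sentinel: the priority-bitmap search always terminates on it, so a result of MAX_RT_PRIO can be read as "no RT task queued". A minimal sketch of that usage (hypothetical helper; the real pick path appears later in this file):

static inline int example_first_queued_prio(struct rt_prio_array *array)
{
	int idx = sched_find_first_bit(array->bitmap);

	return idx < MAX_RT_PRIO ? idx : -1;	/* -1: nothing queued */
}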
     158             : 
     159             : #ifdef CONFIG_RT_GROUP_SCHED
     160             : static void destroy_rt_bandwidth(struct rt_bandwidth *rt_b)
     161             : {
     162             :         hrtimer_cancel(&rt_b->rt_period_timer);
     163             : }
     164             : 
     165             : #define rt_entity_is_task(rt_se) (!(rt_se)->my_q)
     166             : 
     167             : static inline struct task_struct *rt_task_of(struct sched_rt_entity *rt_se)
     168             : {
     169             : #ifdef CONFIG_SCHED_DEBUG
     170             :         WARN_ON_ONCE(!rt_entity_is_task(rt_se));
     171             : #endif
     172             :         return container_of(rt_se, struct task_struct, rt);
     173             : }
     174             : 
     175             : static inline struct rq *rq_of_rt_rq(struct rt_rq *rt_rq)
     176             : {
     177             :         return rt_rq->rq;
     178             : }
     179             : 
     180             : static inline struct rt_rq *rt_rq_of_se(struct sched_rt_entity *rt_se)
     181             : {
     182             :         return rt_se->rt_rq;
     183             : }
     184             : 
     185             : static inline struct rq *rq_of_rt_se(struct sched_rt_entity *rt_se)
     186             : {
     187             :         struct rt_rq *rt_rq = rt_se->rt_rq;
     188             : 
     189             :         return rt_rq->rq;
     190             : }
     191             : 
     192             : void unregister_rt_sched_group(struct task_group *tg)
     193             : {
     194             :         if (tg->rt_se)
     195             :                 destroy_rt_bandwidth(&tg->rt_bandwidth);
     196             : 
     197             : }
     198             : 
     199             : void free_rt_sched_group(struct task_group *tg)
     200             : {
     201             :         int i;
     202             : 
     203             :         for_each_possible_cpu(i) {
     204             :                 if (tg->rt_rq)
     205             :                         kfree(tg->rt_rq[i]);
     206             :                 if (tg->rt_se)
     207             :                         kfree(tg->rt_se[i]);
     208             :         }
     209             : 
     210             :         kfree(tg->rt_rq);
     211             :         kfree(tg->rt_se);
     212             : }
     213             : 
     214             : void init_tg_rt_entry(struct task_group *tg, struct rt_rq *rt_rq,
     215             :                 struct sched_rt_entity *rt_se, int cpu,
     216             :                 struct sched_rt_entity *parent)
     217             : {
     218             :         struct rq *rq = cpu_rq(cpu);
     219             : 
     220             :         rt_rq->highest_prio.curr = MAX_RT_PRIO-1;
     221             :         rt_rq->rt_nr_boosted = 0;
     222             :         rt_rq->rq = rq;
     223             :         rt_rq->tg = tg;
     224             : 
     225             :         tg->rt_rq[cpu] = rt_rq;
     226             :         tg->rt_se[cpu] = rt_se;
     227             : 
     228             :         if (!rt_se)
     229             :                 return;
     230             : 
     231             :         if (!parent)
     232             :                 rt_se->rt_rq = &rq->rt;
     233             :         else
     234             :                 rt_se->rt_rq = parent->my_q;
     235             : 
     236             :         rt_se->my_q = rt_rq;
     237             :         rt_se->parent = parent;
     238             :         INIT_LIST_HEAD(&rt_se->run_list);
     239             : }
     240             : 
     241             : int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent)
     242             : {
     243             :         struct rt_rq *rt_rq;
     244             :         struct sched_rt_entity *rt_se;
     245             :         int i;
     246             : 
     247             :         tg->rt_rq = kcalloc(nr_cpu_ids, sizeof(rt_rq), GFP_KERNEL);
     248             :         if (!tg->rt_rq)
     249             :                 goto err;
     250             :         tg->rt_se = kcalloc(nr_cpu_ids, sizeof(rt_se), GFP_KERNEL);
     251             :         if (!tg->rt_se)
     252             :                 goto err;
     253             : 
     254             :         init_rt_bandwidth(&tg->rt_bandwidth,
     255             :                         ktime_to_ns(def_rt_bandwidth.rt_period), 0);
     256             : 
     257             :         for_each_possible_cpu(i) {
     258             :                 rt_rq = kzalloc_node(sizeof(struct rt_rq),
     259             :                                      GFP_KERNEL, cpu_to_node(i));
     260             :                 if (!rt_rq)
     261             :                         goto err;
     262             : 
     263             :                 rt_se = kzalloc_node(sizeof(struct sched_rt_entity),
     264             :                                      GFP_KERNEL, cpu_to_node(i));
     265             :                 if (!rt_se)
     266             :                         goto err_free_rq;
     267             : 
     268             :                 init_rt_rq(rt_rq);
     269             :                 rt_rq->rt_runtime = tg->rt_bandwidth.rt_runtime;
     270             :                 init_tg_rt_entry(tg, rt_rq, rt_se, i, parent->rt_se[i]);
     271             :         }
     272             : 
     273             :         return 1;
     274             : 
     275             : err_free_rq:
     276             :         kfree(rt_rq);
     277             : err:
     278             :         return 0;
     279             : }
     280             : 
     281             : #else /* CONFIG_RT_GROUP_SCHED */
     282             : 
     283             : #define rt_entity_is_task(rt_se) (1)
     284             : 
     285             : static inline struct task_struct *rt_task_of(struct sched_rt_entity *rt_se)
     286             : {
     287           0 :         return container_of(rt_se, struct task_struct, rt);
     288             : }
     289             : 
     290             : static inline struct rq *rq_of_rt_rq(struct rt_rq *rt_rq)
     291             : {
     292           0 :         return container_of(rt_rq, struct rq, rt);
     293             : }
     294             : 
     295             : static inline struct rq *rq_of_rt_se(struct sched_rt_entity *rt_se)
     296             : {
     297           0 :         struct task_struct *p = rt_task_of(rt_se);
     298             : 
     299           0 :         return task_rq(p);
     300             : }
     301             : 
     302             : static inline struct rt_rq *rt_rq_of_se(struct sched_rt_entity *rt_se)
     303             : {
     304           0 :         struct rq *rq = rq_of_rt_se(rt_se);
     305             : 
     306             :         return &rq->rt;
     307             : }
     308             : 
     309           0 : void unregister_rt_sched_group(struct task_group *tg) { }
     310             : 
     311           0 : void free_rt_sched_group(struct task_group *tg) { }
     312             : 
     313           0 : int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent)
     314             : {
     315           0 :         return 1;
     316             : }
     317             : #endif /* CONFIG_RT_GROUP_SCHED */
     318             : 
     319             : #ifdef CONFIG_SMP
     320             : 
     321             : static inline bool need_pull_rt_task(struct rq *rq, struct task_struct *prev)
     322             : {
     323             :         /* Try to pull RT tasks here if we lower this rq's prio */
     324             :         return rq->online && rq->rt.highest_prio.curr > prev->prio;
     325             : }
     326             : 
     327             : static inline int rt_overloaded(struct rq *rq)
     328             : {
     329             :         return atomic_read(&rq->rd->rto_count);
     330             : }
     331             : 
     332             : static inline void rt_set_overload(struct rq *rq)
     333             : {
     334             :         if (!rq->online)
     335             :                 return;
     336             : 
     337             :         cpumask_set_cpu(rq->cpu, rq->rd->rto_mask);
     338             :         /*
     339             :          * Make sure the mask is visible before we set
     340             :          * the overload count. That is checked to determine
     341             :          * if we should look at the mask. It would be a shame
     342             :          * if we looked at the mask, but the mask was not
     343             :          * updated yet.
     344             :          *
     345             :          * Matched by the barrier in pull_rt_task().
     346             :          */
     347             :         smp_wmb();
     348             :         atomic_inc(&rq->rd->rto_count);
     349             : }
     350             : 
     351             : static inline void rt_clear_overload(struct rq *rq)
     352             : {
     353             :         if (!rq->online)
     354             :                 return;
     355             : 
     356             :         /* the order here really doesn't matter */
     357             :         atomic_dec(&rq->rd->rto_count);
     358             :         cpumask_clear_cpu(rq->cpu, rq->rd->rto_mask);
     359             : }
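The smp_wmb() in rt_set_overload() pairs with a read barrier on the consumer side: pull_rt_task() (further down in rt.c) first checks rto_count and only then inspects rto_mask. A rough sketch of that ordering, for illustration only (the helper name is hypothetical):

static inline bool example_rt_overload_visible(struct rq *this_rq)
{
	if (!atomic_read(&this_rq->rd->rto_count))
		return false;

	/*
	 * Pairs with the smp_wmb() in rt_set_overload(): a non-zero
	 * rto_count guarantees the rto_mask update is visible too.
	 */
	smp_rmb();
	return true;
}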
     360             : 
     361             : static void update_rt_migration(struct rt_rq *rt_rq)
     362             : {
     363             :         if (rt_rq->rt_nr_migratory && rt_rq->rt_nr_total > 1) {
     364             :                 if (!rt_rq->overloaded) {
     365             :                         rt_set_overload(rq_of_rt_rq(rt_rq));
     366             :                         rt_rq->overloaded = 1;
     367             :                 }
     368             :         } else if (rt_rq->overloaded) {
     369             :                 rt_clear_overload(rq_of_rt_rq(rt_rq));
     370             :                 rt_rq->overloaded = 0;
     371             :         }
     372             : }
     373             : 
     374             : static void inc_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
     375             : {
     376             :         struct task_struct *p;
     377             : 
     378             :         if (!rt_entity_is_task(rt_se))
     379             :                 return;
     380             : 
     381             :         p = rt_task_of(rt_se);
     382             :         rt_rq = &rq_of_rt_rq(rt_rq)->rt;
     383             : 
     384             :         rt_rq->rt_nr_total++;
     385             :         if (p->nr_cpus_allowed > 1)
     386             :                 rt_rq->rt_nr_migratory++;
     387             : 
     388             :         update_rt_migration(rt_rq);
     389             : }
     390             : 
     391             : static void dec_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
     392             : {
     393             :         struct task_struct *p;
     394             : 
     395             :         if (!rt_entity_is_task(rt_se))
     396             :                 return;
     397             : 
     398             :         p = rt_task_of(rt_se);
     399             :         rt_rq = &rq_of_rt_rq(rt_rq)->rt;
     400             : 
     401             :         rt_rq->rt_nr_total--;
     402             :         if (p->nr_cpus_allowed > 1)
     403             :                 rt_rq->rt_nr_migratory--;
     404             : 
     405             :         update_rt_migration(rt_rq);
     406             : }
     407             : 
     408             : static inline int has_pushable_tasks(struct rq *rq)
     409             : {
     410             :         return !plist_head_empty(&rq->rt.pushable_tasks);
     411             : }
     412             : 
     413             : static DEFINE_PER_CPU(struct balance_callback, rt_push_head);
     414             : static DEFINE_PER_CPU(struct balance_callback, rt_pull_head);
     415             : 
     416             : static void push_rt_tasks(struct rq *);
     417             : static void pull_rt_task(struct rq *);
     418             : 
     419             : static inline void rt_queue_push_tasks(struct rq *rq)
     420             : {
     421             :         if (!has_pushable_tasks(rq))
     422             :                 return;
     423             : 
     424             :         queue_balance_callback(rq, &per_cpu(rt_push_head, rq->cpu), push_rt_tasks);
     425             : }
     426             : 
     427             : static inline void rt_queue_pull_task(struct rq *rq)
     428             : {
     429             :         queue_balance_callback(rq, &per_cpu(rt_pull_head, rq->cpu), pull_rt_task);
     430             : }
     431             : 
     432             : static void enqueue_pushable_task(struct rq *rq, struct task_struct *p)
     433             : {
     434             :         plist_del(&p->pushable_tasks, &rq->rt.pushable_tasks);
     435             :         plist_node_init(&p->pushable_tasks, p->prio);
     436             :         plist_add(&p->pushable_tasks, &rq->rt.pushable_tasks);
     437             : 
     438             :         /* Update the highest prio pushable task */
     439             :         if (p->prio < rq->rt.highest_prio.next)
     440             :                 rq->rt.highest_prio.next = p->prio;
     441             : }
     442             : 
     443             : static void dequeue_pushable_task(struct rq *rq, struct task_struct *p)
     444             : {
     445             :         plist_del(&p->pushable_tasks, &rq->rt.pushable_tasks);
     446             : 
     447             :         /* Update the new highest prio pushable task */
     448             :         if (has_pushable_tasks(rq)) {
     449             :                 p = plist_first_entry(&rq->rt.pushable_tasks,
     450             :                                       struct task_struct, pushable_tasks);
     451             :                 rq->rt.highest_prio.next = p->prio;
     452             :         } else {
     453             :                 rq->rt.highest_prio.next = MAX_RT_PRIO-1;
     454             :         }
     455             : }
     456             : 
     457             : #else
     458             : 
     459             : static inline void enqueue_pushable_task(struct rq *rq, struct task_struct *p)
     460             : {
     461             : }
     462             : 
     463             : static inline void dequeue_pushable_task(struct rq *rq, struct task_struct *p)
     464             : {
     465             : }
     466             : 
     467             : static inline
     468             : void inc_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
     469             : {
     470             : }
     471             : 
     472             : static inline
     473             : void dec_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
     474             : {
     475             : }
     476             : 
     477             : static inline void rt_queue_push_tasks(struct rq *rq)
     478             : {
     479             : }
     480             : #endif /* CONFIG_SMP */
     481             : 
     482             : static void enqueue_top_rt_rq(struct rt_rq *rt_rq);
     483             : static void dequeue_top_rt_rq(struct rt_rq *rt_rq, unsigned int count);
     484             : 
     485             : static inline int on_rt_rq(struct sched_rt_entity *rt_se)
     486             : {
     487           0 :         return rt_se->on_rq;
     488             : }
     489             : 
     490             : #ifdef CONFIG_UCLAMP_TASK
     491             : /*
     492             :  * Verify the fitness of task @p to run on @cpu taking into account the uclamp
     493             :  * settings.
     494             :  *
     495             :  * This check is only important for heterogeneous systems where uclamp_min value
      496             :  * is higher than the capacity of a @cpu. For non-heterogeneous systems this
     497             :  * function will always return true.
     498             :  *
     499             :  * The function will return true if the capacity of the @cpu is >= the
     500             :  * uclamp_min and false otherwise.
     501             :  *
     502             :  * Note that uclamp_min will be clamped to uclamp_max if uclamp_min
     503             :  * > uclamp_max.
     504             :  */
     505             : static inline bool rt_task_fits_capacity(struct task_struct *p, int cpu)
     506             : {
     507             :         unsigned int min_cap;
     508             :         unsigned int max_cap;
     509             :         unsigned int cpu_cap;
     510             : 
     511             :         /* Only heterogeneous systems can benefit from this check */
     512             :         if (!sched_asym_cpucap_active())
     513             :                 return true;
     514             : 
     515             :         min_cap = uclamp_eff_value(p, UCLAMP_MIN);
     516             :         max_cap = uclamp_eff_value(p, UCLAMP_MAX);
     517             : 
     518             :         cpu_cap = capacity_orig_of(cpu);
     519             : 
     520             :         return cpu_cap >= min(min_cap, max_cap);
     521             : }
     522             : #else
     523             : static inline bool rt_task_fits_capacity(struct task_struct *p, int cpu)
     524             : {
     525             :         return true;
     526             : }
     527             : #endif
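A short worked example of the capacity check with made-up numbers: on an asymmetric system a little CPU might report capacity 512 and a big CPU 1024.

static inline bool example_fits(unsigned int cpu_cap,
				unsigned int min_cap, unsigned int max_cap)
{
	return cpu_cap >= min(min_cap, max_cap);	/* same test as above */
}

/*
 * example_fits(1024, 768, 1024) -> true:  the big CPU satisfies uclamp_min.
 * example_fits( 512, 768, 1024) -> false: the little CPU is too small.
 * example_fits( 512, 768,  256) -> true:  uclamp_min is clamped to uclamp_max.
 */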
     528             : 
     529             : #ifdef CONFIG_RT_GROUP_SCHED
     530             : 
     531             : static inline u64 sched_rt_runtime(struct rt_rq *rt_rq)
     532             : {
     533             :         if (!rt_rq->tg)
     534             :                 return RUNTIME_INF;
     535             : 
     536             :         return rt_rq->rt_runtime;
     537             : }
     538             : 
     539             : static inline u64 sched_rt_period(struct rt_rq *rt_rq)
     540             : {
     541             :         return ktime_to_ns(rt_rq->tg->rt_bandwidth.rt_period);
     542             : }
     543             : 
     544             : typedef struct task_group *rt_rq_iter_t;
     545             : 
     546             : static inline struct task_group *next_task_group(struct task_group *tg)
     547             : {
     548             :         do {
     549             :                 tg = list_entry_rcu(tg->list.next,
     550             :                         typeof(struct task_group), list);
     551             :         } while (&tg->list != &task_groups && task_group_is_autogroup(tg));
     552             : 
     553             :         if (&tg->list == &task_groups)
     554             :                 tg = NULL;
     555             : 
     556             :         return tg;
     557             : }
     558             : 
     559             : #define for_each_rt_rq(rt_rq, iter, rq)                                 \
     560             :         for (iter = container_of(&task_groups, typeof(*iter), list);        \
     561             :                 (iter = next_task_group(iter)) &&                       \
     562             :                 (rt_rq = iter->rt_rq[cpu_of(rq)]);)
     563             : 
     564             : #define for_each_sched_rt_entity(rt_se) \
     565             :         for (; rt_se; rt_se = rt_se->parent)
     566             : 
     567             : static inline struct rt_rq *group_rt_rq(struct sched_rt_entity *rt_se)
     568             : {
     569             :         return rt_se->my_q;
     570             : }
     571             : 
     572             : static void enqueue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags);
     573             : static void dequeue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags);
     574             : 
     575             : static void sched_rt_rq_enqueue(struct rt_rq *rt_rq)
     576             : {
     577             :         struct task_struct *curr = rq_of_rt_rq(rt_rq)->curr;
     578             :         struct rq *rq = rq_of_rt_rq(rt_rq);
     579             :         struct sched_rt_entity *rt_se;
     580             : 
     581             :         int cpu = cpu_of(rq);
     582             : 
     583             :         rt_se = rt_rq->tg->rt_se[cpu];
     584             : 
     585             :         if (rt_rq->rt_nr_running) {
     586             :                 if (!rt_se)
     587             :                         enqueue_top_rt_rq(rt_rq);
     588             :                 else if (!on_rt_rq(rt_se))
     589             :                         enqueue_rt_entity(rt_se, 0);
     590             : 
     591             :                 if (rt_rq->highest_prio.curr < curr->prio)
     592             :                         resched_curr(rq);
     593             :         }
     594             : }
     595             : 
     596             : static void sched_rt_rq_dequeue(struct rt_rq *rt_rq)
     597             : {
     598             :         struct sched_rt_entity *rt_se;
     599             :         int cpu = cpu_of(rq_of_rt_rq(rt_rq));
     600             : 
     601             :         rt_se = rt_rq->tg->rt_se[cpu];
     602             : 
     603             :         if (!rt_se) {
     604             :                 dequeue_top_rt_rq(rt_rq, rt_rq->rt_nr_running);
     605             :                 /* Kick cpufreq (see the comment in kernel/sched/sched.h). */
     606             :                 cpufreq_update_util(rq_of_rt_rq(rt_rq), 0);
     607             :         }
     608             :         else if (on_rt_rq(rt_se))
     609             :                 dequeue_rt_entity(rt_se, 0);
     610             : }
     611             : 
     612             : static inline int rt_rq_throttled(struct rt_rq *rt_rq)
     613             : {
     614             :         return rt_rq->rt_throttled && !rt_rq->rt_nr_boosted;
     615             : }
     616             : 
     617             : static int rt_se_boosted(struct sched_rt_entity *rt_se)
     618             : {
     619             :         struct rt_rq *rt_rq = group_rt_rq(rt_se);
     620             :         struct task_struct *p;
     621             : 
     622             :         if (rt_rq)
     623             :                 return !!rt_rq->rt_nr_boosted;
     624             : 
     625             :         p = rt_task_of(rt_se);
     626             :         return p->prio != p->normal_prio;
     627             : }
     628             : 
     629             : #ifdef CONFIG_SMP
     630             : static inline const struct cpumask *sched_rt_period_mask(void)
     631             : {
     632             :         return this_rq()->rd->span;
     633             : }
     634             : #else
     635             : static inline const struct cpumask *sched_rt_period_mask(void)
     636             : {
     637             :         return cpu_online_mask;
     638             : }
     639             : #endif
     640             : 
     641             : static inline
     642             : struct rt_rq *sched_rt_period_rt_rq(struct rt_bandwidth *rt_b, int cpu)
     643             : {
     644             :         return container_of(rt_b, struct task_group, rt_bandwidth)->rt_rq[cpu];
     645             : }
     646             : 
     647             : static inline struct rt_bandwidth *sched_rt_bandwidth(struct rt_rq *rt_rq)
     648             : {
     649             :         return &rt_rq->tg->rt_bandwidth;
     650             : }
     651             : 
     652             : #else /* !CONFIG_RT_GROUP_SCHED */
     653             : 
     654             : static inline u64 sched_rt_runtime(struct rt_rq *rt_rq)
     655             : {
     656             :         return rt_rq->rt_runtime;
     657             : }
     658             : 
     659             : static inline u64 sched_rt_period(struct rt_rq *rt_rq)
     660             : {
     661           0 :         return ktime_to_ns(def_rt_bandwidth.rt_period);
     662             : }
     663             : 
     664             : typedef struct rt_rq *rt_rq_iter_t;
     665             : 
     666             : #define for_each_rt_rq(rt_rq, iter, rq) \
     667             :         for ((void) iter, rt_rq = &rq->rt; rt_rq; rt_rq = NULL)
     668             : 
     669             : #define for_each_sched_rt_entity(rt_se) \
     670             :         for (; rt_se; rt_se = NULL)
     671             : 
     672             : static inline struct rt_rq *group_rt_rq(struct sched_rt_entity *rt_se)
     673             : {
     674             :         return NULL;
     675             : }
     676             : 
     677           0 : static inline void sched_rt_rq_enqueue(struct rt_rq *rt_rq)
     678             : {
     679           0 :         struct rq *rq = rq_of_rt_rq(rt_rq);
     680             : 
     681           0 :         if (!rt_rq->rt_nr_running)
     682             :                 return;
     683             : 
     684           0 :         enqueue_top_rt_rq(rt_rq);
     685           0 :         resched_curr(rq);
     686             : }
     687             : 
     688             : static inline void sched_rt_rq_dequeue(struct rt_rq *rt_rq)
     689             : {
     690           0 :         dequeue_top_rt_rq(rt_rq, rt_rq->rt_nr_running);
     691             : }
     692             : 
     693             : static inline int rt_rq_throttled(struct rt_rq *rt_rq)
     694             : {
     695             :         return rt_rq->rt_throttled;
     696             : }
     697             : 
     698             : static inline const struct cpumask *sched_rt_period_mask(void)
     699             : {
     700             :         return cpu_online_mask;
     701             : }
     702             : 
     703             : static inline
     704             : struct rt_rq *sched_rt_period_rt_rq(struct rt_bandwidth *rt_b, int cpu)
     705             : {
     706           0 :         return &cpu_rq(cpu)->rt;
     707             : }
     708             : 
     709             : static inline struct rt_bandwidth *sched_rt_bandwidth(struct rt_rq *rt_rq)
     710             : {
     711             :         return &def_rt_bandwidth;
     712             : }
     713             : 
     714             : #endif /* CONFIG_RT_GROUP_SCHED */
     715             : 
     716           0 : bool sched_rt_bandwidth_account(struct rt_rq *rt_rq)
     717             : {
     718           0 :         struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
     719             : 
     720           0 :         return (hrtimer_active(&rt_b->rt_period_timer) ||
     721           0 :                 rt_rq->rt_time < rt_b->rt_runtime);
     722             : }
     723             : 
     724             : #ifdef CONFIG_SMP
     725             : /*
     726             :  * We ran out of runtime, see if we can borrow some from our neighbours.
     727             :  */
     728             : static void do_balance_runtime(struct rt_rq *rt_rq)
     729             : {
     730             :         struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
     731             :         struct root_domain *rd = rq_of_rt_rq(rt_rq)->rd;
     732             :         int i, weight;
     733             :         u64 rt_period;
     734             : 
     735             :         weight = cpumask_weight(rd->span);
     736             : 
     737             :         raw_spin_lock(&rt_b->rt_runtime_lock);
     738             :         rt_period = ktime_to_ns(rt_b->rt_period);
     739             :         for_each_cpu(i, rd->span) {
     740             :                 struct rt_rq *iter = sched_rt_period_rt_rq(rt_b, i);
     741             :                 s64 diff;
     742             : 
     743             :                 if (iter == rt_rq)
     744             :                         continue;
     745             : 
     746             :                 raw_spin_lock(&iter->rt_runtime_lock);
     747             :                 /*
     748             :                  * Either all rqs have inf runtime and there's nothing to steal
     749             :                  * or __disable_runtime() below sets a specific rq to inf to
      750             :                  * indicate it's been disabled and disallow stealing.
     751             :                  */
     752             :                 if (iter->rt_runtime == RUNTIME_INF)
     753             :                         goto next;
     754             : 
     755             :                 /*
     756             :                  * From runqueues with spare time, take 1/n part of their
     757             :                  * spare time, but no more than our period.
     758             :                  */
     759             :                 diff = iter->rt_runtime - iter->rt_time;
     760             :                 if (diff > 0) {
     761             :                         diff = div_u64((u64)diff, weight);
     762             :                         if (rt_rq->rt_runtime + diff > rt_period)
     763             :                                 diff = rt_period - rt_rq->rt_runtime;
     764             :                         iter->rt_runtime -= diff;
     765             :                         rt_rq->rt_runtime += diff;
     766             :                         if (rt_rq->rt_runtime == rt_period) {
     767             :                                 raw_spin_unlock(&iter->rt_runtime_lock);
     768             :                                 break;
     769             :                         }
     770             :                 }
     771             : next:
     772             :                 raw_spin_unlock(&iter->rt_runtime_lock);
     773             :         }
     774             :         raw_spin_unlock(&rt_b->rt_runtime_lock);
     775             : }
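As a worked example with the default settings (1 s period, 950 ms runtime per CPU) on a four-CPU root domain: if this runqueue has used up its 950 ms while its three neighbours are idle, each neighbour has 950 ms of spare time and offers 950/4 = 237.5 ms of it. Taking that much from the first neighbour would push this runqueue past the 1 s period, so the transfer is capped at 50 ms; this runqueue then holds a full period of runtime and the loop stops, leaving that neighbour with 900 ms.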
     776             : 
     777             : /*
      778             :  * Ensure this RQ takes back all the runtime it lent to its neighbours.
     779             :  */
     780             : static void __disable_runtime(struct rq *rq)
     781             : {
     782             :         struct root_domain *rd = rq->rd;
     783             :         rt_rq_iter_t iter;
     784             :         struct rt_rq *rt_rq;
     785             : 
     786             :         if (unlikely(!scheduler_running))
     787             :                 return;
     788             : 
     789             :         for_each_rt_rq(rt_rq, iter, rq) {
     790             :                 struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
     791             :                 s64 want;
     792             :                 int i;
     793             : 
     794             :                 raw_spin_lock(&rt_b->rt_runtime_lock);
     795             :                 raw_spin_lock(&rt_rq->rt_runtime_lock);
     796             :                 /*
     797             :                  * Either we're all inf and nobody needs to borrow, or we're
     798             :                  * already disabled and thus have nothing to do, or we have
     799             :                  * exactly the right amount of runtime to take out.
     800             :                  */
     801             :                 if (rt_rq->rt_runtime == RUNTIME_INF ||
     802             :                                 rt_rq->rt_runtime == rt_b->rt_runtime)
     803             :                         goto balanced;
     804             :                 raw_spin_unlock(&rt_rq->rt_runtime_lock);
     805             : 
     806             :                 /*
     807             :                  * Calculate the difference between what we started out with
      808             :                  * and what we currently have; that's the amount of runtime
      809             :                  * we lent out and now have to reclaim.
     810             :                  */
     811             :                 want = rt_b->rt_runtime - rt_rq->rt_runtime;
     812             : 
     813             :                 /*
     814             :                  * Greedy reclaim, take back as much as we can.
     815             :                  */
     816             :                 for_each_cpu(i, rd->span) {
     817             :                         struct rt_rq *iter = sched_rt_period_rt_rq(rt_b, i);
     818             :                         s64 diff;
     819             : 
     820             :                         /*
     821             :                          * Can't reclaim from ourselves or disabled runqueues.
     822             :                          */
     823             :                         if (iter == rt_rq || iter->rt_runtime == RUNTIME_INF)
     824             :                                 continue;
     825             : 
     826             :                         raw_spin_lock(&iter->rt_runtime_lock);
     827             :                         if (want > 0) {
     828             :                                 diff = min_t(s64, iter->rt_runtime, want);
     829             :                                 iter->rt_runtime -= diff;
     830             :                                 want -= diff;
     831             :                         } else {
     832             :                                 iter->rt_runtime -= want;
     833             :                                 want -= want;
     834             :                         }
     835             :                         raw_spin_unlock(&iter->rt_runtime_lock);
     836             : 
     837             :                         if (!want)
     838             :                                 break;
     839             :                 }
     840             : 
     841             :                 raw_spin_lock(&rt_rq->rt_runtime_lock);
     842             :                 /*
     843             :                  * We cannot be left wanting - that would mean some runtime
     844             :                  * leaked out of the system.
     845             :                  */
     846             :                 WARN_ON_ONCE(want);
     847             : balanced:
     848             :                 /*
     849             :                  * Disable all the borrow logic by pretending we have inf
     850             :                  * runtime - in which case borrowing doesn't make sense.
     851             :                  */
     852             :                 rt_rq->rt_runtime = RUNTIME_INF;
     853             :                 rt_rq->rt_throttled = 0;
     854             :                 raw_spin_unlock(&rt_rq->rt_runtime_lock);
     855             :                 raw_spin_unlock(&rt_b->rt_runtime_lock);
     856             : 
     857             :                 /* Make rt_rq available for pick_next_task() */
     858             :                 sched_rt_rq_enqueue(rt_rq);
     859             :         }
     860             : }
     861             : 
     862             : static void __enable_runtime(struct rq *rq)
     863             : {
     864             :         rt_rq_iter_t iter;
     865             :         struct rt_rq *rt_rq;
     866             : 
     867             :         if (unlikely(!scheduler_running))
     868             :                 return;
     869             : 
     870             :         /*
     871             :          * Reset each runqueue's bandwidth settings
     872             :          */
     873             :         for_each_rt_rq(rt_rq, iter, rq) {
     874             :                 struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
     875             : 
     876             :                 raw_spin_lock(&rt_b->rt_runtime_lock);
     877             :                 raw_spin_lock(&rt_rq->rt_runtime_lock);
     878             :                 rt_rq->rt_runtime = rt_b->rt_runtime;
     879             :                 rt_rq->rt_time = 0;
     880             :                 rt_rq->rt_throttled = 0;
     881             :                 raw_spin_unlock(&rt_rq->rt_runtime_lock);
     882             :                 raw_spin_unlock(&rt_b->rt_runtime_lock);
     883             :         }
     884             : }
     885             : 
     886             : static void balance_runtime(struct rt_rq *rt_rq)
     887             : {
     888             :         if (!sched_feat(RT_RUNTIME_SHARE))
     889             :                 return;
     890             : 
     891             :         if (rt_rq->rt_time > rt_rq->rt_runtime) {
     892             :                 raw_spin_unlock(&rt_rq->rt_runtime_lock);
     893             :                 do_balance_runtime(rt_rq);
     894             :                 raw_spin_lock(&rt_rq->rt_runtime_lock);
     895             :         }
     896             : }
     897             : #else /* !CONFIG_SMP */
     898             : static inline void balance_runtime(struct rt_rq *rt_rq) {}
     899             : #endif /* CONFIG_SMP */
     900             : 
     901           0 : static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun)
     902             : {
     903           0 :         int i, idle = 1, throttled = 0;
     904             :         const struct cpumask *span;
     905             : 
     906           0 :         span = sched_rt_period_mask();
     907             : #ifdef CONFIG_RT_GROUP_SCHED
     908             :         /*
     909             :          * FIXME: isolated CPUs should really leave the root task group,
     910             :          * whether they are isolcpus or were isolated via cpusets, lest
     911             :          * the timer run on a CPU which does not service all runqueues,
     912             :          * potentially leaving other CPUs indefinitely throttled.  If
     913             :          * isolation is really required, the user will turn the throttle
     914             :          * off to kill the perturbations it causes anyway.  Meanwhile,
     915             :          * this maintains functionality for boot and/or troubleshooting.
     916             :          */
     917             :         if (rt_b == &root_task_group.rt_bandwidth)
     918             :                 span = cpu_online_mask;
     919             : #endif
     920           0 :         for_each_cpu(i, span) {
     921           0 :                 int enqueue = 0;
     922           0 :                 struct rt_rq *rt_rq = sched_rt_period_rt_rq(rt_b, i);
     923           0 :                 struct rq *rq = rq_of_rt_rq(rt_rq);
     924             :                 struct rq_flags rf;
     925             :                 int skip;
     926             : 
     927             :                 /*
     928             :                  * When span == cpu_online_mask, taking each rq->lock
     929             :                  * can be time-consuming. Try to avoid it when possible.
     930             :                  */
     931           0 :                 raw_spin_lock(&rt_rq->rt_runtime_lock);
     932           0 :                 if (!sched_feat(RT_RUNTIME_SHARE) && rt_rq->rt_runtime != RUNTIME_INF)
     933           0 :                         rt_rq->rt_runtime = rt_b->rt_runtime;
     934           0 :                 skip = !rt_rq->rt_time && !rt_rq->rt_nr_running;
     935           0 :                 raw_spin_unlock(&rt_rq->rt_runtime_lock);
     936           0 :                 if (skip)
     937           0 :                         continue;
     938             : 
     939           0 :                 rq_lock(rq, &rf);
     940           0 :                 update_rq_clock(rq);
     941             : 
     942           0 :                 if (rt_rq->rt_time) {
     943             :                         u64 runtime;
     944             : 
     945           0 :                         raw_spin_lock(&rt_rq->rt_runtime_lock);
     946           0 :                         if (rt_rq->rt_throttled)
     947             :                                 balance_runtime(rt_rq);
     948           0 :                         runtime = rt_rq->rt_runtime;
     949           0 :                         rt_rq->rt_time -= min(rt_rq->rt_time, overrun*runtime);
     950           0 :                         if (rt_rq->rt_throttled && rt_rq->rt_time < runtime) {
     951           0 :                                 rt_rq->rt_throttled = 0;
     952           0 :                                 enqueue = 1;
     953             : 
     954             :                                 /*
     955             :                                  * When we're idle and a woken (rt) task is
      956             :                                  * throttled, check_preempt_curr() will set
     957             :                                  * skip_update and the time between the wakeup
     958             :                                  * and this unthrottle will get accounted as
     959             :                                  * 'runtime'.
     960             :                                  */
     961           0 :                                 if (rt_rq->rt_nr_running && rq->curr == rq->idle)
     962             :                                         rq_clock_cancel_skipupdate(rq);
     963             :                         }
     964           0 :                         if (rt_rq->rt_time || rt_rq->rt_nr_running)
     965           0 :                                 idle = 0;
     966           0 :                         raw_spin_unlock(&rt_rq->rt_runtime_lock);
     967           0 :                 } else if (rt_rq->rt_nr_running) {
     968           0 :                         idle = 0;
     969           0 :                         if (!rt_rq_throttled(rt_rq))
     970           0 :                                 enqueue = 1;
     971             :                 }
     972           0 :                 if (rt_rq->rt_throttled)
     973           0 :                         throttled = 1;
     974             : 
     975           0 :                 if (enqueue)
     976           0 :                         sched_rt_rq_enqueue(rt_rq);
     977           0 :                 rq_unlock(rq, &rf);
     978             :         }
     979             : 
     980           0 :         if (!throttled && (!rt_bandwidth_enabled() || rt_b->rt_runtime == RUNTIME_INF))
     981             :                 return 1;
     982             : 
     983             :         return idle;
     984             : }
     985             : 
     986             : static inline int rt_se_prio(struct sched_rt_entity *rt_se)
     987             : {
     988             : #ifdef CONFIG_RT_GROUP_SCHED
     989             :         struct rt_rq *rt_rq = group_rt_rq(rt_se);
     990             : 
     991             :         if (rt_rq)
     992             :                 return rt_rq->highest_prio.curr;
     993             : #endif
     994             : 
     995           0 :         return rt_task_of(rt_se)->prio;
     996             : }
     997             : 
     998           0 : static int sched_rt_runtime_exceeded(struct rt_rq *rt_rq)
     999             : {
    1000           0 :         u64 runtime = sched_rt_runtime(rt_rq);
    1001             : 
    1002           0 :         if (rt_rq->rt_throttled)
    1003             :                 return rt_rq_throttled(rt_rq);
    1004             : 
    1005           0 :         if (runtime >= sched_rt_period(rt_rq))
    1006             :                 return 0;
    1007             : 
    1008           0 :         balance_runtime(rt_rq);
    1009           0 :         runtime = sched_rt_runtime(rt_rq);
    1010           0 :         if (runtime == RUNTIME_INF)
    1011             :                 return 0;
    1012             : 
    1013           0 :         if (rt_rq->rt_time > runtime) {
    1014           0 :                 struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
    1015             : 
    1016             :                 /*
    1017             :                  * Don't actually throttle groups that have no runtime assigned
    1018             :                  * but accrue some time due to boosting.
    1019             :                  */
    1020           0 :                 if (likely(rt_b->rt_runtime)) {
    1021           0 :                         rt_rq->rt_throttled = 1;
    1022           0 :                         printk_deferred_once("sched: RT throttling activated\n");
    1023             :                 } else {
     1024             :                          * In case we did anyway, make it go away:
    1025             :                          * In case we did anyway, make it go away,
    1026             :                          * replenishment is a joke, since it will replenish us
    1027             :                          * with exactly 0 ns.
    1028             :                          */
    1029           0 :                         rt_rq->rt_time = 0;
    1030             :                 }
    1031             : 
    1032           0 :                 if (rt_rq_throttled(rt_rq)) {
    1033           0 :                         sched_rt_rq_dequeue(rt_rq);
    1034           0 :                         return 1;
    1035             :                 }
    1036             :         }
    1037             : 
    1038             :         return 0;
    1039             : }
    1040             : 
    1041             : /*
    1042             :  * Update the current task's runtime statistics. Skip current tasks that
    1043             :  * are not in our scheduling class.
    1044             :  */
    1045           0 : static void update_curr_rt(struct rq *rq)
    1046             : {
    1047           0 :         struct task_struct *curr = rq->curr;
    1048           0 :         struct sched_rt_entity *rt_se = &curr->rt;
    1049             :         u64 delta_exec;
    1050             :         u64 now;
    1051             : 
    1052           0 :         if (curr->sched_class != &rt_sched_class)
    1053             :                 return;
    1054             : 
    1055           0 :         now = rq_clock_task(rq);
    1056           0 :         delta_exec = now - curr->se.exec_start;
    1057           0 :         if (unlikely((s64)delta_exec <= 0))
    1058             :                 return;
    1059             : 
    1060             :         schedstat_set(curr->stats.exec_max,
    1061             :                       max(curr->stats.exec_max, delta_exec));
    1062             : 
    1063           0 :         trace_sched_stat_runtime(curr, delta_exec, 0);
    1064             : 
    1065           0 :         update_current_exec_runtime(curr, now, delta_exec);
    1066             : 
    1067           0 :         if (!rt_bandwidth_enabled())
    1068             :                 return;
    1069             : 
    1070           0 :         for_each_sched_rt_entity(rt_se) {
    1071           0 :                 struct rt_rq *rt_rq = rt_rq_of_se(rt_se);
    1072             :                 int exceeded;
    1073             : 
    1074           0 :                 if (sched_rt_runtime(rt_rq) != RUNTIME_INF) {
    1075           0 :                         raw_spin_lock(&rt_rq->rt_runtime_lock);
    1076           0 :                         rt_rq->rt_time += delta_exec;
    1077           0 :                         exceeded = sched_rt_runtime_exceeded(rt_rq);
    1078           0 :                         if (exceeded)
    1079           0 :                                 resched_curr(rq);
    1080           0 :                         raw_spin_unlock(&rt_rq->rt_runtime_lock);
    1081           0 :                         if (exceeded)
    1082           0 :                                 do_start_rt_bandwidth(sched_rt_bandwidth(rt_rq));
    1083             :                 }
    1084             :         }
    1085             : }
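
The runtime that update_curr_rt() folds into the task's sum_exec_runtime is exported through /proc/<pid>/schedstat (present when sched_info/schedstats support is built in). A minimal sketch that prints it for the calling process; the field order assumed here is exec time (ns), runqueue wait time (ns), and timeslice count.

    /* schedstat.c - print this process's accumulated exec runtime (sketch). */
    #include <stdio.h>

    int main(void)
    {
            unsigned long long exec_ns, wait_ns, slices;
            FILE *f = fopen("/proc/self/schedstat", "r");

            if (!f) {
                    perror("/proc/self/schedstat");
                    return 1;
            }
            if (fscanf(f, "%llu %llu %llu", &exec_ns, &wait_ns, &slices) != 3) {
                    fprintf(stderr, "unexpected schedstat format\n");
                    fclose(f);
                    return 1;
            }
            printf("exec=%lluns wait=%lluns timeslices=%llu\n",
                   exec_ns, wait_ns, slices);
            fclose(f);
            return 0;
    }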
    1086             : 
    1087             : static void
    1088           0 : dequeue_top_rt_rq(struct rt_rq *rt_rq, unsigned int count)
    1089             : {
    1090           0 :         struct rq *rq = rq_of_rt_rq(rt_rq);
    1091             : 
    1092           0 :         BUG_ON(&rq->rt != rt_rq);
    1093             : 
    1094           0 :         if (!rt_rq->rt_queued)
    1095             :                 return;
    1096             : 
    1097           0 :         BUG_ON(!rq->nr_running);
    1098             : 
    1099           0 :         sub_nr_running(rq, count);
    1100           0 :         rt_rq->rt_queued = 0;
    1101             : 
    1102             : }
    1103             : 
    1104             : static void
    1105           0 : enqueue_top_rt_rq(struct rt_rq *rt_rq)
    1106             : {
    1107           0 :         struct rq *rq = rq_of_rt_rq(rt_rq);
    1108             : 
    1109           0 :         BUG_ON(&rq->rt != rt_rq);
    1110             : 
    1111           0 :         if (rt_rq->rt_queued)
    1112             :                 return;
    1113             : 
    1114           0 :         if (rt_rq_throttled(rt_rq))
    1115             :                 return;
    1116             : 
    1117           0 :         if (rt_rq->rt_nr_running) {
    1118           0 :                 add_nr_running(rq, rt_rq->rt_nr_running);
    1119           0 :                 rt_rq->rt_queued = 1;
    1120             :         }
    1121             : 
    1122             :         /* Kick cpufreq (see the comment in kernel/sched/sched.h). */
    1123             :         cpufreq_update_util(rq, 0);
    1124             : }
    1125             : 
    1126             : #if defined CONFIG_SMP
    1127             : 
    1128             : static void
    1129             : inc_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio)
    1130             : {
    1131             :         struct rq *rq = rq_of_rt_rq(rt_rq);
    1132             : 
    1133             : #ifdef CONFIG_RT_GROUP_SCHED
    1134             :         /*
    1135             :          * Change rq's cpupri only if rt_rq is the top queue.
    1136             :          */
    1137             :         if (&rq->rt != rt_rq)
    1138             :                 return;
    1139             : #endif
    1140             :         if (rq->online && prio < prev_prio)
    1141             :                 cpupri_set(&rq->rd->cpupri, rq->cpu, prio);
    1142             : }
    1143             : 
    1144             : static void
    1145             : dec_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio)
    1146             : {
    1147             :         struct rq *rq = rq_of_rt_rq(rt_rq);
    1148             : 
    1149             : #ifdef CONFIG_RT_GROUP_SCHED
    1150             :         /*
    1151             :          * Change rq's cpupri only if rt_rq is the top queue.
    1152             :          */
    1153             :         if (&rq->rt != rt_rq)
    1154             :                 return;
    1155             : #endif
    1156             :         if (rq->online && rt_rq->highest_prio.curr != prev_prio)
    1157             :                 cpupri_set(&rq->rd->cpupri, rq->cpu, rt_rq->highest_prio.curr);
    1158             : }
    1159             : 
    1160             : #else /* CONFIG_SMP */
    1161             : 
    1162             : static inline
    1163             : void inc_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio) {}
    1164             : static inline
    1165             : void dec_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio) {}
    1166             : 
    1167             : #endif /* CONFIG_SMP */
    1168             : 
    1169             : #if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED
    1170             : static void
    1171             : inc_rt_prio(struct rt_rq *rt_rq, int prio)
    1172             : {
    1173             :         int prev_prio = rt_rq->highest_prio.curr;
    1174             : 
    1175             :         if (prio < prev_prio)
    1176             :                 rt_rq->highest_prio.curr = prio;
    1177             : 
    1178             :         inc_rt_prio_smp(rt_rq, prio, prev_prio);
    1179             : }
    1180             : 
    1181             : static void
    1182             : dec_rt_prio(struct rt_rq *rt_rq, int prio)
    1183             : {
    1184             :         int prev_prio = rt_rq->highest_prio.curr;
    1185             : 
    1186             :         if (rt_rq->rt_nr_running) {
    1187             : 
    1188             :                 WARN_ON(prio < prev_prio);
    1189             : 
    1190             :                 /*
    1191             :                  * This may have been our highest task, and therefore
    1192             :                  * we may have some recomputation to do
    1193             :                  */
    1194             :                 if (prio == prev_prio) {
    1195             :                         struct rt_prio_array *array = &rt_rq->active;
    1196             : 
    1197             :                         rt_rq->highest_prio.curr =
    1198             :                                 sched_find_first_bit(array->bitmap);
    1199             :                 }
    1200             : 
    1201             :         } else {
    1202             :                 rt_rq->highest_prio.curr = MAX_RT_PRIO-1;
    1203             :         }
    1204             : 
    1205             :         dec_rt_prio_smp(rt_rq, prio, prev_prio);
    1206             : }
    1207             : 
    1208             : #else
    1209             : 
    1210             : static inline void inc_rt_prio(struct rt_rq *rt_rq, int prio) {}
    1211             : static inline void dec_rt_prio(struct rt_rq *rt_rq, int prio) {}
    1212             : 
    1213             : #endif /* CONFIG_SMP || CONFIG_RT_GROUP_SCHED */
    1214             : 
    1215             : #ifdef CONFIG_RT_GROUP_SCHED
    1216             : 
    1217             : static void
    1218             : inc_rt_group(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
    1219             : {
    1220             :         if (rt_se_boosted(rt_se))
    1221             :                 rt_rq->rt_nr_boosted++;
    1222             : 
    1223             :         if (rt_rq->tg)
    1224             :                 start_rt_bandwidth(&rt_rq->tg->rt_bandwidth);
    1225             : }
    1226             : 
    1227             : static void
    1228             : dec_rt_group(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
    1229             : {
    1230             :         if (rt_se_boosted(rt_se))
    1231             :                 rt_rq->rt_nr_boosted--;
    1232             : 
    1233             :         WARN_ON(!rt_rq->rt_nr_running && rt_rq->rt_nr_boosted);
    1234             : }
    1235             : 
    1236             : #else /* CONFIG_RT_GROUP_SCHED */
    1237             : 
    1238             : static void
    1239             : inc_rt_group(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
    1240             : {
    1241           0 :         start_rt_bandwidth(&def_rt_bandwidth);
    1242             : }
    1243             : 
    1244             : static inline
    1245             : void dec_rt_group(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) {}
    1246             : 
    1247             : #endif /* CONFIG_RT_GROUP_SCHED */
    1248             : 
    1249             : static inline
    1250             : unsigned int rt_se_nr_running(struct sched_rt_entity *rt_se)
    1251             : {
    1252           0 :         struct rt_rq *group_rq = group_rt_rq(rt_se);
    1253             : 
    1254             :         if (group_rq)
    1255             :                 return group_rq->rt_nr_running;
    1256             :         else
    1257             :                 return 1;
    1258             : }
    1259             : 
    1260             : static inline
    1261             : unsigned int rt_se_rr_nr_running(struct sched_rt_entity *rt_se)
    1262             : {
    1263           0 :         struct rt_rq *group_rq = group_rt_rq(rt_se);
    1264             :         struct task_struct *tsk;
    1265             : 
    1266             :         if (group_rq)
    1267             :                 return group_rq->rr_nr_running;
    1268             : 
    1269           0 :         tsk = rt_task_of(rt_se);
    1270             : 
    1271           0 :         return (tsk->policy == SCHED_RR) ? 1 : 0;
    1272             : }
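
rr_nr_running counts only SCHED_RR entities because only those consume a round-robin timeslice; SCHED_FIFO tasks run until they block or yield. The quantum being consumed corresponds to sched_rr_timeslice (the sched_rr_timeslice_ms sysctl) and can be queried from userspace with sched_rr_get_interval(), as in this sketch; the printed value is only meaningful for a SCHED_RR caller.

    /* rr_interval.c - print the SCHED_RR quantum for this process (sketch). */
    #include <sched.h>
    #include <stdio.h>
    #include <time.h>

    int main(void)
    {
            struct timespec ts;

            if (sched_rr_get_interval(0, &ts)) {    /* 0 = calling process */
                    perror("sched_rr_get_interval");
                    return 1;
            }
            printf("RR timeslice: %lld.%09ld s\n",
                   (long long)ts.tv_sec, ts.tv_nsec);
            return 0;
    }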
    1273             : 
    1274             : static inline
    1275           0 : void inc_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
    1276             : {
    1277           0 :         int prio = rt_se_prio(rt_se);
    1278             : 
    1279           0 :         WARN_ON(!rt_prio(prio));
    1280           0 :         rt_rq->rt_nr_running += rt_se_nr_running(rt_se);
    1281           0 :         rt_rq->rr_nr_running += rt_se_rr_nr_running(rt_se);
    1282             : 
    1283           0 :         inc_rt_prio(rt_rq, prio);
    1284           0 :         inc_rt_migration(rt_se, rt_rq);
    1285           0 :         inc_rt_group(rt_se, rt_rq);
    1286           0 : }
    1287             : 
    1288             : static inline
    1289           0 : void dec_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
    1290             : {
    1291           0 :         WARN_ON(!rt_prio(rt_se_prio(rt_se)));
    1292           0 :         WARN_ON(!rt_rq->rt_nr_running);
    1293           0 :         rt_rq->rt_nr_running -= rt_se_nr_running(rt_se);
    1294           0 :         rt_rq->rr_nr_running -= rt_se_rr_nr_running(rt_se);
    1295             : 
    1296           0 :         dec_rt_prio(rt_rq, rt_se_prio(rt_se));
    1297           0 :         dec_rt_migration(rt_se, rt_rq);
    1298           0 :         dec_rt_group(rt_se, rt_rq);
    1299           0 : }
    1300             : 
    1301             : /*
    1302             :  * Change rt_se->run_list location unless SAVE && !MOVE
    1303             :  *
     1304             :  * Assumes the ENQUEUE/DEQUEUE flags match.
    1305             :  */
    1306             : static inline bool move_entity(unsigned int flags)
    1307             : {
    1308           0 :         if ((flags & (DEQUEUE_SAVE | DEQUEUE_MOVE)) == DEQUEUE_SAVE)
    1309             :                 return false;
    1310             : 
    1311             :         return true;
    1312             : }
    1313             : 
    1314             : static void __delist_rt_entity(struct sched_rt_entity *rt_se, struct rt_prio_array *array)
    1315             : {
    1316           0 :         list_del_init(&rt_se->run_list);
    1317             : 
    1318           0 :         if (list_empty(array->queue + rt_se_prio(rt_se)))
    1319           0 :                 __clear_bit(rt_se_prio(rt_se), array->bitmap);
    1320             : 
    1321           0 :         rt_se->on_list = 0;
    1322             : }
    1323             : 
    1324             : static inline struct sched_statistics *
    1325             : __schedstats_from_rt_se(struct sched_rt_entity *rt_se)
    1326             : {
    1327             : #ifdef CONFIG_RT_GROUP_SCHED
    1328             :         /* schedstats is not supported for rt group. */
    1329             :         if (!rt_entity_is_task(rt_se))
    1330             :                 return NULL;
    1331             : #endif
    1332             : 
    1333             :         return &rt_task_of(rt_se)->stats;
    1334             : }
    1335             : 
    1336             : static inline void
    1337             : update_stats_wait_start_rt(struct rt_rq *rt_rq, struct sched_rt_entity *rt_se)
    1338             : {
    1339             :         struct sched_statistics *stats;
    1340           0 :         struct task_struct *p = NULL;
    1341             : 
    1342             :         if (!schedstat_enabled())
    1343             :                 return;
    1344             : 
    1345             :         if (rt_entity_is_task(rt_se))
    1346             :                 p = rt_task_of(rt_se);
    1347             : 
    1348             :         stats = __schedstats_from_rt_se(rt_se);
    1349             :         if (!stats)
    1350             :                 return;
    1351             : 
    1352             :         __update_stats_wait_start(rq_of_rt_rq(rt_rq), p, stats);
    1353             : }
    1354             : 
    1355             : static inline void
    1356             : update_stats_enqueue_sleeper_rt(struct rt_rq *rt_rq, struct sched_rt_entity *rt_se)
    1357             : {
    1358             :         struct sched_statistics *stats;
    1359             :         struct task_struct *p = NULL;
    1360             : 
    1361             :         if (!schedstat_enabled())
    1362             :                 return;
    1363             : 
    1364             :         if (rt_entity_is_task(rt_se))
    1365             :                 p = rt_task_of(rt_se);
    1366             : 
    1367             :         stats = __schedstats_from_rt_se(rt_se);
    1368             :         if (!stats)
    1369             :                 return;
    1370             : 
    1371             :         __update_stats_enqueue_sleeper(rq_of_rt_rq(rt_rq), p, stats);
    1372             : }
    1373             : 
    1374             : static inline void
    1375             : update_stats_enqueue_rt(struct rt_rq *rt_rq, struct sched_rt_entity *rt_se,
    1376             :                         int flags)
    1377             : {
    1378             :         if (!schedstat_enabled())
    1379             :                 return;
    1380             : 
    1381             :         if (flags & ENQUEUE_WAKEUP)
    1382             :                 update_stats_enqueue_sleeper_rt(rt_rq, rt_se);
    1383             : }
    1384             : 
    1385             : static inline void
    1386             : update_stats_wait_end_rt(struct rt_rq *rt_rq, struct sched_rt_entity *rt_se)
    1387             : {
    1388             :         struct sched_statistics *stats;
    1389             :         struct task_struct *p = NULL;
    1390             : 
    1391             :         if (!schedstat_enabled())
    1392             :                 return;
    1393             : 
    1394             :         if (rt_entity_is_task(rt_se))
    1395             :                 p = rt_task_of(rt_se);
    1396             : 
    1397             :         stats = __schedstats_from_rt_se(rt_se);
    1398             :         if (!stats)
    1399             :                 return;
    1400             : 
    1401             :         __update_stats_wait_end(rq_of_rt_rq(rt_rq), p, stats);
    1402             : }
    1403             : 
    1404             : static inline void
    1405             : update_stats_dequeue_rt(struct rt_rq *rt_rq, struct sched_rt_entity *rt_se,
    1406             :                         int flags)
    1407             : {
    1408           0 :         struct task_struct *p = NULL;
    1409             : 
    1410             :         if (!schedstat_enabled())
    1411             :                 return;
    1412             : 
    1413             :         if (rt_entity_is_task(rt_se))
    1414             :                 p = rt_task_of(rt_se);
    1415             : 
    1416             :         if ((flags & DEQUEUE_SLEEP) && p) {
    1417             :                 unsigned int state;
    1418             : 
    1419             :                 state = READ_ONCE(p->__state);
    1420             :                 if (state & TASK_INTERRUPTIBLE)
    1421             :                         __schedstat_set(p->stats.sleep_start,
    1422             :                                         rq_clock(rq_of_rt_rq(rt_rq)));
    1423             : 
    1424             :                 if (state & TASK_UNINTERRUPTIBLE)
    1425             :                         __schedstat_set(p->stats.block_start,
    1426             :                                         rq_clock(rq_of_rt_rq(rt_rq)));
    1427             :         }
    1428             : }
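
All of the update_stats_*_rt() helpers above bail out early unless schedstat_enabled(); the runtime switch behind that check is the kernel.sched_schedstats sysctl (available when CONFIG_SCHEDSTATS is set). A sketch that turns it on, equivalent to `sysctl kernel.sched_schedstats=1`; needs root.

    /* enable_schedstats.c - flip the schedstats runtime switch (sketch). */
    #include <stdio.h>

    int main(void)
    {
            FILE *f = fopen("/proc/sys/kernel/sched_schedstats", "w");

            if (!f) {
                    perror("sched_schedstats");
                    return 1;
            }
            fputs("1\n", f);
            fclose(f);
            return 0;
    }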
    1429             : 
    1430           0 : static void __enqueue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags)
    1431             : {
    1432           0 :         struct rt_rq *rt_rq = rt_rq_of_se(rt_se);
    1433           0 :         struct rt_prio_array *array = &rt_rq->active;
    1434           0 :         struct rt_rq *group_rq = group_rt_rq(rt_se);
    1435           0 :         struct list_head *queue = array->queue + rt_se_prio(rt_se);
    1436             : 
    1437             :         /*
     1438             :          * Don't enqueue the group if it's throttled, or when it's empty.
     1439             :          * The latter is a consequence of the former when a child group
     1440             :          * gets throttled and the current group doesn't have any other
    1441             :          * active members.
    1442             :          */
    1443             :         if (group_rq && (rt_rq_throttled(group_rq) || !group_rq->rt_nr_running)) {
    1444             :                 if (rt_se->on_list)
    1445             :                         __delist_rt_entity(rt_se, array);
    1446             :                 return;
    1447             :         }
    1448             : 
    1449           0 :         if (move_entity(flags)) {
    1450           0 :                 WARN_ON_ONCE(rt_se->on_list);
    1451           0 :                 if (flags & ENQUEUE_HEAD)
    1452           0 :                         list_add(&rt_se->run_list, queue);
    1453             :                 else
    1454           0 :                         list_add_tail(&rt_se->run_list, queue);
    1455             : 
    1456           0 :                 __set_bit(rt_se_prio(rt_se), array->bitmap);
    1457           0 :                 rt_se->on_list = 1;
    1458             :         }
    1459           0 :         rt_se->on_rq = 1;
    1460             : 
    1461           0 :         inc_rt_tasks(rt_se, rt_rq);
    1462             : }
    1463             : 
    1464           0 : static void __dequeue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags)
    1465             : {
    1466           0 :         struct rt_rq *rt_rq = rt_rq_of_se(rt_se);
    1467           0 :         struct rt_prio_array *array = &rt_rq->active;
    1468             : 
    1469           0 :         if (move_entity(flags)) {
    1470           0 :                 WARN_ON_ONCE(!rt_se->on_list);
    1471             :                 __delist_rt_entity(rt_se, array);
    1472             :         }
    1473           0 :         rt_se->on_rq = 0;
    1474             : 
    1475           0 :         dec_rt_tasks(rt_se, rt_rq);
    1476           0 : }
    1477             : 
    1478             : /*
    1479             :  * Because the prio of an upper entry depends on the lower
     1480             :  * entries, we must remove entries top-down.
    1481             :  */
    1482           0 : static void dequeue_rt_stack(struct sched_rt_entity *rt_se, unsigned int flags)
    1483             : {
    1484           0 :         struct sched_rt_entity *back = NULL;
    1485             :         unsigned int rt_nr_running;
    1486             : 
    1487           0 :         for_each_sched_rt_entity(rt_se) {
    1488           0 :                 rt_se->back = back;
    1489           0 :                 back = rt_se;
    1490             :         }
    1491             : 
    1492           0 :         rt_nr_running = rt_rq_of_se(back)->rt_nr_running;
    1493             : 
    1494           0 :         for (rt_se = back; rt_se; rt_se = rt_se->back) {
    1495           0 :                 if (on_rt_rq(rt_se))
    1496           0 :                         __dequeue_rt_entity(rt_se, flags);
    1497             :         }
    1498             : 
    1499           0 :         dequeue_top_rt_rq(rt_rq_of_se(back), rt_nr_running);
    1500           0 : }
    1501             : 
    1502           0 : static void enqueue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags)
    1503             : {
    1504           0 :         struct rq *rq = rq_of_rt_se(rt_se);
    1505             : 
    1506           0 :         update_stats_enqueue_rt(rt_rq_of_se(rt_se), rt_se, flags);
    1507             : 
    1508           0 :         dequeue_rt_stack(rt_se, flags);
    1509           0 :         for_each_sched_rt_entity(rt_se)
    1510           0 :                 __enqueue_rt_entity(rt_se, flags);
    1511           0 :         enqueue_top_rt_rq(&rq->rt);
    1512           0 : }
    1513             : 
    1514             : static void dequeue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags)
    1515             : {
    1516           0 :         struct rq *rq = rq_of_rt_se(rt_se);
    1517             : 
    1518           0 :         update_stats_dequeue_rt(rt_rq_of_se(rt_se), rt_se, flags);
    1519             : 
    1520           0 :         dequeue_rt_stack(rt_se, flags);
    1521             : 
    1522           0 :         for_each_sched_rt_entity(rt_se) {
    1523             :                 struct rt_rq *rt_rq = group_rt_rq(rt_se);
    1524             : 
    1525             :                 if (rt_rq && rt_rq->rt_nr_running)
    1526             :                         __enqueue_rt_entity(rt_se, flags);
    1527             :         }
    1528           0 :         enqueue_top_rt_rq(&rq->rt);
    1529             : }
    1530             : 
    1531             : /*
    1532             :  * Adding/removing a task to/from a priority array:
    1533             :  */
    1534             : static void
    1535           0 : enqueue_task_rt(struct rq *rq, struct task_struct *p, int flags)
    1536             : {
    1537           0 :         struct sched_rt_entity *rt_se = &p->rt;
    1538             : 
    1539           0 :         if (flags & ENQUEUE_WAKEUP)
    1540           0 :                 rt_se->timeout = 0;
    1541             : 
    1542             :         check_schedstat_required();
    1543           0 :         update_stats_wait_start_rt(rt_rq_of_se(rt_se), rt_se);
    1544             : 
    1545           0 :         enqueue_rt_entity(rt_se, flags);
    1546             : 
    1547           0 :         if (!task_current(rq, p) && p->nr_cpus_allowed > 1)
    1548             :                 enqueue_pushable_task(rq, p);
    1549           0 : }
    1550             : 
    1551           0 : static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int flags)
    1552             : {
    1553           0 :         struct sched_rt_entity *rt_se = &p->rt;
    1554             : 
    1555           0 :         update_curr_rt(rq);
    1556           0 :         dequeue_rt_entity(rt_se, flags);
    1557             : 
    1558           0 :         dequeue_pushable_task(rq, p);
    1559           0 : }
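
enqueue_task_rt() and dequeue_task_rt() are what a task passes through when it enters or leaves the RT runqueue, for example when its policy is switched to or from SCHED_FIFO/SCHED_RR. A sketch (file name and priority are arbitrary, error handling trimmed; needs CAP_SYS_NICE or a suitable RLIMIT_RTPRIO) that moves one thread into SCHED_RR and back.

    /* rt_toggle.c - enter and leave the RT class from one thread (sketch).
     * Build: gcc rt_toggle.c -o rt_toggle -lpthread
     */
    #include <pthread.h>
    #include <sched.h>
    #include <stdio.h>

    static void *worker(void *arg)
    {
            struct sched_param sp = { .sched_priority = 20 };
            int err;

            (void)arg;
            err = pthread_setschedparam(pthread_self(), SCHED_RR, &sp);
            if (err) {
                    fprintf(stderr, "pthread_setschedparam: %d\n", err);
                    return NULL;
            }

            /* ... latency-sensitive work runs as SCHED_RR here ... */

            sp.sched_priority = 0;                  /* back to CFS */
            pthread_setschedparam(pthread_self(), SCHED_OTHER, &sp);
            return NULL;
    }

    int main(void)
    {
            pthread_t t;

            pthread_create(&t, NULL, worker, NULL);
            pthread_join(t, NULL);
            return 0;
    }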
    1560             : 
    1561             : /*
     1562             :  * Put the task at the head or the end of the run list without the overhead of
    1563             :  * dequeue followed by enqueue.
    1564             :  */
    1565             : static void
    1566           0 : requeue_rt_entity(struct rt_rq *rt_rq, struct sched_rt_entity *rt_se, int head)
    1567             : {
    1568           0 :         if (on_rt_rq(rt_se)) {
    1569           0 :                 struct rt_prio_array *array = &rt_rq->active;
    1570           0 :                 struct list_head *queue = array->queue + rt_se_prio(rt_se);
    1571             : 
    1572           0 :                 if (head)
    1573           0 :                         list_move(&rt_se->run_list, queue);
    1574             :                 else
    1575           0 :                         list_move_tail(&rt_se->run_list, queue);
    1576             :         }
    1577           0 : }
    1578             : 
    1579             : static void requeue_task_rt(struct rq *rq, struct task_struct *p, int head)
    1580             : {
    1581           0 :         struct sched_rt_entity *rt_se = &p->rt;
    1582             :         struct rt_rq *rt_rq;
    1583             : 
    1584           0 :         for_each_sched_rt_entity(rt_se) {
    1585           0 :                 rt_rq = rt_rq_of_se(rt_se);
    1586           0 :                 requeue_rt_entity(rt_rq, rt_se, head);
    1587             :         }
    1588             : }
    1589             : 
    1590           0 : static void yield_task_rt(struct rq *rq)
    1591             : {
    1592           0 :         requeue_task_rt(rq, rq->curr, 0);
    1593           0 : }
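
yield_task_rt() simply requeues the caller at the tail of its priority list, so sched_yield() from an RT task hands the CPU to an equal-priority peer if one is queued. The sketch below (file name and pass count are arbitrary; run as root) pins two SCHED_FIFO threads of equal priority to one CPU and lets them alternate via sched_yield().

    /* yield_demo.c - two equal-prio FIFO threads trading one CPU (sketch).
     * Build: gcc -O2 yield_demo.c -o yield_demo -lpthread
     */
    #define _GNU_SOURCE
    #include <pthread.h>
    #include <sched.h>
    #include <stdio.h>

    static pthread_barrier_t barrier;

    static void *spinner(void *arg)
    {
            cpu_set_t one;
            struct sched_param sp = { .sched_priority = 30 };

            CPU_ZERO(&one);
            CPU_SET(0, &one);                       /* both threads share CPU 0 */
            pthread_setaffinity_np(pthread_self(), sizeof(one), &one);
            pthread_setschedparam(pthread_self(), SCHED_FIFO, &sp);

            pthread_barrier_wait(&barrier);         /* start spinning together */
            for (int i = 0; i < 5; i++) {
                    printf("thread %ld: pass %d\n", (long)arg, i);
                    sched_yield();                  /* requeue to tail; peer runs */
            }
            return NULL;
    }

    int main(void)
    {
            pthread_t a, b;

            pthread_barrier_init(&barrier, NULL, 2);
            pthread_create(&a, NULL, spinner, (void *)1L);
            pthread_create(&b, NULL, spinner, (void *)2L);
            pthread_join(a, NULL);
            pthread_join(b, NULL);
            pthread_barrier_destroy(&barrier);
            return 0;
    }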
    1594             : 
    1595             : #ifdef CONFIG_SMP
    1596             : static int find_lowest_rq(struct task_struct *task);
    1597             : 
    1598             : static int
    1599             : select_task_rq_rt(struct task_struct *p, int cpu, int flags)
    1600             : {
    1601             :         struct task_struct *curr;
    1602             :         struct rq *rq;
    1603             :         bool test;
    1604             : 
    1605             :         /* For anything but wake ups, just return the task_cpu */
    1606             :         if (!(flags & (WF_TTWU | WF_FORK)))
    1607             :                 goto out;
    1608             : 
    1609             :         rq = cpu_rq(cpu);
    1610             : 
    1611             :         rcu_read_lock();
    1612             :         curr = READ_ONCE(rq->curr); /* unlocked access */
    1613             : 
    1614             :         /*
    1615             :          * If the current task on @p's runqueue is an RT task, then
    1616             :          * try to see if we can wake this RT task up on another
    1617             :          * runqueue. Otherwise simply start this RT task
    1618             :          * on its current runqueue.
    1619             :          *
     1620             :          * We want to avoid overloading runqueues. If the woken
     1621             :          * task is of higher priority, then it will stay on this CPU
     1622             :          * and the lower-prio task should be moved to another CPU.
     1623             :          * Even though this will probably make the lower-prio task
     1624             :          * lose its cache, we do not want to bounce a higher-priority
     1625             :          * task around just because it gave up its CPU, perhaps for a
     1626             :          * lock?
     1627             :          *
     1628             :          * For equal-prio tasks, we just let the scheduler sort it out.
     1629             :          *
     1630             :          * Otherwise, just let it ride on the affined RQ and the
     1631             :          * post-schedule router will push the preempted task away.
    1632             :          *
    1633             :          * This test is optimistic, if we get it wrong the load-balancer
    1634             :          * will have to sort it out.
    1635             :          *
    1636             :          * We take into account the capacity of the CPU to ensure it fits the
    1637             :          * requirement of the task - which is only important on heterogeneous
    1638             :          * systems like big.LITTLE.
    1639             :          */
    1640             :         test = curr &&
    1641             :                unlikely(rt_task(curr)) &&
    1642             :                (curr->nr_cpus_allowed < 2 || curr->prio <= p->prio);
    1643             : 
    1644             :         if (test || !rt_task_fits_capacity(p, cpu)) {
    1645             :                 int target = find_lowest_rq(p);
    1646             : 
    1647             :                 /*
    1648             :                  * Bail out if we were forcing a migration to find a better
    1649             :                  * fitting CPU but our search failed.
    1650             :                  */
    1651             :                 if (!test && target != -1 && !rt_task_fits_capacity(p, target))
    1652             :                         goto out_unlock;
    1653             : 
    1654             :                 /*
    1655             :                  * Don't bother moving it if the destination CPU is
    1656             :                  * not running a lower priority task.
    1657             :                  */
    1658             :                 if (target != -1 &&
    1659             :                     p->prio < cpu_rq(target)->rt.highest_prio.curr)
    1660             :                         cpu = target;
    1661             :         }
    1662             : 
    1663             : out_unlock:
    1664             :         rcu_read_unlock();
    1665             : 
    1666             : out:
    1667             :         return cpu;
    1668             : }
    1669             : 
    1670             : static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p)
    1671             : {
    1672             :         /*
     1673             :          * Current can't be migrated, so rescheduling is useless;
     1674             :          * let's hope p can move out.
    1675             :          */
    1676             :         if (rq->curr->nr_cpus_allowed == 1 ||
    1677             :             !cpupri_find(&rq->rd->cpupri, rq->curr, NULL))
    1678             :                 return;
    1679             : 
    1680             :         /*
    1681             :          * p is migratable, so let's not schedule it and
    1682             :          * see if it is pushed or pulled somewhere else.
    1683             :          */
    1684             :         if (p->nr_cpus_allowed != 1 &&
    1685             :             cpupri_find(&rq->rd->cpupri, p, NULL))
    1686             :                 return;
    1687             : 
    1688             :         /*
    1689             :          * There appear to be other CPUs that can accept
     1690             :          * the current task but none can run 'p', so let's reschedule
     1691             :          * to try to push the current task away:
    1692             :          */
    1693             :         requeue_task_rt(rq, p, 1);
    1694             :         resched_curr(rq);
    1695             : }
    1696             : 
    1697             : static int balance_rt(struct rq *rq, struct task_struct *p, struct rq_flags *rf)
    1698             : {
    1699             :         if (!on_rt_rq(&p->rt) && need_pull_rt_task(rq, p)) {
    1700             :                 /*
     1701             :                  * This is OK: current is on_cpu, which keeps it from being
     1702             :                  * picked for load-balance; preemption/IRQs are still disabled,
     1703             :                  * avoiding further scheduler activity on it; and we've
     1704             :                  * not yet started the picking loop.
    1705             :                  */
    1706             :                 rq_unpin_lock(rq, rf);
    1707             :                 pull_rt_task(rq);
    1708             :                 rq_repin_lock(rq, rf);
    1709             :         }
    1710             : 
    1711             :         return sched_stop_runnable(rq) || sched_dl_runnable(rq) || sched_rt_runnable(rq);
    1712             : }
    1713             : #endif /* CONFIG_SMP */
    1714             : 
    1715             : /*
    1716             :  * Preempt the current task with a newly woken task if needed:
    1717             :  */
    1718           0 : static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p, int flags)
    1719             : {
    1720           0 :         if (p->prio < rq->curr->prio) {
    1721           0 :                 resched_curr(rq);
    1722           0 :                 return;
    1723             :         }
    1724             : 
    1725             : #ifdef CONFIG_SMP
    1726             :         /*
    1727             :          * If:
    1728             :          *
    1729             :          * - the newly woken task is of equal priority to the current task
    1730             :          * - the newly woken task is non-migratable while current is migratable
    1731             :          * - current will be preempted on the next reschedule
    1732             :          *
    1733             :          * we should check to see if current can readily move to a different
    1734             :          * cpu.  If so, we will reschedule to allow the push logic to try
    1735             :          * to move current somewhere else, making room for our non-migratable
    1736             :          * task.
    1737             :          */
    1738             :         if (p->prio == rq->curr->prio && !test_tsk_need_resched(rq->curr))
    1739             :                 check_preempt_equal_prio(rq, p);
    1740             : #endif
    1741             : }
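
The comparison p->prio < rq->curr->prio reads "p has higher priority" because the kernel-internal prio scale is inverted with respect to the userspace rt_priority: a larger rt_priority maps to a smaller prio. A standalone sketch of that mapping; the constant and formula below mirror the usual kernel convention and are reproduced here only for illustration.

    /* prio_map.c - illustrate the inverted kernel prio scale (sketch). */
    #include <stdio.h>

    #define MAX_RT_PRIO     100

    static int kernel_prio(int rt_priority)         /* rt_priority: 1..99 */
    {
            return MAX_RT_PRIO - 1 - rt_priority;
    }

    int main(void)
    {
            printf("rt_priority 99 -> prio %d (highest RT)\n", kernel_prio(99));
            printf("rt_priority  1 -> prio %d (lowest RT)\n", kernel_prio(1));
            return 0;
    }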
    1742             : 
    1743           0 : static inline void set_next_task_rt(struct rq *rq, struct task_struct *p, bool first)
    1744             : {
    1745           0 :         struct sched_rt_entity *rt_se = &p->rt;
    1746           0 :         struct rt_rq *rt_rq = &rq->rt;
    1747             : 
    1748           0 :         p->se.exec_start = rq_clock_task(rq);
    1749           0 :         if (on_rt_rq(&p->rt))
    1750             :                 update_stats_wait_end_rt(rt_rq, rt_se);
    1751             : 
    1752             :         /* The running task is never eligible for pushing */
    1753           0 :         dequeue_pushable_task(rq, p);
    1754             : 
    1755             :         if (!first)
    1756             :                 return;
    1757             : 
    1758             :         /*
     1759             :          * If the prev task was RT, put_prev_task() has already updated the
     1760             :          * utilization. We only care about the case where we start to
     1761             :          * schedule an RT task.
    1762             :          */
    1763             :         if (rq->curr->sched_class != &rt_sched_class)
    1764             :                 update_rt_rq_load_avg(rq_clock_pelt(rq), rq, 0);
    1765             : 
    1766             :         rt_queue_push_tasks(rq);
    1767             : }
    1768             : 
    1769           0 : static struct sched_rt_entity *pick_next_rt_entity(struct rt_rq *rt_rq)
    1770             : {
    1771           0 :         struct rt_prio_array *array = &rt_rq->active;
    1772           0 :         struct sched_rt_entity *next = NULL;
    1773             :         struct list_head *queue;
    1774             :         int idx;
    1775             : 
    1776           0 :         idx = sched_find_first_bit(array->bitmap);
    1777           0 :         BUG_ON(idx >= MAX_RT_PRIO);
    1778             : 
    1779           0 :         queue = array->queue + idx;
    1780           0 :         if (SCHED_WARN_ON(list_empty(queue)))
    1781             :                 return NULL;
    1782           0 :         next = list_entry(queue->next, struct sched_rt_entity, run_list);
    1783             : 
    1784             :         return next;
    1785             : }
    1786             : 
    1787             : static struct task_struct *_pick_next_task_rt(struct rq *rq)
    1788             : {
    1789             :         struct sched_rt_entity *rt_se;
    1790           0 :         struct rt_rq *rt_rq  = &rq->rt;
    1791             : 
    1792             :         do {
    1793           0 :                 rt_se = pick_next_rt_entity(rt_rq);
    1794           0 :                 if (unlikely(!rt_se))
    1795             :                         return NULL;
    1796           0 :                 rt_rq = group_rt_rq(rt_se);
    1797             :         } while (rt_rq);
    1798             : 
    1799           0 :         return rt_task_of(rt_se);
    1800             : }
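
pick_next_rt_entity() is O(1): a bitmap has one bit per priority level, sched_find_first_bit() returns the lowest set bit (the numerically lowest prio, i.e. the highest priority with queued entities), and the head of that list is picked; the group loop then descends into child runqueues. A self-contained userspace sketch of the bitmap part; prio_array and find_first_prio are illustrative names, not kernel API.

    /* prio_bitmap.c - O(1) highest-priority lookup via a bitmap (sketch). */
    #include <stdio.h>
    #include <stdint.h>

    #define NR_PRIO 100                             /* MAX_RT_PRIO */

    struct prio_array {
            uint64_t bitmap[2];                     /* 128 bits cover 100 prios */
            /* per-priority run lists would live here */
    };

    static int find_first_prio(const struct prio_array *a)
    {
            for (int w = 0; w < 2; w++)
                    if (a->bitmap[w])
                            return w * 64 + __builtin_ctzll(a->bitmap[w]);
            return -1;                              /* nothing queued */
    }

    int main(void)
    {
            struct prio_array a = { .bitmap = { 0 } };

            a.bitmap[0] |= 1ULL << 10;              /* task queued at prio 10 */
            a.bitmap[1] |= 1ULL << (98 - 64);       /* task queued at prio 98 */

            printf("next prio to run: %d\n", find_first_prio(&a)); /* -> 10 */
            return 0;
    }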
    1801             : 
    1802             : static struct task_struct *pick_task_rt(struct rq *rq)
    1803             : {
    1804             :         struct task_struct *p;
    1805             : 
    1806           0 :         if (!sched_rt_runnable(rq))
    1807             :                 return NULL;
    1808             : 
    1809             :         p = _pick_next_task_rt(rq);
    1810             : 
    1811             :         return p;
    1812             : }
    1813             : 
    1814           0 : static struct task_struct *pick_next_task_rt(struct rq *rq)
    1815             : {
    1816           0 :         struct task_struct *p = pick_task_rt(rq);
    1817             : 
    1818           0 :         if (p)
    1819             :                 set_next_task_rt(rq, p, true);
    1820             : 
    1821           0 :         return p;
    1822             : }
    1823             : 
    1824           0 : static void put_prev_task_rt(struct rq *rq, struct task_struct *p)
    1825             : {
    1826           0 :         struct sched_rt_entity *rt_se = &p->rt;
    1827           0 :         struct rt_rq *rt_rq = &rq->rt;
    1828             : 
    1829           0 :         if (on_rt_rq(&p->rt))
    1830             :                 update_stats_wait_start_rt(rt_rq, rt_se);
    1831             : 
    1832           0 :         update_curr_rt(rq);
    1833             : 
    1834           0 :         update_rt_rq_load_avg(rq_clock_pelt(rq), rq, 1);
    1835             : 
    1836             :         /*
    1837             :          * The previous task needs to be made eligible for pushing
     1838             :          * if it is still active.
    1839             :          */
    1840           0 :         if (on_rt_rq(&p->rt) && p->nr_cpus_allowed > 1)
    1841             :                 enqueue_pushable_task(rq, p);
    1842           0 : }
    1843             : 
    1844             : #ifdef CONFIG_SMP
    1845             : 
    1846             : /* Only try algorithms three times */
    1847             : #define RT_MAX_TRIES 3
    1848             : 
    1849             : static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu)
    1850             : {
    1851             :         if (!task_on_cpu(rq, p) &&
    1852             :             cpumask_test_cpu(cpu, &p->cpus_mask))
    1853             :                 return 1;
    1854             : 
    1855             :         return 0;
    1856             : }
    1857             : 
    1858             : /*
     1859             :  * Return the rq's highest-priority pushable task that is suitable to be
     1860             :  * executed on the given CPU, or NULL otherwise.
    1861             :  */
    1862             : static struct task_struct *pick_highest_pushable_task(struct rq *rq, int cpu)
    1863             : {
    1864             :         struct plist_head *head = &rq->rt.pushable_tasks;
    1865             :         struct task_struct *p;
    1866             : 
    1867             :         if (!has_pushable_tasks(rq))
    1868             :                 return NULL;
    1869             : 
    1870             :         plist_for_each_entry(p, head, pushable_tasks) {
    1871             :                 if (pick_rt_task(rq, p, cpu))
    1872             :                         return p;
    1873             :         }
    1874             : 
    1875             :         return NULL;
    1876             : }
    1877             : 
    1878             : static DEFINE_PER_CPU(cpumask_var_t, local_cpu_mask);
    1879             : 
    1880             : static int find_lowest_rq(struct task_struct *task)
    1881             : {
    1882             :         struct sched_domain *sd;
    1883             :         struct cpumask *lowest_mask = this_cpu_cpumask_var_ptr(local_cpu_mask);
    1884             :         int this_cpu = smp_processor_id();
    1885             :         int cpu      = task_cpu(task);
    1886             :         int ret;
    1887             : 
    1888             :         /* Make sure the mask is initialized first */
    1889             :         if (unlikely(!lowest_mask))
    1890             :                 return -1;
    1891             : 
    1892             :         if (task->nr_cpus_allowed == 1)
    1893             :                 return -1; /* No other targets possible */
    1894             : 
    1895             :         /*
     1896             :          * If we're on an asymmetric-capacity system, ensure we consider the
     1897             :          * different capacities of the CPUs when searching for the lowest_mask.
    1898             :          */
    1899             :         if (sched_asym_cpucap_active()) {
    1900             : 
    1901             :                 ret = cpupri_find_fitness(&task_rq(task)->rd->cpupri,
    1902             :                                           task, lowest_mask,
    1903             :                                           rt_task_fits_capacity);
    1904             :         } else {
    1905             : 
    1906             :                 ret = cpupri_find(&task_rq(task)->rd->cpupri,
    1907             :                                   task, lowest_mask);
    1908             :         }
    1909             : 
    1910             :         if (!ret)
    1911             :                 return -1; /* No targets found */
    1912             : 
    1913             :         /*
    1914             :          * At this point we have built a mask of CPUs representing the
    1915             :          * lowest priority tasks in the system.  Now we want to elect
    1916             :          * the best one based on our affinity and topology.
    1917             :          *
    1918             :          * We prioritize the last CPU that the task executed on since
    1919             :          * it is most likely cache-hot in that location.
    1920             :          */
    1921             :         if (cpumask_test_cpu(cpu, lowest_mask))
    1922             :                 return cpu;
    1923             : 
    1924             :         /*
    1925             :          * Otherwise, we consult the sched_domains span maps to figure
    1926             :          * out which CPU is logically closest to our hot cache data.
    1927             :          */
    1928             :         if (!cpumask_test_cpu(this_cpu, lowest_mask))
    1929             :                 this_cpu = -1; /* Skip this_cpu opt if not among lowest */
    1930             : 
    1931             :         rcu_read_lock();
    1932             :         for_each_domain(cpu, sd) {
    1933             :                 if (sd->flags & SD_WAKE_AFFINE) {
    1934             :                         int best_cpu;
    1935             : 
    1936             :                         /*
    1937             :                          * "this_cpu" is cheaper to preempt than a
    1938             :                          * remote processor.
    1939             :                          */
    1940             :                         if (this_cpu != -1 &&
    1941             :                             cpumask_test_cpu(this_cpu, sched_domain_span(sd))) {
    1942             :                                 rcu_read_unlock();
    1943             :                                 return this_cpu;
    1944             :                         }
    1945             : 
    1946             :                         best_cpu = cpumask_any_and_distribute(lowest_mask,
    1947             :                                                               sched_domain_span(sd));
    1948             :                         if (best_cpu < nr_cpu_ids) {
    1949             :                                 rcu_read_unlock();
    1950             :                                 return best_cpu;
    1951             :                         }
    1952             :                 }
    1953             :         }
    1954             :         rcu_read_unlock();
    1955             : 
    1956             :         /*
    1957             :          * And finally, if there were no matches within the domains
    1958             :          * just give the caller *something* to work with from the compatible
    1959             :          * locations.
    1960             :          */
    1961             :         if (this_cpu != -1)
    1962             :                 return this_cpu;
    1963             : 
    1964             :         cpu = cpumask_any_distribute(lowest_mask);
    1965             :         if (cpu < nr_cpu_ids)
    1966             :                 return cpu;
    1967             : 
    1968             :         return -1;
    1969             : }
    1970             : 
    1971             : /* Will lock the rq it finds */
    1972             : static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq)
    1973             : {
    1974             :         struct rq *lowest_rq = NULL;
    1975             :         int tries;
    1976             :         int cpu;
    1977             : 
    1978             :         for (tries = 0; tries < RT_MAX_TRIES; tries++) {
    1979             :                 cpu = find_lowest_rq(task);
    1980             : 
    1981             :                 if ((cpu == -1) || (cpu == rq->cpu))
    1982             :                         break;
    1983             : 
    1984             :                 lowest_rq = cpu_rq(cpu);
    1985             : 
    1986             :                 if (lowest_rq->rt.highest_prio.curr <= task->prio) {
    1987             :                         /*
    1988             :                          * Target rq has tasks of equal or higher priority,
    1989             :                          * retrying does not release any lock and is unlikely
    1990             :                          * to yield a different result.
    1991             :                          */
    1992             :                         lowest_rq = NULL;
    1993             :                         break;
    1994             :                 }
    1995             : 
    1996             :                 /* if the prio of this runqueue changed, try again */
    1997             :                 if (double_lock_balance(rq, lowest_rq)) {
    1998             :                         /*
     1999             :                          * We had to unlock the run queue. In
     2000             :                          * the meantime, the task could have
     2001             :                          * migrated already or had its affinity changed.
    2002             :                          * Also make sure that it wasn't scheduled on its rq.
    2003             :                          * It is possible the task was scheduled, set
    2004             :                          * "migrate_disabled" and then got preempted, so we must
    2005             :                          * check the task migration disable flag here too.
    2006             :                          */
    2007             :                         if (unlikely(task_rq(task) != rq ||
    2008             :                                      !cpumask_test_cpu(lowest_rq->cpu, &task->cpus_mask) ||
    2009             :                                      task_on_cpu(rq, task) ||
    2010             :                                      !rt_task(task) ||
    2011             :                                      is_migration_disabled(task) ||
    2012             :                                      !task_on_rq_queued(task))) {
    2013             : 
    2014             :                                 double_unlock_balance(rq, lowest_rq);
    2015             :                                 lowest_rq = NULL;
    2016             :                                 break;
    2017             :                         }
    2018             :                 }
    2019             : 
    2020             :                 /* If this rq is still suitable use it. */
    2021             :                 if (lowest_rq->rt.highest_prio.curr > task->prio)
    2022             :                         break;
    2023             : 
    2024             :                 /* try again */
    2025             :                 double_unlock_balance(rq, lowest_rq);
    2026             :                 lowest_rq = NULL;
    2027             :         }
    2028             : 
    2029             :         return lowest_rq;
    2030             : }
    2031             : 
    2032             : static struct task_struct *pick_next_pushable_task(struct rq *rq)
    2033             : {
    2034             :         struct task_struct *p;
    2035             : 
    2036             :         if (!has_pushable_tasks(rq))
    2037             :                 return NULL;
    2038             : 
    2039             :         p = plist_first_entry(&rq->rt.pushable_tasks,
    2040             :                               struct task_struct, pushable_tasks);
    2041             : 
    2042             :         BUG_ON(rq->cpu != task_cpu(p));
    2043             :         BUG_ON(task_current(rq, p));
    2044             :         BUG_ON(p->nr_cpus_allowed <= 1);
    2045             : 
    2046             :         BUG_ON(!task_on_rq_queued(p));
    2047             :         BUG_ON(!rt_task(p));
    2048             : 
    2049             :         return p;
    2050             : }
    2051             : 
    2052             : /*
     2053             :  * If the current CPU has more than one RT task, see if the non-running
     2054             :  * task can migrate over to a CPU that is running a task
    2055             :  * of lesser priority.
    2056             :  */
    2057             : static int push_rt_task(struct rq *rq, bool pull)
    2058             : {
    2059             :         struct task_struct *next_task;
    2060             :         struct rq *lowest_rq;
    2061             :         int ret = 0;
    2062             : 
    2063             :         if (!rq->rt.overloaded)
    2064             :                 return 0;
    2065             : 
    2066             :         next_task = pick_next_pushable_task(rq);
    2067             :         if (!next_task)
    2068             :                 return 0;
    2069             : 
    2070             : retry:
    2071             :         /*
     2072             :          * It's possible that the next_task slipped in with a
     2073             :          * higher priority than current. If that's the case,
     2074             :          * just reschedule current.
    2075             :          */
    2076             :         if (unlikely(next_task->prio < rq->curr->prio)) {
    2077             :                 resched_curr(rq);
    2078             :                 return 0;
    2079             :         }
    2080             : 
    2081             :         if (is_migration_disabled(next_task)) {
    2082             :                 struct task_struct *push_task = NULL;
    2083             :                 int cpu;
    2084             : 
    2085             :                 if (!pull || rq->push_busy)
    2086             :                         return 0;
    2087             : 
    2088             :                 /*
    2089             :                  * Invoking find_lowest_rq() on anything but an RT task doesn't
    2090             :                  * make sense. Per the above priority check, curr has to
    2091             :                  * be of higher priority than next_task, so no need to
    2092             :                  * reschedule when bailing out.
    2093             :                  *
    2094             :                  * Note that the stoppers are masqueraded as SCHED_FIFO
    2095             :                  * (cf. sched_set_stop_task()), so we can't rely on rt_task().
    2096             :                  */
    2097             :                 if (rq->curr->sched_class != &rt_sched_class)
    2098             :                         return 0;
    2099             : 
    2100             :                 cpu = find_lowest_rq(rq->curr);
    2101             :                 if (cpu == -1 || cpu == rq->cpu)
    2102             :                         return 0;
    2103             : 
    2104             :                 /*
     2105             :                  * Given that we found a CPU with a lower priority than
     2106             :                  * @next_task, @next_task deserves to be running. However, we
     2107             :                  * cannot migrate it to that other CPU; instead, attempt to
     2108             :                  * push away the task currently running on this CPU.
    2109             :                  */
    2110             :                 push_task = get_push_task(rq);
    2111             :                 if (push_task) {
    2112             :                         raw_spin_rq_unlock(rq);
    2113             :                         stop_one_cpu_nowait(rq->cpu, push_cpu_stop,
    2114             :                                             push_task, &rq->push_work);
    2115             :                         raw_spin_rq_lock(rq);
    2116             :                 }
    2117             : 
    2118             :                 return 0;
    2119             :         }
    2120             : 
    2121             :         if (WARN_ON(next_task == rq->curr))
    2122             :                 return 0;
    2123             : 
    2124             :         /* We might release rq lock */
    2125             :         get_task_struct(next_task);
    2126             : 
    2127             :         /* find_lock_lowest_rq locks the rq if found */
    2128             :         lowest_rq = find_lock_lowest_rq(next_task, rq);
    2129             :         if (!lowest_rq) {
    2130             :                 struct task_struct *task;
    2131             :                 /*
    2132             :                  * find_lock_lowest_rq releases rq->lock
    2133             :                  * so it is possible that next_task has migrated.
    2134             :                  *
    2135             :                  * We need to make sure that the task is still on the same
    2136             :                  * run-queue and is also still the next task eligible for
    2137             :                  * pushing.
    2138             :                  */
    2139             :                 task = pick_next_pushable_task(rq);
    2140             :                 if (task == next_task) {
    2141             :                         /*
    2142             :                          * The task hasn't migrated, and is still the next
    2143             :                          * eligible task, but we failed to find a run-queue
    2144             :                          * to push it to.  Do not retry in this case, since
    2145             :                          * other CPUs will pull from us when ready.
    2146             :                          */
    2147             :                         goto out;
    2148             :                 }
    2149             : 
    2150             :                 if (!task)
    2151             :                         /* No more tasks, just exit */
    2152             :                         goto out;
    2153             : 
    2154             :                 /*
    2155             :                  * Something has shifted, try again.
    2156             :                  */
    2157             :                 put_task_struct(next_task);
    2158             :                 next_task = task;
    2159             :                 goto retry;
    2160             :         }
    2161             : 
    2162             :         deactivate_task(rq, next_task, 0);
    2163             :         set_task_cpu(next_task, lowest_rq->cpu);
    2164             :         activate_task(lowest_rq, next_task, 0);
    2165             :         resched_curr(lowest_rq);
    2166             :         ret = 1;
    2167             : 
    2168             :         double_unlock_balance(rq, lowest_rq);
    2169             : out:
    2170             :         put_task_struct(next_task);
    2171             : 
    2172             :         return ret;
    2173             : }
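
The retry path in push_rt_task() above is an instance of a general drop-and-revalidate
pattern: find_lock_lowest_rq() may drop rq->lock in order to take both runqueue locks in
a safe order, so afterwards the chosen task must be re-checked via pick_next_pushable_task().
Below is a minimal userspace sketch of that pattern using pthreads; struct queue,
lock_pair() and push_one() are illustrative names, not kernel code.

#include <pthread.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct queue {
	pthread_mutex_t lock;
	int next_item;			/* -1 = nothing pushable */
};

/* Lock both queues in address order; may temporarily drop @a's lock. */
static void lock_pair(struct queue *a, struct queue *b)
{
	if ((uintptr_t)a < (uintptr_t)b) {
		pthread_mutex_lock(&b->lock);	/* caller already holds a */
	} else {
		pthread_mutex_unlock(&a->lock);	/* drop, then retake in order */
		pthread_mutex_lock(&b->lock);
		pthread_mutex_lock(&a->lock);
	}
}

/* Move @src's pushable item to @dst; returns true if something moved. */
static bool push_one(struct queue *src, struct queue *dst)
{
	bool moved = false;
	int item;

	pthread_mutex_lock(&src->lock);
	item = src->next_item;
	if (item >= 0) {
		lock_pair(src, dst);	/* src->lock may have been dropped */
		/* Revalidate: another thread may have taken the item meanwhile. */
		if (src->next_item == item) {
			src->next_item = -1;
			dst->next_item = item;
			moved = true;
		}
		pthread_mutex_unlock(&dst->lock);
	}
	pthread_mutex_unlock(&src->lock);
	return moved;
}

int main(void)
{
	struct queue a = { .lock = PTHREAD_MUTEX_INITIALIZER, .next_item = 42 };
	struct queue b = { .lock = PTHREAD_MUTEX_INITIALIZER, .next_item = -1 };

	printf("pushed: %s, b.next_item = %d\n",
	       push_one(&a, &b) ? "yes" : "no", b.next_item);
	return 0;	/* build with: cc -pthread sketch.c */
}
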
    2174             : 
    2175             : static void push_rt_tasks(struct rq *rq)
    2176             : {
    2177             :         /* push_rt_task will return true if it moved an RT */
    2178             :         while (push_rt_task(rq, false))
    2179             :                 ;
    2180             : }
    2181             : 
    2182             : #ifdef HAVE_RT_PUSH_IPI
    2183             : 
    2184             : /*
    2185             :  * When a high priority task schedules out from a CPU and a lower priority
    2186             :  * task is scheduled in, a check is made to see if there are any RT tasks
    2187             :  * on other CPUs that are waiting to run because a higher priority RT task
    2188             :  * is currently running on its CPU. In this case, the CPU with multiple RT
    2189             :  * tasks queued on it (overloaded) needs to be notified that a CPU has opened
    2190             :  * up that may be able to run one of its non-running queued RT tasks.
    2191             :  *
    2192             :  * All CPUs with overloaded RT tasks need to be notified, as there is
    2193             :  * currently no way to know which of these CPUs has the highest priority
    2194             :  * task waiting to run. Instead of trying to take a spinlock on each of
    2195             :  * these CPUs, which has been shown to cause large latency on machines
    2196             :  * with many CPUs, an IPI is sent to the CPUs to have them push off their
    2197             :  * overloaded RT tasks that are waiting to run.
    2198             :  *
    2199             :  * Just sending an IPI to each of the CPUs is also an issue: on machines
    2200             :  * with many CPUs this can cause an IPI storm on a single CPU, especially
    2201             :  * if it is the only CPU with multiple RT tasks queued and a large number
    2202             :  * of CPUs schedule a lower priority task at the same time.
    2203             :  *
    2204             :  * Each root domain has its own irq work function that can iterate over
    2205             :  * all CPUs with RT overloaded tasks. Since all RT overloaded CPUs must
    2206             :  * be checked regardless of whether one or many CPUs are lowering their
    2207             :  * priority, a single irq work iterator is used to walk them and try to
    2208             :  * push off the RT tasks that are waiting to run.
    2209             :  *
    2210             :  * When a CPU schedules a lower priority task, it will kick off the
    2211             :  * irq work iterator that will jump to each CPU with overloaded RT tasks.
    2212             :  * Only the first CPU that schedules a lower priority task needs to start
    2213             :  * the process: rto_loop_start is claimed atomically via
    2214             :  * rto_start_trylock(), and only the CPU that wins it takes the rto_lock.
    2215             :  * This prevents high contention on the lock while the iterator handles
    2216             :  * all CPUs that are scheduling lower priority tasks.
    2217             :  *
    2218             :  * All CPUs that are scheduling a lower priority task will increment the
    2219             :  * rto_loop_next variable. This ensures that the irq work iterator checks
    2220             :  * all RT overloaded CPUs whenever a CPU schedules a new lower priority
    2221             :  * task, even if the iterator is in the middle of a scan: incrementing
    2222             :  * rto_loop_next causes the iterator to perform another scan.
    2223             :  * (A simplified userspace sketch of this hand-off follows this section.)
    2224             :  */
    2225             : static int rto_next_cpu(struct root_domain *rd)
    2226             : {
    2227             :         int next;
    2228             :         int cpu;
    2229             : 
    2230             :         /*
    2231             :          * When the IPI RT push is started, rto_cpu is set to -1, so
    2232             :          * rto_next_cpu() will simply return the first CPU found in
    2233             :          * the rto_mask.
    2234             :          *
    2235             :          * If rto_next_cpu() is called while rto_cpu is a valid CPU, it
    2236             :          * will return the next CPU found in the rto_mask.
    2237             :          *
    2238             :          * If there are no more CPUs left in the rto_mask, then a check is made
    2239             :          * against rto_loop and rto_loop_next. rto_loop is only updated with
    2240             :          * the rto_lock held, but any CPU may increment the rto_loop_next
    2241             :          * without any locking.
    2242             :          */
    2243             :         for (;;) {
    2244             : 
    2245             :                 /* When rto_cpu is -1 this acts like cpumask_first() */
    2246             :                 cpu = cpumask_next(rd->rto_cpu, rd->rto_mask);
    2247             : 
    2248             :                 rd->rto_cpu = cpu;
    2249             : 
    2250             :                 if (cpu < nr_cpu_ids)
    2251             :                         return cpu;
    2252             : 
    2253             :                 rd->rto_cpu = -1;
    2254             : 
    2255             :                 /*
    2256             :                  * ACQUIRE ensures we see the @rto_mask changes
    2257             :                  * made prior to the @next value observed.
    2258             :                  *
    2259             :                  * Matches WMB in rt_set_overload().
    2260             :                  */
    2261             :                 next = atomic_read_acquire(&rd->rto_loop_next);
    2262             : 
    2263             :                 if (rd->rto_loop == next)
    2264             :                         break;
    2265             : 
    2266             :                 rd->rto_loop = next;
    2267             :         }
    2268             : 
    2269             :         return -1;
    2270             : }
    2271             : 
    2272             : static inline bool rto_start_trylock(atomic_t *v)
    2273             : {
    2274             :         return !atomic_cmpxchg_acquire(v, 0, 1);
    2275             : }
    2276             : 
    2277             : static inline void rto_start_unlock(atomic_t *v)
    2278             : {
    2279             :         atomic_set_release(v, 0);
    2280             : }
    2281             : 
    2282             : static void tell_cpu_to_push(struct rq *rq)
    2283             : {
    2284             :         int cpu = -1;
    2285             : 
    2286             :         /* Keep the loop going if the IPI is currently active */
    2287             :         atomic_inc(&rq->rd->rto_loop_next);
    2288             : 
    2289             :         /* Only one CPU can initiate a loop at a time */
    2290             :         if (!rto_start_trylock(&rq->rd->rto_loop_start))
    2291             :                 return;
    2292             : 
    2293             :         raw_spin_lock(&rq->rd->rto_lock);
    2294             : 
    2295             :         /*
    2296             :          * rto_cpu is updated under the lock. If it holds a valid CPU
    2297             :          * then the IPI chain is still running and will continue due to
    2298             :          * the update to rto_loop_next, so nothing needs to be done here.
    2299             :          * Otherwise it is finishing up and a new IPI needs to be sent.
    2300             :          */
    2301             :         if (rq->rd->rto_cpu < 0)
    2302             :                 cpu = rto_next_cpu(rq->rd);
    2303             : 
    2304             :         raw_spin_unlock(&rq->rd->rto_lock);
    2305             : 
    2306             :         rto_start_unlock(&rq->rd->rto_loop_start);
    2307             : 
    2308             :         if (cpu >= 0) {
    2309             :                 /* Make sure the rd does not get freed while pushing */
    2310             :                 sched_get_rd(rq->rd);
    2311             :                 irq_work_queue_on(&rq->rd->rto_push_work, cpu);
    2312             :         }
    2313             : }
    2314             : 
    2315             : /* Called from hardirq context */
    2316             : void rto_push_irq_work_func(struct irq_work *work)
    2317             : {
    2318             :         struct root_domain *rd =
    2319             :                 container_of(work, struct root_domain, rto_push_work);
    2320             :         struct rq *rq;
    2321             :         int cpu;
    2322             : 
    2323             :         rq = this_rq();
    2324             : 
    2325             :         /*
    2326             :          * We do not need to grab the lock to check for has_pushable_tasks.
    2327             :          * When it gets updated, a check is made to see whether a push is possible.
    2328             :          */
    2329             :         if (has_pushable_tasks(rq)) {
    2330             :                 raw_spin_rq_lock(rq);
    2331             :                 while (push_rt_task(rq, true))
    2332             :                         ;
    2333             :                 raw_spin_rq_unlock(rq);
    2334             :         }
    2335             : 
    2336             :         raw_spin_lock(&rd->rto_lock);
    2337             : 
    2338             :         /* Pass the IPI to the next rt overloaded queue */
    2339             :         cpu = rto_next_cpu(rd);
    2340             : 
    2341             :         raw_spin_unlock(&rd->rto_lock);
    2342             : 
    2343             :         if (cpu < 0) {
    2344             :                 sched_put_rd(rd);
    2345             :                 return;
    2346             :         }
    2347             : 
    2348             :         /* Try the next RT overloaded CPU */
    2349             :         irq_work_queue_on(&rd->rto_push_work, cpu);
    2350             : }
    2351             : #endif /* HAVE_RT_PUSH_IPI */
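
The comment block above describes a hand-off in which many CPUs may request a push but
only one iterator runs at a time, and a counter bump forces a re-scan whenever new
requests arrive mid-scan. Below is a much-simplified userspace analogue of that idea
using C11 atomics; it collapses the distributed irq-work chain into a single loop,
omits the rto_lock, and all names (loop_start, loop_next, request_push, scan_overloaded)
are made up for illustration. The kernel's version is more involved (rto_lock, rto_cpu
bookkeeping and irq-work chaining) and avoids the lost-request window this sketch has
between the final re-check and clearing the start flag.

#include <stdatomic.h>
#include <stdio.h>

static atomic_int loop_start;	/* 0 = no iterator active, 1 = active */
static atomic_int loop_next;	/* bumped by every "CPU" requesting a push */
static int scans;

static void scan_overloaded(void)	/* stands in for one push pass */
{
	scans++;
}

static void request_push(void)
{
	int seen, expected = 0;

	/* Every requester bumps the counter so an active iterator re-scans. */
	atomic_fetch_add_explicit(&loop_next, 1, memory_order_release);

	/* Only the first requester becomes the iterator (cf. rto_start_trylock()). */
	if (!atomic_compare_exchange_strong_explicit(&loop_start, &expected, 1,
						     memory_order_acquire,
						     memory_order_relaxed))
		return;

	do {
		seen = atomic_load_explicit(&loop_next, memory_order_acquire);
		scan_overloaded();
		/* If anyone bumped loop_next during the scan, go around again. */
	} while (seen != atomic_load_explicit(&loop_next, memory_order_acquire));

	atomic_store_explicit(&loop_start, 0, memory_order_release);
}

int main(void)
{
	request_push();
	request_push();		/* the first iterator finished, so this starts a new scan */
	printf("scans performed: %d\n", scans);
	return 0;
}
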
    2352             : 
    2353             : static void pull_rt_task(struct rq *this_rq)
    2354             : {
    2355             :         int this_cpu = this_rq->cpu, cpu;
    2356             :         bool resched = false;
    2357             :         struct task_struct *p, *push_task;
    2358             :         struct rq *src_rq;
    2359             :         int rt_overload_count = rt_overloaded(this_rq);
    2360             : 
    2361             :         if (likely(!rt_overload_count))
    2362             :                 return;
    2363             : 
    2364             :         /*
    2365             :          * Match the barrier from rt_set_overload(); this guarantees that if we
    2366             :          * see overloaded we must also see the rto_mask bit.
    2367             :          */
    2368             :         smp_rmb();
    2369             : 
    2370             :         /* If we are the only overloaded CPU do nothing */
    2371             :         if (rt_overload_count == 1 &&
    2372             :             cpumask_test_cpu(this_rq->cpu, this_rq->rd->rto_mask))
    2373             :                 return;
    2374             : 
    2375             : #ifdef HAVE_RT_PUSH_IPI
    2376             :         if (sched_feat(RT_PUSH_IPI)) {
    2377             :                 tell_cpu_to_push(this_rq);
    2378             :                 return;
    2379             :         }
    2380             : #endif
    2381             : 
    2382             :         for_each_cpu(cpu, this_rq->rd->rto_mask) {
    2383             :                 if (this_cpu == cpu)
    2384             :                         continue;
    2385             : 
    2386             :                 src_rq = cpu_rq(cpu);
    2387             : 
    2388             :                 /*
    2389             :                  * Don't bother taking the src_rq->lock if the next highest
    2390             :                  * task is known to be lower-priority than our current task.
    2391             :                  * This may look racy, but if this value is about to go
    2392             :                  * logically higher, the src_rq will push this task away.
    2393             :                  * And if it's going logically lower, we do not care.
    2394             :                  */
    2395             :                 if (src_rq->rt.highest_prio.next >=
    2396             :                     this_rq->rt.highest_prio.curr)
    2397             :                         continue;
    2398             : 
    2399             :                 /*
    2400             :                  * We can potentially drop this_rq's lock in
    2401             :                  * double_lock_balance, and another CPU could
    2402             :                  * alter this_rq
    2403             :                  */
    2404             :                 push_task = NULL;
    2405             :                 double_lock_balance(this_rq, src_rq);
    2406             : 
    2407             :                 /*
    2408             :                  * We can only pull a task that is pushable
    2409             :                  * on its rq, and no others.
    2410             :                  */
    2411             :                 p = pick_highest_pushable_task(src_rq, this_cpu);
    2412             : 
    2413             :                 /*
    2414             :                  * Do we have an RT task that preempts
    2415             :                  * the to-be-scheduled task?
    2416             :                  */
    2417             :                 if (p && (p->prio < this_rq->rt.highest_prio.curr)) {
    2418             :                         WARN_ON(p == src_rq->curr);
    2419             :                         WARN_ON(!task_on_rq_queued(p));
    2420             : 
    2421             :                         /*
    2422             :                          * There's a chance that p is higher in priority
    2423             :                          * than what's currently running on its CPU.
    2424             :                          * That just means p is waking up and hasn't
    2425             :                          * had a chance to schedule yet. We only pull
    2426             :                          * p if it is lower in priority than the
    2427             :                          * current task on its run queue.
    2428             :                          */
    2429             :                         if (p->prio < src_rq->curr->prio)
    2430             :                                 goto skip;
    2431             : 
    2432             :                         if (is_migration_disabled(p)) {
    2433             :                                 push_task = get_push_task(src_rq);
    2434             :                         } else {
    2435             :                                 deactivate_task(src_rq, p, 0);
    2436             :                                 set_task_cpu(p, this_cpu);
    2437             :                                 activate_task(this_rq, p, 0);
    2438             :                                 resched = true;
    2439             :                         }
    2440             :                         /*
    2441             :                          * We continue with the search, just in
    2442             :                          * case there's an even higher prio task
    2443             :                          * in another runqueue. (low likelihood
    2444             :                          * but possible)
    2445             :                          */
    2446             :                 }
    2447             : skip:
    2448             :                 double_unlock_balance(this_rq, src_rq);
    2449             : 
    2450             :                 if (push_task) {
    2451             :                         raw_spin_rq_unlock(this_rq);
    2452             :                         stop_one_cpu_nowait(src_rq->cpu, push_cpu_stop,
    2453             :                                             push_task, &src_rq->push_work);
    2454             :                         raw_spin_rq_lock(this_rq);
    2455             :                 }
    2456             :         }
    2457             : 
    2458             :         if (resched)
    2459             :                 resched_curr(this_rq);
    2460             : }
    2461             : 
    2462             : /*
    2463             :  * If we are not running and we are not going to reschedule soon, we should
    2464             :  * try to push tasks away now
    2465             :  */
    2466             : static void task_woken_rt(struct rq *rq, struct task_struct *p)
    2467             : {
    2468             :         bool need_to_push = !task_on_cpu(rq, p) &&
    2469             :                             !test_tsk_need_resched(rq->curr) &&
    2470             :                             p->nr_cpus_allowed > 1 &&
    2471             :                             (dl_task(rq->curr) || rt_task(rq->curr)) &&
    2472             :                             (rq->curr->nr_cpus_allowed < 2 ||
    2473             :                              rq->curr->prio <= p->prio);
    2474             : 
    2475             :         if (need_to_push)
    2476             :                 push_rt_tasks(rq);
    2477             : }
    2478             : 
    2479             : /* Assumes rq->lock is held */
    2480             : static void rq_online_rt(struct rq *rq)
    2481             : {
    2482             :         if (rq->rt.overloaded)
    2483             :                 rt_set_overload(rq);
    2484             : 
    2485             :         __enable_runtime(rq);
    2486             : 
    2487             :         cpupri_set(&rq->rd->cpupri, rq->cpu, rq->rt.highest_prio.curr);
    2488             : }
    2489             : 
    2490             : /* Assumes rq->lock is held */
    2491             : static void rq_offline_rt(struct rq *rq)
    2492             : {
    2493             :         if (rq->rt.overloaded)
    2494             :                 rt_clear_overload(rq);
    2495             : 
    2496             :         __disable_runtime(rq);
    2497             : 
    2498             :         cpupri_set(&rq->rd->cpupri, rq->cpu, CPUPRI_INVALID);
    2499             : }
    2500             : 
    2501             : /*
    2502             :  * When switching away from the RT queue, we may end up in a position
    2503             :  * where we want to pull RT tasks from other runqueues.
    2504             :  */
    2505             : static void switched_from_rt(struct rq *rq, struct task_struct *p)
    2506             : {
    2507             :         /*
    2508             :          * If there are other RT tasks then we will reschedule
    2509             :          * and the scheduling of the other RT tasks will handle
    2510             :          * the balancing. But if we are the last RT task
    2511             :          * we may need to handle the pulling of RT tasks
    2512             :          * now.
    2513             :          */
    2514             :         if (!task_on_rq_queued(p) || rq->rt.rt_nr_running)
    2515             :                 return;
    2516             : 
    2517             :         rt_queue_pull_task(rq);
    2518             : }
    2519             : 
    2520             : void __init init_sched_rt_class(void)
    2521             : {
    2522             :         unsigned int i;
    2523             : 
    2524             :         for_each_possible_cpu(i) {
    2525             :                 zalloc_cpumask_var_node(&per_cpu(local_cpu_mask, i),
    2526             :                                         GFP_KERNEL, cpu_to_node(i));
    2527             :         }
    2528             : }
    2529             : #endif /* CONFIG_SMP */
    2530             : 
    2531             : /*
    2532             :  * When switching a task to RT, we may overload the runqueue
    2533             :  * with RT tasks. In this case we try to push them off to
    2534             :  * other runqueues.
    2535             :  */
    2536           0 : static void switched_to_rt(struct rq *rq, struct task_struct *p)
    2537             : {
    2538             :         /*
    2539             :          * If we are running, update the avg_rt tracking, as the running time
    2540             :          * will from now on be accounted to it.
    2541             :          */
    2542           0 :         if (task_current(rq, p)) {
    2543             :                 update_rt_rq_load_avg(rq_clock_pelt(rq), rq, 0);
    2544             :                 return;
    2545             :         }
    2546             : 
    2547             :         /*
    2548             :          * If we are not running we may need to preempt the current
    2549             :          * running task. If that current running task is also an RT task
    2550             :          * then see if we can move to another run queue.
    2551             :          */
    2552           0 :         if (task_on_rq_queued(p)) {
    2553             : #ifdef CONFIG_SMP
    2554             :                 if (p->nr_cpus_allowed > 1 && rq->rt.overloaded)
    2555             :                         rt_queue_push_tasks(rq);
    2556             : #endif /* CONFIG_SMP */
    2557           0 :                 if (p->prio < rq->curr->prio && cpu_online(cpu_of(rq)))
    2558           0 :                         resched_curr(rq);
    2559             :         }
    2560             : }
    2561             : 
    2562             : /*
    2563             :  * Priority of the task has changed. This may cause
    2564             :  * us to initiate a push or pull.
    2565             :  */
    2566             : static void
    2567           0 : prio_changed_rt(struct rq *rq, struct task_struct *p, int oldprio)
    2568             : {
    2569           0 :         if (!task_on_rq_queued(p))
    2570             :                 return;
    2571             : 
    2572           0 :         if (task_current(rq, p)) {
    2573             : #ifdef CONFIG_SMP
    2574             :                 /*
    2575             :                  * If our priority decreases while running, we
    2576             :                  * may need to pull tasks to this runqueue.
    2577             :                  */
    2578             :                 if (oldprio < p->prio)
    2579             :                         rt_queue_pull_task(rq);
    2580             : 
    2581             :                 /*
    2582             :                  * If there's a higher priority task waiting to run
    2583             :                  * then reschedule.
    2584             :                  */
    2585             :                 if (p->prio > rq->rt.highest_prio.curr)
    2586             :                         resched_curr(rq);
    2587             : #else
    2588             :                 /* For UP simply resched on drop of prio */
    2589           0 :                 if (oldprio < p->prio)
    2590           0 :                         resched_curr(rq);
    2591             : #endif /* CONFIG_SMP */
    2592             :         } else {
    2593             :                 /*
    2594             :                  * This task is not running, but if its priority is
    2595             :                  * higher than that of the current running task,
    2596             :                  * then reschedule.
    2597             :                  */
    2598           0 :                 if (p->prio < rq->curr->prio)
    2599           0 :                         resched_curr(rq);
    2600             :         }
    2601             : }
    2602             : 
    2603             : #ifdef CONFIG_POSIX_TIMERS
    2604           0 : static void watchdog(struct rq *rq, struct task_struct *p)
    2605             : {
    2606             :         unsigned long soft, hard;
    2607             : 
    2608             :         /* max may change after cur was read; this will be fixed next tick */
    2609           0 :         soft = task_rlimit(p, RLIMIT_RTTIME);
    2610           0 :         hard = task_rlimit_max(p, RLIMIT_RTTIME);
    2611             : 
    2612           0 :         if (soft != RLIM_INFINITY) {
    2613             :                 unsigned long next;
    2614             : 
    2615           0 :                 if (p->rt.watchdog_stamp != jiffies) {
    2616           0 :                         p->rt.timeout++;
    2617           0 :                         p->rt.watchdog_stamp = jiffies;
    2618             :                 }
    2619             : 
    2620           0 :                 next = DIV_ROUND_UP(min(soft, hard), USEC_PER_SEC/HZ);
    2621           0 :                 if (p->rt.timeout > next) {
    2622           0 :                         posix_cputimers_rt_watchdog(&p->posix_cputimers,
    2623             :                                                     p->se.sum_exec_runtime);
    2624             :                 }
    2625             :         }
    2626           0 : }
    2627             : #else
    2628             : static inline void watchdog(struct rq *rq, struct task_struct *p) { }
    2629             : #endif
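
The watchdog above enforces RLIMIT_RTTIME, the Linux-specific per-task budget of CPU time
(in microseconds) that a SCHED_FIFO/SCHED_RR task may consume without blocking before it
receives SIGXCPU. A minimal userspace demo is sketched below; becoming SCHED_FIFO needs
appropriate privileges, and the limit values chosen are arbitrary.

#include <sched.h>
#include <signal.h>
#include <stdio.h>
#include <sys/resource.h>
#include <unistd.h>

static void on_xcpu(int sig)
{
	(void)sig;
	/* write() is async-signal-safe; the message below is 28 bytes. */
	write(2, "SIGXCPU: RT budget exceeded\n", 28);
	_exit(1);
}

int main(void)
{
	struct sched_param sp = { .sched_priority = 10 };
	struct rlimit rl = {
		.rlim_cur = 500000,	/* soft limit: 0.5s of RT CPU time */
		.rlim_max = 1000000,	/* hard limit: 1s */
	};

	signal(SIGXCPU, on_xcpu);

	if (sched_setscheduler(0, SCHED_FIFO, &sp))
		perror("sched_setscheduler");	/* typically needs CAP_SYS_NICE */

	if (setrlimit(RLIMIT_RTTIME, &rl))
		perror("setrlimit(RLIMIT_RTTIME)");

	for (;;)
		;	/* spin without sleeping; the watchdog should fire SIGXCPU */
}
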
    2630             : 
    2631             : /*
    2632             :  * scheduler tick hitting a task of our scheduling class.
    2633             :  *
    2634             :  * NOTE: This function can be called remotely by the tick offload that
    2635             :  * goes along with full dynticks. Therefore no local assumption can be made
    2636             :  * and everything must be accessed through the @rq and @curr passed in
    2637             :  * parameters.
    2638             :  */
    2639           0 : static void task_tick_rt(struct rq *rq, struct task_struct *p, int queued)
    2640             : {
    2641           0 :         struct sched_rt_entity *rt_se = &p->rt;
    2642             : 
    2643           0 :         update_curr_rt(rq);
    2644           0 :         update_rt_rq_load_avg(rq_clock_pelt(rq), rq, 1);
    2645             : 
    2646           0 :         watchdog(rq, p);
    2647             : 
    2648             :         /*
    2649             :          * RR tasks need a special form of timeslice management.
    2650             :          * FIFO tasks have no timeslices.
    2651             :          */
    2652           0 :         if (p->policy != SCHED_RR)
    2653             :                 return;
    2654             : 
    2655           0 :         if (--p->rt.time_slice)
    2656             :                 return;
    2657             : 
    2658           0 :         p->rt.time_slice = sched_rr_timeslice;
    2659             : 
    2660             :         /*
    2661             :          * Requeue to the end of the queue if we (and all of our ancestors) are
    2662             :          * not the only element on the queue.
    2663             :          */
    2664           0 :         for_each_sched_rt_entity(rt_se) {
    2665           0 :                 if (rt_se->run_list.prev != rt_se->run_list.next) {
    2666           0 :                         requeue_task_rt(rq, p, 0);
    2667           0 :                         resched_curr(rq);
    2668           0 :                         return;
    2669             :                 }
    2670             :         }
    2671             : }
    2672             : 
    2673           0 : static unsigned int get_rr_interval_rt(struct rq *rq, struct task_struct *task)
    2674             : {
    2675             :         /*
    2676             :          * Time slice is 0 for SCHED_FIFO tasks
    2677             :          */
    2678           0 :         if (task->policy == SCHED_RR)
    2679           0 :                 return sched_rr_timeslice;
    2680             :         else
    2681             :                 return 0;
    2682             : }
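
get_rr_interval_rt() above backs what the sched_rr_get_interval(2) system call reports
for RT tasks. A small usage example (switching to SCHED_RR typically requires
CAP_SYS_NICE; the priority value is arbitrary):

#include <sched.h>
#include <stdio.h>
#include <time.h>

int main(void)
{
	struct sched_param sp = { .sched_priority = 5 };
	struct timespec ts;

	if (sched_setscheduler(0, SCHED_RR, &sp)) {
		perror("sched_setscheduler");
		return 1;
	}
	if (sched_rr_get_interval(0, &ts)) {
		perror("sched_rr_get_interval");
		return 1;
	}
	printf("SCHED_RR timeslice: %ld.%09ld s\n",
	       (long)ts.tv_sec, ts.tv_nsec);
	return 0;
}
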
    2683             : 
    2684             : #ifdef CONFIG_SCHED_CORE
    2685             : static int task_is_throttled_rt(struct task_struct *p, int cpu)
    2686             : {
    2687             :         struct rt_rq *rt_rq;
    2688             : 
    2689             : #ifdef CONFIG_RT_GROUP_SCHED
    2690             :         rt_rq = task_group(p)->rt_rq[cpu];
    2691             : #else
    2692             :         rt_rq = &cpu_rq(cpu)->rt;
    2693             : #endif
    2694             : 
    2695             :         return rt_rq_throttled(rt_rq);
    2696             : }
    2697             : #endif
    2698             : 
    2699             : DEFINE_SCHED_CLASS(rt) = {
    2700             : 
    2701             :         .enqueue_task           = enqueue_task_rt,
    2702             :         .dequeue_task           = dequeue_task_rt,
    2703             :         .yield_task             = yield_task_rt,
    2704             : 
    2705             :         .check_preempt_curr     = check_preempt_curr_rt,
    2706             : 
    2707             :         .pick_next_task         = pick_next_task_rt,
    2708             :         .put_prev_task          = put_prev_task_rt,
    2709             :         .set_next_task          = set_next_task_rt,
    2710             : 
    2711             : #ifdef CONFIG_SMP
    2712             :         .balance                = balance_rt,
    2713             :         .pick_task              = pick_task_rt,
    2714             :         .select_task_rq         = select_task_rq_rt,
    2715             :         .set_cpus_allowed       = set_cpus_allowed_common,
    2716             :         .rq_online              = rq_online_rt,
    2717             :         .rq_offline             = rq_offline_rt,
    2718             :         .task_woken             = task_woken_rt,
    2719             :         .switched_from          = switched_from_rt,
    2720             :         .find_lock_rq           = find_lock_lowest_rq,
    2721             : #endif
    2722             : 
    2723             :         .task_tick              = task_tick_rt,
    2724             : 
    2725             :         .get_rr_interval        = get_rr_interval_rt,
    2726             : 
    2727             :         .prio_changed           = prio_changed_rt,
    2728             :         .switched_to            = switched_to_rt,
    2729             : 
    2730             :         .update_curr            = update_curr_rt,
    2731             : 
    2732             : #ifdef CONFIG_SCHED_CORE
    2733             :         .task_is_throttled      = task_is_throttled_rt,
    2734             : #endif
    2735             : 
    2736             : #ifdef CONFIG_UCLAMP_TASK
    2737             :         .uclamp_enabled         = 1,
    2738             : #endif
    2739             : };
    2740             : 
    2741             : #ifdef CONFIG_RT_GROUP_SCHED
    2742             : /*
    2743             :  * Ensure that the real time constraints are schedulable.
    2744             :  */
    2745             : static DEFINE_MUTEX(rt_constraints_mutex);
    2746             : 
    2747             : static inline int tg_has_rt_tasks(struct task_group *tg)
    2748             : {
    2749             :         struct task_struct *task;
    2750             :         struct css_task_iter it;
    2751             :         int ret = 0;
    2752             : 
    2753             :         /*
    2754             :          * Autogroups do not have RT tasks; see autogroup_create().
    2755             :          */
    2756             :         if (task_group_is_autogroup(tg))
    2757             :                 return 0;
    2758             : 
    2759             :         css_task_iter_start(&tg->css, 0, &it);
    2760             :         while (!ret && (task = css_task_iter_next(&it)))
    2761             :                 ret |= rt_task(task);
    2762             :         css_task_iter_end(&it);
    2763             : 
    2764             :         return ret;
    2765             : }
    2766             : 
    2767             : struct rt_schedulable_data {
    2768             :         struct task_group *tg;
    2769             :         u64 rt_period;
    2770             :         u64 rt_runtime;
    2771             : };
    2772             : 
    2773             : static int tg_rt_schedulable(struct task_group *tg, void *data)
    2774             : {
    2775             :         struct rt_schedulable_data *d = data;
    2776             :         struct task_group *child;
    2777             :         unsigned long total, sum = 0;
    2778             :         u64 period, runtime;
    2779             : 
    2780             :         period = ktime_to_ns(tg->rt_bandwidth.rt_period);
    2781             :         runtime = tg->rt_bandwidth.rt_runtime;
    2782             : 
    2783             :         if (tg == d->tg) {
    2784             :                 period = d->rt_period;
    2785             :                 runtime = d->rt_runtime;
    2786             :         }
    2787             : 
    2788             :         /*
    2789             :          * Cannot have more runtime than the period.
    2790             :          */
    2791             :         if (runtime > period && runtime != RUNTIME_INF)
    2792             :                 return -EINVAL;
    2793             : 
    2794             :         /*
    2795             :          * Ensure we don't starve existing RT tasks if runtime turns zero.
    2796             :          */
    2797             :         if (rt_bandwidth_enabled() && !runtime &&
    2798             :             tg->rt_bandwidth.rt_runtime && tg_has_rt_tasks(tg))
    2799             :                 return -EBUSY;
    2800             : 
    2801             :         total = to_ratio(period, runtime);
    2802             : 
    2803             :         /*
    2804             :          * Nobody can have more than the global setting allows.
    2805             :          */
    2806             :         if (total > to_ratio(global_rt_period(), global_rt_runtime()))
    2807             :                 return -EINVAL;
    2808             : 
    2809             :         /*
    2810             :          * The sum of our children's runtime should not exceed our own.
    2811             :          */
    2812             :         list_for_each_entry_rcu(child, &tg->children, siblings) {
    2813             :                 period = ktime_to_ns(child->rt_bandwidth.rt_period);
    2814             :                 runtime = child->rt_bandwidth.rt_runtime;
    2815             : 
    2816             :                 if (child == d->tg) {
    2817             :                         period = d->rt_period;
    2818             :                         runtime = d->rt_runtime;
    2819             :                 }
    2820             : 
    2821             :                 sum += to_ratio(period, runtime);
    2822             :         }
    2823             : 
    2824             :         if (sum > total)
    2825             :                 return -EINVAL;
    2826             : 
    2827             :         return 0;
    2828             : }
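
To make the hierarchical check above concrete, here is a small worked example (the group
layout is hypothetical; ratios are shown as runtime/period, while the kernel compares the
equivalent fixed-point values returned by to_ratio()):

    global (1s period, 0.95s runtime):          950000 / 1000000 -> 0.95
    group A:                                     50000 /  100000 -> 0.50  (<= 0.95, allowed)
    A's two children at 30000us/100000us each:   0.30 + 0.30     -> 0.60  (>  0.50, -EINVAL)
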
    2829             : 
    2830             : static int __rt_schedulable(struct task_group *tg, u64 period, u64 runtime)
    2831             : {
    2832             :         int ret;
    2833             : 
    2834             :         struct rt_schedulable_data data = {
    2835             :                 .tg = tg,
    2836             :                 .rt_period = period,
    2837             :                 .rt_runtime = runtime,
    2838             :         };
    2839             : 
    2840             :         rcu_read_lock();
    2841             :         ret = walk_tg_tree(tg_rt_schedulable, tg_nop, &data);
    2842             :         rcu_read_unlock();
    2843             : 
    2844             :         return ret;
    2845             : }
    2846             : 
    2847             : static int tg_set_rt_bandwidth(struct task_group *tg,
    2848             :                 u64 rt_period, u64 rt_runtime)
    2849             : {
    2850             :         int i, err = 0;
    2851             : 
    2852             :         /*
    2853             :          * Disallowing the root group's RT runtime is a bad idea: it would
    2854             :          * prevent the kernel from creating (and/or operating) RT threads.
    2855             :          */
    2856             :         if (tg == &root_task_group && rt_runtime == 0)
    2857             :                 return -EINVAL;
    2858             : 
    2859             :         /* A zero period doesn't make any sense. */
    2860             :         if (rt_period == 0)
    2861             :                 return -EINVAL;
    2862             : 
    2863             :         /*
    2864             :          * Bound the quota to defend against overflow during the bandwidth shift.
    2865             :          */
    2866             :         if (rt_runtime != RUNTIME_INF && rt_runtime > max_rt_runtime)
    2867             :                 return -EINVAL;
    2868             : 
    2869             :         mutex_lock(&rt_constraints_mutex);
    2870             :         err = __rt_schedulable(tg, rt_period, rt_runtime);
    2871             :         if (err)
    2872             :                 goto unlock;
    2873             : 
    2874             :         raw_spin_lock_irq(&tg->rt_bandwidth.rt_runtime_lock);
    2875             :         tg->rt_bandwidth.rt_period = ns_to_ktime(rt_period);
    2876             :         tg->rt_bandwidth.rt_runtime = rt_runtime;
    2877             : 
    2878             :         for_each_possible_cpu(i) {
    2879             :                 struct rt_rq *rt_rq = tg->rt_rq[i];
    2880             : 
    2881             :                 raw_spin_lock(&rt_rq->rt_runtime_lock);
    2882             :                 rt_rq->rt_runtime = rt_runtime;
    2883             :                 raw_spin_unlock(&rt_rq->rt_runtime_lock);
    2884             :         }
    2885             :         raw_spin_unlock_irq(&tg->rt_bandwidth.rt_runtime_lock);
    2886             : unlock:
    2887             :         mutex_unlock(&rt_constraints_mutex);
    2888             : 
    2889             :         return err;
    2890             : }
    2891             : 
    2892             : int sched_group_set_rt_runtime(struct task_group *tg, long rt_runtime_us)
    2893             : {
    2894             :         u64 rt_runtime, rt_period;
    2895             : 
    2896             :         rt_period = ktime_to_ns(tg->rt_bandwidth.rt_period);
    2897             :         rt_runtime = (u64)rt_runtime_us * NSEC_PER_USEC;
    2898             :         if (rt_runtime_us < 0)
    2899             :                 rt_runtime = RUNTIME_INF;
    2900             :         else if ((u64)rt_runtime_us > U64_MAX / NSEC_PER_USEC)
    2901             :                 return -EINVAL;
    2902             : 
    2903             :         return tg_set_rt_bandwidth(tg, rt_period, rt_runtime);
    2904             : }
    2905             : 
    2906             : long sched_group_rt_runtime(struct task_group *tg)
    2907             : {
    2908             :         u64 rt_runtime_us;
    2909             : 
    2910             :         if (tg->rt_bandwidth.rt_runtime == RUNTIME_INF)
    2911             :                 return -1;
    2912             : 
    2913             :         rt_runtime_us = tg->rt_bandwidth.rt_runtime;
    2914             :         do_div(rt_runtime_us, NSEC_PER_USEC);
    2915             :         return rt_runtime_us;
    2916             : }
    2917             : 
    2918             : int sched_group_set_rt_period(struct task_group *tg, u64 rt_period_us)
    2919             : {
    2920             :         u64 rt_runtime, rt_period;
    2921             : 
    2922             :         if (rt_period_us > U64_MAX / NSEC_PER_USEC)
    2923             :                 return -EINVAL;
    2924             : 
    2925             :         rt_period = rt_period_us * NSEC_PER_USEC;
    2926             :         rt_runtime = tg->rt_bandwidth.rt_runtime;
    2927             : 
    2928             :         return tg_set_rt_bandwidth(tg, rt_period, rt_runtime);
    2929             : }
    2930             : 
    2931             : long sched_group_rt_period(struct task_group *tg)
    2932             : {
    2933             :         u64 rt_period_us;
    2934             : 
    2935             :         rt_period_us = ktime_to_ns(tg->rt_bandwidth.rt_period);
    2936             :         do_div(rt_period_us, NSEC_PER_USEC);
    2937             :         return rt_period_us;
    2938             : }
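
These period/runtime setters are reachable from userspace through the cgroup v1 cpu
controller files cpu.rt_period_us and cpu.rt_runtime_us (only present with
CONFIG_RT_GROUP_SCHED). A minimal sketch of driving them follows; the mount point and the
"rtgroup" group name are assumptions about the local setup, and root privileges are
required.

#include <stdio.h>

static int write_knob(const char *path, long val)
{
	FILE *f = fopen(path, "w");

	if (!f) {
		perror(path);
		return -1;
	}
	fprintf(f, "%ld\n", val);
	return fclose(f);
}

int main(void)
{
	/* Allow the hypothetical "rtgroup" 30% RT time out of each 100ms period. */
	write_knob("/sys/fs/cgroup/cpu/rtgroup/cpu.rt_period_us", 100000);
	write_knob("/sys/fs/cgroup/cpu/rtgroup/cpu.rt_runtime_us", 30000);
	return 0;
}
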
    2939             : 
    2940             : #ifdef CONFIG_SYSCTL
    2941             : static int sched_rt_global_constraints(void)
    2942             : {
    2943             :         int ret = 0;
    2944             : 
    2945             :         mutex_lock(&rt_constraints_mutex);
    2946             :         ret = __rt_schedulable(NULL, 0, 0);
    2947             :         mutex_unlock(&rt_constraints_mutex);
    2948             : 
    2949             :         return ret;
    2950             : }
    2951             : #endif /* CONFIG_SYSCTL */
    2952             : 
    2953             : int sched_rt_can_attach(struct task_group *tg, struct task_struct *tsk)
    2954             : {
    2955             :         /* Don't accept realtime tasks when there is no way for them to run */
    2956             :         if (rt_task(tsk) && tg->rt_bandwidth.rt_runtime == 0)
    2957             :                 return 0;
    2958             : 
    2959             :         return 1;
    2960             : }
    2961             : 
    2962             : #else /* !CONFIG_RT_GROUP_SCHED */
    2963             : 
    2964             : #ifdef CONFIG_SYSCTL
    2965           0 : static int sched_rt_global_constraints(void)
    2966             : {
    2967             :         unsigned long flags;
    2968             :         int i;
    2969             : 
    2970           0 :         raw_spin_lock_irqsave(&def_rt_bandwidth.rt_runtime_lock, flags);
    2971           0 :         for_each_possible_cpu(i) {
    2972           0 :                 struct rt_rq *rt_rq = &cpu_rq(i)->rt;
    2973             : 
    2974           0 :                 raw_spin_lock(&rt_rq->rt_runtime_lock);
    2975           0 :                 rt_rq->rt_runtime = global_rt_runtime();
    2976           0 :                 raw_spin_unlock(&rt_rq->rt_runtime_lock);
    2977             :         }
    2978           0 :         raw_spin_unlock_irqrestore(&def_rt_bandwidth.rt_runtime_lock, flags);
    2979             : 
    2980           0 :         return 0;
    2981             : }
    2982             : #endif /* CONFIG_SYSCTL */
    2983             : #endif /* CONFIG_RT_GROUP_SCHED */
    2984             : 
    2985             : #ifdef CONFIG_SYSCTL
    2986             : static int sched_rt_global_validate(void)
    2987             : {
    2988           0 :         if (sysctl_sched_rt_period <= 0)
    2989             :                 return -EINVAL;
    2990             : 
    2991           0 :         if ((sysctl_sched_rt_runtime != RUNTIME_INF) &&
    2992           0 :                 ((sysctl_sched_rt_runtime > sysctl_sched_rt_period) ||
    2993           0 :                  ((u64)sysctl_sched_rt_runtime *
    2994             :                         NSEC_PER_USEC > max_rt_runtime)))
    2995             :                 return -EINVAL;
    2996             : 
    2997             :         return 0;
    2998             : }
    2999             : 
    3000           0 : static void sched_rt_do_global(void)
    3001             : {
    3002             :         unsigned long flags;
    3003             : 
    3004           0 :         raw_spin_lock_irqsave(&def_rt_bandwidth.rt_runtime_lock, flags);
    3005           0 :         def_rt_bandwidth.rt_runtime = global_rt_runtime();
    3006           0 :         def_rt_bandwidth.rt_period = ns_to_ktime(global_rt_period());
    3007           0 :         raw_spin_unlock_irqrestore(&def_rt_bandwidth.rt_runtime_lock, flags);
    3008           0 : }
    3009             : 
    3010           0 : static int sched_rt_handler(struct ctl_table *table, int write, void *buffer,
    3011             :                 size_t *lenp, loff_t *ppos)
    3012             : {
    3013             :         int old_period, old_runtime;
    3014             :         static DEFINE_MUTEX(mutex);
    3015             :         int ret;
    3016             : 
    3017           0 :         mutex_lock(&mutex);
    3018           0 :         old_period = sysctl_sched_rt_period;
    3019           0 :         old_runtime = sysctl_sched_rt_runtime;
    3020             : 
    3021           0 :         ret = proc_dointvec(table, write, buffer, lenp, ppos);
    3022             : 
    3023           0 :         if (!ret && write) {
    3024           0 :                 ret = sched_rt_global_validate();
    3025           0 :                 if (ret)
    3026             :                         goto undo;
    3027             : 
    3028           0 :                 ret = sched_dl_global_validate();
    3029           0 :                 if (ret)
    3030             :                         goto undo;
    3031             : 
    3032           0 :                 ret = sched_rt_global_constraints();
    3033           0 :                 if (ret)
    3034             :                         goto undo;
    3035             : 
    3036           0 :                 sched_rt_do_global();
    3037           0 :                 sched_dl_do_global();
    3038             :         }
    3039             :         if (0) {
    3040             : undo:
    3041           0 :                 sysctl_sched_rt_period = old_period;
    3042           0 :                 sysctl_sched_rt_runtime = old_runtime;
    3043             :         }
    3044           0 :         mutex_unlock(&mutex);
    3045             : 
    3046           0 :         return ret;
    3047             : }
    3048             : 
    3049           0 : static int sched_rr_handler(struct ctl_table *table, int write, void *buffer,
    3050             :                 size_t *lenp, loff_t *ppos)
    3051             : {
    3052             :         int ret;
    3053             :         static DEFINE_MUTEX(mutex);
    3054             : 
    3055           0 :         mutex_lock(&mutex);
    3056           0 :         ret = proc_dointvec(table, write, buffer, lenp, ppos);
    3057             :         /*
    3058             :          * Make sure that internally we keep jiffies.
    3059             :          * Also, writing zero resets the timeslice to default:
    3060             :          */
    3061           0 :         if (!ret && write) {
    3062           0 :                 sched_rr_timeslice =
    3063           0 :                         sysctl_sched_rr_timeslice <= 0 ? RR_TIMESLICE :
    3064           0 :                         msecs_to_jiffies(sysctl_sched_rr_timeslice);
    3065             :         }
    3066           0 :         mutex_unlock(&mutex);
    3067             : 
    3068           0 :         return ret;
    3069             : }
    3070             : #endif /* CONFIG_SYSCTL */
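
The handlers above sit behind the /proc/sys/kernel/sched_rt_period_us,
sched_rt_runtime_us and sched_rr_timeslice_ms knobs. A small reader as a usage example
(the paths are the standard procfs locations; error handling is kept minimal):

#include <stdio.h>

static long read_knob(const char *path)
{
	long val = -1;
	FILE *f = fopen(path, "r");

	if (f) {
		if (fscanf(f, "%ld", &val) != 1)
			val = -1;
		fclose(f);
	}
	return val;
}

int main(void)
{
	printf("sched_rt_period_us    = %ld\n",
	       read_knob("/proc/sys/kernel/sched_rt_period_us"));
	printf("sched_rt_runtime_us   = %ld\n",
	       read_knob("/proc/sys/kernel/sched_rt_runtime_us"));
	printf("sched_rr_timeslice_ms = %ld\n",
	       read_knob("/proc/sys/kernel/sched_rr_timeslice_ms"));
	return 0;
}
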
    3071             : 
    3072             : #ifdef CONFIG_SCHED_DEBUG
    3073             : void print_rt_stats(struct seq_file *m, int cpu)
    3074             : {
    3075             :         rt_rq_iter_t iter;
    3076             :         struct rt_rq *rt_rq;
    3077             : 
    3078             :         rcu_read_lock();
    3079             :         for_each_rt_rq(rt_rq, iter, cpu_rq(cpu))
    3080             :                 print_rt_rq(m, cpu, rt_rq);
    3081             :         rcu_read_unlock();
    3082             : }
    3083             : #endif /* CONFIG_SCHED_DEBUG */

Generated by: LCOV version 1.14