LCOV - code coverage report
Current view: top level - kernel/time - hrtimer.c (source / functions)
Test: coverage.info          Lines:      77 / 372   (20.7 %)
Date: 2023-08-24 13:40:31    Functions:   9 /  39   (23.1 %)

          Line data    Source code
       1             : // SPDX-License-Identifier: GPL-2.0
       2             : /*
       3             :  *  Copyright(C) 2005-2006, Thomas Gleixner <tglx@linutronix.de>
       4             :  *  Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar
       5             :  *  Copyright(C) 2006-2007  Timesys Corp., Thomas Gleixner
       6             :  *
       7             :  *  High-resolution kernel timers
       8             :  *
       9             :  *  In contrast to the low-resolution timeout API, aka timer wheel,
      10             :  *  hrtimers provide finer resolution and accuracy depending on system
      11             :  *  configuration and capabilities.
      12             :  *
      13             :  *  Started by: Thomas Gleixner and Ingo Molnar
      14             :  *
      15             :  *  Credits:
      16             :  *      Based on the original timer wheel code
      17             :  *
      18             :  *      Help, testing, suggestions, bugfixes, improvements were
      19             :  *      provided by:
      20             :  *
      21             :  *      George Anzinger, Andrew Morton, Steven Rostedt, Roman Zippel
       22             :  *      et al.
      23             :  */
      24             : 
      25             : #include <linux/cpu.h>
      26             : #include <linux/export.h>
      27             : #include <linux/percpu.h>
      28             : #include <linux/hrtimer.h>
      29             : #include <linux/notifier.h>
      30             : #include <linux/syscalls.h>
      31             : #include <linux/interrupt.h>
      32             : #include <linux/tick.h>
      33             : #include <linux/err.h>
      34             : #include <linux/debugobjects.h>
      35             : #include <linux/sched/signal.h>
      36             : #include <linux/sched/sysctl.h>
      37             : #include <linux/sched/rt.h>
      38             : #include <linux/sched/deadline.h>
      39             : #include <linux/sched/nohz.h>
      40             : #include <linux/sched/debug.h>
      41             : #include <linux/timer.h>
      42             : #include <linux/freezer.h>
      43             : #include <linux/compat.h>
      44             : 
      45             : #include <linux/uaccess.h>
      46             : 
      47             : #include <trace/events/timer.h>
      48             : 
      49             : #include "tick-internal.h"
      50             : 
      51             : /*
      52             :  * Masks for selecting the soft and hard context timers from
      53             :  * cpu_base->active
      54             :  */
      55             : #define MASK_SHIFT              (HRTIMER_BASE_MONOTONIC_SOFT)
      56             : #define HRTIMER_ACTIVE_HARD     ((1U << MASK_SHIFT) - 1)
      57             : #define HRTIMER_ACTIVE_SOFT     (HRTIMER_ACTIVE_HARD << MASK_SHIFT)
      58             : #define HRTIMER_ACTIVE_ALL      (HRTIMER_ACTIVE_SOFT | HRTIMER_ACTIVE_HARD)
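/*
 * Worked example of the masks above, assuming HRTIMER_BASE_MONOTONIC_SOFT == 4
 * (the four hard bases precede the four soft ones in the array below):
 *
 *      MASK_SHIFT          = 4
 *      HRTIMER_ACTIVE_HARD = (1U << 4) - 1 = 0x0f   (bits 0-3, hard bases)
 *      HRTIMER_ACTIVE_SOFT = 0x0f << 4     = 0xf0   (bits 4-7, soft bases)
 *      HRTIMER_ACTIVE_ALL  = 0xf0 | 0x0f   = 0xff
 */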
      59             : 
      60             : /*
      61             :  * The timer bases:
      62             :  *
      63             :  * There are more clockids than hrtimer bases. Thus, we index
      64             :  * into the timer bases by the hrtimer_base_type enum. When trying
      65             :  * to reach a base using a clockid, hrtimer_clockid_to_base()
      66             :  * is used to convert from clockid to the proper hrtimer_base_type.
      67             :  */
      68             : DEFINE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases) =
      69             : {
      70             :         .lock = __RAW_SPIN_LOCK_UNLOCKED(hrtimer_bases.lock),
      71             :         .clock_base =
      72             :         {
      73             :                 {
      74             :                         .index = HRTIMER_BASE_MONOTONIC,
      75             :                         .clockid = CLOCK_MONOTONIC,
      76             :                         .get_time = &ktime_get,
      77             :                 },
      78             :                 {
      79             :                         .index = HRTIMER_BASE_REALTIME,
      80             :                         .clockid = CLOCK_REALTIME,
      81             :                         .get_time = &ktime_get_real,
      82             :                 },
      83             :                 {
      84             :                         .index = HRTIMER_BASE_BOOTTIME,
      85             :                         .clockid = CLOCK_BOOTTIME,
      86             :                         .get_time = &ktime_get_boottime,
      87             :                 },
      88             :                 {
      89             :                         .index = HRTIMER_BASE_TAI,
      90             :                         .clockid = CLOCK_TAI,
      91             :                         .get_time = &ktime_get_clocktai,
      92             :                 },
      93             :                 {
      94             :                         .index = HRTIMER_BASE_MONOTONIC_SOFT,
      95             :                         .clockid = CLOCK_MONOTONIC,
      96             :                         .get_time = &ktime_get,
      97             :                 },
      98             :                 {
      99             :                         .index = HRTIMER_BASE_REALTIME_SOFT,
     100             :                         .clockid = CLOCK_REALTIME,
     101             :                         .get_time = &ktime_get_real,
     102             :                 },
     103             :                 {
     104             :                         .index = HRTIMER_BASE_BOOTTIME_SOFT,
     105             :                         .clockid = CLOCK_BOOTTIME,
     106             :                         .get_time = &ktime_get_boottime,
     107             :                 },
     108             :                 {
     109             :                         .index = HRTIMER_BASE_TAI_SOFT,
     110             :                         .clockid = CLOCK_TAI,
     111             :                         .get_time = &ktime_get_clocktai,
     112             :                 },
     113             :         }
     114             : };
     115             : 
     116             : static const int hrtimer_clock_to_base_table[MAX_CLOCKS] = {
     117             :         /* Make sure we catch unsupported clockids */
     118             :         [0 ... MAX_CLOCKS - 1]  = HRTIMER_MAX_CLOCK_BASES,
     119             : 
     120             :         [CLOCK_REALTIME]        = HRTIMER_BASE_REALTIME,
     121             :         [CLOCK_MONOTONIC]       = HRTIMER_BASE_MONOTONIC,
     122             :         [CLOCK_BOOTTIME]        = HRTIMER_BASE_BOOTTIME,
     123             :         [CLOCK_TAI]             = HRTIMER_BASE_TAI,
     124             : };
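/*
 * A minimal sketch of the lookup this table supports (illustrative only; the
 * name example_clockid_to_base is hypothetical, not the in-tree helper):
 * valid clockids map to their base, anything else hits the sentinel above
 * and falls back to HRTIMER_BASE_MONOTONIC.
 */
static inline int example_clockid_to_base(clockid_t clock_id)
{
        if (likely(clock_id < MAX_CLOCKS)) {
                int base = hrtimer_clock_to_base_table[clock_id];

                if (likely(base != HRTIMER_MAX_CLOCK_BASES))
                        return base;
        }
        WARN(1, "Invalid clockid %d. Using MONOTONIC\n", clock_id);
        return HRTIMER_BASE_MONOTONIC;
}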
     125             : 
     126             : /*
     127             :  * Functions and macros which are different for UP/SMP systems are kept in a
     128             :  * single place
     129             :  */
     130             : #ifdef CONFIG_SMP
     131             : 
     132             : /*
     133             :  * We require the migration_base for lock_hrtimer_base()/switch_hrtimer_base()
     134             :  * such that hrtimer_callback_running() can unconditionally dereference
     135             :  * timer->base->cpu_base
     136             :  */
     137             : static struct hrtimer_cpu_base migration_cpu_base = {
     138             :         .clock_base = { {
     139             :                 .cpu_base = &migration_cpu_base,
     140             :                 .seq      = SEQCNT_RAW_SPINLOCK_ZERO(migration_cpu_base.seq,
     141             :                                                      &migration_cpu_base.lock),
     142             :         }, },
     143             : };
     144             : 
     145             : #define migration_base  migration_cpu_base.clock_base[0]
     146             : 
     147             : static inline bool is_migration_base(struct hrtimer_clock_base *base)
     148             : {
     149             :         return base == &migration_base;
     150             : }
     151             : 
     152             : /*
     153             :  * We are using hashed locking: holding per_cpu(hrtimer_bases)[n].lock
     154             :  * means that all timers which are tied to this base via timer->base are
     155             :  * locked, and the base itself is locked too.
     156             :  *
     157             :  * So __run_timers/migrate_timers can safely modify all timers which could
     158             :  * be found on the lists/queues.
     159             :  *
     160             :  * When the timer's base is locked, and the timer removed from list, it is
     161             :  * possible to set timer->base = &migration_base and drop the lock: the timer
     162             :  * remains locked.
     163             :  */
     164             : static
     165             : struct hrtimer_clock_base *lock_hrtimer_base(const struct hrtimer *timer,
     166             :                                              unsigned long *flags)
     167             :         __acquires(&timer->base->lock)
     168             : {
     169             :         struct hrtimer_clock_base *base;
     170             : 
     171             :         for (;;) {
     172             :                 base = READ_ONCE(timer->base);
     173             :                 if (likely(base != &migration_base)) {
     174             :                         raw_spin_lock_irqsave(&base->cpu_base->lock, *flags);
     175             :                         if (likely(base == timer->base))
     176             :                                 return base;
     177             :                         /* The timer has migrated to another CPU: */
     178             :                         raw_spin_unlock_irqrestore(&base->cpu_base->lock, *flags);
     179             :                 }
     180             :                 cpu_relax();
     181             :         }
     182             : }
     183             : 
     184             : /*
     185             :  * We do not migrate the timer when it is expiring before the next
     186             :  * event on the target cpu. When high resolution is enabled, we cannot
     187             :  * reprogram the target cpu hardware and we would cause it to fire
     188             :  * late. To keep it simple, we handle the high resolution enabled and
      189             :  * disabled cases the same way.
     190             :  *
     191             :  * Called with cpu_base->lock of target cpu held.
     192             :  */
     193             : static int
     194             : hrtimer_check_target(struct hrtimer *timer, struct hrtimer_clock_base *new_base)
     195             : {
     196             :         ktime_t expires;
     197             : 
     198             :         expires = ktime_sub(hrtimer_get_expires(timer), new_base->offset);
     199             :         return expires < new_base->cpu_base->expires_next;
     200             : }
     201             : 
     202             : static inline
     203             : struct hrtimer_cpu_base *get_target_base(struct hrtimer_cpu_base *base,
     204             :                                          int pinned)
     205             : {
     206             : #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
     207             :         if (static_branch_likely(&timers_migration_enabled) && !pinned)
     208             :                 return &per_cpu(hrtimer_bases, get_nohz_timer_target());
     209             : #endif
     210             :         return base;
     211             : }
     212             : 
     213             : /*
      214             :  * We switch the timer base to a power-optimized CPU target,
     215             :  * if:
     216             :  *      - NO_HZ_COMMON is enabled
     217             :  *      - timer migration is enabled
     218             :  *      - the timer callback is not running
     219             :  *      - the timer is not the first expiring timer on the new target
     220             :  *
     221             :  * If one of the above requirements is not fulfilled we move the timer
     222             :  * to the current CPU or leave it on the previously assigned CPU if
     223             :  * the timer callback is currently running.
     224             :  */
     225             : static inline struct hrtimer_clock_base *
     226             : switch_hrtimer_base(struct hrtimer *timer, struct hrtimer_clock_base *base,
     227             :                     int pinned)
     228             : {
     229             :         struct hrtimer_cpu_base *new_cpu_base, *this_cpu_base;
     230             :         struct hrtimer_clock_base *new_base;
     231             :         int basenum = base->index;
     232             : 
     233             :         this_cpu_base = this_cpu_ptr(&hrtimer_bases);
     234             :         new_cpu_base = get_target_base(this_cpu_base, pinned);
     235             : again:
     236             :         new_base = &new_cpu_base->clock_base[basenum];
     237             : 
     238             :         if (base != new_base) {
     239             :                 /*
     240             :                  * We are trying to move timer to new_base.
     241             :                  * However we can't change timer's base while it is running,
     242             :                  * so we keep it on the same CPU. No hassle vs. reprogramming
     243             :                  * the event source in the high resolution case. The softirq
     244             :                  * code will take care of this when the timer function has
     245             :                  * completed. There is no conflict as we hold the lock until
     246             :                  * the timer is enqueued.
     247             :                  */
     248             :                 if (unlikely(hrtimer_callback_running(timer)))
     249             :                         return base;
     250             : 
     251             :                 /* See the comment in lock_hrtimer_base() */
     252             :                 WRITE_ONCE(timer->base, &migration_base);
     253             :                 raw_spin_unlock(&base->cpu_base->lock);
     254             :                 raw_spin_lock(&new_base->cpu_base->lock);
     255             : 
     256             :                 if (new_cpu_base != this_cpu_base &&
     257             :                     hrtimer_check_target(timer, new_base)) {
     258             :                         raw_spin_unlock(&new_base->cpu_base->lock);
     259             :                         raw_spin_lock(&base->cpu_base->lock);
     260             :                         new_cpu_base = this_cpu_base;
     261             :                         WRITE_ONCE(timer->base, base);
     262             :                         goto again;
     263             :                 }
     264             :                 WRITE_ONCE(timer->base, new_base);
     265             :         } else {
     266             :                 if (new_cpu_base != this_cpu_base &&
     267             :                     hrtimer_check_target(timer, new_base)) {
     268             :                         new_cpu_base = this_cpu_base;
     269             :                         goto again;
     270             :                 }
     271             :         }
     272             :         return new_base;
     273             : }
     274             : 
     275             : #else /* CONFIG_SMP */
     276             : 
     277             : static inline bool is_migration_base(struct hrtimer_clock_base *base)
     278             : {
     279             :         return false;
     280             : }
     281             : 
     282             : static inline struct hrtimer_clock_base *
     283             : lock_hrtimer_base(const struct hrtimer *timer, unsigned long *flags)
     284             :         __acquires(&timer->base->cpu_base->lock)
     285             : {
     286           0 :         struct hrtimer_clock_base *base = timer->base;
     287             : 
     288           0 :         raw_spin_lock_irqsave(&base->cpu_base->lock, *flags);
     289             : 
     290             :         return base;
     291             : }
     292             : 
     293             : # define switch_hrtimer_base(t, b, p)   (b)
     294             : 
     295             : #endif  /* !CONFIG_SMP */
     296             : 
     297             : /*
      298             :  * Functions for the scalar storage format of ktime_t which are
     299             :  * too large for inlining:
     300             :  */
     301             : #if BITS_PER_LONG < 64
     302             : /*
     303             :  * Divide a ktime value by a nanosecond value
     304             :  */
     305             : s64 __ktime_divns(const ktime_t kt, s64 div)
     306             : {
     307             :         int sft = 0;
     308             :         s64 dclc;
     309             :         u64 tmp;
     310             : 
     311             :         dclc = ktime_to_ns(kt);
     312             :         tmp = dclc < 0 ? -dclc : dclc;
     313             : 
     314             :         /* Make sure the divisor is less than 2^32: */
     315             :         while (div >> 32) {
     316             :                 sft++;
     317             :                 div >>= 1;
     318             :         }
     319             :         tmp >>= sft;
     320             :         do_div(tmp, (u32) div);
     321             :         return dclc < 0 ? -tmp : tmp;
     322             : }
     323             : EXPORT_SYMBOL_GPL(__ktime_divns);
      324             : #endif /* BITS_PER_LONG < 64 */
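/*
 * Worked example of the shift trick in __ktime_divns() above: dividing
 * kt = 10,000,000,000 ns by div = 6,000,000,000. The divisor does not fit
 * in 32 bits, so both operands are shifted right once (sft = 1) and
 * do_div(5,000,000,000, 3,000,000,000) yields 1, which matches the exact
 * quotient. The shared shift trades remainder precision for a cheap
 * 64-by-32 bit division on 32-bit systems.
 */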
     325             : 
     326             : /*
     327             :  * Add two ktime values and do a safety check for overflow:
     328             :  */
     329           0 : ktime_t ktime_add_safe(const ktime_t lhs, const ktime_t rhs)
     330             : {
     331           0 :         ktime_t res = ktime_add_unsafe(lhs, rhs);
     332             : 
     333             :         /*
     334             :          * We use KTIME_SEC_MAX here, the maximum timeout which we can
     335             :          * return to user space in a timespec:
     336             :          */
     337           0 :         if (res < 0 || res < lhs || res < rhs)
     338           0 :                 res = ktime_set(KTIME_SEC_MAX, 0);
     339             : 
     340           0 :         return res;
     341             : }
     342             : 
     343             : EXPORT_SYMBOL_GPL(ktime_add_safe);
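/*
 * Example of the clamp above: ktime_add_safe(KTIME_MAX, 1) would wrap to a
 * negative value in ktime_add_unsafe(); the "res < 0" check catches this and
 * the result is pinned to ktime_set(KTIME_SEC_MAX, 0) instead of becoming a
 * bogus time in the past.
 */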
     344             : 
     345             : #ifdef CONFIG_DEBUG_OBJECTS_TIMERS
     346             : 
     347             : static const struct debug_obj_descr hrtimer_debug_descr;
     348             : 
     349             : static void *hrtimer_debug_hint(void *addr)
     350             : {
     351             :         return ((struct hrtimer *) addr)->function;
     352             : }
     353             : 
     354             : /*
     355             :  * fixup_init is called when:
     356             :  * - an active object is initialized
     357             :  */
     358             : static bool hrtimer_fixup_init(void *addr, enum debug_obj_state state)
     359             : {
     360             :         struct hrtimer *timer = addr;
     361             : 
     362             :         switch (state) {
     363             :         case ODEBUG_STATE_ACTIVE:
     364             :                 hrtimer_cancel(timer);
     365             :                 debug_object_init(timer, &hrtimer_debug_descr);
     366             :                 return true;
     367             :         default:
     368             :                 return false;
     369             :         }
     370             : }
     371             : 
     372             : /*
     373             :  * fixup_activate is called when:
     374             :  * - an active object is activated
     375             :  * - an unknown non-static object is activated
     376             :  */
     377             : static bool hrtimer_fixup_activate(void *addr, enum debug_obj_state state)
     378             : {
     379             :         switch (state) {
     380             :         case ODEBUG_STATE_ACTIVE:
     381             :                 WARN_ON(1);
     382             :                 fallthrough;
     383             :         default:
     384             :                 return false;
     385             :         }
     386             : }
     387             : 
     388             : /*
     389             :  * fixup_free is called when:
     390             :  * - an active object is freed
     391             :  */
     392             : static bool hrtimer_fixup_free(void *addr, enum debug_obj_state state)
     393             : {
     394             :         struct hrtimer *timer = addr;
     395             : 
     396             :         switch (state) {
     397             :         case ODEBUG_STATE_ACTIVE:
     398             :                 hrtimer_cancel(timer);
     399             :                 debug_object_free(timer, &hrtimer_debug_descr);
     400             :                 return true;
     401             :         default:
     402             :                 return false;
     403             :         }
     404             : }
     405             : 
     406             : static const struct debug_obj_descr hrtimer_debug_descr = {
     407             :         .name           = "hrtimer",
     408             :         .debug_hint     = hrtimer_debug_hint,
     409             :         .fixup_init     = hrtimer_fixup_init,
     410             :         .fixup_activate = hrtimer_fixup_activate,
     411             :         .fixup_free     = hrtimer_fixup_free,
     412             : };
     413             : 
     414             : static inline void debug_hrtimer_init(struct hrtimer *timer)
     415             : {
     416             :         debug_object_init(timer, &hrtimer_debug_descr);
     417             : }
     418             : 
     419             : static inline void debug_hrtimer_activate(struct hrtimer *timer,
     420             :                                           enum hrtimer_mode mode)
     421             : {
     422             :         debug_object_activate(timer, &hrtimer_debug_descr);
     423             : }
     424             : 
     425             : static inline void debug_hrtimer_deactivate(struct hrtimer *timer)
     426             : {
     427             :         debug_object_deactivate(timer, &hrtimer_debug_descr);
     428             : }
     429             : 
     430             : static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id,
     431             :                            enum hrtimer_mode mode);
     432             : 
     433             : void hrtimer_init_on_stack(struct hrtimer *timer, clockid_t clock_id,
     434             :                            enum hrtimer_mode mode)
     435             : {
     436             :         debug_object_init_on_stack(timer, &hrtimer_debug_descr);
     437             :         __hrtimer_init(timer, clock_id, mode);
     438             : }
     439             : EXPORT_SYMBOL_GPL(hrtimer_init_on_stack);
     440             : 
     441             : static void __hrtimer_init_sleeper(struct hrtimer_sleeper *sl,
     442             :                                    clockid_t clock_id, enum hrtimer_mode mode);
     443             : 
     444             : void hrtimer_init_sleeper_on_stack(struct hrtimer_sleeper *sl,
     445             :                                    clockid_t clock_id, enum hrtimer_mode mode)
     446             : {
     447             :         debug_object_init_on_stack(&sl->timer, &hrtimer_debug_descr);
     448             :         __hrtimer_init_sleeper(sl, clock_id, mode);
     449             : }
     450             : EXPORT_SYMBOL_GPL(hrtimer_init_sleeper_on_stack);
     451             : 
     452             : void destroy_hrtimer_on_stack(struct hrtimer *timer)
     453             : {
     454             :         debug_object_free(timer, &hrtimer_debug_descr);
     455             : }
     456             : EXPORT_SYMBOL_GPL(destroy_hrtimer_on_stack);
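/*
 * Typical on-stack pairing the helpers above are meant for (a sketch, not a
 * real call site; "timeout" stands in for a caller-provided ktime_t): the
 * sleeper lives in a local variable, so debugobjects has to be told
 * explicitly when it goes out of scope.
 *
 *      struct hrtimer_sleeper t;
 *
 *      hrtimer_init_sleeper_on_stack(&t, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 *      hrtimer_set_expires(&t.timer, timeout);
 *      ...
 *      destroy_hrtimer_on_stack(&t.timer);
 */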
     457             : 
     458             : #else
     459             : 
     460             : static inline void debug_hrtimer_init(struct hrtimer *timer) { }
     461             : static inline void debug_hrtimer_activate(struct hrtimer *timer,
     462             :                                           enum hrtimer_mode mode) { }
     463             : static inline void debug_hrtimer_deactivate(struct hrtimer *timer) { }
     464             : #endif
     465             : 
     466             : static inline void
     467             : debug_init(struct hrtimer *timer, clockid_t clockid,
     468             :            enum hrtimer_mode mode)
     469             : {
     470        1073 :         debug_hrtimer_init(timer);
     471        1073 :         trace_hrtimer_init(timer, clockid, mode);
     472             : }
     473             : 
     474             : static inline void debug_activate(struct hrtimer *timer,
     475             :                                   enum hrtimer_mode mode)
     476             : {
     477           0 :         debug_hrtimer_activate(timer, mode);
     478           0 :         trace_hrtimer_start(timer, mode);
     479             : }
     480             : 
     481             : static inline void debug_deactivate(struct hrtimer *timer)
     482             : {
     483           0 :         debug_hrtimer_deactivate(timer);
     484           0 :         trace_hrtimer_cancel(timer);
     485             : }
     486             : 
     487             : static struct hrtimer_clock_base *
     488             : __next_base(struct hrtimer_cpu_base *cpu_base, unsigned int *active)
     489             : {
     490             :         unsigned int idx;
     491             : 
     492           5 :         if (!*active)
     493             :                 return NULL;
     494             : 
     495           0 :         idx = __ffs(*active);
     496           0 :         *active &= ~(1U << idx);
     497             : 
     498           0 :         return &cpu_base->clock_base[idx];
     499             : }
     500             : 
     501             : #define for_each_active_base(base, cpu_base, active)    \
     502             :         while ((base = __next_base((cpu_base), &(active))))
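/*
 * Worked example of the iteration above: with active == 0x05 (say,
 * HRTIMER_BASE_MONOTONIC and HRTIMER_BASE_BOOTTIME have queued timers),
 * __ffs() first returns 0 and bit 0 is cleared leaving 0x04, the next pass
 * returns 2, and the third call sees *active == 0 and ends the loop.
 */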
     503             : 
     504           0 : static ktime_t __hrtimer_next_event_base(struct hrtimer_cpu_base *cpu_base,
     505             :                                          const struct hrtimer *exclude,
     506             :                                          unsigned int active,
     507             :                                          ktime_t expires_next)
     508             : {
     509             :         struct hrtimer_clock_base *base;
     510             :         ktime_t expires;
     511             : 
     512           0 :         for_each_active_base(base, cpu_base, active) {
     513             :                 struct timerqueue_node *next;
     514             :                 struct hrtimer *timer;
     515             : 
     516           0 :                 next = timerqueue_getnext(&base->active);
     517           0 :                 timer = container_of(next, struct hrtimer, node);
     518           0 :                 if (timer == exclude) {
     519             :                         /* Get to the next timer in the queue. */
     520           0 :                         next = timerqueue_iterate_next(next);
     521           0 :                         if (!next)
     522           0 :                                 continue;
     523             : 
     524             :                         timer = container_of(next, struct hrtimer, node);
     525             :                 }
     526           0 :                 expires = ktime_sub(hrtimer_get_expires(timer), base->offset);
     527           0 :                 if (expires < expires_next) {
     528           0 :                         expires_next = expires;
     529             : 
     530             :                         /* Skip cpu_base update if a timer is being excluded. */
     531           0 :                         if (exclude)
     532           0 :                                 continue;
     533             : 
     534           0 :                         if (timer->is_soft)
     535           0 :                                 cpu_base->softirq_next_timer = timer;
     536             :                         else
     537           0 :                                 cpu_base->next_timer = timer;
     538             :                 }
     539             :         }
     540             :         /*
     541             :          * clock_was_set() might have changed base->offset of any of
     542             :          * the clock bases so the result might be negative. Fix it up
     543             :          * to prevent a false positive in clockevents_program_event().
     544             :          */
     545           0 :         if (expires_next < 0)
     546           0 :                 expires_next = 0;
     547           0 :         return expires_next;
     548             : }
     549             : 
     550             : /*
     551             :  * Recomputes cpu_base::*next_timer and returns the earliest expires_next
     552             :  * but does not set cpu_base::*expires_next, that is done by
     553             :  * hrtimer[_force]_reprogram and hrtimer_interrupt only. When updating
     554             :  * cpu_base::*expires_next right away, reprogramming logic would no longer
     555             :  * work.
     556             :  *
      557             :  * When a softirq is pending, we can ignore the HRTIMER_ACTIVE_SOFT bases;
      558             :  * those timers will get run whenever the softirq gets handled. At the end of
      559             :  * hrtimer_run_softirq(), hrtimer_update_softirq_timer() will re-add these bases.
     560             :  *
     561             :  * Therefore softirq values are those from the HRTIMER_ACTIVE_SOFT clock bases.
     562             :  * The !softirq values are the minima across HRTIMER_ACTIVE_ALL, unless an actual
     563             :  * softirq is pending, in which case they're the minima of HRTIMER_ACTIVE_HARD.
     564             :  *
     565             :  * @active_mask must be one of:
     566             :  *  - HRTIMER_ACTIVE_ALL,
     567             :  *  - HRTIMER_ACTIVE_SOFT, or
     568             :  *  - HRTIMER_ACTIVE_HARD.
     569             :  */
     570             : static ktime_t
     571           0 : __hrtimer_get_next_event(struct hrtimer_cpu_base *cpu_base, unsigned int active_mask)
     572             : {
     573             :         unsigned int active;
     574           0 :         struct hrtimer *next_timer = NULL;
     575           0 :         ktime_t expires_next = KTIME_MAX;
     576             : 
     577           0 :         if (!cpu_base->softirq_activated && (active_mask & HRTIMER_ACTIVE_SOFT)) {
     578           0 :                 active = cpu_base->active_bases & HRTIMER_ACTIVE_SOFT;
     579           0 :                 cpu_base->softirq_next_timer = NULL;
     580           0 :                 expires_next = __hrtimer_next_event_base(cpu_base, NULL,
     581             :                                                          active, KTIME_MAX);
     582             : 
     583           0 :                 next_timer = cpu_base->softirq_next_timer;
     584             :         }
     585             : 
     586           0 :         if (active_mask & HRTIMER_ACTIVE_HARD) {
     587           0 :                 active = cpu_base->active_bases & HRTIMER_ACTIVE_HARD;
     588           0 :                 cpu_base->next_timer = next_timer;
     589           0 :                 expires_next = __hrtimer_next_event_base(cpu_base, NULL, active,
     590             :                                                          expires_next);
     591             :         }
     592             : 
     593           0 :         return expires_next;
     594             : }
     595             : 
     596           0 : static ktime_t hrtimer_update_next_event(struct hrtimer_cpu_base *cpu_base)
     597             : {
     598           0 :         ktime_t expires_next, soft = KTIME_MAX;
     599             : 
     600             :         /*
     601             :          * If the soft interrupt has already been activated, ignore the
     602             :          * soft bases. They will be handled in the already raised soft
     603             :          * interrupt.
     604             :          */
     605           0 :         if (!cpu_base->softirq_activated) {
     606           0 :                 soft = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE_SOFT);
     607             :                 /*
     608             :                  * Update the soft expiry time. clock_settime() might have
     609             :                  * affected it.
     610             :                  */
     611           0 :                 cpu_base->softirq_expires_next = soft;
     612             :         }
     613             : 
     614           0 :         expires_next = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE_HARD);
     615             :         /*
     616             :          * If a softirq timer is expiring first, update cpu_base->next_timer
     617             :          * and program the hardware with the soft expiry time.
     618             :          */
     619           0 :         if (expires_next > soft) {
     620           0 :                 cpu_base->next_timer = cpu_base->softirq_next_timer;
     621           0 :                 expires_next = soft;
     622             :         }
     623             : 
     624           0 :         return expires_next;
     625             : }
     626             : 
     627             : static inline ktime_t hrtimer_update_base(struct hrtimer_cpu_base *base)
     628             : {
     629           5 :         ktime_t *offs_real = &base->clock_base[HRTIMER_BASE_REALTIME].offset;
     630           5 :         ktime_t *offs_boot = &base->clock_base[HRTIMER_BASE_BOOTTIME].offset;
     631           5 :         ktime_t *offs_tai = &base->clock_base[HRTIMER_BASE_TAI].offset;
     632             : 
     633           5 :         ktime_t now = ktime_get_update_offsets_now(&base->clock_was_set_seq,
     634             :                                             offs_real, offs_boot, offs_tai);
     635             : 
     636           5 :         base->clock_base[HRTIMER_BASE_REALTIME_SOFT].offset = *offs_real;
     637           5 :         base->clock_base[HRTIMER_BASE_BOOTTIME_SOFT].offset = *offs_boot;
     638           5 :         base->clock_base[HRTIMER_BASE_TAI_SOFT].offset = *offs_tai;
     639             : 
     640             :         return now;
     641             : }
     642             : 
     643             : /*
      644             :  * Is the high resolution mode active?
     645             :  */
     646             : static inline int __hrtimer_hres_active(struct hrtimer_cpu_base *cpu_base)
     647             : {
     648             :         return IS_ENABLED(CONFIG_HIGH_RES_TIMERS) ?
     649             :                 cpu_base->hres_active : 0;
     650             : }
     651             : 
     652             : static inline int hrtimer_hres_active(void)
     653             : {
     654             :         return __hrtimer_hres_active(this_cpu_ptr(&hrtimer_bases));
     655             : }
     656             : 
     657             : static void __hrtimer_reprogram(struct hrtimer_cpu_base *cpu_base,
     658             :                                 struct hrtimer *next_timer,
     659             :                                 ktime_t expires_next)
     660             : {
     661           0 :         cpu_base->expires_next = expires_next;
     662             : 
     663             :         /*
     664             :          * If hres is not active, hardware does not have to be
     665             :          * reprogrammed yet.
     666             :          *
     667             :          * If a hang was detected in the last timer interrupt then we
     668             :          * leave the hang delay active in the hardware. We want the
     669             :          * system to make progress. That also prevents the following
     670             :          * scenario:
     671             :          * T1 expires 50ms from now
     672             :          * T2 expires 5s from now
     673             :          *
     674             :          * T1 is removed, so this code is called and would reprogram
     675             :          * the hardware to 5s from now. Any hrtimer_start after that
     676             :          * will not reprogram the hardware due to hang_detected being
     677             :          * set. So we'd effectively block all timers until the T2 event
     678             :          * fires.
     679             :          */
     680           0 :         if (!__hrtimer_hres_active(cpu_base) || cpu_base->hang_detected)
     681             :                 return;
     682             : 
     683             :         tick_program_event(expires_next, 1);
     684             : }
     685             : 
     686             : /*
     687             :  * Reprogram the event source with checking both queues for the
     688             :  * next event
     689             :  * Called with interrupts disabled and base->lock held
     690             :  */
     691             : static void
     692             : hrtimer_force_reprogram(struct hrtimer_cpu_base *cpu_base, int skip_equal)
     693             : {
     694             :         ktime_t expires_next;
     695             : 
     696           0 :         expires_next = hrtimer_update_next_event(cpu_base);
     697             : 
     698           0 :         if (skip_equal && expires_next == cpu_base->expires_next)
     699             :                 return;
     700             : 
     701           0 :         __hrtimer_reprogram(cpu_base, cpu_base->next_timer, expires_next);
     702             : }
     703             : 
     704             : /* High resolution timer related functions */
     705             : #ifdef CONFIG_HIGH_RES_TIMERS
     706             : 
     707             : /*
      708             :  * High resolution timer enabled?
     709             :  */
     710             : static bool hrtimer_hres_enabled __read_mostly  = true;
     711             : unsigned int hrtimer_resolution __read_mostly = LOW_RES_NSEC;
     712             : EXPORT_SYMBOL_GPL(hrtimer_resolution);
     713             : 
     714             : /*
     715             :  * Enable / Disable high resolution mode
     716             :  */
     717             : static int __init setup_hrtimer_hres(char *str)
     718             : {
     719             :         return (kstrtobool(str, &hrtimer_hres_enabled) == 0);
     720             : }
     721             : 
     722             : __setup("highres=", setup_hrtimer_hres);
     723             : 
     724             : /*
      725             :  * hrtimer_is_hres_enabled - query whether the highres mode is enabled
     726             :  */
     727             : static inline int hrtimer_is_hres_enabled(void)
     728             : {
     729             :         return hrtimer_hres_enabled;
     730             : }
     731             : 
     732             : static void retrigger_next_event(void *arg);
     733             : 
     734             : /*
     735             :  * Switch to high resolution mode
     736             :  */
     737             : static void hrtimer_switch_to_hres(void)
     738             : {
     739             :         struct hrtimer_cpu_base *base = this_cpu_ptr(&hrtimer_bases);
     740             : 
     741             :         if (tick_init_highres()) {
     742             :                 pr_warn("Could not switch to high resolution mode on CPU %u\n",
     743             :                         base->cpu);
     744             :                 return;
     745             :         }
     746             :         base->hres_active = 1;
     747             :         hrtimer_resolution = HIGH_RES_NSEC;
     748             : 
     749             :         tick_setup_sched_timer();
     750             :         /* "Retrigger" the interrupt to get things going */
     751             :         retrigger_next_event(NULL);
     752             : }
     753             : 
     754             : #else
     755             : 
     756             : static inline int hrtimer_is_hres_enabled(void) { return 0; }
     757             : static inline void hrtimer_switch_to_hres(void) { }
     758             : 
     759             : #endif /* CONFIG_HIGH_RES_TIMERS */
     760             : /*
     761             :  * Retrigger next event is called after clock was set with interrupts
     762             :  * disabled through an SMP function call or directly from low level
     763             :  * resume code.
     764             :  *
     765             :  * This is only invoked when:
      766             :  *      - CONFIG_HIGH_RES_TIMERS is enabled
      767             :  *      - CONFIG_NO_HZ_COMMON is enabled
     768             :  *
     769             :  * For the other cases this function is empty and because the call sites
     770             :  * are optimized out it vanishes as well, i.e. no need for lots of
     771             :  * #ifdeffery.
     772             :  */
     773             : static void retrigger_next_event(void *arg)
     774             : {
     775           0 :         struct hrtimer_cpu_base *base = this_cpu_ptr(&hrtimer_bases);
     776             : 
     777             :         /*
     778             :          * When high resolution mode or nohz is active, then the offsets of
     779             :          * CLOCK_REALTIME/TAI/BOOTTIME have to be updated. Otherwise the
     780             :          * next tick will take care of that.
     781             :          *
     782             :          * If high resolution mode is active then the next expiring timer
     783             :          * must be reevaluated and the clock event device reprogrammed if
     784             :          * necessary.
     785             :          *
     786             :          * In the NOHZ case the update of the offset and the reevaluation
     787             :          * of the next expiring timer is enough. The return from the SMP
     788             :          * function call will take care of the reprogramming in case the
     789             :          * CPU was in a NOHZ idle sleep.
     790             :          */
     791           0 :         if (!__hrtimer_hres_active(base) && !tick_nohz_active)
     792             :                 return;
     793             : 
     794             :         raw_spin_lock(&base->lock);
     795             :         hrtimer_update_base(base);
     796             :         if (__hrtimer_hres_active(base))
     797             :                 hrtimer_force_reprogram(base, 0);
     798             :         else
     799             :                 hrtimer_update_next_event(base);
     800             :         raw_spin_unlock(&base->lock);
     801             : }
     802             : 
     803             : /*
     804             :  * When a timer is enqueued and expires earlier than the already enqueued
      805             :  * timers, we have to check whether it expires earlier than the timer for
     806             :  * which the clock event device was armed.
     807             :  *
     808             :  * Called with interrupts disabled and base->cpu_base.lock held
     809             :  */
     810           0 : static void hrtimer_reprogram(struct hrtimer *timer, bool reprogram)
     811             : {
     812           0 :         struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases);
     813           0 :         struct hrtimer_clock_base *base = timer->base;
     814           0 :         ktime_t expires = ktime_sub(hrtimer_get_expires(timer), base->offset);
     815             : 
     816           0 :         WARN_ON_ONCE(hrtimer_get_expires_tv64(timer) < 0);
     817             : 
     818             :         /*
     819             :          * CLOCK_REALTIME timer might be requested with an absolute
     820             :          * expiry time which is less than base->offset. Set it to 0.
     821             :          */
     822           0 :         if (expires < 0)
     823           0 :                 expires = 0;
     824             : 
     825           0 :         if (timer->is_soft) {
     826             :                 /*
     827             :                  * soft hrtimer could be started on a remote CPU. In this
     828             :                  * case softirq_expires_next needs to be updated on the
     829             :                  * remote CPU. The soft hrtimer will not expire before the
     830             :                  * first hard hrtimer on the remote CPU -
     831             :                  * hrtimer_check_target() prevents this case.
     832             :                  */
     833           0 :                 struct hrtimer_cpu_base *timer_cpu_base = base->cpu_base;
     834             : 
     835           0 :                 if (timer_cpu_base->softirq_activated)
     836             :                         return;
     837             : 
     838           0 :                 if (!ktime_before(expires, timer_cpu_base->softirq_expires_next))
     839             :                         return;
     840             : 
     841           0 :                 timer_cpu_base->softirq_next_timer = timer;
     842           0 :                 timer_cpu_base->softirq_expires_next = expires;
     843             : 
     844           0 :                 if (!ktime_before(expires, timer_cpu_base->expires_next) ||
     845             :                     !reprogram)
     846             :                         return;
     847             :         }
     848             : 
     849             :         /*
     850             :          * If the timer is not on the current cpu, we cannot reprogram
     851             :          * the other cpus clock event device.
     852             :          */
     853           0 :         if (base->cpu_base != cpu_base)
     854             :                 return;
     855             : 
     856           0 :         if (expires >= cpu_base->expires_next)
     857             :                 return;
     858             : 
     859             :         /*
     860             :          * If the hrtimer interrupt is running, then it will reevaluate the
     861             :          * clock bases and reprogram the clock event device.
     862             :          */
     863           0 :         if (cpu_base->in_hrtirq)
     864             :                 return;
     865             : 
     866           0 :         cpu_base->next_timer = timer;
     867             : 
     868           0 :         __hrtimer_reprogram(cpu_base, timer, expires);
     869             : }
     870             : 
     871             : static bool update_needs_ipi(struct hrtimer_cpu_base *cpu_base,
     872             :                              unsigned int active)
     873             : {
     874             :         struct hrtimer_clock_base *base;
     875             :         unsigned int seq;
     876             :         ktime_t expires;
     877             : 
     878             :         /*
     879             :          * Update the base offsets unconditionally so the following
     880             :          * checks whether the SMP function call is required works.
     881             :          *
     882             :          * The update is safe even when the remote CPU is in the hrtimer
     883             :          * interrupt or the hrtimer soft interrupt and expiring affected
     884             :          * bases. Either it will see the update before handling a base or
     885             :          * it will see it when it finishes the processing and reevaluates
     886             :          * the next expiring timer.
     887             :          */
     888             :         seq = cpu_base->clock_was_set_seq;
     889             :         hrtimer_update_base(cpu_base);
     890             : 
     891             :         /*
     892             :          * If the sequence did not change over the update then the
     893             :          * remote CPU already handled it.
     894             :          */
     895             :         if (seq == cpu_base->clock_was_set_seq)
     896             :                 return false;
     897             : 
     898             :         /*
     899             :          * If the remote CPU is currently handling an hrtimer interrupt, it
     900             :          * will reevaluate the first expiring timer of all clock bases
     901             :          * before reprogramming. Nothing to do here.
     902             :          */
     903             :         if (cpu_base->in_hrtirq)
     904             :                 return false;
     905             : 
     906             :         /*
     907             :          * Walk the affected clock bases and check whether the first expiring
     908             :          * timer in a clock base is moving ahead of the first expiring timer of
     909             :          * @cpu_base. If so, the IPI must be invoked because per CPU clock
     910             :          * event devices cannot be remotely reprogrammed.
     911             :          */
     912             :         active &= cpu_base->active_bases;
     913             : 
     914             :         for_each_active_base(base, cpu_base, active) {
     915             :                 struct timerqueue_node *next;
     916             : 
     917             :                 next = timerqueue_getnext(&base->active);
     918             :                 expires = ktime_sub(next->expires, base->offset);
     919             :                 if (expires < cpu_base->expires_next)
     920             :                         return true;
     921             : 
     922             :                 /* Extra check for softirq clock bases */
     923             :                 if (base->clockid < HRTIMER_BASE_MONOTONIC_SOFT)
     924             :                         continue;
     925             :                 if (cpu_base->softirq_activated)
     926             :                         continue;
     927             :                 if (expires < cpu_base->softirq_expires_next)
     928             :                         return true;
     929             :         }
     930             :         return false;
     931             : }
     932             : 
     933             : /*
     934             :  * Clock was set. This might affect CLOCK_REALTIME, CLOCK_TAI and
     935             :  * CLOCK_BOOTTIME (for late sleep time injection).
     936             :  *
      937             :  * This requires updating the offsets for these clocks
      938             :  * vs. CLOCK_MONOTONIC. When high resolution timers are enabled, this
      939             :  * may also require reprogramming the per CPU clock event devices
      940             :  * when the change moves an affected timer ahead of the first expiring
      941             :  * timer on that CPU. Obviously remote per CPU clock event devices cannot
      942             :  * be reprogrammed directly. The other reason why an IPI has to be sent is
      943             :  * when the system is in !HIGH_RES and NOHZ mode. NOHZ updates the offsets
      944             :  * in the tick, which obviously might be stopped, so the remote CPU, which
      945             :  * might be sleeping in idle, has to be woken up to get this sorted.
     946             :  */
     947           0 : void clock_was_set(unsigned int bases)
     948             : {
     949           0 :         struct hrtimer_cpu_base *cpu_base = raw_cpu_ptr(&hrtimer_bases);
     950             :         cpumask_var_t mask;
     951             :         int cpu;
     952             : 
     953           0 :         if (!__hrtimer_hres_active(cpu_base) && !tick_nohz_active)
     954             :                 goto out_timerfd;
     955             : 
     956             :         if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) {
     957             :                 on_each_cpu(retrigger_next_event, NULL, 1);
     958             :                 goto out_timerfd;
     959             :         }
     960             : 
     961             :         /* Avoid interrupting CPUs if possible */
     962             :         cpus_read_lock();
     963             :         for_each_online_cpu(cpu) {
     964             :                 unsigned long flags;
     965             : 
     966             :                 cpu_base = &per_cpu(hrtimer_bases, cpu);
     967             :                 raw_spin_lock_irqsave(&cpu_base->lock, flags);
     968             : 
     969             :                 if (update_needs_ipi(cpu_base, bases))
     970             :                         cpumask_set_cpu(cpu, mask);
     971             : 
     972             :                 raw_spin_unlock_irqrestore(&cpu_base->lock, flags);
     973             :         }
     974             : 
     975             :         preempt_disable();
     976             :         smp_call_function_many(mask, retrigger_next_event, NULL, 1);
     977             :         preempt_enable();
     978             :         cpus_read_unlock();
     979             :         free_cpumask_var(mask);
     980             : 
     981             : out_timerfd:
     982           0 :         timerfd_clock_was_set();
     983           0 : }
     984             : 
     985           0 : static void clock_was_set_work(struct work_struct *work)
     986             : {
     987           0 :         clock_was_set(CLOCK_SET_WALL);
     988           0 : }
     989             : 
     990             : static DECLARE_WORK(hrtimer_work, clock_was_set_work);
     991             : 
     992             : /*
     993             :  * Called from timekeeping code to reprogram the hrtimer interrupt device
     994             :  * on all cpus and to notify timerfd.
     995             :  */
     996           0 : void clock_was_set_delayed(void)
     997             : {
     998           0 :         schedule_work(&hrtimer_work);
     999           0 : }
    1000             : 
    1001             : /*
     1002             :  * Called during resume either directly via timekeeping_resume()
    1003             :  * or in the case of s2idle from tick_unfreeze() to ensure that the
    1004             :  * hrtimers are up to date.
    1005             :  */
    1006           0 : void hrtimers_resume_local(void)
    1007             : {
    1008             :         lockdep_assert_irqs_disabled();
    1009             :         /* Retrigger on the local CPU */
    1010           0 :         retrigger_next_event(NULL);
    1011           0 : }
    1012             : 
    1013             : /*
    1014             :  * Counterpart to lock_hrtimer_base above:
    1015             :  */
    1016             : static inline
    1017             : void unlock_hrtimer_base(const struct hrtimer *timer, unsigned long *flags)
    1018             :         __releases(&timer->base->cpu_base->lock)
    1019             : {
    1020           0 :         raw_spin_unlock_irqrestore(&timer->base->cpu_base->lock, *flags);
    1021             : }
    1022             : 
    1023             : /**
    1024             :  * hrtimer_forward - forward the timer expiry
    1025             :  * @timer:      hrtimer to forward
    1026             :  * @now:        forward past this time
    1027             :  * @interval:   the interval to forward
    1028             :  *
    1029             :  * Forward the timer expiry so it will expire in the future.
    1030             :  * Returns the number of overruns.
    1031             :  *
    1032             :  * Can be safely called from the callback function of @timer. If
    1033             :  * called from other contexts, @timer must neither be enqueued nor
    1034             :  * running the callback, and the caller needs to take care of
    1035             :  * serialization.
    1036             :  *
    1037             :  * Note: This only updates the timer expiry value and does not requeue
    1038             :  * the timer.
    1039             :  */
    1040           0 : u64 hrtimer_forward(struct hrtimer *timer, ktime_t now, ktime_t interval)
    1041             : {
    1042           0 :         u64 orun = 1;
    1043             :         ktime_t delta;
    1044             : 
    1045           0 :         delta = ktime_sub(now, hrtimer_get_expires(timer));
    1046             : 
    1047           0 :         if (delta < 0)
    1048             :                 return 0;
    1049             : 
    1050           0 :         if (WARN_ON(timer->state & HRTIMER_STATE_ENQUEUED))
    1051             :                 return 0;
    1052             : 
    1053           0 :         if (interval < hrtimer_resolution)
    1054           0 :                 interval = hrtimer_resolution;
    1055             : 
    1056           0 :         if (unlikely(delta >= interval)) {
    1057           0 :                 s64 incr = ktime_to_ns(interval);
    1058             : 
    1059           0 :                 orun = ktime_divns(delta, incr);
    1060           0 :                 hrtimer_add_expires_ns(timer, incr * orun);
    1061           0 :                 if (hrtimer_get_expires_tv64(timer) > now)
    1062             :                         return orun;
    1063             :                 /*
    1064             :                  * This (and the ktime_add() below) is the
    1065             :                  * correction for exact:
    1066             :                  */
    1067           0 :                 orun++;
    1068             :         }
    1069           0 :         hrtimer_add_expires(timer, interval);
    1070             : 
    1071           0 :         return orun;
    1072             : }
    1073             : EXPORT_SYMBOL_GPL(hrtimer_forward);
    1074             : 
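/*
 * Illustrative sketch, not part of hrtimer.c: the common way to use
 * hrtimer_forward() is through the hrtimer_forward_now() wrapper from
 * inside the callback to build a periodic timer. The names MY_PERIOD_NS
 * and my_periodic_cb are hypothetical.
 */
#include <linux/hrtimer.h>

#define MY_PERIOD_NS    (500 * NSEC_PER_USEC)   /* example period: 500 us */

static enum hrtimer_restart my_periodic_cb(struct hrtimer *t)
{
        /* Push the expiry forward by whole periods until it is in the future. */
        u64 overruns = hrtimer_forward_now(t, ns_to_ktime(MY_PERIOD_NS));

        if (overruns > 1)
                pr_debug("missed %llu period(s)\n", overruns - 1);

        /* Only the expiry was updated; HRTIMER_RESTART requeues the timer. */
        return HRTIMER_RESTART;
}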
    1075             : /*
    1076             :  * enqueue_hrtimer - internal function to (re)start a timer
    1077             :  *
    1078             :  * The timer is inserted in expiry order. Insertion into the
    1079             :  * red black tree is O(log(n)). Must hold the base lock.
    1080             :  *
    1081             :  * Returns 1 when the new timer is the leftmost timer in the tree.
    1082             :  */
    1083             : static int enqueue_hrtimer(struct hrtimer *timer,
    1084             :                            struct hrtimer_clock_base *base,
    1085             :                            enum hrtimer_mode mode)
    1086             : {
    1087           0 :         debug_activate(timer, mode);
    1088             : 
    1089           0 :         base->cpu_base->active_bases |= 1 << base->index;
    1090             : 
    1091             :         /* Pairs with the lockless read in hrtimer_is_queued() */
    1092           0 :         WRITE_ONCE(timer->state, HRTIMER_STATE_ENQUEUED);
    1093             : 
    1094           0 :         return timerqueue_add(&base->active, &timer->node);
    1095             : }
    1096             : 
    1097             : /*
    1098             :  * __remove_hrtimer - internal function to remove a timer
    1099             :  *
    1100             :  * Caller must hold the base lock.
    1101             :  *
    1102             :  * High resolution timer mode reprograms the clock event device when the
    1103             :  * timer is the one which expires next. The caller can disable this by setting
    1104             :  * reprogram to zero. This is useful when the context does a reprogramming
    1105             :  * anyway (e.g. the timer interrupt).
    1106             :  */
    1107           0 : static void __remove_hrtimer(struct hrtimer *timer,
    1108             :                              struct hrtimer_clock_base *base,
    1109             :                              u8 newstate, int reprogram)
    1110             : {
    1111           0 :         struct hrtimer_cpu_base *cpu_base = base->cpu_base;
    1112           0 :         u8 state = timer->state;
    1113             : 
    1114             :         /* Pairs with the lockless read in hrtimer_is_queued() */
    1115           0 :         WRITE_ONCE(timer->state, newstate);
    1116           0 :         if (!(state & HRTIMER_STATE_ENQUEUED))
    1117             :                 return;
    1118             : 
    1119           0 :         if (!timerqueue_del(&base->active, &timer->node))
    1120           0 :                 cpu_base->active_bases &= ~(1 << base->index);
    1121             : 
    1122             :         /*
    1123             :          * Note: If reprogram is false we do not update
    1124             :          * cpu_base->next_timer. This happens when we remove the first
    1125             :          * timer on a remote cpu. No harm as we never dereference
    1126             :          * cpu_base->next_timer. So the worst thing that can happen is
    1127             :          * a superfluous call to hrtimer_force_reprogram() on the
    1128             :          * remote cpu later on if the same timer gets enqueued again.
    1129             :          */
    1130           0 :         if (reprogram && timer == cpu_base->next_timer)
    1131             :                 hrtimer_force_reprogram(cpu_base, 1);
    1132             : }
    1133             : 
    1134             : /*
    1135             :  * remove hrtimer, called with base lock held
    1136             :  */
    1137             : static inline int
    1138             : remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base,
    1139             :                bool restart, bool keep_local)
    1140             : {
    1141           0 :         u8 state = timer->state;
    1142             : 
    1143           0 :         if (state & HRTIMER_STATE_ENQUEUED) {
    1144             :                 bool reprogram;
    1145             : 
    1146             :                 /*
    1147             :                  * Remove the timer and force reprogramming when high
    1148             :                  * resolution mode is active and the timer is on the current
    1149             :                  * CPU. If we remove a timer on another CPU, reprogramming is
    1150             :                  * skipped. The interrupt event on this CPU is fired and
    1151             :                  * reprogramming happens in the interrupt handler. This is a
    1152             :                  * rare case and less expensive than a smp call.
    1153             :                  */
    1154           0 :                 debug_deactivate(timer);
    1155           0 :                 reprogram = base->cpu_base == this_cpu_ptr(&hrtimer_bases);
    1156             : 
    1157             :                 /*
    1158             :                  * If the timer is not restarted then reprogramming is
    1159             :                  * required if the timer is local. If it is local and about
    1160             :                  * to be restarted, avoid programming it twice (on removal
    1161             :                  * and a moment later when it's requeued).
    1162             :                  */
    1163             :                 if (!restart)
    1164             :                         state = HRTIMER_STATE_INACTIVE;
    1165             :                 else
    1166           0 :                         reprogram &= !keep_local;
    1167             : 
    1168           0 :                 __remove_hrtimer(timer, base, state, reprogram);
    1169             :                 return 1;
    1170             :         }
    1171             :         return 0;
    1172             : }
    1173             : 
    1174             : static inline ktime_t hrtimer_update_lowres(struct hrtimer *timer, ktime_t tim,
    1175             :                                             const enum hrtimer_mode mode)
    1176             : {
    1177             : #ifdef CONFIG_TIME_LOW_RES
    1178             :         /*
    1179             :          * CONFIG_TIME_LOW_RES indicates that the system has no way to return
    1180             :          * granular time values. For relative timers we add hrtimer_resolution
    1181             :          * (i.e. one jiffy) to prevent short timeouts.
    1182             :          */
    1183             :         timer->is_rel = mode & HRTIMER_MODE_REL;
    1184             :         if (timer->is_rel)
    1185             :                 tim = ktime_add_safe(tim, hrtimer_resolution);
    1186             : #endif
    1187             :         return tim;
    1188             : }
    1189             : 
    1190             : static void
    1191           0 : hrtimer_update_softirq_timer(struct hrtimer_cpu_base *cpu_base, bool reprogram)
    1192             : {
    1193             :         ktime_t expires;
    1194             : 
    1195             :         /*
    1196             :          * Find the next SOFT expiration.
    1197             :          */
    1198           0 :         expires = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE_SOFT);
    1199             : 
    1200             :         /*
    1201             :          * Reprogramming needs to be triggered, even if the next soft
    1202             :          * hrtimer expires at the same time as the next hard
    1203             :          * hrtimer. cpu_base->softirq_expires_next needs to be updated!
    1204             :          */
    1205           0 :         if (expires == KTIME_MAX)
    1206             :                 return;
    1207             : 
    1208             :         /*
    1209             :          * cpu_base->*next_timer is recomputed by __hrtimer_get_next_event()
    1210             :          * cpu_base->*expires_next is only set by hrtimer_reprogram()
    1211             :          */
    1212           0 :         hrtimer_reprogram(cpu_base->softirq_next_timer, reprogram);
    1213             : }
    1214             : 
    1215           0 : static int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
    1216             :                                     u64 delta_ns, const enum hrtimer_mode mode,
    1217             :                                     struct hrtimer_clock_base *base)
    1218             : {
    1219             :         struct hrtimer_clock_base *new_base;
    1220             :         bool force_local, first;
    1221             : 
    1222             :         /*
    1223             :          * If the timer is on the local cpu base and is the first expiring
    1224             :          * timer then this might end up reprogramming the hardware twice
    1225             :          * (on removal and on enqueue). To avoid that, prevent the
    1226             :          * reprogram on removal, keep the timer local to the current CPU
    1227             :          * and enforce reprogramming after it is queued no matter whether
    1228             :          * it is the new first expiring timer again or not.
    1229             :          */
    1230           0 :         force_local = base->cpu_base == this_cpu_ptr(&hrtimer_bases);
    1231           0 :         force_local &= base->cpu_base->next_timer == timer;
    1232             : 
    1233             :         /*
    1234             :          * Remove an active timer from the queue. In case it is not queued
    1235             :          * on the current CPU, make sure that remove_hrtimer() updates the
    1236             :          * remote data correctly.
    1237             :          *
    1238             :          * If it's on the current CPU and the first expiring timer, then
    1239             :          * skip reprogramming, keep the timer local and enforce
    1240             :          * reprogramming later if it was the first expiring timer.  This
    1241             :          * avoids programming the underlying clock event twice (once at
    1242             :          * removal and once after enqueue).
    1243             :          */
    1244           0 :         remove_hrtimer(timer, base, true, force_local);
    1245             : 
    1246           0 :         if (mode & HRTIMER_MODE_REL)
    1247           0 :                 tim = ktime_add_safe(tim, base->get_time());
    1248             : 
    1249           0 :         tim = hrtimer_update_lowres(timer, tim, mode);
    1250             : 
    1251           0 :         hrtimer_set_expires_range_ns(timer, tim, delta_ns);
    1252             : 
    1253             :         /* Switch the timer base, if necessary: */
    1254             :         if (!force_local) {
    1255             :                 new_base = switch_hrtimer_base(timer, base,
    1256             :                                                mode & HRTIMER_MODE_PINNED);
    1257             :         } else {
    1258             :                 new_base = base;
    1259             :         }
    1260             : 
    1261           0 :         first = enqueue_hrtimer(timer, new_base, mode);
    1262           0 :         if (!force_local)
    1263             :                 return first;
    1264             : 
    1265             :         /*
    1266             :          * Timer was forced to stay on the current CPU to avoid
    1267             :          * reprogramming on removal and enqueue. Force reprogram the
    1268             :          * hardware by evaluating the new first expiring timer.
    1269             :          */
    1270           0 :         hrtimer_force_reprogram(new_base->cpu_base, 1);
    1271             :         return 0;
    1272             : }
    1273             : 
    1274             : /**
    1275             :  * hrtimer_start_range_ns - (re)start an hrtimer
    1276             :  * @timer:      the timer to be added
    1277             :  * @tim:        expiry time
    1278             :  * @delta_ns:   "slack" range for the timer
    1279             :  * @mode:       timer mode: absolute (HRTIMER_MODE_ABS) or
    1280             :  *              relative (HRTIMER_MODE_REL), and pinned (HRTIMER_MODE_PINNED);
    1281             :  *              softirq based mode is considered for debug purpose only!
    1282             :  */
    1283           0 : void hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
    1284             :                             u64 delta_ns, const enum hrtimer_mode mode)
    1285             : {
    1286             :         struct hrtimer_clock_base *base;
    1287             :         unsigned long flags;
    1288             : 
    1289             :         /*
    1290             :          * Check whether the HRTIMER_MODE_SOFT bit and hrtimer.is_soft
    1291             :          * match when CONFIG_PREEMPT_RT=n. With PREEMPT_RT, check the hard
    1292             :          * expiry mode instead, because unmarked timers are moved to softirq expiry.
    1293             :          */
    1294             :         if (!IS_ENABLED(CONFIG_PREEMPT_RT))
    1295           0 :                 WARN_ON_ONCE(!(mode & HRTIMER_MODE_SOFT) ^ !timer->is_soft);
    1296             :         else
    1297             :                 WARN_ON_ONCE(!(mode & HRTIMER_MODE_HARD) ^ !timer->is_hard);
    1298             : 
    1299           0 :         base = lock_hrtimer_base(timer, &flags);
    1300             : 
    1301           0 :         if (__hrtimer_start_range_ns(timer, tim, delta_ns, mode, base))
    1302           0 :                 hrtimer_reprogram(timer, true);
    1303             : 
    1304           0 :         unlock_hrtimer_base(timer, &flags);
    1305           0 : }
    1306             : EXPORT_SYMBOL_GPL(hrtimer_start_range_ns);
    1307             : 
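/*
 * Illustrative sketch, not part of hrtimer.c: arm an already initialized
 * (hypothetical) timer 10 ms from now with 1 ms of slack, so the expiry
 * may be coalesced with other timers inside that window.
 */
static void my_arm_with_slack(struct hrtimer *timer)
{
        hrtimer_start_range_ns(timer, ms_to_ktime(10),
                               NSEC_PER_MSEC,            /* 1 ms of slack */
                               HRTIMER_MODE_REL);
}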
    1308             : /**
    1309             :  * hrtimer_try_to_cancel - try to deactivate a timer
    1310             :  * @timer:      hrtimer to stop
    1311             :  *
    1312             :  * Returns:
    1313             :  *
    1314             :  *  *  0 when the timer was not active
    1315             :  *  *  1 when the timer was active
    1316             :  *  * -1 when the timer is currently executing the callback function and
    1317             :  *    cannot be stopped
    1318             :  */
    1319         160 : int hrtimer_try_to_cancel(struct hrtimer *timer)
    1320             : {
    1321             :         struct hrtimer_clock_base *base;
    1322             :         unsigned long flags;
    1323         160 :         int ret = -1;
    1324             : 
    1325             :         /*
    1326             :          * Check lockless first. If the timer is not active (neither
    1327             :          * enqueued nor running the callback), there is nothing to do here. The
    1328             :          * base lock does not serialize against a concurrent enqueue,
    1329             :          * so we can avoid taking it.
    1330             :          */
    1331         160 :         if (!hrtimer_active(timer))
    1332             :                 return 0;
    1333             : 
    1334           0 :         base = lock_hrtimer_base(timer, &flags);
    1335             : 
    1336           0 :         if (!hrtimer_callback_running(timer))
    1337             :                 ret = remove_hrtimer(timer, base, false, false);
    1338             : 
    1339           0 :         unlock_hrtimer_base(timer, &flags);
    1340             : 
    1341           0 :         return ret;
    1342             : 
    1343             : }
    1344             : EXPORT_SYMBOL_GPL(hrtimer_try_to_cancel);
    1345             : 
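/*
 * Illustrative sketch, not part of hrtimer.c: the non-waiting variant is
 * for callers which cannot wait for a running callback, e.g. because they
 * hold a lock the callback also takes. State may only be torn down when
 * the timer was actually stopped. my_disarm and *armed are hypothetical.
 */
static bool my_disarm(struct hrtimer *timer, bool *armed)
{
        if (hrtimer_try_to_cancel(timer) >= 0) {
                *armed = false;         /* 0 or 1: callback will not run anymore */
                return true;
        }
        return false;                   /* -1: callback is running right now */
}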
    1346             : #ifdef CONFIG_PREEMPT_RT
    1347             : static void hrtimer_cpu_base_init_expiry_lock(struct hrtimer_cpu_base *base)
    1348             : {
    1349             :         spin_lock_init(&base->softirq_expiry_lock);
    1350             : }
    1351             : 
    1352             : static void hrtimer_cpu_base_lock_expiry(struct hrtimer_cpu_base *base)
    1353             : {
    1354             :         spin_lock(&base->softirq_expiry_lock);
    1355             : }
    1356             : 
    1357             : static void hrtimer_cpu_base_unlock_expiry(struct hrtimer_cpu_base *base)
    1358             : {
    1359             :         spin_unlock(&base->softirq_expiry_lock);
    1360             : }
    1361             : 
    1362             : /*
    1363             :  * The counterpart to hrtimer_cancel_wait_running().
    1364             :  *
    1365             :  * If there is a waiter for cpu_base->expiry_lock, then it was waiting for
    1366             :  * the timer callback to finish. Drop expiry_lock and reacquire it. That
    1367             :  * allows the waiter to acquire the lock and make progress.
    1368             :  */
    1369             : static void hrtimer_sync_wait_running(struct hrtimer_cpu_base *cpu_base,
    1370             :                                       unsigned long flags)
    1371             : {
    1372             :         if (atomic_read(&cpu_base->timer_waiters)) {
    1373             :                 raw_spin_unlock_irqrestore(&cpu_base->lock, flags);
    1374             :                 spin_unlock(&cpu_base->softirq_expiry_lock);
    1375             :                 spin_lock(&cpu_base->softirq_expiry_lock);
    1376             :                 raw_spin_lock_irq(&cpu_base->lock);
    1377             :         }
    1378             : }
    1379             : 
    1380             : /*
    1381             :  * This function is called on PREEMPT_RT kernels when the fast path
    1382             :  * deletion of a timer failed because the timer callback function was
    1383             :  * running.
    1384             :  *
    1385             :  * This prevents priority inversion: if the softirq thread is preempted
    1386             :  * in the middle of a timer callback, then calling hrtimer_cancel() can
    1387             :  * lead to two issues:
    1388             :  *
    1389             :  *  - If the caller is on a remote CPU then it has to spin wait for the timer
    1390             :  *    handler to complete. This can result in unbounded priority inversion.
    1391             :  *
    1392             :  *  - If the caller originates from the task which preempted the timer
    1393             :  *    handler on the same CPU, then spin waiting for the timer handler to
    1394             :  *    complete is never going to end.
    1395             :  */
    1396             : void hrtimer_cancel_wait_running(const struct hrtimer *timer)
    1397             : {
    1398             :         /* Lockless read. Prevent the compiler from reloading it below */
    1399             :         struct hrtimer_clock_base *base = READ_ONCE(timer->base);
    1400             : 
    1401             :         /*
    1402             :          * Just relax if the timer expires in hard interrupt context or if
    1403             :          * it is currently on the migration base.
    1404             :          */
    1405             :         if (!timer->is_soft || is_migration_base(base)) {
    1406             :                 cpu_relax();
    1407             :                 return;
    1408             :         }
    1409             : 
    1410             :         /*
    1411             :          * Mark the base as contended and grab the expiry lock, which is
    1412             :          * held by the softirq across the timer callback. Drop the lock
    1413             :          * immediately so the softirq can expire the next timer. In theory
    1414             :          * the timer could already be running again, but that's more than
    1415             :          * unlikely and just causes another wait loop.
    1416             :          */
    1417             :         atomic_inc(&base->cpu_base->timer_waiters);
    1418             :         spin_lock_bh(&base->cpu_base->softirq_expiry_lock);
    1419             :         atomic_dec(&base->cpu_base->timer_waiters);
    1420             :         spin_unlock_bh(&base->cpu_base->softirq_expiry_lock);
    1421             : }
    1422             : #else
    1423             : static inline void
    1424             : hrtimer_cpu_base_init_expiry_lock(struct hrtimer_cpu_base *base) { }
    1425             : static inline void
    1426             : hrtimer_cpu_base_lock_expiry(struct hrtimer_cpu_base *base) { }
    1427             : static inline void
    1428             : hrtimer_cpu_base_unlock_expiry(struct hrtimer_cpu_base *base) { }
    1429             : static inline void hrtimer_sync_wait_running(struct hrtimer_cpu_base *base,
    1430             :                                              unsigned long flags) { }
    1431             : #endif
    1432             : 
    1433             : /**
    1434             :  * hrtimer_cancel - cancel a timer and wait for the handler to finish.
    1435             :  * @timer:      the timer to be cancelled
    1436             :  *
    1437             :  * Returns:
    1438             :  *  0 when the timer was not active
    1439             :  *  1 when the timer was active
    1440             :  */
    1441         160 : int hrtimer_cancel(struct hrtimer *timer)
    1442             : {
    1443             :         int ret;
    1444             : 
    1445             :         do {
    1446         160 :                 ret = hrtimer_try_to_cancel(timer);
    1447             : 
    1448         160 :                 if (ret < 0)
    1449           0 :                         hrtimer_cancel_wait_running(timer);
    1450         160 :         } while (ret < 0);
    1451         160 :         return ret;
    1452             : }
    1453             : EXPORT_SYMBOL_GPL(hrtimer_cancel);
    1454             : 
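/*
 * Illustrative teardown sketch, not part of hrtimer.c: hrtimer_cancel()
 * waits until a running callback has finished, so it must not be called
 * from the timer's own callback nor while holding a lock the callback
 * needs. struct my_obj and my_obj_free are hypothetical.
 */
struct my_obj {
        struct hrtimer timer;
        /* ... */
};

static void my_obj_free(struct my_obj *obj)
{
        hrtimer_cancel(&obj->timer);    /* the callback can no longer be running */
        kfree(obj);                     /* now it is safe to release the object */
}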
    1455             : /**
    1456             :  * __hrtimer_get_remaining - get remaining time for the timer
    1457             :  * @timer:      the timer to read
    1458             :  * @adjust:     adjust relative timers when CONFIG_TIME_LOW_RES=y
    1459             :  */
    1460           0 : ktime_t __hrtimer_get_remaining(const struct hrtimer *timer, bool adjust)
    1461             : {
    1462             :         unsigned long flags;
    1463             :         ktime_t rem;
    1464             : 
    1465           0 :         lock_hrtimer_base(timer, &flags);
    1466             :         if (IS_ENABLED(CONFIG_TIME_LOW_RES) && adjust)
    1467             :                 rem = hrtimer_expires_remaining_adjusted(timer);
    1468             :         else
    1469           0 :                 rem = hrtimer_expires_remaining(timer);
    1470           0 :         unlock_hrtimer_base(timer, &flags);
    1471             : 
    1472           0 :         return rem;
    1473             : }
    1474             : EXPORT_SYMBOL_GPL(__hrtimer_get_remaining);
    1475             : 
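/*
 * Illustrative sketch, not part of hrtimer.c: callers normally go through
 * the hrtimer_get_remaining() wrapper, which passes adjust == false.
 * my_report_remaining is a hypothetical helper.
 */
static void my_report_remaining(struct hrtimer *timer)
{
        ktime_t rem = hrtimer_get_remaining(timer);

        if (ktime_to_ns(rem) > 0)
                pr_info("expires in %lld ns\n", ktime_to_ns(rem));
        else
                pr_info("already expired or inactive\n");
}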
    1476             : #ifdef CONFIG_NO_HZ_COMMON
    1477             : /**
    1478             :  * hrtimer_get_next_event - get the time until next expiry event
    1479             :  *
    1480             :  * Returns the next expiry time or KTIME_MAX if no timer is pending.
    1481             :  */
    1482             : u64 hrtimer_get_next_event(void)
    1483             : {
    1484             :         struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases);
    1485             :         u64 expires = KTIME_MAX;
    1486             :         unsigned long flags;
    1487             : 
    1488             :         raw_spin_lock_irqsave(&cpu_base->lock, flags);
    1489             : 
    1490             :         if (!__hrtimer_hres_active(cpu_base))
    1491             :                 expires = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE_ALL);
    1492             : 
    1493             :         raw_spin_unlock_irqrestore(&cpu_base->lock, flags);
    1494             : 
    1495             :         return expires;
    1496             : }
    1497             : 
    1498             : /**
    1499             :  * hrtimer_next_event_without - time until next expiry event w/o one timer
    1500             :  * @exclude:    timer to exclude
    1501             :  *
    1502             :  * Returns the next expiry time over all timers except for the @exclude one or
    1503             :  * KTIME_MAX if none of them is pending.
    1504             :  */
    1505             : u64 hrtimer_next_event_without(const struct hrtimer *exclude)
    1506             : {
    1507             :         struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases);
    1508             :         u64 expires = KTIME_MAX;
    1509             :         unsigned long flags;
    1510             : 
    1511             :         raw_spin_lock_irqsave(&cpu_base->lock, flags);
    1512             : 
    1513             :         if (__hrtimer_hres_active(cpu_base)) {
    1514             :                 unsigned int active;
    1515             : 
    1516             :                 if (!cpu_base->softirq_activated) {
    1517             :                         active = cpu_base->active_bases & HRTIMER_ACTIVE_SOFT;
    1518             :                         expires = __hrtimer_next_event_base(cpu_base, exclude,
    1519             :                                                             active, KTIME_MAX);
    1520             :                 }
    1521             :                 active = cpu_base->active_bases & HRTIMER_ACTIVE_HARD;
    1522             :                 expires = __hrtimer_next_event_base(cpu_base, exclude, active,
    1523             :                                                     expires);
    1524             :         }
    1525             : 
    1526             :         raw_spin_unlock_irqrestore(&cpu_base->lock, flags);
    1527             : 
    1528             :         return expires;
    1529             : }
    1530             : #endif
    1531             : 
    1532        1073 : static inline int hrtimer_clockid_to_base(clockid_t clock_id)
    1533             : {
    1534        1073 :         if (likely(clock_id < MAX_CLOCKS)) {
    1535        1073 :                 int base = hrtimer_clock_to_base_table[clock_id];
    1536             : 
    1537        1073 :                 if (likely(base != HRTIMER_MAX_CLOCK_BASES))
    1538             :                         return base;
    1539             :         }
    1540           0 :         WARN(1, "Invalid clockid %d. Using MONOTONIC\n", clock_id);
    1541           0 :         return HRTIMER_BASE_MONOTONIC;
    1542             : }
    1543             : 
    1544        1073 : static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id,
    1545             :                            enum hrtimer_mode mode)
    1546             : {
    1547        1073 :         bool softtimer = !!(mode & HRTIMER_MODE_SOFT);
    1548             :         struct hrtimer_cpu_base *cpu_base;
    1549             :         int base;
    1550             : 
    1551             :         /*
    1552             :          * On PREEMPT_RT enabled kernels hrtimers which are not explicitly
    1553             :          * marked for hard interrupt expiry mode are moved into soft
    1554             :          * interrupt context for latency reasons and because the callbacks
    1555             :          * can invoke functions which might sleep on RT, e.g. spin_lock().
    1556             :          */
    1557             :         if (IS_ENABLED(CONFIG_PREEMPT_RT) && !(mode & HRTIMER_MODE_HARD))
    1558             :                 softtimer = true;
    1559             : 
    1560        2146 :         memset(timer, 0, sizeof(struct hrtimer));
    1561             : 
    1562        1073 :         cpu_base = raw_cpu_ptr(&hrtimer_bases);
    1563             : 
    1564             :         /*
    1565             :          * POSIX magic: Relative CLOCK_REALTIME timers are not affected by
    1566             :          * clock modifications, so they need to become CLOCK_MONOTONIC to
    1567             :          * ensure POSIX compliance.
    1568             :          */
    1569        1073 :         if (clock_id == CLOCK_REALTIME && mode & HRTIMER_MODE_REL)
    1570           0 :                 clock_id = CLOCK_MONOTONIC;
    1571             : 
    1572        1073 :         base = softtimer ? HRTIMER_MAX_CLOCK_BASES / 2 : 0;
    1573        1073 :         base += hrtimer_clockid_to_base(clock_id);
    1574        1073 :         timer->is_soft = softtimer;
    1575        1073 :         timer->is_hard = !!(mode & HRTIMER_MODE_HARD);
    1576        1073 :         timer->base = &cpu_base->clock_base[base];
    1577        2146 :         timerqueue_init(&timer->node);
    1578        1073 : }
    1579             : 
    1580             : /**
    1581             :  * hrtimer_init - initialize a timer to the given clock
    1582             :  * @timer:      the timer to be initialized
    1583             :  * @clock_id:   the clock to be used
    1584             :  * @mode:       The modes which are relevant for initialization:
    1585             :  *              HRTIMER_MODE_ABS, HRTIMER_MODE_REL, HRTIMER_MODE_ABS_SOFT,
    1586             :  *              HRTIMER_MODE_REL_SOFT
    1587             :  *
    1588             :  *              The PINNED variants of the above can be handed in,
    1589             :  *              but the PINNED bit is ignored as pinning happens
    1590             :  *              when the hrtimer is started
    1591             :  */
    1592        1073 : void hrtimer_init(struct hrtimer *timer, clockid_t clock_id,
    1593             :                   enum hrtimer_mode mode)
    1594             : {
    1595        1073 :         debug_init(timer, clock_id, mode);
    1596        1073 :         __hrtimer_init(timer, clock_id, mode);
    1597        1073 : }
    1598             : EXPORT_SYMBOL_GPL(hrtimer_init);
    1599             : 
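/*
 * Illustrative sketch, not part of hrtimer.c: the conventional pattern of
 * embedding the hrtimer in a larger object, initializing it and setting
 * the callback by hand before the first start. struct my_widget,
 * my_widget_cb and my_widget_start are hypothetical.
 */
struct my_widget {
        struct hrtimer timer;
        /* ... driver state ... */
};

static enum hrtimer_restart my_widget_cb(struct hrtimer *t)
{
        struct my_widget *w = container_of(t, struct my_widget, timer);

        /* ... handle the expiry for w ... */
        return HRTIMER_NORESTART;
}

static void my_widget_start(struct my_widget *w)
{
        hrtimer_init(&w->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
        w->timer.function = my_widget_cb;
        hrtimer_start(&w->timer, ms_to_ktime(100), HRTIMER_MODE_REL);
}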
    1600             : /*
    1601             :  * A timer is active when it is enqueued into the rbtree, when the
    1602             :  * callback function is running, or when it is in the process of being
    1603             :  * migrated to another cpu.
    1604             :  *
    1605             :  * It is important for this function to not return a false negative.
    1606             :  */
    1607         160 : bool hrtimer_active(const struct hrtimer *timer)
    1608             : {
    1609             :         struct hrtimer_clock_base *base;
    1610             :         unsigned int seq;
    1611             : 
    1612             :         do {
    1613         160 :                 base = READ_ONCE(timer->base);
    1614         480 :                 seq = raw_read_seqcount_begin(&base->seq);
    1615             : 
    1616         320 :                 if (timer->state != HRTIMER_STATE_INACTIVE ||
    1617         160 :                     base->running == timer)
    1618             :                         return true;
    1619             : 
    1620         640 :         } while (read_seqcount_retry(&base->seq, seq) ||
    1621         160 :                  base != READ_ONCE(timer->base));
    1622             : 
    1623             :         return false;
    1624             : }
    1625             : EXPORT_SYMBOL_GPL(hrtimer_active);
    1626             : 
    1627             : /*
    1628             :  * The write_seqcount_barrier()s in __run_hrtimer() split the thing into 3
    1629             :  * distinct sections:
    1630             :  *
    1631             :  *  - queued:   the timer is queued
    1632             :  *  - callback: the timer is being run
    1633             :  *  - post:     the timer is inactive or (re)queued
    1634             :  *
    1635             :  * On the read side we ensure we observe timer->state and cpu_base->running
    1636             :  * from the same section, if anything changed while we looked at it, we retry.
    1637             :  * This includes timer->base changing because sequence numbers alone are
    1638             :  * insufficient for that.
    1639             :  *
    1640             :  * The sequence numbers are required because otherwise we could still observe
    1641             :  * a false negative if the read side got smeared over multiple consecutive
    1642             :  * __run_hrtimer() invocations.
    1643             :  */
    1644             : 
    1645           0 : static void __run_hrtimer(struct hrtimer_cpu_base *cpu_base,
    1646             :                           struct hrtimer_clock_base *base,
    1647             :                           struct hrtimer *timer, ktime_t *now,
    1648             :                           unsigned long flags) __must_hold(&cpu_base->lock)
    1649             : {
    1650             :         enum hrtimer_restart (*fn)(struct hrtimer *);
    1651             :         bool expires_in_hardirq;
    1652             :         int restart;
    1653             : 
    1654             :         lockdep_assert_held(&cpu_base->lock);
    1655             : 
    1656           0 :         debug_deactivate(timer);
    1657           0 :         base->running = timer;
    1658             : 
    1659             :         /*
    1660             :          * Separate the ->running assignment from the ->state assignment.
    1661             :          *
    1662             :          * As with a regular write barrier, this ensures the read side in
    1663             :          * hrtimer_active() cannot observe base->running == NULL &&
    1664             :          * timer->state == INACTIVE.
    1665             :          */
    1666           0 :         raw_write_seqcount_barrier(&base->seq);
    1667             : 
    1668           0 :         __remove_hrtimer(timer, base, HRTIMER_STATE_INACTIVE, 0);
    1669           0 :         fn = timer->function;
    1670             : 
    1671             :         /*
    1672             :          * Clear the 'is relative' flag for the TIME_LOW_RES case. If the
    1673             :          * timer is restarted with a period then it becomes an absolute
    1674             :          * timer. If it is not restarted, it does not matter.
    1675             :          */
    1676             :         if (IS_ENABLED(CONFIG_TIME_LOW_RES))
    1677             :                 timer->is_rel = false;
    1678             : 
    1679             :         /*
    1680             :          * The timer is marked as running in the CPU base, so it is
    1681             :          * protected against migration to a different CPU even if the lock
    1682             :          * is dropped.
    1683             :          */
    1684           0 :         raw_spin_unlock_irqrestore(&cpu_base->lock, flags);
    1685           0 :         trace_hrtimer_expire_entry(timer, now);
    1686           0 :         expires_in_hardirq = lockdep_hrtimer_enter(timer);
    1687             : 
    1688           0 :         restart = fn(timer);
    1689             : 
    1690             :         lockdep_hrtimer_exit(expires_in_hardirq);
    1691           0 :         trace_hrtimer_expire_exit(timer);
    1692           0 :         raw_spin_lock_irq(&cpu_base->lock);
    1693             : 
    1694             :         /*
    1695             :          * Note: We clear the running state after enqueue_hrtimer and
    1696             :          * we do not reprogram the event hardware. Happens either in
    1697             :          * hrtimer_start_range_ns() or in hrtimer_interrupt()
    1698             :          *
    1699             :          * Note: Because we dropped the cpu_base->lock above,
    1700             :          * hrtimer_start_range_ns() can have popped in and enqueued the timer
    1701             :          * for us already.
    1702             :          */
    1703           0 :         if (restart != HRTIMER_NORESTART &&
    1704           0 :             !(timer->state & HRTIMER_STATE_ENQUEUED))
    1705           0 :                 enqueue_hrtimer(timer, base, HRTIMER_MODE_ABS);
    1706             : 
    1707             :         /*
    1708             :          * Separate the ->running assignment from the ->state assignment.
    1709             :          *
    1710             :          * As with a regular write barrier, this ensures the read side in
    1711             :          * hrtimer_active() cannot observe base->running.timer == NULL &&
    1712             :          * timer->state == INACTIVE.
    1713             :          */
    1714           0 :         raw_write_seqcount_barrier(&base->seq);
    1715             : 
    1716           0 :         WARN_ON_ONCE(base->running != timer);
    1717           0 :         base->running = NULL;
    1718           0 : }
    1719             : 
    1720           5 : static void __hrtimer_run_queues(struct hrtimer_cpu_base *cpu_base, ktime_t now,
    1721             :                                  unsigned long flags, unsigned int active_mask)
    1722             : {
    1723             :         struct hrtimer_clock_base *base;
    1724           5 :         unsigned int active = cpu_base->active_bases & active_mask;
    1725             : 
    1726          10 :         for_each_active_base(base, cpu_base, active) {
    1727             :                 struct timerqueue_node *node;
    1728             :                 ktime_t basenow;
    1729             : 
    1730           0 :                 basenow = ktime_add(now, base->offset);
    1731             : 
    1732           0 :                 while ((node = timerqueue_getnext(&base->active))) {
    1733             :                         struct hrtimer *timer;
    1734             : 
    1735           0 :                         timer = container_of(node, struct hrtimer, node);
    1736             : 
    1737             :                         /*
    1738             :                          * The immediate goal for using the softexpires is
    1739             :                          * minimizing wakeups, not running timers at the
    1740             :                          * earliest interrupt after their soft expiration.
    1741             :                          * This allows us to avoid using a Priority Search
    1742             :                          * Tree, which can answer a stabbing query for
    1743             :                          * overlapping intervals and instead use the simple
    1744             :                          * BST we already have.
    1745             :                          * We don't add extra wakeups by delaying timers that
    1746             :                          * are to the right of a not-yet-expired timer, because that
    1747             :                          * timer will have to trigger a wakeup anyway.
    1748             :                          */
    1749           0 :                         if (basenow < hrtimer_get_softexpires_tv64(timer))
    1750             :                                 break;
    1751             : 
    1752           0 :                         __run_hrtimer(cpu_base, base, timer, &basenow, flags);
    1753             :                         if (active_mask == HRTIMER_ACTIVE_SOFT)
    1754             :                                 hrtimer_sync_wait_running(cpu_base, flags);
    1755             :                 }
    1756             :         }
    1757           5 : }
    1758             : 
    1759           0 : static __latent_entropy void hrtimer_run_softirq(struct softirq_action *h)
    1760             : {
    1761           0 :         struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases);
    1762             :         unsigned long flags;
    1763             :         ktime_t now;
    1764             : 
    1765           0 :         hrtimer_cpu_base_lock_expiry(cpu_base);
    1766           0 :         raw_spin_lock_irqsave(&cpu_base->lock, flags);
    1767             : 
    1768           0 :         now = hrtimer_update_base(cpu_base);
    1769           0 :         __hrtimer_run_queues(cpu_base, now, flags, HRTIMER_ACTIVE_SOFT);
    1770             : 
    1771           0 :         cpu_base->softirq_activated = 0;
    1772           0 :         hrtimer_update_softirq_timer(cpu_base, true);
    1773             : 
    1774           0 :         raw_spin_unlock_irqrestore(&cpu_base->lock, flags);
    1775           0 :         hrtimer_cpu_base_unlock_expiry(cpu_base);
    1776           0 : }
    1777             : 
    1778             : #ifdef CONFIG_HIGH_RES_TIMERS
    1779             : 
    1780             : /*
    1781             :  * High resolution timer interrupt
    1782             :  * Called with interrupts disabled
    1783             :  */
    1784             : void hrtimer_interrupt(struct clock_event_device *dev)
    1785             : {
    1786             :         struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases);
    1787             :         ktime_t expires_next, now, entry_time, delta;
    1788             :         unsigned long flags;
    1789             :         int retries = 0;
    1790             : 
    1791             :         BUG_ON(!cpu_base->hres_active);
    1792             :         cpu_base->nr_events++;
    1793             :         dev->next_event = KTIME_MAX;
    1794             : 
    1795             :         raw_spin_lock_irqsave(&cpu_base->lock, flags);
    1796             :         entry_time = now = hrtimer_update_base(cpu_base);
    1797             : retry:
    1798             :         cpu_base->in_hrtirq = 1;
    1799             :         /*
    1800             :          * We set expires_next to KTIME_MAX here with cpu_base->lock
    1801             :          * held to prevent that a timer is enqueued in our queue via
    1802             :          * the migration code. This does not affect enqueueing of
    1803             :          * timers which run their callback and need to be requeued on
    1804             :          * this CPU.
    1805             :          */
    1806             :         cpu_base->expires_next = KTIME_MAX;
    1807             : 
    1808             :         if (!ktime_before(now, cpu_base->softirq_expires_next)) {
    1809             :                 cpu_base->softirq_expires_next = KTIME_MAX;
    1810             :                 cpu_base->softirq_activated = 1;
    1811             :                 raise_softirq_irqoff(HRTIMER_SOFTIRQ);
    1812             :         }
    1813             : 
    1814             :         __hrtimer_run_queues(cpu_base, now, flags, HRTIMER_ACTIVE_HARD);
    1815             : 
    1816             :         /* Reevaluate the clock bases for the [soft] next expiry */
    1817             :         expires_next = hrtimer_update_next_event(cpu_base);
    1818             :         /*
    1819             :          * Store the new expiry value so the migration code can verify
    1820             :          * against it.
    1821             :          */
    1822             :         cpu_base->expires_next = expires_next;
    1823             :         cpu_base->in_hrtirq = 0;
    1824             :         raw_spin_unlock_irqrestore(&cpu_base->lock, flags);
    1825             : 
    1826             :         /* Reprogramming necessary ? */
    1827             :         if (!tick_program_event(expires_next, 0)) {
    1828             :                 cpu_base->hang_detected = 0;
    1829             :                 return;
    1830             :         }
    1831             : 
    1832             :         /*
    1833             :          * The next timer was already expired due to:
    1834             :          * - tracing
    1835             :          * - long lasting callbacks
    1836             :          * - being scheduled away when running in a VM
    1837             :          *
    1838             :          * We need to prevent looping forever in the hrtimer
    1839             :          * interrupt routine. We give it 3 attempts to avoid
    1840             :          * overreacting on some spurious event.
    1841             :          *
    1842             :          * Acquire base lock for updating the offsets and retrieving
    1843             :          * the current time.
    1844             :          */
    1845             :         raw_spin_lock_irqsave(&cpu_base->lock, flags);
    1846             :         now = hrtimer_update_base(cpu_base);
    1847             :         cpu_base->nr_retries++;
    1848             :         if (++retries < 3)
    1849             :                 goto retry;
    1850             :         /*
    1851             :          * Give the system a chance to do something else than looping
    1852             :          * here. We stored the entry time, so we know exactly how long
    1853             :          * we spent here. We schedule the next event this amount of
    1854             :          * time away.
    1855             :          */
    1856             :         cpu_base->nr_hangs++;
    1857             :         cpu_base->hang_detected = 1;
    1858             :         raw_spin_unlock_irqrestore(&cpu_base->lock, flags);
    1859             : 
    1860             :         delta = ktime_sub(now, entry_time);
    1861             :         if ((unsigned int)delta > cpu_base->max_hang_time)
    1862             :                 cpu_base->max_hang_time = (unsigned int) delta;
    1863             :         /*
    1864             :          * Limit it to a sensible value as we enforce a longer
    1865             :          * delay. Give the CPU at least 100ms to catch up.
    1866             :          */
    1867             :         if (delta > 100 * NSEC_PER_MSEC)
    1868             :                 expires_next = ktime_add_ns(now, 100 * NSEC_PER_MSEC);
    1869             :         else
    1870             :                 expires_next = ktime_add(now, delta);
    1871             :         tick_program_event(expires_next, 1);
    1872             :         pr_warn_once("hrtimer: interrupt took %llu ns\n", ktime_to_ns(delta));
    1873             : }
    1874             : 
    1875             : /* called with interrupts disabled */
    1876             : static inline void __hrtimer_peek_ahead_timers(void)
    1877             : {
    1878             :         struct tick_device *td;
    1879             : 
    1880             :         if (!hrtimer_hres_active())
    1881             :                 return;
    1882             : 
    1883             :         td = this_cpu_ptr(&tick_cpu_device);
    1884             :         if (td && td->evtdev)
    1885             :                 hrtimer_interrupt(td->evtdev);
    1886             : }
    1887             : 
    1888             : #else /* CONFIG_HIGH_RES_TIMERS */
    1889             : 
    1890             : static inline void __hrtimer_peek_ahead_timers(void) { }
    1891             : 
    1892             : #endif  /* !CONFIG_HIGH_RES_TIMERS */
    1893             : 
    1894             : /*
    1895             :  * Called from run_local_timers in hardirq context every jiffy
    1896             :  */
    1897           5 : void hrtimer_run_queues(void)
    1898             : {
    1899           5 :         struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases);
    1900             :         unsigned long flags;
    1901             :         ktime_t now;
    1902             : 
    1903           5 :         if (__hrtimer_hres_active(cpu_base))
    1904             :                 return;
    1905             : 
    1906             :         /*
    1907             :          * This _is_ ugly: We have to check periodically whether we
    1908             :          * can switch to highres and/or nohz mode. The clocksource
    1909             :          * switch happens with xtime_lock held. Notification from
    1910             :          * there only sets the check bit in the tick_oneshot code,
    1911             :          * otherwise we might deadlock vs. xtime_lock.
    1912             :          */
    1913           5 :         if (tick_check_oneshot_change(!hrtimer_is_hres_enabled())) {
    1914             :                 hrtimer_switch_to_hres();
    1915             :                 return;
    1916             :         }
    1917             : 
    1918           5 :         raw_spin_lock_irqsave(&cpu_base->lock, flags);
    1919           5 :         now = hrtimer_update_base(cpu_base);
    1920             : 
    1921          10 :         if (!ktime_before(now, cpu_base->softirq_expires_next)) {
    1922           0 :                 cpu_base->softirq_expires_next = KTIME_MAX;
    1923           0 :                 cpu_base->softirq_activated = 1;
    1924           0 :                 raise_softirq_irqoff(HRTIMER_SOFTIRQ);
    1925             :         }
    1926             : 
    1927           5 :         __hrtimer_run_queues(cpu_base, now, flags, HRTIMER_ACTIVE_HARD);
    1928          10 :         raw_spin_unlock_irqrestore(&cpu_base->lock, flags);
    1929             : }
    1930             : 
    1931             : /*
    1932             :  * Sleep related functions:
    1933             :  */
    1934           0 : static enum hrtimer_restart hrtimer_wakeup(struct hrtimer *timer)
    1935             : {
    1936           0 :         struct hrtimer_sleeper *t =
    1937           0 :                 container_of(timer, struct hrtimer_sleeper, timer);
    1938           0 :         struct task_struct *task = t->task;
    1939             : 
    1940           0 :         t->task = NULL;
    1941           0 :         if (task)
    1942           0 :                 wake_up_process(task);
    1943             : 
    1944           0 :         return HRTIMER_NORESTART;
    1945             : }
    1946             : 
    1947             : /**
    1948             :  * hrtimer_sleeper_start_expires - Start a hrtimer sleeper timer
    1949             :  * @sl:         sleeper to be started
    1950             :  * @mode:       timer mode abs/rel
    1951             :  *
    1952             :  * Wrapper around hrtimer_start_expires() for hrtimer_sleeper based timers
    1953             :  * to allow PREEMPT_RT to tweak the delivery mode (soft/hardirq context)
    1954             :  */
    1955           0 : void hrtimer_sleeper_start_expires(struct hrtimer_sleeper *sl,
    1956             :                                    enum hrtimer_mode mode)
    1957             : {
    1958             :         /*
    1959             :          * Make the enqueue delivery mode check work on RT. If the sleeper
    1960             :          * was initialized for hard interrupt delivery, force the mode bit.
    1961             :          * This is a special case for hrtimer_sleepers because
    1962             :          * hrtimer_init_sleeper() determines the delivery mode on RT, so that
    1963             :          * fiddling with this decision is avoided at the call sites.
    1964             :          */
    1965             :         if (IS_ENABLED(CONFIG_PREEMPT_RT) && sl->timer.is_hard)
    1966             :                 mode |= HRTIMER_MODE_HARD;
    1967             : 
    1968           0 :         hrtimer_start_expires(&sl->timer, mode);
    1969           0 : }
    1970             : EXPORT_SYMBOL_GPL(hrtimer_sleeper_start_expires);
    1971             : 
    1972             : static void __hrtimer_init_sleeper(struct hrtimer_sleeper *sl,
    1973             :                                    clockid_t clock_id, enum hrtimer_mode mode)
    1974             : {
    1975             :         /*
    1976             :          * On PREEMPT_RT enabled kernels hrtimers which are not explicitly
    1977             :          * marked for hard interrupt expiry mode are moved into soft
    1978             :          * interrupt context either for latency reasons or because the
    1979             :          * hrtimer callback takes regular spinlocks or invokes other
    1980             :          * functions which are not suitable for hard interrupt context on
    1981             :          * PREEMPT_RT.
    1982             :          *
    1983             :          * The hrtimer_sleeper callback is RT compatible in hard interrupt
    1984             :          * context, but there is a latency concern: Untrusted userspace can
    1985             :          * spawn many threads which arm timers for the same expiry time on
    1986             :          * the same CPU. That causes a latency spike due to the wakeup of
    1987             :          * a gazillion threads.
    1988             :          *
    1989             :          * OTOH, privileged real-time user space applications rely on the
    1990             :          * low latency of hard interrupt wakeups. If the current task is in
    1991             :          * a real-time scheduling class, mark the mode for hard interrupt
    1992             :          * expiry.
    1993             :          */
    1994             :         if (IS_ENABLED(CONFIG_PREEMPT_RT)) {
    1995             :                 if (task_is_realtime(current) && !(mode & HRTIMER_MODE_SOFT))
    1996             :                         mode |= HRTIMER_MODE_HARD;
    1997             :         }
    1998             : 
    1999           0 :         __hrtimer_init(&sl->timer, clock_id, mode);
    2000           0 :         sl->timer.function = hrtimer_wakeup;
    2001           0 :         sl->task = current;
    2002             : }
    2003             : 
    2004             : /**
    2005             :  * hrtimer_init_sleeper - initialize sleeper to the given clock
    2006             :  * @sl:         sleeper to be initialized
    2007             :  * @clock_id:   the clock to be used
    2008             :  * @mode:       timer mode abs/rel
    2009             :  */
    2010           0 : void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, clockid_t clock_id,
    2011             :                           enum hrtimer_mode mode)
    2012             : {
    2013           0 :         debug_init(&sl->timer, clock_id, mode);
    2014           0 :         __hrtimer_init_sleeper(sl, clock_id, mode);
    2015             : 
    2016           0 : }
    2017             : EXPORT_SYMBOL_GPL(hrtimer_init_sleeper);
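The two helpers above are normally used together with a stack-based sleeper, much as do_nanosleep() below does. A minimal sketch of that pattern follows; the function name and the 100 ms expiry are illustrative only and are not part of hrtimer.c:

        /* Illustrative sketch: sleep for roughly 100 ms on a stack-based sleeper. */
        static int example_sleep_100ms(void)
        {
                struct hrtimer_sleeper sl;

                hrtimer_init_sleeper_on_stack(&sl, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
                hrtimer_set_expires(&sl.timer, ms_to_ktime(100));

                set_current_state(TASK_INTERRUPTIBLE);
                hrtimer_sleeper_start_expires(&sl, HRTIMER_MODE_REL);
                if (sl.task)
                        schedule();

                hrtimer_cancel(&sl.timer);
                destroy_hrtimer_on_stack(&sl.timer);
                __set_current_state(TASK_RUNNING);

                /* hrtimer_wakeup() clears sl.task when the timer has fired. */
                return sl.task ? -EINTR : 0;
        }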
    2018             : 
    2019           0 : int nanosleep_copyout(struct restart_block *restart, struct timespec64 *ts)
    2020             : {
    2021           0 :         switch(restart->nanosleep.type) {
    2022             : #ifdef CONFIG_COMPAT_32BIT_TIME
    2023             :         case TT_COMPAT:
    2024             :                 if (put_old_timespec32(ts, restart->nanosleep.compat_rmtp))
    2025             :                         return -EFAULT;
    2026             :                 break;
    2027             : #endif
    2028             :         case TT_NATIVE:
    2029           0 :                 if (put_timespec64(ts, restart->nanosleep.rmtp))
    2030             :                         return -EFAULT;
    2031             :                 break;
    2032             :         default:
    2033           0 :                 BUG();
    2034             :         }
    2035           0 :         return -ERESTART_RESTARTBLOCK;
    2036             : }
    2037             : 
    2038           0 : static int __sched do_nanosleep(struct hrtimer_sleeper *t, enum hrtimer_mode mode)
    2039             : {
    2040             :         struct restart_block *restart;
    2041             : 
    2042             :         do {
    2043           0 :                 set_current_state(TASK_INTERRUPTIBLE|TASK_FREEZABLE);
    2044           0 :                 hrtimer_sleeper_start_expires(t, mode);
    2045             : 
    2046           0 :                 if (likely(t->task))
    2047           0 :                         schedule();
    2048             : 
    2049           0 :                 hrtimer_cancel(&t->timer);
    2050           0 :                 mode = HRTIMER_MODE_ABS;
    2051             : 
    2052           0 :         } while (t->task && !signal_pending(current));
    2053             : 
    2054           0 :         __set_current_state(TASK_RUNNING);
    2055             : 
    2056           0 :         if (!t->task)
    2057             :                 return 0;
    2058             : 
    2059           0 :         restart = &current->restart_block;
    2060           0 :         if (restart->nanosleep.type != TT_NONE) {
    2061           0 :                 ktime_t rem = hrtimer_expires_remaining(&t->timer);
    2062             :                 struct timespec64 rmt;
    2063             : 
    2064           0 :                 if (rem <= 0)
    2065             :                         return 0;
    2066           0 :                 rmt = ktime_to_timespec64(rem);
    2067             : 
    2068           0 :                 return nanosleep_copyout(restart, &rmt);
    2069             :         }
    2070             :         return -ERESTART_RESTARTBLOCK;
    2071             : }
    2072             : 
    2073           0 : static long __sched hrtimer_nanosleep_restart(struct restart_block *restart)
    2074             : {
    2075             :         struct hrtimer_sleeper t;
    2076             :         int ret;
    2077             : 
    2078           0 :         hrtimer_init_sleeper_on_stack(&t, restart->nanosleep.clockid,
    2079             :                                       HRTIMER_MODE_ABS);
    2080           0 :         hrtimer_set_expires_tv64(&t.timer, restart->nanosleep.expires);
    2081           0 :         ret = do_nanosleep(&t, HRTIMER_MODE_ABS);
    2082           0 :         destroy_hrtimer_on_stack(&t.timer);
    2083           0 :         return ret;
    2084             : }
    2085             : 
    2086           0 : long hrtimer_nanosleep(ktime_t rqtp, const enum hrtimer_mode mode,
    2087             :                        const clockid_t clockid)
    2088             : {
    2089             :         struct restart_block *restart;
    2090             :         struct hrtimer_sleeper t;
    2091           0 :         int ret = 0;
    2092             :         u64 slack;
    2093             : 
    2094           0 :         slack = current->timer_slack_ns;
    2095           0 :         if (rt_task(current))
    2096           0 :                 slack = 0;
    2097             : 
    2098           0 :         hrtimer_init_sleeper_on_stack(&t, clockid, mode);
    2099           0 :         hrtimer_set_expires_range_ns(&t.timer, rqtp, slack);
    2100           0 :         ret = do_nanosleep(&t, mode);
    2101           0 :         if (ret != -ERESTART_RESTARTBLOCK)
    2102             :                 goto out;
    2103             : 
    2104             :         /* Absolute timers do not update the rmtp value and restart: */
    2105           0 :         if (mode == HRTIMER_MODE_ABS) {
    2106             :                 ret = -ERESTARTNOHAND;
    2107             :                 goto out;
    2108             :         }
    2109             : 
    2110           0 :         restart = &current->restart_block;
    2111           0 :         restart->nanosleep.clockid = t.timer.base->clockid;
    2112           0 :         restart->nanosleep.expires = hrtimer_get_expires_tv64(&t.timer);
    2113           0 :         set_restart_fn(restart, hrtimer_nanosleep_restart);
    2114             : out:
    2115           0 :         destroy_hrtimer_on_stack(&t.timer);
    2116           0 :         return ret;
    2117             : }
    2118             : 
    2119             : #ifdef CONFIG_64BIT
    2120             : 
    2121           0 : SYSCALL_DEFINE2(nanosleep, struct __kernel_timespec __user *, rqtp,
    2122             :                 struct __kernel_timespec __user *, rmtp)
    2123             : {
    2124             :         struct timespec64 tu;
    2125             : 
    2126           0 :         if (get_timespec64(&tu, rqtp))
    2127             :                 return -EFAULT;
    2128             : 
    2129           0 :         if (!timespec64_valid(&tu))
    2130             :                 return -EINVAL;
    2131             : 
    2132           0 :         current->restart_block.fn = do_no_restart_syscall;
    2133           0 :         current->restart_block.nanosleep.type = rmtp ? TT_NATIVE : TT_NONE;
    2134           0 :         current->restart_block.nanosleep.rmtp = rmtp;
    2135           0 :         return hrtimer_nanosleep(timespec64_to_ktime(tu), HRTIMER_MODE_REL,
    2136             :                                  CLOCK_MONOTONIC);
    2137             : }
    2138             : 
    2139             : #endif
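From userspace, the rmtp/restart machinery above appears as the classic nanosleep() contract: when a handled signal interrupts the sleep, the call fails with EINTR and the remaining time is written back through rmtp (via nanosleep_copyout()), so the caller can resume. A hedged userspace illustration, not kernel code:

        /* Userspace sketch: keep sleeping until the full request has elapsed. */
        #include <errno.h>
        #include <time.h>

        static int sleep_full(struct timespec req)
        {
                struct timespec rem;

                while (nanosleep(&req, &rem) == -1) {
                        if (errno != EINTR)
                                return -1;      /* genuine failure */
                        req = rem;              /* resume with the remaining time */
                }
                return 0;
        }

If the interrupting signal does not end up running a handler, the kernel restarts the sleep transparently through the restart_block set up in hrtimer_nanosleep(); since the stored expiry is absolute, no sleep time is lost across the restart.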
    2140             : 
    2141             : #ifdef CONFIG_COMPAT_32BIT_TIME
    2142             : 
    2143             : SYSCALL_DEFINE2(nanosleep_time32, struct old_timespec32 __user *, rqtp,
    2144             :                        struct old_timespec32 __user *, rmtp)
    2145             : {
    2146             :         struct timespec64 tu;
    2147             : 
    2148             :         if (get_old_timespec32(&tu, rqtp))
    2149             :                 return -EFAULT;
    2150             : 
    2151             :         if (!timespec64_valid(&tu))
    2152             :                 return -EINVAL;
    2153             : 
    2154             :         current->restart_block.fn = do_no_restart_syscall;
    2155             :         current->restart_block.nanosleep.type = rmtp ? TT_COMPAT : TT_NONE;
    2156             :         current->restart_block.nanosleep.compat_rmtp = rmtp;
    2157             :         return hrtimer_nanosleep(timespec64_to_ktime(tu), HRTIMER_MODE_REL,
    2158             :                                  CLOCK_MONOTONIC);
    2159             : }
    2160             : #endif
    2161             : 
    2162             : /*
    2163             :  * Functions related to boot-time initialization:
    2164             :  */
    2165           0 : int hrtimers_prepare_cpu(unsigned int cpu)
    2166             : {
    2167           1 :         struct hrtimer_cpu_base *cpu_base = &per_cpu(hrtimer_bases, cpu);
    2168             :         int i;
    2169             : 
    2170           9 :         for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
    2171           8 :                 struct hrtimer_clock_base *clock_b = &cpu_base->clock_base[i];
    2172             : 
    2173           8 :                 clock_b->cpu_base = cpu_base;
    2174          16 :                 seqcount_raw_spinlock_init(&clock_b->seq, &cpu_base->lock);
    2175          16 :                 timerqueue_init_head(&clock_b->active);
    2176             :         }
    2177             : 
    2178           1 :         cpu_base->cpu = cpu;
    2179           1 :         cpu_base->active_bases = 0;
    2180           1 :         cpu_base->hres_active = 0;
    2181           1 :         cpu_base->hang_detected = 0;
    2182           1 :         cpu_base->next_timer = NULL;
    2183           1 :         cpu_base->softirq_next_timer = NULL;
    2184           1 :         cpu_base->expires_next = KTIME_MAX;
    2185           1 :         cpu_base->softirq_expires_next = KTIME_MAX;
    2186           1 :         hrtimer_cpu_base_init_expiry_lock(cpu_base);
    2187           0 :         return 0;
    2188             : }
    2189             : 
    2190             : #ifdef CONFIG_HOTPLUG_CPU
    2191             : 
    2192             : static void migrate_hrtimer_list(struct hrtimer_clock_base *old_base,
    2193             :                                 struct hrtimer_clock_base *new_base)
    2194             : {
    2195             :         struct hrtimer *timer;
    2196             :         struct timerqueue_node *node;
    2197             : 
    2198             :         while ((node = timerqueue_getnext(&old_base->active))) {
    2199             :                 timer = container_of(node, struct hrtimer, node);
    2200             :                 BUG_ON(hrtimer_callback_running(timer));
    2201             :                 debug_deactivate(timer);
    2202             : 
    2203             :                 /*
    2204             :                  * Mark it as ENQUEUED, not INACTIVE, otherwise the
    2205             :                  * timer could be seen as !active and just vanish away
    2206             :                  * under us on another CPU
    2207             :                  */
    2208             :                 __remove_hrtimer(timer, old_base, HRTIMER_STATE_ENQUEUED, 0);
    2209             :                 timer->base = new_base;
    2210             :                 /*
    2211             :                  * Enqueue the timers on the new cpu. This does not
    2212             :                  * reprogram the event device in case the timer
    2213             :                  * expires before the earliest on this CPU, but we run
    2214             :                  * hrtimer_interrupt after we migrated everything to
    2215             :                  * sort out already expired timers and reprogram the
    2216             :                  * event device.
    2217             :                  */
    2218             :                 enqueue_hrtimer(timer, new_base, HRTIMER_MODE_ABS);
    2219             :         }
    2220             : }
    2221             : 
    2222             : int hrtimers_dead_cpu(unsigned int scpu)
    2223             : {
    2224             :         struct hrtimer_cpu_base *old_base, *new_base;
    2225             :         int i;
    2226             : 
    2227             :         BUG_ON(cpu_online(scpu));
    2228             :         tick_cancel_sched_timer(scpu);
    2229             : 
    2230             :         /*
    2231             :          * This BH disable ensures that raise_softirq_irqoff() does
    2232             :          * not wake up ksoftirqd (and acquire the pi-lock) while
    2233             :          * holding the cpu_base lock
    2234             :          */
    2235             :         local_bh_disable();
    2236             :         local_irq_disable();
    2237             :         old_base = &per_cpu(hrtimer_bases, scpu);
    2238             :         new_base = this_cpu_ptr(&hrtimer_bases);
    2239             :         /*
    2240             :          * The caller is globally serialized and nobody else
    2241             :          * takes two locks at once, so deadlock is not possible.
    2242             :          */
    2243             :         raw_spin_lock(&new_base->lock);
    2244             :         raw_spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING);
    2245             : 
    2246             :         for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
    2247             :                 migrate_hrtimer_list(&old_base->clock_base[i],
    2248             :                                      &new_base->clock_base[i]);
    2249             :         }
    2250             : 
    2251             :         /*
    2252             :          * The migration might have changed the first expiring softirq
    2253             :          * timer on this CPU. Update it.
    2254             :          */
    2255             :         hrtimer_update_softirq_timer(new_base, false);
    2256             : 
    2257             :         raw_spin_unlock(&old_base->lock);
    2258             :         raw_spin_unlock(&new_base->lock);
    2259             : 
    2260             :         /* Check if we got expired work to do */
    2261             :         __hrtimer_peek_ahead_timers();
    2262             :         local_irq_enable();
    2263             :         local_bh_enable();
    2264             :         return 0;
    2265             : }
    2266             : 
    2267             : #endif /* CONFIG_HOTPLUG_CPU */
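For context, hrtimers_prepare_cpu() and hrtimers_dead_cpu() are CPU hotplug callbacks; hrtimer itself is wired into the core hotplug state table rather than registering at runtime. A generic, hypothetical sketch of how such a prepare/teardown pair is typically hooked up with cpuhp_setup_state() follows; all names below are illustrative and are not hrtimer's actual registration:

        #include <linux/cpuhotplug.h>
        #include <linux/init.h>

        static int example_prepare_cpu(unsigned int cpu)
        {
                /* allocate or reset per-CPU state for @cpu */
                return 0;
        }

        static int example_dead_cpu(unsigned int cpu)
        {
                /* migrate pending work away from the dead @cpu */
                return 0;
        }

        static int __init example_hotplug_init(void)
        {
                int ret;

                ret = cpuhp_setup_state(CPUHP_BP_PREPARE_DYN, "example:prepare",
                                        example_prepare_cpu, example_dead_cpu);
                /* dynamic states return the allocated state number on success */
                return ret < 0 ? ret : 0;
        }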
    2268             : 
    2269           1 : void __init hrtimers_init(void)
    2270             : {
    2271           1 :         hrtimers_prepare_cpu(smp_processor_id());
    2272           1 :         open_softirq(HRTIMER_SOFTIRQ, hrtimer_run_softirq);
    2273           1 : }
    2274             : 
    2275             : /**
    2276             :  * schedule_hrtimeout_range_clock - sleep until timeout
    2277             :  * @expires:    timeout value (ktime_t)
    2278             :  * @delta:      slack in expires timeout (ktime_t) for SCHED_OTHER tasks
    2279             :  * @mode:       timer mode
    2280             :  * @clock_id:   timer clock to be used
    2281             :  */
    2282             : int __sched
    2283           0 : schedule_hrtimeout_range_clock(ktime_t *expires, u64 delta,
    2284             :                                const enum hrtimer_mode mode, clockid_t clock_id)
    2285             : {
    2286             :         struct hrtimer_sleeper t;
    2287             : 
    2288             :         /*
    2289             :          * Optimize when a zero timeout value is given. It does not
    2290             :          * matter whether this is an absolute or a relative time.
    2291             :          */
    2292           0 :         if (expires && *expires == 0) {
    2293           0 :                 __set_current_state(TASK_RUNNING);
    2294           0 :                 return 0;
    2295             :         }
    2296             : 
    2297             :         /*
    2298             :          * A NULL parameter means "infinite"
    2299             :          */
    2300           0 :         if (!expires) {
    2301           0 :                 schedule();
    2302           0 :                 return -EINTR;
    2303             :         }
    2304             : 
    2305             :         /*
    2306             :          * Override any slack passed by the user if under
    2307             :          * rt constraints.
    2308             :          */
    2309           0 :         if (rt_task(current))
    2310           0 :                 delta = 0;
    2311             : 
    2312           0 :         hrtimer_init_sleeper_on_stack(&t, clock_id, mode);
    2313           0 :         hrtimer_set_expires_range_ns(&t.timer, *expires, delta);
    2314           0 :         hrtimer_sleeper_start_expires(&t, mode);
    2315             : 
    2316           0 :         if (likely(t.task))
    2317           0 :                 schedule();
    2318             : 
    2319           0 :         hrtimer_cancel(&t.timer);
    2320           0 :         destroy_hrtimer_on_stack(&t.timer);
    2321             : 
    2322           0 :         __set_current_state(TASK_RUNNING);
    2323             : 
    2324           0 :         return !t.task ? 0 : -EINTR;
    2325             : }
    2326             : EXPORT_SYMBOL_GPL(schedule_hrtimeout_range_clock);
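A minimal sketch of the clock-aware variant used for an absolute deadline; the helper name and the CLOCK_REALTIME choice are illustrative only:

        /* Illustrative sketch: sleep until an absolute CLOCK_REALTIME expiry. */
        static int example_wait_until(ktime_t abs_expiry)
        {
                set_current_state(TASK_INTERRUPTIBLE);
                return schedule_hrtimeout_range_clock(&abs_expiry, 0,
                                                      HRTIMER_MODE_ABS,
                                                      CLOCK_REALTIME);
        }

It returns 0 once the deadline has passed, or -EINTR if the task was woken early by a signal or an explicit wakeup.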
    2327             : 
    2328             : /**
    2329             :  * schedule_hrtimeout_range - sleep until timeout
    2330             :  * @expires:    timeout value (ktime_t)
    2331             :  * @delta:      slack in expires timeout (ktime_t) for SCHED_OTHER tasks
    2332             :  * @mode:       timer mode
    2333             :  *
    2334             :  * Make the current task sleep until the given expiry time has
    2335             :  * elapsed. The routine will return immediately unless
    2336             :  * the current task state has been set (see set_current_state()).
    2337             :  *
    2338             :  * The @delta argument gives the kernel the freedom to schedule the
    2339             :  * actual wakeup to a time that is both power and performance friendly
    2340             :  * for regular (non RT/DL) tasks.
    2341             :  * The kernel gives its normal best effort behavior for "@expires+@delta",
    2342             :  * but may decide to fire the timer earlier, though never earlier than @expires.
    2343             :  *
    2344             :  * You can set the task state as follows -
    2345             :  *
    2346             :  * %TASK_UNINTERRUPTIBLE - at least the @expires time is guaranteed to
    2347             :  * pass before the routine returns unless the current task is explicitly
    2348             :  * woken up (e.g. by wake_up_process()).
    2349             :  *
    2350             :  * %TASK_INTERRUPTIBLE - the routine may return early if a signal is
    2351             :  * delivered to the current task or the current task is explicitly woken
    2352             :  * up.
    2353             :  *
    2354             :  * The current task state is guaranteed to be TASK_RUNNING when this
    2355             :  * routine returns.
    2356             :  *
    2357             :  * Returns 0 when the timer has expired. If the task was woken before the
    2358             :  * timer expired by a signal (only possible in state TASK_INTERRUPTIBLE) or
    2359             :  * by an explicit wakeup, it returns -EINTR.
    2360             :  */
    2361           0 : int __sched schedule_hrtimeout_range(ktime_t *expires, u64 delta,
    2362             :                                      const enum hrtimer_mode mode)
    2363             : {
    2364           0 :         return schedule_hrtimeout_range_clock(expires, delta, mode,
    2365             :                                               CLOCK_MONOTONIC);
    2366             : }
    2367             : EXPORT_SYMBOL_GPL(schedule_hrtimeout_range);
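A minimal usage sketch of the relative form with slack; the 10 ms timeout and 1 ms of slack are illustrative values only:

        /* Illustrative sketch: interruptible sleep of ~10 ms with 1 ms of slack. */
        static int example_short_sleep(void)
        {
                ktime_t timeout = ms_to_ktime(10);

                set_current_state(TASK_INTERRUPTIBLE);
                return schedule_hrtimeout_range(&timeout, NSEC_PER_MSEC,
                                                HRTIMER_MODE_REL);
        }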
    2368             : 
    2369             : /**
    2370             :  * schedule_hrtimeout - sleep until timeout
    2371             :  * @expires:    timeout value (ktime_t)
    2372             :  * @mode:       timer mode
    2373             :  *
    2374             :  * Make the current task sleep until the given expiry time has
    2375             :  * elapsed. The routine will return immediately unless
    2376             :  * the current task state has been set (see set_current_state()).
    2377             :  *
    2378             :  * You can set the task state as follows -
    2379             :  *
    2380             :  * %TASK_UNINTERRUPTIBLE - at least the @expires time is guaranteed to
    2381             :  * pass before the routine returns unless the current task is explicitly
    2382             :  * woken up (e.g. by wake_up_process()).
    2383             :  *
    2384             :  * %TASK_INTERRUPTIBLE - the routine may return early if a signal is
    2385             :  * delivered to the current task or the current task is explicitly woken
    2386             :  * up.
    2387             :  *
    2388             :  * The current task state is guaranteed to be TASK_RUNNING when this
    2389             :  * routine returns.
    2390             :  *
    2391             :  * Returns 0 when the timer has expired. If the task was woken before the
    2392             :  * timer expired by a signal (only possible in state TASK_INTERRUPTIBLE) or
    2393             :  * by an explicit wakeup, it returns -EINTR.
    2394             :  */
    2395           0 : int __sched schedule_hrtimeout(ktime_t *expires,
    2396             :                                const enum hrtimer_mode mode)
    2397             : {
    2398           0 :         return schedule_hrtimeout_range(expires, 0, mode);
    2399             : }
    2400             : EXPORT_SYMBOL_GPL(schedule_hrtimeout);

Generated by: LCOV version 1.14