LCOV - code coverage report
Current view: top level - kernel/time - hrtimer.c (source / functions)
Test:         coverage.info
Date:         2023-03-27 20:00:47

                    Hit    Total   Coverage
Lines:               77      372     20.7 %
Functions:            9       39     23.1 %

          Line data    Source code
       1             : // SPDX-License-Identifier: GPL-2.0
       2             : /*
       3             :  *  Copyright(C) 2005-2006, Thomas Gleixner <tglx@linutronix.de>
       4             :  *  Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar
       5             :  *  Copyright(C) 2006-2007  Timesys Corp., Thomas Gleixner
       6             :  *
       7             :  *  High-resolution kernel timers
       8             :  *
       9             :  *  In contrast to the low-resolution timeout API, aka timer wheel,
      10             :  *  hrtimers provide finer resolution and accuracy depending on system
      11             :  *  configuration and capabilities.
      12             :  *
      13             :  *  Started by: Thomas Gleixner and Ingo Molnar
      14             :  *
      15             :  *  Credits:
      16             :  *      Based on the original timer wheel code
      17             :  *
      18             :  *      Help, testing, suggestions, bugfixes, improvements were
      19             :  *      provided by:
      20             :  *
      21             :  *      George Anzinger, Andrew Morton, Steven Rostedt, Roman Zippel
       22             :  *      et al.
      23             :  */
      24             : 
      25             : #include <linux/cpu.h>
      26             : #include <linux/export.h>
      27             : #include <linux/percpu.h>
      28             : #include <linux/hrtimer.h>
      29             : #include <linux/notifier.h>
      30             : #include <linux/syscalls.h>
      31             : #include <linux/interrupt.h>
      32             : #include <linux/tick.h>
      33             : #include <linux/err.h>
      34             : #include <linux/debugobjects.h>
      35             : #include <linux/sched/signal.h>
      36             : #include <linux/sched/sysctl.h>
      37             : #include <linux/sched/rt.h>
      38             : #include <linux/sched/deadline.h>
      39             : #include <linux/sched/nohz.h>
      40             : #include <linux/sched/debug.h>
      41             : #include <linux/timer.h>
      42             : #include <linux/freezer.h>
      43             : #include <linux/compat.h>
      44             : 
      45             : #include <linux/uaccess.h>
      46             : 
      47             : #include <trace/events/timer.h>
      48             : 
      49             : #include "tick-internal.h"
      50             : 
      51             : /*
      52             :  * Masks for selecting the soft and hard context timers from
       53             :  * cpu_base->active_bases
      54             :  */
      55             : #define MASK_SHIFT              (HRTIMER_BASE_MONOTONIC_SOFT)
      56             : #define HRTIMER_ACTIVE_HARD     ((1U << MASK_SHIFT) - 1)
      57             : #define HRTIMER_ACTIVE_SOFT     (HRTIMER_ACTIVE_HARD << MASK_SHIFT)
      58             : #define HRTIMER_ACTIVE_ALL      (HRTIMER_ACTIVE_SOFT | HRTIMER_ACTIVE_HARD)
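
Given the base ordering in the hrtimer_bases table below (the four hard bases
first, then their four soft counterparts), HRTIMER_BASE_MONOTONIC_SOFT is 4 and
the masks work out to:

        MASK_SHIFT          == 4
        HRTIMER_ACTIVE_HARD == (1U << 4) - 1  == 0x0f   /* bases 0..3, hard */
        HRTIMER_ACTIVE_SOFT == 0x0f << 4      == 0xf0   /* bases 4..7, soft */
        HRTIMER_ACTIVE_ALL  == 0xf0 | 0x0f    == 0xff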
      59             : 
      60             : /*
      61             :  * The timer bases:
      62             :  *
      63             :  * There are more clockids than hrtimer bases. Thus, we index
      64             :  * into the timer bases by the hrtimer_base_type enum. When trying
      65             :  * to reach a base using a clockid, hrtimer_clockid_to_base()
      66             :  * is used to convert from clockid to the proper hrtimer_base_type.
      67             :  */
      68             : DEFINE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases) =
      69             : {
      70             :         .lock = __RAW_SPIN_LOCK_UNLOCKED(hrtimer_bases.lock),
      71             :         .clock_base =
      72             :         {
      73             :                 {
      74             :                         .index = HRTIMER_BASE_MONOTONIC,
      75             :                         .clockid = CLOCK_MONOTONIC,
      76             :                         .get_time = &ktime_get,
      77             :                 },
      78             :                 {
      79             :                         .index = HRTIMER_BASE_REALTIME,
      80             :                         .clockid = CLOCK_REALTIME,
      81             :                         .get_time = &ktime_get_real,
      82             :                 },
      83             :                 {
      84             :                         .index = HRTIMER_BASE_BOOTTIME,
      85             :                         .clockid = CLOCK_BOOTTIME,
      86             :                         .get_time = &ktime_get_boottime,
      87             :                 },
      88             :                 {
      89             :                         .index = HRTIMER_BASE_TAI,
      90             :                         .clockid = CLOCK_TAI,
      91             :                         .get_time = &ktime_get_clocktai,
      92             :                 },
      93             :                 {
      94             :                         .index = HRTIMER_BASE_MONOTONIC_SOFT,
      95             :                         .clockid = CLOCK_MONOTONIC,
      96             :                         .get_time = &ktime_get,
      97             :                 },
      98             :                 {
      99             :                         .index = HRTIMER_BASE_REALTIME_SOFT,
     100             :                         .clockid = CLOCK_REALTIME,
     101             :                         .get_time = &ktime_get_real,
     102             :                 },
     103             :                 {
     104             :                         .index = HRTIMER_BASE_BOOTTIME_SOFT,
     105             :                         .clockid = CLOCK_BOOTTIME,
     106             :                         .get_time = &ktime_get_boottime,
     107             :                 },
     108             :                 {
     109             :                         .index = HRTIMER_BASE_TAI_SOFT,
     110             :                         .clockid = CLOCK_TAI,
     111             :                         .get_time = &ktime_get_clocktai,
     112             :                 },
     113             :         }
     114             : };
     115             : 
     116             : static const int hrtimer_clock_to_base_table[MAX_CLOCKS] = {
     117             :         /* Make sure we catch unsupported clockids */
     118             :         [0 ... MAX_CLOCKS - 1]  = HRTIMER_MAX_CLOCK_BASES,
     119             : 
     120             :         [CLOCK_REALTIME]        = HRTIMER_BASE_REALTIME,
     121             :         [CLOCK_MONOTONIC]       = HRTIMER_BASE_MONOTONIC,
     122             :         [CLOCK_BOOTTIME]        = HRTIMER_BASE_BOOTTIME,
     123             :         [CLOCK_TAI]             = HRTIMER_BASE_TAI,
     124             : };
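
The hrtimer_clockid_to_base() helper mentioned in the comment above is not
visible in this excerpt; a minimal sketch, assuming it simply indexes this
table and falls back for out-of-range or unsupported clockids (sketch name is
hypothetical):

        /* Sketch only: index the table, treat HRTIMER_MAX_CLOCK_BASES as
         * "unsupported" and fall back to the monotonic base. */
        static inline int clockid_to_base_sketch(clockid_t clock_id)
        {
                if (clock_id >= 0 && clock_id < MAX_CLOCKS) {
                        int base = hrtimer_clock_to_base_table[clock_id];

                        if (base != HRTIMER_MAX_CLOCK_BASES)
                                return base;
                }
                return HRTIMER_BASE_MONOTONIC;  /* sane default */
        }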
     125             : 
     126             : /*
     127             :  * Functions and macros which are different for UP/SMP systems are kept in a
     128             :  * single place
     129             :  */
     130             : #ifdef CONFIG_SMP
     131             : 
     132             : /*
     133             :  * We require the migration_base for lock_hrtimer_base()/switch_hrtimer_base()
     134             :  * such that hrtimer_callback_running() can unconditionally dereference
     135             :  * timer->base->cpu_base
     136             :  */
     137             : static struct hrtimer_cpu_base migration_cpu_base = {
     138             :         .clock_base = { {
     139             :                 .cpu_base = &migration_cpu_base,
     140             :                 .seq      = SEQCNT_RAW_SPINLOCK_ZERO(migration_cpu_base.seq,
     141             :                                                      &migration_cpu_base.lock),
     142             :         }, },
     143             : };
     144             : 
     145             : #define migration_base  migration_cpu_base.clock_base[0]
     146             : 
     147             : static inline bool is_migration_base(struct hrtimer_clock_base *base)
     148             : {
     149             :         return base == &migration_base;
     150             : }
     151             : 
     152             : /*
     153             :  * We are using hashed locking: holding per_cpu(hrtimer_bases)[n].lock
     154             :  * means that all timers which are tied to this base via timer->base are
     155             :  * locked, and the base itself is locked too.
     156             :  *
     157             :  * So __run_timers/migrate_timers can safely modify all timers which could
     158             :  * be found on the lists/queues.
     159             :  *
     160             :  * When the timer's base is locked, and the timer removed from list, it is
     161             :  * possible to set timer->base = &migration_base and drop the lock: the timer
     162             :  * remains locked.
     163             :  */
     164             : static
     165             : struct hrtimer_clock_base *lock_hrtimer_base(const struct hrtimer *timer,
     166             :                                              unsigned long *flags)
     167             : {
     168             :         struct hrtimer_clock_base *base;
     169             : 
     170             :         for (;;) {
     171             :                 base = READ_ONCE(timer->base);
     172             :                 if (likely(base != &migration_base)) {
     173             :                         raw_spin_lock_irqsave(&base->cpu_base->lock, *flags);
     174             :                         if (likely(base == timer->base))
     175             :                                 return base;
     176             :                         /* The timer has migrated to another CPU: */
     177             :                         raw_spin_unlock_irqrestore(&base->cpu_base->lock, *flags);
     178             :                 }
     179             :                 cpu_relax();
     180             :         }
     181             : }
     182             : 
     183             : /*
     184             :  * We do not migrate the timer when it is expiring before the next
     185             :  * event on the target cpu. When high resolution is enabled, we cannot
     186             :  * reprogram the target cpu hardware and we would cause it to fire
     187             :  * late. To keep it simple, we handle the high resolution enabled and
      188             :  * disabled cases the same way.
     189             :  *
     190             :  * Called with cpu_base->lock of target cpu held.
     191             :  */
     192             : static int
     193             : hrtimer_check_target(struct hrtimer *timer, struct hrtimer_clock_base *new_base)
     194             : {
     195             :         ktime_t expires;
     196             : 
     197             :         expires = ktime_sub(hrtimer_get_expires(timer), new_base->offset);
     198             :         return expires < new_base->cpu_base->expires_next;
     199             : }
     200             : 
     201             : static inline
     202             : struct hrtimer_cpu_base *get_target_base(struct hrtimer_cpu_base *base,
     203             :                                          int pinned)
     204             : {
     205             : #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
     206             :         if (static_branch_likely(&timers_migration_enabled) && !pinned)
     207             :                 return &per_cpu(hrtimer_bases, get_nohz_timer_target());
     208             : #endif
     209             :         return base;
     210             : }
     211             : 
     212             : /*
      213             :  * We switch the timer base to a power-optimized target CPU,
     214             :  * if:
     215             :  *      - NO_HZ_COMMON is enabled
     216             :  *      - timer migration is enabled
     217             :  *      - the timer callback is not running
     218             :  *      - the timer is not the first expiring timer on the new target
     219             :  *
     220             :  * If one of the above requirements is not fulfilled we move the timer
     221             :  * to the current CPU or leave it on the previously assigned CPU if
     222             :  * the timer callback is currently running.
     223             :  */
     224             : static inline struct hrtimer_clock_base *
     225             : switch_hrtimer_base(struct hrtimer *timer, struct hrtimer_clock_base *base,
     226             :                     int pinned)
     227             : {
     228             :         struct hrtimer_cpu_base *new_cpu_base, *this_cpu_base;
     229             :         struct hrtimer_clock_base *new_base;
     230             :         int basenum = base->index;
     231             : 
     232             :         this_cpu_base = this_cpu_ptr(&hrtimer_bases);
     233             :         new_cpu_base = get_target_base(this_cpu_base, pinned);
     234             : again:
     235             :         new_base = &new_cpu_base->clock_base[basenum];
     236             : 
     237             :         if (base != new_base) {
     238             :                 /*
     239             :                  * We are trying to move timer to new_base.
     240             :                  * However we can't change timer's base while it is running,
     241             :                  * so we keep it on the same CPU. No hassle vs. reprogramming
     242             :                  * the event source in the high resolution case. The softirq
     243             :                  * code will take care of this when the timer function has
     244             :                  * completed. There is no conflict as we hold the lock until
     245             :                  * the timer is enqueued.
     246             :                  */
     247             :                 if (unlikely(hrtimer_callback_running(timer)))
     248             :                         return base;
     249             : 
     250             :                 /* See the comment in lock_hrtimer_base() */
     251             :                 WRITE_ONCE(timer->base, &migration_base);
     252             :                 raw_spin_unlock(&base->cpu_base->lock);
     253             :                 raw_spin_lock(&new_base->cpu_base->lock);
     254             : 
     255             :                 if (new_cpu_base != this_cpu_base &&
     256             :                     hrtimer_check_target(timer, new_base)) {
     257             :                         raw_spin_unlock(&new_base->cpu_base->lock);
     258             :                         raw_spin_lock(&base->cpu_base->lock);
     259             :                         new_cpu_base = this_cpu_base;
     260             :                         WRITE_ONCE(timer->base, base);
     261             :                         goto again;
     262             :                 }
     263             :                 WRITE_ONCE(timer->base, new_base);
     264             :         } else {
     265             :                 if (new_cpu_base != this_cpu_base &&
     266             :                     hrtimer_check_target(timer, new_base)) {
     267             :                         new_cpu_base = this_cpu_base;
     268             :                         goto again;
     269             :                 }
     270             :         }
     271             :         return new_base;
     272             : }
     273             : 
     274             : #else /* CONFIG_SMP */
     275             : 
     276             : static inline bool is_migration_base(struct hrtimer_clock_base *base)
     277             : {
     278             :         return false;
     279             : }
     280             : 
     281             : static inline struct hrtimer_clock_base *
     282             : lock_hrtimer_base(const struct hrtimer *timer, unsigned long *flags)
     283             : {
     284           0 :         struct hrtimer_clock_base *base = timer->base;
     285             : 
     286           0 :         raw_spin_lock_irqsave(&base->cpu_base->lock, *flags);
     287             : 
     288             :         return base;
     289             : }
     290             : 
     291             : # define switch_hrtimer_base(t, b, p)   (b)
     292             : 
     293             : #endif  /* !CONFIG_SMP */
     294             : 
     295             : /*
      296             :  * Functions for the scalar nanosecond storage format of ktime_t which
      297             :  * are too large for inlining:
     298             :  */
     299             : #if BITS_PER_LONG < 64
     300             : /*
     301             :  * Divide a ktime value by a nanosecond value
     302             :  */
     303             : s64 __ktime_divns(const ktime_t kt, s64 div)
     304             : {
     305             :         int sft = 0;
     306             :         s64 dclc;
     307             :         u64 tmp;
     308             : 
     309             :         dclc = ktime_to_ns(kt);
     310             :         tmp = dclc < 0 ? -dclc : dclc;
     311             : 
     312             :         /* Make sure the divisor is less than 2^32: */
     313             :         while (div >> 32) {
     314             :                 sft++;
     315             :                 div >>= 1;
     316             :         }
     317             :         tmp >>= sft;
     318             :         do_div(tmp, (u32) div);
     319             :         return dclc < 0 ? -tmp : tmp;
     320             : }
     321             : EXPORT_SYMBOL_GPL(__ktime_divns);
      322             : #endif /* BITS_PER_LONG < 64 */
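
A self-contained userspace-style sketch of the same shift-the-divisor-below-2^32
trick (example values are hypothetical; the kernel path uses do_div() instead of
a plain 64-bit division):

        #include <stdint.h>
        #include <stdio.h>

        static int64_t divns_sketch(int64_t kt_ns, int64_t div_ns)
        {
                uint64_t tmp = kt_ns < 0 ? -kt_ns : kt_ns;
                int sft = 0;

                while (div_ns >> 32) {          /* shrink divisor until it fits in 32 bits */
                        sft++;
                        div_ns >>= 1;
                }
                tmp >>= sft;                    /* scale the dividend by the same amount */
                tmp /= (uint32_t)div_ns;
                return kt_ns < 0 ? -(int64_t)tmp : (int64_t)tmp;
        }

        int main(void)
        {
                /* 10 s worth of nanoseconds divided by a 3 s interval -> 3 */
                printf("%lld\n", (long long)divns_sketch(10000000000LL, 3000000000LL));
                return 0;
        }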
     323             : 
     324             : /*
     325             :  * Add two ktime values and do a safety check for overflow:
     326             :  */
     327           0 : ktime_t ktime_add_safe(const ktime_t lhs, const ktime_t rhs)
     328             : {
     329           0 :         ktime_t res = ktime_add_unsafe(lhs, rhs);
     330             : 
     331             :         /*
     332             :          * We use KTIME_SEC_MAX here, the maximum timeout which we can
     333             :          * return to user space in a timespec:
     334             :          */
     335           0 :         if (res < 0 || res < lhs || res < rhs)
     336           0 :                 res = ktime_set(KTIME_SEC_MAX, 0);
     337             : 
     338           0 :         return res;
     339             : }
     340             : 
     341             : EXPORT_SYMBOL_GPL(ktime_add_safe);
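
For illustration (hypothetical values): with lhs just below KTIME_MAX and
rhs = 100 ns, the unchecked addition wraps around to a negative value; the
check above then clamps the result to ktime_set(KTIME_SEC_MAX, 0), the largest
timeout that can be handed back to user space in a timespec.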
     342             : 
     343             : #ifdef CONFIG_DEBUG_OBJECTS_TIMERS
     344             : 
     345             : static const struct debug_obj_descr hrtimer_debug_descr;
     346             : 
     347             : static void *hrtimer_debug_hint(void *addr)
     348             : {
     349             :         return ((struct hrtimer *) addr)->function;
     350             : }
     351             : 
     352             : /*
     353             :  * fixup_init is called when:
     354             :  * - an active object is initialized
     355             :  */
     356             : static bool hrtimer_fixup_init(void *addr, enum debug_obj_state state)
     357             : {
     358             :         struct hrtimer *timer = addr;
     359             : 
     360             :         switch (state) {
     361             :         case ODEBUG_STATE_ACTIVE:
     362             :                 hrtimer_cancel(timer);
     363             :                 debug_object_init(timer, &hrtimer_debug_descr);
     364             :                 return true;
     365             :         default:
     366             :                 return false;
     367             :         }
     368             : }
     369             : 
     370             : /*
     371             :  * fixup_activate is called when:
     372             :  * - an active object is activated
     373             :  * - an unknown non-static object is activated
     374             :  */
     375             : static bool hrtimer_fixup_activate(void *addr, enum debug_obj_state state)
     376             : {
     377             :         switch (state) {
     378             :         case ODEBUG_STATE_ACTIVE:
     379             :                 WARN_ON(1);
     380             :                 fallthrough;
     381             :         default:
     382             :                 return false;
     383             :         }
     384             : }
     385             : 
     386             : /*
     387             :  * fixup_free is called when:
     388             :  * - an active object is freed
     389             :  */
     390             : static bool hrtimer_fixup_free(void *addr, enum debug_obj_state state)
     391             : {
     392             :         struct hrtimer *timer = addr;
     393             : 
     394             :         switch (state) {
     395             :         case ODEBUG_STATE_ACTIVE:
     396             :                 hrtimer_cancel(timer);
     397             :                 debug_object_free(timer, &hrtimer_debug_descr);
     398             :                 return true;
     399             :         default:
     400             :                 return false;
     401             :         }
     402             : }
     403             : 
     404             : static const struct debug_obj_descr hrtimer_debug_descr = {
     405             :         .name           = "hrtimer",
     406             :         .debug_hint     = hrtimer_debug_hint,
     407             :         .fixup_init     = hrtimer_fixup_init,
     408             :         .fixup_activate = hrtimer_fixup_activate,
     409             :         .fixup_free     = hrtimer_fixup_free,
     410             : };
     411             : 
     412             : static inline void debug_hrtimer_init(struct hrtimer *timer)
     413             : {
     414             :         debug_object_init(timer, &hrtimer_debug_descr);
     415             : }
     416             : 
     417             : static inline void debug_hrtimer_activate(struct hrtimer *timer,
     418             :                                           enum hrtimer_mode mode)
     419             : {
     420             :         debug_object_activate(timer, &hrtimer_debug_descr);
     421             : }
     422             : 
     423             : static inline void debug_hrtimer_deactivate(struct hrtimer *timer)
     424             : {
     425             :         debug_object_deactivate(timer, &hrtimer_debug_descr);
     426             : }
     427             : 
     428             : static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id,
     429             :                            enum hrtimer_mode mode);
     430             : 
     431             : void hrtimer_init_on_stack(struct hrtimer *timer, clockid_t clock_id,
     432             :                            enum hrtimer_mode mode)
     433             : {
     434             :         debug_object_init_on_stack(timer, &hrtimer_debug_descr);
     435             :         __hrtimer_init(timer, clock_id, mode);
     436             : }
     437             : EXPORT_SYMBOL_GPL(hrtimer_init_on_stack);
     438             : 
     439             : static void __hrtimer_init_sleeper(struct hrtimer_sleeper *sl,
     440             :                                    clockid_t clock_id, enum hrtimer_mode mode);
     441             : 
     442             : void hrtimer_init_sleeper_on_stack(struct hrtimer_sleeper *sl,
     443             :                                    clockid_t clock_id, enum hrtimer_mode mode)
     444             : {
     445             :         debug_object_init_on_stack(&sl->timer, &hrtimer_debug_descr);
     446             :         __hrtimer_init_sleeper(sl, clock_id, mode);
     447             : }
     448             : EXPORT_SYMBOL_GPL(hrtimer_init_sleeper_on_stack);
     449             : 
     450             : void destroy_hrtimer_on_stack(struct hrtimer *timer)
     451             : {
     452             :         debug_object_free(timer, &hrtimer_debug_descr);
     453             : }
     454             : EXPORT_SYMBOL_GPL(destroy_hrtimer_on_stack);
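
A minimal illustrative lifecycle for an on-stack timer (sketch only; my_cb is a
hypothetical caller-supplied callback and the context is assumed to be allowed
to sleep in hrtimer_cancel()):

        static void on_stack_example(void)
        {
                struct hrtimer t;

                hrtimer_init_on_stack(&t, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
                t.function = my_cb;                     /* hypothetical callback */
                hrtimer_start(&t, ms_to_ktime(10), HRTIMER_MODE_REL);

                /* ... work that the timer watches over ... */

                hrtimer_cancel(&t);                     /* wait out a running callback */
                destroy_hrtimer_on_stack(&t);           /* pairs with hrtimer_init_on_stack() */
        }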
     455             : 
     456             : #else
     457             : 
     458             : static inline void debug_hrtimer_init(struct hrtimer *timer) { }
     459             : static inline void debug_hrtimer_activate(struct hrtimer *timer,
     460             :                                           enum hrtimer_mode mode) { }
     461             : static inline void debug_hrtimer_deactivate(struct hrtimer *timer) { }
     462             : #endif
     463             : 
     464             : static inline void
     465             : debug_init(struct hrtimer *timer, clockid_t clockid,
     466             :            enum hrtimer_mode mode)
     467             : {
     468        1593 :         debug_hrtimer_init(timer);
     469        1593 :         trace_hrtimer_init(timer, clockid, mode);
     470             : }
     471             : 
     472             : static inline void debug_activate(struct hrtimer *timer,
     473             :                                   enum hrtimer_mode mode)
     474             : {
     475           0 :         debug_hrtimer_activate(timer, mode);
     476           0 :         trace_hrtimer_start(timer, mode);
     477             : }
     478             : 
     479             : static inline void debug_deactivate(struct hrtimer *timer)
     480             : {
     481           0 :         debug_hrtimer_deactivate(timer);
     482           0 :         trace_hrtimer_cancel(timer);
     483             : }
     484             : 
     485             : static struct hrtimer_clock_base *
     486             : __next_base(struct hrtimer_cpu_base *cpu_base, unsigned int *active)
     487             : {
     488             :         unsigned int idx;
     489             : 
     490        2723 :         if (!*active)
     491             :                 return NULL;
     492             : 
     493           0 :         idx = __ffs(*active);
     494           0 :         *active &= ~(1U << idx);
     495             : 
     496           0 :         return &cpu_base->clock_base[idx];
     497             : }
     498             : 
     499             : #define for_each_active_base(base, cpu_base, active)    \
     500             :         while ((base = __next_base((cpu_base), &(active))))
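
A standalone sketch of the same lowest-set-bit walk, using the generic
__builtin_ctz() in place of the kernel's __ffs() (the example bitmap value is
hypothetical):

        #include <stdio.h>

        int main(void)
        {
                unsigned int active = 0x05;     /* say, bases 0 and 2 have queued timers */

                while (active) {
                        unsigned int idx = __builtin_ctz(active);      /* lowest set bit */

                        active &= ~(1U << idx);
                        printf("visit clock_base[%u]\n", idx);         /* 0, then 2 */
                }
                return 0;
        }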
     501             : 
     502           0 : static ktime_t __hrtimer_next_event_base(struct hrtimer_cpu_base *cpu_base,
     503             :                                          const struct hrtimer *exclude,
     504             :                                          unsigned int active,
     505             :                                          ktime_t expires_next)
     506             : {
     507             :         struct hrtimer_clock_base *base;
     508             :         ktime_t expires;
     509             : 
     510           0 :         for_each_active_base(base, cpu_base, active) {
     511             :                 struct timerqueue_node *next;
     512             :                 struct hrtimer *timer;
     513             : 
     514           0 :                 next = timerqueue_getnext(&base->active);
     515           0 :                 timer = container_of(next, struct hrtimer, node);
     516           0 :                 if (timer == exclude) {
     517             :                         /* Get to the next timer in the queue. */
     518           0 :                         next = timerqueue_iterate_next(next);
     519           0 :                         if (!next)
     520           0 :                                 continue;
     521             : 
     522             :                         timer = container_of(next, struct hrtimer, node);
     523             :                 }
     524           0 :                 expires = ktime_sub(hrtimer_get_expires(timer), base->offset);
     525           0 :                 if (expires < expires_next) {
     526           0 :                         expires_next = expires;
     527             : 
     528             :                         /* Skip cpu_base update if a timer is being excluded. */
     529           0 :                         if (exclude)
     530           0 :                                 continue;
     531             : 
     532           0 :                         if (timer->is_soft)
     533           0 :                                 cpu_base->softirq_next_timer = timer;
     534             :                         else
     535           0 :                                 cpu_base->next_timer = timer;
     536             :                 }
     537             :         }
     538             :         /*
     539             :          * clock_was_set() might have changed base->offset of any of
     540             :          * the clock bases so the result might be negative. Fix it up
     541             :          * to prevent a false positive in clockevents_program_event().
     542             :          */
     543           0 :         if (expires_next < 0)
     544           0 :                 expires_next = 0;
     545           0 :         return expires_next;
     546             : }
     547             : 
     548             : /*
     549             :  * Recomputes cpu_base::*next_timer and returns the earliest expires_next
     550             :  * but does not set cpu_base::*expires_next, that is done by
     551             :  * hrtimer[_force]_reprogram and hrtimer_interrupt only. When updating
     552             :  * cpu_base::*expires_next right away, reprogramming logic would no longer
     553             :  * work.
     554             :  *
     555             :  * When a softirq is pending, we can ignore the HRTIMER_ACTIVE_SOFT bases,
     556             :  * those timers will get run whenever the softirq gets handled, at the end of
     557             :  * hrtimer_run_softirq(), hrtimer_update_softirq_timer() will re-add these bases.
     558             :  *
     559             :  * Therefore softirq values are those from the HRTIMER_ACTIVE_SOFT clock bases.
     560             :  * The !softirq values are the minima across HRTIMER_ACTIVE_ALL, unless an actual
     561             :  * softirq is pending, in which case they're the minima of HRTIMER_ACTIVE_HARD.
     562             :  *
     563             :  * @active_mask must be one of:
     564             :  *  - HRTIMER_ACTIVE_ALL,
     565             :  *  - HRTIMER_ACTIVE_SOFT, or
     566             :  *  - HRTIMER_ACTIVE_HARD.
     567             :  */
     568             : static ktime_t
     569           0 : __hrtimer_get_next_event(struct hrtimer_cpu_base *cpu_base, unsigned int active_mask)
     570             : {
     571             :         unsigned int active;
     572           0 :         struct hrtimer *next_timer = NULL;
     573           0 :         ktime_t expires_next = KTIME_MAX;
     574             : 
     575           0 :         if (!cpu_base->softirq_activated && (active_mask & HRTIMER_ACTIVE_SOFT)) {
     576           0 :                 active = cpu_base->active_bases & HRTIMER_ACTIVE_SOFT;
     577           0 :                 cpu_base->softirq_next_timer = NULL;
     578           0 :                 expires_next = __hrtimer_next_event_base(cpu_base, NULL,
     579             :                                                          active, KTIME_MAX);
     580             : 
     581           0 :                 next_timer = cpu_base->softirq_next_timer;
     582             :         }
     583             : 
     584           0 :         if (active_mask & HRTIMER_ACTIVE_HARD) {
     585           0 :                 active = cpu_base->active_bases & HRTIMER_ACTIVE_HARD;
     586           0 :                 cpu_base->next_timer = next_timer;
     587           0 :                 expires_next = __hrtimer_next_event_base(cpu_base, NULL, active,
     588             :                                                          expires_next);
     589             :         }
     590             : 
     591           0 :         return expires_next;
     592             : }
     593             : 
     594           0 : static ktime_t hrtimer_update_next_event(struct hrtimer_cpu_base *cpu_base)
     595             : {
     596           0 :         ktime_t expires_next, soft = KTIME_MAX;
     597             : 
     598             :         /*
     599             :          * If the soft interrupt has already been activated, ignore the
     600             :          * soft bases. They will be handled in the already raised soft
     601             :          * interrupt.
     602             :          */
     603           0 :         if (!cpu_base->softirq_activated) {
     604           0 :                 soft = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE_SOFT);
     605             :                 /*
     606             :                  * Update the soft expiry time. clock_settime() might have
     607             :                  * affected it.
     608             :                  */
     609           0 :                 cpu_base->softirq_expires_next = soft;
     610             :         }
     611             : 
     612           0 :         expires_next = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE_HARD);
     613             :         /*
     614             :          * If a softirq timer is expiring first, update cpu_base->next_timer
     615             :          * and program the hardware with the soft expiry time.
     616             :          */
     617           0 :         if (expires_next > soft) {
     618           0 :                 cpu_base->next_timer = cpu_base->softirq_next_timer;
     619           0 :                 expires_next = soft;
     620             :         }
     621             : 
     622           0 :         return expires_next;
     623             : }
     624             : 
     625             : static inline ktime_t hrtimer_update_base(struct hrtimer_cpu_base *base)
     626             : {
     627        2723 :         ktime_t *offs_real = &base->clock_base[HRTIMER_BASE_REALTIME].offset;
     628        2723 :         ktime_t *offs_boot = &base->clock_base[HRTIMER_BASE_BOOTTIME].offset;
     629        2723 :         ktime_t *offs_tai = &base->clock_base[HRTIMER_BASE_TAI].offset;
     630             : 
     631        2723 :         ktime_t now = ktime_get_update_offsets_now(&base->clock_was_set_seq,
     632             :                                             offs_real, offs_boot, offs_tai);
     633             : 
     634        2723 :         base->clock_base[HRTIMER_BASE_REALTIME_SOFT].offset = *offs_real;
     635        2723 :         base->clock_base[HRTIMER_BASE_BOOTTIME_SOFT].offset = *offs_boot;
     636        2723 :         base->clock_base[HRTIMER_BASE_TAI_SOFT].offset = *offs_tai;
     637             : 
     638             :         return now;
     639             : }
     640             : 
     641             : /*
     642             :  * Is the high resolution mode active ?
     643             :  */
     644             : static inline int __hrtimer_hres_active(struct hrtimer_cpu_base *cpu_base)
     645             : {
     646             :         return IS_ENABLED(CONFIG_HIGH_RES_TIMERS) ?
     647             :                 cpu_base->hres_active : 0;
     648             : }
     649             : 
     650             : static inline int hrtimer_hres_active(void)
     651             : {
     652             :         return __hrtimer_hres_active(this_cpu_ptr(&hrtimer_bases));
     653             : }
     654             : 
     655             : static void __hrtimer_reprogram(struct hrtimer_cpu_base *cpu_base,
     656             :                                 struct hrtimer *next_timer,
     657             :                                 ktime_t expires_next)
     658             : {
     659           0 :         cpu_base->expires_next = expires_next;
     660             : 
     661             :         /*
     662             :          * If hres is not active, hardware does not have to be
     663             :          * reprogrammed yet.
     664             :          *
     665             :          * If a hang was detected in the last timer interrupt then we
     666             :          * leave the hang delay active in the hardware. We want the
     667             :          * system to make progress. That also prevents the following
     668             :          * scenario:
     669             :          * T1 expires 50ms from now
     670             :          * T2 expires 5s from now
     671             :          *
     672             :          * T1 is removed, so this code is called and would reprogram
     673             :          * the hardware to 5s from now. Any hrtimer_start after that
     674             :          * will not reprogram the hardware due to hang_detected being
     675             :          * set. So we'd effectively block all timers until the T2 event
     676             :          * fires.
     677             :          */
     678           0 :         if (!__hrtimer_hres_active(cpu_base) || cpu_base->hang_detected)
     679             :                 return;
     680             : 
     681             :         tick_program_event(expires_next, 1);
     682             : }
     683             : 
     684             : /*
     685             :  * Reprogram the event source with checking both queues for the
     686             :  * next event
     687             :  * Called with interrupts disabled and base->lock held
     688             :  */
     689             : static void
     690             : hrtimer_force_reprogram(struct hrtimer_cpu_base *cpu_base, int skip_equal)
     691             : {
     692             :         ktime_t expires_next;
     693             : 
     694           0 :         expires_next = hrtimer_update_next_event(cpu_base);
     695             : 
     696           0 :         if (skip_equal && expires_next == cpu_base->expires_next)
     697             :                 return;
     698             : 
     699           0 :         __hrtimer_reprogram(cpu_base, cpu_base->next_timer, expires_next);
     700             : }
     701             : 
     702             : /* High resolution timer related functions */
     703             : #ifdef CONFIG_HIGH_RES_TIMERS
     704             : 
     705             : /*
     706             :  * High resolution timer enabled ?
     707             :  */
     708             : static bool hrtimer_hres_enabled __read_mostly  = true;
     709             : unsigned int hrtimer_resolution __read_mostly = LOW_RES_NSEC;
     710             : EXPORT_SYMBOL_GPL(hrtimer_resolution);
     711             : 
     712             : /*
     713             :  * Enable / Disable high resolution mode
     714             :  */
     715             : static int __init setup_hrtimer_hres(char *str)
     716             : {
     717             :         return (kstrtobool(str, &hrtimer_hres_enabled) == 0);
     718             : }
     719             : 
     720             : __setup("highres=", setup_hrtimer_hres);
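
For example, booting with "highres=off" (or "highres=0") on the kernel command
line keeps high resolution mode disabled; kstrtobool() accepts the usual
on/off, y/n and 0/1 spellings.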
     721             : 
     722             : /*
      723             :  * hrtimer_is_hres_enabled - query whether the highres mode is enabled
     724             :  */
     725             : static inline int hrtimer_is_hres_enabled(void)
     726             : {
     727             :         return hrtimer_hres_enabled;
     728             : }
     729             : 
     730             : static void retrigger_next_event(void *arg);
     731             : 
     732             : /*
     733             :  * Switch to high resolution mode
     734             :  */
     735             : static void hrtimer_switch_to_hres(void)
     736             : {
     737             :         struct hrtimer_cpu_base *base = this_cpu_ptr(&hrtimer_bases);
     738             : 
     739             :         if (tick_init_highres()) {
     740             :                 pr_warn("Could not switch to high resolution mode on CPU %u\n",
     741             :                         base->cpu);
     742             :                 return;
     743             :         }
     744             :         base->hres_active = 1;
     745             :         hrtimer_resolution = HIGH_RES_NSEC;
     746             : 
     747             :         tick_setup_sched_timer();
     748             :         /* "Retrigger" the interrupt to get things going */
     749             :         retrigger_next_event(NULL);
     750             : }
     751             : 
     752             : #else
     753             : 
     754             : static inline int hrtimer_is_hres_enabled(void) { return 0; }
     755             : static inline void hrtimer_switch_to_hres(void) { }
     756             : 
     757             : #endif /* CONFIG_HIGH_RES_TIMERS */
     758             : /*
     759             :  * Retrigger next event is called after clock was set with interrupts
     760             :  * disabled through an SMP function call or directly from low level
     761             :  * resume code.
     762             :  *
     763             :  * This is only invoked when:
     764             :  *      - CONFIG_HIGH_RES_TIMERS is enabled.
      765             :  *      - CONFIG_NO_HZ_COMMON is enabled.
     766             :  *
     767             :  * For the other cases this function is empty and because the call sites
     768             :  * are optimized out it vanishes as well, i.e. no need for lots of
     769             :  * #ifdeffery.
     770             :  */
     771             : static void retrigger_next_event(void *arg)
     772             : {
     773           0 :         struct hrtimer_cpu_base *base = this_cpu_ptr(&hrtimer_bases);
     774             : 
     775             :         /*
     776             :          * When high resolution mode or nohz is active, then the offsets of
     777             :          * CLOCK_REALTIME/TAI/BOOTTIME have to be updated. Otherwise the
     778             :          * next tick will take care of that.
     779             :          *
     780             :          * If high resolution mode is active then the next expiring timer
     781             :          * must be reevaluated and the clock event device reprogrammed if
     782             :          * necessary.
     783             :          *
     784             :          * In the NOHZ case the update of the offset and the reevaluation
     785             :          * of the next expiring timer is enough. The return from the SMP
     786             :          * function call will take care of the reprogramming in case the
     787             :          * CPU was in a NOHZ idle sleep.
     788             :          */
     789           0 :         if (!__hrtimer_hres_active(base) && !tick_nohz_active)
     790             :                 return;
     791             : 
     792             :         raw_spin_lock(&base->lock);
     793             :         hrtimer_update_base(base);
     794             :         if (__hrtimer_hres_active(base))
     795             :                 hrtimer_force_reprogram(base, 0);
     796             :         else
     797             :                 hrtimer_update_next_event(base);
     798             :         raw_spin_unlock(&base->lock);
     799             : }
     800             : 
     801             : /*
     802             :  * When a timer is enqueued and expires earlier than the already enqueued
      803             :  * timers, we have to check whether it expires earlier than the timer for
     804             :  * which the clock event device was armed.
     805             :  *
     806             :  * Called with interrupts disabled and base->cpu_base.lock held
     807             :  */
     808           0 : static void hrtimer_reprogram(struct hrtimer *timer, bool reprogram)
     809             : {
     810           0 :         struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases);
     811           0 :         struct hrtimer_clock_base *base = timer->base;
     812           0 :         ktime_t expires = ktime_sub(hrtimer_get_expires(timer), base->offset);
     813             : 
     814           0 :         WARN_ON_ONCE(hrtimer_get_expires_tv64(timer) < 0);
     815             : 
     816             :         /*
     817             :          * CLOCK_REALTIME timer might be requested with an absolute
     818             :          * expiry time which is less than base->offset. Set it to 0.
     819             :          */
     820           0 :         if (expires < 0)
     821           0 :                 expires = 0;
     822             : 
     823           0 :         if (timer->is_soft) {
     824             :                 /*
     825             :                  * soft hrtimer could be started on a remote CPU. In this
     826             :                  * case softirq_expires_next needs to be updated on the
     827             :                  * remote CPU. The soft hrtimer will not expire before the
     828             :                  * first hard hrtimer on the remote CPU -
     829             :                  * hrtimer_check_target() prevents this case.
     830             :                  */
     831           0 :                 struct hrtimer_cpu_base *timer_cpu_base = base->cpu_base;
     832             : 
     833           0 :                 if (timer_cpu_base->softirq_activated)
     834             :                         return;
     835             : 
     836           0 :                 if (!ktime_before(expires, timer_cpu_base->softirq_expires_next))
     837             :                         return;
     838             : 
     839           0 :                 timer_cpu_base->softirq_next_timer = timer;
     840           0 :                 timer_cpu_base->softirq_expires_next = expires;
     841             : 
     842           0 :                 if (!ktime_before(expires, timer_cpu_base->expires_next) ||
     843             :                     !reprogram)
     844             :                         return;
     845             :         }
     846             : 
     847             :         /*
     848             :          * If the timer is not on the current cpu, we cannot reprogram
     849             :          * the other cpus clock event device.
     850             :          */
     851           0 :         if (base->cpu_base != cpu_base)
     852             :                 return;
     853             : 
     854           0 :         if (expires >= cpu_base->expires_next)
     855             :                 return;
     856             : 
     857             :         /*
     858             :          * If the hrtimer interrupt is running, then it will reevaluate the
     859             :          * clock bases and reprogram the clock event device.
     860             :          */
     861           0 :         if (cpu_base->in_hrtirq)
     862             :                 return;
     863             : 
     864           0 :         cpu_base->next_timer = timer;
     865             : 
     866           0 :         __hrtimer_reprogram(cpu_base, timer, expires);
     867             : }
     868             : 
     869             : static bool update_needs_ipi(struct hrtimer_cpu_base *cpu_base,
     870             :                              unsigned int active)
     871             : {
     872             :         struct hrtimer_clock_base *base;
     873             :         unsigned int seq;
     874             :         ktime_t expires;
     875             : 
     876             :         /*
      877             :  * Update the base offsets unconditionally so that the following check
      878             :  * for whether the SMP function call is required works.
     879             :          *
     880             :          * The update is safe even when the remote CPU is in the hrtimer
     881             :          * interrupt or the hrtimer soft interrupt and expiring affected
     882             :          * bases. Either it will see the update before handling a base or
     883             :          * it will see it when it finishes the processing and reevaluates
     884             :          * the next expiring timer.
     885             :          */
     886             :         seq = cpu_base->clock_was_set_seq;
     887             :         hrtimer_update_base(cpu_base);
     888             : 
     889             :         /*
     890             :          * If the sequence did not change over the update then the
     891             :          * remote CPU already handled it.
     892             :          */
     893             :         if (seq == cpu_base->clock_was_set_seq)
     894             :                 return false;
     895             : 
     896             :         /*
     897             :          * If the remote CPU is currently handling an hrtimer interrupt, it
     898             :          * will reevaluate the first expiring timer of all clock bases
     899             :          * before reprogramming. Nothing to do here.
     900             :          */
     901             :         if (cpu_base->in_hrtirq)
     902             :                 return false;
     903             : 
     904             :         /*
     905             :          * Walk the affected clock bases and check whether the first expiring
     906             :          * timer in a clock base is moving ahead of the first expiring timer of
     907             :          * @cpu_base. If so, the IPI must be invoked because per CPU clock
     908             :          * event devices cannot be remotely reprogrammed.
     909             :          */
     910             :         active &= cpu_base->active_bases;
     911             : 
     912             :         for_each_active_base(base, cpu_base, active) {
     913             :                 struct timerqueue_node *next;
     914             : 
     915             :                 next = timerqueue_getnext(&base->active);
     916             :                 expires = ktime_sub(next->expires, base->offset);
     917             :                 if (expires < cpu_base->expires_next)
     918             :                         return true;
     919             : 
     920             :                 /* Extra check for softirq clock bases */
     921             :                 if (base->clockid < HRTIMER_BASE_MONOTONIC_SOFT)
     922             :                         continue;
     923             :                 if (cpu_base->softirq_activated)
     924             :                         continue;
     925             :                 if (expires < cpu_base->softirq_expires_next)
     926             :                         return true;
     927             :         }
     928             :         return false;
     929             : }
     930             : 
     931             : /*
     932             :  * Clock was set. This might affect CLOCK_REALTIME, CLOCK_TAI and
     933             :  * CLOCK_BOOTTIME (for late sleep time injection).
     934             :  *
      935             :  * This requires updating the offsets for these clocks
      936             :  * vs. CLOCK_MONOTONIC. When high resolution timers are enabled, this
      937             :  * also requires eventually reprogramming the per CPU clock event devices
     938             :  * when the change moves an affected timer ahead of the first expiring
     939             :  * timer on that CPU. Obviously remote per CPU clock event devices cannot
     940             :  * be reprogrammed. The other reason why an IPI has to be sent is when the
     941             :  * system is in !HIGH_RES and NOHZ mode. The NOHZ mode updates the offsets
     942             :  * in the tick, which obviously might be stopped, so this has to bring out
     943             :  * the remote CPU which might sleep in idle to get this sorted.
     944             :  */
     945           0 : void clock_was_set(unsigned int bases)
     946             : {
     947           0 :         struct hrtimer_cpu_base *cpu_base = raw_cpu_ptr(&hrtimer_bases);
     948             :         cpumask_var_t mask;
     949             :         int cpu;
     950             : 
     951           0 :         if (!__hrtimer_hres_active(cpu_base) && !tick_nohz_active)
     952             :                 goto out_timerfd;
     953             : 
     954             :         if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) {
     955             :                 on_each_cpu(retrigger_next_event, NULL, 1);
     956             :                 goto out_timerfd;
     957             :         }
     958             : 
     959             :         /* Avoid interrupting CPUs if possible */
     960             :         cpus_read_lock();
     961             :         for_each_online_cpu(cpu) {
     962             :                 unsigned long flags;
     963             : 
     964             :                 cpu_base = &per_cpu(hrtimer_bases, cpu);
     965             :                 raw_spin_lock_irqsave(&cpu_base->lock, flags);
     966             : 
     967             :                 if (update_needs_ipi(cpu_base, bases))
     968             :                         cpumask_set_cpu(cpu, mask);
     969             : 
     970             :                 raw_spin_unlock_irqrestore(&cpu_base->lock, flags);
     971             :         }
     972             : 
     973             :         preempt_disable();
     974             :         smp_call_function_many(mask, retrigger_next_event, NULL, 1);
     975             :         preempt_enable();
     976             :         cpus_read_unlock();
     977             :         free_cpumask_var(mask);
     978             : 
     979             : out_timerfd:
     980           0 :         timerfd_clock_was_set();
     981           0 : }
     982             : 
     983           0 : static void clock_was_set_work(struct work_struct *work)
     984             : {
     985           0 :         clock_was_set(CLOCK_SET_WALL);
     986           0 : }
     987             : 
     988             : static DECLARE_WORK(hrtimer_work, clock_was_set_work);
     989             : 
     990             : /*
     991             :  * Called from timekeeping code to reprogram the hrtimer interrupt device
     992             :  * on all cpus and to notify timerfd.
     993             :  */
     994           0 : void clock_was_set_delayed(void)
     995             : {
     996           0 :         schedule_work(&hrtimer_work);
     997           0 : }
     998             : 
     999             : /*
     1000             :  * Called during resume either directly via timekeeping_resume()
    1001             :  * or in the case of s2idle from tick_unfreeze() to ensure that the
    1002             :  * hrtimers are up to date.
    1003             :  */
    1004           0 : void hrtimers_resume_local(void)
    1005             : {
    1006             :         lockdep_assert_irqs_disabled();
    1007             :         /* Retrigger on the local CPU */
    1008           0 :         retrigger_next_event(NULL);
    1009           0 : }
    1010             : 
    1011             : /*
    1012             :  * Counterpart to lock_hrtimer_base above:
    1013             :  */
    1014             : static inline
    1015             : void unlock_hrtimer_base(const struct hrtimer *timer, unsigned long *flags)
    1016             : {
    1017           0 :         raw_spin_unlock_irqrestore(&timer->base->cpu_base->lock, *flags);
    1018             : }
    1019             : 
    1020             : /**
    1021             :  * hrtimer_forward - forward the timer expiry
    1022             :  * @timer:      hrtimer to forward
    1023             :  * @now:        forward past this time
    1024             :  * @interval:   the interval to forward
    1025             :  *
    1026             :  * Forward the timer expiry so it will expire in the future.
    1027             :  * Returns the number of overruns.
    1028             :  *
    1029             :  * Can be safely called from the callback function of @timer. If
     1030             :  * called from other contexts, @timer must neither be enqueued nor
    1031             :  * running the callback and the caller needs to take care of
    1032             :  * serialization.
    1033             :  *
    1034             :  * Note: This only updates the timer expiry value and does not requeue
    1035             :  * the timer.
    1036             :  */
    1037           0 : u64 hrtimer_forward(struct hrtimer *timer, ktime_t now, ktime_t interval)
    1038             : {
    1039           0 :         u64 orun = 1;
    1040             :         ktime_t delta;
    1041             : 
    1042           0 :         delta = ktime_sub(now, hrtimer_get_expires(timer));
    1043             : 
    1044           0 :         if (delta < 0)
    1045             :                 return 0;
    1046             : 
    1047           0 :         if (WARN_ON(timer->state & HRTIMER_STATE_ENQUEUED))
    1048             :                 return 0;
    1049             : 
    1050           0 :         if (interval < hrtimer_resolution)
    1051           0 :                 interval = hrtimer_resolution;
    1052             : 
    1053           0 :         if (unlikely(delta >= interval)) {
    1054           0 :                 s64 incr = ktime_to_ns(interval);
    1055             : 
    1056           0 :                 orun = ktime_divns(delta, incr);
    1057           0 :                 hrtimer_add_expires_ns(timer, incr * orun);
    1058           0 :                 if (hrtimer_get_expires_tv64(timer) > now)
    1059             :                         return orun;
    1060             :                 /*
    1061             :                  * This (and the ktime_add() below) is the
    1062             :                  * correction for exact:
    1063             :                  */
    1064           0 :                 orun++;
    1065             :         }
    1066           0 :         hrtimer_add_expires(timer, interval);
    1067             : 
    1068           0 :         return orun;
    1069             : }
    1070             : EXPORT_SYMBOL_GPL(hrtimer_forward);
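
The typical consumer of hrtimer_forward() is a periodic callback that pushes its own expiry past "now" and asks to be re-enqueued. A minimal sketch of that pattern, assuming a hypothetical callback and a 100 ms period (hrtimer_forward_now() is the convenience wrapper that passes the timer base's current time as @now):

        static enum hrtimer_restart my_periodic_cb(struct hrtimer *timer)
        {
                /* Advance the expiry in 100 ms steps; elapsed periods are folded in. */
                hrtimer_forward_now(timer, ms_to_ktime(100));

                /* Ask the core to re-enqueue the timer with the new expiry. */
                return HRTIMER_RESTART;
        }

As the kerneldoc above notes, hrtimer_forward() only moves the expiry; the requeue happens because the callback returns HRTIMER_RESTART.
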
    1071             : 
    1072             : /*
    1073             :  * enqueue_hrtimer - internal function to (re)start a timer
    1074             :  *
    1075             :  * The timer is inserted in expiry order. Insertion into the
     1076             :  * red-black tree is O(log(n)). Must hold the base lock.
    1077             :  *
    1078             :  * Returns 1 when the new timer is the leftmost timer in the tree.
    1079             :  */
    1080             : static int enqueue_hrtimer(struct hrtimer *timer,
    1081             :                            struct hrtimer_clock_base *base,
    1082             :                            enum hrtimer_mode mode)
    1083             : {
    1084           0 :         debug_activate(timer, mode);
    1085             : 
    1086           0 :         base->cpu_base->active_bases |= 1 << base->index;
    1087             : 
    1088             :         /* Pairs with the lockless read in hrtimer_is_queued() */
    1089           0 :         WRITE_ONCE(timer->state, HRTIMER_STATE_ENQUEUED);
    1090             : 
    1091           0 :         return timerqueue_add(&base->active, &timer->node);
    1092             : }
    1093             : 
    1094             : /*
    1095             :  * __remove_hrtimer - internal function to remove a timer
    1096             :  *
    1097             :  * Caller must hold the base lock.
    1098             :  *
    1099             :  * High resolution timer mode reprograms the clock event device when the
    1100             :  * timer is the one which expires next. The caller can disable this by setting
     1101             :  * reprogram to zero. This is useful when the context does a reprogramming
    1102             :  * anyway (e.g. timer interrupt)
    1103             :  */
    1104           0 : static void __remove_hrtimer(struct hrtimer *timer,
    1105             :                              struct hrtimer_clock_base *base,
    1106             :                              u8 newstate, int reprogram)
    1107             : {
    1108           0 :         struct hrtimer_cpu_base *cpu_base = base->cpu_base;
    1109           0 :         u8 state = timer->state;
    1110             : 
    1111             :         /* Pairs with the lockless read in hrtimer_is_queued() */
    1112           0 :         WRITE_ONCE(timer->state, newstate);
    1113           0 :         if (!(state & HRTIMER_STATE_ENQUEUED))
    1114             :                 return;
    1115             : 
    1116           0 :         if (!timerqueue_del(&base->active, &timer->node))
    1117           0 :                 cpu_base->active_bases &= ~(1 << base->index);
    1118             : 
    1119             :         /*
    1120             :          * Note: If reprogram is false we do not update
    1121             :          * cpu_base->next_timer. This happens when we remove the first
    1122             :          * timer on a remote cpu. No harm as we never dereference
     1123             :  * cpu_base->next_timer. So the worst thing that can happen is
     1124             :  * a superfluous call to hrtimer_force_reprogram() on the
    1125             :          * remote cpu later on if the same timer gets enqueued again.
    1126             :          */
    1127           0 :         if (reprogram && timer == cpu_base->next_timer)
    1128             :                 hrtimer_force_reprogram(cpu_base, 1);
    1129             : }
    1130             : 
    1131             : /*
    1132             :  * remove hrtimer, called with base lock held
    1133             :  */
    1134             : static inline int
    1135             : remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base,
    1136             :                bool restart, bool keep_local)
    1137             : {
    1138           0 :         u8 state = timer->state;
    1139             : 
    1140           0 :         if (state & HRTIMER_STATE_ENQUEUED) {
    1141             :                 bool reprogram;
    1142             : 
    1143             :                 /*
    1144             :                  * Remove the timer and force reprogramming when high
    1145             :                  * resolution mode is active and the timer is on the current
    1146             :                  * CPU. If we remove a timer on another CPU, reprogramming is
    1147             :                  * skipped. The interrupt event on this CPU is fired and
    1148             :                  * reprogramming happens in the interrupt handler. This is a
    1149             :                  * rare case and less expensive than a smp call.
    1150             :                  */
    1151           0 :                 debug_deactivate(timer);
    1152           0 :                 reprogram = base->cpu_base == this_cpu_ptr(&hrtimer_bases);
    1153             : 
    1154             :                 /*
    1155             :                  * If the timer is not restarted then reprogramming is
    1156             :                  * required if the timer is local. If it is local and about
    1157             :                  * to be restarted, avoid programming it twice (on removal
    1158             :                  * and a moment later when it's requeued).
    1159             :                  */
    1160             :                 if (!restart)
    1161             :                         state = HRTIMER_STATE_INACTIVE;
    1162             :                 else
    1163           0 :                         reprogram &= !keep_local;
    1164             : 
    1165           0 :                 __remove_hrtimer(timer, base, state, reprogram);
    1166             :                 return 1;
    1167             :         }
    1168             :         return 0;
    1169             : }
    1170             : 
    1171             : static inline ktime_t hrtimer_update_lowres(struct hrtimer *timer, ktime_t tim,
    1172             :                                             const enum hrtimer_mode mode)
    1173             : {
    1174             : #ifdef CONFIG_TIME_LOW_RES
    1175             :         /*
    1176             :          * CONFIG_TIME_LOW_RES indicates that the system has no way to return
    1177             :          * granular time values. For relative timers we add hrtimer_resolution
     1178             :          * (i.e. one jiffy) to prevent short timeouts.
    1179             :          */
    1180             :         timer->is_rel = mode & HRTIMER_MODE_REL;
    1181             :         if (timer->is_rel)
    1182             :                 tim = ktime_add_safe(tim, hrtimer_resolution);
    1183             : #endif
    1184             :         return tim;
    1185             : }
    1186             : 
    1187             : static void
    1188           0 : hrtimer_update_softirq_timer(struct hrtimer_cpu_base *cpu_base, bool reprogram)
    1189             : {
    1190             :         ktime_t expires;
    1191             : 
    1192             :         /*
    1193             :          * Find the next SOFT expiration.
    1194             :          */
    1195           0 :         expires = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE_SOFT);
    1196             : 
    1197             :         /*
     1198             :          * Reprogramming needs to be triggered, even if the next soft
     1199             :          * hrtimer expires at the same time as the next hard
    1200             :          * hrtimer. cpu_base->softirq_expires_next needs to be updated!
    1201             :          */
    1202           0 :         if (expires == KTIME_MAX)
    1203             :                 return;
    1204             : 
    1205             :         /*
    1206             :          * cpu_base->*next_timer is recomputed by __hrtimer_get_next_event()
    1207             :          * cpu_base->*expires_next is only set by hrtimer_reprogram()
    1208             :          */
    1209           0 :         hrtimer_reprogram(cpu_base->softirq_next_timer, reprogram);
    1210             : }
    1211             : 
    1212           0 : static int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
    1213             :                                     u64 delta_ns, const enum hrtimer_mode mode,
    1214             :                                     struct hrtimer_clock_base *base)
    1215             : {
    1216             :         struct hrtimer_clock_base *new_base;
    1217             :         bool force_local, first;
    1218             : 
    1219             :         /*
    1220             :          * If the timer is on the local cpu base and is the first expiring
    1221             :          * timer then this might end up reprogramming the hardware twice
     1222             :          * (on removal and on enqueue). To avoid that, skip the
     1223             :          * reprogram on removal, keep the timer local to the current CPU
     1224             :          * and enforce reprogramming after it is queued, no matter whether
    1225             :          * it is the new first expiring timer again or not.
    1226             :          */
    1227           0 :         force_local = base->cpu_base == this_cpu_ptr(&hrtimer_bases);
    1228           0 :         force_local &= base->cpu_base->next_timer == timer;
    1229             : 
    1230             :         /*
    1231             :          * Remove an active timer from the queue. In case it is not queued
    1232             :          * on the current CPU, make sure that remove_hrtimer() updates the
    1233             :          * remote data correctly.
    1234             :          *
     1235             :          * If it's on the current CPU and the first expiring timer, then
     1236             :          * skip reprogramming on removal, keep the timer local and enforce
     1237             :          * reprogramming after it is queued.  This
    1238             :          * avoids programming the underlying clock event twice (once at
    1239             :          * removal and once after enqueue).
    1240             :          */
    1241           0 :         remove_hrtimer(timer, base, true, force_local);
    1242             : 
    1243           0 :         if (mode & HRTIMER_MODE_REL)
    1244           0 :                 tim = ktime_add_safe(tim, base->get_time());
    1245             : 
    1246           0 :         tim = hrtimer_update_lowres(timer, tim, mode);
    1247             : 
    1248           0 :         hrtimer_set_expires_range_ns(timer, tim, delta_ns);
    1249             : 
    1250             :         /* Switch the timer base, if necessary: */
    1251             :         if (!force_local) {
    1252             :                 new_base = switch_hrtimer_base(timer, base,
    1253             :                                                mode & HRTIMER_MODE_PINNED);
    1254             :         } else {
    1255             :                 new_base = base;
    1256             :         }
    1257             : 
    1258           0 :         first = enqueue_hrtimer(timer, new_base, mode);
    1259           0 :         if (!force_local)
    1260             :                 return first;
    1261             : 
    1262             :         /*
    1263             :          * Timer was forced to stay on the current CPU to avoid
    1264             :          * reprogramming on removal and enqueue. Force reprogram the
    1265             :          * hardware by evaluating the new first expiring timer.
    1266             :          */
    1267           0 :         hrtimer_force_reprogram(new_base->cpu_base, 1);
    1268             :         return 0;
    1269             : }
    1270             : 
    1271             : /**
    1272             :  * hrtimer_start_range_ns - (re)start an hrtimer
    1273             :  * @timer:      the timer to be added
    1274             :  * @tim:        expiry time
    1275             :  * @delta_ns:   "slack" range for the timer
    1276             :  * @mode:       timer mode: absolute (HRTIMER_MODE_ABS) or
    1277             :  *              relative (HRTIMER_MODE_REL), and pinned (HRTIMER_MODE_PINNED);
    1278             :  *              softirq based mode is considered for debug purpose only!
    1279             :  */
    1280           0 : void hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
    1281             :                             u64 delta_ns, const enum hrtimer_mode mode)
    1282             : {
    1283             :         struct hrtimer_clock_base *base;
    1284             :         unsigned long flags;
    1285             : 
    1286             :         /*
    1287             :          * Check whether the HRTIMER_MODE_SOFT bit and hrtimer.is_soft
    1288             :          * match on CONFIG_PREEMPT_RT = n. With PREEMPT_RT check the hard
    1289             :          * expiry mode because unmarked timers are moved to softirq expiry.
    1290             :          */
    1291             :         if (!IS_ENABLED(CONFIG_PREEMPT_RT))
    1292           0 :                 WARN_ON_ONCE(!(mode & HRTIMER_MODE_SOFT) ^ !timer->is_soft);
    1293             :         else
    1294             :                 WARN_ON_ONCE(!(mode & HRTIMER_MODE_HARD) ^ !timer->is_hard);
    1295             : 
    1296           0 :         base = lock_hrtimer_base(timer, &flags);
    1297             : 
    1298           0 :         if (__hrtimer_start_range_ns(timer, tim, delta_ns, mode, base))
    1299           0 :                 hrtimer_reprogram(timer, true);
    1300             : 
    1301           0 :         unlock_hrtimer_base(timer, &flags);
    1302           0 : }
    1303             : EXPORT_SYMBOL_GPL(hrtimer_start_range_ns);
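
For callers that can tolerate some slack in the expiry, a relative start with a range might look like the sketch below (the timer name, the 10 ms delay and the 1 ms slack are illustrative assumptions, not from this file):

        /* Expire no earlier than 10 ms from now, ideally within 1 ms after that. */
        hrtimer_start_range_ns(&my_timer, ms_to_ktime(10),
                               NSEC_PER_MSEC, HRTIMER_MODE_REL);

Allowing a slack window lets nearby expiries be coalesced instead of programming the clock event device separately for each timer.
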
    1304             : 
    1305             : /**
    1306             :  * hrtimer_try_to_cancel - try to deactivate a timer
    1307             :  * @timer:      hrtimer to stop
    1308             :  *
    1309             :  * Returns:
    1310             :  *
    1311             :  *  *  0 when the timer was not active
    1312             :  *  *  1 when the timer was active
    1313             :  *  * -1 when the timer is currently executing the callback function and
    1314             :  *    cannot be stopped
    1315             :  */
    1316         325 : int hrtimer_try_to_cancel(struct hrtimer *timer)
    1317             : {
    1318             :         struct hrtimer_clock_base *base;
    1319             :         unsigned long flags;
    1320         325 :         int ret = -1;
    1321             : 
    1322             :         /*
    1323             :          * Check lockless first. If the timer is not active (neither
     1324             :          * enqueued nor running the callback), nothing to do here.  The
    1325             :          * base lock does not serialize against a concurrent enqueue,
    1326             :          * so we can avoid taking it.
    1327             :          */
    1328         325 :         if (!hrtimer_active(timer))
    1329             :                 return 0;
    1330             : 
    1331           0 :         base = lock_hrtimer_base(timer, &flags);
    1332             : 
    1333           0 :         if (!hrtimer_callback_running(timer))
    1334             :                 ret = remove_hrtimer(timer, base, false, false);
    1335             : 
    1336           0 :         unlock_hrtimer_base(timer, &flags);
    1337             : 
    1338           0 :         return ret;
    1339             : 
    1340             : }
    1341             : EXPORT_SYMBOL_GPL(hrtimer_try_to_cancel);
    1342             : 
    1343             : #ifdef CONFIG_PREEMPT_RT
    1344             : static void hrtimer_cpu_base_init_expiry_lock(struct hrtimer_cpu_base *base)
    1345             : {
    1346             :         spin_lock_init(&base->softirq_expiry_lock);
    1347             : }
    1348             : 
    1349             : static void hrtimer_cpu_base_lock_expiry(struct hrtimer_cpu_base *base)
    1350             : {
    1351             :         spin_lock(&base->softirq_expiry_lock);
    1352             : }
    1353             : 
    1354             : static void hrtimer_cpu_base_unlock_expiry(struct hrtimer_cpu_base *base)
    1355             : {
    1356             :         spin_unlock(&base->softirq_expiry_lock);
    1357             : }
    1358             : 
    1359             : /*
    1360             :  * The counterpart to hrtimer_cancel_wait_running().
    1361             :  *
    1362             :  * If there is a waiter for cpu_base->expiry_lock, then it was waiting for
    1363             :  * the timer callback to finish. Drop expiry_lock and reacquire it. That
    1364             :  * allows the waiter to acquire the lock and make progress.
    1365             :  */
    1366             : static void hrtimer_sync_wait_running(struct hrtimer_cpu_base *cpu_base,
    1367             :                                       unsigned long flags)
    1368             : {
    1369             :         if (atomic_read(&cpu_base->timer_waiters)) {
    1370             :                 raw_spin_unlock_irqrestore(&cpu_base->lock, flags);
    1371             :                 spin_unlock(&cpu_base->softirq_expiry_lock);
    1372             :                 spin_lock(&cpu_base->softirq_expiry_lock);
    1373             :                 raw_spin_lock_irq(&cpu_base->lock);
    1374             :         }
    1375             : }
    1376             : 
    1377             : /*
    1378             :  * This function is called on PREEMPT_RT kernels when the fast path
    1379             :  * deletion of a timer failed because the timer callback function was
    1380             :  * running.
    1381             :  *
    1382             :  * This prevents priority inversion: if the soft irq thread is preempted
     1383             :  * in the middle of a timer callback, then calling hrtimer_cancel() can
    1384             :  * lead to two issues:
    1385             :  *
    1386             :  *  - If the caller is on a remote CPU then it has to spin wait for the timer
     1387             :  *    handler to complete. This can result in unbounded priority inversion.
    1388             :  *
    1389             :  *  - If the caller originates from the task which preempted the timer
    1390             :  *    handler on the same CPU, then spin waiting for the timer handler to
    1391             :  *    complete is never going to end.
    1392             :  */
    1393             : void hrtimer_cancel_wait_running(const struct hrtimer *timer)
    1394             : {
    1395             :         /* Lockless read. Prevent the compiler from reloading it below */
    1396             :         struct hrtimer_clock_base *base = READ_ONCE(timer->base);
    1397             : 
    1398             :         /*
    1399             :          * Just relax if the timer expires in hard interrupt context or if
    1400             :          * it is currently on the migration base.
    1401             :          */
    1402             :         if (!timer->is_soft || is_migration_base(base)) {
    1403             :                 cpu_relax();
    1404             :                 return;
    1405             :         }
    1406             : 
    1407             :         /*
    1408             :          * Mark the base as contended and grab the expiry lock, which is
    1409             :          * held by the softirq across the timer callback. Drop the lock
    1410             :          * immediately so the softirq can expire the next timer. In theory
     1411             :          * the timer could already be running again, but that's highly
     1412             :          * unlikely and just causes another wait loop.
    1413             :          */
    1414             :         atomic_inc(&base->cpu_base->timer_waiters);
    1415             :         spin_lock_bh(&base->cpu_base->softirq_expiry_lock);
    1416             :         atomic_dec(&base->cpu_base->timer_waiters);
    1417             :         spin_unlock_bh(&base->cpu_base->softirq_expiry_lock);
    1418             : }
    1419             : #else
    1420             : static inline void
    1421             : hrtimer_cpu_base_init_expiry_lock(struct hrtimer_cpu_base *base) { }
    1422             : static inline void
    1423             : hrtimer_cpu_base_lock_expiry(struct hrtimer_cpu_base *base) { }
    1424             : static inline void
    1425             : hrtimer_cpu_base_unlock_expiry(struct hrtimer_cpu_base *base) { }
    1426             : static inline void hrtimer_sync_wait_running(struct hrtimer_cpu_base *base,
    1427             :                                              unsigned long flags) { }
    1428             : #endif
    1429             : 
    1430             : /**
    1431             :  * hrtimer_cancel - cancel a timer and wait for the handler to finish.
    1432             :  * @timer:      the timer to be cancelled
    1433             :  *
    1434             :  * Returns:
    1435             :  *  0 when the timer was not active
    1436             :  *  1 when the timer was active
    1437             :  */
    1438         325 : int hrtimer_cancel(struct hrtimer *timer)
    1439             : {
    1440             :         int ret;
    1441             : 
    1442             :         do {
    1443         325 :                 ret = hrtimer_try_to_cancel(timer);
    1444             : 
    1445         325 :                 if (ret < 0)
    1446           0 :                         hrtimer_cancel_wait_running(timer);
    1447         325 :         } while (ret < 0);
    1448         325 :         return ret;
    1449             : }
    1450             : EXPORT_SYMBOL_GPL(hrtimer_cancel);
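
On a teardown path (for example a driver remove or module exit), hrtimer_cancel() is what makes it safe to free the timer afterwards, because it waits for a running callback to finish. A hedged sketch, reusing the illustrative my_timer from the examples above:

        static void my_teardown(void)
        {
                /* Returns only after any concurrently running callback has completed. */
                hrtimer_cancel(&my_timer);

                /* The hrtimer may be reused or its memory freed from here on. */
        }
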
    1451             : 
    1452             : /**
    1453             :  * __hrtimer_get_remaining - get remaining time for the timer
    1454             :  * @timer:      the timer to read
    1455             :  * @adjust:     adjust relative timers when CONFIG_TIME_LOW_RES=y
    1456             :  */
    1457           0 : ktime_t __hrtimer_get_remaining(const struct hrtimer *timer, bool adjust)
    1458             : {
    1459             :         unsigned long flags;
    1460             :         ktime_t rem;
    1461             : 
    1462           0 :         lock_hrtimer_base(timer, &flags);
    1463             :         if (IS_ENABLED(CONFIG_TIME_LOW_RES) && adjust)
    1464             :                 rem = hrtimer_expires_remaining_adjusted(timer);
    1465             :         else
    1466           0 :                 rem = hrtimer_expires_remaining(timer);
    1467           0 :         unlock_hrtimer_base(timer, &flags);
    1468             : 
    1469           0 :         return rem;
    1470             : }
    1471             : EXPORT_SYMBOL_GPL(__hrtimer_get_remaining);
    1472             : 
    1473             : #ifdef CONFIG_NO_HZ_COMMON
    1474             : /**
    1475             :  * hrtimer_get_next_event - get the time until next expiry event
    1476             :  *
    1477             :  * Returns the next expiry time or KTIME_MAX if no timer is pending.
    1478             :  */
    1479             : u64 hrtimer_get_next_event(void)
    1480             : {
    1481             :         struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases);
    1482             :         u64 expires = KTIME_MAX;
    1483             :         unsigned long flags;
    1484             : 
    1485             :         raw_spin_lock_irqsave(&cpu_base->lock, flags);
    1486             : 
    1487             :         if (!__hrtimer_hres_active(cpu_base))
    1488             :                 expires = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE_ALL);
    1489             : 
    1490             :         raw_spin_unlock_irqrestore(&cpu_base->lock, flags);
    1491             : 
    1492             :         return expires;
    1493             : }
    1494             : 
    1495             : /**
    1496             :  * hrtimer_next_event_without - time until next expiry event w/o one timer
    1497             :  * @exclude:    timer to exclude
    1498             :  *
    1499             :  * Returns the next expiry time over all timers except for the @exclude one or
    1500             :  * KTIME_MAX if none of them is pending.
    1501             :  */
    1502             : u64 hrtimer_next_event_without(const struct hrtimer *exclude)
    1503             : {
    1504             :         struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases);
    1505             :         u64 expires = KTIME_MAX;
    1506             :         unsigned long flags;
    1507             : 
    1508             :         raw_spin_lock_irqsave(&cpu_base->lock, flags);
    1509             : 
    1510             :         if (__hrtimer_hres_active(cpu_base)) {
    1511             :                 unsigned int active;
    1512             : 
    1513             :                 if (!cpu_base->softirq_activated) {
    1514             :                         active = cpu_base->active_bases & HRTIMER_ACTIVE_SOFT;
    1515             :                         expires = __hrtimer_next_event_base(cpu_base, exclude,
    1516             :                                                             active, KTIME_MAX);
    1517             :                 }
    1518             :                 active = cpu_base->active_bases & HRTIMER_ACTIVE_HARD;
    1519             :                 expires = __hrtimer_next_event_base(cpu_base, exclude, active,
    1520             :                                                     expires);
    1521             :         }
    1522             : 
    1523             :         raw_spin_unlock_irqrestore(&cpu_base->lock, flags);
    1524             : 
    1525             :         return expires;
    1526             : }
    1527             : #endif
    1528             : 
    1529        1593 : static inline int hrtimer_clockid_to_base(clockid_t clock_id)
    1530             : {
    1531        1593 :         if (likely(clock_id < MAX_CLOCKS)) {
    1532        1593 :                 int base = hrtimer_clock_to_base_table[clock_id];
    1533             : 
    1534        1593 :                 if (likely(base != HRTIMER_MAX_CLOCK_BASES))
    1535             :                         return base;
    1536             :         }
    1537           0 :         WARN(1, "Invalid clockid %d. Using MONOTONIC\n", clock_id);
    1538           0 :         return HRTIMER_BASE_MONOTONIC;
    1539             : }
    1540             : 
    1541        1593 : static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id,
    1542             :                            enum hrtimer_mode mode)
    1543             : {
    1544        1593 :         bool softtimer = !!(mode & HRTIMER_MODE_SOFT);
    1545             :         struct hrtimer_cpu_base *cpu_base;
    1546             :         int base;
    1547             : 
    1548             :         /*
    1549             :          * On PREEMPT_RT enabled kernels hrtimers which are not explicitly
    1550             :          * marked for hard interrupt expiry mode are moved into soft
    1551             :          * interrupt context for latency reasons and because the callbacks
    1552             :          * can invoke functions which might sleep on RT, e.g. spin_lock().
    1553             :          */
    1554             :         if (IS_ENABLED(CONFIG_PREEMPT_RT) && !(mode & HRTIMER_MODE_HARD))
    1555             :                 softtimer = true;
    1556             : 
    1557        1593 :         memset(timer, 0, sizeof(struct hrtimer));
    1558             : 
    1559        1593 :         cpu_base = raw_cpu_ptr(&hrtimer_bases);
    1560             : 
    1561             :         /*
    1562             :          * POSIX magic: Relative CLOCK_REALTIME timers are not affected by
     1563             :          * clock modifications, so they need to become CLOCK_MONOTONIC to
    1564             :          * ensure POSIX compliance.
    1565             :          */
    1566        1593 :         if (clock_id == CLOCK_REALTIME && mode & HRTIMER_MODE_REL)
    1567           0 :                 clock_id = CLOCK_MONOTONIC;
    1568             : 
    1569        1593 :         base = softtimer ? HRTIMER_MAX_CLOCK_BASES / 2 : 0;
    1570        1593 :         base += hrtimer_clockid_to_base(clock_id);
    1571        1593 :         timer->is_soft = softtimer;
    1572        1593 :         timer->is_hard = !!(mode & HRTIMER_MODE_HARD);
    1573        1593 :         timer->base = &cpu_base->clock_base[base];
    1574        3186 :         timerqueue_init(&timer->node);
    1575        1593 : }
    1576             : 
    1577             : /**
    1578             :  * hrtimer_init - initialize a timer to the given clock
    1579             :  * @timer:      the timer to be initialized
    1580             :  * @clock_id:   the clock to be used
    1581             :  * @mode:       The modes which are relevant for initialization:
    1582             :  *              HRTIMER_MODE_ABS, HRTIMER_MODE_REL, HRTIMER_MODE_ABS_SOFT,
    1583             :  *              HRTIMER_MODE_REL_SOFT
    1584             :  *
    1585             :  *              The PINNED variants of the above can be handed in,
    1586             :  *              but the PINNED bit is ignored as pinning happens
    1587             :  *              when the hrtimer is started
    1588             :  */
    1589        1593 : void hrtimer_init(struct hrtimer *timer, clockid_t clock_id,
    1590             :                   enum hrtimer_mode mode)
    1591             : {
    1592        1593 :         debug_init(timer, clock_id, mode);
    1593        1593 :         __hrtimer_init(timer, clock_id, mode);
    1594        1593 : }
    1595             : EXPORT_SYMBOL_GPL(hrtimer_init);
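
A typical init-then-arm sequence, matching the periodic callback sketched after hrtimer_forward() above (the names are illustrative, not part of this file):

        static struct hrtimer my_timer;

        static void my_setup(void)
        {
                hrtimer_init(&my_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
                my_timer.function = my_periodic_cb;

                /* First expiry 100 ms from now; the callback re-arms itself. */
                hrtimer_start(&my_timer, ms_to_ktime(100), HRTIMER_MODE_REL);
        }

hrtimer_start() is the zero-slack wrapper around hrtimer_start_range_ns().
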
    1596             : 
    1597             : /*
     1598             :  * A timer is active when it is enqueued into the rbtree, the
     1599             :  * callback function is running, or it is in the state of being migrated
    1600             :  * to another cpu.
    1601             :  *
    1602             :  * It is important for this function to not return a false negative.
    1603             :  */
    1604         325 : bool hrtimer_active(const struct hrtimer *timer)
    1605             : {
    1606             :         struct hrtimer_clock_base *base;
    1607             :         unsigned int seq;
    1608             : 
    1609             :         do {
    1610         325 :                 base = READ_ONCE(timer->base);
    1611         975 :                 seq = raw_read_seqcount_begin(&base->seq);
    1612             : 
    1613         650 :                 if (timer->state != HRTIMER_STATE_INACTIVE ||
    1614         325 :                     base->running == timer)
    1615             :                         return true;
    1616             : 
    1617        1300 :         } while (read_seqcount_retry(&base->seq, seq) ||
    1618         325 :                  base != READ_ONCE(timer->base));
    1619             : 
    1620             :         return false;
    1621             : }
    1622             : EXPORT_SYMBOL_GPL(hrtimer_active);
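
Because the state can change the moment the function returns, callers treat hrtimer_active() purely as a racy hint, as hrtimer_try_to_cancel() does above. A small illustrative sketch (my_timer is the hypothetical timer from the earlier examples):

        /* Cheap lockless check; the answer may be stale by the time it is used. */
        if (hrtimer_active(&my_timer))
                pr_debug("my_timer is queued or its callback is running\n");
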
    1623             : 
    1624             : /*
    1625             :  * The write_seqcount_barrier()s in __run_hrtimer() split the thing into 3
    1626             :  * distinct sections:
    1627             :  *
    1628             :  *  - queued:   the timer is queued
 1629             :  *  - callback: the timer is being run
    1630             :  *  - post:     the timer is inactive or (re)queued
    1631             :  *
    1632             :  * On the read side we ensure we observe timer->state and cpu_base->running
    1633             :  * from the same section, if anything changed while we looked at it, we retry.
    1634             :  * This includes timer->base changing because sequence numbers alone are
    1635             :  * insufficient for that.
    1636             :  *
    1637             :  * The sequence numbers are required because otherwise we could still observe
    1638             :  * a false negative if the read side got smeared over multiple consecutive
    1639             :  * __run_hrtimer() invocations.
    1640             :  */
    1641             : 
    1642           0 : static void __run_hrtimer(struct hrtimer_cpu_base *cpu_base,
    1643             :                           struct hrtimer_clock_base *base,
    1644             :                           struct hrtimer *timer, ktime_t *now,
    1645             :                           unsigned long flags) __must_hold(&cpu_base->lock)
    1646             : {
    1647             :         enum hrtimer_restart (*fn)(struct hrtimer *);
    1648             :         bool expires_in_hardirq;
    1649             :         int restart;
    1650             : 
    1651             :         lockdep_assert_held(&cpu_base->lock);
    1652             : 
    1653           0 :         debug_deactivate(timer);
    1654           0 :         base->running = timer;
    1655             : 
    1656             :         /*
    1657             :          * Separate the ->running assignment from the ->state assignment.
    1658             :          *
    1659             :          * As with a regular write barrier, this ensures the read side in
    1660             :          * hrtimer_active() cannot observe base->running == NULL &&
    1661             :          * timer->state == INACTIVE.
    1662             :          */
    1663           0 :         raw_write_seqcount_barrier(&base->seq);
    1664             : 
    1665           0 :         __remove_hrtimer(timer, base, HRTIMER_STATE_INACTIVE, 0);
    1666           0 :         fn = timer->function;
    1667             : 
    1668             :         /*
    1669             :          * Clear the 'is relative' flag for the TIME_LOW_RES case. If the
    1670             :          * timer is restarted with a period then it becomes an absolute
     1671             :          * timer. If it's not restarted it does not matter.
    1672             :          */
    1673             :         if (IS_ENABLED(CONFIG_TIME_LOW_RES))
    1674             :                 timer->is_rel = false;
    1675             : 
    1676             :         /*
    1677             :          * The timer is marked as running in the CPU base, so it is
    1678             :          * protected against migration to a different CPU even if the lock
    1679             :          * is dropped.
    1680             :          */
    1681           0 :         raw_spin_unlock_irqrestore(&cpu_base->lock, flags);
    1682           0 :         trace_hrtimer_expire_entry(timer, now);
    1683           0 :         expires_in_hardirq = lockdep_hrtimer_enter(timer);
    1684             : 
    1685           0 :         restart = fn(timer);
    1686             : 
    1687             :         lockdep_hrtimer_exit(expires_in_hardirq);
    1688           0 :         trace_hrtimer_expire_exit(timer);
    1689           0 :         raw_spin_lock_irq(&cpu_base->lock);
    1690             : 
    1691             :         /*
    1692             :          * Note: We clear the running state after enqueue_hrtimer and
    1693             :          * we do not reprogram the event hardware. Happens either in
    1694             :          * hrtimer_start_range_ns() or in hrtimer_interrupt()
    1695             :          *
    1696             :          * Note: Because we dropped the cpu_base->lock above,
    1697             :          * hrtimer_start_range_ns() can have popped in and enqueued the timer
    1698             :          * for us already.
    1699             :          */
    1700           0 :         if (restart != HRTIMER_NORESTART &&
    1701           0 :             !(timer->state & HRTIMER_STATE_ENQUEUED))
    1702           0 :                 enqueue_hrtimer(timer, base, HRTIMER_MODE_ABS);
    1703             : 
    1704             :         /*
    1705             :          * Separate the ->running assignment from the ->state assignment.
    1706             :          *
    1707             :          * As with a regular write barrier, this ensures the read side in
     1708             :          * hrtimer_active() cannot observe base->running == NULL &&
    1709             :          * timer->state == INACTIVE.
    1710             :          */
    1711           0 :         raw_write_seqcount_barrier(&base->seq);
    1712             : 
    1713           0 :         WARN_ON_ONCE(base->running != timer);
    1714           0 :         base->running = NULL;
    1715           0 : }
    1716             : 
    1717        2723 : static void __hrtimer_run_queues(struct hrtimer_cpu_base *cpu_base, ktime_t now,
    1718             :                                  unsigned long flags, unsigned int active_mask)
    1719             : {
    1720             :         struct hrtimer_clock_base *base;
    1721        2723 :         unsigned int active = cpu_base->active_bases & active_mask;
    1722             : 
    1723        5446 :         for_each_active_base(base, cpu_base, active) {
    1724             :                 struct timerqueue_node *node;
    1725             :                 ktime_t basenow;
    1726             : 
    1727           0 :                 basenow = ktime_add(now, base->offset);
    1728             : 
    1729           0 :                 while ((node = timerqueue_getnext(&base->active))) {
    1730             :                         struct hrtimer *timer;
    1731             : 
    1732           0 :                         timer = container_of(node, struct hrtimer, node);
    1733             : 
    1734             :                         /*
    1735             :                          * The immediate goal for using the softexpires is
    1736             :                          * minimizing wakeups, not running timers at the
    1737             :                          * earliest interrupt after their soft expiration.
    1738             :                          * This allows us to avoid using a Priority Search
    1739             :                          * Tree, which can answer a stabbing query for
     1740             :                          * overlapping intervals, and instead use the simple
    1741             :                          * BST we already have.
    1742             :                          * We don't add extra wakeups by delaying timers that
     1743             :                          * are to the right of a not yet expired timer, because that
    1744             :                          * timer will have to trigger a wakeup anyway.
    1745             :                          */
    1746           0 :                         if (basenow < hrtimer_get_softexpires_tv64(timer))
    1747             :                                 break;
    1748             : 
    1749           0 :                         __run_hrtimer(cpu_base, base, timer, &basenow, flags);
    1750             :                         if (active_mask == HRTIMER_ACTIVE_SOFT)
    1751             :                                 hrtimer_sync_wait_running(cpu_base, flags);
    1752             :                 }
    1753             :         }
    1754        2723 : }
    1755             : 
    1756           0 : static __latent_entropy void hrtimer_run_softirq(struct softirq_action *h)
    1757             : {
    1758           0 :         struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases);
    1759             :         unsigned long flags;
    1760             :         ktime_t now;
    1761             : 
    1762           0 :         hrtimer_cpu_base_lock_expiry(cpu_base);
    1763           0 :         raw_spin_lock_irqsave(&cpu_base->lock, flags);
    1764             : 
    1765           0 :         now = hrtimer_update_base(cpu_base);
    1766           0 :         __hrtimer_run_queues(cpu_base, now, flags, HRTIMER_ACTIVE_SOFT);
    1767             : 
    1768           0 :         cpu_base->softirq_activated = 0;
    1769           0 :         hrtimer_update_softirq_timer(cpu_base, true);
    1770             : 
    1771           0 :         raw_spin_unlock_irqrestore(&cpu_base->lock, flags);
    1772           0 :         hrtimer_cpu_base_unlock_expiry(cpu_base);
    1773           0 : }
    1774             : 
    1775             : #ifdef CONFIG_HIGH_RES_TIMERS
    1776             : 
    1777             : /*
    1778             :  * High resolution timer interrupt
    1779             :  * Called with interrupts disabled
    1780             :  */
    1781             : void hrtimer_interrupt(struct clock_event_device *dev)
    1782             : {
    1783             :         struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases);
    1784             :         ktime_t expires_next, now, entry_time, delta;
    1785             :         unsigned long flags;
    1786             :         int retries = 0;
    1787             : 
    1788             :         BUG_ON(!cpu_base->hres_active);
    1789             :         cpu_base->nr_events++;
    1790             :         dev->next_event = KTIME_MAX;
    1791             : 
    1792             :         raw_spin_lock_irqsave(&cpu_base->lock, flags);
    1793             :         entry_time = now = hrtimer_update_base(cpu_base);
    1794             : retry:
    1795             :         cpu_base->in_hrtirq = 1;
    1796             :         /*
    1797             :          * We set expires_next to KTIME_MAX here with cpu_base->lock
    1798             :          * held to prevent that a timer is enqueued in our queue via
    1799             :          * the migration code. This does not affect enqueueing of
    1800             :          * timers which run their callback and need to be requeued on
    1801             :          * this CPU.
    1802             :          */
    1803             :         cpu_base->expires_next = KTIME_MAX;
    1804             : 
    1805             :         if (!ktime_before(now, cpu_base->softirq_expires_next)) {
    1806             :                 cpu_base->softirq_expires_next = KTIME_MAX;
    1807             :                 cpu_base->softirq_activated = 1;
    1808             :                 raise_softirq_irqoff(HRTIMER_SOFTIRQ);
    1809             :         }
    1810             : 
    1811             :         __hrtimer_run_queues(cpu_base, now, flags, HRTIMER_ACTIVE_HARD);
    1812             : 
    1813             :         /* Reevaluate the clock bases for the [soft] next expiry */
    1814             :         expires_next = hrtimer_update_next_event(cpu_base);
    1815             :         /*
    1816             :          * Store the new expiry value so the migration code can verify
    1817             :          * against it.
    1818             :          */
    1819             :         cpu_base->expires_next = expires_next;
    1820             :         cpu_base->in_hrtirq = 0;
    1821             :         raw_spin_unlock_irqrestore(&cpu_base->lock, flags);
    1822             : 
    1823             :         /* Reprogramming necessary ? */
    1824             :         if (!tick_program_event(expires_next, 0)) {
    1825             :                 cpu_base->hang_detected = 0;
    1826             :                 return;
    1827             :         }
    1828             : 
    1829             :         /*
    1830             :          * The next timer was already expired due to:
    1831             :          * - tracing
    1832             :          * - long lasting callbacks
    1833             :          * - being scheduled away when running in a VM
    1834             :          *
    1835             :          * We need to prevent that we loop forever in the hrtimer
    1836             :          * interrupt routine. We give it 3 attempts to avoid
    1837             :          * overreacting on some spurious event.
    1838             :          *
    1839             :          * Acquire base lock for updating the offsets and retrieving
    1840             :          * the current time.
    1841             :          */
    1842             :         raw_spin_lock_irqsave(&cpu_base->lock, flags);
    1843             :         now = hrtimer_update_base(cpu_base);
    1844             :         cpu_base->nr_retries++;
    1845             :         if (++retries < 3)
    1846             :                 goto retry;
    1847             :         /*
    1848             :          * Give the system a chance to do something else than looping
    1849             :          * here. We stored the entry time, so we know exactly how long
    1850             :          * we spent here. We schedule the next event this amount of
    1851             :          * time away.
    1852             :          */
    1853             :         cpu_base->nr_hangs++;
    1854             :         cpu_base->hang_detected = 1;
    1855             :         raw_spin_unlock_irqrestore(&cpu_base->lock, flags);
    1856             : 
    1857             :         delta = ktime_sub(now, entry_time);
    1858             :         if ((unsigned int)delta > cpu_base->max_hang_time)
    1859             :                 cpu_base->max_hang_time = (unsigned int) delta;
    1860             :         /*
    1861             :          * Limit it to a sensible value as we enforce a longer
    1862             :          * delay. Give the CPU at least 100ms to catch up.
    1863             :          */
    1864             :         if (delta > 100 * NSEC_PER_MSEC)
    1865             :                 expires_next = ktime_add_ns(now, 100 * NSEC_PER_MSEC);
    1866             :         else
    1867             :                 expires_next = ktime_add(now, delta);
    1868             :         tick_program_event(expires_next, 1);
    1869             :         pr_warn_once("hrtimer: interrupt took %llu ns\n", ktime_to_ns(delta));
    1870             : }
    1871             : 
    1872             : /* called with interrupts disabled */
    1873             : static inline void __hrtimer_peek_ahead_timers(void)
    1874             : {
    1875             :         struct tick_device *td;
    1876             : 
    1877             :         if (!hrtimer_hres_active())
    1878             :                 return;
    1879             : 
    1880             :         td = this_cpu_ptr(&tick_cpu_device);
    1881             :         if (td && td->evtdev)
    1882             :                 hrtimer_interrupt(td->evtdev);
    1883             : }
    1884             : 
    1885             : #else /* CONFIG_HIGH_RES_TIMERS */
    1886             : 
    1887             : static inline void __hrtimer_peek_ahead_timers(void) { }
    1888             : 
    1889             : #endif  /* !CONFIG_HIGH_RES_TIMERS */
    1890             : 
    1891             : /*
    1892             :  * Called from run_local_timers in hardirq context every jiffy
    1893             :  */
    1894        2723 : void hrtimer_run_queues(void)
    1895             : {
    1896        2723 :         struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases);
    1897             :         unsigned long flags;
    1898             :         ktime_t now;
    1899             : 
    1900        2723 :         if (__hrtimer_hres_active(cpu_base))
    1901             :                 return;
    1902             : 
    1903             :         /*
     1904             :          * This _is_ ugly: We have to check periodically whether we
     1905             :          * can switch to highres and/or nohz mode. The clocksource
    1906             :          * switch happens with xtime_lock held. Notification from
    1907             :          * there only sets the check bit in the tick_oneshot code,
    1908             :          * otherwise we might deadlock vs. xtime_lock.
    1909             :          */
    1910        2723 :         if (tick_check_oneshot_change(!hrtimer_is_hres_enabled())) {
    1911             :                 hrtimer_switch_to_hres();
    1912             :                 return;
    1913             :         }
    1914             : 
    1915        2723 :         raw_spin_lock_irqsave(&cpu_base->lock, flags);
    1916        2723 :         now = hrtimer_update_base(cpu_base);
    1917             : 
    1918        5446 :         if (!ktime_before(now, cpu_base->softirq_expires_next)) {
    1919           0 :                 cpu_base->softirq_expires_next = KTIME_MAX;
    1920           0 :                 cpu_base->softirq_activated = 1;
    1921           0 :                 raise_softirq_irqoff(HRTIMER_SOFTIRQ);
    1922             :         }
    1923             : 
    1924        2723 :         __hrtimer_run_queues(cpu_base, now, flags, HRTIMER_ACTIVE_HARD);
    1925        5446 :         raw_spin_unlock_irqrestore(&cpu_base->lock, flags);
    1926             : }
    1927             : 
    1928             : /*
     1929             :  * Sleep-related functions:
    1930             :  */
    1931           0 : static enum hrtimer_restart hrtimer_wakeup(struct hrtimer *timer)
    1932             : {
    1933           0 :         struct hrtimer_sleeper *t =
    1934           0 :                 container_of(timer, struct hrtimer_sleeper, timer);
    1935           0 :         struct task_struct *task = t->task;
    1936             : 
    1937           0 :         t->task = NULL;
    1938           0 :         if (task)
    1939           0 :                 wake_up_process(task);
    1940             : 
    1941           0 :         return HRTIMER_NORESTART;
    1942             : }
    1943             : 
    1944             : /**
    1945             :  * hrtimer_sleeper_start_expires - Start a hrtimer sleeper timer
    1946             :  * @sl:         sleeper to be started
    1947             :  * @mode:       timer mode abs/rel
    1948             :  *
    1949             :  * Wrapper around hrtimer_start_expires() for hrtimer_sleeper based timers
    1950             :  * to allow PREEMPT_RT to tweak the delivery mode (soft/hardirq context)
    1951             :  */
    1952           0 : void hrtimer_sleeper_start_expires(struct hrtimer_sleeper *sl,
    1953             :                                    enum hrtimer_mode mode)
    1954             : {
    1955             :         /*
    1956             :          * Make the enqueue delivery mode check work on RT. If the sleeper
    1957             :          * was initialized for hard interrupt delivery, force the mode bit.
    1958             :          * This is a special case for hrtimer_sleepers because
     1959             :          * hrtimer_init_sleeper() determines the delivery mode on RT, so
     1960             :          * fiddling with this decision is avoided at the call sites.
    1961             :          */
    1962             :         if (IS_ENABLED(CONFIG_PREEMPT_RT) && sl->timer.is_hard)
    1963             :                 mode |= HRTIMER_MODE_HARD;
    1964             : 
    1965           0 :         hrtimer_start_expires(&sl->timer, mode);
    1966           0 : }
    1967             : EXPORT_SYMBOL_GPL(hrtimer_sleeper_start_expires);
    1968             : 
    1969             : static void __hrtimer_init_sleeper(struct hrtimer_sleeper *sl,
    1970             :                                    clockid_t clock_id, enum hrtimer_mode mode)
    1971             : {
    1972             :         /*
    1973             :          * On PREEMPT_RT enabled kernels hrtimers which are not explicitly
    1974             :          * marked for hard interrupt expiry mode are moved into soft
    1975             :          * interrupt context either for latency reasons or because the
    1976             :          * hrtimer callback takes regular spinlocks or invokes other
    1977             :          * functions which are not suitable for hard interrupt context on
    1978             :          * PREEMPT_RT.
    1979             :          *
    1980             :          * The hrtimer_sleeper callback is RT compatible in hard interrupt
    1981             :          * context, but there is a latency concern: Untrusted userspace can
    1982             :          * spawn many threads which arm timers for the same expiry time on
    1983             :          * the same CPU. That causes a latency spike due to the wakeup of
    1984             :          * a gazillion threads.
    1985             :          *
    1986             :          * OTOH, privileged real-time user space applications rely on the
    1987             :          * low latency of hard interrupt wakeups. If the current task is in
    1988             :          * a real-time scheduling class, mark the mode for hard interrupt
    1989             :          * expiry.
    1990             :          */
    1991             :         if (IS_ENABLED(CONFIG_PREEMPT_RT)) {
    1992             :                 if (task_is_realtime(current) && !(mode & HRTIMER_MODE_SOFT))
    1993             :                         mode |= HRTIMER_MODE_HARD;
    1994             :         }
    1995             : 
    1996           0 :         __hrtimer_init(&sl->timer, clock_id, mode);
    1997           0 :         sl->timer.function = hrtimer_wakeup;
    1998           0 :         sl->task = current;
    1999             : }
    2000             : 
    2001             : /**
    2002             :  * hrtimer_init_sleeper - initialize sleeper to the given clock
    2003             :  * @sl:         sleeper to be initialized
    2004             :  * @clock_id:   the clock to be used
    2005             :  * @mode:       timer mode abs/rel
    2006             :  */
    2007           0 : void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, clockid_t clock_id,
    2008             :                           enum hrtimer_mode mode)
    2009             : {
    2010           0 :         debug_init(&sl->timer, clock_id, mode);
    2011           0 :         __hrtimer_init_sleeper(sl, clock_id, mode);
    2012             : 
    2013           0 : }
    2014             : EXPORT_SYMBOL_GPL(hrtimer_init_sleeper);
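
/*
 * A minimal usage sketch of the sleeper API above; illustrative only,
 * assuming a caller in process context with a caller-supplied ktime_t
 * "timeout". It mirrors the pattern do_nanosleep() and
 * schedule_hrtimeout_range_clock() below use:
 *
 *	struct hrtimer_sleeper t;
 *
 *	hrtimer_init_sleeper_on_stack(&t, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 *	hrtimer_set_expires(&t.timer, timeout);
 *	set_current_state(TASK_INTERRUPTIBLE);
 *	hrtimer_sleeper_start_expires(&t, HRTIMER_MODE_REL);
 *	if (t.task)
 *		schedule();
 *	hrtimer_cancel(&t.timer);
 *	destroy_hrtimer_on_stack(&t.timer);
 *	__set_current_state(TASK_RUNNING);
 */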
    2015             : 
    2016           0 : int nanosleep_copyout(struct restart_block *restart, struct timespec64 *ts)
    2017             : {
    2018           0 :         switch (restart->nanosleep.type) {
    2019             : #ifdef CONFIG_COMPAT_32BIT_TIME
    2020             :         case TT_COMPAT:
    2021             :                 if (put_old_timespec32(ts, restart->nanosleep.compat_rmtp))
    2022             :                         return -EFAULT;
    2023             :                 break;
    2024             : #endif
    2025             :         case TT_NATIVE:
    2026           0 :                 if (put_timespec64(ts, restart->nanosleep.rmtp))
    2027             :                         return -EFAULT;
    2028             :                 break;
    2029             :         default:
    2030           0 :                 BUG();
    2031             :         }
    2032           0 :         return -ERESTART_RESTARTBLOCK;
    2033             : }
    2034             : 
    2035           0 : static int __sched do_nanosleep(struct hrtimer_sleeper *t, enum hrtimer_mode mode)
    2036             : {
    2037             :         struct restart_block *restart;
    2038             : 
    2039             :         do {
    2040           0 :                 set_current_state(TASK_INTERRUPTIBLE|TASK_FREEZABLE);
    2041           0 :                 hrtimer_sleeper_start_expires(t, mode);
    2042             : 
    2043           0 :                 if (likely(t->task))
    2044           0 :                         schedule();
    2045             : 
    2046           0 :                 hrtimer_cancel(&t->timer);
    2047           0 :                 mode = HRTIMER_MODE_ABS;
    2048             : 
    2049           0 :         } while (t->task && !signal_pending(current));
    2050             : 
    2051           0 :         __set_current_state(TASK_RUNNING);
    2052             : 
    2053           0 :         if (!t->task)
    2054             :                 return 0;
    2055             : 
    2056           0 :         restart = &current->restart_block;
    2057           0 :         if (restart->nanosleep.type != TT_NONE) {
    2058           0 :                 ktime_t rem = hrtimer_expires_remaining(&t->timer);
    2059             :                 struct timespec64 rmt;
    2060             : 
    2061           0 :                 if (rem <= 0)
    2062             :                         return 0;
    2063           0 :                 rmt = ktime_to_timespec64(rem);
    2064             : 
    2065           0 :                 return nanosleep_copyout(restart, &rmt);
    2066             :         }
    2067             :         return -ERESTART_RESTARTBLOCK;
    2068             : }
    2069             : 
    2070           0 : static long __sched hrtimer_nanosleep_restart(struct restart_block *restart)
    2071             : {
    2072             :         struct hrtimer_sleeper t;
    2073             :         int ret;
    2074             : 
    2075           0 :         hrtimer_init_sleeper_on_stack(&t, restart->nanosleep.clockid,
    2076             :                                       HRTIMER_MODE_ABS);
    2077           0 :         hrtimer_set_expires_tv64(&t.timer, restart->nanosleep.expires);
    2078           0 :         ret = do_nanosleep(&t, HRTIMER_MODE_ABS);
    2079           0 :         destroy_hrtimer_on_stack(&t.timer);
    2080           0 :         return ret;
    2081             : }
    2082             : 
    2083           0 : long hrtimer_nanosleep(ktime_t rqtp, const enum hrtimer_mode mode,
    2084             :                        const clockid_t clockid)
    2085             : {
    2086             :         struct restart_block *restart;
    2087             :         struct hrtimer_sleeper t;
    2088           0 :         int ret = 0;
    2089             :         u64 slack;
    2090             : 
    2091           0 :         slack = current->timer_slack_ns;
    2092           0 :         if (rt_task(current))
    2093           0 :                 slack = 0;
    2094             : 
    2095           0 :         hrtimer_init_sleeper_on_stack(&t, clockid, mode);
    2096           0 :         hrtimer_set_expires_range_ns(&t.timer, rqtp, slack);
    2097           0 :         ret = do_nanosleep(&t, mode);
    2098           0 :         if (ret != -ERESTART_RESTARTBLOCK)
    2099             :                 goto out;
    2100             : 
    2101             :         /* Absolute timers do not update the rmtp value and restart: */
    2102           0 :         if (mode == HRTIMER_MODE_ABS) {
    2103             :                 ret = -ERESTARTNOHAND;
    2104             :                 goto out;
    2105             :         }
    2106             : 
    2107           0 :         restart = &current->restart_block;
    2108           0 :         restart->nanosleep.clockid = t.timer.base->clockid;
    2109           0 :         restart->nanosleep.expires = hrtimer_get_expires_tv64(&t.timer);
    2110           0 :         set_restart_fn(restart, hrtimer_nanosleep_restart);
    2111             : out:
    2112           0 :         destroy_hrtimer_on_stack(&t.timer);
    2113           0 :         return ret;
    2114             : }
    2115             : 
    2116             : #ifdef CONFIG_64BIT
    2117             : 
    2118           0 : SYSCALL_DEFINE2(nanosleep, struct __kernel_timespec __user *, rqtp,
    2119             :                 struct __kernel_timespec __user *, rmtp)
    2120             : {
    2121             :         struct timespec64 tu;
    2122             : 
    2123           0 :         if (get_timespec64(&tu, rqtp))
    2124             :                 return -EFAULT;
    2125             : 
    2126           0 :         if (!timespec64_valid(&tu))
    2127             :                 return -EINVAL;
    2128             : 
    2129           0 :         current->restart_block.fn = do_no_restart_syscall;
    2130           0 :         current->restart_block.nanosleep.type = rmtp ? TT_NATIVE : TT_NONE;
    2131           0 :         current->restart_block.nanosleep.rmtp = rmtp;
    2132           0 :         return hrtimer_nanosleep(timespec64_to_ktime(tu), HRTIMER_MODE_REL,
    2133             :                                  CLOCK_MONOTONIC);
    2134             : }
    2135             : 
    2136             : #endif
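
/*
 * The rmtp handling above is what lets an interrupted sleep be resumed from
 * user space. A minimal sketch of the user-space side, assuming a POSIX
 * environment with a signal handler installed:
 *
 *	struct timespec req = { .tv_sec = 1, .tv_nsec = 0 };
 *	struct timespec rem;
 *
 *	while (nanosleep(&req, &rem) == -1 && errno == EINTR)
 *		req = rem;
 *
 * Each time a handled signal interrupts the sleep, the kernel copies the
 * remaining time back via nanosleep_copyout() and the loop resumes with it.
 */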
    2137             : 
    2138             : #ifdef CONFIG_COMPAT_32BIT_TIME
    2139             : 
    2140             : SYSCALL_DEFINE2(nanosleep_time32, struct old_timespec32 __user *, rqtp,
    2141             :                        struct old_timespec32 __user *, rmtp)
    2142             : {
    2143             :         struct timespec64 tu;
    2144             : 
    2145             :         if (get_old_timespec32(&tu, rqtp))
    2146             :                 return -EFAULT;
    2147             : 
    2148             :         if (!timespec64_valid(&tu))
    2149             :                 return -EINVAL;
    2150             : 
    2151             :         current->restart_block.fn = do_no_restart_syscall;
    2152             :         current->restart_block.nanosleep.type = rmtp ? TT_COMPAT : TT_NONE;
    2153             :         current->restart_block.nanosleep.compat_rmtp = rmtp;
    2154             :         return hrtimer_nanosleep(timespec64_to_ktime(tu), HRTIMER_MODE_REL,
    2155             :                                  CLOCK_MONOTONIC);
    2156             : }
    2157             : #endif
    2158             : 
    2159             : /*
    2160             :  * Functions related to boot-time initialization:
    2161             :  */
    2162           0 : int hrtimers_prepare_cpu(unsigned int cpu)
    2163             : {
    2164           1 :         struct hrtimer_cpu_base *cpu_base = &per_cpu(hrtimer_bases, cpu);
    2165             :         int i;
    2166             : 
    2167           9 :         for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
    2168           8 :                 struct hrtimer_clock_base *clock_b = &cpu_base->clock_base[i];
    2169             : 
    2170           8 :                 clock_b->cpu_base = cpu_base;
    2171          16 :                 seqcount_raw_spinlock_init(&clock_b->seq, &cpu_base->lock);
    2172          16 :                 timerqueue_init_head(&clock_b->active);
    2173             :         }
    2174             : 
    2175           1 :         cpu_base->cpu = cpu;
    2176           1 :         cpu_base->active_bases = 0;
    2177           1 :         cpu_base->hres_active = 0;
    2178           1 :         cpu_base->hang_detected = 0;
    2179           1 :         cpu_base->next_timer = NULL;
    2180           1 :         cpu_base->softirq_next_timer = NULL;
    2181           1 :         cpu_base->expires_next = KTIME_MAX;
    2182           1 :         cpu_base->softirq_expires_next = KTIME_MAX;
    2183           1 :         hrtimer_cpu_base_init_expiry_lock(cpu_base);
    2184           0 :         return 0;
    2185             : }
    2186             : 
    2187             : #ifdef CONFIG_HOTPLUG_CPU
    2188             : 
    2189             : static void migrate_hrtimer_list(struct hrtimer_clock_base *old_base,
    2190             :                                 struct hrtimer_clock_base *new_base)
    2191             : {
    2192             :         struct hrtimer *timer;
    2193             :         struct timerqueue_node *node;
    2194             : 
    2195             :         while ((node = timerqueue_getnext(&old_base->active))) {
    2196             :                 timer = container_of(node, struct hrtimer, node);
    2197             :                 BUG_ON(hrtimer_callback_running(timer));
    2198             :                 debug_deactivate(timer);
    2199             : 
    2200             :                 /*
    2201             :                  * Mark it as ENQUEUED, not INACTIVE, otherwise the
    2202             :                  * timer could be seen as !active and just vanish away
    2203             :                  * under us on another CPU.
    2204             :                  */
    2205             :                 __remove_hrtimer(timer, old_base, HRTIMER_STATE_ENQUEUED, 0);
    2206             :                 timer->base = new_base;
    2207             :                 /*
    2208             :                  * Enqueue the timers on the new CPU. This does not
    2209             :                  * reprogram the event device even if the migrated
    2210             :                  * timer expires before the earliest timer on this
    2211             :                  * CPU; we run hrtimer_interrupt after migrating
    2212             :                  * everything to sort out already expired timers and
    2213             :                  * reprogram the event device.
    2214             :                  */
    2215             :                 enqueue_hrtimer(timer, new_base, HRTIMER_MODE_ABS);
    2216             :         }
    2217             : }
    2218             : 
    2219             : int hrtimers_dead_cpu(unsigned int scpu)
    2220             : {
    2221             :         struct hrtimer_cpu_base *old_base, *new_base;
    2222             :         int i;
    2223             : 
    2224             :         BUG_ON(cpu_online(scpu));
    2225             :         tick_cancel_sched_timer(scpu);
    2226             : 
    2227             :         /*
    2228             :          * This BH disable ensures that raise_softirq_irqoff() does
    2229             :          * not wake up ksoftirqd (and acquire the pi-lock) while
    2230             :          * holding the cpu_base lock.
    2231             :          */
    2232             :         local_bh_disable();
    2233             :         local_irq_disable();
    2234             :         old_base = &per_cpu(hrtimer_bases, scpu);
    2235             :         new_base = this_cpu_ptr(&hrtimer_bases);
    2236             :         /*
    2237             :          * The caller is globally serialized and nobody else
    2238             :          * takes two locks at once, so deadlock is not possible.
    2239             :          */
    2240             :         raw_spin_lock(&new_base->lock);
    2241             :         raw_spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING);
    2242             : 
    2243             :         for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
    2244             :                 migrate_hrtimer_list(&old_base->clock_base[i],
    2245             :                                      &new_base->clock_base[i]);
    2246             :         }
    2247             : 
    2248             :         /*
    2249             :          * The migration might have changed the first expiring softirq
    2250             :          * timer on this CPU. Update it.
    2251             :          */
    2252             :         hrtimer_update_softirq_timer(new_base, false);
    2253             : 
    2254             :         raw_spin_unlock(&old_base->lock);
    2255             :         raw_spin_unlock(&new_base->lock);
    2256             : 
    2257             :         /* Check if we have expired work to do */
    2258             :         __hrtimer_peek_ahead_timers();
    2259             :         local_irq_enable();
    2260             :         local_bh_enable();
    2261             :         return 0;
    2262             : }
    2263             : 
    2264             : #endif /* CONFIG_HOTPLUG_CPU */
    2265             : 
    2266           1 : void __init hrtimers_init(void)
    2267             : {
    2268           1 :         hrtimers_prepare_cpu(smp_processor_id());
    2269           1 :         open_softirq(HRTIMER_SOFTIRQ, hrtimer_run_softirq);
    2270           1 : }
    2271             : 
    2272             : /**
    2273             :  * schedule_hrtimeout_range_clock - sleep until timeout
    2274             :  * @expires:    timeout value (ktime_t)
    2275             :  * @delta:      slack in expires timeout (ktime_t) for SCHED_OTHER tasks
    2276             :  * @mode:       timer mode
    2277             :  * @clock_id:   timer clock to be used
    2278             :  */
    2279             : int __sched
    2280           0 : schedule_hrtimeout_range_clock(ktime_t *expires, u64 delta,
    2281             :                                const enum hrtimer_mode mode, clockid_t clock_id)
    2282             : {
    2283             :         struct hrtimer_sleeper t;
    2284             : 
    2285             :         /*
    2286             :          * Optimize when a zero timeout value is given. It does not
    2287             :          * matter whether this is an absolute or a relative time.
    2288             :          */
    2289           0 :         if (expires && *expires == 0) {
    2290           0 :                 __set_current_state(TASK_RUNNING);
    2291           0 :                 return 0;
    2292             :         }
    2293             : 
    2294             :         /*
    2295             :          * A NULL parameter means "infinite"
    2296             :          */
    2297           0 :         if (!expires) {
    2298           0 :                 schedule();
    2299           0 :                 return -EINTR;
    2300             :         }
    2301             : 
    2302             :         /*
    2303             :          * Override any slack passed by the user if under
    2304             :          * RT constraints.
    2305             :          */
    2306           0 :         if (rt_task(current))
    2307           0 :                 delta = 0;
    2308             : 
    2309           0 :         hrtimer_init_sleeper_on_stack(&t, clock_id, mode);
    2310           0 :         hrtimer_set_expires_range_ns(&t.timer, *expires, delta);
    2311           0 :         hrtimer_sleeper_start_expires(&t, mode);
    2312             : 
    2313           0 :         if (likely(t.task))
    2314           0 :                 schedule();
    2315             : 
    2316           0 :         hrtimer_cancel(&t.timer);
    2317           0 :         destroy_hrtimer_on_stack(&t.timer);
    2318             : 
    2319           0 :         __set_current_state(TASK_RUNNING);
    2320             : 
    2321           0 :         return !t.task ? 0 : -EINTR;
    2322             : }
    2323             : EXPORT_SYMBOL_GPL(schedule_hrtimeout_range_clock);
    2324             : 
    2325             : /**
    2326             :  * schedule_hrtimeout_range - sleep until timeout
    2327             :  * @expires:    timeout value (ktime_t)
    2328             :  * @delta:      slack in expires timeout (ktime_t) for SCHED_OTHER tasks
    2329             :  * @mode:       timer mode
    2330             :  *
    2331             :  * Make the current task sleep until the given expiry time has
    2332             :  * elapsed. The routine will return immediately unless
    2333             :  * the current task state has been set (see set_current_state()).
    2334             :  *
    2335             :  * The @delta argument gives the kernel the freedom to schedule the
    2336             :  * actual wakeup to a time that is both power and performance friendly
    2337             :  * for regular (non RT/DL) tasks.
    2338             :  * The kernel gives the normal best-effort behavior for "@expires+@delta",
    2339             :  * and may decide to fire the timer earlier, but never earlier than @expires.
    2340             :  *
    2341             :  * You can set the task state as follows -
    2342             :  *
    2343             :  * %TASK_UNINTERRUPTIBLE - at least @expires time is guaranteed to
    2344             :  * pass before the routine returns unless the current task is explicitly
    2345             :  * woken up (e.g. by wake_up_process()).
    2346             :  *
    2347             :  * %TASK_INTERRUPTIBLE - the routine may return early if a signal is
    2348             :  * delivered to the current task or the current task is explicitly woken
    2349             :  * up.
    2350             :  *
    2351             :  * The current task state is guaranteed to be TASK_RUNNING when this
    2352             :  * routine returns.
    2353             :  *
    2354             :  * Returns 0 when the timer has expired. If the task was woken before the
    2355             :  * timer expired by a signal (only possible in state TASK_INTERRUPTIBLE) or
    2356             :  * by an explicit wakeup, it returns -EINTR.
    2357             :  */
    2358           0 : int __sched schedule_hrtimeout_range(ktime_t *expires, u64 delta,
    2359             :                                      const enum hrtimer_mode mode)
    2360             : {
    2361           0 :         return schedule_hrtimeout_range_clock(expires, delta, mode,
    2362             :                                               CLOCK_MONOTONIC);
    2363             : }
    2364             : EXPORT_SYMBOL_GPL(schedule_hrtimeout_range);
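
/*
 * A minimal calling sketch, assuming a 10ms relative timeout with 1ms of
 * slack in process context (illustrative only):
 *
 *	ktime_t timeout = ms_to_ktime(10);
 *	int ret;
 *
 *	set_current_state(TASK_INTERRUPTIBLE);
 *	ret = schedule_hrtimeout_range(&timeout, NSEC_PER_MSEC,
 *				       HRTIMER_MODE_REL);
 *
 * ret is 0 if the timeout expired and -EINTR if the task was woken early,
 * either by a signal or by an explicit wakeup.
 */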
    2365             : 
    2366             : /**
    2367             :  * schedule_hrtimeout - sleep until timeout
    2368             :  * @expires:    timeout value (ktime_t)
    2369             :  * @mode:       timer mode
    2370             :  *
    2371             :  * Make the current task sleep until the given expiry time has
    2372             :  * elapsed. The routine will return immediately unless
    2373             :  * the current task state has been set (see set_current_state()).
    2374             :  *
    2375             :  * You can set the task state as follows -
    2376             :  *
    2377             :  * %TASK_UNINTERRUPTIBLE - at least @expires time is guaranteed to
    2378             :  * pass before the routine returns unless the current task is explicitly
    2379             :  * woken up (e.g. by wake_up_process()).
    2380             :  *
    2381             :  * %TASK_INTERRUPTIBLE - the routine may return early if a signal is
    2382             :  * delivered to the current task or the current task is explicitly woken
    2383             :  * up.
    2384             :  *
    2385             :  * The current task state is guaranteed to be TASK_RUNNING when this
    2386             :  * routine returns.
    2387             :  *
    2388             :  * Returns 0 when the timer has expired. If the task was woken before the
    2389             :  * timer expired by a signal (only possible in state TASK_INTERRUPTIBLE) or
    2390             :  * by an explicit wakeup, it returns -EINTR.
    2391             :  */
    2392           0 : int __sched schedule_hrtimeout(ktime_t *expires,
    2393             :                                const enum hrtimer_mode mode)
    2394             : {
    2395           0 :         return schedule_hrtimeout_range(expires, 0, mode);
    2396             : }
    2397             : EXPORT_SYMBOL_GPL(schedule_hrtimeout);
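
/*
 * The no-slack variant follows the same pattern. A minimal sketch of an
 * uninterruptible 1ms wait (illustrative only):
 *
 *	ktime_t timeout = ktime_set(0, NSEC_PER_MSEC);
 *
 *	set_current_state(TASK_UNINTERRUPTIBLE);
 *	schedule_hrtimeout(&timeout, HRTIMER_MODE_REL);
 */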

Generated by: LCOV version 1.14