LCOV - code coverage report
Current view: top level - kernel/time - timer.c (source / functions)
Test: coverage.info
Date: 2023-07-19 18:55:55

                 Hit    Total    Coverage
Lines:           271      391      69.3 %
Functions:        31       51      60.8 %

          Line data    Source code
       1             : // SPDX-License-Identifier: GPL-2.0
       2             : /*
       3             :  *  Kernel internal timers
       4             :  *
       5             :  *  Copyright (C) 1991, 1992  Linus Torvalds
       6             :  *
       7             :  *  1997-01-28  Modified by Finn Arne Gangstad to make timers scale better.
       8             :  *
       9             :  *  1997-09-10  Updated NTP code according to technical memorandum Jan '96
      10             :  *              "A Kernel Model for Precision Timekeeping" by Dave Mills
       11             :  *  1998-12-24  Fixed an xtime SMP race (we need the xtime_lock rw spinlock to
      12             :  *              serialize accesses to xtime/lost_ticks).
      13             :  *                              Copyright (C) 1998  Andrea Arcangeli
      14             :  *  1999-03-10  Improved NTP compatibility by Ulrich Windl
      15             :  *  2002-05-31  Move sys_sysinfo here and make its locking sane, Robert Love
      16             :  *  2000-10-05  Implemented scalable SMP per-CPU timer handling.
      17             :  *                              Copyright (C) 2000, 2001, 2002  Ingo Molnar
      18             :  *              Designed by David S. Miller, Alexey Kuznetsov and Ingo Molnar
      19             :  */
      20             : 
      21             : #include <linux/kernel_stat.h>
      22             : #include <linux/export.h>
      23             : #include <linux/interrupt.h>
      24             : #include <linux/percpu.h>
      25             : #include <linux/init.h>
      26             : #include <linux/mm.h>
      27             : #include <linux/swap.h>
      28             : #include <linux/pid_namespace.h>
      29             : #include <linux/notifier.h>
      30             : #include <linux/thread_info.h>
      31             : #include <linux/time.h>
      32             : #include <linux/jiffies.h>
      33             : #include <linux/posix-timers.h>
      34             : #include <linux/cpu.h>
      35             : #include <linux/syscalls.h>
      36             : #include <linux/delay.h>
      37             : #include <linux/tick.h>
      38             : #include <linux/kallsyms.h>
      39             : #include <linux/irq_work.h>
      40             : #include <linux/sched/signal.h>
      41             : #include <linux/sched/sysctl.h>
      42             : #include <linux/sched/nohz.h>
      43             : #include <linux/sched/debug.h>
      44             : #include <linux/slab.h>
      45             : #include <linux/compat.h>
      46             : #include <linux/random.h>
      47             : #include <linux/sysctl.h>
      48             : 
      49             : #include <linux/uaccess.h>
      50             : #include <asm/unistd.h>
      51             : #include <asm/div64.h>
      52             : #include <asm/timex.h>
      53             : #include <asm/io.h>
      54             : 
      55             : #include "tick-internal.h"
      56             : 
      57             : #define CREATE_TRACE_POINTS
      58             : #include <trace/events/timer.h>
      59             : 
      60             : __visible u64 jiffies_64 __cacheline_aligned_in_smp = INITIAL_JIFFIES;
      61             : 
      62             : EXPORT_SYMBOL(jiffies_64);
      63             : 
      64             : /*
      65             :  * The timer wheel has LVL_DEPTH array levels. Each level provides an array of
       66             :  * LVL_SIZE buckets. Each level is driven by its own clock and therefore each
      67             :  * level has a different granularity.
      68             :  *
      69             :  * The level granularity is:            LVL_CLK_DIV ^ lvl
      70             :  * The level clock frequency is:        HZ / (LVL_CLK_DIV ^ level)
      71             :  *
      72             :  * The array level of a newly armed timer depends on the relative expiry
       73             :  * time. The farther away the expiry time is, the higher the array level
       74             :  * and therefore the coarser the granularity becomes.
      75             :  *
      76             :  * Contrary to the original timer wheel implementation, which aims for 'exact'
      77             :  * expiry of the timers, this implementation removes the need for recascading
      78             :  * the timers into the lower array levels. The previous 'classic' timer wheel
      79             :  * implementation of the kernel already violated the 'exact' expiry by adding
      80             :  * slack to the expiry time to provide batched expiration. The granularity
      81             :  * levels provide implicit batching.
      82             :  *
      83             :  * This is an optimization of the original timer wheel implementation for the
      84             :  * majority of the timer wheel use cases: timeouts. The vast majority of
      85             :  * timeout timers (networking, disk I/O ...) are canceled before expiry. If
      86             :  * the timeout expires it indicates that normal operation is disturbed, so it
      87             :  * does not matter much whether the timeout comes with a slight delay.
      88             :  *
       89             :  * The only exceptions to this are networking timers with a small expiry
      90             :  * time. They rely on the granularity. Those fit into the first wheel level,
      91             :  * which has HZ granularity.
      92             :  *
       93             :  * We don't have cascading anymore. Timers with an expiry time above the
      94             :  * capacity of the last wheel level are force expired at the maximum timeout
      95             :  * value of the last wheel level. From data sampling we know that the maximum
      96             :  * value observed is 5 days (network connection tracking), so this should not
      97             :  * be an issue.
      98             :  *
       99             :  * The currently chosen array constant values are a good compromise between
     100             :  * array size and granularity.
     101             :  *
     102             :  * This results in the following granularity and range levels:
     103             :  *
     104             :  * HZ 1000 steps
     105             :  * Level Offset  Granularity            Range
     106             :  *  0      0         1 ms                0 ms -         63 ms
     107             :  *  1     64         8 ms               64 ms -        511 ms
     108             :  *  2    128        64 ms              512 ms -       4095 ms (512ms - ~4s)
     109             :  *  3    192       512 ms             4096 ms -      32767 ms (~4s - ~32s)
     110             :  *  4    256      4096 ms (~4s)      32768 ms -     262143 ms (~32s - ~4m)
     111             :  *  5    320     32768 ms (~32s)    262144 ms -    2097151 ms (~4m - ~34m)
     112             :  *  6    384    262144 ms (~4m)    2097152 ms -   16777215 ms (~34m - ~4h)
     113             :  *  7    448   2097152 ms (~34m)  16777216 ms -  134217727 ms (~4h - ~1d)
     114             :  *  8    512  16777216 ms (~4h)  134217728 ms - 1073741822 ms (~1d - ~12d)
     115             :  *
     116             :  * HZ  300
     117             :  * Level Offset  Granularity            Range
     118             :  *  0      0         3 ms                0 ms -        210 ms
     119             :  *  1     64        26 ms              213 ms -       1703 ms (213ms - ~1s)
     120             :  *  2    128       213 ms             1706 ms -      13650 ms (~1s - ~13s)
     121             :  *  3    192      1706 ms (~1s)      13653 ms -     109223 ms (~13s - ~1m)
     122             :  *  4    256     13653 ms (~13s)    109226 ms -     873810 ms (~1m - ~14m)
     123             :  *  5    320    109226 ms (~1m)     873813 ms -    6990503 ms (~14m - ~1h)
     124             :  *  6    384    873813 ms (~14m)   6990506 ms -   55924050 ms (~1h - ~15h)
     125             :  *  7    448   6990506 ms (~1h)   55924053 ms -  447392423 ms (~15h - ~5d)
     126             :  *  8    512  55924053 ms (~15h) 447392426 ms - 3579139406 ms (~5d - ~41d)
     127             :  *
     128             :  * HZ  250
     129             :  * Level Offset  Granularity            Range
     130             :  *  0      0         4 ms                0 ms -        255 ms
     131             :  *  1     64        32 ms              256 ms -       2047 ms (256ms - ~2s)
     132             :  *  2    128       256 ms             2048 ms -      16383 ms (~2s - ~16s)
     133             :  *  3    192      2048 ms (~2s)      16384 ms -     131071 ms (~16s - ~2m)
     134             :  *  4    256     16384 ms (~16s)    131072 ms -    1048575 ms (~2m - ~17m)
     135             :  *  5    320    131072 ms (~2m)    1048576 ms -    8388607 ms (~17m - ~2h)
     136             :  *  6    384   1048576 ms (~17m)   8388608 ms -   67108863 ms (~2h - ~18h)
     137             :  *  7    448   8388608 ms (~2h)   67108864 ms -  536870911 ms (~18h - ~6d)
     138             :  *  8    512  67108864 ms (~18h) 536870912 ms - 4294967288 ms (~6d - ~49d)
     139             :  *
     140             :  * HZ  100
     141             :  * Level Offset  Granularity            Range
     142             :  *  0      0         10 ms               0 ms -        630 ms
     143             :  *  1     64         80 ms             640 ms -       5110 ms (640ms - ~5s)
     144             :  *  2    128        640 ms            5120 ms -      40950 ms (~5s - ~40s)
     145             :  *  3    192       5120 ms (~5s)     40960 ms -     327670 ms (~40s - ~5m)
     146             :  *  4    256      40960 ms (~40s)   327680 ms -    2621430 ms (~5m - ~43m)
     147             :  *  5    320     327680 ms (~5m)   2621440 ms -   20971510 ms (~43m - ~5h)
     148             :  *  6    384    2621440 ms (~43m) 20971520 ms -  167772150 ms (~5h - ~1d)
     149             :  *  7    448   20971520 ms (~5h) 167772160 ms - 1342177270 ms (~1d - ~15d)
     150             :  */
     151             : 
     152             : /* Clock divisor for the next level */
     153             : #define LVL_CLK_SHIFT   3
     154             : #define LVL_CLK_DIV     (1UL << LVL_CLK_SHIFT)
     155             : #define LVL_CLK_MASK    (LVL_CLK_DIV - 1)
     156             : #define LVL_SHIFT(n)    ((n) * LVL_CLK_SHIFT)
     157             : #define LVL_GRAN(n)     (1UL << LVL_SHIFT(n))
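                      :
                      : /*
                      :  * Worked example (assuming HZ == 1000, i.e. one jiffy == 1 ms):
                      :  *
                      :  *   LVL_SHIFT(2) = 2 * 3    = 6
                      :  *   LVL_GRAN(2)  = 1UL << 6 = 64 jiffies = 64 ms
                      :  *
                      :  * which matches the 64 ms granularity of level 2 in the HZ 1000 table
                      :  * above.
                      :  */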
     158             : 
     159             : /*
     160             :  * The time start value for each level to select the bucket at enqueue
     161             :  * time. We start from the last possible delta of the previous level
     162             :  * so that we can later add an extra LVL_GRAN(n) to n (see calc_index()).
     163             :  */
     164             : #define LVL_START(n)    ((LVL_SIZE - 1) << (((n) - 1) * LVL_CLK_SHIFT))
     165             : 
     166             : /* Size of each clock level */
     167             : #define LVL_BITS        6
     168             : #define LVL_SIZE        (1UL << LVL_BITS)
     169             : #define LVL_MASK        (LVL_SIZE - 1)
     170             : #define LVL_OFFS(n)     ((n) * LVL_SIZE)
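                      :
                      : /*
                      :  * Worked example: with LVL_BITS == 6, LVL_SIZE == 64, so
                      :  *
                      :  *   LVL_START(1) = 63 << 0 = 63
                      :  *   LVL_START(2) = 63 << 3 = 504
                      :  *   LVL_OFFS(2)  = 2 * 64  = 128
                      :  *
                      :  * A relative expiry (delta) below 63 jiffies is enqueued into level 0,
                      :  * one in [63, 504) into level 1, and so on (see calc_wheel_index());
                      :  * level 2 occupies buckets 128..191 of the wheel.
                      :  */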
     171             : 
     172             : /* Level depth */
     173             : #if HZ > 100
     174             : # define LVL_DEPTH      9
      175             : #else
     176             : # define LVL_DEPTH      8
     177             : #endif
     178             : 
     179             : /* The cutoff (max. capacity of the wheel) */
     180             : #define WHEEL_TIMEOUT_CUTOFF    (LVL_START(LVL_DEPTH))
     181             : #define WHEEL_TIMEOUT_MAX       (WHEEL_TIMEOUT_CUTOFF - LVL_GRAN(LVL_DEPTH - 1))
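                      :
                      : /*
                      :  * The tables above can be reproduced from these macros. A minimal,
                      :  * standalone userspace sketch (not kernel code; assumes HZ == 1000 and
                      :  * LVL_DEPTH == 9):
                      :  *
                      :  *      #include <stdio.h>
                      :  *
                      :  *      #define LVL_CLK_SHIFT 3
                      :  *      #define LVL_SHIFT(n)  ((n) * LVL_CLK_SHIFT)
                      :  *      #define LVL_GRAN(n)   (1UL << LVL_SHIFT(n))
                      :  *      #define LVL_BITS      6
                      :  *      #define LVL_SIZE      (1UL << LVL_BITS)
                      :  *      #define LVL_START(n)  \
                      :  *              ((LVL_SIZE - 1) << (((n) - 1) * LVL_CLK_SHIFT))
                      :  *      #define LVL_DEPTH     9
                      :  *
                      :  *      int main(void)
                      :  *      {
                      :  *              int lvl;
                      :  *
                      :  *              for (lvl = 0; lvl < LVL_DEPTH; lvl++)
                      :  *                      printf("level %d: offset %3lu, gran %8lu ms\n",
                      :  *                             lvl, lvl * LVL_SIZE, LVL_GRAN(lvl));
                      :  *              printf("cutoff: %lu ms\n", LVL_START(LVL_DEPTH));
                      :  *              return 0;
                      :  *      }
                      :  *
                      :  * This prints the 1/8/64/... ms per-level granularities and a cutoff of
                      :  * 1056964608 ms (~12 days), matching the HZ 1000 table.
                      :  */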
     182             : 
     183             : /*
     184             :  * The resulting wheel size. If NOHZ is configured we allocate two
      185             :  * wheels so we have separate storage for the deferrable timers.
     186             :  */
     187             : #define WHEEL_SIZE      (LVL_SIZE * LVL_DEPTH)
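                      :
                      : /*
                      :  * With LVL_BITS == 6 and LVL_DEPTH == 9 (HZ > 100) this is 64 * 9 = 576
                      :  * buckets per wheel; the pending_map bitmap in struct timer_base below
                      :  * is therefore 576 bits (72 bytes) per base.
                      :  */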
     188             : 
     189             : #ifdef CONFIG_NO_HZ_COMMON
     190             : # define NR_BASES       2
     191             : # define BASE_STD       0
     192             : # define BASE_DEF       1
     193             : #else
     194             : # define NR_BASES       1
     195             : # define BASE_STD       0
     196             : # define BASE_DEF       0
     197             : #endif
     198             : 
     199             : struct timer_base {
     200             :         raw_spinlock_t          lock;
     201             :         struct timer_list       *running_timer;
     202             : #ifdef CONFIG_PREEMPT_RT
     203             :         spinlock_t              expiry_lock;
     204             :         atomic_t                timer_waiters;
     205             : #endif
     206             :         unsigned long           clk;
     207             :         unsigned long           next_expiry;
     208             :         unsigned int            cpu;
     209             :         bool                    next_expiry_recalc;
     210             :         bool                    is_idle;
     211             :         bool                    timers_pending;
     212             :         DECLARE_BITMAP(pending_map, WHEEL_SIZE);
     213             :         struct hlist_head       vectors[WHEEL_SIZE];
     214             : } ____cacheline_aligned;
     215             : 
     216             : static DEFINE_PER_CPU(struct timer_base, timer_bases[NR_BASES]);
     217             : 
     218             : #ifdef CONFIG_NO_HZ_COMMON
     219             : 
     220             : static DEFINE_STATIC_KEY_FALSE(timers_nohz_active);
     221             : static DEFINE_MUTEX(timer_keys_mutex);
     222             : 
     223             : static void timer_update_keys(struct work_struct *work);
     224             : static DECLARE_WORK(timer_update_work, timer_update_keys);
     225             : 
     226             : #ifdef CONFIG_SMP
     227             : static unsigned int sysctl_timer_migration = 1;
     228             : 
     229             : DEFINE_STATIC_KEY_FALSE(timers_migration_enabled);
     230             : 
     231             : static void timers_update_migration(void)
     232             : {
     233             :         if (sysctl_timer_migration && tick_nohz_active)
     234             :                 static_branch_enable(&timers_migration_enabled);
     235             :         else
     236             :                 static_branch_disable(&timers_migration_enabled);
     237             : }
     238             : 
     239             : #ifdef CONFIG_SYSCTL
     240             : static int timer_migration_handler(struct ctl_table *table, int write,
     241             :                             void *buffer, size_t *lenp, loff_t *ppos)
     242             : {
     243             :         int ret;
     244             : 
     245             :         mutex_lock(&timer_keys_mutex);
     246             :         ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
     247             :         if (!ret && write)
     248             :                 timers_update_migration();
     249             :         mutex_unlock(&timer_keys_mutex);
     250             :         return ret;
     251             : }
     252             : 
     253             : static struct ctl_table timer_sysctl[] = {
     254             :         {
     255             :                 .procname       = "timer_migration",
     256             :                 .data           = &sysctl_timer_migration,
     257             :                 .maxlen         = sizeof(unsigned int),
     258             :                 .mode           = 0644,
     259             :                 .proc_handler   = timer_migration_handler,
     260             :                 .extra1         = SYSCTL_ZERO,
     261             :                 .extra2         = SYSCTL_ONE,
     262             :         },
     263             :         {}
     264             : };
     265             : 
     266             : static int __init timer_sysctl_init(void)
     267             : {
     268             :         register_sysctl("kernel", timer_sysctl);
     269             :         return 0;
     270             : }
     271             : device_initcall(timer_sysctl_init);
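                      :
                      : /*
                      :  * Usage example: the knob registered above appears as
                      :  * /proc/sys/kernel/timer_migration and accepts 0 or 1 (clamped by
                      :  * extra1/extra2):
                      :  *
                      :  *      # echo 0 > /proc/sys/kernel/timer_migration
                      :  *
                      :  * A write takes timer_keys_mutex and updates the
                      :  * timers_migration_enabled static branch via timers_update_migration().
                      :  */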
     272             : #endif /* CONFIG_SYSCTL */
     273             : #else /* CONFIG_SMP */
     274             : static inline void timers_update_migration(void) { }
     275             : #endif /* !CONFIG_SMP */
     276             : 
     277             : static void timer_update_keys(struct work_struct *work)
     278             : {
     279             :         mutex_lock(&timer_keys_mutex);
     280             :         timers_update_migration();
     281             :         static_branch_enable(&timers_nohz_active);
     282             :         mutex_unlock(&timer_keys_mutex);
     283             : }
     284             : 
     285             : void timers_update_nohz(void)
     286             : {
     287             :         schedule_work(&timer_update_work);
     288             : }
     289             : 
     290             : static inline bool is_timers_nohz_active(void)
     291             : {
     292             :         return static_branch_unlikely(&timers_nohz_active);
     293             : }
     294             : #else
     295             : static inline bool is_timers_nohz_active(void) { return false; }
     296             : #endif /* NO_HZ_COMMON */
     297             : 
     298             : static unsigned long round_jiffies_common(unsigned long j, int cpu,
     299             :                 bool force_up)
     300             : {
     301             :         int rem;
     302           0 :         unsigned long original = j;
     303             : 
     304             :         /*
     305             :          * We don't want all cpus firing their timers at once hitting the
     306             :          * same lock or cachelines, so we skew each extra cpu with an extra
     307             :          * 3 jiffies. This 3 jiffies came originally from the mm/ code which
     308             :          * already did this.
      309             :  * The skew is done by adding 3*cpunr, then rounding, then subtracting
      310             :  * this extra offset again.
     311             :          */
     312           0 :         j += cpu * 3;
     313             : 
     314           0 :         rem = j % HZ;
     315             : 
     316             :         /*
      317             :          * If the target jiffy is just after a whole second (which can happen
      318             :          * due to delays of the timer irq, long irq-off times, etc.) then
      319             :          * we should round down to the whole second, not up. Use 1/4th second
      320             :          * as the cutoff for this rounding, as an extreme upper bound.
     321             :          * But never round down if @force_up is set.
     322             :          */
     323           0 :         if (rem < HZ/4 && !force_up) /* round down */
     324           0 :                 j = j - rem;
     325             :         else /* round up */
     326           0 :                 j = j - rem + HZ;
     327             : 
     328             :         /* now that we have rounded, subtract the extra skew again */
     329           0 :         j -= cpu * 3;
     330             : 
     331             :         /*
     332             :          * Make sure j is still in the future. Otherwise return the
     333             :          * unmodified value.
     334             :          */
     335           0 :         return time_is_after_jiffies(j) ? j : original;
     336             : }
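                      :
                      : /*
                      :  * Worked example for round_jiffies_common() (assuming HZ == 1000,
                      :  * cpu == 1, j == 5100):
                      :  *
                      :  *   j += 1 * 3;          j == 5103
                      :  *   rem = 5103 % 1000;   rem == 103, below HZ/4, so round down
                      :  *   j -= rem;            j == 5000
                      :  *   j -= 1 * 3;          j == 4997
                      :  *
                      :  * CPU 1 thus fires 3 jiffies before the whole second while CPU 0 fires
                      :  * exactly on it, spreading the wakeups as described above.
                      :  */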
     337             : 
     338             : /**
     339             :  * __round_jiffies - function to round jiffies to a full second
     340             :  * @j: the time in (absolute) jiffies that should be rounded
     341             :  * @cpu: the processor number on which the timeout will happen
     342             :  *
     343             :  * __round_jiffies() rounds an absolute time in the future (in jiffies)
     344             :  * up or down to (approximately) full seconds. This is useful for timers
     345             :  * for which the exact time they fire does not matter too much, as long as
     346             :  * they fire approximately every X seconds.
     347             :  *
     348             :  * By rounding these timers to whole seconds, all such timers will fire
     349             :  * at the same time, rather than at various times spread out. The goal
     350             :  * of this is to have the CPU wake up less, which saves power.
     351             :  *
     352             :  * The exact rounding is skewed for each processor to avoid all
     353             :  * processors firing at the exact same time, which could lead
     354             :  * to lock contention or spurious cache line bouncing.
     355             :  *
     356             :  * The return value is the rounded version of the @j parameter.
     357             :  */
     358           0 : unsigned long __round_jiffies(unsigned long j, int cpu)
     359             : {
     360           0 :         return round_jiffies_common(j, cpu, false);
     361             : }
     362             : EXPORT_SYMBOL_GPL(__round_jiffies);
     363             : 
     364             : /**
     365             :  * __round_jiffies_relative - function to round jiffies to a full second
     366             :  * @j: the time in (relative) jiffies that should be rounded
     367             :  * @cpu: the processor number on which the timeout will happen
     368             :  *
      369             :  * __round_jiffies_relative() rounds a time delta in the future (in jiffies)
     370             :  * up or down to (approximately) full seconds. This is useful for timers
     371             :  * for which the exact time they fire does not matter too much, as long as
     372             :  * they fire approximately every X seconds.
     373             :  *
     374             :  * By rounding these timers to whole seconds, all such timers will fire
     375             :  * at the same time, rather than at various times spread out. The goal
     376             :  * of this is to have the CPU wake up less, which saves power.
     377             :  *
     378             :  * The exact rounding is skewed for each processor to avoid all
     379             :  * processors firing at the exact same time, which could lead
     380             :  * to lock contention or spurious cache line bouncing.
     381             :  *
     382             :  * The return value is the rounded version of the @j parameter.
     383             :  */
     384           0 : unsigned long __round_jiffies_relative(unsigned long j, int cpu)
     385             : {
     386           0 :         unsigned long j0 = jiffies;
     387             : 
     388             :         /* Use j0 because jiffies might change while we run */
     389           0 :         return round_jiffies_common(j + j0, cpu, false) - j0;
     390             : }
     391             : EXPORT_SYMBOL_GPL(__round_jiffies_relative);
     392             : 
     393             : /**
     394             :  * round_jiffies - function to round jiffies to a full second
     395             :  * @j: the time in (absolute) jiffies that should be rounded
     396             :  *
     397             :  * round_jiffies() rounds an absolute time in the future (in jiffies)
     398             :  * up or down to (approximately) full seconds. This is useful for timers
     399             :  * for which the exact time they fire does not matter too much, as long as
     400             :  * they fire approximately every X seconds.
     401             :  *
     402             :  * By rounding these timers to whole seconds, all such timers will fire
     403             :  * at the same time, rather than at various times spread out. The goal
     404             :  * of this is to have the CPU wake up less, which saves power.
     405             :  *
     406             :  * The return value is the rounded version of the @j parameter.
     407             :  */
     408           0 : unsigned long round_jiffies(unsigned long j)
     409             : {
     410           0 :         return round_jiffies_common(j, raw_smp_processor_id(), false);
     411             : }
     412             : EXPORT_SYMBOL_GPL(round_jiffies);
     413             : 
     414             : /**
     415             :  * round_jiffies_relative - function to round jiffies to a full second
     416             :  * @j: the time in (relative) jiffies that should be rounded
     417             :  *
      418             :  * round_jiffies_relative() rounds a time delta in the future (in jiffies)
     419             :  * up or down to (approximately) full seconds. This is useful for timers
     420             :  * for which the exact time they fire does not matter too much, as long as
     421             :  * they fire approximately every X seconds.
     422             :  *
     423             :  * By rounding these timers to whole seconds, all such timers will fire
     424             :  * at the same time, rather than at various times spread out. The goal
     425             :  * of this is to have the CPU wake up less, which saves power.
     426             :  *
     427             :  * The return value is the rounded version of the @j parameter.
     428             :  */
     429           0 : unsigned long round_jiffies_relative(unsigned long j)
     430             : {
     431           0 :         return __round_jiffies_relative(j, raw_smp_processor_id());
     432             : }
     433             : EXPORT_SYMBOL_GPL(round_jiffies_relative);
     434             : 
     435             : /**
     436             :  * __round_jiffies_up - function to round jiffies up to a full second
     437             :  * @j: the time in (absolute) jiffies that should be rounded
     438             :  * @cpu: the processor number on which the timeout will happen
     439             :  *
     440             :  * This is the same as __round_jiffies() except that it will never
     441             :  * round down.  This is useful for timeouts for which the exact time
     442             :  * of firing does not matter too much, as long as they don't fire too
     443             :  * early.
     444             :  */
     445           0 : unsigned long __round_jiffies_up(unsigned long j, int cpu)
     446             : {
     447           0 :         return round_jiffies_common(j, cpu, true);
     448             : }
     449             : EXPORT_SYMBOL_GPL(__round_jiffies_up);
     450             : 
     451             : /**
     452             :  * __round_jiffies_up_relative - function to round jiffies up to a full second
     453             :  * @j: the time in (relative) jiffies that should be rounded
     454             :  * @cpu: the processor number on which the timeout will happen
     455             :  *
     456             :  * This is the same as __round_jiffies_relative() except that it will never
     457             :  * round down.  This is useful for timeouts for which the exact time
     458             :  * of firing does not matter too much, as long as they don't fire too
     459             :  * early.
     460             :  */
     461           0 : unsigned long __round_jiffies_up_relative(unsigned long j, int cpu)
     462             : {
     463           0 :         unsigned long j0 = jiffies;
     464             : 
     465             :         /* Use j0 because jiffies might change while we run */
     466           0 :         return round_jiffies_common(j + j0, cpu, true) - j0;
     467             : }
     468             : EXPORT_SYMBOL_GPL(__round_jiffies_up_relative);
     469             : 
     470             : /**
     471             :  * round_jiffies_up - function to round jiffies up to a full second
     472             :  * @j: the time in (absolute) jiffies that should be rounded
     473             :  *
     474             :  * This is the same as round_jiffies() except that it will never
     475             :  * round down.  This is useful for timeouts for which the exact time
     476             :  * of firing does not matter too much, as long as they don't fire too
     477             :  * early.
     478             :  */
     479           0 : unsigned long round_jiffies_up(unsigned long j)
     480             : {
     481           0 :         return round_jiffies_common(j, raw_smp_processor_id(), true);
     482             : }
     483             : EXPORT_SYMBOL_GPL(round_jiffies_up);
     484             : 
     485             : /**
     486             :  * round_jiffies_up_relative - function to round jiffies up to a full second
     487             :  * @j: the time in (relative) jiffies that should be rounded
     488             :  *
     489             :  * This is the same as round_jiffies_relative() except that it will never
     490             :  * round down.  This is useful for timeouts for which the exact time
     491             :  * of firing does not matter too much, as long as they don't fire too
     492             :  * early.
     493             :  */
     494           0 : unsigned long round_jiffies_up_relative(unsigned long j)
     495             : {
     496           0 :         return __round_jiffies_up_relative(j, raw_smp_processor_id());
     497             : }
     498             : EXPORT_SYMBOL_GPL(round_jiffies_up_relative);
     499             : 
     500             : 
     501             : static inline unsigned int timer_get_idx(struct timer_list *timer)
     502             : {
     503         858 :         return (timer->flags & TIMER_ARRAYMASK) >> TIMER_ARRAYSHIFT;
     504             : }
     505             : 
     506             : static inline void timer_set_idx(struct timer_list *timer, unsigned int idx)
     507             : {
     508         928 :         timer->flags = (timer->flags & ~TIMER_ARRAYMASK) |
     509         464 :                         idx << TIMER_ARRAYSHIFT;
     510             : }
     511             : 
     512             : /*
     513             :  * Helper function to calculate the array index for a given expiry
     514             :  * time.
     515             :  */
     516             : static inline unsigned calc_index(unsigned long expires, unsigned lvl,
     517             :                                   unsigned long *bucket_expiry)
     518             : {
     519             : 
     520             :         /*
     521             :          * The timer wheel has to guarantee that a timer does not fire
     522             :          * early. Early expiry can happen due to:
     523             :          * - Timer is armed at the edge of a tick
     524             :          * - Truncation of the expiry time in the outer wheel levels
     525             :          *
     526             :          * Round up with level granularity to prevent this.
     527             :          */
     528         464 :         expires = (expires >> LVL_SHIFT(lvl)) + 1;
     529         464 :         *bucket_expiry = expires << LVL_SHIFT(lvl);
     530         464 :         return LVL_OFFS(lvl) + (expires & LVL_MASK);
     531             : }
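                      :
                      : /*
                      :  * Worked example: expires == 1000, lvl == 1 (LVL_SHIFT(1) == 3):
                      :  *
                      :  *   expires        = (1000 >> 3) + 1          = 126
                      :  *   *bucket_expiry = 126 << 3                 = 1008
                      :  *   index          = LVL_OFFS(1) + (126 & 63) = 64 + 62 = 126
                      :  *
                      :  * The round-up by one granularity unit (1000 -> 1008) is what
                      :  * guarantees that the timer never fires early.
                      :  */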
     532             : 
     533         464 : static int calc_wheel_index(unsigned long expires, unsigned long clk,
     534             :                             unsigned long *bucket_expiry)
     535             : {
     536         464 :         unsigned long delta = expires - clk;
     537             :         unsigned int idx;
     538             : 
     539         464 :         if (delta < LVL_START(1)) {
     540          86 :                 idx = calc_index(expires, 0, bucket_expiry);
     541         378 :         } else if (delta < LVL_START(2)) {
     542           7 :                 idx = calc_index(expires, 1, bucket_expiry);
     543         371 :         } else if (delta < LVL_START(3)) {
     544           3 :                 idx = calc_index(expires, 2, bucket_expiry);
     545         368 :         } else if (delta < LVL_START(4)) {
     546         367 :                 idx = calc_index(expires, 3, bucket_expiry);
     547           1 :         } else if (delta < LVL_START(5)) {
     548           0 :                 idx = calc_index(expires, 4, bucket_expiry);
     549           1 :         } else if (delta < LVL_START(6)) {
     550           0 :                 idx = calc_index(expires, 5, bucket_expiry);
     551           1 :         } else if (delta < LVL_START(7)) {
     552           1 :                 idx = calc_index(expires, 6, bucket_expiry);
     553             :         } else if (LVL_DEPTH > 8 && delta < LVL_START(8)) {
     554             :                 idx = calc_index(expires, 7, bucket_expiry);
     555           0 :         } else if ((long) delta < 0) {
     556           0 :                 idx = clk & LVL_MASK;
     557           0 :                 *bucket_expiry = clk;
     558             :         } else {
     559             :                 /*
     560             :                  * Force expire obscene large timeouts to expire at the
     561             :                  * capacity limit of the wheel.
     562             :                  */
     563           0 :                 if (delta >= WHEEL_TIMEOUT_CUTOFF)
     564           0 :                         expires = clk + WHEEL_TIMEOUT_MAX;
     565             : 
     566           0 :                 idx = calc_index(expires, LVL_DEPTH - 1, bucket_expiry);
     567             :         }
     568         464 :         return idx;
     569             : }
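                      :
                      : /*
                      :  * Worked example: clk == 1000, expires == 1070, so delta == 70. Since
                      :  * LVL_START(1) == 63 <= 70 < LVL_START(2) == 504, level 1 is chosen and
                      :  * calc_index(1070, 1) yields:
                      :  *
                      :  *   (1070 >> 3) + 1 = 134, bucket_expiry = 134 << 3 = 1072,
                      :  *   idx = 64 + (134 & 63) = 70
                      :  *
                      :  * The timer asked for 1070 but its bucket expires at 1072: up to
                      :  * LVL_GRAN(1) == 8 jiffies late, never early.
                      :  */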
     570             : 
     571             : static void
     572             : trigger_dyntick_cpu(struct timer_base *base, struct timer_list *timer)
     573             : {
     574             :         if (!is_timers_nohz_active())
     575             :                 return;
     576             : 
     577             :         /*
     578             :          * TODO: This wants some optimizing similar to the code below, but we
     579             :          * will do that when we switch from push to pull for deferrable timers.
     580             :          */
     581             :         if (timer->flags & TIMER_DEFERRABLE) {
     582             :                 if (tick_nohz_full_cpu(base->cpu))
     583             :                         wake_up_nohz_cpu(base->cpu);
     584             :                 return;
     585             :         }
     586             : 
     587             :         /*
     588             :          * We might have to IPI the remote CPU if the base is idle and the
     589             :          * timer is not deferrable. If the other CPU is on the way to idle
     590             :          * then it can't set base->is_idle as we hold the base lock:
     591             :          */
     592             :         if (base->is_idle)
     593             :                 wake_up_nohz_cpu(base->cpu);
     594             : }
     595             : 
     596             : /*
     597             :  * Enqueue the timer into the hash bucket, mark it pending in
      598             :  * the bitmap, store the index in the timer flags, then wake up
     599             :  * the target CPU if needed.
     600             :  */
     601         464 : static void enqueue_timer(struct timer_base *base, struct timer_list *timer,
     602             :                           unsigned int idx, unsigned long bucket_expiry)
     603             : {
     604             : 
     605         928 :         hlist_add_head(&timer->entry, base->vectors + idx);
     606         928 :         __set_bit(idx, base->pending_map);
     607         928 :         timer_set_idx(timer, idx);
     608             : 
     609         464 :         trace_timer_start(timer, timer->expires, timer->flags);
     610             : 
     611             :         /*
     612             :          * Check whether this is the new first expiring timer. The
     613             :          * effective expiry time of the timer is required here
     614             :          * (bucket_expiry) instead of timer->expires.
     615             :          */
     616         464 :         if (time_before(bucket_expiry, base->next_expiry)) {
     617             :                 /*
     618             :                  * Set the next expiry time and kick the CPU so it
     619             :                  * can reevaluate the wheel:
     620             :                  */
     621          76 :                 base->next_expiry = bucket_expiry;
     622          76 :                 base->timers_pending = true;
     623          76 :                 base->next_expiry_recalc = false;
     624          76 :                 trigger_dyntick_cpu(base, timer);
     625             :         }
     626         464 : }
     627             : 
     628         464 : static void internal_add_timer(struct timer_base *base, struct timer_list *timer)
     629             : {
     630             :         unsigned long bucket_expiry;
     631             :         unsigned int idx;
     632             : 
     633         464 :         idx = calc_wheel_index(timer->expires, base->clk, &bucket_expiry);
     634         464 :         enqueue_timer(base, timer, idx, bucket_expiry);
     635         464 : }
     636             : 
     637             : #ifdef CONFIG_DEBUG_OBJECTS_TIMERS
     638             : 
     639             : static const struct debug_obj_descr timer_debug_descr;
     640             : 
     641             : struct timer_hint {
     642             :         void    (*function)(struct timer_list *t);
     643             :         long    offset;
     644             : };
     645             : 
     646             : #define TIMER_HINT(fn, container, timr, hintfn)                 \
     647             :         {                                                       \
     648             :                 .function = fn,                                 \
     649             :                 .offset   = offsetof(container, hintfn) -       \
     650             :                             offsetof(container, timr)           \
     651             :         }
     652             : 
     653             : static const struct timer_hint timer_hints[] = {
     654             :         TIMER_HINT(delayed_work_timer_fn,
     655             :                    struct delayed_work, timer, work.func),
     656             :         TIMER_HINT(kthread_delayed_work_timer_fn,
     657             :                    struct kthread_delayed_work, timer, work.func),
     658             : };
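                      :
                      : /*
                      :  * Example: for a struct delayed_work, timer_debug_hint() below resolves
                      :  * the hint through timer_hints[0]:
                      :  *
                      :  *   addr                         == &dwork->timer
                      :  *   addr + timer_hints[0].offset == &dwork->work.func
                      :  *
                      :  * so debugobjects reports the queued work callback rather than the
                      :  * generic delayed_work_timer_fn, identifying the timer's real user.
                      :  */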
     659             : 
     660             : static void *timer_debug_hint(void *addr)
     661             : {
     662             :         struct timer_list *timer = addr;
     663             :         int i;
     664             : 
     665             :         for (i = 0; i < ARRAY_SIZE(timer_hints); i++) {
     666             :                 if (timer_hints[i].function == timer->function) {
     667             :                         void (**fn)(void) = addr + timer_hints[i].offset;
     668             : 
     669             :                         return *fn;
     670             :                 }
     671             :         }
     672             : 
     673             :         return timer->function;
     674             : }
     675             : 
     676             : static bool timer_is_static_object(void *addr)
     677             : {
     678             :         struct timer_list *timer = addr;
     679             : 
     680             :         return (timer->entry.pprev == NULL &&
     681             :                 timer->entry.next == TIMER_ENTRY_STATIC);
     682             : }
     683             : 
     684             : /*
     685             :  * fixup_init is called when:
     686             :  * - an active object is initialized
     687             :  */
     688             : static bool timer_fixup_init(void *addr, enum debug_obj_state state)
     689             : {
     690             :         struct timer_list *timer = addr;
     691             : 
     692             :         switch (state) {
     693             :         case ODEBUG_STATE_ACTIVE:
     694             :                 del_timer_sync(timer);
     695             :                 debug_object_init(timer, &timer_debug_descr);
     696             :                 return true;
     697             :         default:
     698             :                 return false;
     699             :         }
     700             : }
     701             : 
     702             : /* Stub timer callback for improperly used timers. */
     703             : static void stub_timer(struct timer_list *unused)
     704             : {
     705             :         WARN_ON(1);
     706             : }
     707             : 
     708             : /*
     709             :  * fixup_activate is called when:
     710             :  * - an active object is activated
     711             :  * - an unknown non-static object is activated
     712             :  */
     713             : static bool timer_fixup_activate(void *addr, enum debug_obj_state state)
     714             : {
     715             :         struct timer_list *timer = addr;
     716             : 
     717             :         switch (state) {
     718             :         case ODEBUG_STATE_NOTAVAILABLE:
     719             :                 timer_setup(timer, stub_timer, 0);
     720             :                 return true;
     721             : 
     722             :         case ODEBUG_STATE_ACTIVE:
     723             :                 WARN_ON(1);
     724             :                 fallthrough;
     725             :         default:
     726             :                 return false;
     727             :         }
     728             : }
     729             : 
     730             : /*
     731             :  * fixup_free is called when:
     732             :  * - an active object is freed
     733             :  */
     734             : static bool timer_fixup_free(void *addr, enum debug_obj_state state)
     735             : {
     736             :         struct timer_list *timer = addr;
     737             : 
     738             :         switch (state) {
     739             :         case ODEBUG_STATE_ACTIVE:
     740             :                 del_timer_sync(timer);
     741             :                 debug_object_free(timer, &timer_debug_descr);
     742             :                 return true;
     743             :         default:
     744             :                 return false;
     745             :         }
     746             : }
     747             : 
     748             : /*
     749             :  * fixup_assert_init is called when:
     750             :  * - an untracked/uninit-ed object is found
     751             :  */
     752             : static bool timer_fixup_assert_init(void *addr, enum debug_obj_state state)
     753             : {
     754             :         struct timer_list *timer = addr;
     755             : 
     756             :         switch (state) {
     757             :         case ODEBUG_STATE_NOTAVAILABLE:
     758             :                 timer_setup(timer, stub_timer, 0);
     759             :                 return true;
     760             :         default:
     761             :                 return false;
     762             :         }
     763             : }
     764             : 
     765             : static const struct debug_obj_descr timer_debug_descr = {
     766             :         .name                   = "timer_list",
     767             :         .debug_hint             = timer_debug_hint,
     768             :         .is_static_object       = timer_is_static_object,
     769             :         .fixup_init             = timer_fixup_init,
     770             :         .fixup_activate         = timer_fixup_activate,
     771             :         .fixup_free             = timer_fixup_free,
     772             :         .fixup_assert_init      = timer_fixup_assert_init,
     773             : };
     774             : 
     775             : static inline void debug_timer_init(struct timer_list *timer)
     776             : {
     777             :         debug_object_init(timer, &timer_debug_descr);
     778             : }
     779             : 
     780             : static inline void debug_timer_activate(struct timer_list *timer)
     781             : {
     782             :         debug_object_activate(timer, &timer_debug_descr);
     783             : }
     784             : 
     785             : static inline void debug_timer_deactivate(struct timer_list *timer)
     786             : {
     787             :         debug_object_deactivate(timer, &timer_debug_descr);
     788             : }
     789             : 
     790             : static inline void debug_timer_assert_init(struct timer_list *timer)
     791             : {
     792             :         debug_object_assert_init(timer, &timer_debug_descr);
     793             : }
     794             : 
     795             : static void do_init_timer(struct timer_list *timer,
     796             :                           void (*func)(struct timer_list *),
     797             :                           unsigned int flags,
     798             :                           const char *name, struct lock_class_key *key);
     799             : 
     800             : void init_timer_on_stack_key(struct timer_list *timer,
     801             :                              void (*func)(struct timer_list *),
     802             :                              unsigned int flags,
     803             :                              const char *name, struct lock_class_key *key)
     804             : {
     805             :         debug_object_init_on_stack(timer, &timer_debug_descr);
     806             :         do_init_timer(timer, func, flags, name, key);
     807             : }
     808             : EXPORT_SYMBOL_GPL(init_timer_on_stack_key);
     809             : 
     810             : void destroy_timer_on_stack(struct timer_list *timer)
     811             : {
     812             :         debug_object_free(timer, &timer_debug_descr);
     813             : }
     814             : EXPORT_SYMBOL_GPL(destroy_timer_on_stack);
     815             : 
     816             : #else
     817             : static inline void debug_timer_init(struct timer_list *timer) { }
     818             : static inline void debug_timer_activate(struct timer_list *timer) { }
     819             : static inline void debug_timer_deactivate(struct timer_list *timer) { }
     820             : static inline void debug_timer_assert_init(struct timer_list *timer) { }
     821             : #endif
     822             : 
     823             : static inline void debug_init(struct timer_list *timer)
     824             : {
     825         432 :         debug_timer_init(timer);
     826         432 :         trace_timer_init(timer);
     827             : }
     828             : 
     829             : static inline void debug_deactivate(struct timer_list *timer)
     830             : {
     831         461 :         debug_timer_deactivate(timer);
     832         461 :         trace_timer_cancel(timer);
     833             : }
     834             : 
     835             : static inline void debug_assert_init(struct timer_list *timer)
     836             : {
     837         925 :         debug_timer_assert_init(timer);
     838             : }
     839             : 
     840         432 : static void do_init_timer(struct timer_list *timer,
     841             :                           void (*func)(struct timer_list *),
     842             :                           unsigned int flags,
     843             :                           const char *name, struct lock_class_key *key)
     844             : {
     845         432 :         timer->entry.pprev = NULL;
     846         432 :         timer->function = func;
     847         432 :         if (WARN_ON_ONCE(flags & ~TIMER_INIT_FLAGS))
     848           0 :                 flags &= TIMER_INIT_FLAGS;
     849         432 :         timer->flags = flags | raw_smp_processor_id();
     850             :         lockdep_init_map(&timer->lockdep_map, name, key, 0);
     851         432 : }
     852             : 
     853             : /**
     854             :  * init_timer_key - initialize a timer
     855             :  * @timer: the timer to be initialized
     856             :  * @func: timer callback function
     857             :  * @flags: timer flags
     858             :  * @name: name of the timer
     859             :  * @key: lockdep class key of the fake lock used for tracking timer
     860             :  *       sync lock dependencies
     861             :  *
      862             :  * init_timer_key() must be called on a timer prior to calling *any* of the
      863             :  * other timer functions.
     864             :  */
     865          10 : void init_timer_key(struct timer_list *timer,
     866             :                     void (*func)(struct timer_list *), unsigned int flags,
     867             :                     const char *name, struct lock_class_key *key)
     868             : {
     869         432 :         debug_init(timer);
     870         432 :         do_init_timer(timer, func, flags, name, key);
     871          10 : }
     872             : EXPORT_SYMBOL(init_timer_key);
     873             : 
     874             : static inline void detach_timer(struct timer_list *timer, bool clear_pending)
     875             : {
     876         461 :         struct hlist_node *entry = &timer->entry;
     877             : 
     878         922 :         debug_deactivate(timer);
     879             : 
     880         461 :         __hlist_del(entry);
     881         369 :         if (clear_pending)
     882         461 :                 entry->pprev = NULL;
     883         461 :         entry->next = LIST_POISON2;
     884             : }
     885             : 
     886         858 : static int detach_if_pending(struct timer_list *timer, struct timer_base *base,
     887             :                              bool clear_pending)
     888             : {
     889        1716 :         unsigned idx = timer_get_idx(timer);
     890             : 
     891         858 :         if (!timer_pending(timer))
     892             :                 return 0;
     893             : 
     894         738 :         if (hlist_is_singular_node(&timer->entry, base->vectors + idx)) {
     895         736 :                 __clear_bit(idx, base->pending_map);
     896         368 :                 base->next_expiry_recalc = true;
     897             :         }
     898             : 
     899         738 :         detach_timer(timer, clear_pending);
     900         369 :         return 1;
     901             : }
     902             : 
     903             : static inline struct timer_base *get_timer_cpu_base(u32 tflags, u32 cpu)
     904             : {
     905         916 :         struct timer_base *base = per_cpu_ptr(&timer_bases[BASE_STD], cpu);
     906             : 
     907             :         /*
     908             :          * If the timer is deferrable and NO_HZ_COMMON is set then we need
     909             :          * to use the deferrable base.
     910             :          */
     911             :         if (IS_ENABLED(CONFIG_NO_HZ_COMMON) && (tflags & TIMER_DEFERRABLE))
     912             :                 base = per_cpu_ptr(&timer_bases[BASE_DEF], cpu);
     913             :         return base;
     914             : }
     915             : 
     916             : static inline struct timer_base *get_timer_this_cpu_base(u32 tflags)
     917             : {
     918         435 :         struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]);
     919             : 
     920             :         /*
     921             :          * If the timer is deferrable and NO_HZ_COMMON is set then we need
     922             :          * to use the deferrable base.
     923             :          */
     924             :         if (IS_ENABLED(CONFIG_NO_HZ_COMMON) && (tflags & TIMER_DEFERRABLE))
     925             :                 base = this_cpu_ptr(&timer_bases[BASE_DEF]);
     926             :         return base;
     927             : }
     928             : 
     929             : static inline struct timer_base *get_timer_base(u32 tflags)
     930             : {
     931         887 :         return get_timer_cpu_base(tflags, tflags & TIMER_CPUMASK);
     932             : }
     933             : 
     934             : static inline struct timer_base *
     935             : get_target_base(struct timer_base *base, unsigned tflags)
     936             : {
     937             : #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
     938             :         if (static_branch_likely(&timers_migration_enabled) &&
     939             :             !(tflags & TIMER_PINNED))
     940             :                 return get_timer_cpu_base(tflags, get_nohz_timer_target());
     941             : #endif
     942         435 :         return get_timer_this_cpu_base(tflags);
     943             : }
     944             : 
     945         464 : static inline void forward_timer_base(struct timer_base *base)
     946             : {
     947         464 :         unsigned long jnow = READ_ONCE(jiffies);
     948             : 
     949             :         /*
     950             :          * No need to forward if we are close enough below jiffies.
     951             :          * Also while executing timers, base->clk is 1 offset ahead
     952             :          * of jiffies to avoid endless requeuing to current jiffies.
     953             :          */
     954         464 :         if ((long)(jnow - base->clk) < 1)
     955             :                 return;
     956             : 
     957             :         /*
     958             :          * If the next expiry value is > jiffies, then we fast forward to
     959             :          * jiffies otherwise we forward to the next expiry value.
     960             :          */
     961          80 :         if (time_after(base->next_expiry, jnow)) {
     962          80 :                 base->clk = jnow;
     963             :         } else {
     964           0 :                 if (WARN_ON_ONCE(time_before(base->next_expiry, base->clk)))
     965             :                         return;
     966           0 :                 base->clk = base->next_expiry;
     967             :         }
     968             : }
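                      :
                      : /*
                      :  * Worked example for forward_timer_base(): base->clk == 1000,
                      :  * jiffies == 1050:
                      :  *
                      :  *   - next_expiry == 1080 (after jiffies):  clk is forwarded to 1050
                      :  *   - next_expiry == 1020 (before jiffies): clk is forwarded to 1020
                      :  *
                      :  * Either way base->clk never moves past the first pending expiry.
                      :  */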
     969             : 
     970             : 
     971             : /*
     972             :  * We are using hashed locking: Holding per_cpu(timer_bases[x]).lock means
     973             :  * that all timers which are tied to this base are locked, and the base itself
     974             :  * is locked too.
     975             :  *
     976             :  * So __run_timers/migrate_timers can safely modify all timers which could
     977             :  * be found in the base->vectors array.
     978             :  *
     979             :  * When a timer is migrating then the TIMER_MIGRATING flag is set and we need
     980             :  * to wait until the migration is done.
     981             :  */
     982         887 : static struct timer_base *lock_timer_base(struct timer_list *timer,
     983             :                                           unsigned long *flags)
     984             :         __acquires(timer->base->lock)
     985             : {
     986             :         for (;;) {
     987             :                 struct timer_base *base;
     988             :                 u32 tf;
     989             : 
     990             :                 /*
     991             :                  * We need to use READ_ONCE() here, otherwise the compiler
     992             :                  * might re-read @tf between the check for TIMER_MIGRATING
     993             :                  * and spin_lock().
     994             :                  */
     995         887 :                 tf = READ_ONCE(timer->flags);
     996             : 
     997         887 :                 if (!(tf & TIMER_MIGRATING)) {
     998        1774 :                         base = get_timer_base(tf);
     999         887 :                         raw_spin_lock_irqsave(&base->lock, *flags);
    1000         887 :                         if (timer->flags == tf)
    1001         887 :                                 return base;
    1002           0 :                         raw_spin_unlock_irqrestore(&base->lock, *flags);
    1003             :                 }
    1004             :                 cpu_relax();
    1005             :         }
    1006             : }
    1007             : 
    1008             : #define MOD_TIMER_PENDING_ONLY          0x01
    1009             : #define MOD_TIMER_REDUCE                0x02
    1010             : #define MOD_TIMER_NOTPENDING            0x04
    1011             : 
    1012             : static inline int
    1013         435 : __mod_timer(struct timer_list *timer, unsigned long expires, unsigned int options)
    1014             : {
    1015         435 :         unsigned long clk = 0, flags, bucket_expiry;
    1016             :         struct timer_base *base, *new_base;
    1017         435 :         unsigned int idx = UINT_MAX;
    1018         435 :         int ret = 0;
    1019             : 
    1020         435 :         debug_assert_init(timer);
    1021             : 
    1022             :         /*
    1023             :          * This is a common optimization triggered by the networking code - if
    1024             :          * the timer is re-modified to have the same timeout or ends up in the
    1025             :          * same array bucket then just return:
    1026             :  * same array bucket, then just return:
    1027         437 :         if (!(options & MOD_TIMER_NOTPENDING) && timer_pending(timer)) {
    1028             :                 /*
    1029             :                  * The downside of this optimization is that it can result in
    1030             :                  * larger granularity than you would get from adding a new
    1031             :                  * timer with this expiry.
    1032             :                  */
    1033           0 :                 long diff = timer->expires - expires;
    1034             : 
    1035           0 :                 if (!diff)
    1036             :                         return 1;
    1037           0 :                 if (options & MOD_TIMER_REDUCE && diff <= 0)
    1038             :                         return 1;
    1039             : 
    1040             :                 /*
    1041             :                  * We lock timer base and calculate the bucket index right
    1042             :                  * here. If the timer ends up in the same bucket, then we
    1043             :                  * just update the expiry time and avoid the whole
    1044             :                  * dequeue/enqueue dance.
    1045             :                  */
    1046           0 :                 base = lock_timer_base(timer, &flags);
    1047             :                 /*
    1048             :          * Has @timer been shut down? This needs to be evaluated
    1049             :                  * while holding base lock to prevent a race against the
    1050             :                  * shutdown code.
    1051             :                  */
    1052           0 :                 if (!timer->function)
    1053             :                         goto out_unlock;
    1054             : 
    1055           0 :                 forward_timer_base(base);
    1056             : 
    1057           0 :                 if (timer_pending(timer) && (options & MOD_TIMER_REDUCE) &&
    1058           0 :                     time_before_eq(timer->expires, expires)) {
    1059             :                         ret = 1;
    1060             :                         goto out_unlock;
    1061             :                 }
    1062             : 
    1063           0 :                 clk = base->clk;
    1064           0 :                 idx = calc_wheel_index(expires, clk, &bucket_expiry);
    1065             : 
    1066             :                 /*
    1067             :                  * Retrieve and compare the array index of the pending
    1068             :                  * timer. If it matches, set the expiry to the new value so a
    1069             :                  * subsequent call will exit in the expires check above.
    1070             :                  */
    1071           0 :                 if (idx == timer_get_idx(timer)) {
    1072           0 :                         if (!(options & MOD_TIMER_REDUCE))
    1073           0 :                                 timer->expires = expires;
    1074           0 :                         else if (time_after(timer->expires, expires))
    1075           0 :                                 timer->expires = expires;
    1076             :                         ret = 1;
    1077             :                         goto out_unlock;
    1078             :                 }
    1079             :         } else {
    1080         435 :                 base = lock_timer_base(timer, &flags);
    1081             :                 /*
    1082             :          * Has @timer been shut down? This needs to be evaluated
    1083             :                  * while holding base lock to prevent a race against the
    1084             :                  * shutdown code.
    1085             :                  */
    1086         435 :                 if (!timer->function)
    1087             :                         goto out_unlock;
    1088             : 
    1089         435 :                 forward_timer_base(base);
    1090             :         }
    1091             : 
    1092         435 :         ret = detach_if_pending(timer, base, false);
    1093         435 :         if (!ret && (options & MOD_TIMER_PENDING_ONLY))
    1094             :                 goto out_unlock;
    1095             : 
    1096         870 :         new_base = get_target_base(base, timer->flags);
    1097             : 
    1098         435 :         if (base != new_base) {
    1099             :                 /*
    1100             :                  * We are trying to schedule the timer on the new base.
    1101             :          * However we can't change the timer's base while it is running,
    1102             :          * otherwise timer_delete_sync() can't detect that the timer's
    1103             :          * handler has not yet finished. This also guarantees that the
    1104             :                  * timer is serialized wrt itself.
    1105             :                  */
    1106           0 :                 if (likely(base->running_timer != timer)) {
    1107             :                         /* See the comment in lock_timer_base() */
    1108           0 :                         timer->flags |= TIMER_MIGRATING;
    1109             : 
    1110           0 :                         raw_spin_unlock(&base->lock);
    1111           0 :                         base = new_base;
    1112           0 :                         raw_spin_lock(&base->lock);
    1113           0 :                         WRITE_ONCE(timer->flags,
    1114             :                                    (timer->flags & ~TIMER_BASEMASK) | base->cpu);
    1115           0 :                         forward_timer_base(base);
    1116             :                 }
    1117             :         }
    1118             : 
    1119         435 :         debug_timer_activate(timer);
    1120             : 
    1121         435 :         timer->expires = expires;
    1122             :         /*
    1123             :          * If 'idx' was calculated above and the base time did not advance
    1124             :          * between calculating 'idx' and possibly switching the base, only
    1125             :          * enqueue_timer() is required. Otherwise we need to (re)calculate
    1126             :          * the wheel index via internal_add_timer().
    1127             :          */
    1128         435 :         if (idx != UINT_MAX && clk == base->clk)
    1129           0 :                 enqueue_timer(base, timer, idx, bucket_expiry);
    1130             :         else
    1131         435 :                 internal_add_timer(base, timer);
    1132             : 
    1133             : out_unlock:
    1134         870 :         raw_spin_unlock_irqrestore(&base->lock, flags);
    1135             : 
    1136         435 :         return ret;
    1137             : }
    1138             : 
    1139             : /**
    1140             :  * mod_timer_pending - Modify a pending timer's timeout
    1141             :  * @timer:      The pending timer to be modified
    1142             :  * @expires:    New absolute timeout in jiffies
    1143             :  *
    1144             :  * mod_timer_pending() behaves like mod_timer() for pending timers, but
    1145             :  * will not activate inactive timers.
    1146             :  *
    1147             :  * If @timer->function == NULL then the start operation is silently
    1148             :  * discarded.
    1149             :  *
    1150             :  * Return:
    1151             :  * * %0 - The timer was inactive and not modified or was in
    1152             :  *        shutdown state and the operation was discarded
    1153             :  * * %1 - The timer was active and requeued to expire at @expires
    1154             :  */
    1155           0 : int mod_timer_pending(struct timer_list *timer, unsigned long expires)
    1156             : {
    1157           0 :         return __mod_timer(timer, expires, MOD_TIMER_PENDING_ONLY);
    1158             : }
    1159             : EXPORT_SYMBOL(mod_timer_pending);
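
A minimal usage sketch (illustrative, not part of timer.c; the struct and
function names are hypothetical): refresh an inactivity timeout only while
it is still armed, so a timer that has already fired is not resurrected by
a late refresh:

    #include <linux/timer.h>
    #include <linux/jiffies.h>

    struct session {
            struct timer_list idle_timer;   /* set up with timer_setup() */
    };

    /* Called on every packet: push the deadline out, but only if the
     * timer is still pending. Once it has expired, the teardown path
     * owns @s and must not see it re-armed. */
    static void session_touch(struct session *s)
    {
            mod_timer_pending(&s->idle_timer, jiffies + 30 * HZ);
    }
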
    1160             : 
    1161             : /**
    1162             :  * mod_timer - Modify a timer's timeout
    1163             :  * @timer:      The timer to be modified
    1164             :  * @expires:    New absolute timeout in jiffies
    1165             :  *
    1166             :  * mod_timer(timer, expires) is equivalent to:
    1167             :  *
    1168             :  *     del_timer(timer); timer->expires = expires; add_timer(timer);
    1169             :  *
    1170             :  * mod_timer() is more efficient than the above open-coded sequence. In
    1171             :  * case the timer is inactive, the del_timer() part is a NOP. The
    1172             :  * timer is in any case activated with the new expiry time @expires.
    1173             :  *
    1174             :  * Note that if there are multiple unserialized concurrent users of the
    1175             :  * same timer, then mod_timer() is the only safe way to modify the timeout,
    1176             :  * since add_timer() cannot modify an already running timer.
    1177             :  *
    1178             :  * If @timer->function == NULL then the start operation is silently
    1179             :  * discarded. In this case the return value is 0 and meaningless.
    1180             :  *
    1181             :  * Return:
    1182             :  * * %0 - The timer was inactive and started or was in shutdown
    1183             :  *        state and the operation was discarded
    1184             :  * * %1 - The timer was active and requeued to expire at @expires or
    1185             :  *        the timer was active and not modified because @expires did
    1186             :  *        not change the effective expiry time
    1187             :  */
    1188           2 : int mod_timer(struct timer_list *timer, unsigned long expires)
    1189             : {
    1190           2 :         return __mod_timer(timer, expires, 0);
    1191             : }
    1192             : EXPORT_SYMBOL(mod_timer);
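
A sketch of the common self-rearming watchdog pattern (illustrative, not
part of timer.c; names are hypothetical). A single mod_timer() call both
starts and re-arms the timer, which is what makes it the safe choice for
unserialized concurrent callers:

    #include <linux/timer.h>
    #include <linux/jiffies.h>

    struct my_dev {
            struct timer_list watchdog;
    };

    static void my_watchdog_fn(struct timer_list *t)
    {
            struct my_dev *dev = from_timer(dev, t, watchdog);

            /* ... check device state ... */
            mod_timer(&dev->watchdog, jiffies + HZ);        /* re-arm, 1s */
    }

    static void my_dev_start(struct my_dev *dev)
    {
            timer_setup(&dev->watchdog, my_watchdog_fn, 0);
            mod_timer(&dev->watchdog, jiffies + HZ);        /* first arm */
    }
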
    1193             : 
    1194             : /**
    1195             :  * timer_reduce - Modify a timer's timeout if it would reduce the timeout
    1196             :  * @timer:      The timer to be modified
    1197             :  * @expires:    New absolute timeout in jiffies
    1198             :  *
    1199             :  * timer_reduce() is very similar to mod_timer(), except that it will only
    1200             :  * modify an enqueued timer if that would reduce the expiration time. If
    1201             :  * @timer is not enqueued it starts the timer.
    1202             :  *
    1203             :  * If @timer->function == NULL then the start operation is silently
    1204             :  * discarded.
    1205             :  *
    1206             :  * Return:
    1207             :  * * %0 - The timer was inactive and started or was in shutdown
    1208             :  *        state and the operation was discarded
    1209             :  * * %1 - The timer was active and requeued to expire at @expires or
    1210             :  *        the timer was active and not modified because @expires
    1211             :  *        did not change the effective expiry time such that the
    1212             :  *        timer would expire earlier than already scheduled
    1213             :  */
    1214           0 : int timer_reduce(struct timer_list *timer, unsigned long expires)
    1215             : {
    1216           0 :         return __mod_timer(timer, expires, MOD_TIMER_REDUCE);
    1217             : }
    1218             : EXPORT_SYMBOL(timer_reduce);
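
A sketch of the intended use (illustrative, hypothetical names): several
producers race to set a shared deadline, and only an earlier deadline may
win. Because timer_reduce() never pushes the expiry out, concurrent
callers cannot delay each other:

    struct my_queue {
            struct timer_list flush_timer;  /* set up with timer_setup() */
    };

    /* Every enqueued packet proposes its own flush deadline; the timer
     * ends up firing at the earliest deadline proposed so far. */
    static void queue_packet(struct my_queue *q, unsigned long deadline)
    {
            /* ... enqueue the packet ... */
            timer_reduce(&q->flush_timer, deadline);
    }
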
    1219             : 
    1220             : /**
    1221             :  * add_timer - Start a timer
    1222             :  * @timer:      The timer to be started
    1223             :  *
    1224             :  * Start @timer to expire at @timer->expires in the future. @timer->expires
    1225             :  * is the absolute expiry time measured in 'jiffies'. When the timer expires
    1226             :  * is the absolute expiry time measured in 'jiffies'. When the timer expires,
    1227             :  *
    1228             :  * The @timer->expires and @timer->function fields must be set prior
    1229             :  * to calling this function.
    1230             :  *
    1231             :  * If @timer->function == NULL then the start operation is silently
    1232             :  * discarded.
    1233             :  *
    1234             :  * If @timer->expires is already in the past @timer will be queued to
    1235             :  * expire at the next timer tick.
    1236             :  *
    1237             :  * This can only operate on an inactive timer. Attempts to invoke this on
    1238             :  * an active timer are rejected with a warning.
    1239             :  */
    1240          11 : void add_timer(struct timer_list *timer)
    1241             : {
    1242          11 :         if (WARN_ON_ONCE(timer_pending(timer)))
    1243             :                 return;
    1244          11 :         __mod_timer(timer, timer->expires, MOD_TIMER_NOTPENDING);
    1245             : }
    1246             : EXPORT_SYMBOL(add_timer);
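
A sketch (illustrative, reusing the hypothetical struct my_dev from the
mod_timer() example above): with add_timer() the caller sets @expires
first, and the timer must not already be pending:

    static void my_dev_arm_once(struct my_dev *dev)
    {
            /* The timer must be inactive here; add_timer() warns and
             * bails out if it is already pending. */
            dev->watchdog.expires = jiffies + msecs_to_jiffies(500);
            add_timer(&dev->watchdog);
    }
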
    1247             : 
    1248             : /**
    1249             :  * add_timer_on - Start a timer on a particular CPU
    1250             :  * @timer:      The timer to be started
    1251             :  * @cpu:        The CPU to start it on
    1252             :  *
    1253             :  * Same as add_timer() except that it starts the timer on the given CPU.
    1254             :  *
    1255             :  * See add_timer() for further details.
    1256             :  */
    1257          29 : void add_timer_on(struct timer_list *timer, int cpu)
    1258             : {
    1259             :         struct timer_base *new_base, *base;
    1260             :         unsigned long flags;
    1261             : 
    1262          58 :         debug_assert_init(timer);
    1263             : 
    1264          29 :         if (WARN_ON_ONCE(timer_pending(timer)))
    1265           0 :                 return;
    1266             : 
    1267          58 :         new_base = get_timer_cpu_base(timer->flags, cpu);
    1268             : 
    1269             :         /*
    1270             :          * If @timer was on a different CPU, it should be migrated with the
    1271             :          * old base locked to prevent other operations proceeding with the
    1272             :          * wrong base locked.  See lock_timer_base().
    1273             :          */
    1274          29 :         base = lock_timer_base(timer, &flags);
    1275             :         /*
    1276             :          * Has @timer been shut down? This needs to be evaluated while
    1277             :          * holding base lock to prevent a race against the shutdown code.
    1278             :          */
    1279          29 :         if (!timer->function)
    1280             :                 goto out_unlock;
    1281             : 
    1282          29 :         if (base != new_base) {
    1283           0 :                 timer->flags |= TIMER_MIGRATING;
    1284             : 
    1285           0 :                 raw_spin_unlock(&base->lock);
    1286           0 :                 base = new_base;
    1287           0 :                 raw_spin_lock(&base->lock);
    1288           0 :                 WRITE_ONCE(timer->flags,
    1289             :                            (timer->flags & ~TIMER_BASEMASK) | cpu);
    1290             :         }
    1291          29 :         forward_timer_base(base);
    1292             : 
    1293          29 :         debug_timer_activate(timer);
    1294          29 :         internal_add_timer(base, timer);
    1295             : out_unlock:
    1296          58 :         raw_spin_unlock_irqrestore(&base->lock, flags);
    1297             : }
    1298             : EXPORT_SYMBOL_GPL(add_timer_on);
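
A sketch of per-CPU arming (illustrative; the per-CPU variable is
hypothetical, and each timer is assumed to have been initialized with
timer_setup() beforehand):

    #include <linux/percpu.h>
    #include <linux/cpumask.h>
    #include <linux/timer.h>

    static DEFINE_PER_CPU(struct timer_list, my_percpu_timer);

    static void my_arm_all_cpus(void)
    {
            int cpu;

            for_each_online_cpu(cpu) {
                    struct timer_list *t = per_cpu_ptr(&my_percpu_timer, cpu);

                    t->expires = jiffies + HZ;
                    add_timer_on(t, cpu);   /* fires in that CPU's softirq */
            }
    }
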
    1299             : 
    1300             : /**
    1301             :  * __timer_delete - Internal function: Deactivate a timer
    1302             :  * @timer:      The timer to be deactivated
    1303             :  * @shutdown:   If true, this indicates that the timer is about to be
    1304             :  *              shut down permanently.
    1305             :  *
    1306             :  * If @shutdown is true then @timer->function is set to NULL under the
    1307             :  * timer base lock which prevents further rearming of the timer. In that
    1308             :  * case any attempt to rearm @timer after this function returns will be
    1309             :  * silently ignored.
    1310             :  *
    1311             :  * Return:
    1312             :  * * %0 - The timer was not pending
    1313             :  * * %1 - The timer was pending and deactivated
    1314             :  */
    1315          38 : static int __timer_delete(struct timer_list *timer, bool shutdown)
    1316             : {
    1317             :         struct timer_base *base;
    1318             :         unsigned long flags;
    1319          38 :         int ret = 0;
    1320             : 
    1321          76 :         debug_assert_init(timer);
    1322             : 
    1323             :         /*
    1324             :          * If @shutdown is set then the lock has to be taken whether the
    1325             :          * timer is pending or not to protect against a concurrent rearm
    1326             :          * which might hit between the lockless pending check and the lock
    1327             :          * acquisition. Taking the lock ensures that such a newly
    1328             :          * enqueued timer is dequeued and cannot end up with
    1329             :          * timer->function == NULL in the expiry code.
    1330             :          *
    1331             :          * If timer->function is currently executing, then this makes sure
    1332             :          * that the callback cannot requeue the timer.
    1333             :          */
    1334          38 :         if (timer_pending(timer) || shutdown) {
    1335           0 :                 base = lock_timer_base(timer, &flags);
    1336           0 :                 ret = detach_if_pending(timer, base, true);
    1337           0 :                 if (shutdown)
    1338           0 :                         timer->function = NULL;
    1339           0 :                 raw_spin_unlock_irqrestore(&base->lock, flags);
    1340             :         }
    1341             : 
    1342          38 :         return ret;
    1343             : }
    1344             : 
    1345             : /**
    1346             :  * timer_delete - Deactivate a timer
    1347             :  * @timer:      The timer to be deactivated
    1348             :  *
    1349             :  * The function only deactivates a pending timer, but contrary to
    1350             :  * timer_delete_sync() it does not take into account whether the timer's
    1351             :  * callback function is concurrently executing on a different CPU. Nor
    1352             :  * does it prevent rearming of the timer. If @timer can be rearmed
    1353             :  * concurrently, then the return value of this function is meaningless.
    1354             :  *
    1355             :  * Return:
    1356             :  * * %0 - The timer was not pending
    1357             :  * * %1 - The timer was pending and deactivated
    1358             :  */
    1359          38 : int timer_delete(struct timer_list *timer)
    1360             : {
    1361          38 :         return __timer_delete(timer, false);
    1362             : }
    1363             : EXPORT_SYMBOL(timer_delete);
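
A sketch of a typical non-synchronizing stop (illustrative, reusing the
hypothetical struct my_dev from the mod_timer() example): a suspend path
only needs the timer dequeued, and the matching resume path re-arms
unconditionally, so waiting for a running callback is unnecessary:

    static void my_dev_suspend(struct my_dev *dev)
    {
            /* The callback may still be running on another CPU; that is
             * fine here because the device structure stays allocated. */
            timer_delete(&dev->watchdog);
    }
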
    1364             : 
    1365             : /**
    1366             :  * timer_shutdown - Deactivate a timer and prevent rearming
    1367             :  * @timer:      The timer to be deactivated
    1368             :  *
    1369             :  * The function does not wait for a possibly running timer callback on a
    1370             :  * different CPU, but it prevents rearming of the timer. Any attempt to arm
    1371             :  * @timer after this function returns will be silently ignored.
    1372             :  *
    1373             :  * This function is useful for teardown code and should only be used when
    1374             :  * timer_shutdown_sync() cannot be invoked due to locking or context constraints.
    1375             :  *
    1376             :  * Return:
    1377             :  * * %0 - The timer was not pending
    1378             :  * * %1 - The timer was pending
    1379             :  */
    1380           0 : int timer_shutdown(struct timer_list *timer)
    1381             : {
    1382           0 :         return __timer_delete(timer, true);
    1383             : }
    1384             : EXPORT_SYMBOL_GPL(timer_shutdown);
    1385             : 
    1386             : /**
    1387             :  * __try_to_del_timer_sync - Internal function: Try to deactivate a timer
    1388             :  * @timer:      Timer to deactivate
    1389             :  * @shutdown:   If true, this indicates that the timer is about to be
    1390             :  *              shut down permanently.
    1391             :  *
    1392             :  * If @shutdown is true then @timer->function is set to NULL under the
    1393             :  * timer base lock which prevents further rearming of the timer. Any
    1394             :  * attempt to rearm @timer after this function returns will be silently
    1395             :  * ignored.
    1396             :  *
    1397             :  * This function cannot guarantee that the timer cannot be rearmed
    1398             :  * right after dropping the base lock if @shutdown is false. That
    1399             :  * needs to be prevented by the calling code if necessary.
    1400             :  *
    1401             :  * Return:
    1402             :  * * %0  - The timer was not pending
    1403             :  * * %1  - The timer was pending and deactivated
    1404             :  * * %-1 - The timer callback function is running on a different CPU
    1405             :  */
    1406         423 : static int __try_to_del_timer_sync(struct timer_list *timer, bool shutdown)
    1407             : {
    1408             :         struct timer_base *base;
    1409             :         unsigned long flags;
    1410         423 :         int ret = -1;
    1411             : 
    1412         423 :         debug_assert_init(timer);
    1413             : 
    1414         423 :         base = lock_timer_base(timer, &flags);
    1415             : 
    1416         423 :         if (base->running_timer != timer)
    1417         423 :                 ret = detach_if_pending(timer, base, true);
    1418         423 :         if (shutdown)
    1419           0 :                 timer->function = NULL;
    1420             : 
    1421         846 :         raw_spin_unlock_irqrestore(&base->lock, flags);
    1422             : 
    1423         423 :         return ret;
    1424             : }
    1425             : 
    1426             : /**
    1427             :  * try_to_del_timer_sync - Try to deactivate a timer
    1428             :  * @timer:      Timer to deactivate
    1429             :  *
    1430             :  * This function tries to deactivate a timer. On success the timer is not
    1431             :  * queued and the timer callback function is not running on any CPU.
    1432             :  *
    1433             :  * This function does not guarantee that the timer cannot be rearmed right
    1434             :  * after dropping the base lock. That needs to be prevented by the calling
    1435             :  * code if necessary.
    1436             :  *
    1437             :  * Return:
    1438             :  * * %0  - The timer was not pending
    1439             :  * * %1  - The timer was pending and deactivated
    1440             :  * * %-1 - The timer callback function is running on a different CPU
    1441             :  */
    1442           0 : int try_to_del_timer_sync(struct timer_list *timer)
    1443             : {
    1444           0 :         return __try_to_del_timer_sync(timer, false);
    1445             : }
    1446             : EXPORT_SYMBOL(try_to_del_timer_sync);
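
A sketch of the classic use case (illustrative, hypothetical names):
stopping a timer while holding a lock that the callback also takes.
timer_delete_sync() would deadlock in this situation, so drop the lock
and retry whenever the callback is running elsewhere:

    #include <linux/spinlock.h>
    #include <linux/timer.h>

    struct locked_dev {
            spinlock_t lock;                /* also taken by the callback */
            struct timer_list watchdog;
            bool stopped;
    };

    static void locked_dev_stop(struct locked_dev *dev)
    {
            for (;;) {
                    spin_lock_bh(&dev->lock);
                    if (try_to_del_timer_sync(&dev->watchdog) >= 0)
                            break;          /* 0 or 1: timer is gone */
                    spin_unlock_bh(&dev->lock);
                    cpu_relax();            /* callback running, retry */
            }
            dev->stopped = true;            /* callback checks this flag */
            spin_unlock_bh(&dev->lock);
    }
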
    1447             : 
    1448             : #ifdef CONFIG_PREEMPT_RT
    1449             : static __init void timer_base_init_expiry_lock(struct timer_base *base)
    1450             : {
    1451             :         spin_lock_init(&base->expiry_lock);
    1452             : }
    1453             : 
    1454             : static inline void timer_base_lock_expiry(struct timer_base *base)
    1455             : {
    1456             :         spin_lock(&base->expiry_lock);
    1457             : }
    1458             : 
    1459             : static inline void timer_base_unlock_expiry(struct timer_base *base)
    1460             : {
    1461             :         spin_unlock(&base->expiry_lock);
    1462             : }
    1463             : 
    1464             : /*
    1465             :  * The counterpart to del_timer_wait_running().
    1466             :  *
    1467             :  * If there is a waiter for base->expiry_lock, then it was waiting for the
    1468             :  * timer callback to finish. Drop expiry_lock and reacquire it. That allows
    1469             :  * the waiter to acquire the lock and make progress.
    1470             :  */
    1471             : static void timer_sync_wait_running(struct timer_base *base)
    1472             : {
    1473             :         if (atomic_read(&base->timer_waiters)) {
    1474             :                 raw_spin_unlock_irq(&base->lock);
    1475             :                 spin_unlock(&base->expiry_lock);
    1476             :                 spin_lock(&base->expiry_lock);
    1477             :                 raw_spin_lock_irq(&base->lock);
    1478             :         }
    1479             : }
    1480             : 
    1481             : /*
    1482             :  * This function is called on PREEMPT_RT kernels when the fast path
    1483             :  * deletion of a timer failed because the timer callback function was
    1484             :  * running.
    1485             :  *
    1486             :  * This prevents priority inversion if the softirq thread on a remote CPU
    1487             :  * got preempted, and it prevents a livelock when the task which tries to
    1488             :  * delete a timer preempted the softirq thread running the timer callback
    1489             :  * function.
    1490             :  */
    1491             : static void del_timer_wait_running(struct timer_list *timer)
    1492             : {
    1493             :         u32 tf;
    1494             : 
    1495             :         tf = READ_ONCE(timer->flags);
    1496             :         if (!(tf & (TIMER_MIGRATING | TIMER_IRQSAFE))) {
    1497             :                 struct timer_base *base = get_timer_base(tf);
    1498             : 
    1499             :                 /*
    1500             :                  * Mark the base as contended and grab the expiry lock,
    1501             :                  * which is held by the softirq across the timer
    1502             :                  * callback. Drop the lock immediately so the softirq can
    1503             :                  * expire the next timer. In theory the timer could already
    1504             :                  * be running again, but that's highly unlikely and just
    1505             :                  * causes another wait loop.
    1506             :                  */
    1507             :                 atomic_inc(&base->timer_waiters);
    1508             :                 spin_lock_bh(&base->expiry_lock);
    1509             :                 atomic_dec(&base->timer_waiters);
    1510             :                 spin_unlock_bh(&base->expiry_lock);
    1511             :         }
    1512             : }
    1513             : #else
    1514             : static inline void timer_base_init_expiry_lock(struct timer_base *base) { }
    1515             : static inline void timer_base_lock_expiry(struct timer_base *base) { }
    1516             : static inline void timer_base_unlock_expiry(struct timer_base *base) { }
    1517             : static inline void timer_sync_wait_running(struct timer_base *base) { }
    1518             : static inline void del_timer_wait_running(struct timer_list *timer) { }
    1519             : #endif
    1520             : 
    1521             : /**
    1522             :  * __timer_delete_sync - Internal function: Deactivate a timer and wait
    1523             :  *                       for the handler to finish.
    1524             :  * @timer:      The timer to be deactivated
    1525             :  * @shutdown:   If true, @timer->function will be set to NULL under the
    1526             :  *              timer base lock which prevents rearming of @timer
    1527             :  *
    1528             :  * If @shutdown is not set, the timer can be rearmed later. If the timer can
    1529             :  * be rearmed concurrently, i.e. after dropping the base lock, then the
    1530             :  * return value is meaningless.
    1531             :  *
    1532             :  * If @shutdown is set then @timer->function is set to NULL under timer
    1533             :  * base lock which prevents rearming of the timer. Any attempt to rearm
    1534             :  * a shutdown timer is silently ignored.
    1535             :  *
    1536             :  * If the timer should be reused after shutdown it has to be initialized
    1537             :  * again.
    1538             :  *
    1539             :  * Return:
    1540             :  * * %0 - The timer was not pending
    1541             :  * * %1 - The timer was pending and deactivated
    1542             :  */
    1543         423 : static int __timer_delete_sync(struct timer_list *timer, bool shutdown)
    1544             : {
    1545             :         int ret;
    1546             : 
    1547             : #ifdef CONFIG_LOCKDEP
    1548             :         unsigned long flags;
    1549             : 
    1550             :         /*
    1551             :          * If lockdep gives a backtrace here, please reference
    1552             :          * the synchronization rules above.
    1553             :          */
    1554             :         local_irq_save(flags);
    1555             :         lock_map_acquire(&timer->lockdep_map);
    1556             :         lock_map_release(&timer->lockdep_map);
    1557             :         local_irq_restore(flags);
    1558             : #endif
    1559             :         /*
    1560             :          * Don't use this function in hardirq context, because it
    1561             :          * could lead to deadlock.
    1562             :          */
    1563         423 :         WARN_ON(in_hardirq() && !(timer->flags & TIMER_IRQSAFE));
    1564             : 
    1565             :         /*
    1566             :          * Must be able to sleep on PREEMPT_RT because of the slowpath in
    1567             :          * del_timer_wait_running().
    1568             :          */
    1569             :         if (IS_ENABLED(CONFIG_PREEMPT_RT) && !(timer->flags & TIMER_IRQSAFE))
    1570             :                 lockdep_assert_preemption_enabled();
    1571             : 
    1572             :         do {
    1573         423 :                 ret = __try_to_del_timer_sync(timer, shutdown);
    1574             : 
    1575         423 :                 if (unlikely(ret < 0)) {
    1576           0 :                         del_timer_wait_running(timer);
    1577             :                         cpu_relax();
    1578             :                 }
    1579         423 :         } while (ret < 0);
    1580             : 
    1581         423 :         return ret;
    1582             : }
    1583             : 
    1584             : /**
    1585             :  * timer_delete_sync - Deactivate a timer and wait for the handler to finish.
    1586             :  * @timer:      The timer to be deactivated
    1587             :  *
    1588             :  * Synchronization rules: Callers must prevent restarting of the timer,
    1589             :  * otherwise this function is meaningless. It must not be called from
    1590             :  * interrupt contexts unless the timer is an irqsafe one. The caller must
    1591             :  * not hold locks which would prevent completion of the timer's callback
    1592             :  * function. The timer's handler must not call add_timer_on(). Upon exit
    1593             :  * the timer is not queued and the handler is not running on any CPU.
    1594             :  *
    1595             :  * For !irqsafe timers, the caller must not hold locks that are held in
    1596             :  * interrupt context, even if the lock has nothing to do with the timer in
    1597             :  * question. Here's why::
    1598             :  *
    1599             :  *    CPU0                             CPU1
    1600             :  *    ----                             ----
    1601             :  *                                     <SOFTIRQ>
    1602             :  *                                       call_timer_fn();
    1603             :  *                                       base->running_timer = mytimer;
    1604             :  *    spin_lock_irq(somelock);
    1605             :  *                                     <IRQ>
    1606             :  *                                        spin_lock(somelock);
    1607             :  *    timer_delete_sync(mytimer);
    1608             :  *    while (base->running_timer == mytimer);
    1609             :  *
    1610             :  * Now timer_delete_sync() will never return and never release somelock.
    1611             :  * The interrupt on the other CPU is waiting to grab somelock but it has
    1612             :  * interrupted the softirq that CPU0 is waiting to finish.
    1613             :  *
    1614             :  * This function cannot guarantee that the timer is not rearmed again by
    1615             :  * some concurrent or preempting code, right after it dropped the base
    1616             :  * lock. If there is the possibility of a concurrent rearm, then the return
    1617             :  * value of the function is meaningless.
    1618             :  *
    1619             :  * If such a guarantee is needed, e.g. for teardown situations, then use
    1620             :  * timer_shutdown_sync() instead.
    1621             :  *
    1622             :  * Return:
    1623             :  * * %0 - The timer was not pending
    1624             :  * * %1 - The timer was pending and deactivated
    1625             :  */
    1626           2 : int timer_delete_sync(struct timer_list *timer)
    1627             : {
    1628         423 :         return __timer_delete_sync(timer, false);
    1629             : }
    1630             : EXPORT_SYMBOL(timer_delete_sync);
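
A teardown sketch tying the rules above together (illustrative,
hypothetical names): first make re-arming impossible, then synchronize,
then free:

    #include <linux/slab.h>
    #include <linux/timer.h>

    struct shut_dev {
            struct timer_list timer;
            bool in_shutdown;               /* checked by the callback */
    };

    static void shut_dev_teardown(struct shut_dev *dev)
    {
            /* Stop the source of re-arming first, otherwise the sync
             * deletion below gives no useful guarantee. */
            WRITE_ONCE(dev->in_shutdown, true);
            timer_delete_sync(&dev->timer); /* callback done on return */
            kfree(dev);
    }
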
    1631             : 
    1632             : /**
    1633             :  * timer_shutdown_sync - Shutdown a timer and prevent rearming
    1634             :  * @timer: The timer to be shutdown
    1635             :  *
    1636             :  * When the function returns it is guaranteed that:
    1637             :  *   - @timer is not queued
    1638             :  *   - The callback function of @timer is not running
    1639             :  *   - @timer cannot be enqueued again. Any attempt to rearm
    1640             :  *     @timer is silently ignored.
    1641             :  *
    1642             :  * See timer_delete_sync() for synchronization rules.
    1643             :  *
    1644             :  * This function is useful for final teardown of an infrastructure where
    1645             :  * the timer is subject to a circular dependency problem.
    1646             :  *
    1647             :  * A common pattern for this is a timer and a workqueue where the timer can
    1648             :  * schedule work and work can arm the timer. On shutdown the workqueue must
    1649             :  * be destroyed and the timer must be prevented from rearming. Unless the
    1650             :  * code has conditionals like 'if (mything->in_shutdown)' to prevent that,
    1651             :  * there is no way to get this correct with timer_delete_sync().
    1652             :  *
    1653             :  * timer_shutdown_sync() solves this problem. The correct ordering of
    1654             :  * calls in this case is:
    1655             :  *
    1656             :  *      timer_shutdown_sync(&mything->timer);
    1657             :  *      destroy_workqueue(mything->workqueue);
    1658             :  *
    1659             :  * After this 'mything' can be safely freed.
    1660             :  *
    1661             :  * This obviously implies that the timer is not required to be functional
    1662             :  * for the rest of the shutdown operation.
    1663             :  *
    1664             :  * Return:
    1665             :  * * %0 - The timer was not pending
    1666             :  * * %1 - The timer was pending
    1667             :  */
    1668           0 : int timer_shutdown_sync(struct timer_list *timer)
    1669             : {
    1670           0 :         return __timer_delete_sync(timer, true);
    1671             : }
    1672             : EXPORT_SYMBOL_GPL(timer_shutdown_sync);
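
A concrete sketch of the timer/workqueue pattern described above
(illustrative; the types are hypothetical, with 'workqueue' assumed to be
a struct workqueue_struct pointer):

    #include <linux/workqueue.h>
    #include <linux/slab.h>

    struct mything {
            struct timer_list timer;        /* queues work when it fires */
            struct workqueue_struct *workqueue; /* work re-arms the timer */
    };

    static void mything_teardown(struct mything *m)
    {
            /* A work item still in flight may try to re-arm the timer
             * after this; that attempt is silently ignored. */
            timer_shutdown_sync(&m->timer);
            destroy_workqueue(m->workqueue); /* drains pending work */
            kfree(m);
    }
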
    1673             : 
    1674          92 : static void call_timer_fn(struct timer_list *timer,
    1675             :                           void (*fn)(struct timer_list *),
    1676             :                           unsigned long baseclk)
    1677             : {
    1678          92 :         int count = preempt_count();
    1679             : 
    1680             : #ifdef CONFIG_LOCKDEP
    1681             :         /*
    1682             :          * It is permissible to free the timer from inside the
    1683             :          * function that is called from it; we need to take this into
    1684             :          * account for lockdep too. To avoid bogus "held lock freed"
    1685             :          * warnings as well as problems when looking into
    1686             :          * timer->lockdep_map, make a copy and use that here.
    1687             :          */
    1688             :         struct lockdep_map lockdep_map;
    1689             : 
    1690             :         lockdep_copy_map(&lockdep_map, &timer->lockdep_map);
    1691             : #endif
    1692             :         /*
    1693             :          * Couple the lock chain with the lock chain at
    1694             :          * timer_delete_sync() by acquiring the lock_map around the fn()
    1695             :          * call here and in timer_delete_sync().
    1696             :          */
    1697             :         lock_map_acquire(&lockdep_map);
    1698             : 
    1699          92 :         trace_timer_expire_entry(timer, baseclk);
    1700          92 :         fn(timer);
    1701          92 :         trace_timer_expire_exit(timer);
    1702             : 
    1703             :         lock_map_release(&lockdep_map);
    1704             : 
    1705          92 :         if (count != preempt_count()) {
    1706           0 :                 WARN_ONCE(1, "timer: %pS preempt leak: %08x -> %08x\n",
    1707             :                           fn, count, preempt_count());
    1708             :                 /*
    1709             :                  * Restore the preempt count. That gives us a decent
    1710             :                  * chance to survive and extract information. If the
    1711             :                  * callback kept a lock held, bad luck, but not worse
    1712             :                  * than the BUG() we had.
    1713             :                  */
    1714             :                 preempt_count_set(count);
    1715             :         }
    1716          92 : }
    1717             : 
    1718          92 : static void expire_timers(struct timer_base *base, struct hlist_head *head)
    1719             : {
    1720             :         /*
    1721             :          * This value is required only for tracing. base->clk was
    1722             :          * incremented directly before expire_timers() was called, but the
    1723             :          * expiry is relative to the old base->clk value.
    1724             :          */
    1725             :         unsigned long baseclk = base->clk - 1;
    1726             : 
    1727         184 :         while (!hlist_empty(head)) {
    1728             :                 struct timer_list *timer;
    1729             :                 void (*fn)(struct timer_list *);
    1730             : 
    1731          92 :                 timer = hlist_entry(head->first, struct timer_list, entry);
    1732             : 
    1733          92 :                 base->running_timer = timer;
    1734          92 :                 detach_timer(timer, true);
    1735             : 
    1736          92 :                 fn = timer->function;
    1737             : 
    1738          92 :                 if (WARN_ON_ONCE(!fn)) {
    1739             :                         /* Should never happen. Emphasis on should! */
    1740           0 :                         base->running_timer = NULL;
    1741           0 :                         continue;
    1742             :                 }
    1743             : 
    1744          92 :                 if (timer->flags & TIMER_IRQSAFE) {
    1745           9 :                         raw_spin_unlock(&base->lock);
    1746           9 :                         call_timer_fn(timer, fn, baseclk);
    1747           9 :                         raw_spin_lock(&base->lock);
    1748           9 :                         base->running_timer = NULL;
    1749             :                 } else {
    1750          83 :                         raw_spin_unlock_irq(&base->lock);
    1751          83 :                         call_timer_fn(timer, fn, baseclk);
    1752          83 :                         raw_spin_lock_irq(&base->lock);
    1753          83 :                         base->running_timer = NULL;
    1754          83 :                         timer_sync_wait_running(base);
    1755             :                 }
    1756             :         }
    1757          92 : }
    1758             : 
    1759          92 : static int collect_expired_timers(struct timer_base *base,
    1760             :                                   struct hlist_head *heads)
    1761             : {
    1762          92 :         unsigned long clk = base->clk = base->next_expiry;
    1763             :         struct hlist_head *vec;
    1764          92 :         int i, levels = 0;
    1765             :         unsigned int idx;
    1766             : 
    1767         126 :         for (i = 0; i < LVL_DEPTH; i++) {
    1768         126 :                 idx = (clk & LVL_MASK) + i * LVL_SIZE;
    1769             : 
    1770         252 :                 if (__test_and_clear_bit(idx, base->pending_map)) {
    1771          92 :                         vec = base->vectors + idx;
    1772         184 :                         hlist_move_list(vec, heads++);
    1773          92 :                         levels++;
    1774             :                 }
    1775             :                 /* Is it time to look at the next level? */
    1776         126 :                 if (clk & LVL_CLK_MASK)
    1777             :                         break;
    1778             :                 /* Shift clock for the next level granularity */
    1779          34 :                 clk >>= LVL_CLK_SHIFT;
    1780             :         }
    1781          92 :         return levels;
    1782             : }
    1783             : 
    1784             : /*
    1785             :  * Find the next pending bucket of a level. Search from level start (@offset)
    1786             :  * + @clk upwards and if nothing there, search from start of the level
    1787             :  * (@offset) up to @offset + clk.
    1788             :  */
    1789         623 : static int next_pending_bucket(struct timer_base *base, unsigned offset,
    1790             :                                unsigned clk)
    1791             : {
    1792         623 :         unsigned pos, start = offset + clk;
    1793         623 :         unsigned end = offset + LVL_SIZE;
    1794             : 
    1795         623 :         pos = find_next_bit(base->pending_map, end, start);
    1796         623 :         if (pos < end)
    1797         157 :                 return pos - start;
    1798             : 
    1799         466 :         pos = find_next_bit(base->pending_map, start, offset);
    1800         466 :         return pos < start ? pos + LVL_SIZE - start : -1;
    1801             : }
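
A worked example of the wrapped search above (assuming LVL_SIZE == 64,
i.e. LVL_BITS == 6): for level 0 with offset == 0 and clk == 60, the
first find_next_bit() call scans buckets 60..63. If those are empty and
bucket 2 is pending, the second call finds pos == 2 and the function
returns 2 + 64 - 60 == 6, meaning the bucket expires six level-0 ticks
after @clk. If the second search finds nothing either, pos equals start
and -1 is returned: the whole level is empty.
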
    1802             : 
    1803             : /*
    1804             :  * Search the first expiring timer in the various clock levels. Caller must
    1805             :  * hold base->lock.
    1806             :  */
    1807          92 : static unsigned long __next_timer_interrupt(struct timer_base *base)
    1808             : {
    1809             :         unsigned long clk, next, adj;
    1810          92 :         unsigned lvl, offset = 0;
    1811             : 
    1812          92 :         next = base->clk + NEXT_TIMER_MAX_DELTA;
    1813          92 :         clk = base->clk;
    1814         696 :         for (lvl = 0; lvl < LVL_DEPTH; lvl++, offset += LVL_SIZE) {
    1815         623 :                 int pos = next_pending_bucket(base, offset, clk & LVL_MASK);
    1816         623 :                 unsigned long lvl_clk = clk & LVL_CLK_MASK;
    1817             : 
    1818         623 :                 if (pos >= 0) {
    1819         258 :                         unsigned long tmp = clk + (unsigned long) pos;
    1820             : 
    1821         258 :                         tmp <<= LVL_SHIFT(lvl);
    1822         258 :                         if (time_before(tmp, next))
    1823          98 :                                 next = tmp;
    1824             : 
    1825             :                         /*
    1826             :                          * If the next expiration happens before we reach
    1827             :                          * the next level, no need to check further.
    1828             :                          */
    1829         258 :                         if (pos <= ((LVL_CLK_DIV - lvl_clk) & LVL_CLK_MASK))
    1830             :                                 break;
    1831             :                 }
    1832             :                 /*
    1833             :                  * Clock for the next level. If the current level clock lower
    1834             :                  * bits are zero, we look at the next level as is. If not, we
    1835             :                  * need to advance it by one because that's going to be the
    1836             :                  * next expiring bucket in that level. base->clk is the next
    1837             :                  * expiring jiffy. So in case of:
    1838             :                  *
    1839             :                  * LVL5 LVL4 LVL3 LVL2 LVL1 LVL0
    1840             :                  *  0    0    0    0    0    0
    1841             :                  *
    1842             :                  * we have to look at all levels @index 0. With
    1843             :                  *
    1844             :                  * LVL5 LVL4 LVL3 LVL2 LVL1 LVL0
    1845             :                  *  0    0    0    0    0    2
    1846             :                  *
    1847             :                  * LVL0 has the next expiring bucket @index 2. The upper
    1848             :                  * levels have the next expiring bucket @index 1.
    1849             :                  *
    1850             :                  * In case the propagation wraps into the next level, the same
    1851             :                  * rules apply:
    1852             :                  *
    1853             :                  * LVL5 LVL4 LVL3 LVL2 LVL1 LVL0
    1854             :                  *  0    0    0    0    F    2
    1855             :                  *
    1856             :                  * So after looking at LVL0 we get:
    1857             :                  *
    1858             :                  * LVL5 LVL4 LVL3 LVL2 LVL1
    1859             :                  *  0    0    0    1    0
    1860             :                  *
    1861             :                  * So no propagation from LVL1 to LVL2 because that happened
    1862             :                  * with the add already, but then we need to propagate further
    1863             :                  * from LVL2 to LVL3.
    1864             :                  *
    1865             :                  * So the simple check whether the lower bits of the current
    1866             :                  * level are 0 or not is sufficient for all cases.
    1867             :                  */
    1868         604 :                 adj = lvl_clk ? 1 : 0;
    1869         604 :                 clk >>= LVL_CLK_SHIFT;
    1870         604 :                 clk += adj;
    1871             :         }
    1872             : 
    1873          92 :         base->next_expiry_recalc = false;
    1874          92 :         base->timers_pending = !(next == base->clk + NEXT_TIMER_MAX_DELTA);
    1875             : 
    1876          92 :         return next;
    1877             : }
    1878             : 
    1879             : #ifdef CONFIG_NO_HZ_COMMON
    1880             : /*
    1881             :  * Check if the next hrtimer event is before the next timer wheel
    1882             :  * event:
    1883             :  */
    1884             : static u64 cmp_next_hrtimer_event(u64 basem, u64 expires)
    1885             : {
    1886             :         u64 nextevt = hrtimer_get_next_event();
    1887             : 
    1888             :         /*
    1889             :          * If high resolution timers are enabled,
    1890             :          * hrtimer_get_next_event() returns KTIME_MAX.
    1891             :          */
    1892             :         if (expires <= nextevt)
    1893             :                 return expires;
    1894             : 
    1895             :         /*
    1896             :          * If the next timer is already expired, return the tick base
    1897             :          * time so the tick is fired immediately.
    1898             :          */
    1899             :         if (nextevt <= basem)
    1900             :                 return basem;
    1901             : 
    1902             :         /*
    1903             :          * Round up to the next jiffy. High resolution timers are
    1904             :          * off, so the hrtimers are expired in the tick and we need to
    1905             :          * make sure that this tick really expires the timer to avoid
    1906             :          * a ping pong of the nohz stop code.
    1907             :          *
    1908             :          * Use DIV_ROUND_UP_ULL to prevent gcc calling __divdi3
    1909             :          */
    1910             :         return DIV_ROUND_UP_ULL(nextevt, TICK_NSEC) * TICK_NSEC;
    1911             : }
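
A worked example of the rounding above (assuming HZ == 250, so
TICK_NSEC == 4,000,000 ns): an hrtimer event at nextevt == 9,000,000 ns
yields DIV_ROUND_UP_ULL(9000000, 4000000) * 4000000 == 12,000,000 ns,
the next full jiffy boundary. The tick programmed for that time is thus
guaranteed to actually expire the hrtimer instead of firing just before
it and looping in the nohz stop code.
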
    1912             : 
    1913             : /**
    1914             :  * get_next_timer_interrupt - return the time (clock mono) of the next timer
    1915             :  * @basej:      base time jiffies
    1916             :  * @basem:      base time clock monotonic
    1917             :  *
    1918             :  * Returns the tick aligned clock monotonic time of the next pending
    1919             :  * timer or KTIME_MAX if no timer is pending.
    1920             :  */
    1921             : u64 get_next_timer_interrupt(unsigned long basej, u64 basem)
    1922             : {
    1923             :         struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]);
    1924             :         u64 expires = KTIME_MAX;
    1925             :         unsigned long nextevt;
    1926             : 
    1927             :         /*
    1928             :          * Pretend that there is no timer pending if the CPU is offline.
    1929             :          * Possible pending timers will be migrated later to an active CPU.
    1930             :          */
    1931             :         if (cpu_is_offline(smp_processor_id()))
    1932             :                 return expires;
    1933             : 
    1934             :         raw_spin_lock(&base->lock);
    1935             :         if (base->next_expiry_recalc)
    1936             :                 base->next_expiry = __next_timer_interrupt(base);
    1937             :         nextevt = base->next_expiry;
    1938             : 
    1939             :         /*
    1940             :          * We have a fresh next event. Check whether we can forward the
    1941             :          * base. We can only do that when @basej is past base->clk,
    1942             :          * otherwise we might rewind base->clk.
    1943             :          */
    1944             :         if (time_after(basej, base->clk)) {
    1945             :                 if (time_after(nextevt, basej))
    1946             :                         base->clk = basej;
    1947             :                 else if (time_after(nextevt, base->clk))
    1948             :                         base->clk = nextevt;
    1949             :         }
    1950             : 
    1951             :         if (time_before_eq(nextevt, basej)) {
    1952             :                 expires = basem;
    1953             :                 base->is_idle = false;
    1954             :         } else {
    1955             :                 if (base->timers_pending)
    1956             :                         expires = basem + (u64)(nextevt - basej) * TICK_NSEC;
    1957             :                 /*
    1958             :                  * If we expect to sleep more than a tick, mark the base idle.
    1959             :                  * The tick is also stopped, so any added timer must forward
    1960             :                  * the base clk itself to keep granularity small. This idle
    1961             :                  * logic is only maintained for the BASE_STD base; deferrable
    1962             :                  * timers may still see large granularity skew (by design).
    1963             :                  */
    1964             :                 if ((expires - basem) > TICK_NSEC)
    1965             :                         base->is_idle = true;
    1966             :         }
    1967             :         raw_spin_unlock(&base->lock);
    1968             : 
    1969             :         return cmp_next_hrtimer_event(basem, expires);
    1970             : }
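
The expires value handed back to the caller comes from the jiffies delta
scaling in the else-branch above. A minimal userspace sketch of that
conversion, again assuming TICK_NSEC = 4,000,000 ns (HZ=250) for
illustration only:

    #include <stdio.h>
    #include <stdint.h>

    #define TICK_NSEC 4000000ULL

    /* Mirrors: expires = basem + (u64)(nextevt - basej) * TICK_NSEC */
    static uint64_t next_event_ns(uint64_t basem, unsigned long basej,
                                  unsigned long nextevt)
    {
            return basem + (uint64_t)(nextevt - basej) * TICK_NSEC;
    }

    int main(void)
    {
            /* Next timer 3 jiffies after a monotonic base of 1,000,000 ns. */
            printf("%llu\n",
                   (unsigned long long)next_event_ns(1000000ULL, 100, 103));
            return 0;       /* prints 13000000 */
    }
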
    1971             : 
    1972             : /**
    1973             :  * timer_clear_idle - Clear the idle state of the timer base
    1974             :  *
    1975             :  * Called with interrupts disabled
    1976             :  */
    1977             : void timer_clear_idle(void)
    1978             : {
    1979             :         struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]);
    1980             : 
    1981             :         /*
    1982             :          * We do this unlocked. The worst outcome is a remote enqueue sending
    1983             :          * a pointless IPI, but taking the lock would just make the window for
    1984             :          * sending the IPI a few instructions smaller, at the cost of taking
    1985             :          * the lock in the exit-from-idle path.
    1986             :          */
    1987             :         base->is_idle = false;
    1988             : }
    1989             : #endif
    1990             : 
    1991             : /**
    1992             :  * __run_timers - run all expired timers (if any) on this CPU.
    1993             :  * @base: the timer vector to be processed.
    1994             :  */
    1995          92 : static inline void __run_timers(struct timer_base *base)
    1996             : {
    1997             :         struct hlist_head heads[LVL_DEPTH];
    1998             :         int levels;
    1999             : 
    2000          92 :         if (time_before(jiffies, base->next_expiry))
    2001           0 :                 return;
    2002             : 
    2003          92 :         timer_base_lock_expiry(base);
    2004          92 :         raw_spin_lock_irq(&base->lock);
    2005             : 
    2006         276 :         while (time_after_eq(jiffies, base->clk) &&
    2007          92 :                time_after_eq(jiffies, base->next_expiry)) {
    2008          92 :                 levels = collect_expired_timers(base, heads);
    2009             :                 /*
    2010             :                  * There are two possible reasons for not finding any expired
    2011             :                  * timer at this clk: either all matching timers have been
    2012             :                  * dequeued, or no timer has been queued since
    2013             :                  * base::next_expiry was set to base::clk +
    2014             :                  * NEXT_TIMER_MAX_DELTA.
    2015             :                  */
    2016          92 :                 WARN_ON_ONCE(!levels && !base->next_expiry_recalc
    2017             :                              && base->timers_pending);
    2018          92 :                 base->clk++;
    2019          92 :                 base->next_expiry = __next_timer_interrupt(base);
    2020             : 
    2021         276 :                 while (levels--)
    2022          92 :                         expire_timers(base, heads + levels);
    2023             :         }
    2024          92 :         raw_spin_unlock_irq(&base->lock);
    2025          92 :         timer_base_unlock_expiry(base);
    2026             : }
    2027             : 
    2028             : /*
    2029             :  * This function runs timers and the timer-tq in bottom half context.
    2030             :  */
    2031          92 : static __latent_entropy void run_timer_softirq(struct softirq_action *h)
    2032             : {
    2033          92 :         struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]);
    2034             : 
    2035          92 :         __run_timers(base);
    2036             :         if (IS_ENABLED(CONFIG_NO_HZ_COMMON))
    2037             :                 __run_timers(this_cpu_ptr(&timer_bases[BASE_DEF]));
    2038          92 : }
    2039             : 
    2040             : /*
    2041             :  * Called by the local, per-CPU timer interrupt on SMP.
    2042             :  */
    2043        2943 : static void run_local_timers(void)
    2044             : {
    2045        2943 :         struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]);
    2046             : 
    2047        2943 :         hrtimer_run_queues();
    2048             :         /* Raise the softirq only if required. */
    2049        2943 :         if (time_before(jiffies, base->next_expiry)) {
    2050             :                 if (!IS_ENABLED(CONFIG_NO_HZ_COMMON))
    2051             :                         return;
    2052             :                 /* CPU is awake, so check the deferrable base. */
    2053             :                 base++;
    2054             :                 if (time_before(jiffies, base->next_expiry))
    2055             :                         return;
    2056             :         }
    2057          92 :         raise_softirq(TIMER_SOFTIRQ);
    2058             : }
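
The bare base++ above works because BASE_STD and BASE_DEF are consecutive
entries of the per-CPU timer_bases[] array, so incrementing the pointer
lands on the deferrable base. A toy userspace model of that layout (the
names mirror the kernel's, but the struct is a stand-in):

    #include <stdio.h>

    enum { BASE_STD, BASE_DEF, NR_BASES };

    struct timer_base {
            unsigned long next_expiry;
    };

    int main(void)
    {
            struct timer_base timer_bases[NR_BASES] = {
                    [BASE_STD] = { .next_expiry = 100 },
                    [BASE_DEF] = { .next_expiry = 200 },
            };
            struct timer_base *base = &timer_bases[BASE_STD];

            base++; /* now points at the deferrable base */
            printf("%lu\n", base->next_expiry); /* prints 200 */
            return 0;
    }
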
    2059             : 
    2060             : /*
    2061             :  * Called from the timer interrupt handler to charge one tick to the current
    2062             :  * process.  user_tick is 1 if the tick is user time, 0 for system.
    2063             :  */
    2064        2943 : void update_process_times(int user_tick)
    2065             : {
    2066        2943 :         struct task_struct *p = current;
    2067             : 
    2068             :         /* Note: this timer irq context must be accounted for as well. */
    2069        2943 :         account_process_tick(p, user_tick);
    2070        2943 :         run_local_timers();
    2071        2943 :         rcu_sched_clock_irq(user_tick);
    2072             : #ifdef CONFIG_IRQ_WORK
    2073        2943 :         if (in_irq())
    2074        2943 :                 irq_work_tick();
    2075             : #endif
    2076        2943 :         scheduler_tick();
    2077             :         if (IS_ENABLED(CONFIG_POSIX_TIMERS))
    2078        2943 :                 run_posix_cpu_timers();
    2079        2943 : }
    2080             : 
    2081             : /*
    2082             :  * Since schedule_timeout()'s timer is defined on the stack, it must store
    2083             :  * the target task on the stack as well.
    2084             :  */
    2085             : struct process_timer {
    2086             :         struct timer_list timer;
    2087             :         struct task_struct *task;
    2088             : };
    2089             : 
    2090          54 : static void process_timeout(struct timer_list *t)
    2091             : {
    2092          54 :         struct process_timer *timeout = from_timer(timeout, t, timer);
    2093             : 
    2094          54 :         wake_up_process(timeout->task);
    2095          54 : }
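
from_timer() is the timer flavour of container_of(): given the embedded
timer_list pointer the callback receives, it recovers the enclosing
structure. A self-contained userspace re-derivation; the macros follow
their kernel definitions, while the structs and my_timeout() are stand-ins:

    #include <stdio.h>
    #include <stddef.h>

    #define container_of(ptr, type, member) \
            ((type *)((char *)(ptr) - offsetof(type, member)))
    #define from_timer(var, callback_timer, timer_fieldname) \
            container_of(callback_timer, typeof(*var), timer_fieldname)

    struct timer_list { int dummy; };

    struct process_timer {
            struct timer_list timer;
            const char *task;
    };

    static void my_timeout(struct timer_list *t)
    {
            struct process_timer *timeout = from_timer(timeout, t, timer);

            /* Stands in for wake_up_process(timeout->task). */
            printf("waking %s\n", timeout->task);
    }

    int main(void)
    {
            struct process_timer pt = { .task = "current" };

            my_timeout(&pt.timer); /* prints "waking current" */
            return 0;
    }
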
    2096             : 
    2097             : /**
    2098             :  * schedule_timeout - sleep until timeout
    2099             :  * @timeout: timeout value in jiffies
    2100             :  *
    2101             :  * Make the current task sleep until @timeout jiffies have elapsed.
    2102             :  * The function behavior depends on the current task state
    2103             :  * (see also set_current_state() description):
    2104             :  *
    2105             :  * %TASK_RUNNING - the scheduler is called, but the task does not sleep
    2106             :  * at all. That happens because sched_submit_work() does nothing for
    2107             :  * tasks in %TASK_RUNNING state.
    2108             :  *
    2109             :  * %TASK_UNINTERRUPTIBLE - at least @timeout jiffies are guaranteed to
    2110             :  * pass before the routine returns unless the current task is explicitly
    2111             :  * woken up, (e.g. by wake_up_process()).
    2112             :  *
    2113             :  * %TASK_INTERRUPTIBLE - the routine may return early if a signal is
    2114             :  * delivered to the current task or the current task is explicitly woken
    2115             :  * up.
    2116             :  *
    2117             :  * The current task state is guaranteed to be %TASK_RUNNING when this
    2118             :  * routine returns.
    2119             :  *
    2120             :  * Specifying a @timeout value of %MAX_SCHEDULE_TIMEOUT will schedule
    2121             :  * the CPU away without a bound on the timeout. In this case the return
    2122             :  * value will be %MAX_SCHEDULE_TIMEOUT.
    2123             :  *
    2124             :  * Returns 0 when the timer has expired otherwise the remaining time in
    2125             :  * jiffies will be returned. In all cases the return value is guaranteed
    2126             :  * to be non-negative.
    2127             :  */
    2128         828 : signed long __sched schedule_timeout(signed long timeout)
    2129             : {
    2130             :         struct process_timer timer;
    2131             :         unsigned long expire;
    2132             : 
    2133         828 :         switch (timeout)
    2134             :         {
    2135             :         case MAX_SCHEDULE_TIMEOUT:
    2136             :                 /*
    2137             :                  * This special case is useful for the caller's
    2138             :                  * convenience. Nothing more. We could take
    2139             :                  * MAX_SCHEDULE_TIMEOUT from one of the negative values,
    2140             :                  * but I'd like to return a valid offset (>= 0) to allow
    2141             :                  * the caller to do everything it wants with the retval.
    2142             :                  */
    2143         406 :                 schedule();
    2144         406 :                 goto out;
    2145             :         default:
    2146             :                 /*
    2147             :                  * Another bit of paranoia. Note that the retval will be
    2148             :                  * 0, since no piece of kernel code is supposed to check
    2149             :                  * for a negative retval of schedule_timeout() (it should
    2150             :                  * never happen anyway). You just have the printk() that
    2151             :                  * will tell you if something has gone wrong and where.
    2152             :                  */
    2153         422 :                 if (timeout < 0) {
    2154           0 :                         printk(KERN_ERR "schedule_timeout: wrong timeout "
    2155             :                                 "value %lx\n", timeout);
    2156           0 :                         dump_stack();
    2157           0 :                         __set_current_state(TASK_RUNNING);
    2158           0 :                         goto out;
    2159             :                 }
    2160             :         }
    2161             : 
    2162         422 :         expire = timeout + jiffies;
    2163             : 
    2164         422 :         timer.task = current;
    2165         844 :         timer_setup_on_stack(&timer.timer, process_timeout, 0);
    2166         422 :         __mod_timer(&timer.timer, expire, MOD_TIMER_NOTPENDING);
    2167         422 :         schedule();
    2168         421 :         del_timer_sync(&timer.timer);
    2169             : 
    2170             :         /* Remove the timer from the object tracker */
    2171         421 :         destroy_timer_on_stack(&timer.timer);
    2172             : 
    2173         421 :         timeout = expire - jiffies;
    2174             : 
    2175             :  out:
    2176         827 :         return timeout < 0 ? 0 : timeout;
    2177             : }
    2178             : EXPORT_SYMBOL(schedule_timeout);
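
As the kernel-doc above stresses, the task state must be set before
calling schedule_timeout(), otherwise the task never sleeps. A hedged
kernel-side sketch of a two-second interruptible wait;
my_wait_two_seconds() is a hypothetical caller:

    #include <linux/sched.h>
    #include <linux/jiffies.h>

    static signed long my_wait_two_seconds(void)
    {
            /* Without this the task stays TASK_RUNNING and will not sleep. */
            set_current_state(TASK_INTERRUPTIBLE);

            /* Returns 0 on full timeout, else the jiffies left after a wakeup. */
            return schedule_timeout(2 * HZ);
    }
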
    2179             : 
    2180             : /*
    2181             :  * We can use __set_current_state() here because schedule_timeout() calls
    2182             :  * schedule() unconditionally.
    2183             :  */
    2184           0 : signed long __sched schedule_timeout_interruptible(signed long timeout)
    2185             : {
    2186           0 :         __set_current_state(TASK_INTERRUPTIBLE);
    2187           0 :         return schedule_timeout(timeout);
    2188             : }
    2189             : EXPORT_SYMBOL(schedule_timeout_interruptible);
    2190             : 
    2191           0 : signed long __sched schedule_timeout_killable(signed long timeout)
    2192             : {
    2193           0 :         __set_current_state(TASK_KILLABLE);
    2194           0 :         return schedule_timeout(timeout);
    2195             : }
    2196             : EXPORT_SYMBOL(schedule_timeout_killable);
    2197             : 
    2198           0 : signed long __sched schedule_timeout_uninterruptible(signed long timeout)
    2199             : {
    2200           0 :         __set_current_state(TASK_UNINTERRUPTIBLE);
    2201           0 :         return schedule_timeout(timeout);
    2202             : }
    2203             : EXPORT_SYMBOL(schedule_timeout_uninterruptible);
    2204             : 
    2205             : /*
    2206             :  * Like schedule_timeout_uninterruptible(), except this task will not
    2207             :  * contribute to the load average.
    2208             :  */
    2209           0 : signed long __sched schedule_timeout_idle(signed long timeout)
    2210             : {
    2211           0 :         __set_current_state(TASK_IDLE);
    2212           0 :         return schedule_timeout(timeout);
    2213             : }
    2214             : EXPORT_SYMBOL(schedule_timeout_idle);
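
A common use of schedule_timeout_idle() is a housekeeping kthread that
naps between scans without inflating the load average. A hedged sketch;
my_scan_thread() and the commented-out my_scan() are hypothetical:

    #include <linux/sched.h>
    #include <linux/kthread.h>
    #include <linux/jiffies.h>

    static int my_scan_thread(void *data)
    {
            while (!kthread_should_stop()) {
                    /* my_scan(data); -- hypothetical periodic work */
                    schedule_timeout_idle(HZ);      /* nap ~1 s in TASK_IDLE */
            }
            return 0;
    }
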
    2215             : 
    2216             : #ifdef CONFIG_HOTPLUG_CPU
    2217             : static void migrate_timer_list(struct timer_base *new_base, struct hlist_head *head)
    2218             : {
    2219             :         struct timer_list *timer;
    2220             :         int cpu = new_base->cpu;
    2221             : 
    2222             :         while (!hlist_empty(head)) {
    2223             :                 timer = hlist_entry(head->first, struct timer_list, entry);
    2224             :                 detach_timer(timer, false);
    2225             :                 timer->flags = (timer->flags & ~TIMER_BASEMASK) | cpu;
    2226             :                 internal_add_timer(new_base, timer);
    2227             :         }
    2228             : }
    2229             : 
    2230             : int timers_prepare_cpu(unsigned int cpu)
    2231             : {
    2232             :         struct timer_base *base;
    2233             :         int b;
    2234             : 
    2235             :         for (b = 0; b < NR_BASES; b++) {
    2236             :                 base = per_cpu_ptr(&timer_bases[b], cpu);
    2237             :                 base->clk = jiffies;
    2238             :                 base->next_expiry = base->clk + NEXT_TIMER_MAX_DELTA;
    2239             :                 base->next_expiry_recalc = false;
    2240             :                 base->timers_pending = false;
    2241             :                 base->is_idle = false;
    2242             :         }
    2243             :         return 0;
    2244             : }
    2245             : 
    2246             : int timers_dead_cpu(unsigned int cpu)
    2247             : {
    2248             :         struct timer_base *old_base;
    2249             :         struct timer_base *new_base;
    2250             :         int b, i;
    2251             : 
    2252             :         for (b = 0; b < NR_BASES; b++) {
    2253             :                 old_base = per_cpu_ptr(&timer_bases[b], cpu);
    2254             :                 new_base = get_cpu_ptr(&timer_bases[b]);
    2255             :                 /*
    2256             :                  * The caller is globally serialized and nobody else
    2257             :                  * takes two locks at once, so deadlock is not possible.
    2258             :                  */
    2259             :                 raw_spin_lock_irq(&new_base->lock);
    2260             :                 raw_spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING);
    2261             : 
    2262             :                 /*
    2263             :                  * The current CPU's base clock might be stale. Update it
    2264             :                  * before moving the timers over.
    2265             :                  */
    2266             :                 forward_timer_base(new_base);
    2267             : 
    2268             :                 WARN_ON_ONCE(old_base->running_timer);
    2269             :                 old_base->running_timer = NULL;
    2270             : 
    2271             :                 for (i = 0; i < WHEEL_SIZE; i++)
    2272             :                         migrate_timer_list(new_base, old_base->vectors + i);
    2273             : 
    2274             :                 raw_spin_unlock(&old_base->lock);
    2275             :                 raw_spin_unlock_irq(&new_base->lock);
    2276             :                 put_cpu_ptr(&timer_bases);
    2277             :         }
    2278             :         return 0;
    2279             : }
    2280             : 
    2281             : #endif /* CONFIG_HOTPLUG_CPU */
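
timers_prepare_cpu() and timers_dead_cpu() are wired into the CPU hotplug
state machine statically, via the state table in kernel/cpu.c. For a sense
of the shape, this is the dynamic equivalent a driver would use to register
analogous callbacks; my_prepare_cpu() and my_dead_cpu() are hypothetical
stand-ins, not the timer code's own registration:

    #include <linux/init.h>
    #include <linux/cpuhotplug.h>

    static int my_prepare_cpu(unsigned int cpu)
    {
            /* Reset per-CPU state before the CPU comes up. */
            return 0;
    }

    static int my_dead_cpu(unsigned int cpu)
    {
            /* Migrate pending work away from the dead CPU. */
            return 0;
    }

    static int __init my_hotplug_init(void)
    {
            /* Negative on error; >= 0 is the dynamically allocated state. */
            return cpuhp_setup_state(CPUHP_BP_PREPARE_DYN, "mydrv:prepare",
                                     my_prepare_cpu, my_dead_cpu);
    }
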
    2282             : 
    2283           1 : static void __init init_timer_cpu(int cpu)
    2284             : {
    2285             :         struct timer_base *base;
    2286             :         int i;
    2287             : 
    2288           2 :         for (i = 0; i < NR_BASES; i++) {
    2289           1 :                 base = per_cpu_ptr(&timer_bases[i], cpu);
    2290           1 :                 base->cpu = cpu;
    2291             :                 raw_spin_lock_init(&base->lock);
    2292           1 :                 base->clk = jiffies;
    2293           1 :                 base->next_expiry = base->clk + NEXT_TIMER_MAX_DELTA;
    2294           1 :                 timer_base_init_expiry_lock(base);
    2295             :         }
    2296           1 : }
    2297             : 
    2298           1 : static void __init init_timer_cpus(void)
    2299             : {
    2300             :         int cpu;
    2301             : 
    2302           2 :         for_each_possible_cpu(cpu)
    2303           1 :                 init_timer_cpu(cpu);
    2304           1 : }
    2305             : 
    2306           1 : void __init init_timers(void)
    2307             : {
    2308           1 :         init_timer_cpus();
    2309             :         posix_cputimers_init_work();
    2310           1 :         open_softirq(TIMER_SOFTIRQ, run_timer_softirq);
    2311           1 : }
    2312             : 
    2313             : /**
    2314             :  * msleep - sleep safely even with waitqueue interruptions
    2315             :  * @msecs: Time in milliseconds to sleep for
    2316             :  */
    2317           0 : void msleep(unsigned int msecs)
    2318             : {
    2319           0 :         unsigned long timeout = msecs_to_jiffies(msecs) + 1;
    2320             : 
    2321           0 :         while (timeout)
    2322           0 :                 timeout = schedule_timeout_uninterruptible(timeout);
    2323           0 : }
    2324             : 
    2325             : EXPORT_SYMBOL(msleep);
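
The +1 above guards against the current tick being almost over:
msecs_to_jiffies() alone could let the sleep end up to one tick early.
A userspace sketch of the computed timeout, assuming HZ=100 (one jiffy =
10 ms) and the simple 1000-divisible-by-HZ path of msecs_to_jiffies();
the real helper handles more cases:

    #include <stdio.h>

    #define HZ 100U
    #define MSEC_PER_JIFFY (1000U / HZ)

    /* Simplified round-up conversion for the HZ-divides-1000 case. */
    static unsigned long msecs_to_jiffies(unsigned int m)
    {
            return (m + MSEC_PER_JIFFY - 1) / MSEC_PER_JIFFY;
    }

    int main(void)
    {
            /* msleep(15) arms a 3-jiffy timeout: ceil(15/10) + 1. */
            printf("%lu\n", msecs_to_jiffies(15) + 1);      /* prints 3 */
            return 0;
    }
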
    2326             : 
    2327             : /**
    2328             :  * msleep_interruptible - sleep waiting for signals
    2329             :  * @msecs: Time in milliseconds to sleep for
    2330             :  */
    2331           0 : unsigned long msleep_interruptible(unsigned int msecs)
    2332             : {
    2333           0 :         unsigned long timeout = msecs_to_jiffies(msecs) + 1;
    2334             : 
    2335           0 :         while (timeout && !signal_pending(current))
    2336           0 :                 timeout = schedule_timeout_interruptible(timeout);
    2337           0 :         return jiffies_to_msecs(timeout);
    2338             : }
    2339             : 
    2340             : EXPORT_SYMBOL(msleep_interruptible);
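
The return value is the remaining time in milliseconds, so a caller can
distinguish a full sleep from a signal-shortened one. A hedged sketch;
my_interruptible_pause() is hypothetical:

    #include <linux/delay.h>
    #include <linux/errno.h>

    static int my_interruptible_pause(void)
    {
            unsigned long left = msleep_interruptible(500);

            /* Non-zero means a signal woke us with ~left ms still to go. */
            return left ? -EINTR : 0;
    }
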
    2341             : 
    2342             : /**
    2343             :  * usleep_range_state - Sleep for an approximate time in a given state
    2344             :  * @min:        Minimum time in usecs to sleep
    2345             :  * @max:        Maximum time in usecs to sleep
    2346             :  * @state:      State the current task will be in while sleeping
    2347             :  *
    2348             :  * In non-atomic context where the exact wakeup time is flexible, use
    2349             :  * usleep_range_state() instead of udelay().  The sleep improves responsiveness
    2350             :  * by avoiding the CPU-hogging busy-wait of udelay(), and the range reduces
    2351             :  * power usage by allowing hrtimers to take advantage of an already-
    2352             :  * scheduled interrupt instead of scheduling a new one just for this sleep.
    2353             :  */
    2354           0 : void __sched usleep_range_state(unsigned long min, unsigned long max,
    2355             :                                 unsigned int state)
    2356             : {
    2357           0 :         ktime_t exp = ktime_add_us(ktime_get(), min);
    2358           0 :         u64 delta = (u64)(max - min) * NSEC_PER_USEC;
    2359             : 
    2360             :         for (;;) {
    2361           0 :                 __set_current_state(state);
    2362             :                 /* Do not return before the requested sleep time has elapsed */
    2363           0 :                 if (!schedule_hrtimeout_range(&exp, delta, HRTIMER_MODE_ABS))
    2364             :                         break;
    2365             :         }
    2366           0 : }
    2367             : EXPORT_SYMBOL(usleep_range_state);
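
Callers normally reach this through the usleep_range() wrapper in
<linux/delay.h>, which passes TASK_UNINTERRUPTIBLE; the min/max window is
what lets the hrtimer coalesce with an already scheduled wakeup. A hedged
driver-style sketch; my_hw_settle() and the 100-200 us window are
illustrative:

    #include <linux/delay.h>

    static void my_hw_settle(void)
    {
            /* Hardware needs >= 100 us; anything up to 200 us is acceptable. */
            usleep_range(100, 200);
    }
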

Generated by: LCOV version 1.14