LCOV - code coverage report
Current view: top level - kernel/time - clocksource.c (source / functions) Hit Total Coverage
Test: coverage.info Lines: 94 250 37.6 %
Date: 2023-07-19 18:55:55 Functions: 7 27 25.9 %

          Line data    Source code
       1             : // SPDX-License-Identifier: GPL-2.0+
       2             : /*
       3             :  * This file contains the functions which manage clocksource drivers.
       4             :  *
       5             :  * Copyright (C) 2004, 2005 IBM, John Stultz (johnstul@us.ibm.com)
       6             :  */
       7             : 
       8             : #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
       9             : 
      10             : #include <linux/device.h>
      11             : #include <linux/clocksource.h>
      12             : #include <linux/init.h>
      13             : #include <linux/module.h>
      14             : #include <linux/sched.h> /* for spin_unlock_irq() using preempt_count() m68k */
      15             : #include <linux/tick.h>
      16             : #include <linux/kthread.h>
      17             : #include <linux/prandom.h>
      18             : #include <linux/cpu.h>
      19             : 
      20             : #include "tick-internal.h"
      21             : #include "timekeeping_internal.h"
      22             : 
      23             : /**
      24             :  * clocks_calc_mult_shift - calculate mult/shift factors for scaled math of clocks
      25             :  * @mult:       pointer to mult variable
      26             :  * @shift:      pointer to shift variable
      27             :  * @from:       frequency to convert from
      28             :  * @to:         frequency to convert to
      29             :  * @maxsec:     guaranteed runtime conversion range in seconds
      30             :  *
      31             :  * The function evaluates the shift/mult pair for the scaled math
      32             :  * operations of clocksources and clockevents.
      33             :  *
      34             :  * @to and @from are frequency values in HZ. For clock sources @to is
      35             :  * NSEC_PER_SEC == 1GHz and @from is the counter frequency. For clock
      36             :  * event @to is the counter frequency and @from is NSEC_PER_SEC.
      37             :  *
      38             :  * The @maxsec conversion range argument controls the time frame in
      39             :  * seconds which must be covered by the runtime conversion with the
      40             :  * calculated mult and shift factors. This guarantees that no 64bit
      41             :  * overflow happens when the input value of the conversion is
      42             :  * multiplied with the calculated mult factor. Larger ranges may
      43             :  * reduce the conversion accuracy by choosing smaller mult and shift
      44             :  * factors.
      45             :  */
      46             : void
      47           0 : clocks_calc_mult_shift(u32 *mult, u32 *shift, u32 from, u32 to, u32 maxsec)
      48             : {
      49             :         u64 tmp;
      50           1 :         u32 sft, sftacc= 32;
      51             : 
      52             :         /*
      53             :          * Calculate the shift factor which is limiting the conversion
      54             :          * range:
      55             :          */
      56           1 :         tmp = ((u64)maxsec * from) >> 32;
      57           1 :         while (tmp) {
      58           0 :                 tmp >>=1;
      59           0 :                 sftacc--;
      60             :         }
      61             : 
      62             :         /*
      63             :          * Find the conversion shift/mult pair which has the best
      64             :          * accuracy and fits the maxsec conversion range:
      65             :          */
      66           9 :         for (sft = 32; sft > 0; sft--) {
      67          10 :                 tmp = (u64) to << sft;
      68          10 :                 tmp += from / 2;
      69          10 :                 do_div(tmp, from);
      70          10 :                 if ((tmp >> sftacc) == 0)
      71             :                         break;
      72             :         }
      73           1 :         *mult = tmp;
      74           1 :         *shift = sft;
      75           0 : }
      76             : EXPORT_SYMBOL_GPL(clocks_calc_mult_shift);
      77             : 
      78             : /*[Clocksource internal variables]---------
      79             :  * curr_clocksource:
      80             :  *      currently selected clocksource.
      81             :  * suspend_clocksource:
      82             :  *      used to calculate the suspend time.
      83             :  * clocksource_list:
      84             :  *      linked list with the registered clocksources
      85             :  * clocksource_mutex:
      86             :  *      protects manipulations to curr_clocksource and the clocksource_list
      87             :  * override_name:
      88             :  *      Name of the user-specified clocksource.
      89             :  */
      90             : static struct clocksource *curr_clocksource;
      91             : static struct clocksource *suspend_clocksource;
      92             : static LIST_HEAD(clocksource_list);
      93             : static DEFINE_MUTEX(clocksource_mutex);
      94             : static char override_name[CS_NAME_LEN];
      95             : static int finished_booting;
      96             : static u64 suspend_start;
      97             : 
      98             : /*
      99             :  * Interval: 0.5sec.
     100             :  */
     101             : #define WATCHDOG_INTERVAL (HZ >> 1)
     102             : 
     103             : /*
     104             :  * Threshold: 0.0312s, when doubled: 0.0625s.
     105             :  * Also a default for cs->uncertainty_margin when registering clocks.
     106             :  */
     107             : #define WATCHDOG_THRESHOLD (NSEC_PER_SEC >> 5)
     108             : 
     109             : /*
     110             :  * Maximum permissible delay between two readouts of the watchdog
     111             :  * clocksource surrounding a read of the clocksource being validated.
     112             :  * This delay could be due to SMIs, NMIs, or to VCPU preemptions.  Used as
     113             :  * a lower bound for cs->uncertainty_margin values when registering clocks.
     114             :  *
     115             :  * The default of 500 parts per million is based on NTP's limits.
     116             :  * If a clocksource is good enough for NTP, it is good enough for us!
     117             :  */
     118             : #ifdef CONFIG_CLOCKSOURCE_WATCHDOG_MAX_SKEW_US
     119             : #define MAX_SKEW_USEC   CONFIG_CLOCKSOURCE_WATCHDOG_MAX_SKEW_US
     120             : #else
     121             : #define MAX_SKEW_USEC   (125 * WATCHDOG_INTERVAL / HZ)
     122             : #endif
     123             : 
     124             : #define WATCHDOG_MAX_SKEW (MAX_SKEW_USEC * NSEC_PER_USEC)
     125             : 
     126             : #ifdef CONFIG_CLOCKSOURCE_WATCHDOG
     127             : static void clocksource_watchdog_work(struct work_struct *work);
     128             : static void clocksource_select(void);
     129             : 
     130             : static LIST_HEAD(watchdog_list);
     131             : static struct clocksource *watchdog;
     132             : static struct timer_list watchdog_timer;
     133             : static DECLARE_WORK(watchdog_work, clocksource_watchdog_work);
     134             : static DEFINE_SPINLOCK(watchdog_lock);
     135             : static int watchdog_running;
     136             : static atomic_t watchdog_reset_pending;
     137             : 
     138             : static inline void clocksource_watchdog_lock(unsigned long *flags)
     139             : {
     140             :         spin_lock_irqsave(&watchdog_lock, *flags);
     141             : }
     142             : 
     143             : static inline void clocksource_watchdog_unlock(unsigned long *flags)
     144             : {
     145             :         spin_unlock_irqrestore(&watchdog_lock, *flags);
     146             : }
     147             : 
     148             : static int clocksource_watchdog_kthread(void *data);
     149             : static void __clocksource_change_rating(struct clocksource *cs, int rating);
     150             : 
     151             : static void clocksource_watchdog_work(struct work_struct *work)
     152             : {
     153             :         /*
     154             :          * We cannot directly run clocksource_watchdog_kthread() here, because
     155             :          * clocksource_select() calls timekeeping_notify() which uses
     156             :          * stop_machine(). One cannot use stop_machine() from a workqueue() due
     157             :          * lock inversions wrt CPU hotplug.
     158             :          *
     159             :          * Also, we only ever run this work once or twice during the lifetime
     160             :          * of the kernel, so there is no point in creating a more permanent
     161             :          * kthread for this.
     162             :          *
     163             :          * If kthread_run fails the next watchdog scan over the
     164             :          * watchdog_list will find the unstable clock again.
     165             :          */
     166             :         kthread_run(clocksource_watchdog_kthread, NULL, "kwatchdog");
     167             : }
     168             : 
     169             : static void __clocksource_unstable(struct clocksource *cs)
     170             : {
     171             :         cs->flags &= ~(CLOCK_SOURCE_VALID_FOR_HRES | CLOCK_SOURCE_WATCHDOG);
     172             :         cs->flags |= CLOCK_SOURCE_UNSTABLE;
     173             : 
     174             :         /*
     175             :          * If the clocksource is registered clocksource_watchdog_kthread() will
     176             :          * re-rate and re-select.
     177             :          */
     178             :         if (list_empty(&cs->list)) {
     179             :                 cs->rating = 0;
     180             :                 return;
     181             :         }
     182             : 
     183             :         if (cs->mark_unstable)
     184             :                 cs->mark_unstable(cs);
     185             : 
     186             :         /* kick clocksource_watchdog_kthread() */
     187             :         if (finished_booting)
     188             :                 schedule_work(&watchdog_work);
     189             : }
     190             : 
     191             : /**
     192             :  * clocksource_mark_unstable - mark clocksource unstable via watchdog
     193             :  * @cs:         clocksource to be marked unstable
     194             :  *
     195             :  * This function is called by the x86 TSC code to mark clocksources as unstable;
     196             :  * it defers demotion and re-selection to a kthread.
     197             :  */
     198             : void clocksource_mark_unstable(struct clocksource *cs)
     199             : {
     200             :         unsigned long flags;
     201             : 
     202             :         spin_lock_irqsave(&watchdog_lock, flags);
     203             :         if (!(cs->flags & CLOCK_SOURCE_UNSTABLE)) {
     204             :                 if (!list_empty(&cs->list) && list_empty(&cs->wd_list))
     205             :                         list_add(&cs->wd_list, &watchdog_list);
     206             :                 __clocksource_unstable(cs);
     207             :         }
     208             :         spin_unlock_irqrestore(&watchdog_lock, flags);
     209             : }
     210             : 
     211             : ulong max_cswd_read_retries = 2;
     212             : module_param(max_cswd_read_retries, ulong, 0644);
     213             : EXPORT_SYMBOL_GPL(max_cswd_read_retries);
     214             : static int verify_n_cpus = 8;
     215             : module_param(verify_n_cpus, int, 0644);
     216             : 
     217             : enum wd_read_status {
     218             :         WD_READ_SUCCESS,
     219             :         WD_READ_UNSTABLE,
     220             :         WD_READ_SKIP
     221             : };
     222             : 
     223             : static enum wd_read_status cs_watchdog_read(struct clocksource *cs, u64 *csnow, u64 *wdnow)
     224             : {
     225             :         unsigned int nretries;
     226             :         u64 wd_end, wd_end2, wd_delta;
     227             :         int64_t wd_delay, wd_seq_delay;
     228             : 
     229             :         for (nretries = 0; nretries <= max_cswd_read_retries; nretries++) {
     230             :                 local_irq_disable();
     231             :                 *wdnow = watchdog->read(watchdog);
     232             :                 *csnow = cs->read(cs);
     233             :                 wd_end = watchdog->read(watchdog);
     234             :                 wd_end2 = watchdog->read(watchdog);
     235             :                 local_irq_enable();
     236             : 
     237             :                 wd_delta = clocksource_delta(wd_end, *wdnow, watchdog->mask);
     238             :                 wd_delay = clocksource_cyc2ns(wd_delta, watchdog->mult,
     239             :                                               watchdog->shift);
     240             :                 if (wd_delay <= WATCHDOG_MAX_SKEW) {
     241             :                         if (nretries > 1 || nretries >= max_cswd_read_retries) {
     242             :                                 pr_warn("timekeeping watchdog on CPU%d: %s retried %d times before success\n",
     243             :                                         smp_processor_id(), watchdog->name, nretries);
     244             :                         }
     245             :                         return WD_READ_SUCCESS;
     246             :                 }
     247             : 
     248             :                 /*
     249             :                  * Now compute delay in consecutive watchdog read to see if
     250             :                  * there is too much external interferences that cause
     251             :                  * significant delay in reading both clocksource and watchdog.
     252             :                  *
     253             :                  * If consecutive WD read-back delay > WATCHDOG_MAX_SKEW/2,
     254             :                  * report system busy, reinit the watchdog and skip the current
     255             :                  * watchdog test.
     256             :                  */
     257             :                 wd_delta = clocksource_delta(wd_end2, wd_end, watchdog->mask);
     258             :                 wd_seq_delay = clocksource_cyc2ns(wd_delta, watchdog->mult, watchdog->shift);
     259             :                 if (wd_seq_delay > WATCHDOG_MAX_SKEW/2)
     260             :                         goto skip_test;
     261             :         }
     262             : 
     263             :         pr_warn("timekeeping watchdog on CPU%d: wd-%s-wd excessive read-back delay of %lldns vs. limit of %ldns, wd-wd read-back delay only %lldns, attempt %d, marking %s unstable\n",
     264             :                 smp_processor_id(), cs->name, wd_delay, WATCHDOG_MAX_SKEW, wd_seq_delay, nretries, cs->name);
     265             :         return WD_READ_UNSTABLE;
     266             : 
     267             : skip_test:
     268             :         pr_info("timekeeping watchdog on CPU%d: %s wd-wd read-back delay of %lldns\n",
     269             :                 smp_processor_id(), watchdog->name, wd_seq_delay);
     270             :         pr_info("wd-%s-wd read-back delay of %lldns, clock-skew test skipped!\n",
     271             :                 cs->name, wd_delay);
     272             :         return WD_READ_SKIP;
     273             : }
     274             : 
     275             : static u64 csnow_mid;
     276             : static cpumask_t cpus_ahead;
     277             : static cpumask_t cpus_behind;
     278             : static cpumask_t cpus_chosen;
     279             : 
     280             : static void clocksource_verify_choose_cpus(void)
     281             : {
     282             :         int cpu, i, n = verify_n_cpus;
     283             : 
     284             :         if (n < 0) {
     285             :                 /* Check all of the CPUs. */
     286             :                 cpumask_copy(&cpus_chosen, cpu_online_mask);
     287             :                 cpumask_clear_cpu(smp_processor_id(), &cpus_chosen);
     288             :                 return;
     289             :         }
     290             : 
     291             :         /* If no checking desired, or no other CPU to check, leave. */
     292             :         cpumask_clear(&cpus_chosen);
     293             :         if (n == 0 || num_online_cpus() <= 1)
     294             :                 return;
     295             : 
     296             :         /* Make sure to select at least one CPU other than the current CPU. */
     297             :         cpu = cpumask_first(cpu_online_mask);
     298             :         if (cpu == smp_processor_id())
     299             :                 cpu = cpumask_next(cpu, cpu_online_mask);
     300             :         if (WARN_ON_ONCE(cpu >= nr_cpu_ids))
     301             :                 return;
     302             :         cpumask_set_cpu(cpu, &cpus_chosen);
     303             : 
     304             :         /* Force a sane value for the boot parameter. */
     305             :         if (n > nr_cpu_ids)
     306             :                 n = nr_cpu_ids;
     307             : 
     308             :         /*
     309             :          * Randomly select the specified number of CPUs.  If the same
     310             :          * CPU is selected multiple times, that CPU is checked only once,
     311             :          * and no replacement CPU is selected.  This gracefully handles
     312             :          * situations where verify_n_cpus is greater than the number of
     313             :          * CPUs that are currently online.
     314             :          */
     315             :         for (i = 1; i < n; i++) {
     316             :                 cpu = get_random_u32_below(nr_cpu_ids);
     317             :                 cpu = cpumask_next(cpu - 1, cpu_online_mask);
     318             :                 if (cpu >= nr_cpu_ids)
     319             :                         cpu = cpumask_first(cpu_online_mask);
     320             :                 if (!WARN_ON_ONCE(cpu >= nr_cpu_ids))
     321             :                         cpumask_set_cpu(cpu, &cpus_chosen);
     322             :         }
     323             : 
     324             :         /* Don't verify ourselves. */
     325             :         cpumask_clear_cpu(smp_processor_id(), &cpus_chosen);
     326             : }
     327             : 
     328             : static void clocksource_verify_one_cpu(void *csin)
     329             : {
     330             :         struct clocksource *cs = (struct clocksource *)csin;
     331             : 
     332             :         csnow_mid = cs->read(cs);
     333             : }
     334             : 
     335             : void clocksource_verify_percpu(struct clocksource *cs)
     336             : {
     337             :         int64_t cs_nsec, cs_nsec_max = 0, cs_nsec_min = LLONG_MAX;
     338             :         u64 csnow_begin, csnow_end;
     339             :         int cpu, testcpu;
     340             :         s64 delta;
     341             : 
     342             :         if (verify_n_cpus == 0)
     343             :                 return;
     344             :         cpumask_clear(&cpus_ahead);
     345             :         cpumask_clear(&cpus_behind);
     346             :         cpus_read_lock();
     347             :         preempt_disable();
     348             :         clocksource_verify_choose_cpus();
     349             :         if (cpumask_empty(&cpus_chosen)) {
     350             :                 preempt_enable();
     351             :                 cpus_read_unlock();
     352             :                 pr_warn("Not enough CPUs to check clocksource '%s'.\n", cs->name);
     353             :                 return;
     354             :         }
     355             :         testcpu = smp_processor_id();
     356             :         pr_warn("Checking clocksource %s synchronization from CPU %d to CPUs %*pbl.\n", cs->name, testcpu, cpumask_pr_args(&cpus_chosen));
     357             :         for_each_cpu(cpu, &cpus_chosen) {
     358             :                 if (cpu == testcpu)
     359             :                         continue;
     360             :                 csnow_begin = cs->read(cs);
     361             :                 smp_call_function_single(cpu, clocksource_verify_one_cpu, cs, 1);
     362             :                 csnow_end = cs->read(cs);
     363             :                 delta = (s64)((csnow_mid - csnow_begin) & cs->mask);
     364             :                 if (delta < 0)
     365             :                         cpumask_set_cpu(cpu, &cpus_behind);
     366             :                 delta = (csnow_end - csnow_mid) & cs->mask;
     367             :                 if (delta < 0)
     368             :                         cpumask_set_cpu(cpu, &cpus_ahead);
     369             :                 delta = clocksource_delta(csnow_end, csnow_begin, cs->mask);
     370             :                 cs_nsec = clocksource_cyc2ns(delta, cs->mult, cs->shift);
     371             :                 if (cs_nsec > cs_nsec_max)
     372             :                         cs_nsec_max = cs_nsec;
     373             :                 if (cs_nsec < cs_nsec_min)
     374             :                         cs_nsec_min = cs_nsec;
     375             :         }
     376             :         preempt_enable();
     377             :         cpus_read_unlock();
     378             :         if (!cpumask_empty(&cpus_ahead))
     379             :                 pr_warn("        CPUs %*pbl ahead of CPU %d for clocksource %s.\n",
     380             :                         cpumask_pr_args(&cpus_ahead), testcpu, cs->name);
     381             :         if (!cpumask_empty(&cpus_behind))
     382             :                 pr_warn("        CPUs %*pbl behind CPU %d for clocksource %s.\n",
     383             :                         cpumask_pr_args(&cpus_behind), testcpu, cs->name);
     384             :         if (!cpumask_empty(&cpus_ahead) || !cpumask_empty(&cpus_behind))
     385             :                 pr_warn("        CPU %d check durations %lldns - %lldns for clocksource %s.\n",
     386             :                         testcpu, cs_nsec_min, cs_nsec_max, cs->name);
     387             : }
     388             : EXPORT_SYMBOL_GPL(clocksource_verify_percpu);
     389             : 
     390             : static inline void clocksource_reset_watchdog(void)
     391             : {
     392             :         struct clocksource *cs;
     393             : 
     394             :         list_for_each_entry(cs, &watchdog_list, wd_list)
     395             :                 cs->flags &= ~CLOCK_SOURCE_WATCHDOG;
     396             : }
     397             : 
     398             : 
     399             : static void clocksource_watchdog(struct timer_list *unused)
     400             : {
     401             :         u64 csnow, wdnow, cslast, wdlast, delta;
     402             :         int next_cpu, reset_pending;
     403             :         int64_t wd_nsec, cs_nsec;
     404             :         struct clocksource *cs;
     405             :         enum wd_read_status read_ret;
     406             :         unsigned long extra_wait = 0;
     407             :         u32 md;
     408             : 
     409             :         spin_lock(&watchdog_lock);
     410             :         if (!watchdog_running)
     411             :                 goto out;
     412             : 
     413             :         reset_pending = atomic_read(&watchdog_reset_pending);
     414             : 
     415             :         list_for_each_entry(cs, &watchdog_list, wd_list) {
     416             : 
     417             :                 /* Clocksource already marked unstable? */
     418             :                 if (cs->flags & CLOCK_SOURCE_UNSTABLE) {
     419             :                         if (finished_booting)
     420             :                                 schedule_work(&watchdog_work);
     421             :                         continue;
     422             :                 }
     423             : 
     424             :                 read_ret = cs_watchdog_read(cs, &csnow, &wdnow);
     425             : 
     426             :                 if (read_ret == WD_READ_UNSTABLE) {
     427             :                         /* Clock readout unreliable, so give it up. */
     428             :                         __clocksource_unstable(cs);
     429             :                         continue;
     430             :                 }
     431             : 
     432             :                 /*
     433             :                  * When WD_READ_SKIP is returned, it means the system is likely
     434             :                  * under very heavy load, where the latency of reading
     435             :                  * watchdog/clocksource is very big, and affect the accuracy of
     436             :                  * watchdog check. So give system some space and suspend the
     437             :                  * watchdog check for 5 minutes.
     438             :                  */
     439             :                 if (read_ret == WD_READ_SKIP) {
     440             :                         /*
     441             :                          * As the watchdog timer will be suspended, and
     442             :                          * cs->last could keep unchanged for 5 minutes, reset
     443             :                          * the counters.
     444             :                          */
     445             :                         clocksource_reset_watchdog();
     446             :                         extra_wait = HZ * 300;
     447             :                         break;
     448             :                 }
     449             : 
     450             :                 /* Clocksource initialized ? */
     451             :                 if (!(cs->flags & CLOCK_SOURCE_WATCHDOG) ||
     452             :                     atomic_read(&watchdog_reset_pending)) {
     453             :                         cs->flags |= CLOCK_SOURCE_WATCHDOG;
     454             :                         cs->wd_last = wdnow;
     455             :                         cs->cs_last = csnow;
     456             :                         continue;
     457             :                 }
     458             : 
     459             :                 delta = clocksource_delta(wdnow, cs->wd_last, watchdog->mask);
     460             :                 wd_nsec = clocksource_cyc2ns(delta, watchdog->mult,
     461             :                                              watchdog->shift);
     462             : 
     463             :                 delta = clocksource_delta(csnow, cs->cs_last, cs->mask);
     464             :                 cs_nsec = clocksource_cyc2ns(delta, cs->mult, cs->shift);
     465             :                 wdlast = cs->wd_last; /* save these in case we print them */
     466             :                 cslast = cs->cs_last;
     467             :                 cs->cs_last = csnow;
     468             :                 cs->wd_last = wdnow;
     469             : 
     470             :                 if (atomic_read(&watchdog_reset_pending))
     471             :                         continue;
     472             : 
     473             :                 /* Check the deviation from the watchdog clocksource. */
     474             :                 md = cs->uncertainty_margin + watchdog->uncertainty_margin;
     475             :                 if (abs(cs_nsec - wd_nsec) > md) {
     476             :                         u64 cs_wd_msec;
     477             :                         u64 wd_msec;
     478             :                         u32 wd_rem;
     479             : 
     480             :                         pr_warn("timekeeping watchdog on CPU%d: Marking clocksource '%s' as unstable because the skew is too large:\n",
     481             :                                 smp_processor_id(), cs->name);
     482             :                         pr_warn("                      '%s' wd_nsec: %lld wd_now: %llx wd_last: %llx mask: %llx\n",
     483             :                                 watchdog->name, wd_nsec, wdnow, wdlast, watchdog->mask);
     484             :                         pr_warn("                      '%s' cs_nsec: %lld cs_now: %llx cs_last: %llx mask: %llx\n",
     485             :                                 cs->name, cs_nsec, csnow, cslast, cs->mask);
     486             :                         cs_wd_msec = div_u64_rem(cs_nsec - wd_nsec, 1000U * 1000U, &wd_rem);
     487             :                         wd_msec = div_u64_rem(wd_nsec, 1000U * 1000U, &wd_rem);
     488             :                         pr_warn("                      Clocksource '%s' skewed %lld ns (%lld ms) over watchdog '%s' interval of %lld ns (%lld ms)\n",
     489             :                                 cs->name, cs_nsec - wd_nsec, cs_wd_msec, watchdog->name, wd_nsec, wd_msec);
     490             :                         if (curr_clocksource == cs)
     491             :                                 pr_warn("                      '%s' is current clocksource.\n", cs->name);
     492             :                         else if (curr_clocksource)
     493             :                                 pr_warn("                      '%s' (not '%s') is current clocksource.\n", curr_clocksource->name, cs->name);
     494             :                         else
     495             :                                 pr_warn("                      No current clocksource.\n");
     496             :                         __clocksource_unstable(cs);
     497             :                         continue;
     498             :                 }
     499             : 
     500             :                 if (cs == curr_clocksource && cs->tick_stable)
     501             :                         cs->tick_stable(cs);
     502             : 
     503             :                 if (!(cs->flags & CLOCK_SOURCE_VALID_FOR_HRES) &&
     504             :                     (cs->flags & CLOCK_SOURCE_IS_CONTINUOUS) &&
     505             :                     (watchdog->flags & CLOCK_SOURCE_IS_CONTINUOUS)) {
     506             :                         /* Mark it valid for high-res. */
     507             :                         cs->flags |= CLOCK_SOURCE_VALID_FOR_HRES;
     508             : 
     509             :                         /*
     510             :                          * clocksource_done_booting() will sort it if
     511             :                          * finished_booting is not set yet.
     512             :                          */
     513             :                         if (!finished_booting)
     514             :                                 continue;
     515             : 
     516             :                         /*
     517             :                          * If this is not the current clocksource let
     518             :                          * the watchdog thread reselect it. Due to the
     519             :                          * change to high res this clocksource might
     520             :                          * be preferred now. If it is the current
     521             :                          * clocksource let the tick code know about
     522             :                          * that change.
     523             :                          */
     524             :                         if (cs != curr_clocksource) {
     525             :                                 cs->flags |= CLOCK_SOURCE_RESELECT;
     526             :                                 schedule_work(&watchdog_work);
     527             :                         } else {
     528             :                                 tick_clock_notify();
     529             :                         }
     530             :                 }
     531             :         }
     532             : 
     533             :         /*
     534             :          * We only clear the watchdog_reset_pending, when we did a
     535             :          * full cycle through all clocksources.
     536             :          */
     537             :         if (reset_pending)
     538             :                 atomic_dec(&watchdog_reset_pending);
     539             : 
     540             :         /*
     541             :          * Cycle through CPUs to check if the CPUs stay synchronized
     542             :          * to each other.
     543             :          */
     544             :         next_cpu = cpumask_next(raw_smp_processor_id(), cpu_online_mask);
     545             :         if (next_cpu >= nr_cpu_ids)
     546             :                 next_cpu = cpumask_first(cpu_online_mask);
     547             : 
     548             :         /*
     549             :          * Arm timer if not already pending: could race with concurrent
     550             :          * pair clocksource_stop_watchdog() clocksource_start_watchdog().
     551             :          */
     552             :         if (!timer_pending(&watchdog_timer)) {
     553             :                 watchdog_timer.expires += WATCHDOG_INTERVAL + extra_wait;
     554             :                 add_timer_on(&watchdog_timer, next_cpu);
     555             :         }
     556             : out:
     557             :         spin_unlock(&watchdog_lock);
     558             : }
     559             : 
     560             : static inline void clocksource_start_watchdog(void)
     561             : {
     562             :         if (watchdog_running || !watchdog || list_empty(&watchdog_list))
     563             :                 return;
     564             :         timer_setup(&watchdog_timer, clocksource_watchdog, 0);
     565             :         watchdog_timer.expires = jiffies + WATCHDOG_INTERVAL;
     566             :         add_timer_on(&watchdog_timer, cpumask_first(cpu_online_mask));
     567             :         watchdog_running = 1;
     568             : }
     569             : 
     570             : static inline void clocksource_stop_watchdog(void)
     571             : {
     572             :         if (!watchdog_running || (watchdog && !list_empty(&watchdog_list)))
     573             :                 return;
     574             :         del_timer(&watchdog_timer);
     575             :         watchdog_running = 0;
     576             : }
     577             : 
     578             : static void clocksource_resume_watchdog(void)
     579             : {
     580             :         atomic_inc(&watchdog_reset_pending);
     581             : }
     582             : 
     583             : static void clocksource_enqueue_watchdog(struct clocksource *cs)
     584             : {
     585             :         INIT_LIST_HEAD(&cs->wd_list);
     586             : 
     587             :         if (cs->flags & CLOCK_SOURCE_MUST_VERIFY) {
     588             :                 /* cs is a clocksource to be watched. */
     589             :                 list_add(&cs->wd_list, &watchdog_list);
     590             :                 cs->flags &= ~CLOCK_SOURCE_WATCHDOG;
     591             :         } else {
     592             :                 /* cs is a watchdog. */
     593             :                 if (cs->flags & CLOCK_SOURCE_IS_CONTINUOUS)
     594             :                         cs->flags |= CLOCK_SOURCE_VALID_FOR_HRES;
     595             :         }
     596             : }
     597             : 
     598             : static void clocksource_select_watchdog(bool fallback)
     599             : {
     600             :         struct clocksource *cs, *old_wd;
     601             :         unsigned long flags;
     602             : 
     603             :         spin_lock_irqsave(&watchdog_lock, flags);
     604             :         /* save current watchdog */
     605             :         old_wd = watchdog;
     606             :         if (fallback)
     607             :                 watchdog = NULL;
     608             : 
     609             :         list_for_each_entry(cs, &clocksource_list, list) {
     610             :                 /* cs is a clocksource to be watched. */
     611             :                 if (cs->flags & CLOCK_SOURCE_MUST_VERIFY)
     612             :                         continue;
     613             : 
     614             :                 /* Skip current if we were requested for a fallback. */
     615             :                 if (fallback && cs == old_wd)
     616             :                         continue;
     617             : 
     618             :                 /* Pick the best watchdog. */
     619             :                 if (!watchdog || cs->rating > watchdog->rating)
     620             :                         watchdog = cs;
     621             :         }
     622             :         /* If we failed to find a fallback restore the old one. */
     623             :         if (!watchdog)
     624             :                 watchdog = old_wd;
     625             : 
     626             :         /* If we changed the watchdog we need to reset cycles. */
     627             :         if (watchdog != old_wd)
     628             :                 clocksource_reset_watchdog();
     629             : 
     630             :         /* Check if the watchdog timer needs to be started. */
     631             :         clocksource_start_watchdog();
     632             :         spin_unlock_irqrestore(&watchdog_lock, flags);
     633             : }
     634             : 
     635             : static void clocksource_dequeue_watchdog(struct clocksource *cs)
     636             : {
     637             :         if (cs != watchdog) {
     638             :                 if (cs->flags & CLOCK_SOURCE_MUST_VERIFY) {
     639             :                         /* cs is a watched clocksource. */
     640             :                         list_del_init(&cs->wd_list);
     641             :                         /* Check if the watchdog timer needs to be stopped. */
     642             :                         clocksource_stop_watchdog();
     643             :                 }
     644             :         }
     645             : }
     646             : 
     647             : static int __clocksource_watchdog_kthread(void)
     648             : {
     649             :         struct clocksource *cs, *tmp;
     650             :         unsigned long flags;
     651             :         int select = 0;
     652             : 
     653             :         /* Do any required per-CPU skew verification. */
     654             :         if (curr_clocksource &&
     655             :             curr_clocksource->flags & CLOCK_SOURCE_UNSTABLE &&
     656             :             curr_clocksource->flags & CLOCK_SOURCE_VERIFY_PERCPU)
     657             :                 clocksource_verify_percpu(curr_clocksource);
     658             : 
     659             :         spin_lock_irqsave(&watchdog_lock, flags);
     660             :         list_for_each_entry_safe(cs, tmp, &watchdog_list, wd_list) {
     661             :                 if (cs->flags & CLOCK_SOURCE_UNSTABLE) {
     662             :                         list_del_init(&cs->wd_list);
     663             :                         __clocksource_change_rating(cs, 0);
     664             :                         select = 1;
     665             :                 }
     666             :                 if (cs->flags & CLOCK_SOURCE_RESELECT) {
     667             :                         cs->flags &= ~CLOCK_SOURCE_RESELECT;
     668             :                         select = 1;
     669             :                 }
     670             :         }
     671             :         /* Check if the watchdog timer needs to be stopped. */
     672             :         clocksource_stop_watchdog();
     673             :         spin_unlock_irqrestore(&watchdog_lock, flags);
     674             : 
     675             :         return select;
     676             : }
     677             : 
     678             : static int clocksource_watchdog_kthread(void *data)
     679             : {
     680             :         mutex_lock(&clocksource_mutex);
     681             :         if (__clocksource_watchdog_kthread())
     682             :                 clocksource_select();
     683             :         mutex_unlock(&clocksource_mutex);
     684             :         return 0;
     685             : }
     686             : 
     687             : static bool clocksource_is_watchdog(struct clocksource *cs)
     688             : {
     689             :         return cs == watchdog;
     690             : }
     691             : 
     692             : #else /* CONFIG_CLOCKSOURCE_WATCHDOG */
     693             : 
     694             : static void clocksource_enqueue_watchdog(struct clocksource *cs)
     695             : {
     696           2 :         if (cs->flags & CLOCK_SOURCE_IS_CONTINUOUS)
     697           1 :                 cs->flags |= CLOCK_SOURCE_VALID_FOR_HRES;
     698             : }
     699             : 
     700             : static void clocksource_select_watchdog(bool fallback) { }
     701             : static inline void clocksource_dequeue_watchdog(struct clocksource *cs) { }
     702             : static inline void clocksource_resume_watchdog(void) { }
     703             : static inline int __clocksource_watchdog_kthread(void) { return 0; }
     704             : static bool clocksource_is_watchdog(struct clocksource *cs) { return false; }
     705           0 : void clocksource_mark_unstable(struct clocksource *cs) { }
     706             : 
     707             : static inline void clocksource_watchdog_lock(unsigned long *flags) { }
     708             : static inline void clocksource_watchdog_unlock(unsigned long *flags) { }
     709             : 
     710             : #endif /* CONFIG_CLOCKSOURCE_WATCHDOG */
     711             : 
     712             : static bool clocksource_is_suspend(struct clocksource *cs)
     713             : {
     714           0 :         return cs == suspend_clocksource;
     715             : }
     716             : 
     717           2 : static void __clocksource_suspend_select(struct clocksource *cs)
     718             : {
     719             :         /*
     720             :          * Skip the clocksource which will be stopped in suspend state.
     721             :          */
     722           2 :         if (!(cs->flags & CLOCK_SOURCE_SUSPEND_NONSTOP))
     723             :                 return;
     724             : 
     725             :         /*
     726             :          * The nonstop clocksource can be selected as the suspend clocksource to
     727             :          * calculate the suspend time, so it should not supply suspend/resume
     728             :          * interfaces to suspend the nonstop clocksource when system suspends.
     729             :          */
     730           0 :         if (cs->suspend || cs->resume) {
     731           0 :                 pr_warn("Nonstop clocksource %s should not supply suspend/resume interfaces\n",
     732             :                         cs->name);
     733             :         }
     734             : 
     735             :         /* Pick the best rating. */
     736           0 :         if (!suspend_clocksource || cs->rating > suspend_clocksource->rating)
     737           0 :                 suspend_clocksource = cs;
     738             : }
     739             : 
     740             : /**
     741             :  * clocksource_suspend_select - Select the best clocksource for suspend timing
     742             :  * @fallback:   if select a fallback clocksource
     743             :  */
     744           0 : static void clocksource_suspend_select(bool fallback)
     745             : {
     746             :         struct clocksource *cs, *old_suspend;
     747             : 
     748           0 :         old_suspend = suspend_clocksource;
     749           0 :         if (fallback)
     750           0 :                 suspend_clocksource = NULL;
     751             : 
     752           0 :         list_for_each_entry(cs, &clocksource_list, list) {
     753             :                 /* Skip current if we were requested for a fallback. */
     754           0 :                 if (fallback && cs == old_suspend)
     755           0 :                         continue;
     756             : 
     757           0 :                 __clocksource_suspend_select(cs);
     758             :         }
     759           0 : }
     760             : 
     761             : /**
     762             :  * clocksource_start_suspend_timing - Start measuring the suspend timing
     763             :  * @cs:                 current clocksource from timekeeping
     764             :  * @start_cycles:       current cycles from timekeeping
     765             :  *
     766             :  * This function will save the start cycle values of suspend timer to calculate
     767             :  * the suspend time when resuming system.
     768             :  *
     769             :  * This function is called late in the suspend process from timekeeping_suspend(),
     770             :  * that means processes are frozen, non-boot cpus and interrupts are disabled
     771             :  * now. It is therefore possible to start the suspend timer without taking the
     772             :  * clocksource mutex.
     773             :  */
     774           0 : void clocksource_start_suspend_timing(struct clocksource *cs, u64 start_cycles)
     775             : {
     776           0 :         if (!suspend_clocksource)
     777             :                 return;
     778             : 
     779             :         /*
     780             :          * If current clocksource is the suspend timer, we should use the
     781             :          * tkr_mono.cycle_last value as suspend_start to avoid same reading
     782             :          * from suspend timer.
     783             :          */
     784           0 :         if (clocksource_is_suspend(cs)) {
     785           0 :                 suspend_start = start_cycles;
     786           0 :                 return;
     787             :         }
     788             : 
     789           0 :         if (suspend_clocksource->enable &&
     790           0 :             suspend_clocksource->enable(suspend_clocksource)) {
     791           0 :                 pr_warn_once("Failed to enable the non-suspend-able clocksource.\n");
     792             :                 return;
     793             :         }
     794             : 
     795           0 :         suspend_start = suspend_clocksource->read(suspend_clocksource);
     796             : }
     797             : 
     798             : /**
     799             :  * clocksource_stop_suspend_timing - Stop measuring the suspend timing
     800             :  * @cs:         current clocksource from timekeeping
     801             :  * @cycle_now:  current cycles from timekeeping
     802             :  *
     803             :  * This function will calculate the suspend time from suspend timer.
     804             :  *
     805             :  * Returns nanoseconds since suspend started, 0 if no usable suspend clocksource.
     806             :  *
     807             :  * This function is called early in the resume process from timekeeping_resume(),
     808             :  * that means there is only one cpu, no processes are running and the interrupts
     809             :  * are disabled. It is therefore possible to stop the suspend timer without
     810             :  * taking the clocksource mutex.
     811             :  */
     812           0 : u64 clocksource_stop_suspend_timing(struct clocksource *cs, u64 cycle_now)
     813             : {
     814           0 :         u64 now, delta, nsec = 0;
     815             : 
     816           0 :         if (!suspend_clocksource)
     817             :                 return 0;
     818             : 
     819             :         /*
     820             :          * If current clocksource is the suspend timer, we should use the
     821             :          * tkr_mono.cycle_last value from timekeeping as current cycle to
     822             :          * avoid same reading from suspend timer.
     823             :          */
     824           0 :         if (clocksource_is_suspend(cs))
     825             :                 now = cycle_now;
     826             :         else
     827           0 :                 now = suspend_clocksource->read(suspend_clocksource);
     828             : 
     829           0 :         if (now > suspend_start) {
     830           0 :                 delta = clocksource_delta(now, suspend_start,
     831           0 :                                           suspend_clocksource->mask);
     832           0 :                 nsec = mul_u64_u32_shr(delta, suspend_clocksource->mult,
     833             :                                        suspend_clocksource->shift);
     834             :         }
     835             : 
     836             :         /*
     837             :          * Disable the suspend timer to save power if current clocksource is
     838             :          * not the suspend timer.
     839             :          */
     840           0 :         if (!clocksource_is_suspend(cs) && suspend_clocksource->disable)
     841           0 :                 suspend_clocksource->disable(suspend_clocksource);
     842             : 
     843             :         return nsec;
     844             : }
     845             : 
     846             : /**
     847             :  * clocksource_suspend - suspend the clocksource(s)
     848             :  */
     849           0 : void clocksource_suspend(void)
     850             : {
     851             :         struct clocksource *cs;
     852             : 
     853           0 :         list_for_each_entry_reverse(cs, &clocksource_list, list)
     854           0 :                 if (cs->suspend)
     855           0 :                         cs->suspend(cs);
     856           0 : }
     857             : 
     858             : /**
     859             :  * clocksource_resume - resume the clocksource(s)
     860             :  */
     861           0 : void clocksource_resume(void)
     862             : {
     863             :         struct clocksource *cs;
     864             : 
     865           0 :         list_for_each_entry(cs, &clocksource_list, list)
     866           0 :                 if (cs->resume)
     867           0 :                         cs->resume(cs);
     868             : 
     869             :         clocksource_resume_watchdog();
     870           0 : }
     871             : 
     872             : /**
     873             :  * clocksource_touch_watchdog - Update watchdog
     874             :  *
     875             :  * Update the watchdog after exception contexts such as kgdb so as not
     876             :  * to incorrectly trip the watchdog. This might fail when the kernel
     877             :  * was stopped in code which holds watchdog_lock.
     878             :  */
     879           0 : void clocksource_touch_watchdog(void)
     880             : {
     881             :         clocksource_resume_watchdog();
     882           0 : }
     883             : 
     884             : /**
     885             :  * clocksource_max_adjustment- Returns max adjustment amount
     886             :  * @cs:         Pointer to clocksource
     887             :  *
     888             :  */
     889             : static u32 clocksource_max_adjustment(struct clocksource *cs)
     890             : {
     891             :         u64 ret;
     892             :         /*
     893             :          * We won't try to correct for more than 11% adjustments (110,000 ppm),
     894             :          */
     895           2 :         ret = (u64)cs->mult * 11;
     896           2 :         do_div(ret,100);
     897           2 :         return (u32)ret;
     898             : }
     899             : 
     900             : /**
     901             :  * clocks_calc_max_nsecs - Returns maximum nanoseconds that can be converted
     902             :  * @mult:       cycle to nanosecond multiplier
     903             :  * @shift:      cycle to nanosecond divisor (power of two)
     904             :  * @maxadj:     maximum adjustment value to mult (~11%)
     905             :  * @mask:       bitmask for two's complement subtraction of non 64 bit counters
     906             :  * @max_cyc:    maximum cycle value before potential overflow (does not include
     907             :  *              any safety margin)
     908             :  *
     909             :  * NOTE: This function includes a safety margin of 50%, in other words, we
     910             :  * return half the number of nanoseconds the hardware counter can technically
     911             :  * cover. This is done so that we can potentially detect problems caused by
     912             :  * delayed timers or bad hardware, which might result in time intervals that
     913             :  * are larger than what the math used can handle without overflows.
     914             :  */
     915           0 : u64 clocks_calc_max_nsecs(u32 mult, u32 shift, u32 maxadj, u64 mask, u64 *max_cyc)
     916             : {
     917             :         u64 max_nsecs, max_cycles;
     918             : 
     919             :         /*
     920             :          * Calculate the maximum number of cycles that we can pass to the
     921             :          * cyc2ns() function without overflowing a 64-bit result.
     922             :          */
     923           2 :         max_cycles = ULLONG_MAX;
     924           2 :         do_div(max_cycles, mult+maxadj);
     925             : 
     926             :         /*
     927             :          * The actual maximum number of cycles we can defer the clocksource is
     928             :          * determined by the minimum of max_cycles and mask.
     929             :          * Note: Here we subtract the maxadj to make sure we don't sleep for
     930             :          * too long if there's a large negative adjustment.
     931             :          */
     932           2 :         max_cycles = min(max_cycles, mask);
     933           4 :         max_nsecs = clocksource_cyc2ns(max_cycles, mult - maxadj, shift);
     934             : 
     935             :         /* return the max_cycles value as well if requested */
     936           2 :         if (max_cyc)
     937           2 :                 *max_cyc = max_cycles;
     938             : 
     939             :         /* Return 50% of the actual maximum, so we can detect bad values */
     940           2 :         max_nsecs >>= 1;
     941             : 
     942           0 :         return max_nsecs;
     943             : }
     944             : 
     945             : /**
     946             :  * clocksource_update_max_deferment - Updates the clocksource max_idle_ns & max_cycles
     947             :  * @cs:         Pointer to clocksource to be updated
     948             :  *
     949             :  */
     950             : static inline void clocksource_update_max_deferment(struct clocksource *cs)
     951             : {
     952           4 :         cs->max_idle_ns = clocks_calc_max_nsecs(cs->mult, cs->shift,
     953             :                                                 cs->maxadj, cs->mask,
     954             :                                                 &cs->max_cycles);
     955             : }
     956             : 
     957           3 : static struct clocksource *clocksource_find_best(bool oneshot, bool skipcur)
     958             : {
     959             :         struct clocksource *cs;
     960             : 
     961           4 :         if (!finished_booting || list_empty(&clocksource_list))
     962             :                 return NULL;
     963             : 
     964             :         /*
     965             :          * We pick the clocksource with the highest rating. If oneshot
     966             :          * mode is active, we pick the highres valid clocksource with
     967             :          * the best rating.
     968             :          */
     969           1 :         list_for_each_entry(cs, &clocksource_list, list) {
     970           1 :                 if (skipcur && cs == curr_clocksource)
     971           0 :                         continue;
     972           1 :                 if (oneshot && !(cs->flags & CLOCK_SOURCE_VALID_FOR_HRES))
     973           0 :                         continue;
     974             :                 return cs;
     975             :         }
     976             :         return NULL;
     977             : }
     978             : 
     979           3 : static void __clocksource_select(bool skipcur)
     980             : {
     981           3 :         bool oneshot = tick_oneshot_mode_active();
     982             :         struct clocksource *best, *cs;
     983             : 
     984             :         /* Find the best suitable clocksource */
     985           3 :         best = clocksource_find_best(oneshot, skipcur);
     986           3 :         if (!best)
     987             :                 return;
     988             : 
     989           1 :         if (!strlen(override_name))
     990             :                 goto found;
     991             : 
     992             :         /* Check for the override clocksource. */
     993           0 :         list_for_each_entry(cs, &clocksource_list, list) {
     994           0 :                 if (skipcur && cs == curr_clocksource)
     995           0 :                         continue;
     996           0 :                 if (strcmp(cs->name, override_name) != 0)
     997           0 :                         continue;
     998             :                 /*
     999             :                  * Check to make sure we don't switch to a non-highres
    1000             :                  * capable clocksource if the tick code is in oneshot
    1001             :                  * mode (highres or nohz)
    1002             :                  */
    1003             :                 if (!(cs->flags & CLOCK_SOURCE_VALID_FOR_HRES) && oneshot) {
    1004             :                         /* Override clocksource cannot be used. */
    1005             :                         if (cs->flags & CLOCK_SOURCE_UNSTABLE) {
    1006             :                                 pr_warn("Override clocksource %s is unstable and not HRT compatible - cannot switch while in HRT/NOHZ mode\n",
    1007             :                                         cs->name);
    1008             :                                 override_name[0] = 0;
    1009             :                         } else {
    1010             :                                 /*
    1011             :                                  * The override cannot be currently verified.
    1012             :                                  * Deferring to let the watchdog check.
    1013             :                                  */
    1014             :                                 pr_info("Override clocksource %s is not currently HRT compatible - deferring\n",
    1015             :                                         cs->name);
    1016             :                         }
    1017             :                 } else
    1018             :                         /* Override clocksource can be used. */
    1019             :                         best = cs;
    1020             :                 break;
    1021             :         }
    1022             : 
    1023             : found:
    1024           1 :         if (curr_clocksource != best && !timekeeping_notify(best)) {
    1025           1 :                 pr_info("Switched to clocksource %s\n", best->name);
    1026           1 :                 curr_clocksource = best;
    1027             :         }
    1028             : }
    1029             : 
    1030             : /**
    1031             :  * clocksource_select - Select the best clocksource available
    1032             :  *
    1033             :  * Private function. Must hold clocksource_mutex when called.
    1034             :  *
    1035             :  * Select the clocksource with the best rating, or the clocksource,
    1036             :  * which is selected by userspace override.
    1037             :  */
    1038             : static void clocksource_select(void)
    1039             : {
    1040           3 :         __clocksource_select(false);
    1041             : }
    1042             : 
    1043             : static void clocksource_select_fallback(void)
    1044             : {
    1045           0 :         __clocksource_select(true);
    1046             : }
    1047             : 
    1048             : /*
    1049             :  * clocksource_done_booting - Called near the end of core bootup
    1050             :  *
    1051             :  * Hack to avoid lots of clocksource churn at boot time.
    1052             :  * We use fs_initcall because we want this to start before
    1053             :  * device_initcall but after subsys_initcall.
    1054             :  */
    1055           1 : static int __init clocksource_done_booting(void)
    1056             : {
    1057           1 :         mutex_lock(&clocksource_mutex);
    1058           1 :         curr_clocksource = clocksource_default_clock();
    1059           1 :         finished_booting = 1;
    1060             :         /*
    1061             :          * Run the watchdog first to eliminate unstable clock sources
    1062             :          */
    1063             :         __clocksource_watchdog_kthread();
    1064             :         clocksource_select();
    1065           1 :         mutex_unlock(&clocksource_mutex);
    1066           1 :         return 0;
    1067             : }
    1068             : fs_initcall(clocksource_done_booting);
    1069             : 
    1070             : /*
    1071             :  * Enqueue the clocksource sorted by rating
    1072             :  */
    1073             : static void clocksource_enqueue(struct clocksource *cs)
    1074             : {
    1075           2 :         struct list_head *entry = &clocksource_list;
    1076             :         struct clocksource *tmp;
    1077             : 
    1078           3 :         list_for_each_entry(tmp, &clocksource_list, list) {
    1079             :                 /* Keep track of the place, where to insert */
    1080           1 :                 if (tmp->rating < cs->rating)
    1081             :                         break;
    1082           1 :                 entry = &tmp->list;
    1083             :         }
    1084           4 :         list_add(&cs->list, entry);
    1085             : }
    1086             : 
    1087             : /**
    1088             :  * __clocksource_update_freq_scale - Used update clocksource with new freq
    1089             :  * @cs:         clocksource to be registered
    1090             :  * @scale:      Scale factor multiplied against freq to get clocksource hz
    1091             :  * @freq:       clocksource frequency (cycles per second) divided by scale
    1092             :  *
    1093             :  * This should only be called from the clocksource->enable() method.
    1094             :  *
    1095             :  * This *SHOULD NOT* be called directly! Please use the
    1096             :  * __clocksource_update_freq_hz() or __clocksource_update_freq_khz() helper
    1097             :  * functions.
    1098             :  */
    1099           2 : void __clocksource_update_freq_scale(struct clocksource *cs, u32 scale, u32 freq)
    1100             : {
    1101             :         u64 sec;
    1102             : 
    1103             :         /*
    1104             :          * Default clocksources are *special* and self-define their mult/shift.
    1105             :          * But, you're not special, so you should specify a freq value.
    1106             :          */
    1107           2 :         if (freq) {
    1108             :                 /*
    1109             :                  * Calc the maximum number of seconds which we can run before
    1110             :                  * wrapping around. For clocksources which have a mask > 32-bit
    1111             :                  * we need to limit the max sleep time to have a good
    1112             :                  * conversion precision. 10 minutes is still a reasonable
    1113             :                  * amount. That results in a shift value of 24 for a
    1114             :                  * clocksource with mask >= 40-bit and f >= 4GHz. That maps to
    1115             :                  * ~ 0.06ppm granularity for NTP.
    1116             :                  */
    1117           1 :                 sec = cs->mask;
    1118           1 :                 do_div(sec, freq);
    1119           1 :                 do_div(sec, scale);
    1120           1 :                 if (!sec)
    1121             :                         sec = 1;
    1122           1 :                 else if (sec > 600 && cs->mask > UINT_MAX)
    1123           1 :                         sec = 600;
    1124             : 
    1125           2 :                 clocks_calc_mult_shift(&cs->mult, &cs->shift, freq,
    1126           1 :                                        NSEC_PER_SEC / scale, sec * scale);
    1127             :         }
    1128             : 
    1129             :         /*
    1130             :          * If the uncertainty margin is not specified, calculate it.
    1131             :          * If both scale and freq are non-zero, calculate the clock
    1132             :          * period, but bound below at 2*WATCHDOG_MAX_SKEW.  However,
    1133             :          * if either of scale or freq is zero, be very conservative and
    1134             :          * take the tens-of-milliseconds WATCHDOG_THRESHOLD value for the
    1135             :          * uncertainty margin.  Allow stupidly small uncertainty margins
    1136             :          * to be specified by the caller for testing purposes, but warn
    1137             :          * to discourage production use of this capability.
    1138             :          */
    1139           2 :         if (scale && freq && !cs->uncertainty_margin) {
    1140           1 :                 cs->uncertainty_margin = NSEC_PER_SEC / (scale * freq);
    1141           1 :                 if (cs->uncertainty_margin < 2 * WATCHDOG_MAX_SKEW)
    1142           1 :                         cs->uncertainty_margin = 2 * WATCHDOG_MAX_SKEW;
    1143           1 :         } else if (!cs->uncertainty_margin) {
    1144           0 :                 cs->uncertainty_margin = WATCHDOG_THRESHOLD;
    1145             :         }
    1146           2 :         WARN_ON_ONCE(cs->uncertainty_margin < 2 * WATCHDOG_MAX_SKEW);
    1147             : 
    1148             :         /*
    1149             :          * Ensure clocksources that have large 'mult' values don't overflow
    1150             :          * when adjusted.
    1151             :          */
    1152           4 :         cs->maxadj = clocksource_max_adjustment(cs);
    1153           4 :         while (freq && ((cs->mult + cs->maxadj < cs->mult)
    1154           1 :                 || (cs->mult - cs->maxadj > cs->mult))) {
    1155           0 :                 cs->mult >>= 1;
    1156           0 :                 cs->shift--;
    1157           0 :                 cs->maxadj = clocksource_max_adjustment(cs);
    1158             :         }
    1159             : 
    1160             :         /*
    1161             :          * Only warn for *special* clocksources that self-define
    1162             :          * their mult/shift values and don't specify a freq.
    1163             :          */
    1164           2 :         WARN_ONCE(cs->mult + cs->maxadj < cs->mult,
    1165             :                 "timekeeping: Clocksource %s might overflow on 11%% adjustment\n",
    1166             :                 cs->name);
    1167             : 
    1168           2 :         clocksource_update_max_deferment(cs);
    1169             : 
    1170           2 :         pr_info("%s: mask: 0x%llx max_cycles: 0x%llx, max_idle_ns: %lld ns\n",
    1171             :                 cs->name, cs->mask, cs->max_cycles, cs->max_idle_ns);
    1172           2 : }
    1173             : EXPORT_SYMBOL_GPL(__clocksource_update_freq_scale);
    1174             : 
    1175             : /**
    1176             :  * __clocksource_register_scale - Used to install new clocksources
    1177             :  * @cs:         clocksource to be registered
    1178             :  * @scale:      Scale factor multiplied against freq to get clocksource hz
    1179             :  * @freq:       clocksource frequency (cycles per second) divided by scale
    1180             :  *
    1181             :  * Returns -EBUSY if registration fails, zero otherwise.
    1182             :  *
    1183             :  * This *SHOULD NOT* be called directly! Please use the
    1184             :  * clocksource_register_hz() or clocksource_register_khz helper functions.
    1185             :  */
    1186           2 : int __clocksource_register_scale(struct clocksource *cs, u32 scale, u32 freq)
    1187             : {
    1188             :         unsigned long flags;
    1189             : 
    1190           2 :         clocksource_arch_init(cs);
    1191             : 
    1192           2 :         if (WARN_ON_ONCE((unsigned int)cs->id >= CSID_MAX))
    1193           0 :                 cs->id = CSID_GENERIC;
    1194           2 :         if (cs->vdso_clock_mode < 0 ||
    1195           2 :             cs->vdso_clock_mode >= VDSO_CLOCKMODE_MAX) {
    1196           0 :                 pr_warn("clocksource %s registered with invalid VDSO mode %d. Disabling VDSO support.\n",
    1197             :                         cs->name, cs->vdso_clock_mode);
    1198           0 :                 cs->vdso_clock_mode = VDSO_CLOCKMODE_NONE;
    1199             :         }
    1200             : 
    1201             :         /* Initialize mult/shift and max_idle_ns */
    1202           2 :         __clocksource_update_freq_scale(cs, scale, freq);
    1203             : 
    1204             :         /* Add clocksource to the clocksource list */
    1205           2 :         mutex_lock(&clocksource_mutex);
    1206             : 
    1207           2 :         clocksource_watchdog_lock(&flags);
    1208           2 :         clocksource_enqueue(cs);
    1209           2 :         clocksource_enqueue_watchdog(cs);
    1210           2 :         clocksource_watchdog_unlock(&flags);
    1211             : 
    1212           2 :         clocksource_select();
    1213           2 :         clocksource_select_watchdog(false);
    1214           2 :         __clocksource_suspend_select(cs);
    1215           2 :         mutex_unlock(&clocksource_mutex);
    1216           2 :         return 0;
    1217             : }
    1218             : EXPORT_SYMBOL_GPL(__clocksource_register_scale);
    1219             : 
    1220           0 : static void __clocksource_change_rating(struct clocksource *cs, int rating)
    1221             : {
    1222           0 :         list_del(&cs->list);
    1223           0 :         cs->rating = rating;
    1224           0 :         clocksource_enqueue(cs);
    1225           0 : }
    1226             : 
    1227             : /**
    1228             :  * clocksource_change_rating - Change the rating of a registered clocksource
    1229             :  * @cs:         clocksource to be changed
    1230             :  * @rating:     new rating
    1231             :  */
    1232           0 : void clocksource_change_rating(struct clocksource *cs, int rating)
    1233             : {
    1234             :         unsigned long flags;
    1235             : 
    1236           0 :         mutex_lock(&clocksource_mutex);
    1237           0 :         clocksource_watchdog_lock(&flags);
    1238           0 :         __clocksource_change_rating(cs, rating);
    1239           0 :         clocksource_watchdog_unlock(&flags);
    1240             : 
    1241           0 :         clocksource_select();
    1242           0 :         clocksource_select_watchdog(false);
    1243           0 :         clocksource_suspend_select(false);
    1244           0 :         mutex_unlock(&clocksource_mutex);
    1245           0 : }
    1246             : EXPORT_SYMBOL(clocksource_change_rating);
    1247             : 
    1248             : /*
    1249             :  * Unbind clocksource @cs. Called with clocksource_mutex held
    1250             :  */
    1251           0 : static int clocksource_unbind(struct clocksource *cs)
    1252             : {
    1253             :         unsigned long flags;
    1254             : 
    1255           0 :         if (clocksource_is_watchdog(cs)) {
    1256             :                 /* Select and try to install a replacement watchdog. */
    1257             :                 clocksource_select_watchdog(true);
    1258             :                 if (clocksource_is_watchdog(cs))
    1259             :                         return -EBUSY;
    1260             :         }
    1261             : 
    1262           0 :         if (cs == curr_clocksource) {
    1263             :                 /* Select and try to install a replacement clock source */
    1264             :                 clocksource_select_fallback();
    1265           0 :                 if (curr_clocksource == cs)
    1266             :                         return -EBUSY;
    1267             :         }
    1268             : 
    1269           0 :         if (clocksource_is_suspend(cs)) {
    1270             :                 /*
    1271             :                  * Select and try to install a replacement suspend clocksource.
    1272             :                  * If no replacement suspend clocksource, we will just let the
    1273             :                  * clocksource go and have no suspend clocksource.
    1274             :                  */
    1275           0 :                 clocksource_suspend_select(true);
    1276             :         }
    1277             : 
    1278           0 :         clocksource_watchdog_lock(&flags);
    1279           0 :         clocksource_dequeue_watchdog(cs);
    1280           0 :         list_del_init(&cs->list);
    1281           0 :         clocksource_watchdog_unlock(&flags);
    1282             : 
    1283           0 :         return 0;
    1284             : }
    1285             : 
    1286             : /**
    1287             :  * clocksource_unregister - remove a registered clocksource
    1288             :  * @cs: clocksource to be unregistered
    1289             :  */
    1290           0 : int clocksource_unregister(struct clocksource *cs)
    1291             : {
    1292           0 :         int ret = 0;
    1293             : 
    1294           0 :         mutex_lock(&clocksource_mutex);
    1295           0 :         if (!list_empty(&cs->list))
    1296           0 :                 ret = clocksource_unbind(cs);
    1297           0 :         mutex_unlock(&clocksource_mutex);
    1298           0 :         return ret;
    1299             : }
    1300             : EXPORT_SYMBOL(clocksource_unregister);
    1301             : 
    1302             : #ifdef CONFIG_SYSFS
    1303             : /**
    1304             :  * current_clocksource_show - sysfs interface for current clocksource
    1305             :  * @dev:        unused
    1306             :  * @attr:       unused
    1307             :  * @buf:        char buffer to be filled with clocksource list
    1308             :  *
    1309             :  * Provides sysfs interface for listing current clocksource.
    1310             :  */
    1311           0 : static ssize_t current_clocksource_show(struct device *dev,
    1312             :                                         struct device_attribute *attr,
    1313             :                                         char *buf)
    1314             : {
    1315           0 :         ssize_t count = 0;
    1316             : 
    1317           0 :         mutex_lock(&clocksource_mutex);
    1318           0 :         count = snprintf(buf, PAGE_SIZE, "%s\n", curr_clocksource->name);
    1319           0 :         mutex_unlock(&clocksource_mutex);
    1320             : 
    1321           0 :         return count;
    1322             : }
    1323             : 
    1324           0 : ssize_t sysfs_get_uname(const char *buf, char *dst, size_t cnt)
    1325             : {
    1326           0 :         size_t ret = cnt;
    1327             : 
    1328             :         /* strings from sysfs write are not 0 terminated! */
    1329           0 :         if (!cnt || cnt >= CS_NAME_LEN)
    1330             :                 return -EINVAL;
    1331             : 
    1332             :         /* strip of \n: */
    1333           0 :         if (buf[cnt-1] == '\n')
    1334           0 :                 cnt--;
    1335           0 :         if (cnt > 0)
    1336           0 :                 memcpy(dst, buf, cnt);
    1337           0 :         dst[cnt] = 0;
    1338           0 :         return ret;
    1339             : }
    1340             : 
    1341             : /**
    1342             :  * current_clocksource_store - interface for manually overriding clocksource
    1343             :  * @dev:        unused
    1344             :  * @attr:       unused
    1345             :  * @buf:        name of override clocksource
    1346             :  * @count:      length of buffer
    1347             :  *
    1348             :  * Takes input from sysfs interface for manually overriding the default
    1349             :  * clocksource selection.
    1350             :  */
    1351           0 : static ssize_t current_clocksource_store(struct device *dev,
    1352             :                                          struct device_attribute *attr,
    1353             :                                          const char *buf, size_t count)
    1354             : {
    1355             :         ssize_t ret;
    1356             : 
    1357           0 :         mutex_lock(&clocksource_mutex);
    1358             : 
    1359           0 :         ret = sysfs_get_uname(buf, override_name, count);
    1360           0 :         if (ret >= 0)
    1361             :                 clocksource_select();
    1362             : 
    1363           0 :         mutex_unlock(&clocksource_mutex);
    1364             : 
    1365           0 :         return ret;
    1366             : }
    1367             : static DEVICE_ATTR_RW(current_clocksource);
    1368             : 
    1369             : /**
    1370             :  * unbind_clocksource_store - interface for manually unbinding clocksource
    1371             :  * @dev:        unused
    1372             :  * @attr:       unused
    1373             :  * @buf:        unused
    1374             :  * @count:      length of buffer
    1375             :  *
    1376             :  * Takes input from sysfs interface for manually unbinding a clocksource.
    1377             :  */
    1378           0 : static ssize_t unbind_clocksource_store(struct device *dev,
    1379             :                                         struct device_attribute *attr,
    1380             :                                         const char *buf, size_t count)
    1381             : {
    1382             :         struct clocksource *cs;
    1383             :         char name[CS_NAME_LEN];
    1384             :         ssize_t ret;
    1385             : 
    1386           0 :         ret = sysfs_get_uname(buf, name, count);
    1387           0 :         if (ret < 0)
    1388             :                 return ret;
    1389             : 
    1390           0 :         ret = -ENODEV;
    1391           0 :         mutex_lock(&clocksource_mutex);
    1392           0 :         list_for_each_entry(cs, &clocksource_list, list) {
    1393           0 :                 if (strcmp(cs->name, name))
    1394           0 :                         continue;
    1395           0 :                 ret = clocksource_unbind(cs);
    1396           0 :                 break;
    1397             :         }
    1398           0 :         mutex_unlock(&clocksource_mutex);
    1399             : 
    1400           0 :         return ret ? ret : count;
    1401             : }
    1402             : static DEVICE_ATTR_WO(unbind_clocksource);
    1403             : 
    1404             : /**
    1405             :  * available_clocksource_show - sysfs interface for listing clocksource
    1406             :  * @dev:        unused
    1407             :  * @attr:       unused
    1408             :  * @buf:        char buffer to be filled with clocksource list
    1409             :  *
    1410             :  * Provides sysfs interface for listing registered clocksources
    1411             :  */
    1412           0 : static ssize_t available_clocksource_show(struct device *dev,
    1413             :                                           struct device_attribute *attr,
    1414             :                                           char *buf)
    1415             : {
    1416             :         struct clocksource *src;
    1417           0 :         ssize_t count = 0;
    1418             : 
    1419           0 :         mutex_lock(&clocksource_mutex);
    1420           0 :         list_for_each_entry(src, &clocksource_list, list) {
    1421             :                 /*
    1422             :                  * Don't show non-HRES clocksource if the tick code is
    1423             :                  * in one shot mode (highres=on or nohz=on)
    1424             :                  */
    1425             :                 if (!tick_oneshot_mode_active() ||
    1426             :                     (src->flags & CLOCK_SOURCE_VALID_FOR_HRES))
    1427           0 :                         count += snprintf(buf + count,
    1428           0 :                                   max((ssize_t)PAGE_SIZE - count, (ssize_t)0),
    1429             :                                   "%s ", src->name);
    1430             :         }
    1431           0 :         mutex_unlock(&clocksource_mutex);
    1432             : 
    1433           0 :         count += snprintf(buf + count,
    1434           0 :                           max((ssize_t)PAGE_SIZE - count, (ssize_t)0), "\n");
    1435             : 
    1436           0 :         return count;
    1437             : }
    1438             : static DEVICE_ATTR_RO(available_clocksource);
    1439             : 
    1440             : static struct attribute *clocksource_attrs[] = {
    1441             :         &dev_attr_current_clocksource.attr,
    1442             :         &dev_attr_unbind_clocksource.attr,
    1443             :         &dev_attr_available_clocksource.attr,
    1444             :         NULL
    1445             : };
    1446             : ATTRIBUTE_GROUPS(clocksource);
    1447             : 
    1448             : static struct bus_type clocksource_subsys = {
    1449             :         .name = "clocksource",
    1450             :         .dev_name = "clocksource",
    1451             : };
    1452             : 
    1453             : static struct device device_clocksource = {
    1454             :         .id     = 0,
    1455             :         .bus    = &clocksource_subsys,
    1456             :         .groups = clocksource_groups,
    1457             : };
    1458             : 
    1459           1 : static int __init init_clocksource_sysfs(void)
    1460             : {
    1461           1 :         int error = subsys_system_register(&clocksource_subsys, NULL);
    1462             : 
    1463           1 :         if (!error)
    1464           1 :                 error = device_register(&device_clocksource);
    1465             : 
    1466           1 :         return error;
    1467             : }
    1468             : 
    1469             : device_initcall(init_clocksource_sysfs);
    1470             : #endif /* CONFIG_SYSFS */
    1471             : 
    1472             : /**
    1473             :  * boot_override_clocksource - boot clock override
    1474             :  * @str:        override name
    1475             :  *
    1476             :  * Takes a clocksource= boot argument and uses it
    1477             :  * as the clocksource override name.
    1478             :  */
    1479           0 : static int __init boot_override_clocksource(char* str)
    1480             : {
    1481           0 :         mutex_lock(&clocksource_mutex);
    1482           0 :         if (str)
    1483           0 :                 strlcpy(override_name, str, sizeof(override_name));
    1484           0 :         mutex_unlock(&clocksource_mutex);
    1485           0 :         return 1;
    1486             : }
    1487             : 
    1488             : __setup("clocksource=", boot_override_clocksource);
    1489             : 
    1490             : /**
    1491             :  * boot_override_clock - Compatibility layer for deprecated boot option
    1492             :  * @str:        override name
    1493             :  *
    1494             :  * DEPRECATED! Takes a clock= boot argument and uses it
    1495             :  * as the clocksource override name
    1496             :  */
    1497           0 : static int __init boot_override_clock(char* str)
    1498             : {
    1499           0 :         if (!strcmp(str, "pmtmr")) {
    1500           0 :                 pr_warn("clock=pmtmr is deprecated - use clocksource=acpi_pm\n");
    1501           0 :                 return boot_override_clocksource("acpi_pm");
    1502             :         }
    1503           0 :         pr_warn("clock= boot option is deprecated - use clocksource=xyz\n");
    1504           0 :         return boot_override_clocksource(str);
    1505             : }
    1506             : 
    1507             : __setup("clock=", boot_override_clock);

Generated by: LCOV version 1.14