Line data Source code
1 : // SPDX-License-Identifier: GPL-2.0
2 : /*
3 : * Copyright(C) 2005-2006, Thomas Gleixner <tglx@linutronix.de>
4 : * Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar
5 : * Copyright(C) 2006-2007 Timesys Corp., Thomas Gleixner
6 : *
7 : * High-resolution kernel timers
8 : *
9 : * In contrast to the low-resolution timeout API, aka timer wheel,
10 : * hrtimers provide finer resolution and accuracy depending on system
11 : * configuration and capabilities.
12 : *
13 : * Started by: Thomas Gleixner and Ingo Molnar
14 : *
15 : * Credits:
16 : * Based on the original timer wheel code
17 : *
18 : * Help, testing, suggestions, bugfixes, improvements were
19 : * provided by:
20 : *
21 : * George Anzinger, Andrew Morton, Steven Rostedt, Roman Zippel
22 : * et al.
23 : */
24 :
25 : #include <linux/cpu.h>
26 : #include <linux/export.h>
27 : #include <linux/percpu.h>
28 : #include <linux/hrtimer.h>
29 : #include <linux/notifier.h>
30 : #include <linux/syscalls.h>
31 : #include <linux/interrupt.h>
32 : #include <linux/tick.h>
33 : #include <linux/err.h>
34 : #include <linux/debugobjects.h>
35 : #include <linux/sched/signal.h>
36 : #include <linux/sched/sysctl.h>
37 : #include <linux/sched/rt.h>
38 : #include <linux/sched/deadline.h>
39 : #include <linux/sched/nohz.h>
40 : #include <linux/sched/debug.h>
41 : #include <linux/timer.h>
42 : #include <linux/freezer.h>
43 : #include <linux/compat.h>
44 :
45 : #include <linux/uaccess.h>
46 :
47 : #include <trace/events/timer.h>
48 :
49 : #include "tick-internal.h"
50 :
51 : /*
52 : * Masks for selecting the soft and hard context timers from
53 : * cpu_base->active
54 : */
55 : #define MASK_SHIFT (HRTIMER_BASE_MONOTONIC_SOFT)
56 : #define HRTIMER_ACTIVE_HARD ((1U << MASK_SHIFT) - 1)
57 : #define HRTIMER_ACTIVE_SOFT (HRTIMER_ACTIVE_HARD << MASK_SHIFT)
58 : #define HRTIMER_ACTIVE_ALL (HRTIMER_ACTIVE_SOFT | HRTIMER_ACTIVE_HARD)
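/*
 * Worked example (illustrative sketch, assuming the clock base enumeration
 * from <linux/hrtimer.h> where HRTIMER_BASE_MONOTONIC_SOFT == 4): the four
 * hard bases occupy bits 0-3 of cpu_base->active and the four soft bases
 * bits 4-7, so the masks above evaluate to:
 */
static_assert(HRTIMER_ACTIVE_HARD == 0x0f);
static_assert(HRTIMER_ACTIVE_SOFT == 0xf0);
static_assert(HRTIMER_ACTIVE_ALL  == 0xff);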
59 :
60 : /*
61 : * The timer bases:
62 : *
63 : * There are more clockids than hrtimer bases. Thus, we index
64 : * into the timer bases by the hrtimer_base_type enum. When trying
65 : * to reach a base using a clockid, hrtimer_clockid_to_base()
66 : * is used to convert from clockid to the proper hrtimer_base_type.
67 : */
68 : DEFINE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases) =
69 : {
70 : .lock = __RAW_SPIN_LOCK_UNLOCKED(hrtimer_bases.lock),
71 : .clock_base =
72 : {
73 : {
74 : .index = HRTIMER_BASE_MONOTONIC,
75 : .clockid = CLOCK_MONOTONIC,
76 : .get_time = &ktime_get,
77 : },
78 : {
79 : .index = HRTIMER_BASE_REALTIME,
80 : .clockid = CLOCK_REALTIME,
81 : .get_time = &ktime_get_real,
82 : },
83 : {
84 : .index = HRTIMER_BASE_BOOTTIME,
85 : .clockid = CLOCK_BOOTTIME,
86 : .get_time = &ktime_get_boottime,
87 : },
88 : {
89 : .index = HRTIMER_BASE_TAI,
90 : .clockid = CLOCK_TAI,
91 : .get_time = &ktime_get_clocktai,
92 : },
93 : {
94 : .index = HRTIMER_BASE_MONOTONIC_SOFT,
95 : .clockid = CLOCK_MONOTONIC,
96 : .get_time = &ktime_get,
97 : },
98 : {
99 : .index = HRTIMER_BASE_REALTIME_SOFT,
100 : .clockid = CLOCK_REALTIME,
101 : .get_time = &ktime_get_real,
102 : },
103 : {
104 : .index = HRTIMER_BASE_BOOTTIME_SOFT,
105 : .clockid = CLOCK_BOOTTIME,
106 : .get_time = &ktime_get_boottime,
107 : },
108 : {
109 : .index = HRTIMER_BASE_TAI_SOFT,
110 : .clockid = CLOCK_TAI,
111 : .get_time = &ktime_get_clocktai,
112 : },
113 : }
114 : };
115 :
116 : static const int hrtimer_clock_to_base_table[MAX_CLOCKS] = {
117 : /* Make sure we catch unsupported clockids */
118 : [0 ... MAX_CLOCKS - 1] = HRTIMER_MAX_CLOCK_BASES,
119 :
120 : [CLOCK_REALTIME] = HRTIMER_BASE_REALTIME,
121 : [CLOCK_MONOTONIC] = HRTIMER_BASE_MONOTONIC,
122 : [CLOCK_BOOTTIME] = HRTIMER_BASE_BOOTTIME,
123 : [CLOCK_TAI] = HRTIMER_BASE_TAI,
124 : };
125 :
126 : /*
127 : * Functions and macros which are different for UP/SMP systems are kept in a
128 : * single place
129 : */
130 : #ifdef CONFIG_SMP
131 :
132 : /*
133 : * We require the migration_base for lock_hrtimer_base()/switch_hrtimer_base()
134 : * such that hrtimer_callback_running() can unconditionally dereference
135 : * timer->base->cpu_base
136 : */
137 : static struct hrtimer_cpu_base migration_cpu_base = {
138 : .clock_base = { {
139 : .cpu_base = &migration_cpu_base,
140 : .seq = SEQCNT_RAW_SPINLOCK_ZERO(migration_cpu_base.seq,
141 : &migration_cpu_base.lock),
142 : }, },
143 : };
144 :
145 : #define migration_base migration_cpu_base.clock_base[0]
146 :
147 : static inline bool is_migration_base(struct hrtimer_clock_base *base)
148 : {
149 : return base == &migration_base;
150 : }
151 :
152 : /*
153 : * We are using hashed locking: holding per_cpu(hrtimer_bases)[n].lock
154 : * means that all timers which are tied to this base via timer->base are
155 : * locked, and the base itself is locked too.
156 : *
157 : * So __run_timers/migrate_timers can safely modify all timers which could
158 : * be found on the lists/queues.
159 : *
160 : * When the timer's base is locked, and the timer removed from list, it is
161 : * possible to set timer->base = &migration_base and drop the lock: the timer
162 : * remains locked.
163 : */
164 : static
165 : struct hrtimer_clock_base *lock_hrtimer_base(const struct hrtimer *timer,
166 : unsigned long *flags)
167 : {
168 : struct hrtimer_clock_base *base;
169 :
170 : for (;;) {
171 : base = READ_ONCE(timer->base);
172 : if (likely(base != &migration_base)) {
173 : raw_spin_lock_irqsave(&base->cpu_base->lock, *flags);
174 : if (likely(base == timer->base))
175 : return base;
176 : /* The timer has migrated to another CPU: */
177 : raw_spin_unlock_irqrestore(&base->cpu_base->lock, *flags);
178 : }
179 : cpu_relax();
180 : }
181 : }
182 :
183 : /*
184 : * We do not migrate the timer when it is expiring before the next
185 : * event on the target cpu. When high resolution is enabled, we cannot
186 : * reprogram the target cpu hardware and we would cause it to fire
187 : * late. To keep it simple, we handle the high resolution enabled and
188 : * disabled cases the same way.
189 : *
190 : * Called with cpu_base->lock of target cpu held.
191 : */
192 : static int
193 : hrtimer_check_target(struct hrtimer *timer, struct hrtimer_clock_base *new_base)
194 : {
195 : ktime_t expires;
196 :
197 : expires = ktime_sub(hrtimer_get_expires(timer), new_base->offset);
198 : return expires < new_base->cpu_base->expires_next;
199 : }
200 :
201 : static inline
202 : struct hrtimer_cpu_base *get_target_base(struct hrtimer_cpu_base *base,
203 : int pinned)
204 : {
205 : #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
206 : if (static_branch_likely(&timers_migration_enabled) && !pinned)
207 : return &per_cpu(hrtimer_bases, get_nohz_timer_target());
208 : #endif
209 : return base;
210 : }
211 :
212 : /*
213 : * We switch the timer base to a power-optimized selected CPU target,
214 : * if:
215 : * - NO_HZ_COMMON is enabled
216 : * - timer migration is enabled
217 : * - the timer callback is not running
218 : * - the timer is not the first expiring timer on the new target
219 : *
220 : * If one of the above requirements is not fulfilled we move the timer
221 : * to the current CPU or leave it on the previously assigned CPU if
222 : * the timer callback is currently running.
223 : */
224 : static inline struct hrtimer_clock_base *
225 : switch_hrtimer_base(struct hrtimer *timer, struct hrtimer_clock_base *base,
226 : int pinned)
227 : {
228 : struct hrtimer_cpu_base *new_cpu_base, *this_cpu_base;
229 : struct hrtimer_clock_base *new_base;
230 : int basenum = base->index;
231 :
232 : this_cpu_base = this_cpu_ptr(&hrtimer_bases);
233 : new_cpu_base = get_target_base(this_cpu_base, pinned);
234 : again:
235 : new_base = &new_cpu_base->clock_base[basenum];
236 :
237 : if (base != new_base) {
238 : /*
239 : * We are trying to move timer to new_base.
240 : * However we can't change timer's base while it is running,
241 : * so we keep it on the same CPU. No hassle vs. reprogramming
242 : * the event source in the high resolution case. The softirq
243 : * code will take care of this when the timer function has
244 : * completed. There is no conflict as we hold the lock until
245 : * the timer is enqueued.
246 : */
247 : if (unlikely(hrtimer_callback_running(timer)))
248 : return base;
249 :
250 : /* See the comment in lock_hrtimer_base() */
251 : WRITE_ONCE(timer->base, &migration_base);
252 : raw_spin_unlock(&base->cpu_base->lock);
253 : raw_spin_lock(&new_base->cpu_base->lock);
254 :
255 : if (new_cpu_base != this_cpu_base &&
256 : hrtimer_check_target(timer, new_base)) {
257 : raw_spin_unlock(&new_base->cpu_base->lock);
258 : raw_spin_lock(&base->cpu_base->lock);
259 : new_cpu_base = this_cpu_base;
260 : WRITE_ONCE(timer->base, base);
261 : goto again;
262 : }
263 : WRITE_ONCE(timer->base, new_base);
264 : } else {
265 : if (new_cpu_base != this_cpu_base &&
266 : hrtimer_check_target(timer, new_base)) {
267 : new_cpu_base = this_cpu_base;
268 : goto again;
269 : }
270 : }
271 : return new_base;
272 : }
273 :
274 : #else /* CONFIG_SMP */
275 :
276 : static inline bool is_migration_base(struct hrtimer_clock_base *base)
277 : {
278 : return false;
279 : }
280 :
281 : static inline struct hrtimer_clock_base *
282 : lock_hrtimer_base(const struct hrtimer *timer, unsigned long *flags)
283 : {
284 0 : struct hrtimer_clock_base *base = timer->base;
285 :
286 0 : raw_spin_lock_irqsave(&base->cpu_base->lock, *flags);
287 :
288 : return base;
289 : }
290 :
291 : # define switch_hrtimer_base(t, b, p) (b)
292 :
293 : #endif /* !CONFIG_SMP */
294 :
295 : /*
296 : * Functions for the union type storage format of ktime_t which are
297 : * too large for inlining:
298 : */
299 : #if BITS_PER_LONG < 64
300 : /*
301 : * Divide a ktime value by a nanosecond value
302 : */
303 : s64 __ktime_divns(const ktime_t kt, s64 div)
304 : {
305 : int sft = 0;
306 : s64 dclc;
307 : u64 tmp;
308 :
309 : dclc = ktime_to_ns(kt);
310 : tmp = dclc < 0 ? -dclc : dclc;
311 :
312 : /* Make sure the divisor is less than 2^32: */
313 : while (div >> 32) {
314 : sft++;
315 : div >>= 1;
316 : }
317 : tmp >>= sft;
318 : do_div(tmp, (u32) div);
319 : return dclc < 0 ? -tmp : tmp;
320 : }
321 : EXPORT_SYMBOL_GPL(__ktime_divns);
322 : #endif /* BITS_PER_LONG < 64 */
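/*
 * Worked example (illustrative): for kt = 10^18 ns and div = 5 * 10^9 the
 * divisor does not fit in 32 bits, so the loop in __ktime_divns() shifts
 * once (sft = 1, div becomes 2.5 * 10^9, which fits). tmp is shifted down
 * to 5 * 10^17 and do_div() yields exactly 2 * 10^8. The shift sacrifices
 * the low bit of the dividend so that do_div(), a 64-by-32 division, can
 * be used on 32-bit systems.
 */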
323 :
324 : /*
325 : * Add two ktime values and do a safety check for overflow:
326 : */
327 0 : ktime_t ktime_add_safe(const ktime_t lhs, const ktime_t rhs)
328 : {
329 0 : ktime_t res = ktime_add_unsafe(lhs, rhs);
330 :
331 : /*
332 : * We use KTIME_SEC_MAX here, the maximum timeout which we can
333 : * return to user space in a timespec:
334 : */
335 0 : if (res < 0 || res < lhs || res < rhs)
336 0 : res = ktime_set(KTIME_SEC_MAX, 0);
337 :
338 0 : return res;
339 : }
340 :
341 : EXPORT_SYMBOL_GPL(ktime_add_safe);
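/*
 * Usage sketch (illustrative; my_abs_expiry() is a made-up helper):
 * relative expiry times are turned into absolute ones with
 * ktime_add_safe() so that a huge timeout saturates at KTIME_SEC_MAX,
 * the maximum value which can be returned to user space in a timespec,
 * instead of overflowing. __hrtimer_start_range_ns() does exactly this
 * for HRTIMER_MODE_REL timers.
 */
static ktime_t my_abs_expiry(ktime_t timeout)
{
        return ktime_add_safe(ktime_get(), timeout);
}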
342 :
343 : #ifdef CONFIG_DEBUG_OBJECTS_TIMERS
344 :
345 : static const struct debug_obj_descr hrtimer_debug_descr;
346 :
347 : static void *hrtimer_debug_hint(void *addr)
348 : {
349 : return ((struct hrtimer *) addr)->function;
350 : }
351 :
352 : /*
353 : * fixup_init is called when:
354 : * - an active object is initialized
355 : */
356 : static bool hrtimer_fixup_init(void *addr, enum debug_obj_state state)
357 : {
358 : struct hrtimer *timer = addr;
359 :
360 : switch (state) {
361 : case ODEBUG_STATE_ACTIVE:
362 : hrtimer_cancel(timer);
363 : debug_object_init(timer, &hrtimer_debug_descr);
364 : return true;
365 : default:
366 : return false;
367 : }
368 : }
369 :
370 : /*
371 : * fixup_activate is called when:
372 : * - an active object is activated
373 : * - an unknown non-static object is activated
374 : */
375 : static bool hrtimer_fixup_activate(void *addr, enum debug_obj_state state)
376 : {
377 : switch (state) {
378 : case ODEBUG_STATE_ACTIVE:
379 : WARN_ON(1);
380 : fallthrough;
381 : default:
382 : return false;
383 : }
384 : }
385 :
386 : /*
387 : * fixup_free is called when:
388 : * - an active object is freed
389 : */
390 : static bool hrtimer_fixup_free(void *addr, enum debug_obj_state state)
391 : {
392 : struct hrtimer *timer = addr;
393 :
394 : switch (state) {
395 : case ODEBUG_STATE_ACTIVE:
396 : hrtimer_cancel(timer);
397 : debug_object_free(timer, &hrtimer_debug_descr);
398 : return true;
399 : default:
400 : return false;
401 : }
402 : }
403 :
404 : static const struct debug_obj_descr hrtimer_debug_descr = {
405 : .name = "hrtimer",
406 : .debug_hint = hrtimer_debug_hint,
407 : .fixup_init = hrtimer_fixup_init,
408 : .fixup_activate = hrtimer_fixup_activate,
409 : .fixup_free = hrtimer_fixup_free,
410 : };
411 :
412 : static inline void debug_hrtimer_init(struct hrtimer *timer)
413 : {
414 : debug_object_init(timer, &hrtimer_debug_descr);
415 : }
416 :
417 : static inline void debug_hrtimer_activate(struct hrtimer *timer,
418 : enum hrtimer_mode mode)
419 : {
420 : debug_object_activate(timer, &hrtimer_debug_descr);
421 : }
422 :
423 : static inline void debug_hrtimer_deactivate(struct hrtimer *timer)
424 : {
425 : debug_object_deactivate(timer, &hrtimer_debug_descr);
426 : }
427 :
428 : static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id,
429 : enum hrtimer_mode mode);
430 :
431 : void hrtimer_init_on_stack(struct hrtimer *timer, clockid_t clock_id,
432 : enum hrtimer_mode mode)
433 : {
434 : debug_object_init_on_stack(timer, &hrtimer_debug_descr);
435 : __hrtimer_init(timer, clock_id, mode);
436 : }
437 : EXPORT_SYMBOL_GPL(hrtimer_init_on_stack);
438 :
439 : static void __hrtimer_init_sleeper(struct hrtimer_sleeper *sl,
440 : clockid_t clock_id, enum hrtimer_mode mode);
441 :
442 : void hrtimer_init_sleeper_on_stack(struct hrtimer_sleeper *sl,
443 : clockid_t clock_id, enum hrtimer_mode mode)
444 : {
445 : debug_object_init_on_stack(&sl->timer, &hrtimer_debug_descr);
446 : __hrtimer_init_sleeper(sl, clock_id, mode);
447 : }
448 : EXPORT_SYMBOL_GPL(hrtimer_init_sleeper_on_stack);
449 :
450 : void destroy_hrtimer_on_stack(struct hrtimer *timer)
451 : {
452 : debug_object_free(timer, &hrtimer_debug_descr);
453 : }
454 : EXPORT_SYMBOL_GPL(destroy_hrtimer_on_stack);
455 :
456 : #else
457 :
458 : static inline void debug_hrtimer_init(struct hrtimer *timer) { }
459 : static inline void debug_hrtimer_activate(struct hrtimer *timer,
460 : enum hrtimer_mode mode) { }
461 : static inline void debug_hrtimer_deactivate(struct hrtimer *timer) { }
462 : #endif
463 :
464 : static inline void
465 : debug_init(struct hrtimer *timer, clockid_t clockid,
466 : enum hrtimer_mode mode)
467 : {
468 1729 : debug_hrtimer_init(timer);
469 1729 : trace_hrtimer_init(timer, clockid, mode);
470 : }
471 :
472 : static inline void debug_activate(struct hrtimer *timer,
473 : enum hrtimer_mode mode)
474 : {
475 0 : debug_hrtimer_activate(timer, mode);
476 0 : trace_hrtimer_start(timer, mode);
477 : }
478 :
479 : static inline void debug_deactivate(struct hrtimer *timer)
480 : {
481 0 : debug_hrtimer_deactivate(timer);
482 0 : trace_hrtimer_cancel(timer);
483 : }
484 :
485 : static struct hrtimer_clock_base *
486 : __next_base(struct hrtimer_cpu_base *cpu_base, unsigned int *active)
487 : {
488 : unsigned int idx;
489 :
490 2943 : if (!*active)
491 : return NULL;
492 :
493 0 : idx = __ffs(*active);
494 0 : *active &= ~(1U << idx);
495 :
496 0 : return &cpu_base->clock_base[idx];
497 : }
498 :
499 : #define for_each_active_base(base, cpu_base, active) \
500 : while ((base = __next_base((cpu_base), &(active))))
501 :
502 0 : static ktime_t __hrtimer_next_event_base(struct hrtimer_cpu_base *cpu_base,
503 : const struct hrtimer *exclude,
504 : unsigned int active,
505 : ktime_t expires_next)
506 : {
507 : struct hrtimer_clock_base *base;
508 : ktime_t expires;
509 :
510 0 : for_each_active_base(base, cpu_base, active) {
511 : struct timerqueue_node *next;
512 : struct hrtimer *timer;
513 :
514 0 : next = timerqueue_getnext(&base->active);
515 0 : timer = container_of(next, struct hrtimer, node);
516 0 : if (timer == exclude) {
517 : /* Get to the next timer in the queue. */
518 0 : next = timerqueue_iterate_next(next);
519 0 : if (!next)
520 0 : continue;
521 :
522 : timer = container_of(next, struct hrtimer, node);
523 : }
524 0 : expires = ktime_sub(hrtimer_get_expires(timer), base->offset);
525 0 : if (expires < expires_next) {
526 0 : expires_next = expires;
527 :
528 : /* Skip cpu_base update if a timer is being excluded. */
529 0 : if (exclude)
530 0 : continue;
531 :
532 0 : if (timer->is_soft)
533 0 : cpu_base->softirq_next_timer = timer;
534 : else
535 0 : cpu_base->next_timer = timer;
536 : }
537 : }
538 : /*
539 : * clock_was_set() might have changed base->offset of any of
540 : * the clock bases so the result might be negative. Fix it up
541 : * to prevent a false positive in clockevents_program_event().
542 : */
543 0 : if (expires_next < 0)
544 0 : expires_next = 0;
545 0 : return expires_next;
546 : }
547 :
548 : /*
549 : * Recomputes cpu_base::*next_timer and returns the earliest expires_next
550 : * but does not set cpu_base::*expires_next, that is done by
551 : * hrtimer[_force]_reprogram and hrtimer_interrupt only. When updating
552 : * cpu_base::*expires_next right away, reprogramming logic would no longer
553 : * work.
554 : *
555 : * When a softirq is pending, we can ignore the HRTIMER_ACTIVE_SOFT bases,
556 : * those timers will get run whenever the softirq gets handled; at the end of
557 : * hrtimer_run_softirq(), hrtimer_update_softirq_timer() will re-add these bases.
558 : *
559 : * Therefore softirq values are those from the HRTIMER_ACTIVE_SOFT clock bases.
560 : * The !softirq values are the minima across HRTIMER_ACTIVE_ALL, unless an actual
561 : * softirq is pending, in which case they're the minima of HRTIMER_ACTIVE_HARD.
562 : *
563 : * @active_mask must be one of:
564 : * - HRTIMER_ACTIVE_ALL,
565 : * - HRTIMER_ACTIVE_SOFT, or
566 : * - HRTIMER_ACTIVE_HARD.
567 : */
568 : static ktime_t
569 0 : __hrtimer_get_next_event(struct hrtimer_cpu_base *cpu_base, unsigned int active_mask)
570 : {
571 : unsigned int active;
572 0 : struct hrtimer *next_timer = NULL;
573 0 : ktime_t expires_next = KTIME_MAX;
574 :
575 0 : if (!cpu_base->softirq_activated && (active_mask & HRTIMER_ACTIVE_SOFT)) {
576 0 : active = cpu_base->active_bases & HRTIMER_ACTIVE_SOFT;
577 0 : cpu_base->softirq_next_timer = NULL;
578 0 : expires_next = __hrtimer_next_event_base(cpu_base, NULL,
579 : active, KTIME_MAX);
580 :
581 0 : next_timer = cpu_base->softirq_next_timer;
582 : }
583 :
584 0 : if (active_mask & HRTIMER_ACTIVE_HARD) {
585 0 : active = cpu_base->active_bases & HRTIMER_ACTIVE_HARD;
586 0 : cpu_base->next_timer = next_timer;
587 0 : expires_next = __hrtimer_next_event_base(cpu_base, NULL, active,
588 : expires_next);
589 : }
590 :
591 0 : return expires_next;
592 : }
593 :
594 0 : static ktime_t hrtimer_update_next_event(struct hrtimer_cpu_base *cpu_base)
595 : {
596 0 : ktime_t expires_next, soft = KTIME_MAX;
597 :
598 : /*
599 : * If the soft interrupt has already been activated, ignore the
600 : * soft bases. They will be handled in the already raised soft
601 : * interrupt.
602 : */
603 0 : if (!cpu_base->softirq_activated) {
604 0 : soft = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE_SOFT);
605 : /*
606 : * Update the soft expiry time. clock_settime() might have
607 : * affected it.
608 : */
609 0 : cpu_base->softirq_expires_next = soft;
610 : }
611 :
612 0 : expires_next = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE_HARD);
613 : /*
614 : * If a softirq timer is expiring first, update cpu_base->next_timer
615 : * and program the hardware with the soft expiry time.
616 : */
617 0 : if (expires_next > soft) {
618 0 : cpu_base->next_timer = cpu_base->softirq_next_timer;
619 0 : expires_next = soft;
620 : }
621 :
622 0 : return expires_next;
623 : }
624 :
625 : static inline ktime_t hrtimer_update_base(struct hrtimer_cpu_base *base)
626 : {
627 2943 : ktime_t *offs_real = &base->clock_base[HRTIMER_BASE_REALTIME].offset;
628 2943 : ktime_t *offs_boot = &base->clock_base[HRTIMER_BASE_BOOTTIME].offset;
629 2943 : ktime_t *offs_tai = &base->clock_base[HRTIMER_BASE_TAI].offset;
630 :
631 2943 : ktime_t now = ktime_get_update_offsets_now(&base->clock_was_set_seq,
632 : offs_real, offs_boot, offs_tai);
633 :
634 2943 : base->clock_base[HRTIMER_BASE_REALTIME_SOFT].offset = *offs_real;
635 2943 : base->clock_base[HRTIMER_BASE_BOOTTIME_SOFT].offset = *offs_boot;
636 2943 : base->clock_base[HRTIMER_BASE_TAI_SOFT].offset = *offs_tai;
637 :
638 : return now;
639 : }
640 :
641 : /*
642 : * Is the high resolution mode active?
643 : */
644 : static inline int __hrtimer_hres_active(struct hrtimer_cpu_base *cpu_base)
645 : {
646 : return IS_ENABLED(CONFIG_HIGH_RES_TIMERS) ?
647 : cpu_base->hres_active : 0;
648 : }
649 :
650 : static inline int hrtimer_hres_active(void)
651 : {
652 : return __hrtimer_hres_active(this_cpu_ptr(&hrtimer_bases));
653 : }
654 :
655 : static void __hrtimer_reprogram(struct hrtimer_cpu_base *cpu_base,
656 : struct hrtimer *next_timer,
657 : ktime_t expires_next)
658 : {
659 0 : cpu_base->expires_next = expires_next;
660 :
661 : /*
662 : * If hres is not active, hardware does not have to be
663 : * reprogrammed yet.
664 : *
665 : * If a hang was detected in the last timer interrupt then we
666 : * leave the hang delay active in the hardware. We want the
667 : * system to make progress. That also prevents the following
668 : * scenario:
669 : * T1 expires 50ms from now
670 : * T2 expires 5s from now
671 : *
672 : * T1 is removed, so this code is called and would reprogram
673 : * the hardware to 5s from now. Any hrtimer_start after that
674 : * will not reprogram the hardware due to hang_detected being
675 : * set. So we'd effectively block all timers until the T2 event
676 : * fires.
677 : */
678 0 : if (!__hrtimer_hres_active(cpu_base) || cpu_base->hang_detected)
679 : return;
680 :
681 : tick_program_event(expires_next, 1);
682 : }
683 :
684 : /*
685 : * Reprogram the event source with checking both queues for the
686 : * next event
687 : * Called with interrupts disabled and base->lock held
688 : */
689 : static void
690 : hrtimer_force_reprogram(struct hrtimer_cpu_base *cpu_base, int skip_equal)
691 : {
692 : ktime_t expires_next;
693 :
694 0 : expires_next = hrtimer_update_next_event(cpu_base);
695 :
696 0 : if (skip_equal && expires_next == cpu_base->expires_next)
697 : return;
698 :
699 0 : __hrtimer_reprogram(cpu_base, cpu_base->next_timer, expires_next);
700 : }
701 :
702 : /* High resolution timer related functions */
703 : #ifdef CONFIG_HIGH_RES_TIMERS
704 :
705 : /*
706 : * High resolution timer enabled?
707 : */
708 : static bool hrtimer_hres_enabled __read_mostly = true;
709 : unsigned int hrtimer_resolution __read_mostly = LOW_RES_NSEC;
710 : EXPORT_SYMBOL_GPL(hrtimer_resolution);
711 :
712 : /*
713 : * Enable / Disable high resolution mode
714 : */
715 : static int __init setup_hrtimer_hres(char *str)
716 : {
717 : return (kstrtobool(str, &hrtimer_hres_enabled) == 0);
718 : }
719 :
720 : __setup("highres=", setup_hrtimer_hres);
721 :
722 : /*
723 : * hrtimer_is_hres_enabled - query whether the highres mode is enabled
724 : */
725 : static inline int hrtimer_is_hres_enabled(void)
726 : {
727 : return hrtimer_hres_enabled;
728 : }
729 :
730 : static void retrigger_next_event(void *arg);
731 :
732 : /*
733 : * Switch to high resolution mode
734 : */
735 : static void hrtimer_switch_to_hres(void)
736 : {
737 : struct hrtimer_cpu_base *base = this_cpu_ptr(&hrtimer_bases);
738 :
739 : if (tick_init_highres()) {
740 : pr_warn("Could not switch to high resolution mode on CPU %u\n",
741 : base->cpu);
742 : return;
743 : }
744 : base->hres_active = 1;
745 : hrtimer_resolution = HIGH_RES_NSEC;
746 :
747 : tick_setup_sched_timer();
748 : /* "Retrigger" the interrupt to get things going */
749 : retrigger_next_event(NULL);
750 : }
751 :
752 : #else
753 :
754 : static inline int hrtimer_is_hres_enabled(void) { return 0; }
755 : static inline void hrtimer_switch_to_hres(void) { }
756 :
757 : #endif /* CONFIG_HIGH_RES_TIMERS */
758 : /*
759 : * Retrigger next event is called after clock was set with interrupts
760 : * disabled through an SMP function call or directly from low level
761 : * resume code.
762 : *
763 : * This is only invoked when:
764 : * - CONFIG_HIGH_RES_TIMERS is enabled.
765 : * - CONFIG_NO_HZ_COMMON is enabled
766 : *
767 : * For the other cases this function is empty and because the call sites
768 : * are optimized out it vanishes as well, i.e. no need for lots of
769 : * #ifdeffery.
770 : */
771 : static void retrigger_next_event(void *arg)
772 : {
773 0 : struct hrtimer_cpu_base *base = this_cpu_ptr(&hrtimer_bases);
774 :
775 : /*
776 : * When high resolution mode or nohz is active, then the offsets of
777 : * CLOCK_REALTIME/TAI/BOOTTIME have to be updated. Otherwise the
778 : * next tick will take care of that.
779 : *
780 : * If high resolution mode is active then the next expiring timer
781 : * must be reevaluated and the clock event device reprogrammed if
782 : * necessary.
783 : *
784 : * In the NOHZ case the update of the offset and the reevaluation
785 : * of the next expiring timer is enough. The return from the SMP
786 : * function call will take care of the reprogramming in case the
787 : * CPU was in a NOHZ idle sleep.
788 : */
789 0 : if (!__hrtimer_hres_active(base) && !tick_nohz_active)
790 : return;
791 :
792 : raw_spin_lock(&base->lock);
793 : hrtimer_update_base(base);
794 : if (__hrtimer_hres_active(base))
795 : hrtimer_force_reprogram(base, 0);
796 : else
797 : hrtimer_update_next_event(base);
798 : raw_spin_unlock(&base->lock);
799 : }
800 :
801 : /*
802 : * When a timer is enqueued and expires earlier than the already enqueued
803 : * timers, we have to check whether it expires earlier than the timer for
804 : * which the clock event device was armed.
805 : *
806 : * Called with interrupts disabled and base->cpu_base.lock held
807 : */
808 0 : static void hrtimer_reprogram(struct hrtimer *timer, bool reprogram)
809 : {
810 0 : struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases);
811 0 : struct hrtimer_clock_base *base = timer->base;
812 0 : ktime_t expires = ktime_sub(hrtimer_get_expires(timer), base->offset);
813 :
814 0 : WARN_ON_ONCE(hrtimer_get_expires_tv64(timer) < 0);
815 :
816 : /*
817 : * CLOCK_REALTIME timer might be requested with an absolute
818 : * expiry time which is less than base->offset. Set it to 0.
819 : */
820 0 : if (expires < 0)
821 0 : expires = 0;
822 :
823 0 : if (timer->is_soft) {
824 : /*
825 : * soft hrtimer could be started on a remote CPU. In this
826 : * case softirq_expires_next needs to be updated on the
827 : * remote CPU. The soft hrtimer will not expire before the
828 : * first hard hrtimer on the remote CPU -
829 : * hrtimer_check_target() prevents this case.
830 : */
831 0 : struct hrtimer_cpu_base *timer_cpu_base = base->cpu_base;
832 :
833 0 : if (timer_cpu_base->softirq_activated)
834 : return;
835 :
836 0 : if (!ktime_before(expires, timer_cpu_base->softirq_expires_next))
837 : return;
838 :
839 0 : timer_cpu_base->softirq_next_timer = timer;
840 0 : timer_cpu_base->softirq_expires_next = expires;
841 :
842 0 : if (!ktime_before(expires, timer_cpu_base->expires_next) ||
843 : !reprogram)
844 : return;
845 : }
846 :
847 : /*
848 : * If the timer is not on the current cpu, we cannot reprogram
849 : * the other cpus clock event device.
850 : */
851 0 : if (base->cpu_base != cpu_base)
852 : return;
853 :
854 0 : if (expires >= cpu_base->expires_next)
855 : return;
856 :
857 : /*
858 : * If the hrtimer interrupt is running, then it will reevaluate the
859 : * clock bases and reprogram the clock event device.
860 : */
861 0 : if (cpu_base->in_hrtirq)
862 : return;
863 :
864 0 : cpu_base->next_timer = timer;
865 :
866 0 : __hrtimer_reprogram(cpu_base, timer, expires);
867 : }
868 :
869 : static bool update_needs_ipi(struct hrtimer_cpu_base *cpu_base,
870 : unsigned int active)
871 : {
872 : struct hrtimer_clock_base *base;
873 : unsigned int seq;
874 : ktime_t expires;
875 :
876 : /*
877 : * Update the base offsets unconditionally so that the following
878 : * check whether the SMP function call is required works correctly.
879 : *
880 : * The update is safe even when the remote CPU is in the hrtimer
881 : * interrupt or the hrtimer soft interrupt and expiring affected
882 : * bases. Either it will see the update before handling a base or
883 : * it will see it when it finishes the processing and reevaluates
884 : * the next expiring timer.
885 : */
886 : seq = cpu_base->clock_was_set_seq;
887 : hrtimer_update_base(cpu_base);
888 :
889 : /*
890 : * If the sequence did not change over the update then the
891 : * remote CPU already handled it.
892 : */
893 : if (seq == cpu_base->clock_was_set_seq)
894 : return false;
895 :
896 : /*
897 : * If the remote CPU is currently handling an hrtimer interrupt, it
898 : * will reevaluate the first expiring timer of all clock bases
899 : * before reprogramming. Nothing to do here.
900 : */
901 : if (cpu_base->in_hrtirq)
902 : return false;
903 :
904 : /*
905 : * Walk the affected clock bases and check whether the first expiring
906 : * timer in a clock base is moving ahead of the first expiring timer of
907 : * @cpu_base. If so, the IPI must be invoked because per CPU clock
908 : * event devices cannot be remotely reprogrammed.
909 : */
910 : active &= cpu_base->active_bases;
911 :
912 : for_each_active_base(base, cpu_base, active) {
913 : struct timerqueue_node *next;
914 :
915 : next = timerqueue_getnext(&base->active);
916 : expires = ktime_sub(next->expires, base->offset);
917 : if (expires < cpu_base->expires_next)
918 : return true;
919 :
920 : /* Extra check for softirq clock bases */
921 : if (base->clockid < HRTIMER_BASE_MONOTONIC_SOFT)
922 : continue;
923 : if (cpu_base->softirq_activated)
924 : continue;
925 : if (expires < cpu_base->softirq_expires_next)
926 : return true;
927 : }
928 : return false;
929 : }
930 :
931 : /*
932 : * Clock was set. This might affect CLOCK_REALTIME, CLOCK_TAI and
933 : * CLOCK_BOOTTIME (for late sleep time injection).
934 : *
935 : * This requires to update the offsets for these clocks
936 : * vs. CLOCK_MONOTONIC. When high resolution timers are enabled, then this
937 : * also requires to eventually reprogram the per CPU clock event devices
938 : * when the change moves an affected timer ahead of the first expiring
939 : * timer on that CPU. Obviously remote per CPU clock event devices cannot
940 : * be reprogrammed. The other reason why an IPI has to be sent is when the
941 : * system is in !HIGH_RES and NOHZ mode. The NOHZ mode updates the offsets
942 : * in the tick, which obviously might be stopped, so this has to bring out
943 : * the remote CPU which might sleep in idle to get this sorted.
944 : */
945 0 : void clock_was_set(unsigned int bases)
946 : {
947 0 : struct hrtimer_cpu_base *cpu_base = raw_cpu_ptr(&hrtimer_bases);
948 : cpumask_var_t mask;
949 : int cpu;
950 :
951 0 : if (!__hrtimer_hres_active(cpu_base) && !tick_nohz_active)
952 : goto out_timerfd;
953 :
954 : if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) {
955 : on_each_cpu(retrigger_next_event, NULL, 1);
956 : goto out_timerfd;
957 : }
958 :
959 : /* Avoid interrupting CPUs if possible */
960 : cpus_read_lock();
961 : for_each_online_cpu(cpu) {
962 : unsigned long flags;
963 :
964 : cpu_base = &per_cpu(hrtimer_bases, cpu);
965 : raw_spin_lock_irqsave(&cpu_base->lock, flags);
966 :
967 : if (update_needs_ipi(cpu_base, bases))
968 : cpumask_set_cpu(cpu, mask);
969 :
970 : raw_spin_unlock_irqrestore(&cpu_base->lock, flags);
971 : }
972 :
973 : preempt_disable();
974 : smp_call_function_many(mask, retrigger_next_event, NULL, 1);
975 : preempt_enable();
976 : cpus_read_unlock();
977 : free_cpumask_var(mask);
978 :
979 : out_timerfd:
980 0 : timerfd_clock_was_set();
981 0 : }
982 :
983 0 : static void clock_was_set_work(struct work_struct *work)
984 : {
985 0 : clock_was_set(CLOCK_SET_WALL);
986 0 : }
987 :
988 : static DECLARE_WORK(hrtimer_work, clock_was_set_work);
989 :
990 : /*
991 : * Called from timekeeping code to reprogram the hrtimer interrupt device
992 : * on all cpus and to notify timerfd.
993 : */
994 0 : void clock_was_set_delayed(void)
995 : {
996 0 : schedule_work(&hrtimer_work);
997 0 : }
998 :
999 : /*
1000 : * Called during resume either directly via timekeeping_resume()
1001 : * or in the case of s2idle from tick_unfreeze() to ensure that the
1002 : * hrtimers are up to date.
1003 : */
1004 0 : void hrtimers_resume_local(void)
1005 : {
1006 : lockdep_assert_irqs_disabled();
1007 : /* Retrigger on the local CPU */
1008 0 : retrigger_next_event(NULL);
1009 0 : }
1010 :
1011 : /*
1012 : * Counterpart to lock_hrtimer_base above:
1013 : */
1014 : static inline
1015 : void unlock_hrtimer_base(const struct hrtimer *timer, unsigned long *flags)
1016 : {
1017 0 : raw_spin_unlock_irqrestore(&timer->base->cpu_base->lock, *flags);
1018 : }
1019 :
1020 : /**
1021 : * hrtimer_forward - forward the timer expiry
1022 : * @timer: hrtimer to forward
1023 : * @now: forward past this time
1024 : * @interval: the interval to forward
1025 : *
1026 : * Forward the timer expiry so it will expire in the future.
1027 : * Returns the number of overruns.
1028 : *
1029 : * Can be safely called from the callback function of @timer. If
1030 : * called from other contexts @timer must neither be enqueued nor
1031 : * running the callback and the caller needs to take care of
1032 : * serialization.
1033 : *
1034 : * Note: This only updates the timer expiry value and does not requeue
1035 : * the timer.
1036 : */
1037 0 : u64 hrtimer_forward(struct hrtimer *timer, ktime_t now, ktime_t interval)
1038 : {
1039 0 : u64 orun = 1;
1040 : ktime_t delta;
1041 :
1042 0 : delta = ktime_sub(now, hrtimer_get_expires(timer));
1043 :
1044 0 : if (delta < 0)
1045 : return 0;
1046 :
1047 0 : if (WARN_ON(timer->state & HRTIMER_STATE_ENQUEUED))
1048 : return 0;
1049 :
1050 0 : if (interval < hrtimer_resolution)
1051 0 : interval = hrtimer_resolution;
1052 :
1053 0 : if (unlikely(delta >= interval)) {
1054 0 : s64 incr = ktime_to_ns(interval);
1055 :
1056 0 : orun = ktime_divns(delta, incr);
1057 0 : hrtimer_add_expires_ns(timer, incr * orun);
1058 0 : if (hrtimer_get_expires_tv64(timer) > now)
1059 : return orun;
1060 : /*
1061 : * This (and the ktime_add() below) is the
1062 : * correction for exact:
1063 : */
1064 0 : orun++;
1065 : }
1066 0 : hrtimer_add_expires(timer, interval);
1067 :
1068 0 : return orun;
1069 : }
1070 : EXPORT_SYMBOL_GPL(hrtimer_forward);
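/*
 * Usage sketch (illustrative; my_period_fn and MY_PERIOD_NS are made-up
 * names): a periodic timer callback typically forwards its own expiry
 * past "now" and asks to be restarted. The return value of
 * hrtimer_forward_now() is the total number of periods skipped over.
 */
#define MY_PERIOD_NS    (100 * NSEC_PER_MSEC)

static enum hrtimer_restart my_period_fn(struct hrtimer *timer)
{
        u64 overruns = hrtimer_forward_now(timer, ns_to_ktime(MY_PERIOD_NS));

        /* An overrun count > 1 means the callback ran late and missed periods. */
        if (overruns > 1)
                pr_debug("missed %llu periods\n", overruns - 1);

        return HRTIMER_RESTART;
}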
1071 :
1072 : /*
1073 : * enqueue_hrtimer - internal function to (re)start a timer
1074 : *
1075 : * The timer is inserted in expiry order. Insertion into the
1076 : * red black tree is O(log(n)). Must hold the base lock.
1077 : *
1078 : * Returns 1 when the new timer is the leftmost timer in the tree.
1079 : */
1080 : static int enqueue_hrtimer(struct hrtimer *timer,
1081 : struct hrtimer_clock_base *base,
1082 : enum hrtimer_mode mode)
1083 : {
1084 0 : debug_activate(timer, mode);
1085 :
1086 0 : base->cpu_base->active_bases |= 1 << base->index;
1087 :
1088 : /* Pairs with the lockless read in hrtimer_is_queued() */
1089 0 : WRITE_ONCE(timer->state, HRTIMER_STATE_ENQUEUED);
1090 :
1091 0 : return timerqueue_add(&base->active, &timer->node);
1092 : }
1093 :
1094 : /*
1095 : * __remove_hrtimer - internal function to remove a timer
1096 : *
1097 : * Caller must hold the base lock.
1098 : *
1099 : * High resolution timer mode reprograms the clock event device when the
1100 : * timer is the one which expires next. The caller can disable this by setting
1101 : * reprogram to zero. This is useful when the context does a reprogramming
1102 : * anyway (e.g. timer interrupt)
1103 : */
1104 0 : static void __remove_hrtimer(struct hrtimer *timer,
1105 : struct hrtimer_clock_base *base,
1106 : u8 newstate, int reprogram)
1107 : {
1108 0 : struct hrtimer_cpu_base *cpu_base = base->cpu_base;
1109 0 : u8 state = timer->state;
1110 :
1111 : /* Pairs with the lockless read in hrtimer_is_queued() */
1112 0 : WRITE_ONCE(timer->state, newstate);
1113 0 : if (!(state & HRTIMER_STATE_ENQUEUED))
1114 : return;
1115 :
1116 0 : if (!timerqueue_del(&base->active, &timer->node))
1117 0 : cpu_base->active_bases &= ~(1 << base->index);
1118 :
1119 : /*
1120 : * Note: If reprogram is false we do not update
1121 : * cpu_base->next_timer. This happens when we remove the first
1122 : * timer on a remote cpu. No harm as we never dereference
1123 : * cpu_base->next_timer. So the worst thing what can happen is
1124 : * a superfluous call to hrtimer_force_reprogram() on the
1125 : * remote cpu later on if the same timer gets enqueued again.
1126 : */
1127 0 : if (reprogram && timer == cpu_base->next_timer)
1128 : hrtimer_force_reprogram(cpu_base, 1);
1129 : }
1130 :
1131 : /*
1132 : * remove hrtimer, called with base lock held
1133 : */
1134 : static inline int
1135 : remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base,
1136 : bool restart, bool keep_local)
1137 : {
1138 0 : u8 state = timer->state;
1139 :
1140 0 : if (state & HRTIMER_STATE_ENQUEUED) {
1141 : bool reprogram;
1142 :
1143 : /*
1144 : * Remove the timer and force reprogramming when high
1145 : * resolution mode is active and the timer is on the current
1146 : * CPU. If we remove a timer on another CPU, reprogramming is
1147 : * skipped. The interrupt event on this CPU is fired and
1148 : * reprogramming happens in the interrupt handler. This is a
1149 : * rare case and less expensive than a smp call.
1150 : */
1151 0 : debug_deactivate(timer);
1152 0 : reprogram = base->cpu_base == this_cpu_ptr(&hrtimer_bases);
1153 :
1154 : /*
1155 : * If the timer is not restarted then reprogramming is
1156 : * required if the timer is local. If it is local and about
1157 : * to be restarted, avoid programming it twice (on removal
1158 : * and a moment later when it's requeued).
1159 : */
1160 : if (!restart)
1161 : state = HRTIMER_STATE_INACTIVE;
1162 : else
1163 0 : reprogram &= !keep_local;
1164 :
1165 0 : __remove_hrtimer(timer, base, state, reprogram);
1166 : return 1;
1167 : }
1168 : return 0;
1169 : }
1170 :
1171 : static inline ktime_t hrtimer_update_lowres(struct hrtimer *timer, ktime_t tim,
1172 : const enum hrtimer_mode mode)
1173 : {
1174 : #ifdef CONFIG_TIME_LOW_RES
1175 : /*
1176 : * CONFIG_TIME_LOW_RES indicates that the system has no way to return
1177 : * granular time values. For relative timers we add hrtimer_resolution
1178 : * (i.e. one jiffy) to prevent short timeouts.
1179 : */
1180 : timer->is_rel = mode & HRTIMER_MODE_REL;
1181 : if (timer->is_rel)
1182 : tim = ktime_add_safe(tim, hrtimer_resolution);
1183 : #endif
1184 : return tim;
1185 : }
1186 :
1187 : static void
1188 0 : hrtimer_update_softirq_timer(struct hrtimer_cpu_base *cpu_base, bool reprogram)
1189 : {
1190 : ktime_t expires;
1191 :
1192 : /*
1193 : * Find the next SOFT expiration.
1194 : */
1195 0 : expires = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE_SOFT);
1196 :
1197 : /*
1198 : * reprogramming needs to be triggered, even if the next soft
1199 : * hrtimer expires at the same time as the next hard
1200 : * hrtimer. cpu_base->softirq_expires_next needs to be updated!
1201 : */
1202 0 : if (expires == KTIME_MAX)
1203 : return;
1204 :
1205 : /*
1206 : * cpu_base->*next_timer is recomputed by __hrtimer_get_next_event()
1207 : * cpu_base->*expires_next is only set by hrtimer_reprogram()
1208 : */
1209 0 : hrtimer_reprogram(cpu_base->softirq_next_timer, reprogram);
1210 : }
1211 :
1212 0 : static int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
1213 : u64 delta_ns, const enum hrtimer_mode mode,
1214 : struct hrtimer_clock_base *base)
1215 : {
1216 : struct hrtimer_clock_base *new_base;
1217 : bool force_local, first;
1218 :
1219 : /*
1220 : * If the timer is on the local cpu base and is the first expiring
1221 : * timer then this might end up reprogramming the hardware twice
1222 : * (on removal and on enqueue). To avoid that, skip the
1223 : * reprogram on removal, keep the timer local to the current CPU
1224 : * and enforce reprogramming after it is queued no matter whether
1225 : * it is the new first expiring timer again or not.
1226 : */
1227 0 : force_local = base->cpu_base == this_cpu_ptr(&hrtimer_bases);
1228 0 : force_local &= base->cpu_base->next_timer == timer;
1229 :
1230 : /*
1231 : * Remove an active timer from the queue. In case it is not queued
1232 : * on the current CPU, make sure that remove_hrtimer() updates the
1233 : * remote data correctly.
1234 : *
1235 : * If it's on the current CPU and the first expiring timer, then
1236 : * skip reprogramming, keep the timer local and enforce
1237 : * reprogramming later if it was the first expiring timer. This
1238 : * avoids programming the underlying clock event twice (once at
1239 : * removal and once after enqueue).
1240 : */
1241 0 : remove_hrtimer(timer, base, true, force_local);
1242 :
1243 0 : if (mode & HRTIMER_MODE_REL)
1244 0 : tim = ktime_add_safe(tim, base->get_time());
1245 :
1246 0 : tim = hrtimer_update_lowres(timer, tim, mode);
1247 :
1248 0 : hrtimer_set_expires_range_ns(timer, tim, delta_ns);
1249 :
1250 : /* Switch the timer base, if necessary: */
1251 : if (!force_local) {
1252 : new_base = switch_hrtimer_base(timer, base,
1253 : mode & HRTIMER_MODE_PINNED);
1254 : } else {
1255 : new_base = base;
1256 : }
1257 :
1258 0 : first = enqueue_hrtimer(timer, new_base, mode);
1259 0 : if (!force_local)
1260 : return first;
1261 :
1262 : /*
1263 : * Timer was forced to stay on the current CPU to avoid
1264 : * reprogramming on removal and enqueue. Force reprogram the
1265 : * hardware by evaluating the new first expiring timer.
1266 : */
1267 0 : hrtimer_force_reprogram(new_base->cpu_base, 1);
1268 : return 0;
1269 : }
1270 :
1271 : /**
1272 : * hrtimer_start_range_ns - (re)start an hrtimer
1273 : * @timer: the timer to be added
1274 : * @tim: expiry time
1275 : * @delta_ns: "slack" range for the timer
1276 : * @mode: timer mode: absolute (HRTIMER_MODE_ABS) or
1277 : * relative (HRTIMER_MODE_REL), and pinned (HRTIMER_MODE_PINNED);
1278 : * softirq based mode is considered for debug purpose only!
1279 : */
1280 0 : void hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
1281 : u64 delta_ns, const enum hrtimer_mode mode)
1282 : {
1283 : struct hrtimer_clock_base *base;
1284 : unsigned long flags;
1285 :
1286 : /*
1287 : * Check whether the HRTIMER_MODE_SOFT bit and hrtimer.is_soft
1288 : * match on CONFIG_PREEMPT_RT = n. With PREEMPT_RT check the hard
1289 : * expiry mode because unmarked timers are moved to softirq expiry.
1290 : */
1291 : if (!IS_ENABLED(CONFIG_PREEMPT_RT))
1292 0 : WARN_ON_ONCE(!(mode & HRTIMER_MODE_SOFT) ^ !timer->is_soft);
1293 : else
1294 : WARN_ON_ONCE(!(mode & HRTIMER_MODE_HARD) ^ !timer->is_hard);
1295 :
1296 0 : base = lock_hrtimer_base(timer, &flags);
1297 :
1298 0 : if (__hrtimer_start_range_ns(timer, tim, delta_ns, mode, base))
1299 0 : hrtimer_reprogram(timer, true);
1300 :
1301 0 : unlock_hrtimer_base(timer, &flags);
1302 0 : }
1303 : EXPORT_SYMBOL_GPL(hrtimer_start_range_ns);
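/*
 * Usage sketch (illustrative; my_arm_timeout() is a made-up name): arm a
 * timer 10ms from now, relative to its clock, and allow 1ms of slack so
 * the expiry can be coalesced with nearby timers. hrtimer_start() is the
 * common wrapper which passes a zero slack range.
 */
static void my_arm_timeout(struct hrtimer *timer)
{
        hrtimer_start_range_ns(timer, ms_to_ktime(10), NSEC_PER_MSEC,
                               HRTIMER_MODE_REL);
}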
1304 :
1305 : /**
1306 : * hrtimer_try_to_cancel - try to deactivate a timer
1307 : * @timer: hrtimer to stop
1308 : *
1309 : * Returns:
1310 : *
1311 : * * 0 when the timer was not active
1312 : * * 1 when the timer was active
1313 : * * -1 when the timer is currently executing the callback function and
1314 : * cannot be stopped
1315 : */
1316 367 : int hrtimer_try_to_cancel(struct hrtimer *timer)
1317 : {
1318 : struct hrtimer_clock_base *base;
1319 : unsigned long flags;
1320 367 : int ret = -1;
1321 :
1322 : /*
1323 : * Check lockless first. If the timer is not active (neither
1324 : * enqueued nor running the callback), nothing to do here. The
1325 : * base lock does not serialize against a concurrent enqueue,
1326 : * so we can avoid taking it.
1327 : */
1328 367 : if (!hrtimer_active(timer))
1329 : return 0;
1330 :
1331 0 : base = lock_hrtimer_base(timer, &flags);
1332 :
1333 0 : if (!hrtimer_callback_running(timer))
1334 : ret = remove_hrtimer(timer, base, false, false);
1335 :
1336 0 : unlock_hrtimer_base(timer, &flags);
1337 :
1338 0 : return ret;
1339 :
1340 : }
1341 : EXPORT_SYMBOL_GPL(hrtimer_try_to_cancel);
1342 :
1343 : #ifdef CONFIG_PREEMPT_RT
1344 : static void hrtimer_cpu_base_init_expiry_lock(struct hrtimer_cpu_base *base)
1345 : {
1346 : spin_lock_init(&base->softirq_expiry_lock);
1347 : }
1348 :
1349 : static void hrtimer_cpu_base_lock_expiry(struct hrtimer_cpu_base *base)
1350 : {
1351 : spin_lock(&base->softirq_expiry_lock);
1352 : }
1353 :
1354 : static void hrtimer_cpu_base_unlock_expiry(struct hrtimer_cpu_base *base)
1355 : {
1356 : spin_unlock(&base->softirq_expiry_lock);
1357 : }
1358 :
1359 : /*
1360 : * The counterpart to hrtimer_cancel_wait_running().
1361 : *
1362 : * If there is a waiter for cpu_base->expiry_lock, then it was waiting for
1363 : * the timer callback to finish. Drop expiry_lock and reacquire it. That
1364 : * allows the waiter to acquire the lock and make progress.
1365 : */
1366 : static void hrtimer_sync_wait_running(struct hrtimer_cpu_base *cpu_base,
1367 : unsigned long flags)
1368 : {
1369 : if (atomic_read(&cpu_base->timer_waiters)) {
1370 : raw_spin_unlock_irqrestore(&cpu_base->lock, flags);
1371 : spin_unlock(&cpu_base->softirq_expiry_lock);
1372 : spin_lock(&cpu_base->softirq_expiry_lock);
1373 : raw_spin_lock_irq(&cpu_base->lock);
1374 : }
1375 : }
1376 :
1377 : /*
1378 : * This function is called on PREEMPT_RT kernels when the fast path
1379 : * deletion of a timer failed because the timer callback function was
1380 : * running.
1381 : *
1382 : * This prevents priority inversion: if the soft irq thread is preempted
1383 : * in the middle of a timer callback, then calling del_timer_sync() can
1384 : * lead to two issues:
1385 : *
1386 : * - If the caller is on a remote CPU then it has to spin wait for the timer
1387 : * handler to complete. This can result in unbound priority inversion.
1388 : *
1389 : * - If the caller originates from the task which preempted the timer
1390 : * handler on the same CPU, then spin waiting for the timer handler to
1391 : * complete is never going to end.
1392 : */
1393 : void hrtimer_cancel_wait_running(const struct hrtimer *timer)
1394 : {
1395 : /* Lockless read. Prevent the compiler from reloading it below */
1396 : struct hrtimer_clock_base *base = READ_ONCE(timer->base);
1397 :
1398 : /*
1399 : * Just relax if the timer expires in hard interrupt context or if
1400 : * it is currently on the migration base.
1401 : */
1402 : if (!timer->is_soft || is_migration_base(base)) {
1403 : cpu_relax();
1404 : return;
1405 : }
1406 :
1407 : /*
1408 : * Mark the base as contended and grab the expiry lock, which is
1409 : * held by the softirq across the timer callback. Drop the lock
1410 : * immediately so the softirq can expire the next timer. In theory
1411 : * the timer could already be running again, but that's more than
1412 : * unlikely and just causes another wait loop.
1413 : */
1414 : atomic_inc(&base->cpu_base->timer_waiters);
1415 : spin_lock_bh(&base->cpu_base->softirq_expiry_lock);
1416 : atomic_dec(&base->cpu_base->timer_waiters);
1417 : spin_unlock_bh(&base->cpu_base->softirq_expiry_lock);
1418 : }
1419 : #else
1420 : static inline void
1421 : hrtimer_cpu_base_init_expiry_lock(struct hrtimer_cpu_base *base) { }
1422 : static inline void
1423 : hrtimer_cpu_base_lock_expiry(struct hrtimer_cpu_base *base) { }
1424 : static inline void
1425 : hrtimer_cpu_base_unlock_expiry(struct hrtimer_cpu_base *base) { }
1426 : static inline void hrtimer_sync_wait_running(struct hrtimer_cpu_base *base,
1427 : unsigned long flags) { }
1428 : #endif
1429 :
1430 : /**
1431 : * hrtimer_cancel - cancel a timer and wait for the handler to finish.
1432 : * @timer: the timer to be cancelled
1433 : *
1434 : * Returns:
1435 : * 0 when the timer was not active
1436 : * 1 when the timer was active
1437 : */
1438 367 : int hrtimer_cancel(struct hrtimer *timer)
1439 : {
1440 : int ret;
1441 :
1442 : do {
1443 367 : ret = hrtimer_try_to_cancel(timer);
1444 :
1445 367 : if (ret < 0)
1446 0 : hrtimer_cancel_wait_running(timer);
1447 367 : } while (ret < 0);
1448 367 : return ret;
1449 : }
1450 : EXPORT_SYMBOL_GPL(hrtimer_cancel);
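/*
 * Usage sketch (illustrative; my_shutdown() is a made-up name): on
 * teardown hrtimer_cancel() both removes a pending timer and waits for a
 * running callback to finish, so data used by the callback can be freed
 * safely once it returns.
 */
static void my_shutdown(struct hrtimer *timer)
{
        /* hrtimer_cancel() returns 1 if the timer was still queued. */
        if (hrtimer_cancel(timer))
                pr_debug("cancelled a pending timer\n");
}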
1451 :
1452 : /**
1453 : * __hrtimer_get_remaining - get remaining time for the timer
1454 : * @timer: the timer to read
1455 : * @adjust: adjust relative timers when CONFIG_TIME_LOW_RES=y
1456 : */
1457 0 : ktime_t __hrtimer_get_remaining(const struct hrtimer *timer, bool adjust)
1458 : {
1459 : unsigned long flags;
1460 : ktime_t rem;
1461 :
1462 0 : lock_hrtimer_base(timer, &flags);
1463 : if (IS_ENABLED(CONFIG_TIME_LOW_RES) && adjust)
1464 : rem = hrtimer_expires_remaining_adjusted(timer);
1465 : else
1466 0 : rem = hrtimer_expires_remaining(timer);
1467 0 : unlock_hrtimer_base(timer, &flags);
1468 :
1469 0 : return rem;
1470 : }
1471 : EXPORT_SYMBOL_GPL(__hrtimer_get_remaining);
1472 :
1473 : #ifdef CONFIG_NO_HZ_COMMON
1474 : /**
1475 : * hrtimer_get_next_event - get the time until next expiry event
1476 : *
1477 : * Returns the next expiry time or KTIME_MAX if no timer is pending.
1478 : */
1479 : u64 hrtimer_get_next_event(void)
1480 : {
1481 : struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases);
1482 : u64 expires = KTIME_MAX;
1483 : unsigned long flags;
1484 :
1485 : raw_spin_lock_irqsave(&cpu_base->lock, flags);
1486 :
1487 : if (!__hrtimer_hres_active(cpu_base))
1488 : expires = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE_ALL);
1489 :
1490 : raw_spin_unlock_irqrestore(&cpu_base->lock, flags);
1491 :
1492 : return expires;
1493 : }
1494 :
1495 : /**
1496 : * hrtimer_next_event_without - time until next expiry event w/o one timer
1497 : * @exclude: timer to exclude
1498 : *
1499 : * Returns the next expiry time over all timers except for the @exclude one or
1500 : * KTIME_MAX if none of them is pending.
1501 : */
1502 : u64 hrtimer_next_event_without(const struct hrtimer *exclude)
1503 : {
1504 : struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases);
1505 : u64 expires = KTIME_MAX;
1506 : unsigned long flags;
1507 :
1508 : raw_spin_lock_irqsave(&cpu_base->lock, flags);
1509 :
1510 : if (__hrtimer_hres_active(cpu_base)) {
1511 : unsigned int active;
1512 :
1513 : if (!cpu_base->softirq_activated) {
1514 : active = cpu_base->active_bases & HRTIMER_ACTIVE_SOFT;
1515 : expires = __hrtimer_next_event_base(cpu_base, exclude,
1516 : active, KTIME_MAX);
1517 : }
1518 : active = cpu_base->active_bases & HRTIMER_ACTIVE_HARD;
1519 : expires = __hrtimer_next_event_base(cpu_base, exclude, active,
1520 : expires);
1521 : }
1522 :
1523 : raw_spin_unlock_irqrestore(&cpu_base->lock, flags);
1524 :
1525 : return expires;
1526 : }
1527 : #endif
1528 :
1529 1729 : static inline int hrtimer_clockid_to_base(clockid_t clock_id)
1530 : {
1531 1729 : if (likely(clock_id < MAX_CLOCKS)) {
1532 1729 : int base = hrtimer_clock_to_base_table[clock_id];
1533 :
1534 1729 : if (likely(base != HRTIMER_MAX_CLOCK_BASES))
1535 : return base;
1536 : }
1537 0 : WARN(1, "Invalid clockid %d. Using MONOTONIC\n", clock_id);
1538 0 : return HRTIMER_BASE_MONOTONIC;
1539 : }
1540 :
1541 1729 : static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id,
1542 : enum hrtimer_mode mode)
1543 : {
1544 1729 : bool softtimer = !!(mode & HRTIMER_MODE_SOFT);
1545 : struct hrtimer_cpu_base *cpu_base;
1546 : int base;
1547 :
1548 : /*
1549 : * On PREEMPT_RT enabled kernels hrtimers which are not explicitly
1550 : * marked for hard interrupt expiry mode are moved into soft
1551 : * interrupt context for latency reasons and because the callbacks
1552 : * can invoke functions which might sleep on RT, e.g. spin_lock().
1553 : */
1554 : if (IS_ENABLED(CONFIG_PREEMPT_RT) && !(mode & HRTIMER_MODE_HARD))
1555 : softtimer = true;
1556 :
1557 1729 : memset(timer, 0, sizeof(struct hrtimer));
1558 :
1559 1729 : cpu_base = raw_cpu_ptr(&hrtimer_bases);
1560 :
1561 : /*
1562 : * POSIX magic: Relative CLOCK_REALTIME timers are not affected by
1563 : * clock modifications, so they need to become CLOCK_MONOTONIC to
1564 : * ensure POSIX compliance.
1565 : */
1566 1729 : if (clock_id == CLOCK_REALTIME && mode & HRTIMER_MODE_REL)
1567 0 : clock_id = CLOCK_MONOTONIC;
1568 :
1569 1729 : base = softtimer ? HRTIMER_MAX_CLOCK_BASES / 2 : 0;
1570 1729 : base += hrtimer_clockid_to_base(clock_id);
1571 1729 : timer->is_soft = softtimer;
1572 1729 : timer->is_hard = !!(mode & HRTIMER_MODE_HARD);
1573 1729 : timer->base = &cpu_base->clock_base[base];
1574 3458 : timerqueue_init(&timer->node);
1575 1729 : }
1576 :
1577 : /**
1578 : * hrtimer_init - initialize a timer to the given clock
1579 : * @timer: the timer to be initialized
1580 : * @clock_id: the clock to be used
1581 : * @mode: The modes which are relevant for initialization:
1582 : * HRTIMER_MODE_ABS, HRTIMER_MODE_REL, HRTIMER_MODE_ABS_SOFT,
1583 : * HRTIMER_MODE_REL_SOFT
1584 : *
1585 : * The PINNED variants of the above can be handed in,
1586 : * but the PINNED bit is ignored as pinning happens
1587 : * when the hrtimer is started
1588 : */
1589 1729 : void hrtimer_init(struct hrtimer *timer, clockid_t clock_id,
1590 : enum hrtimer_mode mode)
1591 : {
1592 1729 : debug_init(timer, clock_id, mode);
1593 1729 : __hrtimer_init(timer, clock_id, mode);
1594 1729 : }
1595 : EXPORT_SYMBOL_GPL(hrtimer_init);
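/*
 * Usage sketch (illustrative; my_timer and my_setup() are made-up names,
 * my_period_fn is the callback sketched after hrtimer_forward() above):
 * the usual sequence is to initialize the timer on a clock, set the
 * callback and then start it.
 */
static struct hrtimer my_timer;

static void my_setup(void)
{
        hrtimer_init(&my_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
        my_timer.function = my_period_fn;
        hrtimer_start(&my_timer, ms_to_ktime(100), HRTIMER_MODE_REL);
}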
1596 :
1597 : /*
1598 : * A timer is active when it is enqueued into the rbtree, the
1599 : * callback function is running, or it is in the state of being migrated
1600 : * to another cpu.
1601 : *
1602 : * It is important for this function to not return a false negative.
1603 : */
1604 367 : bool hrtimer_active(const struct hrtimer *timer)
1605 : {
1606 : struct hrtimer_clock_base *base;
1607 : unsigned int seq;
1608 :
1609 : do {
1610 367 : base = READ_ONCE(timer->base);
1611 1101 : seq = raw_read_seqcount_begin(&base->seq);
1612 :
1613 734 : if (timer->state != HRTIMER_STATE_INACTIVE ||
1614 367 : base->running == timer)
1615 : return true;
1616 :
1617 1468 : } while (read_seqcount_retry(&base->seq, seq) ||
1618 367 : base != READ_ONCE(timer->base));
1619 :
1620 : return false;
1621 : }
1622 : EXPORT_SYMBOL_GPL(hrtimer_active);
1623 :
1624 : /*
1625 : * The write_seqcount_barrier()s in __run_hrtimer() split the thing into 3
1626 : * distinct sections:
1627 : *
1628 : * - queued: the timer is queued
1629 : * - callback: the timer is being run
1630 : * - post: the timer is inactive or (re)queued
1631 : *
1632 : * On the read side we ensure we observe timer->state and cpu_base->running
1633 : * from the same section, if anything changed while we looked at it, we retry.
1634 : * This includes timer->base changing because sequence numbers alone are
1635 : * insufficient for that.
1636 : *
1637 : * The sequence numbers are required because otherwise we could still observe
1638 : * a false negative if the read side got smeared over multiple consecutive
1639 : * __run_hrtimer() invocations.
1640 : */
1641 :
1642 0 : static void __run_hrtimer(struct hrtimer_cpu_base *cpu_base,
1643 : struct hrtimer_clock_base *base,
1644 : struct hrtimer *timer, ktime_t *now,
1645 : unsigned long flags) __must_hold(&cpu_base->lock)
1646 : {
1647 : enum hrtimer_restart (*fn)(struct hrtimer *);
1648 : bool expires_in_hardirq;
1649 : int restart;
1650 :
1651 : lockdep_assert_held(&cpu_base->lock);
1652 :
1653 0 : debug_deactivate(timer);
1654 0 : base->running = timer;
1655 :
1656 : /*
1657 : * Separate the ->running assignment from the ->state assignment.
1658 : *
1659 : * As with a regular write barrier, this ensures the read side in
1660 : * hrtimer_active() cannot observe base->running == NULL &&
1661 : * timer->state == INACTIVE.
1662 : */
1663 0 : raw_write_seqcount_barrier(&base->seq);
1664 :
1665 0 : __remove_hrtimer(timer, base, HRTIMER_STATE_INACTIVE, 0);
1666 0 : fn = timer->function;
1667 :
1668 : /*
1669 : * Clear the 'is relative' flag for the TIME_LOW_RES case. If the
1670 : * timer is restarted with a period then it becomes an absolute
1671 : * timer. If it's not restarted it does not matter.
1672 : */
1673 : if (IS_ENABLED(CONFIG_TIME_LOW_RES))
1674 : timer->is_rel = false;
1675 :
1676 : /*
1677 : * The timer is marked as running in the CPU base, so it is
1678 : * protected against migration to a different CPU even if the lock
1679 : * is dropped.
1680 : */
1681 0 : raw_spin_unlock_irqrestore(&cpu_base->lock, flags);
1682 0 : trace_hrtimer_expire_entry(timer, now);
1683 0 : expires_in_hardirq = lockdep_hrtimer_enter(timer);
1684 :
1685 0 : restart = fn(timer);
1686 :
1687 : lockdep_hrtimer_exit(expires_in_hardirq);
1688 0 : trace_hrtimer_expire_exit(timer);
1689 0 : raw_spin_lock_irq(&cpu_base->lock);
1690 :
1691 : /*
1692 : * Note: We clear the running state after enqueue_hrtimer and
1693 : * we do not reprogram the event hardware. Reprogramming happens
1694 : * either in hrtimer_start_range_ns() or in hrtimer_interrupt().
1695 : *
1696 : * Note: Because we dropped the cpu_base->lock above,
1697 : * hrtimer_start_range_ns() can have popped in and enqueued the timer
1698 : * for us already.
1699 : */
1700 0 : if (restart != HRTIMER_NORESTART &&
1701 0 : !(timer->state & HRTIMER_STATE_ENQUEUED))
1702 0 : enqueue_hrtimer(timer, base, HRTIMER_MODE_ABS);
1703 :
1704 : /*
1705 : * Separate the ->running assignment from the ->state assignment.
1706 : *
1707 : * As with a regular write barrier, this ensures the read side in
1708 : * hrtimer_active() cannot observe base->running == NULL &&
1709 : * timer->state == INACTIVE.
1710 : */
1711 0 : raw_write_seqcount_barrier(&base->seq);
1712 :
1713 0 : WARN_ON_ONCE(base->running != timer);
1714 0 : base->running = NULL;
1715 0 : }
1716 :
1717 2943 : static void __hrtimer_run_queues(struct hrtimer_cpu_base *cpu_base, ktime_t now,
1718 : unsigned long flags, unsigned int active_mask)
1719 : {
1720 : struct hrtimer_clock_base *base;
1721 2943 : unsigned int active = cpu_base->active_bases & active_mask;
1722 :
1723 5886 : for_each_active_base(base, cpu_base, active) {
1724 : struct timerqueue_node *node;
1725 : ktime_t basenow;
1726 :
1727 0 : basenow = ktime_add(now, base->offset);
1728 :
1729 0 : while ((node = timerqueue_getnext(&base->active))) {
1730 : struct hrtimer *timer;
1731 :
1732 0 : timer = container_of(node, struct hrtimer, node);
1733 :
1734 : /*
1735 : * The immediate goal for using the softexpires is
1736 : * minimizing wakeups, not running timers at the
1737 : * earliest interrupt after their soft expiration.
1738 : * This allows us to avoid using a Priority Search
1739 : * Tree, which can answer a stabbing query for
1740 : * overlapping intervals and instead use the simple
1741 : * BST we already have.
1742 : * We don't add extra wakeups by delaying timers that
1743 : * are right of a not-yet-expired timer, because that
1744 : * timer will have to trigger a wakeup anyway.
1745 : */
1746 0 : if (basenow < hrtimer_get_softexpires_tv64(timer))
1747 : break;
1748 :
1749 0 : __run_hrtimer(cpu_base, base, timer, &basenow, flags);
1750 : if (active_mask == HRTIMER_ACTIVE_SOFT)
1751 : hrtimer_sync_wait_running(cpu_base, flags);
1752 : }
1753 : }
1754 2943 : }
1755 :
1756 0 : static __latent_entropy void hrtimer_run_softirq(struct softirq_action *h)
1757 : {
1758 0 : struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases);
1759 : unsigned long flags;
1760 : ktime_t now;
1761 :
1762 0 : hrtimer_cpu_base_lock_expiry(cpu_base);
1763 0 : raw_spin_lock_irqsave(&cpu_base->lock, flags);
1764 :
1765 0 : now = hrtimer_update_base(cpu_base);
1766 0 : __hrtimer_run_queues(cpu_base, now, flags, HRTIMER_ACTIVE_SOFT);
1767 :
1768 0 : cpu_base->softirq_activated = 0;
1769 0 : hrtimer_update_softirq_timer(cpu_base, true);
1770 :
1771 0 : raw_spin_unlock_irqrestore(&cpu_base->lock, flags);
1772 0 : hrtimer_cpu_base_unlock_expiry(cpu_base);
1773 0 : }
1774 :
1775 : #ifdef CONFIG_HIGH_RES_TIMERS
1776 :
1777 : /*
1778 : * High resolution timer interrupt
1779 : * Called with interrupts disabled
1780 : */
1781 : void hrtimer_interrupt(struct clock_event_device *dev)
1782 : {
1783 : struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases);
1784 : ktime_t expires_next, now, entry_time, delta;
1785 : unsigned long flags;
1786 : int retries = 0;
1787 :
1788 : BUG_ON(!cpu_base->hres_active);
1789 : cpu_base->nr_events++;
1790 : dev->next_event = KTIME_MAX;
1791 :
1792 : raw_spin_lock_irqsave(&cpu_base->lock, flags);
1793 : entry_time = now = hrtimer_update_base(cpu_base);
1794 : retry:
1795 : cpu_base->in_hrtirq = 1;
1796 : /*
1797 : * We set expires_next to KTIME_MAX here with cpu_base->lock
1798 : * held to prevent a timer from being enqueued in our queue via
1799 : * the migration code. This does not affect enqueueing of
1800 : * timers which run their callback and need to be requeued on
1801 : * this CPU.
1802 : */
1803 : cpu_base->expires_next = KTIME_MAX;
1804 :
1805 : if (!ktime_before(now, cpu_base->softirq_expires_next)) {
1806 : cpu_base->softirq_expires_next = KTIME_MAX;
1807 : cpu_base->softirq_activated = 1;
1808 : raise_softirq_irqoff(HRTIMER_SOFTIRQ);
1809 : }
1810 :
1811 : __hrtimer_run_queues(cpu_base, now, flags, HRTIMER_ACTIVE_HARD);
1812 :
1813 : /* Reevaluate the clock bases for the [soft] next expiry */
1814 : expires_next = hrtimer_update_next_event(cpu_base);
1815 : /*
1816 : * Store the new expiry value so the migration code can verify
1817 : * against it.
1818 : */
1819 : cpu_base->expires_next = expires_next;
1820 : cpu_base->in_hrtirq = 0;
1821 : raw_spin_unlock_irqrestore(&cpu_base->lock, flags);
1822 :
1823 : /* Reprogramming necessary ? */
1824 : if (!tick_program_event(expires_next, 0)) {
1825 : cpu_base->hang_detected = 0;
1826 : return;
1827 : }
1828 :
1829 : /*
1830 : * The next timer was already expired due to:
1831 : * - tracing
1832 : * - long lasting callbacks
1833 : * - being scheduled away when running in a VM
1834 : *
1835 : * We need to prevent looping forever in the hrtimer
1836 : * interrupt routine. We give it 3 attempts to avoid
1837 : * overreacting to some spurious event.
1838 : *
1839 : * Acquire base lock for updating the offsets and retrieving
1840 : * the current time.
1841 : */
1842 : raw_spin_lock_irqsave(&cpu_base->lock, flags);
1843 : now = hrtimer_update_base(cpu_base);
1844 : cpu_base->nr_retries++;
1845 : if (++retries < 3)
1846 : goto retry;
1847 : /*
1848 : * Give the system a chance to do something else than looping
1849 : * here. We stored the entry time, so we know exactly how long
1850 : * we spent here. We schedule the next event this amount of
1851 : * time away.
1852 : */
1853 : cpu_base->nr_hangs++;
1854 : cpu_base->hang_detected = 1;
1855 : raw_spin_unlock_irqrestore(&cpu_base->lock, flags);
1856 :
1857 : delta = ktime_sub(now, entry_time);
1858 : if ((unsigned int)delta > cpu_base->max_hang_time)
1859 : cpu_base->max_hang_time = (unsigned int) delta;
1860 : /*
1861 : * Limit it to a sensible value as we enforce a longer
1862 : * delay. Give the CPU at least 100ms to catch up.
1863 : */
1864 : if (delta > 100 * NSEC_PER_MSEC)
1865 : expires_next = ktime_add_ns(now, 100 * NSEC_PER_MSEC);
1866 : else
1867 : expires_next = ktime_add(now, delta);
1868 : tick_program_event(expires_next, 1);
1869 : pr_warn_once("hrtimer: interrupt took %llu ns\n", ktime_to_ns(delta));
1870 : }
1871 :
1872 : /* called with interrupts disabled */
1873 : static inline void __hrtimer_peek_ahead_timers(void)
1874 : {
1875 : struct tick_device *td;
1876 :
1877 : if (!hrtimer_hres_active())
1878 : return;
1879 :
1880 : td = this_cpu_ptr(&tick_cpu_device);
1881 : if (td && td->evtdev)
1882 : hrtimer_interrupt(td->evtdev);
1883 : }
1884 :
1885 : #else /* CONFIG_HIGH_RES_TIMERS */
1886 :
1887 : static inline void __hrtimer_peek_ahead_timers(void) { }
1888 :
1889 : #endif /* !CONFIG_HIGH_RES_TIMERS */
1890 :
1891 : /*
1892 : * Called from run_local_timers in hardirq context every jiffy
1893 : */
1894 2943 : void hrtimer_run_queues(void)
1895 : {
1896 2943 : struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases);
1897 : unsigned long flags;
1898 : ktime_t now;
1899 :
1900 2943 : if (__hrtimer_hres_active(cpu_base))
1901 : return;
1902 :
1903 : /*
1904 : * This _is_ ugly: We have to check periodically whether we
1905 : * can switch to highres and/or nohz mode. The clocksource
1906 : * switch happens with xtime_lock held. Notification from
1907 : * there only sets the check bit in the tick_oneshot code,
1908 : * otherwise we might deadlock vs. xtime_lock.
1909 : */
1910 2943 : if (tick_check_oneshot_change(!hrtimer_is_hres_enabled())) {
1911 : hrtimer_switch_to_hres();
1912 : return;
1913 : }
1914 :
1915 2943 : raw_spin_lock_irqsave(&cpu_base->lock, flags);
1916 2943 : now = hrtimer_update_base(cpu_base);
1917 :
1918 5886 : if (!ktime_before(now, cpu_base->softirq_expires_next)) {
1919 0 : cpu_base->softirq_expires_next = KTIME_MAX;
1920 0 : cpu_base->softirq_activated = 1;
1921 0 : raise_softirq_irqoff(HRTIMER_SOFTIRQ);
1922 : }
1923 :
1924 2943 : __hrtimer_run_queues(cpu_base, now, flags, HRTIMER_ACTIVE_HARD);
1925 5886 : raw_spin_unlock_irqrestore(&cpu_base->lock, flags);
1926 : }
1927 :
1928 : /*
1929 : * Sleep related functions:
1930 : */
1931 0 : static enum hrtimer_restart hrtimer_wakeup(struct hrtimer *timer)
1932 : {
1933 0 : struct hrtimer_sleeper *t =
1934 0 : container_of(timer, struct hrtimer_sleeper, timer);
1935 0 : struct task_struct *task = t->task;
1936 :
1937 0 : t->task = NULL;
1938 0 : if (task)
1939 0 : wake_up_process(task);
1940 :
1941 0 : return HRTIMER_NORESTART;
1942 : }
1943 :
1944 : /**
1945 : * hrtimer_sleeper_start_expires - Start a hrtimer sleeper timer
1946 : * @sl: sleeper to be started
1947 : * @mode: timer mode abs/rel
1948 : *
1949 : * Wrapper around hrtimer_start_expires() for hrtimer_sleeper based timers
1950 : * to allow PREEMPT_RT to tweak the delivery mode (soft/hardirq context)
1951 : */
1952 0 : void hrtimer_sleeper_start_expires(struct hrtimer_sleeper *sl,
1953 : enum hrtimer_mode mode)
1954 : {
1955 : /*
1956 : * Make the enqueue delivery mode check work on RT. If the sleeper
1957 : * was initialized for hard interrupt delivery, force the mode bit.
1958 : * This is a special case for hrtimer_sleepers because
1959 : * hrtimer_init_sleeper() determines the delivery mode on RT, which
1960 : * avoids fiddling with this decision at the call sites.
1961 : */
1962 : if (IS_ENABLED(CONFIG_PREEMPT_RT) && sl->timer.is_hard)
1963 : mode |= HRTIMER_MODE_HARD;
1964 :
1965 0 : hrtimer_start_expires(&sl->timer, mode);
1966 0 : }
1967 : EXPORT_SYMBOL_GPL(hrtimer_sleeper_start_expires);
1968 :
1969 : static void __hrtimer_init_sleeper(struct hrtimer_sleeper *sl,
1970 : clockid_t clock_id, enum hrtimer_mode mode)
1971 : {
1972 : /*
1973 : * On PREEMPT_RT enabled kernels hrtimers which are not explicitly
1974 : * marked for hard interrupt expiry mode are moved into soft
1975 : * interrupt context either for latency reasons or because the
1976 : * hrtimer callback takes regular spinlocks or invokes other
1977 : * functions which are not suitable for hard interrupt context on
1978 : * PREEMPT_RT.
1979 : *
1980 : * The hrtimer_sleeper callback is RT compatible in hard interrupt
1981 : * context, but there is a latency concern: Untrusted userspace can
1982 : * spawn many threads which arm timers for the same expiry time on
1983 : * the same CPU. That causes a latency spike due to the wakeup of
1984 : * a gazillion threads.
1985 : *
1986 : * OTOH, privileged real-time user space applications rely on the
1987 : * low latency of hard interrupt wakeups. If the current task is in
1988 : * a real-time scheduling class, mark the mode for hard interrupt
1989 : * expiry.
1990 : */
1991 : if (IS_ENABLED(CONFIG_PREEMPT_RT)) {
1992 : if (task_is_realtime(current) && !(mode & HRTIMER_MODE_SOFT))
1993 : mode |= HRTIMER_MODE_HARD;
1994 : }
1995 :
1996 0 : __hrtimer_init(&sl->timer, clock_id, mode);
1997 0 : sl->timer.function = hrtimer_wakeup;
1998 0 : sl->task = current;
1999 : }
2000 :
2001 : /**
2002 : * hrtimer_init_sleeper - initialize sleeper to the given clock
2003 : * @sl: sleeper to be initialized
2004 : * @clock_id: the clock to be used
2005 : * @mode: timer mode abs/rel
2006 : */
2007 0 : void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, clockid_t clock_id,
2008 : enum hrtimer_mode mode)
2009 : {
2010 0 : debug_init(&sl->timer, clock_id, mode);
2011 0 : __hrtimer_init_sleeper(sl, clock_id, mode);
2012 :
2013 0 : }
2014 : EXPORT_SYMBOL_GPL(hrtimer_init_sleeper);
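/*
 * Illustrative sketch only (not part of hrtimer.c): the canonical wait
 * pattern built on a hrtimer_sleeper, essentially what do_nanosleep()
 * below does. The my_wait_for() name and the relative timeout parameter
 * are assumptions made up for the example.
 *
 *	static int my_wait_for(ktime_t timeout)
 *	{
 *		struct hrtimer_sleeper t;
 *
 *		hrtimer_init_sleeper_on_stack(&t, CLOCK_MONOTONIC,
 *					      HRTIMER_MODE_REL);
 *		hrtimer_set_expires(&t.timer, timeout);
 *
 *		set_current_state(TASK_INTERRUPTIBLE);
 *		hrtimer_sleeper_start_expires(&t, HRTIMER_MODE_REL);
 *		if (t.task)
 *			schedule();
 *		hrtimer_cancel(&t.timer);
 *		__set_current_state(TASK_RUNNING);
 *		destroy_hrtimer_on_stack(&t.timer);
 *
 *		// hrtimer_wakeup() clears t.task on expiry, so a non-NULL
 *		// t.task means we were woken early (e.g. by a signal).
 *		return t.task ? -EINTR : 0;
 *	}
 */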
2015 :
2016 0 : int nanosleep_copyout(struct restart_block *restart, struct timespec64 *ts)
2017 : {
2018 0 : switch (restart->nanosleep.type) {
2019 : #ifdef CONFIG_COMPAT_32BIT_TIME
2020 : case TT_COMPAT:
2021 : if (put_old_timespec32(ts, restart->nanosleep.compat_rmtp))
2022 : return -EFAULT;
2023 : break;
2024 : #endif
2025 : case TT_NATIVE:
2026 0 : if (put_timespec64(ts, restart->nanosleep.rmtp))
2027 : return -EFAULT;
2028 : break;
2029 : default:
2030 0 : BUG();
2031 : }
2032 0 : return -ERESTART_RESTARTBLOCK;
2033 : }
2034 :
2035 0 : static int __sched do_nanosleep(struct hrtimer_sleeper *t, enum hrtimer_mode mode)
2036 : {
2037 : struct restart_block *restart;
2038 :
2039 : do {
2040 0 : set_current_state(TASK_INTERRUPTIBLE|TASK_FREEZABLE);
2041 0 : hrtimer_sleeper_start_expires(t, mode);
2042 :
2043 0 : if (likely(t->task))
2044 0 : schedule();
2045 :
2046 0 : hrtimer_cancel(&t->timer);
2047 0 : mode = HRTIMER_MODE_ABS;
2048 :
2049 0 : } while (t->task && !signal_pending(current));
2050 :
2051 0 : __set_current_state(TASK_RUNNING);
2052 :
2053 0 : if (!t->task)
2054 : return 0;
2055 :
2056 0 : restart = &current->restart_block;
2057 0 : if (restart->nanosleep.type != TT_NONE) {
2058 0 : ktime_t rem = hrtimer_expires_remaining(&t->timer);
2059 : struct timespec64 rmt;
2060 :
2061 0 : if (rem <= 0)
2062 : return 0;
2063 0 : rmt = ktime_to_timespec64(rem);
2064 :
2065 0 : return nanosleep_copyout(restart, &rmt);
2066 : }
2067 : return -ERESTART_RESTARTBLOCK;
2068 : }
2069 :
2070 0 : static long __sched hrtimer_nanosleep_restart(struct restart_block *restart)
2071 : {
2072 : struct hrtimer_sleeper t;
2073 : int ret;
2074 :
2075 0 : hrtimer_init_sleeper_on_stack(&t, restart->nanosleep.clockid,
2076 : HRTIMER_MODE_ABS);
2077 0 : hrtimer_set_expires_tv64(&t.timer, restart->nanosleep.expires);
2078 0 : ret = do_nanosleep(&t, HRTIMER_MODE_ABS);
2079 0 : destroy_hrtimer_on_stack(&t.timer);
2080 0 : return ret;
2081 : }
2082 :
2083 0 : long hrtimer_nanosleep(ktime_t rqtp, const enum hrtimer_mode mode,
2084 : const clockid_t clockid)
2085 : {
2086 : struct restart_block *restart;
2087 : struct hrtimer_sleeper t;
2088 0 : int ret = 0;
2089 : u64 slack;
2090 :
2091 0 : slack = current->timer_slack_ns;
2092 0 : if (rt_task(current))
2093 0 : slack = 0;
2094 :
2095 0 : hrtimer_init_sleeper_on_stack(&t, clockid, mode);
2096 0 : hrtimer_set_expires_range_ns(&t.timer, rqtp, slack);
2097 0 : ret = do_nanosleep(&t, mode);
2098 0 : if (ret != -ERESTART_RESTARTBLOCK)
2099 : goto out;
2100 :
2101 : /* Absolute timers do not update the rmtp value and restart: */
2102 0 : if (mode == HRTIMER_MODE_ABS) {
2103 : ret = -ERESTARTNOHAND;
2104 : goto out;
2105 : }
2106 :
2107 0 : restart = &current->restart_block;
2108 0 : restart->nanosleep.clockid = t.timer.base->clockid;
2109 0 : restart->nanosleep.expires = hrtimer_get_expires_tv64(&t.timer);
2110 0 : set_restart_fn(restart, hrtimer_nanosleep_restart);
2111 : out:
2112 0 : destroy_hrtimer_on_stack(&t.timer);
2113 0 : return ret;
2114 : }
2115 :
2116 : #ifdef CONFIG_64BIT
2117 :
2118 0 : SYSCALL_DEFINE2(nanosleep, struct __kernel_timespec __user *, rqtp,
2119 : struct __kernel_timespec __user *, rmtp)
2120 : {
2121 : struct timespec64 tu;
2122 :
2123 0 : if (get_timespec64(&tu, rqtp))
2124 : return -EFAULT;
2125 :
2126 0 : if (!timespec64_valid(&tu))
2127 : return -EINVAL;
2128 :
2129 0 : current->restart_block.fn = do_no_restart_syscall;
2130 0 : current->restart_block.nanosleep.type = rmtp ? TT_NATIVE : TT_NONE;
2131 0 : current->restart_block.nanosleep.rmtp = rmtp;
2132 0 : return hrtimer_nanosleep(timespec64_to_ktime(tu), HRTIMER_MODE_REL,
2133 : CLOCK_MONOTONIC);
2134 : }
2135 :
2136 : #endif
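/*
 * Illustrative userspace sketch (not part of hrtimer.c): how the remainder
 * written by nanosleep_copyout() above is typically consumed when the sleep
 * is interrupted by a signal. The sleep_fully() helper is an assumption
 * made up for the example.
 *
 *	#include <time.h>
 *	#include <errno.h>
 *
 *	static void sleep_fully(struct timespec req)
 *	{
 *		struct timespec rem;
 *
 *		// On EINTR the kernel has stored the unslept time in 'rem';
 *		// retry the sleep with the remainder.
 *		while (nanosleep(&req, &rem) == -1 && errno == EINTR)
 *			req = rem;
 *	}
 */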
2137 :
2138 : #ifdef CONFIG_COMPAT_32BIT_TIME
2139 :
2140 : SYSCALL_DEFINE2(nanosleep_time32, struct old_timespec32 __user *, rqtp,
2141 : struct old_timespec32 __user *, rmtp)
2142 : {
2143 : struct timespec64 tu;
2144 :
2145 : if (get_old_timespec32(&tu, rqtp))
2146 : return -EFAULT;
2147 :
2148 : if (!timespec64_valid(&tu))
2149 : return -EINVAL;
2150 :
2151 : current->restart_block.fn = do_no_restart_syscall;
2152 : current->restart_block.nanosleep.type = rmtp ? TT_COMPAT : TT_NONE;
2153 : current->restart_block.nanosleep.compat_rmtp = rmtp;
2154 : return hrtimer_nanosleep(timespec64_to_ktime(tu), HRTIMER_MODE_REL,
2155 : CLOCK_MONOTONIC);
2156 : }
2157 : #endif
2158 :
2159 : /*
2160 : * Functions related to boot-time initialization:
2161 : */
2162 0 : int hrtimers_prepare_cpu(unsigned int cpu)
2163 : {
2164 1 : struct hrtimer_cpu_base *cpu_base = &per_cpu(hrtimer_bases, cpu);
2165 : int i;
2166 :
2167 9 : for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
2168 8 : struct hrtimer_clock_base *clock_b = &cpu_base->clock_base[i];
2169 :
2170 8 : clock_b->cpu_base = cpu_base;
2171 16 : seqcount_raw_spinlock_init(&clock_b->seq, &cpu_base->lock);
2172 16 : timerqueue_init_head(&clock_b->active);
2173 : }
2174 :
2175 1 : cpu_base->cpu = cpu;
2176 1 : cpu_base->active_bases = 0;
2177 1 : cpu_base->hres_active = 0;
2178 1 : cpu_base->hang_detected = 0;
2179 1 : cpu_base->next_timer = NULL;
2180 1 : cpu_base->softirq_next_timer = NULL;
2181 1 : cpu_base->expires_next = KTIME_MAX;
2182 1 : cpu_base->softirq_expires_next = KTIME_MAX;
2183 1 : hrtimer_cpu_base_init_expiry_lock(cpu_base);
2184 0 : return 0;
2185 : }
2186 :
2187 : #ifdef CONFIG_HOTPLUG_CPU
2188 :
2189 : static void migrate_hrtimer_list(struct hrtimer_clock_base *old_base,
2190 : struct hrtimer_clock_base *new_base)
2191 : {
2192 : struct hrtimer *timer;
2193 : struct timerqueue_node *node;
2194 :
2195 : while ((node = timerqueue_getnext(&old_base->active))) {
2196 : timer = container_of(node, struct hrtimer, node);
2197 : BUG_ON(hrtimer_callback_running(timer));
2198 : debug_deactivate(timer);
2199 :
2200 : /*
2201 : * Mark it as ENQUEUED, not INACTIVE, otherwise the
2202 : * timer could be seen as !active and just vanish
2203 : * away under us on another CPU.
2204 : */
2205 : __remove_hrtimer(timer, old_base, HRTIMER_STATE_ENQUEUED, 0);
2206 : timer->base = new_base;
2207 : /*
2208 : * Enqueue the timers on the new cpu. This does not
2209 : * reprogram the event device in case the timer
2210 : * expires before the earliest on this CPU, but we run
2211 : * hrtimer_interrupt after we migrated everything to
2212 : * sort out already expired timers and reprogram the
2213 : * event device.
2214 : */
2215 : enqueue_hrtimer(timer, new_base, HRTIMER_MODE_ABS);
2216 : }
2217 : }
2218 :
2219 : int hrtimers_dead_cpu(unsigned int scpu)
2220 : {
2221 : struct hrtimer_cpu_base *old_base, *new_base;
2222 : int i;
2223 :
2224 : BUG_ON(cpu_online(scpu));
2225 : tick_cancel_sched_timer(scpu);
2226 :
2227 : /*
2228 : * This BH disable ensures that raise_softirq_irqoff() does
2229 : * not wake up ksoftirqd (and acquire the pi-lock) while
2230 : * holding the cpu_base lock.
2231 : */
2232 : local_bh_disable();
2233 : local_irq_disable();
2234 : old_base = &per_cpu(hrtimer_bases, scpu);
2235 : new_base = this_cpu_ptr(&hrtimer_bases);
2236 : /*
2237 : * The caller is globally serialized and nobody else
2238 : * takes two locks at once, so deadlock is not possible.
2239 : */
2240 : raw_spin_lock(&new_base->lock);
2241 : raw_spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING);
2242 :
2243 : for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
2244 : migrate_hrtimer_list(&old_base->clock_base[i],
2245 : &new_base->clock_base[i]);
2246 : }
2247 :
2248 : /*
2249 : * The migration might have changed the first expiring softirq
2250 : * timer on this CPU. Update it.
2251 : */
2252 : hrtimer_update_softirq_timer(new_base, false);
2253 :
2254 : raw_spin_unlock(&old_base->lock);
2255 : raw_spin_unlock(&new_base->lock);
2256 :
2257 : /* Check if we got expired work to do */
2258 : __hrtimer_peek_ahead_timers();
2259 : local_irq_enable();
2260 : local_bh_enable();
2261 : return 0;
2262 : }
2263 :
2264 : #endif /* CONFIG_HOTPLUG_CPU */
2265 :
2266 1 : void __init hrtimers_init(void)
2267 : {
2268 1 : hrtimers_prepare_cpu(smp_processor_id());
2269 1 : open_softirq(HRTIMER_SOFTIRQ, hrtimer_run_softirq);
2270 1 : }
2271 :
2272 : /**
2273 : * schedule_hrtimeout_range_clock - sleep until timeout
2274 : * @expires: timeout value (ktime_t)
2275 : * @delta: slack in expires timeout (ktime_t) for SCHED_OTHER tasks
2276 : * @mode: timer mode
2277 : * @clock_id: timer clock to be used
2278 : */
2279 : int __sched
2280 0 : schedule_hrtimeout_range_clock(ktime_t *expires, u64 delta,
2281 : const enum hrtimer_mode mode, clockid_t clock_id)
2282 : {
2283 : struct hrtimer_sleeper t;
2284 :
2285 : /*
2286 : * Optimize when a zero timeout value is given. It does not
2287 : * matter whether this is an absolute or a relative time.
2288 : */
2289 0 : if (expires && *expires == 0) {
2290 0 : __set_current_state(TASK_RUNNING);
2291 0 : return 0;
2292 : }
2293 :
2294 : /*
2295 : * A NULL parameter means "infinite"
2296 : */
2297 0 : if (!expires) {
2298 0 : schedule();
2299 0 : return -EINTR;
2300 : }
2301 :
2302 : /*
2303 : * Override any slack passed by the user if under
2304 : * rt constraints.
2305 : */
2306 0 : if (rt_task(current))
2307 0 : delta = 0;
2308 :
2309 0 : hrtimer_init_sleeper_on_stack(&t, clock_id, mode);
2310 0 : hrtimer_set_expires_range_ns(&t.timer, *expires, delta);
2311 0 : hrtimer_sleeper_start_expires(&t, mode);
2312 :
2313 0 : if (likely(t.task))
2314 0 : schedule();
2315 :
2316 0 : hrtimer_cancel(&t.timer);
2317 0 : destroy_hrtimer_on_stack(&t.timer);
2318 :
2319 0 : __set_current_state(TASK_RUNNING);
2320 :
2321 0 : return !t.task ? 0 : -EINTR;
2322 : }
2323 : EXPORT_SYMBOL_GPL(schedule_hrtimeout_range_clock);
2324 :
2325 : /**
2326 : * schedule_hrtimeout_range - sleep until timeout
2327 : * @expires: timeout value (ktime_t)
2328 : * @delta: slack in expires timeout (ktime_t) for SCHED_OTHER tasks
2329 : * @mode: timer mode
2330 : *
2331 : * Make the current task sleep until the given expiry time has
2332 : * elapsed. The routine will return immediately unless
2333 : * the current task state has been set (see set_current_state()).
2334 : *
2335 : * The @delta argument gives the kernel the freedom to schedule the
2336 : * actual wakeup to a time that is both power and performance friendly
2337 : * for regular (non RT/DL) tasks.
2338 : * The kernel gives normal best effort behavior for "@expires+@delta",
2339 : * and may decide to fire the timer earlier, but no earlier than @expires.
2340 : *
2341 : * You can set the task state as follows -
2342 : *
2343 : * %TASK_UNINTERRUPTIBLE - at least the time specified by @expires is
2344 : * guaranteed to pass before the routine returns unless the current
2345 : * task is explicitly woken up (e.g. by wake_up_process()).
2346 : *
2347 : * %TASK_INTERRUPTIBLE - the routine may return early if a signal is
2348 : * delivered to the current task or the current task is explicitly woken
2349 : * up.
2350 : *
2351 : * The current task state is guaranteed to be TASK_RUNNING when this
2352 : * routine returns.
2353 : *
2354 : * Returns 0 when the timer has expired. If the task was woken before the
2355 : * timer expired by a signal (only possible in state TASK_INTERRUPTIBLE) or
2356 : * by an explicit wakeup, it returns -EINTR.
2357 : */
2358 0 : int __sched schedule_hrtimeout_range(ktime_t *expires, u64 delta,
2359 : const enum hrtimer_mode mode)
2360 : {
2361 0 : return schedule_hrtimeout_range_clock(expires, delta, mode,
2362 : CLOCK_MONOTONIC);
2363 : }
2364 : EXPORT_SYMBOL_GPL(schedule_hrtimeout_range);
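/*
 * Illustrative sketch only (not part of hrtimer.c): in-kernel use of
 * schedule_hrtimeout_range(). The my_short_wait() name and the 100us/10us
 * timeout and slack values are assumptions made up for the example.
 *
 *	static int my_short_wait(void)
 *	{
 *		ktime_t to = ktime_set(0, 100 * NSEC_PER_USEC);
 *
 *		// The task state must be set first, otherwise the call
 *		// returns immediately (see the kernel-doc above).
 *		set_current_state(TASK_INTERRUPTIBLE);
 *
 *		// Relative timeout with 10us of slack for wakeup coalescing;
 *		// returns 0 on expiry, -EINTR on early wakeup.
 *		return schedule_hrtimeout_range(&to, 10 * NSEC_PER_USEC,
 *						HRTIMER_MODE_REL);
 *	}
 */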
2365 :
2366 : /**
2367 : * schedule_hrtimeout - sleep until timeout
2368 : * @expires: timeout value (ktime_t)
2369 : * @mode: timer mode
2370 : *
2371 : * Make the current task sleep until the given expiry time has
2372 : * elapsed. The routine will return immediately unless
2373 : * the current task state has been set (see set_current_state()).
2374 : *
2375 : * You can set the task state as follows -
2376 : *
2377 : * %TASK_UNINTERRUPTIBLE - at least the time specified by @expires is
2378 : * guaranteed to pass before the routine returns unless the current
2379 : * task is explicitly woken up (e.g. by wake_up_process()).
2380 : *
2381 : * %TASK_INTERRUPTIBLE - the routine may return early if a signal is
2382 : * delivered to the current task or the current task is explicitly woken
2383 : * up.
2384 : *
2385 : * The current task state is guaranteed to be TASK_RUNNING when this
2386 : * routine returns.
2387 : *
2388 : * Returns 0 when the timer has expired. If the task was woken before the
2389 : * timer expired by a signal (only possible in state TASK_INTERRUPTIBLE) or
2390 : * by an explicit wakeup, it returns -EINTR.
2391 : */
2392 0 : int __sched schedule_hrtimeout(ktime_t *expires,
2393 : const enum hrtimer_mode mode)
2394 : {
2395 0 : return schedule_hrtimeout_range(expires, 0, mode);
2396 : }
2397 : EXPORT_SYMBOL_GPL(schedule_hrtimeout);