// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2010 Red Hat, Inc., Peter Zijlstra
 *
 * Provides a framework for enqueueing and running callbacks from hardirq
 * context. The enqueueing is NMI-safe.
 */

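/*
 * A minimal usage sketch (the callback and variable names are hypothetical):
 * a user embeds a struct irq_work, initializes it with IRQ_WORK_INIT() or
 * init_irq_work(), and queues it with irq_work_queue() from NMI or hardirq
 * context. The callback then runs from the irq_work interrupt, or from the
 * next timer tick on architectures without such an interrupt.
 *
 *	static void my_irq_work_func(struct irq_work *work)
 *	{
 *		pr_info("irq_work ran on CPU %d\n", smp_processor_id());
 *	}
 *
 *	static struct irq_work my_work = IRQ_WORK_INIT(my_irq_work_func);
 *
 *	// NMI-safe; returns false if @my_work is already pending:
 *	irq_work_queue(&my_work);
 */
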
#include <linux/bug.h>
#include <linux/kernel.h>
#include <linux/export.h>
#include <linux/irq_work.h>
#include <linux/percpu.h>
#include <linux/hardirq.h>
#include <linux/irqflags.h>
#include <linux/sched.h>
#include <linux/tick.h>
#include <linux/cpu.h>
#include <linux/notifier.h>
#include <linux/smp.h>
#include <linux/smpboot.h>
#include <asm/processor.h>
#include <linux/kasan.h>

static DEFINE_PER_CPU(struct llist_head, raised_list);
static DEFINE_PER_CPU(struct llist_head, lazy_list);
static DEFINE_PER_CPU(struct task_struct *, irq_workd);

static void wake_irq_workd(void)
{
	struct task_struct *tsk = __this_cpu_read(irq_workd);

	if (!llist_empty(this_cpu_ptr(&lazy_list)) && tsk)
		wake_up_process(tsk);
}

#ifdef CONFIG_SMP
static void irq_work_wake(struct irq_work *entry)
{
	wake_irq_workd();
}

static DEFINE_PER_CPU(struct irq_work, irq_work_wakeup) =
	IRQ_WORK_INIT_HARD(irq_work_wake);
#endif

static int irq_workd_should_run(unsigned int cpu)
{
	return !llist_empty(this_cpu_ptr(&lazy_list));
}

/*
 * Claim the entry so that no one else will poke at it.
 */
static bool irq_work_claim(struct irq_work *work)
{
	int oflags;

	oflags = atomic_fetch_or(IRQ_WORK_CLAIMED | CSD_TYPE_IRQ_WORK, &work->node.a_flags);
	/*
	 * If the work is already pending, no need to raise the IPI.
	 * The pairing smp_mb() in irq_work_single() makes sure
	 * everything we did before is visible.
	 */
	if (oflags & IRQ_WORK_PENDING)
		return false;
	return true;
}

void __weak arch_irq_work_raise(void)
{
	/*
	 * Lame architectures will get the timer tick callback
	 */
}

/* Enqueue on current CPU, work must already be claimed and preempt disabled */
static void __irq_work_queue_local(struct irq_work *work)
{
	struct llist_head *list;
	bool rt_lazy_work = false;
	bool lazy_work = false;
	int work_flags;

	work_flags = atomic_read(&work->node.a_flags);
	if (work_flags & IRQ_WORK_LAZY)
		lazy_work = true;
	else if (IS_ENABLED(CONFIG_PREEMPT_RT) &&
		 !(work_flags & IRQ_WORK_HARD_IRQ))
		rt_lazy_work = true;

	if (lazy_work || rt_lazy_work)
		list = this_cpu_ptr(&lazy_list);
	else
		list = this_cpu_ptr(&raised_list);

	if (!llist_add(&work->node.llist, list))
		return;

	/* If the work is "lazy", handle it from next tick if any */
	if (!lazy_work || tick_nohz_tick_stopped())
		arch_irq_work_raise();
}
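
/*
 * A rough sketch of how the initializer flags map onto the two lists
 * above, based on the logic in __irq_work_queue_local():
 *
 *	IRQ_WORK_INIT(f)	- raised_list, IPI via arch_irq_work_raise()
 *	IRQ_WORK_INIT_LAZY(f)	- lazy_list, normally deferred to the next
 *				  tick (IPI only if the tick is stopped)
 *	IRQ_WORK_INIT_HARD(f)	- raised_list even on PREEMPT_RT, i.e. run
 *				  from hardirq context
 *
 * On PREEMPT_RT, work not marked IRQ_WORK_HARD_IRQ also goes to lazy_list
 * and is run by the irq_work/%u thread rather than from hardirq context.
 */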

/* Enqueue the irq work @work on the current CPU */
bool irq_work_queue(struct irq_work *work)
{
	/* Only queue if not already pending */
	if (!irq_work_claim(work))
		return false;

	/* Queue the entry and raise the IPI if needed. */
	preempt_disable();
	__irq_work_queue_local(work);
	preempt_enable();

	return true;
}
EXPORT_SYMBOL_GPL(irq_work_queue);

/*
 * Enqueue the irq_work @work on @cpu unless it's already pending
 * somewhere.
 *
 * Can be re-enqueued while the callback is still in progress.
 */
bool irq_work_queue_on(struct irq_work *work, int cpu)
{
#ifndef CONFIG_SMP
	return irq_work_queue(work);

#else /* CONFIG_SMP: */
	/* All work should have been flushed before going offline */
	WARN_ON_ONCE(cpu_is_offline(cpu));

	/* Only queue if not already pending */
	if (!irq_work_claim(work))
		return false;

	kasan_record_aux_stack_noalloc(work);

	preempt_disable();
	if (cpu != smp_processor_id()) {
		/* Arch remote IPI send/receive backends aren't NMI safe */
		WARN_ON_ONCE(in_nmi());

		/*
		 * On PREEMPT_RT the items which are not marked as
		 * IRQ_WORK_HARD_IRQ are added to the lazy list and a HARD work
		 * item is used on the remote CPU to wake the thread.
		 */
		if (IS_ENABLED(CONFIG_PREEMPT_RT) &&
		    !(atomic_read(&work->node.a_flags) & IRQ_WORK_HARD_IRQ)) {

			if (!llist_add(&work->node.llist, &per_cpu(lazy_list, cpu)))
				goto out;

			work = &per_cpu(irq_work_wakeup, cpu);
			if (!irq_work_claim(work))
				goto out;
		}

		__smp_call_single_queue(cpu, &work->node.llist);
	} else {
		__irq_work_queue_local(work);
	}
out:
	preempt_enable();

	return true;
#endif /* CONFIG_SMP */
}
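
/*
 * A minimal sketch of queueing on another CPU (the work item and target
 * CPU are hypothetical): the target CPU must be online, and on PREEMPT_RT
 * a callback that has to run in hardirq context on the remote CPU should
 * be initialized with IRQ_WORK_INIT_HARD().
 *
 *	static struct irq_work remote_work = IRQ_WORK_INIT(my_irq_work_func);
 *
 *	// Not NMI-safe for a remote CPU, see WARN_ON_ONCE(in_nmi()) above:
 *	irq_work_queue_on(&remote_work, 1);
 */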

bool irq_work_needs_cpu(void)
{
	struct llist_head *raised, *lazy;

	raised = this_cpu_ptr(&raised_list);
	lazy = this_cpu_ptr(&lazy_list);

	if (llist_empty(raised) || arch_irq_work_has_interrupt())
		if (llist_empty(lazy))
			return false;

	/* All work should have been flushed before going offline */
	WARN_ON_ONCE(cpu_is_offline(smp_processor_id()));

	return true;
}

void irq_work_single(void *arg)
{
	struct irq_work *work = arg;
	int flags;

	/*
	 * Clear the PENDING bit, after this point the @work can be re-used.
	 * The PENDING bit acts as a lock, and we own it, so we can clear it
	 * without atomic ops.
	 */
	flags = atomic_read(&work->node.a_flags);
	flags &= ~IRQ_WORK_PENDING;
	atomic_set(&work->node.a_flags, flags);

	/*
	 * See irq_work_claim().
	 */
	smp_mb();

	lockdep_irq_work_enter(flags);
	work->func(work);
	lockdep_irq_work_exit(flags);

	/*
	 * Clear the BUSY bit, if set, and return to the free state if no-one
	 * else claimed it meanwhile.
	 */
	(void)atomic_cmpxchg(&work->node.a_flags, flags, flags & ~IRQ_WORK_BUSY);

	if ((IS_ENABLED(CONFIG_PREEMPT_RT) && !irq_work_is_hard(work)) ||
	    !arch_irq_work_has_interrupt())
		rcuwait_wake_up(&work->irqwait);
}
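
/*
 * A rough sketch of the flag lifecycle implemented by irq_work_claim()
 * and irq_work_single(); the state names are informal:
 *
 *	free:     neither PENDING nor BUSY is set
 *	claimed:  irq_work_claim() atomically sets IRQ_WORK_CLAIMED
 *	          (PENDING | BUSY); a concurrent claim sees PENDING set
 *	          and returns false
 *	running:  irq_work_single() clears PENDING before invoking the
 *	          callback, so the work can be re-queued while it runs
 *	free:     the final cmpxchg drops BUSY unless the work was
 *	          re-claimed in the meantime
 */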

static void irq_work_run_list(struct llist_head *list)
{
	struct irq_work *work, *tmp;
	struct llist_node *llnode;

	/*
	 * On PREEMPT_RT IRQ-work which is not marked as HARD will be processed
	 * in a per-CPU thread in preemptible context. Only the items which are
	 * marked as IRQ_WORK_HARD_IRQ will be processed in hardirq context.
	 */
	BUG_ON(!irqs_disabled() && !IS_ENABLED(CONFIG_PREEMPT_RT));

	if (llist_empty(list))
		return;

	llnode = llist_del_all(list);
	llist_for_each_entry_safe(work, tmp, llnode, node.llist)
		irq_work_single(work);
}

/*
 * hotplug calls this through:
 * hotplug_cfd() -> flush_smp_call_function_queue()
 */
void irq_work_run(void)
{
	irq_work_run_list(this_cpu_ptr(&raised_list));
	if (!IS_ENABLED(CONFIG_PREEMPT_RT))
		irq_work_run_list(this_cpu_ptr(&lazy_list));
	else
		wake_irq_workd();
}
EXPORT_SYMBOL_GPL(irq_work_run);

void irq_work_tick(void)
{
	struct llist_head *raised = this_cpu_ptr(&raised_list);

	if (!llist_empty(raised) && !arch_irq_work_has_interrupt())
		irq_work_run_list(raised);

	if (!IS_ENABLED(CONFIG_PREEMPT_RT))
		irq_work_run_list(this_cpu_ptr(&lazy_list));
	else
		wake_irq_workd();
}

/*
 * Synchronize against the irq_work @work, ensuring it is not
 * currently in use.
 */
void irq_work_sync(struct irq_work *work)
{
	lockdep_assert_irqs_enabled();
	might_sleep();

	if ((IS_ENABLED(CONFIG_PREEMPT_RT) && !irq_work_is_hard(work)) ||
	    !arch_irq_work_has_interrupt()) {
		rcuwait_wait_event(&work->irqwait, !irq_work_is_busy(work),
				   TASK_UNINTERRUPTIBLE);
		return;
	}

	while (irq_work_is_busy(work))
		cpu_relax();
}
EXPORT_SYMBOL_GPL(irq_work_sync);
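
/*
 * A minimal sketch of the usual teardown pattern (struct and function
 * names are hypothetical): call irq_work_sync() before freeing an object
 * that embeds an irq_work, so the callback cannot run on freed memory.
 *
 *	struct my_obj {
 *		struct irq_work work;
 *		void *data;
 *	};
 *
 *	void my_obj_free(struct my_obj *obj)
 *	{
 *		irq_work_sync(&obj->work);	// may sleep, process context only
 *		kfree(obj);
 *	}
 */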

static void run_irq_workd(unsigned int cpu)
{
	irq_work_run_list(this_cpu_ptr(&lazy_list));
}

static void irq_workd_setup(unsigned int cpu)
{
	sched_set_fifo_low(current);
}

static struct smp_hotplug_thread irqwork_threads = {
	.store			= &irq_workd,
	.setup			= irq_workd_setup,
	.thread_should_run	= irq_workd_should_run,
	.thread_fn		= run_irq_workd,
	.thread_comm		= "irq_work/%u",
};

static __init int irq_work_init_threads(void)
{
	if (IS_ENABLED(CONFIG_PREEMPT_RT))
		BUG_ON(smpboot_register_percpu_thread(&irqwork_threads));
	return 0;
}
early_initcall(irq_work_init_threads);