Line data Source code
1 : // SPDX-License-Identifier: GPL-2.0-only
2 : /*
3 : * kernel/workqueue.c - generic async execution with shared worker pool
4 : *
5 : * Copyright (C) 2002 Ingo Molnar
6 : *
7 : * Derived from the taskqueue/keventd code by:
8 : * David Woodhouse <dwmw2@infradead.org>
9 : * Andrew Morton
10 : * Kai Petzke <wpp@marie.physik.tu-berlin.de>
11 : * Theodore Ts'o <tytso@mit.edu>
12 : *
13 : * Made to use alloc_percpu by Christoph Lameter.
14 : *
15 : * Copyright (C) 2010 SUSE Linux Products GmbH
16 : * Copyright (C) 2010 Tejun Heo <tj@kernel.org>
17 : *
18 : * This is the generic async execution mechanism. Work items are
19 : * executed in process context. The worker pool is shared and
20 : * automatically managed. There are two worker pools for each CPU (one for
21 : * normal work items and the other for high priority ones) and some extra
22 : * pools for workqueues which are not bound to any specific CPU - the
23 : * number of these backing pools is dynamic.
24 : *
25 : * Please read Documentation/core-api/workqueue.rst for details.
26 : */
27 :
28 : #include <linux/export.h>
29 : #include <linux/kernel.h>
30 : #include <linux/sched.h>
31 : #include <linux/init.h>
32 : #include <linux/signal.h>
33 : #include <linux/completion.h>
34 : #include <linux/workqueue.h>
35 : #include <linux/slab.h>
36 : #include <linux/cpu.h>
37 : #include <linux/notifier.h>
38 : #include <linux/kthread.h>
39 : #include <linux/hardirq.h>
40 : #include <linux/mempolicy.h>
41 : #include <linux/freezer.h>
42 : #include <linux/debug_locks.h>
43 : #include <linux/lockdep.h>
44 : #include <linux/idr.h>
45 : #include <linux/jhash.h>
46 : #include <linux/hashtable.h>
47 : #include <linux/rculist.h>
48 : #include <linux/nodemask.h>
49 : #include <linux/moduleparam.h>
50 : #include <linux/uaccess.h>
51 : #include <linux/sched/isolation.h>
52 : #include <linux/sched/debug.h>
53 : #include <linux/nmi.h>
54 : #include <linux/kvm_para.h>
55 :
56 : #include "workqueue_internal.h"
57 :
58 : enum {
59 : /*
60 : * worker_pool flags
61 : *
62 : * A bound pool is either associated or disassociated with its CPU.
63 : * While associated (!DISASSOCIATED), all workers are bound to the
64 : * CPU and none has %WORKER_UNBOUND set and concurrency management
65 : * is in effect.
66 : *
67 : * While DISASSOCIATED, the cpu may be offline and all workers have
68 : * %WORKER_UNBOUND set and concurrency management disabled, and may
69 : * be executing on any CPU. The pool behaves as an unbound one.
70 : *
71 : * Note that DISASSOCIATED should be flipped only while holding
72 : * wq_pool_attach_mutex to avoid changing binding state while
73 : * worker_attach_to_pool() is in progress.
74 : */
75 : POOL_MANAGER_ACTIVE = 1 << 0, /* being managed */
76 : POOL_DISASSOCIATED = 1 << 2, /* cpu can't serve workers */
77 :
78 : /* worker flags */
79 : WORKER_DIE = 1 << 1, /* die die die */
80 : WORKER_IDLE = 1 << 2, /* is idle */
81 : WORKER_PREP = 1 << 3, /* preparing to run works */
82 : WORKER_CPU_INTENSIVE = 1 << 6, /* cpu intensive */
83 : WORKER_UNBOUND = 1 << 7, /* worker is unbound */
84 : WORKER_REBOUND = 1 << 8, /* worker was rebound */
85 :
86 : WORKER_NOT_RUNNING = WORKER_PREP | WORKER_CPU_INTENSIVE |
87 : WORKER_UNBOUND | WORKER_REBOUND,
88 :
89 : NR_STD_WORKER_POOLS = 2, /* # standard pools per cpu */
90 :
91 : UNBOUND_POOL_HASH_ORDER = 6, /* hashed by pool->attrs */
92 : BUSY_WORKER_HASH_ORDER = 6, /* 64 pointers */
93 :
94 : MAX_IDLE_WORKERS_RATIO = 4, /* 1/4 of busy can be idle */
95 : IDLE_WORKER_TIMEOUT = 300 * HZ, /* keep idle ones for 5 mins */
96 :
97 : MAYDAY_INITIAL_TIMEOUT = HZ / 100 >= 2 ? HZ / 100 : 2,
98 : /* call for help after 10ms
99 : (min two ticks) */
100 : MAYDAY_INTERVAL = HZ / 10, /* and then every 100ms */
101 : CREATE_COOLDOWN = HZ, /* time to breathe after fail */
102 :
103 : /*
104 : * Rescue workers are used only in emergencies and are shared by
105 : * all cpus. Give MIN_NICE.
106 : */
107 : RESCUER_NICE_LEVEL = MIN_NICE,
108 : HIGHPRI_NICE_LEVEL = MIN_NICE,
109 :
110 : WQ_NAME_LEN = 24,
111 : };
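/*
 * Illustrative userspace model (a sketch, not kernel code): how a composite
 * mask such as WORKER_NOT_RUNNING interacts with pool->nr_running. A worker
 * stops counting as running when the first NOT_RUNNING bit is set and
 * resumes only when the last one is cleared, mirroring worker_set_flags()
 * and worker_clr_flags() further down in this file.
 */
#include <assert.h>

enum {
	TOY_PREP		= 1 << 3,
	TOY_CPU_INTENSIVE	= 1 << 6,
	TOY_UNBOUND		= 1 << 7,
	TOY_NOT_RUNNING		= TOY_PREP | TOY_CPU_INTENSIVE | TOY_UNBOUND,
};

static int toy_nr_running;

static void toy_set_flags(unsigned int *flags, unsigned int add)
{
	if ((add & TOY_NOT_RUNNING) && !(*flags & TOY_NOT_RUNNING))
		toy_nr_running--;		/* first NOT_RUNNING bit set */
	*flags |= add;
}

static void toy_clr_flags(unsigned int *flags, unsigned int del)
{
	unsigned int old = *flags;

	*flags &= ~del;
	if ((del & TOY_NOT_RUNNING) && (old & TOY_NOT_RUNNING) &&
	    !(*flags & TOY_NOT_RUNNING))
		toy_nr_running++;		/* last NOT_RUNNING bit cleared */
}

int main(void)
{
	unsigned int flags = 0;

	toy_nr_running = 1;
	toy_set_flags(&flags, TOY_PREP);		/* 1 -> 0 */
	toy_set_flags(&flags, TOY_CPU_INTENSIVE);	/* stays 0 */
	toy_clr_flags(&flags, TOY_PREP);		/* stays 0 */
	toy_clr_flags(&flags, TOY_CPU_INTENSIVE);	/* 0 -> 1 */
	assert(toy_nr_running == 1);
	return 0;
}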
112 :
113 : /*
114 : * Structure fields follow one of the following exclusion rules.
115 : *
116 : * I: Modifiable by initialization/destruction paths and read-only for
117 : * everyone else.
118 : *
119 : * P: Preemption protected. Disabling preemption is enough and should
120 : * only be modified and accessed from the local cpu.
121 : *
122 : * L: pool->lock protected. Access with pool->lock held.
123 : *
124 : * X: During normal operation, modification requires pool->lock and should
125 : * be done only from local cpu. Either disabling preemption on local
126 : * cpu or grabbing pool->lock is enough for read access. If
127 : * POOL_DISASSOCIATED is set, it's identical to L.
128 : *
129 : * K: Only modified by worker while holding pool->lock. Can be safely read by
130 : * self, while holding pool->lock or from IRQ context if %current is the
131 : * kworker.
132 : *
133 : * S: Only modified by worker self.
134 : *
135 : * A: wq_pool_attach_mutex protected.
136 : *
137 : * PL: wq_pool_mutex protected.
138 : *
139 : * PR: wq_pool_mutex protected for writes. RCU protected for reads.
140 : *
141 : * PW: wq_pool_mutex and wq->mutex protected for writes. Either for reads.
142 : *
143 : * PWR: wq_pool_mutex and wq->mutex protected for writes. Either or
144 : * RCU for reads.
145 : *
146 : * WQ: wq->mutex protected.
147 : *
148 : * WR: wq->mutex protected for writes. RCU protected for reads.
149 : *
150 : * MD: wq_mayday_lock protected.
151 : *
152 : * WD: Used internally by the watchdog.
153 : */
154 :
155 : /* struct worker is defined in workqueue_internal.h */
156 :
157 : struct worker_pool {
158 : raw_spinlock_t lock; /* the pool lock */
159 : int cpu; /* I: the associated cpu */
160 : int node; /* I: the associated node ID */
161 : int id; /* I: pool ID */
162 : unsigned int flags; /* X: flags */
163 :
164 : unsigned long watchdog_ts; /* L: watchdog timestamp */
165 : bool cpu_stall; /* WD: stalled cpu bound pool */
166 :
167 : /*
168 : * The counter is incremented in a process context on the associated CPU
169 : * w/ preemption disabled, and decremented or reset in the same context
170 : * but w/ pool->lock held. The readers grab pool->lock and are
171 : * guaranteed to see whether the counter reached zero.
172 : */
173 : int nr_running;
174 :
175 : struct list_head worklist; /* L: list of pending works */
176 :
177 : int nr_workers; /* L: total number of workers */
178 : int nr_idle; /* L: currently idle workers */
179 :
180 : struct list_head idle_list; /* L: list of idle workers */
181 : struct timer_list idle_timer; /* L: worker idle timeout */
182 : struct work_struct idle_cull_work; /* L: worker idle cleanup */
183 :
184 : struct timer_list mayday_timer; /* L: SOS timer for workers */
185 :
186 : /* a worker is either on busy_hash or idle_list, or is the manager */
187 : DECLARE_HASHTABLE(busy_hash, BUSY_WORKER_HASH_ORDER);
188 : /* L: hash of busy workers */
189 :
190 : struct worker *manager; /* L: purely informational */
191 : struct list_head workers; /* A: attached workers */
192 : struct list_head dying_workers; /* A: workers about to die */
193 : struct completion *detach_completion; /* all workers detached */
194 :
195 : struct ida worker_ida; /* worker IDs for task name */
196 :
197 : struct workqueue_attrs *attrs; /* I: worker attributes */
198 : struct hlist_node hash_node; /* PL: unbound_pool_hash node */
199 : int refcnt; /* PL: refcnt for unbound pools */
200 :
201 : /*
202 : * Destruction of pool is RCU protected to allow dereferences
203 : * from get_work_pool().
204 : */
205 : struct rcu_head rcu;
206 : };
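/*
 * Hedged sketch (not part of this file): what the locking annotations above
 * mean in practice for the fields of struct worker_pool. "L:" fields such
 * as worklist require pool->lock; "I:" fields such as cpu and node are set
 * during initialization and may be read without it.
 */
static inline bool example_pool_has_pending(struct worker_pool *pool)
{
	bool pending;

	raw_spin_lock_irq(&pool->lock);		/* "L:" rule */
	pending = !list_empty(&pool->worklist);
	raw_spin_unlock_irq(&pool->lock);

	return pending;				/* pool->cpu ("I:") needs no lock */
}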
207 :
208 : /*
209 : * Per-pool_workqueue statistics. These can be monitored using
210 : * tools/workqueue/wq_monitor.py.
211 : */
212 : enum pool_workqueue_stats {
213 : PWQ_STAT_STARTED, /* work items started execution */
214 : PWQ_STAT_COMPLETED, /* work items completed execution */
215 : PWQ_STAT_CPU_TIME, /* total CPU time consumed */
216 : PWQ_STAT_CPU_INTENSIVE, /* wq_cpu_intensive_thresh_us violations */
217 : PWQ_STAT_CM_WAKEUP, /* concurrency-management worker wakeups */
218 : PWQ_STAT_MAYDAY, /* maydays to rescuer */
219 : PWQ_STAT_RESCUED, /* linked work items executed by rescuer */
220 :
221 : PWQ_NR_STATS,
222 : };
223 :
224 : /*
225 : * The per-pool workqueue. While queued, the lower WORK_STRUCT_FLAG_BITS
226 : * of work_struct->data are used for flags and the remaining high bits
227 : * point to the pwq; thus, pwqs need to be aligned at 1 << WORK_STRUCT_FLAG_BITS,
228 : * i.e. a power of two covering all the flag bits.
229 : */
230 : struct pool_workqueue {
231 : struct worker_pool *pool; /* I: the associated pool */
232 : struct workqueue_struct *wq; /* I: the owning workqueue */
233 : int work_color; /* L: current color */
234 : int flush_color; /* L: flushing color */
235 : int refcnt; /* L: reference count */
236 : int nr_in_flight[WORK_NR_COLORS];
237 : /* L: nr of in_flight works */
238 :
239 : /*
240 : * nr_active management and WORK_STRUCT_INACTIVE:
241 : *
242 : * When pwq->nr_active >= max_active, new work item is queued to
243 : * pwq->inactive_works instead of pool->worklist and marked with
244 : * WORK_STRUCT_INACTIVE.
245 : *
246 : * All work items marked with WORK_STRUCT_INACTIVE do not participate
247 : * in pwq->nr_active and all work items in pwq->inactive_works are
248 : * marked with WORK_STRUCT_INACTIVE. But not all WORK_STRUCT_INACTIVE
249 : * work items are in pwq->inactive_works. Some of them are ready to
250 : * run in pool->worklist or worker->scheduled. Those work items are
251 : * only struct wq_barrier which is used for flush_work() and should
252 : * not participate in pwq->nr_active. For a non-barrier work item, it
253 : * is marked with WORK_STRUCT_INACTIVE iff it is in pwq->inactive_works.
254 : */
255 : int nr_active; /* L: nr of active works */
256 : int max_active; /* L: max active works */
257 : struct list_head inactive_works; /* L: inactive works */
258 : struct list_head pwqs_node; /* WR: node on wq->pwqs */
259 : struct list_head mayday_node; /* MD: node on wq->maydays */
260 :
261 : u64 stats[PWQ_NR_STATS];
262 :
263 : /*
264 : * Release of unbound pwq is punted to system_wq. See put_pwq()
265 : * and pwq_unbound_release_workfn() for details. pool_workqueue
266 : * itself is also RCU protected so that the first pwq can be
267 : * determined without grabbing wq->mutex.
268 : */
269 : struct work_struct unbound_release_work;
270 : struct rcu_head rcu;
271 : } __aligned(1 << WORK_STRUCT_FLAG_BITS);
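/*
 * Illustrative userspace model (a sketch, not kernel code) of the nr_active
 * gating described above: once max_active items are active, further items
 * are parked on the inactive list and promoted one at a time as active ones
 * retire, mirroring pwq_dec_nr_in_flight() and pwq_activate_first_inactive()
 * later in this file.
 */
#include <assert.h>

struct toy_pwq {
	int nr_active;
	int max_active;
	int nr_inactive;		/* stand-in for inactive_works */
};

static void toy_queue(struct toy_pwq *pwq)
{
	if (pwq->nr_active < pwq->max_active)
		pwq->nr_active++;	/* goes straight to the worklist */
	else
		pwq->nr_inactive++;	/* parked, marked INACTIVE */
}

static void toy_retire(struct toy_pwq *pwq)
{
	pwq->nr_active--;
	if (pwq->nr_inactive && pwq->nr_active < pwq->max_active) {
		pwq->nr_inactive--;	/* activate the first inactive item */
		pwq->nr_active++;
	}
}

int main(void)
{
	struct toy_pwq pwq = { .max_active = 2 };

	toy_queue(&pwq);
	toy_queue(&pwq);
	toy_queue(&pwq);			/* third item is parked */
	assert(pwq.nr_active == 2 && pwq.nr_inactive == 1);
	toy_retire(&pwq);			/* promotes the parked item */
	assert(pwq.nr_active == 2 && pwq.nr_inactive == 0);
	return 0;
}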
272 :
273 : /*
274 : * Structure used to wait for workqueue flush.
275 : */
276 : struct wq_flusher {
277 : struct list_head list; /* WQ: list of flushers */
278 : int flush_color; /* WQ: flush color waiting for */
279 : struct completion done; /* flush completion */
280 : };
281 :
282 : struct wq_device;
283 :
284 : /*
285 : * The externally visible workqueue. It relays the issued work items to
286 : * the appropriate worker_pool through its pool_workqueues.
287 : */
288 : struct workqueue_struct {
289 : struct list_head pwqs; /* WR: all pwqs of this wq */
290 : struct list_head list; /* PR: list of all workqueues */
291 :
292 : struct mutex mutex; /* protects this wq */
293 : int work_color; /* WQ: current work color */
294 : int flush_color; /* WQ: current flush color */
295 : atomic_t nr_pwqs_to_flush; /* flush in progress */
296 : struct wq_flusher *first_flusher; /* WQ: first flusher */
297 : struct list_head flusher_queue; /* WQ: flush waiters */
298 : struct list_head flusher_overflow; /* WQ: flush overflow list */
299 :
300 : struct list_head maydays; /* MD: pwqs requesting rescue */
301 : struct worker *rescuer; /* MD: rescue worker */
302 :
303 : int nr_drainers; /* WQ: drain in progress */
304 : int saved_max_active; /* WQ: saved pwq max_active */
305 :
306 : struct workqueue_attrs *unbound_attrs; /* PW: only for unbound wqs */
307 : struct pool_workqueue *dfl_pwq; /* PW: only for unbound wqs */
308 :
309 : #ifdef CONFIG_SYSFS
310 : struct wq_device *wq_dev; /* I: for sysfs interface */
311 : #endif
312 : #ifdef CONFIG_LOCKDEP
313 : char *lock_name;
314 : struct lock_class_key key;
315 : struct lockdep_map lockdep_map;
316 : #endif
317 : char name[WQ_NAME_LEN]; /* I: workqueue name */
318 :
319 : /*
320 : * Destruction of workqueue_struct is RCU protected to allow walking
321 : * the workqueues list without grabbing wq_pool_mutex.
322 : * This is used to dump all workqueues from sysrq.
323 : */
324 : struct rcu_head rcu;
325 :
326 : /* hot fields used during command issue, aligned to cacheline */
327 : unsigned int flags ____cacheline_aligned; /* WQ: WQ_* flags */
328 : struct pool_workqueue __percpu *cpu_pwqs; /* I: per-cpu pwqs */
329 : struct pool_workqueue __rcu *numa_pwq_tbl[]; /* PWR: unbound pwqs indexed by node */
330 : };
331 :
332 : static struct kmem_cache *pwq_cache;
333 :
334 : static cpumask_var_t *wq_numa_possible_cpumask;
335 : /* possible CPUs of each node */
336 :
337 : /*
338 : * Per-cpu work items which run for longer than the following threshold are
339 : * automatically considered CPU intensive and excluded from concurrency
340 : * management to prevent them from noticeably delaying other per-cpu work items.
341 : */
342 : static unsigned long wq_cpu_intensive_thresh_us = 10000;
343 : module_param_named(cpu_intensive_thresh_us, wq_cpu_intensive_thresh_us, ulong, 0644);
344 :
345 : static bool wq_disable_numa;
346 : module_param_named(disable_numa, wq_disable_numa, bool, 0444);
347 :
348 : /* see the comment above the definition of WQ_POWER_EFFICIENT */
349 : static bool wq_power_efficient = IS_ENABLED(CONFIG_WQ_POWER_EFFICIENT_DEFAULT);
350 : module_param_named(power_efficient, wq_power_efficient, bool, 0444);
351 :
352 : static bool wq_online; /* can kworkers be created yet? */
353 :
354 : static bool wq_numa_enabled; /* unbound NUMA affinity enabled */
355 :
356 : /* buf for wq_update_unbound_numa_attrs(), protected by CPU hotplug exclusion */
357 : static struct workqueue_attrs *wq_update_unbound_numa_attrs_buf;
358 :
359 : static DEFINE_MUTEX(wq_pool_mutex); /* protects pools and workqueues list */
360 : static DEFINE_MUTEX(wq_pool_attach_mutex); /* protects worker attach/detach */
361 : static DEFINE_RAW_SPINLOCK(wq_mayday_lock); /* protects wq->maydays list */
362 : /* wait for manager to go away */
363 : static struct rcuwait manager_wait = __RCUWAIT_INITIALIZER(manager_wait);
364 :
365 : static LIST_HEAD(workqueues); /* PR: list of all workqueues */
366 : static bool workqueue_freezing; /* PL: have wqs started freezing? */
367 :
368 : /* PL&A: allowable cpus for unbound wqs and work items */
369 : static cpumask_var_t wq_unbound_cpumask;
370 :
371 : /* CPU where unbound work was last round robin scheduled from this CPU */
372 : static DEFINE_PER_CPU(int, wq_rr_cpu_last);
373 :
374 : /*
375 : * Local execution of unbound work items is no longer guaranteed. The
376 : * following always forces round-robin CPU selection on unbound work items
377 : * to uncover usages which depend on it.
378 : */
379 : #ifdef CONFIG_DEBUG_WQ_FORCE_RR_CPU
380 : static bool wq_debug_force_rr_cpu = true;
381 : #else
382 : static bool wq_debug_force_rr_cpu = false;
383 : #endif
384 : module_param_named(debug_force_rr_cpu, wq_debug_force_rr_cpu, bool, 0644);
385 :
386 : /* the per-cpu worker pools */
387 : static DEFINE_PER_CPU_SHARED_ALIGNED(struct worker_pool [NR_STD_WORKER_POOLS], cpu_worker_pools);
388 :
389 : static DEFINE_IDR(worker_pool_idr); /* PR: idr of all pools */
390 :
391 : /* PL: hash of all unbound pools keyed by pool->attrs */
392 : static DEFINE_HASHTABLE(unbound_pool_hash, UNBOUND_POOL_HASH_ORDER);
393 :
394 : /* I: attributes used when instantiating standard unbound pools on demand */
395 : static struct workqueue_attrs *unbound_std_wq_attrs[NR_STD_WORKER_POOLS];
396 :
397 : /* I: attributes used when instantiating ordered pools on demand */
398 : static struct workqueue_attrs *ordered_wq_attrs[NR_STD_WORKER_POOLS];
399 :
400 : struct workqueue_struct *system_wq __read_mostly;
401 : EXPORT_SYMBOL(system_wq);
402 : struct workqueue_struct *system_highpri_wq __read_mostly;
403 : EXPORT_SYMBOL_GPL(system_highpri_wq);
404 : struct workqueue_struct *system_long_wq __read_mostly;
405 : EXPORT_SYMBOL_GPL(system_long_wq);
406 : struct workqueue_struct *system_unbound_wq __read_mostly;
407 : EXPORT_SYMBOL_GPL(system_unbound_wq);
408 : struct workqueue_struct *system_freezable_wq __read_mostly;
409 : EXPORT_SYMBOL_GPL(system_freezable_wq);
410 : struct workqueue_struct *system_power_efficient_wq __read_mostly;
411 : EXPORT_SYMBOL_GPL(system_power_efficient_wq);
412 : struct workqueue_struct *system_freezable_power_efficient_wq __read_mostly;
413 : EXPORT_SYMBOL_GPL(system_freezable_power_efficient_wq);
414 :
415 : static int worker_thread(void *__worker);
416 : static void workqueue_sysfs_unregister(struct workqueue_struct *wq);
417 : static void show_pwq(struct pool_workqueue *pwq);
418 : static void show_one_worker_pool(struct worker_pool *pool);
419 :
420 : #define CREATE_TRACE_POINTS
421 : #include <trace/events/workqueue.h>
422 :
423 : #define assert_rcu_or_pool_mutex() \
424 : RCU_LOCKDEP_WARN(!rcu_read_lock_held() && \
425 : !lockdep_is_held(&wq_pool_mutex), \
426 : "RCU or wq_pool_mutex should be held")
427 :
428 : #define assert_rcu_or_wq_mutex_or_pool_mutex(wq) \
429 : RCU_LOCKDEP_WARN(!rcu_read_lock_held() && \
430 : !lockdep_is_held(&wq->mutex) && \
431 : !lockdep_is_held(&wq_pool_mutex), \
432 : "RCU, wq->mutex or wq_pool_mutex should be held")
433 :
434 : #define for_each_cpu_worker_pool(pool, cpu) \
435 : for ((pool) = &per_cpu(cpu_worker_pools, cpu)[0]; \
436 : (pool) < &per_cpu(cpu_worker_pools, cpu)[NR_STD_WORKER_POOLS]; \
437 : (pool)++)
438 :
439 : /**
440 : * for_each_pool - iterate through all worker_pools in the system
441 : * @pool: iteration cursor
442 : * @pi: integer used for iteration
443 : *
444 : * This must be called either with wq_pool_mutex held or RCU read
445 : * locked. If the pool needs to be used beyond the locking in effect, the
446 : * caller is responsible for guaranteeing that the pool stays online.
447 : *
448 : * The if/else clause exists only for the lockdep assertion and can be
449 : * ignored.
450 : */
451 : #define for_each_pool(pool, pi) \
452 : idr_for_each_entry(&worker_pool_idr, pool, pi) \
453 : if (({ assert_rcu_or_pool_mutex(); false; })) { } \
454 : else
455 :
456 : /**
457 : * for_each_pool_worker - iterate through all workers of a worker_pool
458 : * @worker: iteration cursor
459 : * @pool: worker_pool to iterate workers of
460 : *
461 : * This must be called with wq_pool_attach_mutex.
462 : *
463 : * The if/else clause exists only for the lockdep assertion and can be
464 : * ignored.
465 : */
466 : #define for_each_pool_worker(worker, pool) \
467 : list_for_each_entry((worker), &(pool)->workers, node) \
468 : if (({ lockdep_assert_held(&wq_pool_attach_mutex); false; })) { } \
469 : else
470 :
471 : /**
472 : * for_each_pwq - iterate through all pool_workqueues of the specified workqueue
473 : * @pwq: iteration cursor
474 : * @wq: the target workqueue
475 : *
476 : * This must be called either with wq->mutex held or RCU read locked.
477 : * If the pwq needs to be used beyond the locking in effect, the caller is
478 : * responsible for guaranteeing that the pwq stays online.
479 : *
480 : * The if/else clause exists only for the lockdep assertion and can be
481 : * ignored.
482 : */
483 : #define for_each_pwq(pwq, wq) \
484 : list_for_each_entry_rcu((pwq), &(wq)->pwqs, pwqs_node, \
485 : lockdep_is_held(&(wq->mutex)))
486 :
487 : #ifdef CONFIG_DEBUG_OBJECTS_WORK
488 :
489 : static const struct debug_obj_descr work_debug_descr;
490 :
491 : static void *work_debug_hint(void *addr)
492 : {
493 : return ((struct work_struct *) addr)->func;
494 : }
495 :
496 : static bool work_is_static_object(void *addr)
497 : {
498 : struct work_struct *work = addr;
499 :
500 : return test_bit(WORK_STRUCT_STATIC_BIT, work_data_bits(work));
501 : }
502 :
503 : /*
504 : * fixup_init is called when:
505 : * - an active object is initialized
506 : */
507 : static bool work_fixup_init(void *addr, enum debug_obj_state state)
508 : {
509 : struct work_struct *work = addr;
510 :
511 : switch (state) {
512 : case ODEBUG_STATE_ACTIVE:
513 : cancel_work_sync(work);
514 : debug_object_init(work, &work_debug_descr);
515 : return true;
516 : default:
517 : return false;
518 : }
519 : }
520 :
521 : /*
522 : * fixup_free is called when:
523 : * - an active object is freed
524 : */
525 : static bool work_fixup_free(void *addr, enum debug_obj_state state)
526 : {
527 : struct work_struct *work = addr;
528 :
529 : switch (state) {
530 : case ODEBUG_STATE_ACTIVE:
531 : cancel_work_sync(work);
532 : debug_object_free(work, &work_debug_descr);
533 : return true;
534 : default:
535 : return false;
536 : }
537 : }
538 :
539 : static const struct debug_obj_descr work_debug_descr = {
540 : .name = "work_struct",
541 : .debug_hint = work_debug_hint,
542 : .is_static_object = work_is_static_object,
543 : .fixup_init = work_fixup_init,
544 : .fixup_free = work_fixup_free,
545 : };
546 :
547 : static inline void debug_work_activate(struct work_struct *work)
548 : {
549 : debug_object_activate(work, &work_debug_descr);
550 : }
551 :
552 : static inline void debug_work_deactivate(struct work_struct *work)
553 : {
554 : debug_object_deactivate(work, &work_debug_descr);
555 : }
556 :
557 : void __init_work(struct work_struct *work, int onstack)
558 : {
559 : if (onstack)
560 : debug_object_init_on_stack(work, &work_debug_descr);
561 : else
562 : debug_object_init(work, &work_debug_descr);
563 : }
564 : EXPORT_SYMBOL_GPL(__init_work);
565 :
566 : void destroy_work_on_stack(struct work_struct *work)
567 : {
568 : debug_object_free(work, &work_debug_descr);
569 : }
570 : EXPORT_SYMBOL_GPL(destroy_work_on_stack);
571 :
572 : void destroy_delayed_work_on_stack(struct delayed_work *work)
573 : {
574 : destroy_timer_on_stack(&work->timer);
575 : debug_object_free(&work->work, &work_debug_descr);
576 : }
577 : EXPORT_SYMBOL_GPL(destroy_delayed_work_on_stack);
578 :
579 : #else
580 : static inline void debug_work_activate(struct work_struct *work) { }
581 : static inline void debug_work_deactivate(struct work_struct *work) { }
582 : #endif
583 :
584 : /**
585 : * worker_pool_assign_id - allocate ID and assign it to @pool
586 : * @pool: the pool pointer of interest
587 : *
588 : * Returns 0 if ID in [0, WORK_OFFQ_POOL_NONE) is allocated and assigned
589 : * successfully, -errno on failure.
590 : */
591 : static int worker_pool_assign_id(struct worker_pool *pool)
592 : {
593 : int ret;
594 :
595 : lockdep_assert_held(&wq_pool_mutex);
596 :
597 3 : ret = idr_alloc(&worker_pool_idr, pool, 0, WORK_OFFQ_POOL_NONE,
598 : GFP_KERNEL);
599 3 : if (ret >= 0) {
600 3 : pool->id = ret;
601 : return 0;
602 : }
603 : return ret;
604 : }
605 :
606 : /**
607 : * unbound_pwq_by_node - return the unbound pool_workqueue for the given node
608 : * @wq: the target workqueue
609 : * @node: the node ID
610 : *
611 : * This must be called with any of wq_pool_mutex, wq->mutex or RCU
612 : * read locked.
613 : * If the pwq needs to be used beyond the locking in effect, the caller is
614 : * responsible for guaranteeing that the pwq stays online.
615 : *
616 : * Return: The unbound pool_workqueue for @node.
617 : */
618 : static struct pool_workqueue *unbound_pwq_by_node(struct workqueue_struct *wq,
619 : int node)
620 : {
621 : assert_rcu_or_wq_mutex_or_pool_mutex(wq);
622 :
623 : /*
624 : * XXX: @node can be NUMA_NO_NODE if CPU goes offline while a
625 : * delayed item is pending. The plan is to keep CPU -> NODE
626 : * mapping valid and stable across CPU on/offlines. Once that
627 : * happens, this workaround can be removed.
628 : */
629 : if (unlikely(node == NUMA_NO_NODE))
630 : return wq->dfl_pwq;
631 :
632 7 : return rcu_dereference_raw(wq->numa_pwq_tbl[node]);
633 : }
634 :
635 : static unsigned int work_color_to_flags(int color)
636 : {
637 16 : return color << WORK_STRUCT_COLOR_SHIFT;
638 : }
639 :
640 : static int get_work_color(unsigned long work_data)
641 : {
642 34 : return (work_data >> WORK_STRUCT_COLOR_SHIFT) &
643 : ((1 << WORK_STRUCT_COLOR_BITS) - 1);
644 : }
645 :
646 : static int work_next_color(int color)
647 : {
648 0 : return (color + 1) % WORK_NR_COLORS;
649 : }
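/*
 * Sketch (userspace, with illustrative stand-in constants rather than the
 * kernel's WORK_STRUCT_COLOR_SHIFT and WORK_STRUCT_COLOR_BITS): a work color
 * round-trips through the high bits of work->data via work_color_to_flags()
 * and get_work_color() above, and work_next_color() simply cycles it modulo
 * WORK_NR_COLORS.
 */
#include <assert.h>

#define TOY_COLOR_SHIFT	8
#define TOY_COLOR_BITS	4
#define TOY_NR_COLORS	(1 << TOY_COLOR_BITS)

static unsigned long toy_color_to_flags(int color)
{
	return (unsigned long)color << TOY_COLOR_SHIFT;
}

static int toy_get_color(unsigned long data)
{
	return (data >> TOY_COLOR_SHIFT) & (TOY_NR_COLORS - 1);
}

int main(void)
{
	int color = TOY_NR_COLORS - 1;

	assert(toy_get_color(toy_color_to_flags(color)) == color);
	assert((color + 1) % TOY_NR_COLORS == 0);	/* wraps back to 0 */
	return 0;
}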
650 :
651 : /*
652 : * While queued, %WORK_STRUCT_PWQ is set and the non-flag bits of a work's data
653 : * contain the pointer to the queued pwq. Once execution starts, the flag
654 : * is cleared and the high bits contain OFFQ flags and pool ID.
655 : *
656 : * set_work_pwq(), set_work_pool_and_clear_pending(), mark_work_canceling()
657 : * and clear_work_data() can be used to set the pwq, pool or clear
658 : * work->data. These functions should only be called while the work is
659 : * owned - ie. while the PENDING bit is set.
660 : *
661 : * get_work_pool() and get_work_pwq() can be used to obtain the pool or pwq
662 : * corresponding to a work. Pool is available once the work has been
663 : * queued anywhere after initialization until it is sync canceled. pwq is
664 : * available only while the work item is queued.
665 : *
666 : * %WORK_OFFQ_CANCELING is used to mark a work item which is being
667 : * canceled. While being canceled, a work item may have its PENDING set
668 : * but stay off timer and worklist for arbitrarily long and nobody should
669 : * try to steal the PENDING bit.
670 : */
671 53 : static inline void set_work_data(struct work_struct *work, unsigned long data,
672 : unsigned long flags)
673 : {
674 106 : WARN_ON_ONCE(!work_pending(work));
675 106 : atomic_long_set(&work->data, data | flags | work_static(work));
676 53 : }
677 :
678 : static void set_work_pwq(struct work_struct *work, struct pool_workqueue *pwq,
679 : unsigned long extra_flags)
680 : {
681 16 : set_work_data(work, (unsigned long)pwq,
682 : WORK_STRUCT_PENDING | WORK_STRUCT_PWQ | extra_flags);
683 : }
684 :
685 : static void set_work_pool_and_keep_pending(struct work_struct *work,
686 : int pool_id)
687 : {
688 0 : set_work_data(work, (unsigned long)pool_id << WORK_OFFQ_POOL_SHIFT,
689 : WORK_STRUCT_PENDING);
690 : }
691 :
692 : static void set_work_pool_and_clear_pending(struct work_struct *work,
693 : int pool_id)
694 : {
695 : /*
696 : * The following wmb is paired with the implied mb in
697 : * test_and_set_bit(PENDING) and ensures all updates to @work made
698 : * here are visible to and precede any updates by the next PENDING
699 : * owner.
700 : */
701 37 : smp_wmb();
702 37 : set_work_data(work, (unsigned long)pool_id << WORK_OFFQ_POOL_SHIFT, 0);
703 : /*
704 : * The following mb guarantees that previous clear of a PENDING bit
705 : * will not be reordered with any speculative LOADS or STORES from
706 : * work->current_func, which is executed afterwards. This possible
707 : * reordering can lead to a missed execution on attempt to queue
708 : * the same @work. E.g. consider this case:
709 : *
710 : * CPU#0 CPU#1
711 : * ---------------------------- --------------------------------
712 : *
713 : * 1 STORE event_indicated
714 : * 2 queue_work_on() {
715 : * 3 test_and_set_bit(PENDING)
716 : * 4 } set_..._and_clear_pending() {
717 : * 5 set_work_data() # clear bit
718 : * 6 smp_mb()
719 : * 7 work->current_func() {
720 : * 8 LOAD event_indicated
721 : * }
722 : *
723 : * Without an explicit full barrier speculative LOAD on line 8 can
724 : * be executed before CPU#0 does STORE on line 1. If that happens,
725 : * CPU#0 observes the PENDING bit is still set and new execution of
726 : * a @work is not queued in a hope, that CPU#1 will eventually
727 : * finish the queued @work. Meanwhile CPU#1 does not see
728 : * event_indicated is set, because speculative LOAD was executed
729 : * before actual STORE.
730 : */
731 37 : smp_mb();
732 : }
733 :
734 : static void clear_work_data(struct work_struct *work)
735 : {
736 0 : smp_wmb(); /* see set_work_pool_and_clear_pending() */
737 0 : set_work_data(work, WORK_STRUCT_NO_POOL, 0);
738 : }
739 :
740 : static inline struct pool_workqueue *work_struct_pwq(unsigned long data)
741 : {
742 20 : return (struct pool_workqueue *)(data & WORK_STRUCT_WQ_DATA_MASK);
743 : }
744 :
745 : static struct pool_workqueue *get_work_pwq(struct work_struct *work)
746 : {
747 40 : unsigned long data = atomic_long_read(&work->data);
748 :
749 20 : if (data & WORK_STRUCT_PWQ)
750 18 : return work_struct_pwq(data);
751 : else
752 : return NULL;
753 : }
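/*
 * Sketch (userspace, illustrative bit positions rather than the kernel's
 * WORK_STRUCT_ / WORK_OFFQ_ constants) of the tagged-word encoding described
 * above: while the PWQ bit is set the non-flag bits hold an aligned pwq
 * pointer, otherwise the high bits hold a pool ID.
 */
#include <assert.h>
#include <stdint.h>

#define TOY_PENDING	(1UL << 0)
#define TOY_PWQ		(1UL << 1)
#define TOY_FLAG_BITS	8
#define TOY_FLAG_MASK	((1UL << TOY_FLAG_BITS) - 1)
#define TOY_POOL_SHIFT	TOY_FLAG_BITS

struct toy_pwq { int dummy; } __attribute__((aligned(1 << TOY_FLAG_BITS)));

static uintptr_t toy_set_pwq(struct toy_pwq *pwq)
{
	return (uintptr_t)pwq | TOY_PENDING | TOY_PWQ;
}

static uintptr_t toy_set_pool_id(unsigned int pool_id)
{
	return (uintptr_t)pool_id << TOY_POOL_SHIFT;	/* PWQ bit clear */
}

int main(void)
{
	static struct toy_pwq pwq;
	uintptr_t data;

	data = toy_set_pwq(&pwq);
	assert((data & TOY_PWQ) &&
	       (struct toy_pwq *)(data & ~TOY_FLAG_MASK) == &pwq);

	data = toy_set_pool_id(42);
	assert(!(data & TOY_PWQ) && (data >> TOY_POOL_SHIFT) == 42);
	return 0;
}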
754 :
755 : /**
756 : * get_work_pool - return the worker_pool a given work was associated with
757 : * @work: the work item of interest
758 : *
759 : * Pools are created and destroyed under wq_pool_mutex, and allow read
760 : * access under RCU read lock. As such, this function should be
761 : * called under wq_pool_mutex or inside of a rcu_read_lock() region.
762 : *
763 : * All fields of the returned pool are accessible as long as the above
764 : * mentioned locking is in effect. If the returned pool needs to be used
765 : * beyond the critical section, the caller is responsible for ensuring the
766 : * returned pool is and stays online.
767 : *
768 : * Return: The worker_pool @work was last associated with. %NULL if none.
769 : */
770 23 : static struct worker_pool *get_work_pool(struct work_struct *work)
771 : {
772 46 : unsigned long data = atomic_long_read(&work->data);
773 : int pool_id;
774 :
775 : assert_rcu_or_pool_mutex();
776 :
777 23 : if (data & WORK_STRUCT_PWQ)
778 2 : return work_struct_pwq(data)->pool;
779 :
780 21 : pool_id = data >> WORK_OFFQ_POOL_SHIFT;
781 21 : if (pool_id == WORK_OFFQ_POOL_NONE)
782 : return NULL;
783 :
784 8 : return idr_find(&worker_pool_idr, pool_id);
785 : }
786 :
787 : /**
788 : * get_work_pool_id - return the worker pool ID a given work is associated with
789 : * @work: the work item of interest
790 : *
791 : * Return: The worker_pool ID @work was last associated with.
792 : * %WORK_OFFQ_POOL_NONE if none.
793 : */
794 : static int get_work_pool_id(struct work_struct *work)
795 : {
796 42 : unsigned long data = atomic_long_read(&work->data);
797 :
798 21 : if (data & WORK_STRUCT_PWQ)
799 0 : return work_struct_pwq(data)->pool->id;
800 :
801 21 : return data >> WORK_OFFQ_POOL_SHIFT;
802 : }
803 :
804 0 : static void mark_work_canceling(struct work_struct *work)
805 : {
806 0 : unsigned long pool_id = get_work_pool_id(work);
807 :
808 0 : pool_id <<= WORK_OFFQ_POOL_SHIFT;
809 0 : set_work_data(work, pool_id | WORK_OFFQ_CANCELING, WORK_STRUCT_PENDING);
810 0 : }
811 :
812 : static bool work_is_canceling(struct work_struct *work)
813 : {
814 0 : unsigned long data = atomic_long_read(&work->data);
815 :
816 0 : return !(data & WORK_STRUCT_PWQ) && (data & WORK_OFFQ_CANCELING);
817 : }
818 :
819 : /*
820 : * Policy functions. These define the policies on how the global worker
821 : * pools are managed. Unless noted otherwise, these functions assume that
822 : * they're being called with pool->lock held.
823 : */
824 :
825 : static bool __need_more_worker(struct worker_pool *pool)
826 : {
827 : return !pool->nr_running;
828 : }
829 :
830 : /*
831 : * Need to wake up a worker? Called from anything but currently
832 : * running workers.
833 : *
834 : * Note that, because unbound workers never contribute to nr_running, this
835 : * function will always return %true for unbound pools as long as the
836 : * worklist isn't empty.
837 : */
838 : static bool need_more_worker(struct worker_pool *pool)
839 : {
840 76 : return !list_empty(&pool->worklist) && __need_more_worker(pool);
841 : }
842 :
843 : /* Can I start working? Called from busy but !running workers. */
844 : static bool may_start_working(struct worker_pool *pool)
845 : {
846 : return pool->nr_idle;
847 : }
848 :
849 : /* Do I need to keep working? Called from currently running workers. */
850 : static bool keep_working(struct worker_pool *pool)
851 : {
852 28 : return !list_empty(&pool->worklist) && (pool->nr_running <= 1);
853 : }
854 :
855 : /* Do we need a new worker? Called from manager. */
856 : static bool need_to_create_worker(struct worker_pool *pool)
857 : {
858 2 : return need_more_worker(pool) && !may_start_working(pool);
859 : }
860 :
861 : /* Do we have too many workers and should some go away? */
862 : static bool too_many_workers(struct worker_pool *pool)
863 : {
864 23 : bool managing = pool->flags & POOL_MANAGER_ACTIVE;
865 23 : int nr_idle = pool->nr_idle + managing; /* manager is considered idle */
866 23 : int nr_busy = pool->nr_workers - nr_idle;
867 :
868 23 : return nr_idle > 2 && (nr_idle - 2) * MAX_IDLE_WORKERS_RATIO >= nr_busy;
869 : }
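/*
 * Worked example (a sketch) of the check above with MAX_IDLE_WORKERS_RATIO
 * == 4 and eight busy workers: three idle workers are tolerated, while a
 * fourth makes (nr_idle - 2) * 4 >= nr_busy true and becomes eligible for
 * culling by the idle timer.
 */
#include <assert.h>

static int toy_too_many_workers(int nr_idle, int nr_busy)
{
	return nr_idle > 2 && (nr_idle - 2) * 4 >= nr_busy;
}

int main(void)
{
	assert(!toy_too_many_workers(3, 8));	/* 1 * 4 < 8  -> keep */
	assert(toy_too_many_workers(4, 8));	/* 2 * 4 >= 8 -> cull */
	return 0;
}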
870 :
871 : /*
872 : * Wake up functions.
873 : */
874 :
875 : /* Return the first idle worker. Called with pool->lock held. */
876 : static struct worker *first_idle_worker(struct worker_pool *pool)
877 : {
878 32 : if (unlikely(list_empty(&pool->idle_list)))
879 : return NULL;
880 :
881 16 : return list_first_entry(&pool->idle_list, struct worker, entry);
882 : }
883 :
884 : /**
885 : * wake_up_worker - wake up an idle worker
886 : * @pool: worker pool to wake worker from
887 : *
888 : * Wake up the first idle worker of @pool.
889 : *
890 : * CONTEXT:
891 : * raw_spin_lock_irq(pool->lock).
892 : */
893 : static void wake_up_worker(struct worker_pool *pool)
894 : {
895 16 : struct worker *worker = first_idle_worker(pool);
896 :
897 16 : if (likely(worker))
898 16 : wake_up_process(worker->task);
899 : }
900 :
901 : /**
902 : * worker_set_flags - set worker flags and adjust nr_running accordingly
903 : * @worker: self
904 : * @flags: flags to set
905 : *
906 : * Set @flags in @worker->flags and adjust nr_running accordingly.
907 : *
908 : * CONTEXT:
909 : * raw_spin_lock_irq(pool->lock)
910 : */
911 13 : static inline void worker_set_flags(struct worker *worker, unsigned int flags)
912 : {
913 13 : struct worker_pool *pool = worker->pool;
914 :
915 26 : WARN_ON_ONCE(worker->task != current);
916 :
917 : /* If transitioning into NOT_RUNNING, adjust nr_running. */
918 26 : if ((flags & WORKER_NOT_RUNNING) &&
919 13 : !(worker->flags & WORKER_NOT_RUNNING)) {
920 6 : pool->nr_running--;
921 : }
922 :
923 13 : worker->flags |= flags;
924 13 : }
925 :
926 : /**
927 : * worker_clr_flags - clear worker flags and adjust nr_running accordingly
928 : * @worker: self
929 : * @flags: flags to clear
930 : *
931 : * Clear @flags in @worker->flags and adjust nr_running accordingly.
932 : *
933 : * CONTEXT:
934 : * raw_spin_lock_irq(pool->lock)
935 : */
936 47 : static inline void worker_clr_flags(struct worker *worker, unsigned int flags)
937 : {
938 47 : struct worker_pool *pool = worker->pool;
939 47 : unsigned int oflags = worker->flags;
940 :
941 94 : WARN_ON_ONCE(worker->task != current);
942 :
943 47 : worker->flags &= ~flags;
944 :
945 : /*
946 : * If transitioning out of NOT_RUNNING, increment nr_running. Note
947 : * that the nested NOT_RUNNING is not a noop. NOT_RUNNING is a mask
948 : * of multiple flags, not a single flag.
949 : */
950 47 : if ((flags & WORKER_NOT_RUNNING) && (oflags & WORKER_NOT_RUNNING))
951 22 : if (!(worker->flags & WORKER_NOT_RUNNING))
952 6 : pool->nr_running++;
953 47 : }
954 :
955 : #ifdef CONFIG_WQ_CPU_INTENSIVE_REPORT
956 :
957 : /*
958 : * Concurrency-managed per-cpu work items that hog CPU for longer than
959 : * wq_cpu_intensive_thresh_us trigger the automatic CPU_INTENSIVE mechanism,
960 : * which prevents them from stalling other concurrency-managed work items. If a
961 : * work function keeps triggering this mechanism, it's likely that the work item
962 : * should be using an unbound workqueue instead.
963 : *
964 : * wq_cpu_intensive_report() tracks work functions which trigger such conditions
965 : * and report them so that they can be examined and converted to use unbound
966 : * workqueues as appropriate. To avoid flooding the console, each violating work
967 : * function is tracked and reported with exponential backoff.
968 : */
969 : #define WCI_MAX_ENTS 128
970 :
971 : struct wci_ent {
972 : work_func_t func;
973 : atomic64_t cnt;
974 : struct hlist_node hash_node;
975 : };
976 :
977 : static struct wci_ent wci_ents[WCI_MAX_ENTS];
978 : static int wci_nr_ents;
979 : static DEFINE_RAW_SPINLOCK(wci_lock);
980 : static DEFINE_HASHTABLE(wci_hash, ilog2(WCI_MAX_ENTS));
981 :
982 : static struct wci_ent *wci_find_ent(work_func_t func)
983 : {
984 : struct wci_ent *ent;
985 :
986 : hash_for_each_possible_rcu(wci_hash, ent, hash_node,
987 : (unsigned long)func) {
988 : if (ent->func == func)
989 : return ent;
990 : }
991 : return NULL;
992 : }
993 :
994 : static void wq_cpu_intensive_report(work_func_t func)
995 : {
996 : struct wci_ent *ent;
997 :
998 : restart:
999 : ent = wci_find_ent(func);
1000 : if (ent) {
1001 : u64 cnt;
1002 :
1003 : /*
1004 : * Start reporting from the fourth time and back off
1005 : * exponentially.
1006 : */
1007 : cnt = atomic64_inc_return_relaxed(&ent->cnt);
1008 : if (cnt >= 4 && is_power_of_2(cnt))
1009 : printk_deferred(KERN_WARNING "workqueue: %ps hogged CPU for >%luus %llu times, consider switching to WQ_UNBOUND\n",
1010 : ent->func, wq_cpu_intensive_thresh_us,
1011 : atomic64_read(&ent->cnt));
1012 : return;
1013 : }
1014 :
1015 : /*
1016 : * @func is a new violation. Allocate a new entry for it. If wci_ents[]
1017 : * is exhausted, something went really wrong and we probably made enough
1018 : * noise already.
1019 : */
1020 : if (wci_nr_ents >= WCI_MAX_ENTS)
1021 : return;
1022 :
1023 : raw_spin_lock(&wci_lock);
1024 :
1025 : if (wci_nr_ents >= WCI_MAX_ENTS) {
1026 : raw_spin_unlock(&wci_lock);
1027 : return;
1028 : }
1029 :
1030 : if (wci_find_ent(func)) {
1031 : raw_spin_unlock(&wci_lock);
1032 : goto restart;
1033 : }
1034 :
1035 : ent = &wci_ents[wci_nr_ents++];
1036 : ent->func = func;
1037 : atomic64_set(&ent->cnt, 1);
1038 : hash_add_rcu(wci_hash, &ent->hash_node, (unsigned long)func);
1039 :
1040 : raw_spin_unlock(&wci_lock);
1041 : }
1042 :
1043 : #else /* CONFIG_WQ_CPU_INTENSIVE_REPORT */
1044 : static void wq_cpu_intensive_report(work_func_t func) {}
1045 : #endif /* CONFIG_WQ_CPU_INTENSIVE_REPORT */
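/*
 * Sketch (userspace): the reporting cadence used by wq_cpu_intensive_report()
 * above when CONFIG_WQ_CPU_INTENSIVE_REPORT is enabled. A violating work
 * function is reported on its 4th, 8th, 16th, ... occurrence, i.e. whenever
 * the running count is a power of two and at least four.
 */
#include <assert.h>

static int toy_should_report(unsigned long long cnt)
{
	return cnt >= 4 && (cnt & (cnt - 1)) == 0;	/* is_power_of_2() */
}

int main(void)
{
	assert(!toy_should_report(1) && !toy_should_report(3));
	assert(toy_should_report(4) && !toy_should_report(5));
	assert(toy_should_report(8) && toy_should_report(16));
	return 0;
}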
1046 :
1047 : /**
1048 : * wq_worker_running - a worker is running again
1049 : * @task: task waking up
1050 : *
1051 : * This function is called when a worker returns from schedule()
1052 : */
1053 15 : void wq_worker_running(struct task_struct *task)
1054 : {
1055 15 : struct worker *worker = kthread_data(task);
1056 :
1057 15 : if (!READ_ONCE(worker->sleeping))
1058 : return;
1059 :
1060 : /*
1061 : * If preempted by unbind_workers() between the WORKER_NOT_RUNNING check
1062 : * and the nr_running increment below, we may ruin the nr_running reset
1063 : * and leave with an unexpected pool->nr_running == 1 on the newly unbound
1064 : * pool. Protect against such race.
1065 : */
1066 0 : preempt_disable();
1067 0 : if (!(worker->flags & WORKER_NOT_RUNNING))
1068 0 : worker->pool->nr_running++;
1069 0 : preempt_enable();
1070 :
1071 : /*
1072 : * CPU intensive auto-detection cares about how long a work item hogged
1073 : * CPU without sleeping. Reset the starting timestamp on wakeup.
1074 : */
1075 0 : worker->current_at = worker->task->se.sum_exec_runtime;
1076 :
1077 0 : WRITE_ONCE(worker->sleeping, 0);
1078 : }
1079 :
1080 : /**
1081 : * wq_worker_sleeping - a worker is going to sleep
1082 : * @task: task going to sleep
1083 : *
1084 : * This function is called from schedule() when a busy worker is
1085 : * going to sleep.
1086 : */
1087 24 : void wq_worker_sleeping(struct task_struct *task)
1088 : {
1089 24 : struct worker *worker = kthread_data(task);
1090 : struct worker_pool *pool;
1091 :
1092 : /*
1093 : * Rescuers, which may not have all the fields set up like normal
1094 : * workers, also reach here, let's not access anything before
1095 : * checking NOT_RUNNING.
1096 : */
1097 24 : if (worker->flags & WORKER_NOT_RUNNING)
1098 : return;
1099 :
1100 0 : pool = worker->pool;
1101 :
1102 : /* Return if preempted before wq_worker_running() was reached */
1103 0 : if (READ_ONCE(worker->sleeping))
1104 : return;
1105 :
1106 0 : WRITE_ONCE(worker->sleeping, 1);
1107 0 : raw_spin_lock_irq(&pool->lock);
1108 :
1109 : /*
1110 : * Recheck in case unbind_workers() preempted us. We don't
1111 : * want to decrement nr_running after the worker is unbound
1112 : * and nr_running has been reset.
1113 : */
1114 0 : if (worker->flags & WORKER_NOT_RUNNING) {
1115 0 : raw_spin_unlock_irq(&pool->lock);
1116 0 : return;
1117 : }
1118 :
1119 0 : pool->nr_running--;
1120 0 : if (need_more_worker(pool)) {
1121 0 : worker->current_pwq->stats[PWQ_STAT_CM_WAKEUP]++;
1122 : wake_up_worker(pool);
1123 : }
1124 0 : raw_spin_unlock_irq(&pool->lock);
1125 : }
1126 :
1127 : /**
1128 : * wq_worker_tick - a scheduler tick occurred while a kworker is running
1129 : * @task: task currently running
1130 : *
1131 : * Called from scheduler_tick(). We're in the IRQ context and the current
1132 : * worker's fields which follow the 'K' locking rule can be accessed safely.
1133 : */
1134 0 : void wq_worker_tick(struct task_struct *task)
1135 : {
1136 0 : struct worker *worker = kthread_data(task);
1137 0 : struct pool_workqueue *pwq = worker->current_pwq;
1138 0 : struct worker_pool *pool = worker->pool;
1139 :
1140 0 : if (!pwq)
1141 : return;
1142 :
1143 0 : pwq->stats[PWQ_STAT_CPU_TIME] += TICK_USEC;
1144 :
1145 0 : if (!wq_cpu_intensive_thresh_us)
1146 : return;
1147 :
1148 : /*
1149 : * If the current worker is concurrency managed and hogged the CPU for
1150 : * longer than wq_cpu_intensive_thresh_us, it's automatically marked
1151 : * CPU_INTENSIVE to avoid stalling other concurrency-managed work items.
1152 : *
1153 : * If @worker->sleeping is set, @worker is in the process of
1154 : * switching out voluntarily and won't be contributing to
1155 : * @pool->nr_running until it wakes up. As wq_worker_sleeping() also
1156 : * decrements ->nr_running, setting CPU_INTENSIVE here can lead to
1157 : * double decrements. The task is releasing the CPU anyway. Let's skip.
1158 : * We probably want to make this prettier in the future.
1159 : */
1160 0 : if ((worker->flags & WORKER_NOT_RUNNING) || READ_ONCE(worker->sleeping) ||
1161 0 : worker->task->se.sum_exec_runtime - worker->current_at <
1162 0 : wq_cpu_intensive_thresh_us * NSEC_PER_USEC)
1163 : return;
1164 :
1165 0 : raw_spin_lock(&pool->lock);
1166 :
1167 0 : worker_set_flags(worker, WORKER_CPU_INTENSIVE);
1168 0 : wq_cpu_intensive_report(worker->current_func);
1169 0 : pwq->stats[PWQ_STAT_CPU_INTENSIVE]++;
1170 :
1171 0 : if (need_more_worker(pool)) {
1172 0 : pwq->stats[PWQ_STAT_CM_WAKEUP]++;
1173 : wake_up_worker(pool);
1174 : }
1175 :
1176 0 : raw_spin_unlock(&pool->lock);
1177 : }
1178 :
1179 : /**
1180 : * wq_worker_last_func - retrieve worker's last work function
1181 : * @task: Task to retrieve last work function of.
1182 : *
1183 : * Determine the last function a worker executed. This is called from
1184 : * the scheduler to get a worker's last known identity.
1185 : *
1186 : * CONTEXT:
1187 : * raw_spin_lock_irq(rq->lock)
1188 : *
1189 : * This function is called during schedule() when a kworker is going
1190 : * to sleep. It's used by psi to identify aggregation workers during
1191 : * dequeuing, to allow periodic aggregation to shut off when that
1192 : * worker is the last task in the system or cgroup to go to sleep.
1193 : *
1194 : * As this function doesn't involve any workqueue-related locking, it
1195 : * only returns stable values when called from inside the scheduler's
1196 : * queuing and dequeuing paths, when @task, which must be a kworker,
1197 : * is guaranteed to not be processing any works.
1198 : *
1199 : * Return:
1200 : * The last work function %current executed as a worker, NULL if it
1201 : * hasn't executed any work yet.
1202 : */
1203 0 : work_func_t wq_worker_last_func(struct task_struct *task)
1204 : {
1205 0 : struct worker *worker = kthread_data(task);
1206 :
1207 0 : return worker->last_func;
1208 : }
1209 :
1210 : /**
1211 : * find_worker_executing_work - find worker which is executing a work
1212 : * @pool: pool of interest
1213 : * @work: work to find worker for
1214 : *
1215 : * Find a worker which is executing @work on @pool by searching
1216 : * @pool->busy_hash which is keyed by the address of @work. For a worker
1217 : * to match, its current execution should match the address of @work and
1218 : * its work function. This is to avoid unwanted dependency between
1219 : * unrelated work executions through a work item being recycled while still
1220 : * being executed.
1221 : *
1222 : * This is a bit tricky. A work item may be freed once its execution
1223 : * starts and nothing prevents the freed area from being recycled for
1224 : * another work item. If the same work item address ends up being reused
1225 : * before the original execution finishes, workqueue will identify the
1226 : * recycled work item as currently executing and make it wait until the
1227 : * current execution finishes, introducing an unwanted dependency.
1228 : *
1229 : * This function checks the work item address and work function to avoid
1230 : * false positives. Note that this isn't complete as one may construct a
1231 : * work function which can introduce dependency onto itself through a
1232 : * recycled work item. Well, if somebody wants to shoot oneself in the
1233 : * foot that badly, there's only so much we can do, and if such deadlock
1234 : * actually occurs, it should be easy to locate the culprit work function.
1235 : *
1236 : * CONTEXT:
1237 : * raw_spin_lock_irq(pool->lock).
1238 : *
1239 : * Return:
1240 : * Pointer to worker which is executing @work if found, %NULL
1241 : * otherwise.
1242 : */
1243 : static struct worker *find_worker_executing_work(struct worker_pool *pool,
1244 : struct work_struct *work)
1245 : {
1246 : struct worker *worker;
1247 :
1248 36 : hash_for_each_possible(pool->busy_hash, worker, hentry,
1249 : (unsigned long)work)
1250 0 : if (worker->current_work == work &&
1251 0 : worker->current_func == work->func)
1252 : return worker;
1253 :
1254 : return NULL;
1255 : }
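/*
 * Sketch (userspace): why the lookup above matches on both the work item's
 * address and its function. A freed allocation can be recycled at the same
 * address for an unrelated work item; requiring the function to match as
 * well filters most such collisions.
 */
#include <assert.h>

typedef void (*toy_func_t)(void *arg);

struct toy_busy_worker {
	void *current_work;
	toy_func_t current_func;
};

static int toy_matches(const struct toy_busy_worker *w, void *work, toy_func_t fn)
{
	return w->current_work == work && w->current_func == fn;
}

static void toy_fn_a(void *arg) { (void)arg; }
static void toy_fn_b(void *arg) { (void)arg; }

int main(void)
{
	char slot[16];	/* one chunk of memory reused for two "work items" */
	struct toy_busy_worker busy = { .current_work = slot, .current_func = toy_fn_a };

	assert(toy_matches(&busy, slot, toy_fn_a));	/* genuinely the same work */
	assert(!toy_matches(&busy, slot, toy_fn_b));	/* recycled for another */
	return 0;
}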
1256 :
1257 : /**
1258 : * move_linked_works - move linked works to a list
1259 : * @work: start of series of works to be scheduled
1260 : * @head: target list to append @work to
1261 : * @nextp: out parameter for nested worklist walking
1262 : *
1263 : * Schedule linked works starting from @work to @head. Work series to
1264 : * be scheduled starts at @work and includes any consecutive work with
1265 : * WORK_STRUCT_LINKED set in its predecessor.
1266 : *
1267 : * If @nextp is not NULL, it's updated to point to the next work of
1268 : * the last scheduled work. This allows move_linked_works() to be
1269 : * nested inside outer list_for_each_entry_safe().
1270 : *
1271 : * CONTEXT:
1272 : * raw_spin_lock_irq(pool->lock).
1273 : */
1274 : static void move_linked_works(struct work_struct *work, struct list_head *head,
1275 : struct work_struct **nextp)
1276 : {
1277 : struct work_struct *n;
1278 :
1279 : /*
1280 : * Linked worklist will always end before the end of the list,
1281 : * use NULL for list head.
1282 : */
1283 4 : list_for_each_entry_safe_from(work, n, NULL, entry) {
1284 8 : list_move_tail(&work->entry, head);
1285 4 : if (!(*work_data_bits(work) & WORK_STRUCT_LINKED))
1286 : break;
1287 : }
1288 :
1289 : /*
1290 : * If we're already inside safe list traversal and have moved
1291 : * multiple works to the scheduled queue, the next position
1292 : * needs to be updated.
1293 : */
1294 : if (nextp)
1295 0 : *nextp = n;
1296 : }
1297 :
1298 : /**
1299 : * get_pwq - get an extra reference on the specified pool_workqueue
1300 : * @pwq: pool_workqueue to get
1301 : *
1302 : * Obtain an extra reference on @pwq. The caller should guarantee that
1303 : * @pwq has positive refcnt and be holding the matching pool->lock.
1304 : */
1305 16 : static void get_pwq(struct pool_workqueue *pwq)
1306 : {
1307 : lockdep_assert_held(&pwq->pool->lock);
1308 16 : WARN_ON_ONCE(pwq->refcnt <= 0);
1309 16 : pwq->refcnt++;
1310 16 : }
1311 :
1312 : /**
1313 : * put_pwq - put a pool_workqueue reference
1314 : * @pwq: pool_workqueue to put
1315 : *
1316 : * Drop a reference of @pwq. If its refcnt reaches zero, schedule its
1317 : * destruction. The caller should be holding the matching pool->lock.
1318 : */
1319 16 : static void put_pwq(struct pool_workqueue *pwq)
1320 : {
1321 : lockdep_assert_held(&pwq->pool->lock);
1322 16 : if (likely(--pwq->refcnt))
1323 : return;
1324 0 : if (WARN_ON_ONCE(!(pwq->wq->flags & WQ_UNBOUND)))
1325 : return;
1326 : /*
1327 : * @pwq can't be released under pool->lock, bounce to
1328 : * pwq_unbound_release_workfn(). This never recurses on the same
1329 : * pool->lock as this path is taken only for unbound workqueues and
1330 : * the release work item is scheduled on a per-cpu workqueue. To
1331 : * avoid lockdep warning, unbound pool->locks are given lockdep
1332 : * subclass of 1 in get_unbound_pool().
1333 : */
1334 0 : schedule_work(&pwq->unbound_release_work);
1335 : }
1336 :
1337 : /**
1338 : * put_pwq_unlocked - put_pwq() with surrounding pool lock/unlock
1339 : * @pwq: pool_workqueue to put (can be %NULL)
1340 : *
1341 : * put_pwq() with locking. This function also allows %NULL @pwq.
1342 : */
1343 6 : static void put_pwq_unlocked(struct pool_workqueue *pwq)
1344 : {
1345 6 : if (pwq) {
1346 : /*
1347 : * As both pwqs and pools are RCU protected, the
1348 : * following lock operations are safe.
1349 : */
1350 0 : raw_spin_lock_irq(&pwq->pool->lock);
1351 0 : put_pwq(pwq);
1352 0 : raw_spin_unlock_irq(&pwq->pool->lock);
1353 : }
1354 6 : }
1355 :
1356 0 : static void pwq_activate_inactive_work(struct work_struct *work)
1357 : {
1358 0 : struct pool_workqueue *pwq = get_work_pwq(work);
1359 :
1360 0 : trace_workqueue_activate_work(work);
1361 0 : if (list_empty(&pwq->pool->worklist))
1362 0 : pwq->pool->watchdog_ts = jiffies;
1363 0 : move_linked_works(work, &pwq->pool->worklist, NULL);
1364 0 : __clear_bit(WORK_STRUCT_INACTIVE_BIT, work_data_bits(work));
1365 0 : pwq->nr_active++;
1366 0 : }
1367 :
1368 : static void pwq_activate_first_inactive(struct pool_workqueue *pwq)
1369 : {
1370 0 : struct work_struct *work = list_first_entry(&pwq->inactive_works,
1371 : struct work_struct, entry);
1372 :
1373 0 : pwq_activate_inactive_work(work);
1374 : }
1375 :
1376 : /**
1377 : * pwq_dec_nr_in_flight - decrement pwq's nr_in_flight
1378 : * @pwq: pwq of interest
1379 : * @work_data: work_data of work which left the queue
1380 : *
1381 : * A work either has completed or is removed from pending queue,
1382 : * decrement nr_in_flight of its pwq and handle workqueue flushing.
1383 : *
1384 : * CONTEXT:
1385 : * raw_spin_lock_irq(pool->lock).
1386 : */
1387 16 : static void pwq_dec_nr_in_flight(struct pool_workqueue *pwq, unsigned long work_data)
1388 : {
1389 16 : int color = get_work_color(work_data);
1390 :
1391 16 : if (!(work_data & WORK_STRUCT_INACTIVE)) {
1392 14 : pwq->nr_active--;
1393 28 : if (!list_empty(&pwq->inactive_works)) {
1394 : /* one down, submit an inactive one */
1395 0 : if (pwq->nr_active < pwq->max_active)
1396 0 : pwq_activate_first_inactive(pwq);
1397 : }
1398 : }
1399 :
1400 16 : pwq->nr_in_flight[color]--;
1401 :
1402 : /* is flush in progress and are we at the flushing tip? */
1403 16 : if (likely(pwq->flush_color != color))
1404 : goto out_put;
1405 :
1406 : /* are there still in-flight works? */
1407 0 : if (pwq->nr_in_flight[color])
1408 : goto out_put;
1409 :
1410 : /* this pwq is done, clear flush_color */
1411 0 : pwq->flush_color = -1;
1412 :
1413 : /*
1414 : * If this was the last pwq, wake up the first flusher. It
1415 : * will handle the rest.
1416 : */
1417 0 : if (atomic_dec_and_test(&pwq->wq->nr_pwqs_to_flush))
1418 0 : complete(&pwq->wq->first_flusher->done);
1419 : out_put:
1420 16 : put_pwq(pwq);
1421 16 : }
1422 :
1423 : /**
1424 : * try_to_grab_pending - steal work item from worklist and disable irq
1425 : * @work: work item to steal
1426 : * @is_dwork: @work is a delayed_work
1427 : * @flags: place to store irq state
1428 : *
1429 : * Try to grab PENDING bit of @work. This function can handle @work in any
1430 : * stable state - idle, on timer or on worklist.
1431 : *
1432 : * Return:
1433 : *
1434 : * ======== ================================================================
1435 : * 1 if @work was pending and we successfully stole PENDING
1436 : * 0 if @work was idle and we claimed PENDING
1437 : * -EAGAIN if PENDING couldn't be grabbed at the moment, safe to busy-retry
1438 : * -ENOENT if someone else is canceling @work, this state may persist
1439 : * for arbitrarily long
1440 : * ======== ================================================================
1441 : *
1442 : * Note:
1443 : * On >= 0 return, the caller owns @work's PENDING bit. To avoid getting
1444 : * interrupted while holding PENDING and @work off queue, irq must be
1445 : * disabled on entry. This, combined with delayed_work->timer being
1446 : * irqsafe, ensures that we return -EAGAIN for finite short period of time.
1447 : *
1448 : * On successful return, >= 0, irq is disabled and the caller is
1449 : * responsible for releasing it using local_irq_restore(*@flags).
1450 : *
1451 : * This function is safe to call from any context including IRQ handler.
1452 : */
1453 21 : static int try_to_grab_pending(struct work_struct *work, bool is_dwork,
1454 : unsigned long *flags)
1455 : {
1456 : struct worker_pool *pool;
1457 : struct pool_workqueue *pwq;
1458 :
1459 21 : local_irq_save(*flags);
1460 :
1461 : /* try to steal the timer if it exists */
1462 21 : if (is_dwork) {
1463 21 : struct delayed_work *dwork = to_delayed_work(work);
1464 :
1465 : /*
1466 : * dwork->timer is irqsafe. If del_timer() fails, it's
1467 : * guaranteed that the timer is not queued anywhere and not
1468 : * running on the local CPU.
1469 : */
1470 42 : if (likely(del_timer(&dwork->timer)))
1471 : return 1;
1472 : }
1473 :
1474 : /* try to claim PENDING the normal way */
1475 42 : if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work)))
1476 : return 0;
1477 :
1478 : rcu_read_lock();
1479 : /*
1480 : * The queueing is in progress, or it is already queued. Try to
1481 : * steal it from ->worklist without clearing WORK_STRUCT_PENDING.
1482 : */
1483 0 : pool = get_work_pool(work);
1484 0 : if (!pool)
1485 : goto fail;
1486 :
1487 0 : raw_spin_lock(&pool->lock);
1488 : /*
1489 : * work->data is guaranteed to point to pwq only while the work
1490 : * item is queued on pwq->wq, and both updating work->data to point
1491 : * to pwq on queueing and to pool on dequeueing are done under
1492 : * pwq->pool->lock. This in turn guarantees that, if work->data
1493 : * points to pwq which is associated with a locked pool, the work
1494 : * item is currently queued on that pool.
1495 : */
1496 0 : pwq = get_work_pwq(work);
1497 0 : if (pwq && pwq->pool == pool) {
1498 0 : debug_work_deactivate(work);
1499 :
1500 : /*
1501 : * A cancelable inactive work item must be in the
1502 : * pwq->inactive_works since a queued barrier can't be
1503 : * canceled (see the comments in insert_wq_barrier()).
1504 : *
1505 : * An inactive work item cannot be grabbed directly because
1506 : * it might have linked barrier work items which, if left
1507 : * on the inactive_works list, will confuse pwq->nr_active
1508 : * management later on and cause stall. Make sure the work
1509 : * item is activated before grabbing.
1510 : */
1511 0 : if (*work_data_bits(work) & WORK_STRUCT_INACTIVE)
1512 0 : pwq_activate_inactive_work(work);
1513 :
1514 0 : list_del_init(&work->entry);
1515 0 : pwq_dec_nr_in_flight(pwq, *work_data_bits(work));
1516 :
1517 : /* work->data points to pwq iff queued, point to pool */
1518 0 : set_work_pool_and_keep_pending(work, pool->id);
1519 :
1520 0 : raw_spin_unlock(&pool->lock);
1521 : rcu_read_unlock();
1522 0 : return 1;
1523 : }
1524 0 : raw_spin_unlock(&pool->lock);
1525 : fail:
1526 : rcu_read_unlock();
1527 0 : local_irq_restore(*flags);
1528 0 : if (work_is_canceling(work))
1529 : return -ENOENT;
1530 : cpu_relax();
1531 0 : return -EAGAIN;
1532 : }
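/*
 * Hedged usage sketch (not code from this file): callers such as the cancel
 * paths typically spin on -EAGAIN and handle -ENOENT separately, roughly:
 *
 *	unsigned long irq_flags;
 *	int ret;
 *
 *	do {
 *		ret = try_to_grab_pending(work, is_dwork, &irq_flags);
 *	} while (unlikely(ret == -EAGAIN));
 *
 *	if (ret >= 0) {
 *		// PENDING is owned with IRQs disabled; requeue or clear the
 *		// work item, then local_irq_restore(irq_flags);
 *	}
 */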
1533 :
1534 : /**
1535 : * insert_work - insert a work into a pool
1536 : * @pwq: pwq @work belongs to
1537 : * @work: work to insert
1538 : * @head: insertion point
1539 : * @extra_flags: extra WORK_STRUCT_* flags to set
1540 : *
1541 : * Insert @work which belongs to @pwq after @head. @extra_flags is or'd to
1542 : * work_struct flags.
1543 : *
1544 : * CONTEXT:
1545 : * raw_spin_lock_irq(pool->lock).
1546 : */
1547 16 : static void insert_work(struct pool_workqueue *pwq, struct work_struct *work,
1548 : struct list_head *head, unsigned int extra_flags)
1549 : {
1550 16 : struct worker_pool *pool = pwq->pool;
1551 :
1552 : /* record the work call stack in order to print it in KASAN reports */
1553 16 : kasan_record_aux_stack_noalloc(work);
1554 :
1555 : /* we own @work, set data and link */
1556 32 : set_work_pwq(work, pwq, extra_flags);
1557 32 : list_add_tail(&work->entry, head);
1558 16 : get_pwq(pwq);
1559 :
1560 16 : if (__need_more_worker(pool))
1561 : wake_up_worker(pool);
1562 16 : }
1563 :
1564 : /*
1565 : * Test whether @work is being queued from another work executing on the
1566 : * same workqueue.
1567 : */
1568 : static bool is_chained_work(struct workqueue_struct *wq)
1569 : {
1570 : struct worker *worker;
1571 :
1572 0 : worker = current_wq_worker();
1573 : /*
1574 : * Return %true iff I'm a worker executing a work item on @wq. If
1575 : * I'm @worker, it's safe to dereference it without locking.
1576 : */
1577 0 : return worker && worker->current_pwq->wq == wq;
1578 : }
1579 :
1580 : /*
1581 : * When queueing an unbound work item to a wq, prefer local CPU if allowed
1582 : * by wq_unbound_cpumask. Otherwise, round robin among the allowed ones to
1583 : * avoid perturbing sensitive tasks.
1584 : */
1585 7 : static int wq_select_unbound_cpu(int cpu)
1586 : {
1587 : int new_cpu;
1588 :
1589 7 : if (likely(!wq_debug_force_rr_cpu)) {
1590 7 : if (cpumask_test_cpu(cpu, wq_unbound_cpumask))
1591 : return cpu;
1592 : } else {
1593 0 : pr_warn_once("workqueue: round-robin CPU selection forced, expect performance impact\n");
1594 : }
1595 :
1596 0 : if (cpumask_empty(wq_unbound_cpumask))
1597 : return cpu;
1598 :
1599 0 : new_cpu = __this_cpu_read(wq_rr_cpu_last);
1600 0 : new_cpu = cpumask_next_and(new_cpu, wq_unbound_cpumask, cpu_online_mask);
1601 0 : if (unlikely(new_cpu >= nr_cpu_ids)) {
1602 0 : new_cpu = cpumask_first_and(wq_unbound_cpumask, cpu_online_mask);
1603 0 : if (unlikely(new_cpu >= nr_cpu_ids))
1604 : return cpu;
1605 : }
1606 0 : __this_cpu_write(wq_rr_cpu_last, new_cpu);
1607 :
1608 0 : return new_cpu;
1609 : }
1610 :
1611 14 : static void __queue_work(int cpu, struct workqueue_struct *wq,
1612 : struct work_struct *work)
1613 : {
1614 : struct pool_workqueue *pwq;
1615 : struct worker_pool *last_pool;
1616 : struct list_head *worklist;
1617 : unsigned int work_flags;
1618 14 : unsigned int req_cpu = cpu;
1619 :
1620 : /*
1621 : * While a work item is PENDING && off queue, a task trying to
1622 : * steal the PENDING will busy-loop waiting for it to either get
1623 : * queued or lose PENDING. Grabbing PENDING and queueing should
1624 : * happen with IRQ disabled.
1625 : */
1626 : lockdep_assert_irqs_disabled();
1627 :
1628 :
1629 : /*
1630 : * For a draining wq, only works from the same workqueue are
1631 : * allowed. The __WQ_DESTROYING helps to spot the issue that
1632 : * queues a new work item to a wq after destroy_workqueue(wq).
1633 : */
1634 14 : if (unlikely(wq->flags & (__WQ_DESTROYING | __WQ_DRAINING) &&
1635 : WARN_ON_ONCE(!is_chained_work(wq))))
1636 : return;
1637 : rcu_read_lock();
1638 : retry:
1639 : /* pwq which will be used unless @work is executing elsewhere */
1640 14 : if (wq->flags & WQ_UNBOUND) {
1641 7 : if (req_cpu == WORK_CPU_UNBOUND)
1642 7 : cpu = wq_select_unbound_cpu(raw_smp_processor_id());
1643 7 : pwq = unbound_pwq_by_node(wq, cpu_to_node(cpu));
1644 : } else {
1645 7 : if (req_cpu == WORK_CPU_UNBOUND)
1646 7 : cpu = raw_smp_processor_id();
1647 7 : pwq = per_cpu_ptr(wq->cpu_pwqs, cpu);
1648 : }
1649 :
1650 : /*
1651 : * If @work was previously on a different pool, it might still be
1652 : * running there, in which case the work needs to be queued on that
1653 : * pool to guarantee non-reentrancy.
1654 : */
1655 14 : last_pool = get_work_pool(work);
1656 14 : if (last_pool && last_pool != pwq->pool) {
1657 : struct worker *worker;
1658 :
1659 0 : raw_spin_lock(&last_pool->lock);
1660 :
1661 0 : worker = find_worker_executing_work(last_pool, work);
1662 :
1663 0 : if (worker && worker->current_pwq->wq == wq) {
1664 : pwq = worker->current_pwq;
1665 : } else {
1666 : /* meh... not running there, queue here */
1667 0 : raw_spin_unlock(&last_pool->lock);
1668 0 : raw_spin_lock(&pwq->pool->lock);
1669 : }
1670 : } else {
1671 14 : raw_spin_lock(&pwq->pool->lock);
1672 : }
1673 :
1674 : /*
1675 : * pwq is determined and locked. For unbound pools, we could have
1676 : * raced with pwq release and it could already be dead. If its
1677 : * refcnt is zero, repeat pwq selection. Note that pwqs never die
1678 : * without another pwq replacing it in the numa_pwq_tbl or while
1679 : * work items are executing on it, so the retrying is guaranteed to
1680 : * make forward-progress.
1681 : */
1682 14 : if (unlikely(!pwq->refcnt)) {
1683 0 : if (wq->flags & WQ_UNBOUND) {
1684 0 : raw_spin_unlock(&pwq->pool->lock);
1685 : cpu_relax();
1686 : goto retry;
1687 : }
1688 : /* oops */
1689 0 : WARN_ONCE(true, "workqueue: per-cpu pwq for %s on cpu%d has 0 refcnt",
1690 : wq->name, cpu);
1691 : }
1692 :
1693 : /* pwq determined, queue */
1694 14 : trace_workqueue_queue_work(req_cpu, pwq, work);
1695 :
1696 28 : if (WARN_ON(!list_empty(&work->entry)))
1697 : goto out;
1698 :
1699 14 : pwq->nr_in_flight[pwq->work_color]++;
1700 28 : work_flags = work_color_to_flags(pwq->work_color);
1701 :
1702 14 : if (likely(pwq->nr_active < pwq->max_active)) {
1703 14 : trace_workqueue_activate_work(work);
1704 14 : pwq->nr_active++;
1705 14 : worklist = &pwq->pool->worklist;
1706 14 : if (list_empty(worklist))
1707 13 : pwq->pool->watchdog_ts = jiffies;
1708 : } else {
1709 0 : work_flags |= WORK_STRUCT_INACTIVE;
1710 0 : worklist = &pwq->inactive_works;
1711 : }
1712 :
1713 14 : debug_work_activate(work);
1714 14 : insert_work(pwq, work, worklist, work_flags);
1715 :
1716 : out:
1717 14 : raw_spin_unlock(&pwq->pool->lock);
1718 : rcu_read_unlock();
1719 : }
1720 :
1721 : /**
1722 : * queue_work_on - queue work on specific cpu
1723 : * @cpu: CPU number to execute work on
1724 : * @wq: workqueue to use
1725 : * @work: work to queue
1726 : *
1727             :  * We queue the work to a specific CPU; the caller must ensure that
1728             :  * the CPU can't go away. If the caller fails to ensure that the
1729             :  * specified CPU cannot go away, the work will execute on a randomly
1730             :  * chosen CPU. But note well that callers specifying a CPU that has
1731             :  * never been online will get a splat.
1732 : *
1733 : * Return: %false if @work was already on a queue, %true otherwise.
1734 : */
1735 15 : bool queue_work_on(int cpu, struct workqueue_struct *wq,
1736 : struct work_struct *work)
1737 : {
1738 15 : bool ret = false;
1739 : unsigned long flags;
1740 :
1741 15 : local_irq_save(flags);
1742 :
1743 30 : if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) {
1744 14 : __queue_work(cpu, wq, work);
1745 14 : ret = true;
1746 : }
1747 :
1748 30 : local_irq_restore(flags);
1749 15 : return ret;
1750 : }
1751 : EXPORT_SYMBOL(queue_work_on);
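/*
 * Editor's note -- illustrative sketch, not part of kernel/workqueue.c.
 * Minimal usage of queue_work_on() under the rules documented above: the
 * caller pins @cpu, or accepts that the work may run on another CPU if it
 * cannot.  All "example_" identifiers are hypothetical; DECLARE_WORK(),
 * queue_work_on() and system_wq come from <linux/workqueue.h>.
 */
static void example_work_fn(struct work_struct *work)
{
	pr_info("example work running on CPU%d\n", raw_smp_processor_id());
}
static DECLARE_WORK(example_work, example_work_fn);

static void example_kick(int cpu)
{
	/* %false return means the work item was already pending somewhere */
	if (!queue_work_on(cpu, system_wq, &example_work))
		pr_debug("example_work already pending\n");
}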
1752 :
1753 : /**
1754 : * workqueue_select_cpu_near - Select a CPU based on NUMA node
1755 : * @node: NUMA node ID that we want to select a CPU from
1756 : *
1757 : * This function will attempt to find a "random" cpu available on a given
1758 : * node. If there are no CPUs available on the given node it will return
1759             :  * node. If there are no CPUs available on the given node, it will return
1760 : * available CPU if we need to schedule this work.
1761 : */
1762 : static int workqueue_select_cpu_near(int node)
1763 : {
1764 : int cpu;
1765 :
1766 : /* No point in doing this if NUMA isn't enabled for workqueues */
1767 0 : if (!wq_numa_enabled)
1768 : return WORK_CPU_UNBOUND;
1769 :
1770 : /* Delay binding to CPU if node is not valid or online */
1771 0 : if (node < 0 || node >= MAX_NUMNODES || !node_online(node))
1772 : return WORK_CPU_UNBOUND;
1773 :
1774 : /* Use local node/cpu if we are already there */
1775 : cpu = raw_smp_processor_id();
1776 : if (node == cpu_to_node(cpu))
1777 : return cpu;
1778 :
1779 : /* Use "random" otherwise know as "first" online CPU of node */
1780             : 	/* Use "random", otherwise known as the "first" online CPU of the node */
1781 :
1782 : /* If CPU is valid return that, otherwise just defer */
1783 : return cpu < nr_cpu_ids ? cpu : WORK_CPU_UNBOUND;
1784 : }
1785 :
1786 : /**
1787 : * queue_work_node - queue work on a "random" cpu for a given NUMA node
1788 : * @node: NUMA node that we are targeting the work for
1789 : * @wq: workqueue to use
1790 : * @work: work to queue
1791 : *
1792 : * We queue the work to a "random" CPU within a given NUMA node. The basic
1793 : * idea here is to provide a way to somehow associate work with a given
1794 : * NUMA node.
1795 : *
1796 : * This function will only make a best effort attempt at getting this onto
1797 : * the right NUMA node. If no node is requested or the requested node is
1798 : * offline then we just fall back to standard queue_work behavior.
1799 : *
1800 : * Currently the "random" CPU ends up being the first available CPU in the
1801 : * intersection of cpu_online_mask and the cpumask of the node, unless we
1802 : * are running on the node. In that case we just use the current CPU.
1803 : *
1804 : * Return: %false if @work was already on a queue, %true otherwise.
1805 : */
1806 0 : bool queue_work_node(int node, struct workqueue_struct *wq,
1807 : struct work_struct *work)
1808 : {
1809 : unsigned long flags;
1810 0 : bool ret = false;
1811 :
1812 : /*
1813 : * This current implementation is specific to unbound workqueues.
1814 : * Specifically we only return the first available CPU for a given
1815 : * node instead of cycling through individual CPUs within the node.
1816 : *
1817 : * If this is used with a per-cpu workqueue then the logic in
1818 : * workqueue_select_cpu_near would need to be updated to allow for
1819 : * some round robin type logic.
1820 : */
1821 0 : WARN_ON_ONCE(!(wq->flags & WQ_UNBOUND));
1822 :
1823 0 : local_irq_save(flags);
1824 :
1825 0 : if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) {
1826 0 : int cpu = workqueue_select_cpu_near(node);
1827 :
1828 0 : __queue_work(cpu, wq, work);
1829 0 : ret = true;
1830 : }
1831 :
1832 0 : local_irq_restore(flags);
1833 0 : return ret;
1834 : }
1835 : EXPORT_SYMBOL_GPL(queue_work_node);
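/*
 * Editor's note -- illustrative sketch, not part of kernel/workqueue.c.
 * queue_work_node() pairs with an unbound workqueue, as the WARN_ON_ONCE()
 * above enforces; it is a best-effort NUMA hint, not a hard binding.  The
 * "example_" identifiers are hypothetical.
 */
static struct workqueue_struct *example_numa_wq;

static int example_numa_init(int node, struct work_struct *work)
{
	example_numa_wq = alloc_workqueue("example_numa", WQ_UNBOUND, 0);
	if (!example_numa_wq)
		return -ENOMEM;

	/* prefer an online CPU of @node, otherwise fall back to any CPU */
	queue_work_node(node, example_numa_wq, work);
	return 0;
}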
1836 :
1837 0 : void delayed_work_timer_fn(struct timer_list *t)
1838 : {
1839 0 : struct delayed_work *dwork = from_timer(dwork, t, timer);
1840 :
1841 : /* should have been called from irqsafe timer with irq already off */
1842 0 : __queue_work(dwork->cpu, dwork->wq, &dwork->work);
1843 0 : }
1844 : EXPORT_SYMBOL(delayed_work_timer_fn);
1845 :
1846 2 : static void __queue_delayed_work(int cpu, struct workqueue_struct *wq,
1847 : struct delayed_work *dwork, unsigned long delay)
1848 : {
1849 2 : struct timer_list *timer = &dwork->timer;
1850 2 : struct work_struct *work = &dwork->work;
1851 :
1852 2 : WARN_ON_ONCE(!wq);
1853 2 : WARN_ON_ONCE(timer->function != delayed_work_timer_fn);
1854 2 : WARN_ON_ONCE(timer_pending(timer));
1855 4 : WARN_ON_ONCE(!list_empty(&work->entry));
1856 :
1857 : /*
1858 : * If @delay is 0, queue @dwork->work immediately. This is for
1859 : * both optimization and correctness. The earliest @timer can
1860 : * expire is on the closest next tick and delayed_work users depend
1861             : 	 * expire is on the closest next tick, and delayed_work users depend
1862             : 	 * on there being no such delay when @delay is 0.
1863 2 : if (!delay) {
1864 0 : __queue_work(cpu, wq, &dwork->work);
1865 0 : return;
1866 : }
1867 :
1868 2 : dwork->wq = wq;
1869 2 : dwork->cpu = cpu;
1870 2 : timer->expires = jiffies + delay;
1871 :
1872 2 : if (unlikely(cpu != WORK_CPU_UNBOUND))
1873 0 : add_timer_on(timer, cpu);
1874 : else
1875 2 : add_timer(timer);
1876 : }
1877 :
1878 : /**
1879 : * queue_delayed_work_on - queue work on specific CPU after delay
1880 : * @cpu: CPU number to execute work on
1881 : * @wq: workqueue to use
1882 : * @dwork: work to queue
1883 : * @delay: number of jiffies to wait before queueing
1884 : *
1885 : * Return: %false if @work was already on a queue, %true otherwise. If
1886 : * @delay is zero and @dwork is idle, it will be scheduled for immediate
1887 : * execution.
1888 : */
1889 2 : bool queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
1890 : struct delayed_work *dwork, unsigned long delay)
1891 : {
1892 2 : struct work_struct *work = &dwork->work;
1893 2 : bool ret = false;
1894 : unsigned long flags;
1895 :
1896 : /* read the comment in __queue_work() */
1897 2 : local_irq_save(flags);
1898 :
1899 4 : if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) {
1900 2 : __queue_delayed_work(cpu, wq, dwork, delay);
1901 2 : ret = true;
1902 : }
1903 :
1904 4 : local_irq_restore(flags);
1905 2 : return ret;
1906 : }
1907 : EXPORT_SYMBOL(queue_delayed_work_on);
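/*
 * Editor's note -- illustrative sketch, not part of kernel/workqueue.c.
 * A self re-arming delayed work item; note the @delay == 0 special case
 * documented in __queue_delayed_work() above, which queues immediately
 * rather than waiting for the next tick.  "example_" names are hypothetical.
 */
static void example_poll_fn(struct work_struct *work)
{
	struct delayed_work *dwork = to_delayed_work(work);

	/* ... poll the hypothetical device ... */

	/* re-arm: run again on the local CPU's pool in about a second */
	queue_delayed_work(system_wq, dwork, msecs_to_jiffies(1000));
}
static DECLARE_DELAYED_WORK(example_poll, example_poll_fn);

static void example_start_polling(void)
{
	queue_delayed_work(system_wq, &example_poll, msecs_to_jiffies(1000));
}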
1908 :
1909 : /**
1910 : * mod_delayed_work_on - modify delay of or queue a delayed work on specific CPU
1911 : * @cpu: CPU number to execute work on
1912 : * @wq: workqueue to use
1913 : * @dwork: work to queue
1914 : * @delay: number of jiffies to wait before queueing
1915 : *
1916 : * If @dwork is idle, equivalent to queue_delayed_work_on(); otherwise,
1917 : * modify @dwork's timer so that it expires after @delay. If @delay is
1918 : * zero, @work is guaranteed to be scheduled immediately regardless of its
1919 : * current state.
1920 : *
1921 : * Return: %false if @dwork was idle and queued, %true if @dwork was
1922 : * pending and its timer was modified.
1923 : *
1924 : * This function is safe to call from any context including IRQ handler.
1925 : * See try_to_grab_pending() for details.
1926 : */
1927 0 : bool mod_delayed_work_on(int cpu, struct workqueue_struct *wq,
1928 : struct delayed_work *dwork, unsigned long delay)
1929 : {
1930 : unsigned long flags;
1931 : int ret;
1932 :
1933 : do {
1934 0 : ret = try_to_grab_pending(&dwork->work, true, &flags);
1935 0 : } while (unlikely(ret == -EAGAIN));
1936 :
1937 0 : if (likely(ret >= 0)) {
1938 0 : __queue_delayed_work(cpu, wq, dwork, delay);
1939 0 : local_irq_restore(flags);
1940 : }
1941 :
1942 : /* -ENOENT from try_to_grab_pending() becomes %true */
1943 0 : return ret;
1944 : }
1945 : EXPORT_SYMBOL_GPL(mod_delayed_work_on);
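/*
 * Editor's note -- illustrative sketch, not part of kernel/workqueue.c.
 * mod_delayed_work() (the WORK_CPU_UNBOUND wrapper of the function above)
 * is the usual debounce primitive: every call pushes the expiry out,
 * whether or not the work was already pending, and is safe from IRQ
 * context as documented.  "example_" names are hypothetical.
 */
static void example_debounce(struct delayed_work *example_dwork)
{
	/* each event postpones execution to roughly 100ms from now */
	mod_delayed_work(system_wq, example_dwork, msecs_to_jiffies(100));
}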
1946 :
1947 0 : static void rcu_work_rcufn(struct rcu_head *rcu)
1948 : {
1949 0 : struct rcu_work *rwork = container_of(rcu, struct rcu_work, rcu);
1950 :
1951 : /* read the comment in __queue_work() */
1952 : local_irq_disable();
1953 0 : __queue_work(WORK_CPU_UNBOUND, rwork->wq, &rwork->work);
1954 : local_irq_enable();
1955 0 : }
1956 :
1957 : /**
1958 : * queue_rcu_work - queue work after a RCU grace period
1959 : * @wq: workqueue to use
1960 : * @rwork: work to queue
1961 : *
1962 : * Return: %false if @rwork was already pending, %true otherwise. Note
1963 : * that a full RCU grace period is guaranteed only after a %true return.
1964 : * While @rwork is guaranteed to be executed after a %false return, the
1965 : * execution may happen before a full RCU grace period has passed.
1966 : */
1967 0 : bool queue_rcu_work(struct workqueue_struct *wq, struct rcu_work *rwork)
1968 : {
1969 0 : struct work_struct *work = &rwork->work;
1970 :
1971 0 : if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) {
1972 0 : rwork->wq = wq;
1973 0 : call_rcu_hurry(&rwork->rcu, rcu_work_rcufn);
1974 0 : return true;
1975 : }
1976 :
1977 : return false;
1978 : }
1979 : EXPORT_SYMBOL(queue_rcu_work);
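/*
 * Editor's note -- illustrative sketch, not part of kernel/workqueue.c.
 * A common rcu_work pattern: free an object from process context only
 * after an RCU grace period, so no reader can still be traversing it.
 * "example_" names are hypothetical; INIT_RCU_WORK(), to_rcu_work() and
 * queue_rcu_work() are the real APIs.
 */
struct example_obj {
	struct rcu_work free_rwork;
	/* ... payload ... */
};

static void example_free_fn(struct work_struct *work)
{
	struct example_obj *obj =
		container_of(to_rcu_work(work), struct example_obj, free_rwork);

	kfree(obj);	/* the grace period has elapsed by the time we run */
}

static void example_release(struct example_obj *obj)
{
	INIT_RCU_WORK(&obj->free_rwork, example_free_fn);
	queue_rcu_work(system_unbound_wq, &obj->free_rwork);
}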
1980 :
1981 : /**
1982 : * worker_enter_idle - enter idle state
1983 : * @worker: worker which is entering idle state
1984 : *
1985 : * @worker is entering idle state. Update stats and idle timer if
1986 : * necessary.
1987 : *
1988 : * LOCKING:
1989 : * raw_spin_lock_irq(pool->lock).
1990 : */
1991 23 : static void worker_enter_idle(struct worker *worker)
1992 : {
1993 23 : struct worker_pool *pool = worker->pool;
1994 :
1995 46 : if (WARN_ON_ONCE(worker->flags & WORKER_IDLE) ||
1996 46 : WARN_ON_ONCE(!list_empty(&worker->entry) &&
1997 : (worker->hentry.next || worker->hentry.pprev)))
1998 : return;
1999 :
2000 : /* can't use worker_set_flags(), also called from create_worker() */
2001 23 : worker->flags |= WORKER_IDLE;
2002 23 : pool->nr_idle++;
2003 23 : worker->last_active = jiffies;
2004 :
2005 : /* idle_list is LIFO */
2006 46 : list_add(&worker->entry, &pool->idle_list);
2007 :
2008 46 : if (too_many_workers(pool) && !timer_pending(&pool->idle_timer))
2009 0 : mod_timer(&pool->idle_timer, jiffies + IDLE_WORKER_TIMEOUT);
2010 :
2011 : /* Sanity check nr_running. */
2012 23 : WARN_ON_ONCE(pool->nr_workers == pool->nr_idle && pool->nr_running);
2013 : }
2014 :
2015 : /**
2016 : * worker_leave_idle - leave idle state
2017 : * @worker: worker which is leaving idle state
2018 : *
2019 : * @worker is leaving idle state. Update stats.
2020 : *
2021 : * LOCKING:
2022 : * raw_spin_lock_irq(pool->lock).
2023 : */
2024 18 : static void worker_leave_idle(struct worker *worker)
2025 : {
2026 18 : struct worker_pool *pool = worker->pool;
2027 :
2028 18 : if (WARN_ON_ONCE(!(worker->flags & WORKER_IDLE)))
2029 : return;
2030 18 : worker_clr_flags(worker, WORKER_IDLE);
2031 18 : pool->nr_idle--;
2032 18 : list_del_init(&worker->entry);
2033 : }
2034 :
2035 9 : static struct worker *alloc_worker(int node)
2036 : {
2037 : struct worker *worker;
2038 :
2039 9 : worker = kzalloc_node(sizeof(*worker), GFP_KERNEL, node);
2040 9 : if (worker) {
2041 18 : INIT_LIST_HEAD(&worker->entry);
2042 18 : INIT_LIST_HEAD(&worker->scheduled);
2043 18 : INIT_LIST_HEAD(&worker->node);
2044 : /* on creation a worker is in !idle && prep state */
2045 9 : worker->flags = WORKER_PREP;
2046 : }
2047 9 : return worker;
2048 : }
2049 :
2050 : /**
2051 : * worker_attach_to_pool() - attach a worker to a pool
2052 : * @worker: worker to be attached
2053 : * @pool: the target pool
2054 : *
2055 : * Attach @worker to @pool. Once attached, the %WORKER_UNBOUND flag and
2056 : * cpu-binding of @worker are kept coordinated with the pool across
2057 : * cpu-[un]hotplugs.
2058 : */
2059 5 : static void worker_attach_to_pool(struct worker *worker,
2060 : struct worker_pool *pool)
2061 : {
2062 5 : mutex_lock(&wq_pool_attach_mutex);
2063 :
2064 : /*
2065 : * The wq_pool_attach_mutex ensures %POOL_DISASSOCIATED remains
2066 : * stable across this function. See the comments above the flag
2067 : * definition for details.
2068 : */
2069 5 : if (pool->flags & POOL_DISASSOCIATED)
2070 2 : worker->flags |= WORKER_UNBOUND;
2071 : else
2072 3 : kthread_set_per_cpu(worker->task, pool->cpu);
2073 :
2074 5 : if (worker->rescue_wq)
2075 0 : set_cpus_allowed_ptr(worker->task, pool->attrs->cpumask);
2076 :
2077 10 : list_add_tail(&worker->node, &pool->workers);
2078 5 : worker->pool = pool;
2079 :
2080 5 : mutex_unlock(&wq_pool_attach_mutex);
2081 5 : }
2082 :
2083 : /**
2084 : * worker_detach_from_pool() - detach a worker from its pool
2085 : * @worker: worker which is attached to its pool
2086 : *
2087 : * Undo the attaching which had been done in worker_attach_to_pool(). The
2088             :  * caller worker shouldn't access the pool after detaching unless it
2089             :  * holds another reference to the pool.
2090 : */
2091 0 : static void worker_detach_from_pool(struct worker *worker)
2092 : {
2093 0 : struct worker_pool *pool = worker->pool;
2094 0 : struct completion *detach_completion = NULL;
2095 :
2096 0 : mutex_lock(&wq_pool_attach_mutex);
2097 :
2098 0 : kthread_set_per_cpu(worker->task, -1);
2099 0 : list_del(&worker->node);
2100 0 : worker->pool = NULL;
2101 :
2102 0 : if (list_empty(&pool->workers) && list_empty(&pool->dying_workers))
2103 0 : detach_completion = pool->detach_completion;
2104 0 : mutex_unlock(&wq_pool_attach_mutex);
2105 :
2106 : /* clear leftover flags without pool->lock after it is detached */
2107 0 : worker->flags &= ~(WORKER_UNBOUND | WORKER_REBOUND);
2108 :
2109 0 : if (detach_completion)
2110 0 : complete(detach_completion);
2111 0 : }
2112 :
2113 : /**
2114 : * create_worker - create a new workqueue worker
2115 : * @pool: pool the new worker will belong to
2116 : *
2117 : * Create and start a new worker which is attached to @pool.
2118 : *
2119 : * CONTEXT:
2120 : * Might sleep. Does GFP_KERNEL allocations.
2121 : *
2122 : * Return:
2123 : * Pointer to the newly created worker.
2124 : */
2125 5 : static struct worker *create_worker(struct worker_pool *pool)
2126 : {
2127 : struct worker *worker;
2128 : int id;
2129 : char id_buf[16];
2130 :
2131 : /* ID is needed to determine kthread name */
2132 10 : id = ida_alloc(&pool->worker_ida, GFP_KERNEL);
2133 5 : if (id < 0) {
2134 0 : pr_err_once("workqueue: Failed to allocate a worker ID: %pe\n",
2135 : ERR_PTR(id));
2136 : return NULL;
2137 : }
2138 :
2139 5 : worker = alloc_worker(pool->node);
2140 5 : if (!worker) {
2141 0 : pr_err_once("workqueue: Failed to allocate a worker\n");
2142 : goto fail;
2143 : }
2144 :
2145 5 : worker->id = id;
2146 :
2147 5 : if (pool->cpu >= 0)
2148 3 : snprintf(id_buf, sizeof(id_buf), "%d:%d%s", pool->cpu, id,
2149 3 : pool->attrs->nice < 0 ? "H" : "");
2150 : else
2151 2 : snprintf(id_buf, sizeof(id_buf), "u%d:%d", pool->id, id);
2152 :
2153 5 : worker->task = kthread_create_on_node(worker_thread, worker, pool->node,
2154 : "kworker/%s", id_buf);
2155 10 : if (IS_ERR(worker->task)) {
2156 0 : if (PTR_ERR(worker->task) == -EINTR) {
2157 0 : pr_err("workqueue: Interrupted when creating a worker thread \"kworker/%s\"\n",
2158 : id_buf);
2159 : } else {
2160 0 : pr_err_once("workqueue: Failed to create a worker thread: %pe",
2161 : worker->task);
2162 : }
2163 : goto fail;
2164 : }
2165 :
2166 5 : set_user_nice(worker->task, pool->attrs->nice);
2167 5 : kthread_bind_mask(worker->task, pool->attrs->cpumask);
2168 :
2169 : /* successful, attach the worker to the pool */
2170 5 : worker_attach_to_pool(worker, pool);
2171 :
2172 : /* start the newly created worker */
2173 5 : raw_spin_lock_irq(&pool->lock);
2174 5 : worker->pool->nr_workers++;
2175 5 : worker_enter_idle(worker);
2176 5 : wake_up_process(worker->task);
2177 5 : raw_spin_unlock_irq(&pool->lock);
2178 :
2179 5 : return worker;
2180 :
2181 : fail:
2182 0 : ida_free(&pool->worker_ida, id);
2183 0 : kfree(worker);
2184 0 : return NULL;
2185 : }
2186 :
2187 0 : static void unbind_worker(struct worker *worker)
2188 : {
2189 : lockdep_assert_held(&wq_pool_attach_mutex);
2190 :
2191 0 : kthread_set_per_cpu(worker->task, -1);
2192 0 : if (cpumask_intersects(wq_unbound_cpumask, cpu_active_mask))
2193 0 : WARN_ON_ONCE(set_cpus_allowed_ptr(worker->task, wq_unbound_cpumask) < 0);
2194 : else
2195 0 : WARN_ON_ONCE(set_cpus_allowed_ptr(worker->task, cpu_possible_mask) < 0);
2196 0 : }
2197 :
2198 0 : static void wake_dying_workers(struct list_head *cull_list)
2199 : {
2200 : struct worker *worker, *tmp;
2201 :
2202 0 : list_for_each_entry_safe(worker, tmp, cull_list, entry) {
2203 0 : list_del_init(&worker->entry);
2204 0 : unbind_worker(worker);
2205 : /*
2206 : * If the worker was somehow already running, then it had to be
2207 : * in pool->idle_list when set_worker_dying() happened or we
2208 : * wouldn't have gotten here.
2209 : *
2210 : * Thus, the worker must either have observed the WORKER_DIE
2211 : * flag, or have set its state to TASK_IDLE. Either way, the
2212 : * below will be observed by the worker and is safe to do
2213 : * outside of pool->lock.
2214 : */
2215 0 : wake_up_process(worker->task);
2216 : }
2217 0 : }
2218 :
2219 : /**
2220 : * set_worker_dying - Tag a worker for destruction
2221 : * @worker: worker to be destroyed
2222 : * @list: transfer worker away from its pool->idle_list and into list
2223 : *
2224 : * Tag @worker for destruction and adjust @pool stats accordingly. The worker
2225 : * should be idle.
2226 : *
2227 : * CONTEXT:
2228 : * raw_spin_lock_irq(pool->lock).
2229 : */
2230 0 : static void set_worker_dying(struct worker *worker, struct list_head *list)
2231 : {
2232 0 : struct worker_pool *pool = worker->pool;
2233 :
2234 : lockdep_assert_held(&pool->lock);
2235 : lockdep_assert_held(&wq_pool_attach_mutex);
2236 :
2237 : /* sanity check frenzy */
2238 0 : if (WARN_ON(worker->current_work) ||
2239 0 : WARN_ON(!list_empty(&worker->scheduled)) ||
2240 0 : WARN_ON(!(worker->flags & WORKER_IDLE)))
2241 : return;
2242 :
2243 0 : pool->nr_workers--;
2244 0 : pool->nr_idle--;
2245 :
2246 0 : worker->flags |= WORKER_DIE;
2247 :
2248 0 : list_move(&worker->entry, list);
2249 0 : list_move(&worker->node, &pool->dying_workers);
2250 : }
2251 :
2252 : /**
2253 : * idle_worker_timeout - check if some idle workers can now be deleted.
2254 : * @t: The pool's idle_timer that just expired
2255 : *
2256 : * The timer is armed in worker_enter_idle(). Note that it isn't disarmed in
2257 : * worker_leave_idle(), as a worker flicking between idle and active while its
2258 : * pool is at the too_many_workers() tipping point would cause too much timer
2259 : * housekeeping overhead. Since IDLE_WORKER_TIMEOUT is long enough, we just let
2260 : * it expire and re-evaluate things from there.
2261 : */
2262 0 : static void idle_worker_timeout(struct timer_list *t)
2263 : {
2264 0 : struct worker_pool *pool = from_timer(pool, t, idle_timer);
2265 0 : bool do_cull = false;
2266 :
2267 0 : if (work_pending(&pool->idle_cull_work))
2268 : return;
2269 :
2270 0 : raw_spin_lock_irq(&pool->lock);
2271 :
2272 0 : if (too_many_workers(pool)) {
2273 : struct worker *worker;
2274 : unsigned long expires;
2275 :
2276 : /* idle_list is kept in LIFO order, check the last one */
2277 0 : worker = list_entry(pool->idle_list.prev, struct worker, entry);
2278 0 : expires = worker->last_active + IDLE_WORKER_TIMEOUT;
2279 0 : do_cull = !time_before(jiffies, expires);
2280 :
2281 0 : if (!do_cull)
2282 0 : mod_timer(&pool->idle_timer, expires);
2283 : }
2284 0 : raw_spin_unlock_irq(&pool->lock);
2285 :
2286 0 : if (do_cull)
2287 0 : queue_work(system_unbound_wq, &pool->idle_cull_work);
2288 : }
2289 :
2290 : /**
2291 : * idle_cull_fn - cull workers that have been idle for too long.
2292 : * @work: the pool's work for handling these idle workers
2293 : *
2294 : * This goes through a pool's idle workers and gets rid of those that have been
2295 : * idle for at least IDLE_WORKER_TIMEOUT seconds.
2296             :  * idle for at least IDLE_WORKER_TIMEOUT.
2297 : * We don't want to disturb isolated CPUs because of a pcpu kworker being
2298 : * culled, so this also resets worker affinity. This requires a sleepable
2299 : * context, hence the split between timer callback and work item.
2300 : */
2301 0 : static void idle_cull_fn(struct work_struct *work)
2302 : {
2303 0 : struct worker_pool *pool = container_of(work, struct worker_pool, idle_cull_work);
2304 : struct list_head cull_list;
2305 :
2306 0 : INIT_LIST_HEAD(&cull_list);
2307 : /*
2308 : * Grabbing wq_pool_attach_mutex here ensures an already-running worker
2309             : 	 * cannot proceed beyond worker_detach_from_pool() in its self-destruct
2310 : * path. This is required as a previously-preempted worker could run after
2311 : * set_worker_dying() has happened but before wake_dying_workers() did.
2312 : */
2313 0 : mutex_lock(&wq_pool_attach_mutex);
2314 0 : raw_spin_lock_irq(&pool->lock);
2315 :
2316 0 : while (too_many_workers(pool)) {
2317 : struct worker *worker;
2318 : unsigned long expires;
2319 :
2320 0 : worker = list_entry(pool->idle_list.prev, struct worker, entry);
2321 0 : expires = worker->last_active + IDLE_WORKER_TIMEOUT;
2322 :
2323 0 : if (time_before(jiffies, expires)) {
2324 0 : mod_timer(&pool->idle_timer, expires);
2325 0 : break;
2326 : }
2327 :
2328 0 : set_worker_dying(worker, &cull_list);
2329 : }
2330 :
2331 0 : raw_spin_unlock_irq(&pool->lock);
2332 0 : wake_dying_workers(&cull_list);
2333 0 : mutex_unlock(&wq_pool_attach_mutex);
2334 0 : }
2335 :
2336 0 : static void send_mayday(struct work_struct *work)
2337 : {
2338 0 : struct pool_workqueue *pwq = get_work_pwq(work);
2339 0 : struct workqueue_struct *wq = pwq->wq;
2340 :
2341 : lockdep_assert_held(&wq_mayday_lock);
2342 :
2343 0 : if (!wq->rescuer)
2344 : return;
2345 :
2346 : /* mayday mayday mayday */
2347 0 : if (list_empty(&pwq->mayday_node)) {
2348 : /*
2349 : * If @pwq is for an unbound wq, its base ref may be put at
2350 : * any time due to an attribute change. Pin @pwq until the
2351 : * rescuer is done with it.
2352 : */
2353 0 : get_pwq(pwq);
2354 0 : list_add_tail(&pwq->mayday_node, &wq->maydays);
2355 0 : wake_up_process(wq->rescuer->task);
2356 0 : pwq->stats[PWQ_STAT_MAYDAY]++;
2357 : }
2358 : }
2359 :
2360 0 : static void pool_mayday_timeout(struct timer_list *t)
2361 : {
2362 0 : struct worker_pool *pool = from_timer(pool, t, mayday_timer);
2363 : struct work_struct *work;
2364 :
2365 0 : raw_spin_lock_irq(&pool->lock);
2366 0 : raw_spin_lock(&wq_mayday_lock); /* for wq->maydays */
2367 :
2368 0 : if (need_to_create_worker(pool)) {
2369 : /*
2370 : * We've been trying to create a new worker but
2371 : * haven't been successful. We might be hitting an
2372 : * allocation deadlock. Send distress signals to
2373 : * rescuers.
2374 : */
2375 0 : list_for_each_entry(work, &pool->worklist, entry)
2376 0 : send_mayday(work);
2377 : }
2378 :
2379 0 : raw_spin_unlock(&wq_mayday_lock);
2380 0 : raw_spin_unlock_irq(&pool->lock);
2381 :
2382 0 : mod_timer(&pool->mayday_timer, jiffies + MAYDAY_INTERVAL);
2383 0 : }
2384 :
2385 : /**
2386 : * maybe_create_worker - create a new worker if necessary
2387 : * @pool: pool to create a new worker for
2388 : *
2389 : * Create a new worker for @pool if necessary. @pool is guaranteed to
2390 : * have at least one idle worker on return from this function. If
2391 : * creating a new worker takes longer than MAYDAY_INTERVAL, mayday is
2392 : * sent to all rescuers with works scheduled on @pool to resolve
2393 : * possible allocation deadlock.
2394 : *
2395 : * On return, need_to_create_worker() is guaranteed to be %false and
2396 : * may_start_working() %true.
2397 : *
2398 : * LOCKING:
2399 : * raw_spin_lock_irq(pool->lock) which may be released and regrabbed
2400 : * multiple times. Does GFP_KERNEL allocations. Called only from
2401 : * manager.
2402 : */
2403 2 : static void maybe_create_worker(struct worker_pool *pool)
2404 : __releases(&pool->lock)
2405 : __acquires(&pool->lock)
2406 : {
2407 : restart:
2408 2 : raw_spin_unlock_irq(&pool->lock);
2409 :
2410 : /* if we don't make progress in MAYDAY_INITIAL_TIMEOUT, call for help */
2411 2 : mod_timer(&pool->mayday_timer, jiffies + MAYDAY_INITIAL_TIMEOUT);
2412 :
2413 : while (true) {
2414 2 : if (create_worker(pool) || !need_to_create_worker(pool))
2415 : break;
2416 :
2417 0 : schedule_timeout_interruptible(CREATE_COOLDOWN);
2418 :
2419 0 : if (!need_to_create_worker(pool))
2420 : break;
2421 : }
2422 :
2423 4 : del_timer_sync(&pool->mayday_timer);
2424 2 : raw_spin_lock_irq(&pool->lock);
2425 : /*
2426 : * This is necessary even after a new worker was just successfully
2427 : * created as @pool->lock was dropped and the new worker might have
2428 : * already become busy.
2429 : */
2430 2 : if (need_to_create_worker(pool))
2431 : goto restart;
2432 2 : }
2433 :
2434 : /**
2435 : * manage_workers - manage worker pool
2436 : * @worker: self
2437 : *
2438 : * Assume the manager role and manage the worker pool @worker belongs
2439 : * to. At any given time, there can be only zero or one manager per
2440 : * pool. The exclusion is handled automatically by this function.
2441 : *
2442 : * The caller can safely start processing works on false return. On
2443 : * true return, it's guaranteed that need_to_create_worker() is false
2444 : * and may_start_working() is true.
2445 : *
2446 : * CONTEXT:
2447 : * raw_spin_lock_irq(pool->lock) which may be released and regrabbed
2448 : * multiple times. Does GFP_KERNEL allocations.
2449 : *
2450 : * Return:
2451 : * %false if the pool doesn't need management and the caller can safely
2452 : * start processing works, %true if management function was performed and
2453             :  * start processing works, %true if the management function was performed and
2454 : * no longer be true.
2455 : */
2456 2 : static bool manage_workers(struct worker *worker)
2457 : {
2458 2 : struct worker_pool *pool = worker->pool;
2459 :
2460 2 : if (pool->flags & POOL_MANAGER_ACTIVE)
2461 : return false;
2462 :
2463 2 : pool->flags |= POOL_MANAGER_ACTIVE;
2464 2 : pool->manager = worker;
2465 :
2466 2 : maybe_create_worker(pool);
2467 :
2468 2 : pool->manager = NULL;
2469 2 : pool->flags &= ~POOL_MANAGER_ACTIVE;
2470 2 : rcuwait_wake_up(&manager_wait);
2471 2 : return true;
2472 : }
2473 :
2474 : /**
2475 : * process_one_work - process single work
2476 : * @worker: self
2477 : * @work: work to process
2478 : *
2479             :  * Process @work. This function contains all the logic necessary to
2480 : * process a single work including synchronization against and
2481 : * interaction with other workers on the same cpu, queueing and
2482             :  * flushing. As long as the context requirement is met, any worker can
2483 : * call this function to process a work.
2484 : *
2485 : * CONTEXT:
2486 : * raw_spin_lock_irq(pool->lock) which is released and regrabbed.
2487 : */
2488 16 : static void process_one_work(struct worker *worker, struct work_struct *work)
2489 : __releases(&pool->lock)
2490 : __acquires(&pool->lock)
2491 : {
2492 16 : struct pool_workqueue *pwq = get_work_pwq(work);
2493 16 : struct worker_pool *pool = worker->pool;
2494 : unsigned long work_data;
2495 : struct worker *collision;
2496 : #ifdef CONFIG_LOCKDEP
2497 : /*
2498 : * It is permissible to free the struct work_struct from
2499             : 	 * inside the function that is called from it; this we need to
2500 : * take into account for lockdep too. To avoid bogus "held
2501 : * lock freed" warnings as well as problems when looking into
2502 : * work->lockdep_map, make a copy and use that here.
2503 : */
2504 : struct lockdep_map lockdep_map;
2505 :
2506 : lockdep_copy_map(&lockdep_map, &work->lockdep_map);
2507 : #endif
2508 : /* ensure we're on the correct CPU */
2509 16 : WARN_ON_ONCE(!(pool->flags & POOL_DISASSOCIATED) &&
2510 : raw_smp_processor_id() != pool->cpu);
2511 :
2512 : /*
2513 : * A single work shouldn't be executed concurrently by
2514 : * multiple workers on a single cpu. Check whether anyone is
2515 : * already processing the work. If so, defer the work to the
2516 : * currently executing one.
2517 : */
2518 16 : collision = find_worker_executing_work(pool, work);
2519 16 : if (unlikely(collision)) {
2520 0 : move_linked_works(work, &collision->scheduled, NULL);
2521 : return;
2522 : }
2523 :
2524 : /* claim and dequeue */
2525 16 : debug_work_deactivate(work);
2526 32 : hash_add(pool->busy_hash, &worker->hentry, (unsigned long)work);
2527 16 : worker->current_work = work;
2528 16 : worker->current_func = work->func;
2529 16 : worker->current_pwq = pwq;
2530 16 : worker->current_at = worker->task->se.sum_exec_runtime;
2531 16 : work_data = *work_data_bits(work);
2532 16 : worker->current_color = get_work_color(work_data);
2533 :
2534 : /*
2535 : * Record wq name for cmdline and debug reporting, may get
2536 : * overridden through set_worker_desc().
2537 : */
2538 32 : strscpy(worker->desc, pwq->wq->name, WORKER_DESC_LEN);
2539 :
2540 32 : list_del_init(&work->entry);
2541 :
2542 : /*
2543 : * CPU intensive works don't participate in concurrency management.
2544 : * They're the scheduler's responsibility. This takes @worker out
2545 : * of concurrency management and the next code block will chain
2546 : * execution of the pending work items.
2547 : */
2548 16 : if (unlikely(pwq->wq->flags & WQ_CPU_INTENSIVE))
2549 0 : worker_set_flags(worker, WORKER_CPU_INTENSIVE);
2550 :
2551 : /*
2552 : * Wake up another worker if necessary. The condition is always
2553 : * false for normal per-cpu workers since nr_running would always
2554 : * be >= 1 at this point. This is used to chain execution of the
2555 : * pending work items for WORKER_NOT_RUNNING workers such as the
2556 : * UNBOUND and CPU_INTENSIVE ones.
2557 : */
2558 16 : if (need_more_worker(pool))
2559 : wake_up_worker(pool);
2560 :
2561 : /*
2562 : * Record the last pool and clear PENDING which should be the last
2563 : * update to @work. Also, do this inside @pool->lock so that
2564 : * PENDING and queued state changes happen together while IRQ is
2565 : * disabled.
2566 : */
2567 32 : set_work_pool_and_clear_pending(work, pool->id);
2568 :
2569 16 : raw_spin_unlock_irq(&pool->lock);
2570 :
2571 : lock_map_acquire(&pwq->wq->lockdep_map);
2572 : lock_map_acquire(&lockdep_map);
2573 : /*
2574 : * Strictly speaking we should mark the invariant state without holding
2575 : * any locks, that is, before these two lock_map_acquire()'s.
2576 : *
2577 : * However, that would result in:
2578 : *
2579 : * A(W1)
2580 : * WFC(C)
2581 : * A(W1)
2582 : * C(C)
2583 : *
2584 : * Which would create W1->C->W1 dependencies, even though there is no
2585 : * actual deadlock possible. There are two solutions, using a
2586 : * read-recursive acquire on the work(queue) 'locks', but this will then
2587 : * hit the lockdep limitation on recursive locks, or simply discard
2588 : * these locks.
2589 : *
2590 : * AFAICT there is no possible deadlock scenario between the
2591 : * flush_work() and complete() primitives (except for single-threaded
2592 : * workqueues), so hiding them isn't a problem.
2593 : */
2594 16 : lockdep_invariant_state(true);
2595 16 : pwq->stats[PWQ_STAT_STARTED]++;
2596 16 : trace_workqueue_execute_start(work);
2597 16 : worker->current_func(work);
2598 : /*
2599 : * While we must be careful to not use "work" after this, the trace
2600 : * point will only record its address.
2601 : */
2602 16 : trace_workqueue_execute_end(work, worker->current_func);
2603 16 : pwq->stats[PWQ_STAT_COMPLETED]++;
2604 : lock_map_release(&lockdep_map);
2605 : lock_map_release(&pwq->wq->lockdep_map);
2606 :
2607 16 : if (unlikely(in_atomic() || lockdep_depth(current) > 0)) {
2608 0 : pr_err("BUG: workqueue leaked lock or atomic: %s/0x%08x/%d\n"
2609 : " last function: %ps\n",
2610 : current->comm, preempt_count(), task_pid_nr(current),
2611 : worker->current_func);
2612 0 : debug_show_held_locks(current);
2613 0 : dump_stack();
2614 : }
2615 :
2616 : /*
2617 : * The following prevents a kworker from hogging CPU on !PREEMPTION
2618 : * kernels, where a requeueing work item waiting for something to
2619 : * happen could deadlock with stop_machine as such work item could
2620             : 	 * happen could deadlock with stop_machine as such a work item could
2621 : * stop_machine. At the same time, report a quiescent RCU state so
2622 : * the same condition doesn't freeze RCU.
2623 : */
2624 16 : cond_resched();
2625 :
2626 16 : raw_spin_lock_irq(&pool->lock);
2627 :
2628 : /*
2629 : * In addition to %WQ_CPU_INTENSIVE, @worker may also have been marked
2630 : * CPU intensive by wq_worker_tick() if @work hogged CPU longer than
2631 : * wq_cpu_intensive_thresh_us. Clear it.
2632 : */
2633 16 : worker_clr_flags(worker, WORKER_CPU_INTENSIVE);
2634 :
2635 : /* tag the worker for identification in schedule() */
2636 16 : worker->last_func = worker->current_func;
2637 :
2638 : /* we're done with it, release */
2639 32 : hash_del(&worker->hentry);
2640 16 : worker->current_work = NULL;
2641 16 : worker->current_func = NULL;
2642 16 : worker->current_pwq = NULL;
2643 16 : worker->current_color = INT_MAX;
2644 16 : pwq_dec_nr_in_flight(pwq, work_data);
2645 : }
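/*
 * Editor's note -- illustrative sketch, not part of kernel/workqueue.c.
 * Per the comment above, long-running CPU hogs should be queued on a
 * WQ_CPU_INTENSIVE workqueue so they are excluded from per-cpu concurrency
 * management and left to the scheduler.  "example_" names are hypothetical.
 */
static struct workqueue_struct *example_crunch_wq;

static int example_crunch_init(void)
{
	example_crunch_wq = alloc_workqueue("example_crunch",
					    WQ_CPU_INTENSIVE, 0);
	return example_crunch_wq ? 0 : -ENOMEM;
}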
2646 :
2647 : /**
2648 : * process_scheduled_works - process scheduled works
2649 : * @worker: self
2650 : *
2651 : * Process all scheduled works. Please note that the scheduled list
2652 : * may change while processing a work, so this function repeatedly
2653 : * fetches a work from the top and executes it.
2654 : *
2655 : * CONTEXT:
2656 : * raw_spin_lock_irq(pool->lock) which may be released and regrabbed
2657 : * multiple times.
2658 : */
2659 : static void process_scheduled_works(struct worker *worker)
2660 : {
2661 12 : while (!list_empty(&worker->scheduled)) {
2662 4 : struct work_struct *work = list_first_entry(&worker->scheduled,
2663 : struct work_struct, entry);
2664 4 : process_one_work(worker, work);
2665 : }
2666 : }
2667 :
2668 9 : static void set_pf_worker(bool val)
2669 : {
2670 9 : mutex_lock(&wq_pool_attach_mutex);
2671 9 : if (val)
2672 9 : current->flags |= PF_WQ_WORKER;
2673 : else
2674 0 : current->flags &= ~PF_WQ_WORKER;
2675 9 : mutex_unlock(&wq_pool_attach_mutex);
2676 9 : }
2677 :
2678 : /**
2679 : * worker_thread - the worker thread function
2680 : * @__worker: self
2681 : *
2682 : * The worker thread function. All workers belong to a worker_pool -
2683 : * either a per-cpu one or dynamic unbound one. These workers process all
2684 : * work items regardless of their specific target workqueue. The only
2685 : * exception is work items which belong to workqueues with a rescuer which
2686 : * will be explained in rescuer_thread().
2687 : *
2688 : * Return: 0
2689 : */
2690 5 : static int worker_thread(void *__worker)
2691 : {
2692 5 : struct worker *worker = __worker;
2693 5 : struct worker_pool *pool = worker->pool;
2694 :
2695 : /* tell the scheduler that this is a workqueue worker */
2696 5 : set_pf_worker(true);
2697 : woke_up:
2698 18 : raw_spin_lock_irq(&pool->lock);
2699 :
2700 : /* am I supposed to die? */
2701 18 : if (unlikely(worker->flags & WORKER_DIE)) {
2702 0 : raw_spin_unlock_irq(&pool->lock);
2703 0 : set_pf_worker(false);
2704 :
2705 0 : set_task_comm(worker->task, "kworker/dying");
2706 0 : ida_free(&pool->worker_ida, worker->id);
2707 0 : worker_detach_from_pool(worker);
2708 0 : WARN_ON_ONCE(!list_empty(&worker->entry));
2709 0 : kfree(worker);
2710 0 : return 0;
2711 : }
2712 :
2713 18 : worker_leave_idle(worker);
2714 : recheck:
2715 : /* no more worker necessary? */
2716 20 : if (!need_more_worker(pool))
2717 : goto sleep;
2718 :
2719 : /* do we need to manage? */
2720 15 : if (unlikely(!may_start_working(pool)) && manage_workers(worker))
2721 : goto recheck;
2722 :
2723 : /*
2724 : * ->scheduled list can only be filled while a worker is
2725 : * preparing to process a work or actually processing it.
2726 : * Make sure nobody diddled with it while I was sleeping.
2727 : */
2728 26 : WARN_ON_ONCE(!list_empty(&worker->scheduled));
2729 :
2730 : /*
2731 : * Finish PREP stage. We're guaranteed to have at least one idle
2732 : * worker or that someone else has already assumed the manager
2733 : * role. This is where @worker starts participating in concurrency
2734 : * management if applicable and concurrency management is restored
2735 : * after being rebound. See rebind_workers() for details.
2736 : */
2737 13 : worker_clr_flags(worker, WORKER_PREP | WORKER_REBOUND);
2738 :
2739 : do {
2740 14 : struct work_struct *work =
2741 14 : list_first_entry(&pool->worklist,
2742 : struct work_struct, entry);
2743 :
2744 14 : pool->watchdog_ts = jiffies;
2745 :
2746 14 : if (likely(!(*work_data_bits(work) & WORK_STRUCT_LINKED))) {
2747 : /* optimization path, not strictly necessary */
2748 12 : process_one_work(worker, work);
2749 24 : if (unlikely(!list_empty(&worker->scheduled)))
2750 : process_scheduled_works(worker);
2751 : } else {
2752 2 : move_linked_works(work, &worker->scheduled, NULL);
2753 : process_scheduled_works(worker);
2754 : }
2755 14 : } while (keep_working(pool));
2756 :
2757 13 : worker_set_flags(worker, WORKER_PREP);
2758 : sleep:
2759 : /*
2760 : * pool->lock is held and there's no work to process and no need to
2761 : * manage, sleep. Workers are woken up only while holding
2762 : * pool->lock or from local cpu, so setting the current state
2763 : * before releasing pool->lock is enough to prevent losing any
2764 : * event.
2765 : */
2766 18 : worker_enter_idle(worker);
2767 18 : __set_current_state(TASK_IDLE);
2768 18 : raw_spin_unlock_irq(&pool->lock);
2769 18 : schedule();
2770 13 : goto woke_up;
2771 : }
2772 :
2773 : /**
2774 : * rescuer_thread - the rescuer thread function
2775 : * @__rescuer: self
2776 : *
2777 : * Workqueue rescuer thread function. There's one rescuer for each
2778 : * workqueue which has WQ_MEM_RECLAIM set.
2779 : *
2780 : * Regular work processing on a pool may block trying to create a new
2781             :  * worker, which uses a GFP_KERNEL allocation that has a slight chance of
2782             :  * developing into a deadlock if some works currently on the same queue
2783 : * need to be processed to satisfy the GFP_KERNEL allocation. This is
2784             :  * the problem the rescuer solves.
2785 : *
2786             :  * When such a condition is possible, the pool summons rescuers of all
2787             :  * workqueues which have works queued on the pool and lets them process
2788 : * those works so that forward progress can be guaranteed.
2789 : *
2790 : * This should happen rarely.
2791 : *
2792 : * Return: 0
2793 : */
2794 4 : static int rescuer_thread(void *__rescuer)
2795 : {
2796 4 : struct worker *rescuer = __rescuer;
2797 4 : struct workqueue_struct *wq = rescuer->rescue_wq;
2798 4 : struct list_head *scheduled = &rescuer->scheduled;
2799 : bool should_stop;
2800 :
2801 4 : set_user_nice(current, RESCUER_NICE_LEVEL);
2802 :
2803 : /*
2804 : * Mark rescuer as worker too. As WORKER_PREP is never cleared, it
2805 : * doesn't participate in concurrency management.
2806 : */
2807 4 : set_pf_worker(true);
2808 : repeat:
2809 4 : set_current_state(TASK_IDLE);
2810 :
2811 : /*
2812 : * By the time the rescuer is requested to stop, the workqueue
2813 : * shouldn't have any work pending, but @wq->maydays may still have
2814 : * pwq(s) queued. This can happen by non-rescuer workers consuming
2815             : 	 * pwq(s) queued. This can happen when non-rescuer workers consume
2816             : 	 * all the work items before the rescuer gets to them. Go through
2817 : * list is always empty on exit.
2818 : */
2819 4 : should_stop = kthread_should_stop();
2820 :
2821 : /* see whether any pwq is asking for help */
2822 4 : raw_spin_lock_irq(&wq_mayday_lock);
2823 :
2824 12 : while (!list_empty(&wq->maydays)) {
2825 0 : struct pool_workqueue *pwq = list_first_entry(&wq->maydays,
2826 : struct pool_workqueue, mayday_node);
2827 0 : struct worker_pool *pool = pwq->pool;
2828 : struct work_struct *work, *n;
2829 0 : bool first = true;
2830 :
2831 0 : __set_current_state(TASK_RUNNING);
2832 0 : list_del_init(&pwq->mayday_node);
2833 :
2834 0 : raw_spin_unlock_irq(&wq_mayday_lock);
2835 :
2836 0 : worker_attach_to_pool(rescuer, pool);
2837 :
2838 0 : raw_spin_lock_irq(&pool->lock);
2839 :
2840 : /*
2841 : * Slurp in all works issued via this workqueue and
2842 : * process'em.
2843 : */
2844 0 : WARN_ON_ONCE(!list_empty(scheduled));
2845 0 : list_for_each_entry_safe(work, n, &pool->worklist, entry) {
2846 0 : if (get_work_pwq(work) == pwq) {
2847 0 : if (first)
2848 0 : pool->watchdog_ts = jiffies;
2849 0 : move_linked_works(work, scheduled, &n);
2850 0 : pwq->stats[PWQ_STAT_RESCUED]++;
2851 : }
2852 0 : first = false;
2853 : }
2854 :
2855 0 : if (!list_empty(scheduled)) {
2856 0 : process_scheduled_works(rescuer);
2857 :
2858 : /*
2859 : * The above execution of rescued work items could
2860 : * have created more to rescue through
2861 : * pwq_activate_first_inactive() or chained
2862 : * queueing. Let's put @pwq back on mayday list so
2863 : * that such back-to-back work items, which may be
2864 : * being used to relieve memory pressure, don't
2865 : * incur MAYDAY_INTERVAL delay inbetween.
2866             : 			 * incur MAYDAY_INTERVAL delay in between.
2867 0 : if (pwq->nr_active && need_to_create_worker(pool)) {
2868 0 : raw_spin_lock(&wq_mayday_lock);
2869 : /*
2870 : * Queue iff we aren't racing destruction
2871 : * and somebody else hasn't queued it already.
2872 : */
2873 0 : if (wq->rescuer && list_empty(&pwq->mayday_node)) {
2874 0 : get_pwq(pwq);
2875 0 : list_add_tail(&pwq->mayday_node, &wq->maydays);
2876 : }
2877 0 : raw_spin_unlock(&wq_mayday_lock);
2878 : }
2879 : }
2880 :
2881 : /*
2882 : * Put the reference grabbed by send_mayday(). @pool won't
2883 : * go away while we're still attached to it.
2884 : */
2885 0 : put_pwq(pwq);
2886 :
2887 : /*
2888 : * Leave this pool. If need_more_worker() is %true, notify a
2889 : * regular worker; otherwise, we end up with 0 concurrency
2890             : 		 * and stall the execution.
2891 : */
2892 0 : if (need_more_worker(pool))
2893 : wake_up_worker(pool);
2894 :
2895 0 : raw_spin_unlock_irq(&pool->lock);
2896 :
2897 0 : worker_detach_from_pool(rescuer);
2898 :
2899 0 : raw_spin_lock_irq(&wq_mayday_lock);
2900 : }
2901 :
2902 4 : raw_spin_unlock_irq(&wq_mayday_lock);
2903 :
2904 4 : if (should_stop) {
2905 0 : __set_current_state(TASK_RUNNING);
2906 0 : set_pf_worker(false);
2907 0 : return 0;
2908 : }
2909 :
2910 : /* rescuers should never participate in concurrency management */
2911 4 : WARN_ON_ONCE(!(rescuer->flags & WORKER_NOT_RUNNING));
2912 4 : schedule();
2913 0 : goto repeat;
2914 : }
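/*
 * Editor's note -- illustrative sketch, not part of kernel/workqueue.c.
 * A workqueue only gets a rescuer thread when it is created with
 * WQ_MEM_RECLAIM, which is what backs the forward-progress guarantee
 * described above; anything sitting in a memory reclaim path should use
 * such a workqueue.  "example_" names are hypothetical.
 */
static struct workqueue_struct *example_reclaim_wq;

static int example_reclaim_init(void)
{
	/* a dedicated rescuer is created for this workqueue */
	example_reclaim_wq = alloc_workqueue("example_reclaim",
					     WQ_MEM_RECLAIM, 0);
	return example_reclaim_wq ? 0 : -ENOMEM;
}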
2915 :
2916 : /**
2917 : * check_flush_dependency - check for flush dependency sanity
2918 : * @target_wq: workqueue being flushed
2919 : * @target_work: work item being flushed (NULL for workqueue flushes)
2920 : *
2921 : * %current is trying to flush the whole @target_wq or @target_work on it.
2922 : * If @target_wq doesn't have %WQ_MEM_RECLAIM, verify that %current is not
2923 : * reclaiming memory or running on a workqueue which doesn't have
2924 : * %WQ_MEM_RECLAIM as that can break forward-progress guarantee leading to
2925 : * a deadlock.
2926 : */
2927 2 : static void check_flush_dependency(struct workqueue_struct *target_wq,
2928 : struct work_struct *target_work)
2929 : {
2930 2 : work_func_t target_func = target_work ? target_work->func : NULL;
2931 : struct worker *worker;
2932 :
2933 2 : if (target_wq->flags & WQ_MEM_RECLAIM)
2934 : return;
2935 :
2936 2 : worker = current_wq_worker();
2937 :
2938 2 : WARN_ONCE(current->flags & PF_MEMALLOC,
2939 : "workqueue: PF_MEMALLOC task %d(%s) is flushing !WQ_MEM_RECLAIM %s:%ps",
2940 : current->pid, current->comm, target_wq->name, target_func);
2941 2 : WARN_ONCE(worker && ((worker->current_pwq->wq->flags &
2942 : (WQ_MEM_RECLAIM | __WQ_LEGACY)) == WQ_MEM_RECLAIM),
2943 : "workqueue: WQ_MEM_RECLAIM %s:%ps is flushing !WQ_MEM_RECLAIM %s:%ps",
2944 : worker->current_pwq->wq->name, worker->current_func,
2945 : target_wq->name, target_func);
2946 : }
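/*
 * Editor's note -- illustrative sketch, not part of kernel/workqueue.c,
 * showing the rule check_flush_dependency() enforces: a work item running
 * on a WQ_MEM_RECLAIM workqueue may only flush workqueues that also have
 * WQ_MEM_RECLAIM, otherwise the forward-progress guarantee is broken.
 * "example_" names are hypothetical.
 */
static struct workqueue_struct *example_src_wq;		/* WQ_MEM_RECLAIM, runs this work */
static struct workqueue_struct *example_dep_reclaim_wq;	/* WQ_MEM_RECLAIM */
static struct workqueue_struct *example_dep_plain_wq;	/* no WQ_MEM_RECLAIM */

static void example_reclaim_work_fn(struct work_struct *work)
{
	/* OK: the flushed workqueue also guarantees forward progress */
	flush_workqueue(example_dep_reclaim_wq);

	/* BAD: would trigger the WARN_ONCE() in check_flush_dependency() */
	/* flush_workqueue(example_dep_plain_wq); */
}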
2947 :
2948 : struct wq_barrier {
2949 : struct work_struct work;
2950 : struct completion done;
2951 : struct task_struct *task; /* purely informational */
2952 : };
2953 :
2954 2 : static void wq_barrier_func(struct work_struct *work)
2955 : {
2956 2 : struct wq_barrier *barr = container_of(work, struct wq_barrier, work);
2957 2 : complete(&barr->done);
2958 2 : }
2959 :
2960 : /**
2961 : * insert_wq_barrier - insert a barrier work
2962 : * @pwq: pwq to insert barrier into
2963 : * @barr: wq_barrier to insert
2964 : * @target: target work to attach @barr to
2965 : * @worker: worker currently executing @target, NULL if @target is not executing
2966 : *
2967 : * @barr is linked to @target such that @barr is completed only after
2968 : * @target finishes execution. Please note that the ordering
2969 : * guarantee is observed only with respect to @target and on the local
2970 : * cpu.
2971 : *
2972 : * Currently, a queued barrier can't be canceled. This is because
2973 : * try_to_grab_pending() can't determine whether the work to be
2974             :  * grabbed is at the head of the queue and thus can't clear the LINKED
2975             :  * flag of the previous work, while there must be a valid next work
2976             :  * after a work with the LINKED flag set.
2977 : *
2978 : * Note that when @worker is non-NULL, @target may be modified
2979 : * underneath us, so we can't reliably determine pwq from @target.
2980 : *
2981 : * CONTEXT:
2982 : * raw_spin_lock_irq(pool->lock).
2983 : */
2984 2 : static void insert_wq_barrier(struct pool_workqueue *pwq,
2985 : struct wq_barrier *barr,
2986 : struct work_struct *target, struct worker *worker)
2987 : {
2988 2 : unsigned int work_flags = 0;
2989 : unsigned int work_color;
2990 : struct list_head *head;
2991 :
2992 : /*
2993 : * debugobject calls are safe here even with pool->lock locked
2994 : * as we know for sure that this will not trigger any of the
2995 : * checks and call back into the fixup functions where we
2996 : * might deadlock.
2997 : */
2998 4 : INIT_WORK_ONSTACK(&barr->work, wq_barrier_func);
2999 2 : __set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(&barr->work));
3000 :
3001 4 : init_completion_map(&barr->done, &target->lockdep_map);
3002 :
3003 2 : barr->task = current;
3004 :
3005 : /* The barrier work item does not participate in pwq->nr_active. */
3006 2 : work_flags |= WORK_STRUCT_INACTIVE;
3007 :
3008 : /*
3009 : * If @target is currently being executed, schedule the
3010 : * barrier to the worker; otherwise, put it after @target.
3011 : */
3012 2 : if (worker) {
3013 0 : head = worker->scheduled.next;
3014 0 : work_color = worker->current_color;
3015 : } else {
3016 2 : unsigned long *bits = work_data_bits(target);
3017 :
3018 2 : head = target->entry.next;
3019 : /* there can already be other linked works, inherit and set */
3020 2 : work_flags |= *bits & WORK_STRUCT_LINKED;
3021 4 : work_color = get_work_color(*bits);
3022 2 : __set_bit(WORK_STRUCT_LINKED_BIT, bits);
3023 : }
3024 :
3025 2 : pwq->nr_in_flight[work_color]++;
3026 4 : work_flags |= work_color_to_flags(work_color);
3027 :
3028 2 : debug_work_activate(&barr->work);
3029 2 : insert_work(pwq, &barr->work, head, work_flags);
3030 2 : }
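/*
 * Editor's note -- simplified sketch, not the actual flush implementation
 * in this file.  It only illustrates the caller-side shape implied by the
 * comments above: a wq_barrier lives on the flusher's stack, gets linked
 * behind @target under pool->lock, and the flusher then sleeps on
 * barr.done, which wq_barrier_func() completes once @target has finished.
 * Real callers (flush_work() and friends) additionally do the pwq/worker
 * lookup and lockdep annotations.
 */
static void example_flush_via_barrier(struct pool_workqueue *pwq,
				      struct work_struct *target,
				      struct worker *worker)
__releases(&pwq->pool->lock)
{
	struct wq_barrier barr;

	/* entered with pool->lock held, as insert_wq_barrier() requires */
	insert_wq_barrier(pwq, &barr, target, worker);
	raw_spin_unlock_irq(&pwq->pool->lock);

	wait_for_completion(&barr.done);	/* completed by wq_barrier_func() */
	destroy_work_on_stack(&barr.work);
}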
3031 :
3032 : /**
3033 : * flush_workqueue_prep_pwqs - prepare pwqs for workqueue flushing
3034 : * @wq: workqueue being flushed
3035 : * @flush_color: new flush color, < 0 for no-op
3036 : * @work_color: new work color, < 0 for no-op
3037 : *
3038 : * Prepare pwqs for workqueue flushing.
3039 : *
3040 : * If @flush_color is non-negative, flush_color on all pwqs should be
3041 : * -1. If no pwq has in-flight commands at the specified color, all
3042 : * pwq->flush_color's stay at -1 and %false is returned. If any pwq
3043 : * has in flight commands, its pwq->flush_color is set to
3044             :  * has in-flight commands, its pwq->flush_color is set to
3045 : * wakeup logic is armed and %true is returned.
3046 : *
3047 : * The caller should have initialized @wq->first_flusher prior to
3048 : * calling this function with non-negative @flush_color. If
3049 : * @flush_color is negative, no flush color update is done and %false
3050 : * is returned.
3051 : *
3052 : * If @work_color is non-negative, all pwqs should have the same
3053 : * work_color which is previous to @work_color and all will be
3054 : * advanced to @work_color.
3055 : *
3056 : * CONTEXT:
3057 : * mutex_lock(wq->mutex).
3058 : *
3059 : * Return:
3060 : * %true if @flush_color >= 0 and there's something to flush. %false
3061 : * otherwise.
3062 : */
3063 0 : static bool flush_workqueue_prep_pwqs(struct workqueue_struct *wq,
3064 : int flush_color, int work_color)
3065 : {
3066 0 : bool wait = false;
3067 : struct pool_workqueue *pwq;
3068 :
3069 0 : if (flush_color >= 0) {
3070 0 : WARN_ON_ONCE(atomic_read(&wq->nr_pwqs_to_flush));
3071 0 : atomic_set(&wq->nr_pwqs_to_flush, 1);
3072 : }
3073 :
3074 0 : for_each_pwq(pwq, wq) {
3075 0 : struct worker_pool *pool = pwq->pool;
3076 :
3077 0 : raw_spin_lock_irq(&pool->lock);
3078 :
3079 0 : if (flush_color >= 0) {
3080 0 : WARN_ON_ONCE(pwq->flush_color != -1);
3081 :
3082 0 : if (pwq->nr_in_flight[flush_color]) {
3083 0 : pwq->flush_color = flush_color;
3084 0 : atomic_inc(&wq->nr_pwqs_to_flush);
3085 0 : wait = true;
3086 : }
3087 : }
3088 :
3089 0 : if (work_color >= 0) {
3090 0 : WARN_ON_ONCE(work_color != work_next_color(pwq->work_color));
3091 0 : pwq->work_color = work_color;
3092 : }
3093 :
3094 0 : raw_spin_unlock_irq(&pool->lock);
3095 : }
3096 :
3097 0 : if (flush_color >= 0 && atomic_dec_and_test(&wq->nr_pwqs_to_flush))
3098 0 : complete(&wq->first_flusher->done);
3099 :
3100 0 : return wait;
3101 : }
3102 :
3103 : /**
3104 : * __flush_workqueue - ensure that any scheduled work has run to completion.
3105 : * @wq: workqueue to flush
3106 : *
3107 : * This function sleeps until all work items which were queued on entry
3108 : * have finished execution, but it is not livelocked by new incoming ones.
3109 : */
3110 0 : void __flush_workqueue(struct workqueue_struct *wq)
3111 : {
3112 0 : struct wq_flusher this_flusher = {
3113 : .list = LIST_HEAD_INIT(this_flusher.list),
3114 : .flush_color = -1,
3115 0 : .done = COMPLETION_INITIALIZER_ONSTACK_MAP(this_flusher.done, wq->lockdep_map),
3116 : };
3117 : int next_color;
3118 :
3119 0 : if (WARN_ON(!wq_online))
3120 0 : return;
3121 :
3122 : lock_map_acquire(&wq->lockdep_map);
3123 : lock_map_release(&wq->lockdep_map);
3124 :
3125 0 : mutex_lock(&wq->mutex);
3126 :
3127 : /*
3128 : * Start-to-wait phase
3129 : */
3130 0 : next_color = work_next_color(wq->work_color);
3131 :
3132 0 : if (next_color != wq->flush_color) {
3133 : /*
3134 : * Color space is not full. The current work_color
3135 : * becomes our flush_color and work_color is advanced
3136 : * by one.
3137 : */
3138 0 : WARN_ON_ONCE(!list_empty(&wq->flusher_overflow));
3139 0 : this_flusher.flush_color = wq->work_color;
3140 0 : wq->work_color = next_color;
3141 :
3142 0 : if (!wq->first_flusher) {
3143 : /* no flush in progress, become the first flusher */
3144 0 : WARN_ON_ONCE(wq->flush_color != this_flusher.flush_color);
3145 :
3146 0 : wq->first_flusher = &this_flusher;
3147 :
3148 0 : if (!flush_workqueue_prep_pwqs(wq, wq->flush_color,
3149 : wq->work_color)) {
3150 : /* nothing to flush, done */
3151 0 : wq->flush_color = next_color;
3152 0 : wq->first_flusher = NULL;
3153 0 : goto out_unlock;
3154 : }
3155 : } else {
3156 : /* wait in queue */
3157 0 : WARN_ON_ONCE(wq->flush_color == this_flusher.flush_color);
3158 0 : list_add_tail(&this_flusher.list, &wq->flusher_queue);
3159 0 : flush_workqueue_prep_pwqs(wq, -1, wq->work_color);
3160 : }
3161 : } else {
3162 : /*
3163 : * Oops, color space is full, wait on overflow queue.
3164 : * The next flush completion will assign us
3165 : * flush_color and transfer to flusher_queue.
3166 : */
3167 0 : list_add_tail(&this_flusher.list, &wq->flusher_overflow);
3168 : }
3169 :
3170 0 : check_flush_dependency(wq, NULL);
3171 :
3172 0 : mutex_unlock(&wq->mutex);
3173 :
3174 0 : wait_for_completion(&this_flusher.done);
3175 :
3176 : /*
3177 : * Wake-up-and-cascade phase
3178 : *
3179 : * First flushers are responsible for cascading flushes and
3180 : * handling overflow. Non-first flushers can simply return.
3181 : */
3182 0 : if (READ_ONCE(wq->first_flusher) != &this_flusher)
3183 : return;
3184 :
3185 0 : mutex_lock(&wq->mutex);
3186 :
3187 : /* we might have raced, check again with mutex held */
3188 0 : if (wq->first_flusher != &this_flusher)
3189 : goto out_unlock;
3190 :
3191 0 : WRITE_ONCE(wq->first_flusher, NULL);
3192 :
3193 0 : WARN_ON_ONCE(!list_empty(&this_flusher.list));
3194 0 : WARN_ON_ONCE(wq->flush_color != this_flusher.flush_color);
3195 :
3196 0 : while (true) {
3197 : struct wq_flusher *next, *tmp;
3198 :
3199 : /* complete all the flushers sharing the current flush color */
3200 0 : list_for_each_entry_safe(next, tmp, &wq->flusher_queue, list) {
3201 0 : if (next->flush_color != wq->flush_color)
3202 : break;
3203 0 : list_del_init(&next->list);
3204 0 : complete(&next->done);
3205 : }
3206 :
3207 0 : WARN_ON_ONCE(!list_empty(&wq->flusher_overflow) &&
3208 : wq->flush_color != work_next_color(wq->work_color));
3209 :
3210 : /* this flush_color is finished, advance by one */
3211 0 : wq->flush_color = work_next_color(wq->flush_color);
3212 :
3213 : /* one color has been freed, handle overflow queue */
3214 0 : if (!list_empty(&wq->flusher_overflow)) {
3215 : /*
3216 : * Assign the same color to all overflowed
3217 : * flushers, advance work_color and append to
3218 : * flusher_queue. This is the start-to-wait
3219 : * phase for these overflowed flushers.
3220 : */
3221 0 : list_for_each_entry(tmp, &wq->flusher_overflow, list)
3222 0 : tmp->flush_color = wq->work_color;
3223 :
3224 0 : wq->work_color = work_next_color(wq->work_color);
3225 :
3226 0 : list_splice_tail_init(&wq->flusher_overflow,
3227 : &wq->flusher_queue);
3228 0 : flush_workqueue_prep_pwqs(wq, -1, wq->work_color);
3229 : }
3230 :
3231 0 : if (list_empty(&wq->flusher_queue)) {
3232 0 : WARN_ON_ONCE(wq->flush_color != wq->work_color);
3233 : break;
3234 : }
3235 :
3236 : /*
3237 : * Need to flush more colors. Make the next flusher
3238 : * the new first flusher and arm pwqs.
3239 : */
3240 0 : WARN_ON_ONCE(wq->flush_color == wq->work_color);
3241 0 : WARN_ON_ONCE(wq->flush_color != next->flush_color);
3242 :
3243 0 : list_del_init(&next->list);
3244 0 : wq->first_flusher = next;
3245 :
3246 0 : if (flush_workqueue_prep_pwqs(wq, wq->flush_color, -1))
3247 : break;
3248 :
3249 : /*
3250 : * Meh... this color is already done, clear first
3251 : * flusher and repeat cascading.
3252 : */
3253 0 : wq->first_flusher = NULL;
3254 : }
3255 :
3256 : out_unlock:
3257 0 : mutex_unlock(&wq->mutex);
3258 : }
3259 : EXPORT_SYMBOL(__flush_workqueue);
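
/*
 * Usage sketch for flush_workqueue(), the public wrapper around
 * __flush_workqueue() above: a hypothetical driver suspend path that waits
 * for every work item queued on its private workqueue so far to finish
 * before powering the device down. The foo_* names are assumptions made
 * for the example, not symbols from this file.
 */
static struct workqueue_struct *foo_wq;         /* assumed: created at probe */

static int foo_suspend(void)
{
        /*
         * Sleeps until all items queued up to this point have completed;
         * items queued afterwards do not livelock the flush.
         */
        flush_workqueue(foo_wq);
        return 0;
}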
3260 :
3261 : /**
3262 : * drain_workqueue - drain a workqueue
3263 : * @wq: workqueue to drain
3264 : *
3265 : * Wait until the workqueue becomes empty. While draining is in progress,
3266 : * only chain queueing is allowed. IOW, only currently pending or running
3267 : * work items on @wq can queue further work items on it. @wq is flushed
3268 : * repeatedly until it becomes empty. The number of flushes is determined
3269 : * by the depth of chaining and should be relatively small. Whine if it
3270 : * takes too long.
3271 : */
3272 0 : void drain_workqueue(struct workqueue_struct *wq)
3273 : {
3274 0 : unsigned int flush_cnt = 0;
3275 : struct pool_workqueue *pwq;
3276 :
3277 : /*
3278 : * __queue_work() needs to test whether there are drainers; it is much
3279 : * hotter than drain_workqueue() and already looks at @wq->flags.
3280 : * Use __WQ_DRAINING so that the queueing path need not check nr_drainers.
3281 : */
3282 0 : mutex_lock(&wq->mutex);
3283 0 : if (!wq->nr_drainers++)
3284 0 : wq->flags |= __WQ_DRAINING;
3285 0 : mutex_unlock(&wq->mutex);
3286 : reflush:
3287 0 : __flush_workqueue(wq);
3288 :
3289 0 : mutex_lock(&wq->mutex);
3290 :
3291 0 : for_each_pwq(pwq, wq) {
3292 : bool drained;
3293 :
3294 0 : raw_spin_lock_irq(&pwq->pool->lock);
3295 0 : drained = !pwq->nr_active && list_empty(&pwq->inactive_works);
3296 0 : raw_spin_unlock_irq(&pwq->pool->lock);
3297 :
3298 0 : if (drained)
3299 0 : continue;
3300 :
3301 0 : if (++flush_cnt == 10 ||
3302 0 : (flush_cnt % 100 == 0 && flush_cnt <= 1000))
3303 0 : pr_warn("workqueue %s: %s() isn't complete after %u tries\n",
3304 : wq->name, __func__, flush_cnt);
3305 :
3306 0 : mutex_unlock(&wq->mutex);
3307 0 : goto reflush;
3308 : }
3309 :
3310 0 : if (!--wq->nr_drainers)
3311 0 : wq->flags &= ~__WQ_DRAINING;
3312 0 : mutex_unlock(&wq->mutex);
3313 0 : }
3314 : EXPORT_SYMBOL_GPL(drain_workqueue);
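
/*
 * Usage sketch for drain_workqueue(): shutting down a self-requeueing
 * (chain-queueing) work item. While draining, only work items already on
 * the workqueue may queue further items, so the chain below terminates
 * once bar_stop is set. All bar_* names are hypothetical.
 */
static struct workqueue_struct *bar_wq;         /* assumed: allocated elsewhere */
static struct work_struct bar_work;
static bool bar_stop;

static void bar_work_fn(struct work_struct *work)
{
        if (!READ_ONCE(bar_stop))
                queue_work(bar_wq, &bar_work);  /* chain queueing, still allowed */
}

static void bar_shutdown(void)
{
        WRITE_ONCE(bar_stop, true);
        drain_workqueue(bar_wq);                /* flushes repeatedly until empty */
}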
3315 :
3316 9 : static bool start_flush_work(struct work_struct *work, struct wq_barrier *barr,
3317 : bool from_cancel)
3318 : {
3319 9 : struct worker *worker = NULL;
3320 : struct worker_pool *pool;
3321 : struct pool_workqueue *pwq;
3322 :
3323 : might_sleep();
3324 :
3325 : rcu_read_lock();
3326 9 : pool = get_work_pool(work);
3327 9 : if (!pool) {
3328 : rcu_read_unlock();
3329 5 : return false;
3330 : }
3331 :
3332 4 : raw_spin_lock_irq(&pool->lock);
3333 : /* see the comment in try_to_grab_pending() with the same code */
3334 4 : pwq = get_work_pwq(work);
3335 4 : if (pwq) {
3336 2 : if (unlikely(pwq->pool != pool))
3337 : goto already_gone;
3338 : } else {
3339 2 : worker = find_worker_executing_work(pool, work);
3340 2 : if (!worker)
3341 : goto already_gone;
3342 0 : pwq = worker->current_pwq;
3343 : }
3344 :
3345 2 : check_flush_dependency(pwq->wq, work);
3346 :
3347 2 : insert_wq_barrier(pwq, barr, work, worker);
3348 2 : raw_spin_unlock_irq(&pool->lock);
3349 :
3350 : /*
3351 : * Force a lock recursion deadlock when using flush_work() inside a
3352 : * single-threaded or rescuer equipped workqueue.
3353 : *
3354 : * For single-threaded workqueues the deadlock happens when the flushed
3355 : * work is queued after the work issuing the flush_work(). For
3356 : * rescuer-equipped workqueues the deadlock happens when the rescuer
3357 : * stalls, blocking forward progress.
3358 : */
3359 : if (!from_cancel &&
3360 : (pwq->wq->saved_max_active == 1 || pwq->wq->rescuer)) {
3361 : lock_map_acquire(&pwq->wq->lockdep_map);
3362 : lock_map_release(&pwq->wq->lockdep_map);
3363 : }
3364 : rcu_read_unlock();
3365 2 : return true;
3366 : already_gone:
3367 2 : raw_spin_unlock_irq(&pool->lock);
3368 : rcu_read_unlock();
3369 2 : return false;
3370 : }
3371 :
3372 9 : static bool __flush_work(struct work_struct *work, bool from_cancel)
3373 : {
3374 : struct wq_barrier barr;
3375 :
3376 9 : if (WARN_ON(!wq_online))
3377 : return false;
3378 :
3379 9 : if (WARN_ON(!work->func))
3380 : return false;
3381 :
3382 : lock_map_acquire(&work->lockdep_map);
3383 : lock_map_release(&work->lockdep_map);
3384 :
3385 9 : if (start_flush_work(work, &barr, from_cancel)) {
3386 2 : wait_for_completion(&barr.done);
3387 2 : destroy_work_on_stack(&barr.work);
3388 2 : return true;
3389 : } else {
3390 : return false;
3391 : }
3392 : }
3393 :
3394 : /**
3395 : * flush_work - wait for a work to finish executing the last queueing instance
3396 : * @work: the work to flush
3397 : *
3398 : * Wait until @work has finished execution. @work is guaranteed to be idle
3399 : * on return if it hasn't been requeued since flush started.
3400 : *
3401 : * Return:
3402 : * %true if flush_work() waited for the work to finish execution,
3403 : * %false if it was already idle.
3404 : */
3405 9 : bool flush_work(struct work_struct *work)
3406 : {
3407 9 : return __flush_work(work, false);
3408 : }
3409 : EXPORT_SYMBOL_GPL(flush_work);
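
/*
 * Usage sketch for flush_work(): waiting for the last queueing instance of
 * a specific work item before the caller relies on its effect. The
 * foo_update structure and function names are assumptions for the example.
 */
struct foo_update {
        struct work_struct work;
        int value;
};

static void foo_update_fn(struct work_struct *work)
{
        struct foo_update *upd = container_of(work, struct foo_update, work);

        pr_info("applying value %d\n", upd->value);
}

static void foo_apply_sync(struct foo_update *upd, int value)
{
        upd->value = value;
        INIT_WORK(&upd->work, foo_update_fn);
        schedule_work(&upd->work);
        flush_work(&upd->work);                 /* upd->work is idle on return */
}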
3410 :
3411 : struct cwt_wait {
3412 : wait_queue_entry_t wait;
3413 : struct work_struct *work;
3414 : };
3415 :
3416 0 : static int cwt_wakefn(wait_queue_entry_t *wait, unsigned mode, int sync, void *key)
3417 : {
3418 0 : struct cwt_wait *cwait = container_of(wait, struct cwt_wait, wait);
3419 :
3420 0 : if (cwait->work != key)
3421 : return 0;
3422 0 : return autoremove_wake_function(wait, mode, sync, key);
3423 : }
3424 :
3425 0 : static bool __cancel_work_timer(struct work_struct *work, bool is_dwork)
3426 : {
3427 : static DECLARE_WAIT_QUEUE_HEAD(cancel_waitq);
3428 : unsigned long flags;
3429 : int ret;
3430 :
3431 : do {
3432 0 : ret = try_to_grab_pending(work, is_dwork, &flags);
3433 : /*
3434 : * If someone else is already canceling, wait for it to
3435 : * finish. flush_work() doesn't work for PREEMPT_NONE
3436 : * because we may get scheduled between @work's completion
3437 : * and the other canceling task resuming and clearing
3438 : * CANCELING - flush_work() will return false immediately
3439 : * as @work is no longer busy, try_to_grab_pending() will
3440 : * return -ENOENT as @work is still being canceled and the
3441 : * other canceling task won't be able to clear CANCELING as
3442 : * we're hogging the CPU.
3443 : *
3444 : * Let's wait for completion using a waitqueue. As this
3445 : * may lead to the thundering herd problem, use a custom
3446 : * wake function which matches @work along with exclusive
3447 : * wait and wakeup.
3448 : */
3449 0 : if (unlikely(ret == -ENOENT)) {
3450 : struct cwt_wait cwait;
3451 :
3452 0 : init_wait(&cwait.wait);
3453 0 : cwait.wait.func = cwt_wakefn;
3454 0 : cwait.work = work;
3455 :
3456 0 : prepare_to_wait_exclusive(&cancel_waitq, &cwait.wait,
3457 : TASK_UNINTERRUPTIBLE);
3458 0 : if (work_is_canceling(work))
3459 0 : schedule();
3460 0 : finish_wait(&cancel_waitq, &cwait.wait);
3461 : }
3462 0 : } while (unlikely(ret < 0));
3463 :
3464 : /* tell other tasks trying to grab @work to back off */
3465 0 : mark_work_canceling(work);
3466 0 : local_irq_restore(flags);
3467 :
3468 : /*
3469 : * This allows canceling during early boot. We know that @work
3470 : * isn't executing.
3471 : */
3472 0 : if (wq_online)
3473 0 : __flush_work(work, true);
3474 :
3475 0 : clear_work_data(work);
3476 :
3477 : /*
3478 : * Paired with prepare_to_wait() above so that either
3479 : * waitqueue_active() is visible here or !work_is_canceling() is
3480 : * visible there.
3481 : */
3482 0 : smp_mb();
3483 0 : if (waitqueue_active(&cancel_waitq))
3484 0 : __wake_up(&cancel_waitq, TASK_NORMAL, 1, work);
3485 :
3486 0 : return ret;
3487 : }
3488 :
3489 : /**
3490 : * cancel_work_sync - cancel a work and wait for it to finish
3491 : * @work: the work to cancel
3492 : *
3493 : * Cancel @work and wait for its execution to finish. This function
3494 : * can be used even if the work re-queues itself or migrates to
3495 : * another workqueue. On return from this function, @work is
3496 : * guaranteed to be not pending or executing on any CPU.
3497 : *
3498 : * cancel_work_sync(&delayed_work->work) must not be used for
3499 : * delayed_works. Use cancel_delayed_work_sync() instead.
3500 : *
3501 : * The caller must ensure that the workqueue on which @work was last
3502 : * queued can't be destroyed before this function returns.
3503 : *
3504 : * Return:
3505 : * %true if @work was pending, %false otherwise.
3506 : */
3507 0 : bool cancel_work_sync(struct work_struct *work)
3508 : {
3509 0 : return __cancel_work_timer(work, false);
3510 : }
3511 : EXPORT_SYMBOL_GPL(cancel_work_sync);
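
/*
 * Usage sketch for cancel_work_sync(): a device remove path that must
 * guarantee its work item is neither pending nor running before freeing
 * the object embedding it. baz_dev is a hypothetical structure.
 */
struct baz_dev {
        struct work_struct event_work;
};

static void baz_remove(struct baz_dev *bdev)
{
        cancel_work_sync(&bdev->event_work);    /* may sleep */
        kfree(bdev);                            /* event_work can no longer run */
}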
3512 :
3513 : /**
3514 : * flush_delayed_work - wait for a dwork to finish executing the last queueing
3515 : * @dwork: the delayed work to flush
3516 : *
3517 : * Delayed timer is cancelled and the pending work is queued for
3518 : * immediate execution. Like flush_work(), this function only
3519 : * considers the last queueing instance of @dwork.
3520 : *
3521 : * Return:
3522 : * %true if flush_work() waited for the work to finish execution,
3523 : * %false if it was already idle.
3524 : */
3525 0 : bool flush_delayed_work(struct delayed_work *dwork)
3526 : {
3527 : local_irq_disable();
3528 0 : if (del_timer_sync(&dwork->timer))
3529 0 : __queue_work(dwork->cpu, dwork->wq, &dwork->work);
3530 : local_irq_enable();
3531 0 : return flush_work(&dwork->work);
3532 : }
3533 : EXPORT_SYMBOL(flush_delayed_work);
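
/*
 * Usage sketch for flush_delayed_work(): forcing a deferred writeback to
 * run now instead of waiting for its timer. qux_dev is an assumed example
 * structure; only the delayed_work API calls come from this file.
 */
struct qux_dev {
        struct delayed_work writeback;
};

static void qux_sync(struct qux_dev *qdev)
{
        /* cancels the timer, queues the work immediately and waits for it */
        flush_delayed_work(&qdev->writeback);
}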
3534 :
3535 : /**
3536 : * flush_rcu_work - wait for a rwork to finish executing the last queueing
3537 : * @rwork: the rcu work to flush
3538 : *
3539 : * Return:
3540 : * %true if flush_rcu_work() waited for the work to finish execution,
3541 : * %false if it was already idle.
3542 : */
3543 0 : bool flush_rcu_work(struct rcu_work *rwork)
3544 : {
3545 0 : if (test_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(&rwork->work))) {
3546 0 : rcu_barrier();
3547 0 : flush_work(&rwork->work);
3548 0 : return true;
3549 : } else {
3550 0 : return flush_work(&rwork->work);
3551 : }
3552 : }
3553 : EXPORT_SYMBOL(flush_rcu_work);
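
/*
 * Usage sketch for rcu_work: the callback below runs only after a
 * subsequent RCU grace period, and flush_rcu_work() waits for both the
 * grace period and the execution. The quux_* names are illustrative.
 */
static struct rcu_work quux_rwork;

static void quux_reclaim_fn(struct work_struct *work)
{
        /* safe to free objects that were unlinked before queue_rcu_work() */
}

static void quux_reclaim_sync(void)
{
        INIT_RCU_WORK(&quux_rwork, quux_reclaim_fn);
        queue_rcu_work(system_unbound_wq, &quux_rwork);
        flush_rcu_work(&quux_rwork);    /* grace period elapsed and work done */
}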
3554 :
3555 21 : static bool __cancel_work(struct work_struct *work, bool is_dwork)
3556 : {
3557 : unsigned long flags;
3558 : int ret;
3559 :
3560 : do {
3561 21 : ret = try_to_grab_pending(work, is_dwork, &flags);
3562 21 : } while (unlikely(ret == -EAGAIN));
3563 :
3564 21 : if (unlikely(ret < 0))
3565 : return false;
3566 :
3567 42 : set_work_pool_and_clear_pending(work, get_work_pool_id(work));
3568 42 : local_irq_restore(flags);
3569 21 : return ret;
3570 : }
3571 :
3572 : /*
3573 : * See cancel_delayed_work()
3574 : */
3575 0 : bool cancel_work(struct work_struct *work)
3576 : {
3577 0 : return __cancel_work(work, false);
3578 : }
3579 : EXPORT_SYMBOL(cancel_work);
3580 :
3581 : /**
3582 : * cancel_delayed_work - cancel a delayed work
3583 : * @dwork: delayed_work to cancel
3584 : *
3585 : * Kill off a pending delayed_work.
3586 : *
3587 : * Return: %true if @dwork was pending and canceled; %false if it wasn't
3588 : * pending.
3589 : *
3590 : * Note:
3591 : * The work callback function may still be running on return, unless
3592 : * it returns %true and the work doesn't re-arm itself. Explicitly flush or
3593 : * use cancel_delayed_work_sync() to wait on it.
3594 : *
3595 : * This function is safe to call from any context including IRQ handler.
3596 : */
3597 21 : bool cancel_delayed_work(struct delayed_work *dwork)
3598 : {
3599 21 : return __cancel_work(&dwork->work, true);
3600 : }
3601 : EXPORT_SYMBOL(cancel_delayed_work);
3602 :
3603 : /**
3604 : * cancel_delayed_work_sync - cancel a delayed work and wait for it to finish
3605 : * @dwork: the delayed work to cancel
3606 : *
3607 : * This is cancel_work_sync() for delayed works.
3608 : *
3609 : * Return:
3610 : * %true if @dwork was pending, %false otherwise.
3611 : */
3612 0 : bool cancel_delayed_work_sync(struct delayed_work *dwork)
3613 : {
3614 0 : return __cancel_work_timer(&dwork->work, true);
3615 : }
3616 : EXPORT_SYMBOL(cancel_delayed_work_sync);
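
/*
 * Usage sketch contrasting the two delayed-work cancel flavours: an IRQ
 * handler may only use cancel_delayed_work(), while the process-context
 * teardown uses cancel_delayed_work_sync() to also wait for a callback
 * that may currently be running. corge_poll is a hypothetical item.
 */
static struct delayed_work corge_poll;

static void corge_stop_from_irq(void)
{
        /* safe in any context; the callback may still be running afterwards */
        cancel_delayed_work(&corge_poll);
}

static void corge_teardown(void)
{
        /* process context only; waits until a running callback has finished */
        cancel_delayed_work_sync(&corge_poll);
}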
3617 :
3618 : /**
3619 : * schedule_on_each_cpu - execute a function synchronously on each online CPU
3620 : * @func: the function to call
3621 : *
3622 : * schedule_on_each_cpu() executes @func on each online CPU using the
3623 : * system workqueue and blocks until all CPUs have completed.
3624 : * schedule_on_each_cpu() is very slow.
3625 : *
3626 : * Return:
3627 : * 0 on success, -errno on failure.
3628 : */
3629 0 : int schedule_on_each_cpu(work_func_t func)
3630 : {
3631 : int cpu;
3632 : struct work_struct __percpu *works;
3633 :
3634 0 : works = alloc_percpu(struct work_struct);
3635 0 : if (!works)
3636 : return -ENOMEM;
3637 :
3638 : cpus_read_lock();
3639 :
3640 0 : for_each_online_cpu(cpu) {
3641 0 : struct work_struct *work = per_cpu_ptr(works, cpu);
3642 :
3643 0 : INIT_WORK(work, func);
3644 0 : schedule_work_on(cpu, work);
3645 : }
3646 :
3647 0 : for_each_online_cpu(cpu)
3648 0 : flush_work(per_cpu_ptr(works, cpu));
3649 :
3650 : cpus_read_unlock();
3651 0 : free_percpu(works);
3652 0 : return 0;
3653 : }
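
/*
 * Usage sketch for schedule_on_each_cpu(): run a function once on every
 * online CPU and wait for all of them, e.g. to drain hypothetical per-CPU
 * caches. The callback gets a throwaway work_struct it can ignore.
 */
static void grault_drain_cpu_cache(struct work_struct *unused)
{
        /* executes on each online CPU in turn, in process context */
}

static int grault_drain_all(void)
{
        return schedule_on_each_cpu(grault_drain_cpu_cache);    /* 0 or -ENOMEM */
}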
3654 :
3655 : /**
3656 : * execute_in_process_context - reliably execute the routine with user context
3657 : * @fn: the function to execute
3658 : * @ew: guaranteed storage for the execute work structure (must
3659 : * be available when the work executes)
3660 : *
3661 : * Executes the function immediately if process context is available,
3662 : * otherwise schedules the function for delayed execution.
3663 : *
3664 : * Return: 0 - function was executed
3665 : * 1 - function was scheduled for execution
3666 : */
3667 0 : int execute_in_process_context(work_func_t fn, struct execute_work *ew)
3668 : {
3669 0 : if (!in_interrupt()) {
3670 0 : fn(&ew->work);
3671 0 : return 0;
3672 : }
3673 :
3674 0 : INIT_WORK(&ew->work, fn);
3675 0 : schedule_work(&ew->work);
3676 :
3677 0 : return 1;
3678 : }
3679 : EXPORT_SYMBOL_GPL(execute_in_process_context);
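
/*
 * Usage sketch for execute_in_process_context(): the cleanup below runs
 * immediately when the caller is already in process context and is
 * deferred to the system workqueue when called from interrupt context.
 * @ew must stay valid until the work executes, so it lives inside the
 * (hypothetical) object being released.
 */
struct garply_obj {
        struct execute_work ew;
};

static void garply_cleanup_fn(struct work_struct *work)
{
        struct garply_obj *obj = container_of(work, struct garply_obj, ew.work);

        kfree(obj);
}

static void garply_release(struct garply_obj *obj)
{
        execute_in_process_context(garply_cleanup_fn, &obj->ew);
}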
3680 :
3681 : /**
3682 : * free_workqueue_attrs - free a workqueue_attrs
3683 : * @attrs: workqueue_attrs to free
3684 : *
3685 : * Undo alloc_workqueue_attrs().
3686 : */
3687 0 : void free_workqueue_attrs(struct workqueue_attrs *attrs)
3688 : {
3689 3 : if (attrs) {
3690 6 : free_cpumask_var(attrs->cpumask);
3691 6 : kfree(attrs);
3692 : }
3693 0 : }
3694 :
3695 : /**
3696 : * alloc_workqueue_attrs - allocate a workqueue_attrs
3697 : *
3698 : * Allocate a new workqueue_attrs, initialize with default settings and
3699 : * return it.
3700 : *
3701 : * Return: The newly allocated workqueue_attrs on success. %NULL on failure.
3702 : */
3703 16 : struct workqueue_attrs *alloc_workqueue_attrs(void)
3704 : {
3705 : struct workqueue_attrs *attrs;
3706 :
3707 16 : attrs = kzalloc(sizeof(*attrs), GFP_KERNEL);
3708 16 : if (!attrs)
3709 : goto fail;
3710 16 : if (!alloc_cpumask_var(&attrs->cpumask, GFP_KERNEL))
3711 : goto fail;
3712 :
3713 32 : cpumask_copy(attrs->cpumask, cpu_possible_mask);
3714 16 : return attrs;
3715 : fail:
3716 : free_workqueue_attrs(attrs);
3717 : return NULL;
3718 : }
3719 :
3720 : static void copy_workqueue_attrs(struct workqueue_attrs *to,
3721 : const struct workqueue_attrs *from)
3722 : {
3723 13 : to->nice = from->nice;
3724 26 : cpumask_copy(to->cpumask, from->cpumask);
3725 : /*
3726 : * Unlike hash and equality test, this function doesn't ignore
3727 : * ->no_numa as it is used for both pool and wq attrs. Instead,
3728 : * get_unbound_pool() explicitly clears ->no_numa after copying.
3729 : */
3730 13 : to->no_numa = from->no_numa;
3731 : }
3732 :
3733 : /* hash value of the content of @attr */
3734 3 : static u32 wqattrs_hash(const struct workqueue_attrs *attrs)
3735 : {
3736 3 : u32 hash = 0;
3737 :
3738 6 : hash = jhash_1word(attrs->nice, hash);
3739 3 : hash = jhash(cpumask_bits(attrs->cpumask),
3740 : BITS_TO_LONGS(nr_cpumask_bits) * sizeof(long), hash);
3741 3 : return hash;
3742 : }
3743 :
3744 : /* content equality test */
3745 : static bool wqattrs_equal(const struct workqueue_attrs *a,
3746 : const struct workqueue_attrs *b)
3747 : {
3748 2 : if (a->nice != b->nice)
3749 : return false;
3750 4 : if (!cpumask_equal(a->cpumask, b->cpumask))
3751 : return false;
3752 : return true;
3753 : }
3754 :
3755 : /**
3756 : * init_worker_pool - initialize a newly zalloc'd worker_pool
3757 : * @pool: worker_pool to initialize
3758 : *
3759 : * Initialize a newly zalloc'd @pool. It also allocates @pool->attrs.
3760 : *
3761 : * Return: 0 on success, -errno on failure. Even on failure, all fields
3762 : * inside @pool proper are initialized and put_unbound_pool() can be called
3763 : * on @pool safely to release it.
3764 : */
3765 3 : static int init_worker_pool(struct worker_pool *pool)
3766 : {
3767 : raw_spin_lock_init(&pool->lock);
3768 3 : pool->id = -1;
3769 3 : pool->cpu = -1;
3770 3 : pool->node = NUMA_NO_NODE;
3771 3 : pool->flags |= POOL_DISASSOCIATED;
3772 3 : pool->watchdog_ts = jiffies;
3773 6 : INIT_LIST_HEAD(&pool->worklist);
3774 6 : INIT_LIST_HEAD(&pool->idle_list);
3775 6 : hash_init(pool->busy_hash);
3776 :
3777 3 : timer_setup(&pool->idle_timer, idle_worker_timeout, TIMER_DEFERRABLE);
3778 6 : INIT_WORK(&pool->idle_cull_work, idle_cull_fn);
3779 :
3780 3 : timer_setup(&pool->mayday_timer, pool_mayday_timeout, 0);
3781 :
3782 6 : INIT_LIST_HEAD(&pool->workers);
3783 6 : INIT_LIST_HEAD(&pool->dying_workers);
3784 :
3785 6 : ida_init(&pool->worker_ida);
3786 6 : INIT_HLIST_NODE(&pool->hash_node);
3787 3 : pool->refcnt = 1;
3788 :
3789 : /* shouldn't fail above this point */
3790 3 : pool->attrs = alloc_workqueue_attrs();
3791 3 : if (!pool->attrs)
3792 : return -ENOMEM;
3793 3 : return 0;
3794 : }
3795 :
3796 : #ifdef CONFIG_LOCKDEP
3797 : static void wq_init_lockdep(struct workqueue_struct *wq)
3798 : {
3799 : char *lock_name;
3800 :
3801 : lockdep_register_key(&wq->key);
3802 : lock_name = kasprintf(GFP_KERNEL, "%s%s", "(wq_completion)", wq->name);
3803 : if (!lock_name)
3804 : lock_name = wq->name;
3805 :
3806 : wq->lock_name = lock_name;
3807 : lockdep_init_map(&wq->lockdep_map, lock_name, &wq->key, 0);
3808 : }
3809 :
3810 : static void wq_unregister_lockdep(struct workqueue_struct *wq)
3811 : {
3812 : lockdep_unregister_key(&wq->key);
3813 : }
3814 :
3815 : static void wq_free_lockdep(struct workqueue_struct *wq)
3816 : {
3817 : if (wq->lock_name != wq->name)
3818 : kfree(wq->lock_name);
3819 : }
3820 : #else
3821 : static void wq_init_lockdep(struct workqueue_struct *wq)
3822 : {
3823 : }
3824 :
3825 : static void wq_unregister_lockdep(struct workqueue_struct *wq)
3826 : {
3827 : }
3828 :
3829 : static void wq_free_lockdep(struct workqueue_struct *wq)
3830 : {
3831 : }
3832 : #endif
3833 :
3834 0 : static void rcu_free_wq(struct rcu_head *rcu)
3835 : {
3836 0 : struct workqueue_struct *wq =
3837 0 : container_of(rcu, struct workqueue_struct, rcu);
3838 :
3839 0 : wq_free_lockdep(wq);
3840 :
3841 0 : if (!(wq->flags & WQ_UNBOUND))
3842 0 : free_percpu(wq->cpu_pwqs);
3843 : else
3844 0 : free_workqueue_attrs(wq->unbound_attrs);
3845 :
3846 0 : kfree(wq);
3847 0 : }
3848 :
3849 0 : static void rcu_free_pool(struct rcu_head *rcu)
3850 : {
3851 0 : struct worker_pool *pool = container_of(rcu, struct worker_pool, rcu);
3852 :
3853 0 : ida_destroy(&pool->worker_ida);
3854 0 : free_workqueue_attrs(pool->attrs);
3855 0 : kfree(pool);
3856 0 : }
3857 :
3858 : /**
3859 : * put_unbound_pool - put a worker_pool
3860 : * @pool: worker_pool to put
3861 : *
3862 : * Put @pool. If its refcnt reaches zero, it gets destroyed in RCU
3863 : * safe manner. get_unbound_pool() calls this function on its failure path
3864 : * and this function should be able to release pools which went through,
3865 : * successfully or not, init_worker_pool().
3866 : *
3867 : * Should be called with wq_pool_mutex held.
3868 : */
3869 0 : static void put_unbound_pool(struct worker_pool *pool)
3870 : {
3871 0 : DECLARE_COMPLETION_ONSTACK(detach_completion);
3872 : struct list_head cull_list;
3873 : struct worker *worker;
3874 :
3875 0 : INIT_LIST_HEAD(&cull_list);
3876 :
3877 : lockdep_assert_held(&wq_pool_mutex);
3878 :
3879 0 : if (--pool->refcnt)
3880 0 : return;
3881 :
3882 : /* sanity checks */
3883 0 : if (WARN_ON(!(pool->cpu < 0)) ||
3884 0 : WARN_ON(!list_empty(&pool->worklist)))
3885 : return;
3886 :
3887 : /* release id and unhash */
3888 0 : if (pool->id >= 0)
3889 0 : idr_remove(&worker_pool_idr, pool->id);
3890 0 : hash_del(&pool->hash_node);
3891 :
3892 : /*
3893 : * Become the manager and destroy all workers. This prevents
3894 : * @pool's workers from blocking on attach_mutex. We're the last
3895 : * manager and @pool gets freed with the flag set.
3896 : *
3897 : * Having a concurrent manager is quite unlikely to happen as we can
3898 : * only get here with
3899 : * pwq->refcnt == pool->refcnt == 0
3900 : * which implies no work queued to the pool, which implies no worker can
3901 : * become the manager. However a worker could have taken the role of
3902 : * manager before the refcnts dropped to 0, since maybe_create_worker()
3903 : * drops pool->lock.
3904 : */
3905 : while (true) {
3906 0 : rcuwait_wait_event(&manager_wait,
3907 : !(pool->flags & POOL_MANAGER_ACTIVE),
3908 : TASK_UNINTERRUPTIBLE);
3909 :
3910 0 : mutex_lock(&wq_pool_attach_mutex);
3911 0 : raw_spin_lock_irq(&pool->lock);
3912 0 : if (!(pool->flags & POOL_MANAGER_ACTIVE)) {
3913 0 : pool->flags |= POOL_MANAGER_ACTIVE;
3914 : break;
3915 : }
3916 0 : raw_spin_unlock_irq(&pool->lock);
3917 0 : mutex_unlock(&wq_pool_attach_mutex);
3918 : }
3919 :
3920 0 : while ((worker = first_idle_worker(pool)))
3921 0 : set_worker_dying(worker, &cull_list);
3922 0 : WARN_ON(pool->nr_workers || pool->nr_idle);
3923 0 : raw_spin_unlock_irq(&pool->lock);
3924 :
3925 0 : wake_dying_workers(&cull_list);
3926 :
3927 0 : if (!list_empty(&pool->workers) || !list_empty(&pool->dying_workers))
3928 0 : pool->detach_completion = &detach_completion;
3929 0 : mutex_unlock(&wq_pool_attach_mutex);
3930 :
3931 0 : if (pool->detach_completion)
3932 0 : wait_for_completion(pool->detach_completion);
3933 :
3934 : /* shut down the timers */
3935 0 : del_timer_sync(&pool->idle_timer);
3936 0 : cancel_work_sync(&pool->idle_cull_work);
3937 0 : del_timer_sync(&pool->mayday_timer);
3938 :
3939 : /* RCU protected to allow dereferences from get_work_pool() */
3940 0 : call_rcu(&pool->rcu, rcu_free_pool);
3941 : }
3942 :
3943 : /**
3944 : * get_unbound_pool - get a worker_pool with the specified attributes
3945 : * @attrs: the attributes of the worker_pool to get
3946 : *
3947 : * Obtain a worker_pool which has the same attributes as @attrs, bump the
3948 : * reference count and return it. If there already is a matching
3949 : * worker_pool, it will be used; otherwise, this function attempts to
3950 : * create a new one.
3951 : *
3952 : * Should be called with wq_pool_mutex held.
3953 : *
3954 : * Return: On success, a worker_pool with the same attributes as @attrs.
3955 : * On failure, %NULL.
3956 : */
3957 3 : static struct worker_pool *get_unbound_pool(const struct workqueue_attrs *attrs)
3958 : {
3959 3 : u32 hash = wqattrs_hash(attrs);
3960 : struct worker_pool *pool;
3961 : int node;
3962 3 : int target_node = NUMA_NO_NODE;
3963 :
3964 : lockdep_assert_held(&wq_pool_mutex);
3965 :
3966 : /* do we already have a matching pool? */
3967 3 : hash_for_each_possible(unbound_pool_hash, pool, hash_node, hash) {
3968 4 : if (wqattrs_equal(pool->attrs, attrs)) {
3969 2 : pool->refcnt++;
3970 2 : return pool;
3971 : }
3972 : }
3973 :
3974 : /* if cpumask is contained inside a NUMA node, we belong to that node */
3975 1 : if (wq_numa_enabled) {
3976 0 : for_each_node(node) {
3977 0 : if (cpumask_subset(attrs->cpumask,
3978 0 : wq_numa_possible_cpumask[node])) {
3979 : target_node = node;
3980 : break;
3981 : }
3982 : }
3983 : }
3984 :
3985 : /* nope, create a new one */
3986 1 : pool = kzalloc_node(sizeof(*pool), GFP_KERNEL, target_node);
3987 1 : if (!pool || init_worker_pool(pool) < 0)
3988 : goto fail;
3989 :
3990 : lockdep_set_subclass(&pool->lock, 1); /* see put_pwq() */
3991 2 : copy_workqueue_attrs(pool->attrs, attrs);
3992 1 : pool->node = target_node;
3993 :
3994 : /*
3995 : * no_numa isn't a worker_pool attribute, always clear it. See
3996 : * 'struct workqueue_attrs' comments for detail.
3997 : */
3998 1 : pool->attrs->no_numa = false;
3999 :
4000 1 : if (worker_pool_assign_id(pool) < 0)
4001 : goto fail;
4002 :
4003 : /* create and start the initial worker */
4004 1 : if (wq_online && !create_worker(pool))
4005 : goto fail;
4006 :
4007 : /* install */
4008 2 : hash_add(unbound_pool_hash, &pool->hash_node, hash);
4009 :
4010 1 : return pool;
4011 : fail:
4012 0 : if (pool)
4013 0 : put_unbound_pool(pool);
4014 : return NULL;
4015 : }
4016 :
4017 0 : static void rcu_free_pwq(struct rcu_head *rcu)
4018 : {
4019 0 : kmem_cache_free(pwq_cache,
4020 0 : container_of(rcu, struct pool_workqueue, rcu));
4021 0 : }
4022 :
4023 : /*
4024 : * Scheduled on system_wq by put_pwq() when an unbound pwq hits zero refcnt
4025 : * and needs to be destroyed.
4026 : */
4027 0 : static void pwq_unbound_release_workfn(struct work_struct *work)
4028 : {
4029 0 : struct pool_workqueue *pwq = container_of(work, struct pool_workqueue,
4030 : unbound_release_work);
4031 0 : struct workqueue_struct *wq = pwq->wq;
4032 0 : struct worker_pool *pool = pwq->pool;
4033 0 : bool is_last = false;
4034 :
4035 : /*
4036 : * when @pwq is not linked, it doesn't hold any reference to the
4037 : * @wq, and @wq is invalid to access.
4038 : */
4039 0 : if (!list_empty(&pwq->pwqs_node)) {
4040 0 : if (WARN_ON_ONCE(!(wq->flags & WQ_UNBOUND)))
4041 : return;
4042 :
4043 0 : mutex_lock(&wq->mutex);
4044 0 : list_del_rcu(&pwq->pwqs_node);
4045 0 : is_last = list_empty(&wq->pwqs);
4046 0 : mutex_unlock(&wq->mutex);
4047 : }
4048 :
4049 0 : mutex_lock(&wq_pool_mutex);
4050 0 : put_unbound_pool(pool);
4051 0 : mutex_unlock(&wq_pool_mutex);
4052 :
4053 0 : call_rcu(&pwq->rcu, rcu_free_pwq);
4054 :
4055 : /*
4056 : * If we're the last pwq going away, @wq is already dead and no one
4057 : * is gonna access it anymore. Schedule RCU free.
4058 : */
4059 0 : if (is_last) {
4060 0 : wq_unregister_lockdep(wq);
4061 0 : call_rcu(&wq->rcu, rcu_free_wq);
4062 : }
4063 : }
4064 :
4065 : /**
4066 : * pwq_adjust_max_active - update a pwq's max_active to the current setting
4067 : * @pwq: target pool_workqueue
4068 : *
4069 : * If @pwq isn't freezing, set @pwq->max_active to the associated
4070 : * workqueue's saved_max_active and activate inactive work items
4071 : * accordingly. If @pwq is freezing, clear @pwq->max_active to zero.
4072 : */
4073 28 : static void pwq_adjust_max_active(struct pool_workqueue *pwq)
4074 : {
4075 28 : struct workqueue_struct *wq = pwq->wq;
4076 28 : bool freezable = wq->flags & WQ_FREEZABLE;
4077 : unsigned long flags;
4078 :
4079 : /* for @wq->saved_max_active */
4080 : lockdep_assert_held(&wq->mutex);
4081 :
4082 : /* fast exit for non-freezable wqs */
4083 28 : if (!freezable && pwq->max_active == wq->saved_max_active)
4084 : return;
4085 :
4086 : /* this function can be called during early boot w/ irq disabled */
4087 17 : raw_spin_lock_irqsave(&pwq->pool->lock, flags);
4088 :
4089 : /*
4090 : * During [un]freezing, the caller is responsible for ensuring that
4091 : * this function is called at least once after @workqueue_freezing
4092 : * is updated and visible.
4093 : */
4094 17 : if (!freezable || !workqueue_freezing) {
4095 17 : bool kick = false;
4096 :
4097 17 : pwq->max_active = wq->saved_max_active;
4098 :
4099 51 : while (!list_empty(&pwq->inactive_works) &&
4100 0 : pwq->nr_active < pwq->max_active) {
4101 0 : pwq_activate_first_inactive(pwq);
4102 0 : kick = true;
4103 : }
4104 :
4105 : /*
4106 : * Need to kick a worker after thawed or an unbound wq's
4107 : * max_active is bumped. In realtime scenarios, always kicking a
4108 : * worker will cause interference on the isolated cpu cores, so
4109 : * let's kick iff work items were activated.
4110 : */
4111 17 : if (kick)
4112 0 : wake_up_worker(pwq->pool);
4113 : } else {
4114 0 : pwq->max_active = 0;
4115 : }
4116 :
4117 34 : raw_spin_unlock_irqrestore(&pwq->pool->lock, flags);
4118 : }
4119 :
4120 : /* initialize newly allocated @pwq which is associated with @wq and @pool */
4121 14 : static void init_pwq(struct pool_workqueue *pwq, struct workqueue_struct *wq,
4122 : struct worker_pool *pool)
4123 : {
4124 14 : BUG_ON((unsigned long)pwq & WORK_STRUCT_FLAG_MASK);
4125 :
4126 28 : memset(pwq, 0, sizeof(*pwq));
4127 :
4128 14 : pwq->pool = pool;
4129 14 : pwq->wq = wq;
4130 14 : pwq->flush_color = -1;
4131 14 : pwq->refcnt = 1;
4132 28 : INIT_LIST_HEAD(&pwq->inactive_works);
4133 28 : INIT_LIST_HEAD(&pwq->pwqs_node);
4134 28 : INIT_LIST_HEAD(&pwq->mayday_node);
4135 28 : INIT_WORK(&pwq->unbound_release_work, pwq_unbound_release_workfn);
4136 14 : }
4137 :
4138 : /* sync @pwq with the current state of its associated wq and link it */
4139 17 : static void link_pwq(struct pool_workqueue *pwq)
4140 : {
4141 17 : struct workqueue_struct *wq = pwq->wq;
4142 :
4143 : lockdep_assert_held(&wq->mutex);
4144 :
4145 : /* may be called multiple times, ignore if already linked */
4146 34 : if (!list_empty(&pwq->pwqs_node))
4147 : return;
4148 :
4149 : /* set the matching work_color */
4150 14 : pwq->work_color = wq->work_color;
4151 :
4152 : /* sync max_active to the current setting */
4153 14 : pwq_adjust_max_active(pwq);
4154 :
4155 : /* link in @pwq */
4156 14 : list_add_rcu(&pwq->pwqs_node, &wq->pwqs);
4157 : }
4158 :
4159 : /* obtain a pool matching @attr and create a pwq associating the pool and @wq */
4160 3 : static struct pool_workqueue *alloc_unbound_pwq(struct workqueue_struct *wq,
4161 : const struct workqueue_attrs *attrs)
4162 : {
4163 : struct worker_pool *pool;
4164 : struct pool_workqueue *pwq;
4165 :
4166 : lockdep_assert_held(&wq_pool_mutex);
4167 :
4168 3 : pool = get_unbound_pool(attrs);
4169 3 : if (!pool)
4170 : return NULL;
4171 :
4172 3 : pwq = kmem_cache_alloc_node(pwq_cache, GFP_KERNEL, pool->node);
4173 3 : if (!pwq) {
4174 0 : put_unbound_pool(pool);
4175 0 : return NULL;
4176 : }
4177 :
4178 3 : init_pwq(pwq, wq, pool);
4179 3 : return pwq;
4180 : }
4181 :
4182 : /**
4183 : * wq_calc_node_cpumask - calculate a wq_attrs' cpumask for the specified node
4184 : * @attrs: the wq_attrs of the default pwq of the target workqueue
4185 : * @node: the target NUMA node
4186 : * @cpu_going_down: if >= 0, the CPU to consider as offline
4187 : * @cpumask: outarg, the resulting cpumask
4188 : *
4189 : * Calculate the cpumask a workqueue with @attrs should use on @node. If
4190 : * @cpu_going_down is >= 0, that cpu is considered offline during
4191 : * calculation. The result is stored in @cpumask.
4192 : *
4193 : * If NUMA affinity is not enabled, @attrs->cpumask is always used. If
4194 : * enabled and @node has online CPUs requested by @attrs, the returned
4195 : * cpumask is the intersection of the possible CPUs of @node and
4196 : * @attrs->cpumask.
4197 : *
4198 : * The caller is responsible for ensuring that the cpumask of @node stays
4199 : * stable.
4200 : *
4201 : * Return: %true if the resulting @cpumask is different from @attrs->cpumask,
4202 : * %false if equal.
4203 : */
4204 3 : static bool wq_calc_node_cpumask(const struct workqueue_attrs *attrs, int node,
4205 : int cpu_going_down, cpumask_t *cpumask)
4206 : {
4207 3 : if (!wq_numa_enabled || attrs->no_numa)
4208 : goto use_dfl;
4209 :
4210 : /* does @node have any online CPUs @attrs wants? */
4211 0 : cpumask_and(cpumask, cpumask_of_node(node), attrs->cpumask);
4212 0 : if (cpu_going_down >= 0)
4213 : cpumask_clear_cpu(cpu_going_down, cpumask);
4214 :
4215 0 : if (cpumask_empty(cpumask))
4216 : goto use_dfl;
4217 :
4218 : /* yeap, return possible CPUs in @node that @attrs wants */
4219 0 : cpumask_and(cpumask, attrs->cpumask, wq_numa_possible_cpumask[node]);
4220 :
4221 0 : if (cpumask_empty(cpumask)) {
4222 0 : pr_warn_once("WARNING: workqueue cpumask: online intersect > "
4223 : "possible intersect\n");
4224 : return false;
4225 : }
4226 :
4227 0 : return !cpumask_equal(cpumask, attrs->cpumask);
4228 :
4229 : use_dfl:
4230 6 : cpumask_copy(cpumask, attrs->cpumask);
4231 3 : return false;
4232 : }
4233 :
4234 : /* install @pwq into @wq's numa_pwq_tbl[] for @node and return the old pwq */
4235 : static struct pool_workqueue *numa_pwq_tbl_install(struct workqueue_struct *wq,
4236 : int node,
4237 : struct pool_workqueue *pwq)
4238 : {
4239 : struct pool_workqueue *old_pwq;
4240 :
4241 : lockdep_assert_held(&wq_pool_mutex);
4242 : lockdep_assert_held(&wq->mutex);
4243 :
4244 : /* link_pwq() can handle duplicate calls */
4245 3 : link_pwq(pwq);
4246 :
4247 3 : old_pwq = rcu_access_pointer(wq->numa_pwq_tbl[node]);
4248 3 : rcu_assign_pointer(wq->numa_pwq_tbl[node], pwq);
4249 : return old_pwq;
4250 : }
4251 :
4252 : /* context to store the prepared attrs & pwqs before applying */
4253 : struct apply_wqattrs_ctx {
4254 : struct workqueue_struct *wq; /* target workqueue */
4255 : struct workqueue_attrs *attrs; /* attrs to apply */
4256 : struct list_head list; /* queued for batching commit */
4257 : struct pool_workqueue *dfl_pwq;
4258 : struct pool_workqueue *pwq_tbl[];
4259 : };
4260 :
4261 : /* free the resources after success or abort */
4262 3 : static void apply_wqattrs_cleanup(struct apply_wqattrs_ctx *ctx)
4263 : {
4264 3 : if (ctx) {
4265 : int node;
4266 :
4267 3 : for_each_node(node)
4268 3 : put_pwq_unlocked(ctx->pwq_tbl[node]);
4269 3 : put_pwq_unlocked(ctx->dfl_pwq);
4270 :
4271 6 : free_workqueue_attrs(ctx->attrs);
4272 :
4273 3 : kfree(ctx);
4274 : }
4275 3 : }
4276 :
4277 : /* allocate the attrs and pwqs for later installation */
4278 : static struct apply_wqattrs_ctx *
4279 3 : apply_wqattrs_prepare(struct workqueue_struct *wq,
4280 : const struct workqueue_attrs *attrs,
4281 : const cpumask_var_t unbound_cpumask)
4282 : {
4283 : struct apply_wqattrs_ctx *ctx;
4284 : struct workqueue_attrs *new_attrs, *tmp_attrs;
4285 : int node;
4286 :
4287 : lockdep_assert_held(&wq_pool_mutex);
4288 :
4289 3 : ctx = kzalloc(struct_size(ctx, pwq_tbl, nr_node_ids), GFP_KERNEL);
4290 :
4291 3 : new_attrs = alloc_workqueue_attrs();
4292 3 : tmp_attrs = alloc_workqueue_attrs();
4293 3 : if (!ctx || !new_attrs || !tmp_attrs)
4294 : goto out_free;
4295 :
4296 : /*
4297 : * Calculate the attrs of the default pwq with unbound_cpumask
4298 : * which is wq_unbound_cpumask or to set to wq_unbound_cpumask.
4299 : * If the user configured cpumask doesn't overlap with the
4300 : * wq_unbound_cpumask, we fallback to the wq_unbound_cpumask.
4301 : */
4302 3 : copy_workqueue_attrs(new_attrs, attrs);
4303 6 : cpumask_and(new_attrs->cpumask, new_attrs->cpumask, unbound_cpumask);
4304 6 : if (unlikely(cpumask_empty(new_attrs->cpumask)))
4305 0 : cpumask_copy(new_attrs->cpumask, unbound_cpumask);
4306 :
4307 : /*
4308 : * We may create multiple pwqs with differing cpumasks. Make a
4309 : * copy of @new_attrs which will be modified and used to obtain
4310 : * pools.
4311 : */
4312 3 : copy_workqueue_attrs(tmp_attrs, new_attrs);
4313 :
4314 : /*
4315 : * If something goes wrong during CPU up/down, we'll fall back to
4316 : * the default pwq covering whole @attrs->cpumask. Always create
4317 : * it even if we don't use it immediately.
4318 : */
4319 3 : ctx->dfl_pwq = alloc_unbound_pwq(wq, new_attrs);
4320 3 : if (!ctx->dfl_pwq)
4321 : goto out_free;
4322 :
4323 6 : for_each_node(node) {
4324 3 : if (wq_calc_node_cpumask(new_attrs, node, -1, tmp_attrs->cpumask)) {
4325 0 : ctx->pwq_tbl[node] = alloc_unbound_pwq(wq, tmp_attrs);
4326 0 : if (!ctx->pwq_tbl[node])
4327 : goto out_free;
4328 : } else {
4329 3 : ctx->dfl_pwq->refcnt++;
4330 3 : ctx->pwq_tbl[node] = ctx->dfl_pwq;
4331 : }
4332 : }
4333 :
4334 : /* save the user configured attrs and sanitize it. */
4335 3 : copy_workqueue_attrs(new_attrs, attrs);
4336 6 : cpumask_and(new_attrs->cpumask, new_attrs->cpumask, cpu_possible_mask);
4337 3 : ctx->attrs = new_attrs;
4338 :
4339 3 : ctx->wq = wq;
4340 3 : free_workqueue_attrs(tmp_attrs);
4341 3 : return ctx;
4342 :
4343 : out_free:
4344 0 : free_workqueue_attrs(tmp_attrs);
4345 0 : free_workqueue_attrs(new_attrs);
4346 0 : apply_wqattrs_cleanup(ctx);
4347 0 : return NULL;
4348 : }
4349 :
4350 : /* set attrs and install prepared pwqs, @ctx points to old pwqs on return */
4351 3 : static void apply_wqattrs_commit(struct apply_wqattrs_ctx *ctx)
4352 : {
4353 : int node;
4354 :
4355 : /* all pwqs have been created successfully, let's install'em */
4356 3 : mutex_lock(&ctx->wq->mutex);
4357 :
4358 6 : copy_workqueue_attrs(ctx->wq->unbound_attrs, ctx->attrs);
4359 :
4360 : /* save the previous pwq and install the new one */
4361 6 : for_each_node(node)
4362 6 : ctx->pwq_tbl[node] = numa_pwq_tbl_install(ctx->wq, node,
4363 : ctx->pwq_tbl[node]);
4364 :
4365 : /* @dfl_pwq might not have been used, ensure it's linked */
4366 3 : link_pwq(ctx->dfl_pwq);
4367 3 : swap(ctx->wq->dfl_pwq, ctx->dfl_pwq);
4368 :
4369 3 : mutex_unlock(&ctx->wq->mutex);
4370 3 : }
4371 :
4372 : static void apply_wqattrs_lock(void)
4373 : {
4374 : /* CPUs should stay stable across pwq creations and installations */
4375 : cpus_read_lock();
4376 0 : mutex_lock(&wq_pool_mutex);
4377 : }
4378 :
4379 : static void apply_wqattrs_unlock(void)
4380 : {
4381 0 : mutex_unlock(&wq_pool_mutex);
4382 : cpus_read_unlock();
4383 : }
4384 :
4385 3 : static int apply_workqueue_attrs_locked(struct workqueue_struct *wq,
4386 : const struct workqueue_attrs *attrs)
4387 : {
4388 : struct apply_wqattrs_ctx *ctx;
4389 :
4390 : /* only unbound workqueues can change attributes */
4391 3 : if (WARN_ON(!(wq->flags & WQ_UNBOUND)))
4392 : return -EINVAL;
4393 :
4394 : /* creating multiple pwqs breaks ordering guarantee */
4395 6 : if (!list_empty(&wq->pwqs)) {
4396 0 : if (WARN_ON(wq->flags & __WQ_ORDERED_EXPLICIT))
4397 : return -EINVAL;
4398 :
4399 0 : wq->flags &= ~__WQ_ORDERED;
4400 : }
4401 :
4402 3 : ctx = apply_wqattrs_prepare(wq, attrs, wq_unbound_cpumask);
4403 3 : if (!ctx)
4404 : return -ENOMEM;
4405 :
4406 : /* the ctx has been prepared successfully, let's commit it */
4407 3 : apply_wqattrs_commit(ctx);
4408 3 : apply_wqattrs_cleanup(ctx);
4409 :
4410 3 : return 0;
4411 : }
4412 :
4413 : /**
4414 : * apply_workqueue_attrs - apply new workqueue_attrs to an unbound workqueue
4415 : * @wq: the target workqueue
4416 : * @attrs: the workqueue_attrs to apply, allocated with alloc_workqueue_attrs()
4417 : *
4418 : * Apply @attrs to an unbound workqueue @wq. Unless disabled, on NUMA
4419 : * machines, this function maps a separate pwq to each NUMA node with
4420 : * possible CPUs in @attrs->cpumask so that work items are affine to the
4421 : * NUMA node it was issued on. Older pwqs are released as in-flight work
4422 : * items finish. Note that a work item which repeatedly requeues itself
4423 : * back-to-back will stay on its current pwq.
4424 : *
4425 : * Performs GFP_KERNEL allocations.
4426 : *
4427 : * Assumes caller has CPU hotplug read exclusion, i.e. cpus_read_lock().
4428 : *
4429 : * Return: 0 on success and -errno on failure.
4430 : */
4431 3 : int apply_workqueue_attrs(struct workqueue_struct *wq,
4432 : const struct workqueue_attrs *attrs)
4433 : {
4434 : int ret;
4435 :
4436 : lockdep_assert_cpus_held();
4437 :
4438 3 : mutex_lock(&wq_pool_mutex);
4439 3 : ret = apply_workqueue_attrs_locked(wq, attrs);
4440 3 : mutex_unlock(&wq_pool_mutex);
4441 :
4442 3 : return ret;
4443 : }
4444 :
4445 : /**
4446 : * wq_update_unbound_numa - update NUMA affinity of a wq for CPU hot[un]plug
4447 : * @wq: the target workqueue
4448 : * @cpu: the CPU coming up or going down
4449 : * @online: whether @cpu is coming up or going down
4450 : *
4451 : * This function is to be called from %CPU_DOWN_PREPARE, %CPU_ONLINE and
4452 : * %CPU_DOWN_FAILED. @cpu is being hot[un]plugged, update NUMA affinity of
4453 : * @wq accordingly.
4454 : *
4455 : * If NUMA affinity can't be adjusted due to memory allocation failure, it
4456 : * falls back to @wq->dfl_pwq which may not be optimal but is always
4457 : * correct.
4458 : *
4459 : * Note that when the last allowed CPU of a NUMA node goes offline for a
4460 : * workqueue with a cpumask spanning multiple nodes, the workers which were
4461 : * already executing the work items for the workqueue will lose their CPU
4462 : * affinity and may execute on any CPU. This is similar to how per-cpu
4463 : * workqueues behave on CPU_DOWN. If a workqueue user wants strict
4464 : * affinity, it's the user's responsibility to flush the work item from
4465 : * CPU_DOWN_PREPARE.
4466 : */
4467 8 : static void wq_update_unbound_numa(struct workqueue_struct *wq, int cpu,
4468 : bool online)
4469 : {
4470 8 : int node = cpu_to_node(cpu);
4471 8 : int cpu_off = online ? -1 : cpu;
4472 8 : struct pool_workqueue *old_pwq = NULL, *pwq;
4473 : struct workqueue_attrs *target_attrs;
4474 : cpumask_t *cpumask;
4475 :
4476 : lockdep_assert_held(&wq_pool_mutex);
4477 :
4478 8 : if (!wq_numa_enabled || !(wq->flags & WQ_UNBOUND) ||
4479 0 : wq->unbound_attrs->no_numa)
4480 : return;
4481 :
4482 : /*
4483 : * We don't wanna alloc/free wq_attrs for each wq for each CPU.
4484 : * Let's use a preallocated one. The following buf is protected by
4485 : * CPU hotplug exclusion.
4486 : */
4487 0 : target_attrs = wq_update_unbound_numa_attrs_buf;
4488 0 : cpumask = target_attrs->cpumask;
4489 :
4490 0 : copy_workqueue_attrs(target_attrs, wq->unbound_attrs);
4491 0 : pwq = unbound_pwq_by_node(wq, node);
4492 :
4493 : /*
4494 : * Let's determine what needs to be done. If the target cpumask is
4495 : * different from the default pwq's, we need to compare it to @pwq's
4496 : * and create a new one if they don't match. If the target cpumask
4497 : * equals the default pwq's, the default pwq should be used.
4498 : */
4499 0 : if (wq_calc_node_cpumask(wq->dfl_pwq->pool->attrs, node, cpu_off, cpumask)) {
4500 0 : if (cpumask_equal(cpumask, pwq->pool->attrs->cpumask))
4501 : return;
4502 : } else {
4503 : goto use_dfl_pwq;
4504 : }
4505 :
4506 : /* create a new pwq */
4507 0 : pwq = alloc_unbound_pwq(wq, target_attrs);
4508 0 : if (!pwq) {
4509 0 : pr_warn("workqueue: allocation failed while updating NUMA affinity of \"%s\"\n",
4510 : wq->name);
4511 0 : goto use_dfl_pwq;
4512 : }
4513 :
4514 : /* Install the new pwq. */
4515 0 : mutex_lock(&wq->mutex);
4516 0 : old_pwq = numa_pwq_tbl_install(wq, node, pwq);
4517 0 : goto out_unlock;
4518 :
4519 : use_dfl_pwq:
4520 0 : mutex_lock(&wq->mutex);
4521 0 : raw_spin_lock_irq(&wq->dfl_pwq->pool->lock);
4522 0 : get_pwq(wq->dfl_pwq);
4523 0 : raw_spin_unlock_irq(&wq->dfl_pwq->pool->lock);
4524 0 : old_pwq = numa_pwq_tbl_install(wq, node, wq->dfl_pwq);
4525 : out_unlock:
4526 0 : mutex_unlock(&wq->mutex);
4527 0 : put_pwq_unlocked(old_pwq);
4528 : }
4529 :
4530 14 : static int alloc_and_link_pwqs(struct workqueue_struct *wq)
4531 : {
4532 14 : bool highpri = wq->flags & WQ_HIGHPRI;
4533 : int cpu, ret;
4534 :
4535 14 : if (!(wq->flags & WQ_UNBOUND)) {
4536 11 : wq->cpu_pwqs = alloc_percpu(struct pool_workqueue);
4537 11 : if (!wq->cpu_pwqs)
4538 : return -ENOMEM;
4539 :
4540 11 : for_each_possible_cpu(cpu) {
4541 11 : struct pool_workqueue *pwq =
4542 11 : per_cpu_ptr(wq->cpu_pwqs, cpu);
4543 11 : struct worker_pool *cpu_pools =
4544 11 : per_cpu(cpu_worker_pools, cpu);
4545 :
4546 11 : init_pwq(pwq, wq, &cpu_pools[highpri]);
4547 :
4548 11 : mutex_lock(&wq->mutex);
4549 11 : link_pwq(pwq);
4550 11 : mutex_unlock(&wq->mutex);
4551 : }
4552 : return 0;
4553 : }
4554 :
4555 : cpus_read_lock();
4556 3 : if (wq->flags & __WQ_ORDERED) {
4557 1 : ret = apply_workqueue_attrs(wq, ordered_wq_attrs[highpri]);
4558 : /* there should only be single pwq for ordering guarantee */
4559 1 : WARN(!ret && (wq->pwqs.next != &wq->dfl_pwq->pwqs_node ||
4560 : wq->pwqs.prev != &wq->dfl_pwq->pwqs_node),
4561 : "ordering guarantee broken for workqueue %s\n", wq->name);
4562 : } else {
4563 2 : ret = apply_workqueue_attrs(wq, unbound_std_wq_attrs[highpri]);
4564 : }
4565 : cpus_read_unlock();
4566 :
4567 : return ret;
4568 : }
4569 :
4570 14 : static int wq_clamp_max_active(int max_active, unsigned int flags,
4571 : const char *name)
4572 : {
4573 14 : int lim = flags & WQ_UNBOUND ? WQ_UNBOUND_MAX_ACTIVE : WQ_MAX_ACTIVE;
4574 :
4575 14 : if (max_active < 1 || max_active > lim)
4576 0 : pr_warn("workqueue: max_active %d requested for %s is out of range, clamping between %d and %d\n",
4577 : max_active, name, 1, lim);
4578 :
4579 14 : return clamp_val(max_active, 1, lim);
4580 : }
4581 :
4582 : /*
4583 : * Workqueues which may be used during memory reclaim should have a rescuer
4584 : * to guarantee forward progress.
4585 : */
4586 14 : static int init_rescuer(struct workqueue_struct *wq)
4587 : {
4588 : struct worker *rescuer;
4589 : int ret;
4590 :
4591 14 : if (!(wq->flags & WQ_MEM_RECLAIM))
4592 : return 0;
4593 :
4594 4 : rescuer = alloc_worker(NUMA_NO_NODE);
4595 4 : if (!rescuer) {
4596 0 : pr_err("workqueue: Failed to allocate a rescuer for wq \"%s\"\n",
4597 : wq->name);
4598 0 : return -ENOMEM;
4599 : }
4600 :
4601 4 : rescuer->rescue_wq = wq;
4602 4 : rescuer->task = kthread_create(rescuer_thread, rescuer, "%s", wq->name);
4603 8 : if (IS_ERR(rescuer->task)) {
4604 0 : ret = PTR_ERR(rescuer->task);
4605 0 : pr_err("workqueue: Failed to create a rescuer kthread for wq \"%s\": %pe",
4606 : wq->name, ERR_PTR(ret));
4607 0 : kfree(rescuer);
4608 0 : return ret;
4609 : }
4610 :
4611 4 : wq->rescuer = rescuer;
4612 4 : kthread_bind_mask(rescuer->task, cpu_possible_mask);
4613 4 : wake_up_process(rescuer->task);
4614 :
4615 4 : return 0;
4616 : }
4617 :
4618 : __printf(1, 4)
4619 14 : struct workqueue_struct *alloc_workqueue(const char *fmt,
4620 : unsigned int flags,
4621 : int max_active, ...)
4622 : {
4623 14 : size_t tbl_size = 0;
4624 : va_list args;
4625 : struct workqueue_struct *wq;
4626 : struct pool_workqueue *pwq;
4627 :
4628 : /*
4629 : * Unbound && max_active == 1 used to imply ordered, which is no
4630 : * longer the case on NUMA machines due to per-node pools. While
4631 : * alloc_ordered_workqueue() is the right way to create an ordered
4632 : * workqueue, keep the previous behavior to avoid subtle breakages
4633 : * on NUMA.
4634 : */
4635 14 : if ((flags & WQ_UNBOUND) && max_active == 1)
4636 1 : flags |= __WQ_ORDERED;
4637 :
4638 : /* see the comment above the definition of WQ_POWER_EFFICIENT */
4639 14 : if ((flags & WQ_POWER_EFFICIENT) && wq_power_efficient)
4640 0 : flags |= WQ_UNBOUND;
4641 :
4642 : /* allocate wq and format name */
4643 14 : if (flags & WQ_UNBOUND)
4644 3 : tbl_size = nr_node_ids * sizeof(wq->numa_pwq_tbl[0]);
4645 :
4646 14 : wq = kzalloc(sizeof(*wq) + tbl_size, GFP_KERNEL);
4647 14 : if (!wq)
4648 : return NULL;
4649 :
4650 14 : if (flags & WQ_UNBOUND) {
4651 3 : wq->unbound_attrs = alloc_workqueue_attrs();
4652 3 : if (!wq->unbound_attrs)
4653 : goto err_free_wq;
4654 : }
4655 :
4656 14 : va_start(args, max_active);
4657 14 : vsnprintf(wq->name, sizeof(wq->name), fmt, args);
4658 14 : va_end(args);
4659 :
4660 14 : max_active = max_active ?: WQ_DFL_ACTIVE;
4661 14 : max_active = wq_clamp_max_active(max_active, flags, wq->name);
4662 :
4663 : /* init wq */
4664 14 : wq->flags = flags;
4665 14 : wq->saved_max_active = max_active;
4666 14 : mutex_init(&wq->mutex);
4667 28 : atomic_set(&wq->nr_pwqs_to_flush, 0);
4668 28 : INIT_LIST_HEAD(&wq->pwqs);
4669 28 : INIT_LIST_HEAD(&wq->flusher_queue);
4670 28 : INIT_LIST_HEAD(&wq->flusher_overflow);
4671 28 : INIT_LIST_HEAD(&wq->maydays);
4672 :
4673 14 : wq_init_lockdep(wq);
4674 28 : INIT_LIST_HEAD(&wq->list);
4675 :
4676 14 : if (alloc_and_link_pwqs(wq) < 0)
4677 : goto err_unreg_lockdep;
4678 :
4679 14 : if (wq_online && init_rescuer(wq) < 0)
4680 : goto err_destroy;
4681 :
4682 14 : if ((wq->flags & WQ_SYSFS) && workqueue_sysfs_register(wq))
4683 : goto err_destroy;
4684 :
4685 : /*
4686 : * wq_pool_mutex protects global freeze state and workqueues list.
4687 : * Grab it, adjust max_active and add the new @wq to workqueues
4688 : * list.
4689 : */
4690 14 : mutex_lock(&wq_pool_mutex);
4691 :
4692 14 : mutex_lock(&wq->mutex);
4693 28 : for_each_pwq(pwq, wq)
4694 14 : pwq_adjust_max_active(pwq);
4695 14 : mutex_unlock(&wq->mutex);
4696 :
4697 28 : list_add_tail_rcu(&wq->list, &workqueues);
4698 :
4699 14 : mutex_unlock(&wq_pool_mutex);
4700 :
4701 14 : return wq;
4702 :
4703 : err_unreg_lockdep:
4704 : wq_unregister_lockdep(wq);
4705 : wq_free_lockdep(wq);
4706 : err_free_wq:
4707 0 : free_workqueue_attrs(wq->unbound_attrs);
4708 0 : kfree(wq);
4709 0 : return NULL;
4710 : err_destroy:
4711 0 : destroy_workqueue(wq);
4712 0 : return NULL;
4713 : }
4714 : EXPORT_SYMBOL_GPL(alloc_workqueue);
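
/*
 * Usage sketch for alloc_workqueue()/destroy_workqueue(): a driver-private
 * unbound workqueue used on the memory-reclaim path (so it gets a rescuer),
 * limited to two concurrent work items. The fred_* names are illustrative.
 */
static struct workqueue_struct *fred_wq;

static int fred_init(void)
{
        fred_wq = alloc_workqueue("fred_wq", WQ_MEM_RECLAIM | WQ_UNBOUND, 2);
        if (!fred_wq)
                return -ENOMEM;
        return 0;
}

static void fred_exit(void)
{
        destroy_workqueue(fred_wq);     /* drains remaining work items first */
}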
4715 :
4716 : static bool pwq_busy(struct pool_workqueue *pwq)
4717 : {
4718 : int i;
4719 :
4720 0 : for (i = 0; i < WORK_NR_COLORS; i++)
4721 0 : if (pwq->nr_in_flight[i])
4722 : return true;
4723 :
4724 0 : if ((pwq != pwq->wq->dfl_pwq) && (pwq->refcnt > 1))
4725 : return true;
4726 0 : if (pwq->nr_active || !list_empty(&pwq->inactive_works))
4727 : return true;
4728 :
4729 : return false;
4730 : }
4731 :
4732 : /**
4733 : * destroy_workqueue - safely terminate a workqueue
4734 : * @wq: target workqueue
4735 : *
4736 : * Safely destroy a workqueue. All work currently pending will be done first.
4737 : */
4738 0 : void destroy_workqueue(struct workqueue_struct *wq)
4739 : {
4740 : struct pool_workqueue *pwq;
4741 : int node;
4742 :
4743 : /*
4744 : * Remove it from sysfs first so that sanity check failure doesn't
4745 : * lead to sysfs name conflicts.
4746 : */
4747 0 : workqueue_sysfs_unregister(wq);
4748 :
4749 : /* mark the workqueue destruction is in progress */
4750 0 : mutex_lock(&wq->mutex);
4751 0 : wq->flags |= __WQ_DESTROYING;
4752 0 : mutex_unlock(&wq->mutex);
4753 :
4754 : /* drain it before proceeding with destruction */
4755 0 : drain_workqueue(wq);
4756 :
4757 : /* kill rescuer, if sanity checks fail, leave it w/o rescuer */
4758 0 : if (wq->rescuer) {
4759 0 : struct worker *rescuer = wq->rescuer;
4760 :
4761 : /* this prevents new queueing */
4762 0 : raw_spin_lock_irq(&wq_mayday_lock);
4763 0 : wq->rescuer = NULL;
4764 0 : raw_spin_unlock_irq(&wq_mayday_lock);
4765 :
4766 : /* rescuer will empty maydays list before exiting */
4767 0 : kthread_stop(rescuer->task);
4768 0 : kfree(rescuer);
4769 : }
4770 :
4771 : /*
4772 : * Sanity checks - grab all the locks so that we wait for all
4773 : * in-flight operations which may do put_pwq().
4774 : */
4775 0 : mutex_lock(&wq_pool_mutex);
4776 0 : mutex_lock(&wq->mutex);
4777 0 : for_each_pwq(pwq, wq) {
4778 0 : raw_spin_lock_irq(&pwq->pool->lock);
4779 0 : if (WARN_ON(pwq_busy(pwq))) {
4780 0 : pr_warn("%s: %s has the following busy pwq\n",
4781 : __func__, wq->name);
4782 0 : show_pwq(pwq);
4783 0 : raw_spin_unlock_irq(&pwq->pool->lock);
4784 0 : mutex_unlock(&wq->mutex);
4785 0 : mutex_unlock(&wq_pool_mutex);
4786 0 : show_one_workqueue(wq);
4787 0 : return;
4788 : }
4789 0 : raw_spin_unlock_irq(&pwq->pool->lock);
4790 : }
4791 0 : mutex_unlock(&wq->mutex);
4792 :
4793 : /*
4794 : * wq list is used to freeze wq, remove from list after
4795 : * flushing is complete in case freeze races us.
4796 : */
4797 0 : list_del_rcu(&wq->list);
4798 0 : mutex_unlock(&wq_pool_mutex);
4799 :
4800 0 : if (!(wq->flags & WQ_UNBOUND)) {
4801 0 : wq_unregister_lockdep(wq);
4802 : /*
4803 : * The base ref is never dropped on per-cpu pwqs. Directly
4804 : * schedule RCU free.
4805 : */
4806 0 : call_rcu(&wq->rcu, rcu_free_wq);
4807 : } else {
4808 : /*
4809 : * We're the sole accessor of @wq at this point. Directly
4810 : * access numa_pwq_tbl[] and dfl_pwq to put the base refs.
4811 : * @wq will be freed when the last pwq is released.
4812 : */
4813 0 : for_each_node(node) {
4814 0 : pwq = rcu_access_pointer(wq->numa_pwq_tbl[node]);
4815 0 : RCU_INIT_POINTER(wq->numa_pwq_tbl[node], NULL);
4816 0 : put_pwq_unlocked(pwq);
4817 : }
4818 :
4819 : /*
4820 : * Put dfl_pwq. @wq may be freed any time after dfl_pwq is
4821 : * put. Don't access it afterwards.
4822 : */
4823 0 : pwq = wq->dfl_pwq;
4824 0 : wq->dfl_pwq = NULL;
4825 0 : put_pwq_unlocked(pwq);
4826 : }
4827 : }
4828 : EXPORT_SYMBOL_GPL(destroy_workqueue);
4829 :
4830 : /**
4831 : * workqueue_set_max_active - adjust max_active of a workqueue
4832 : * @wq: target workqueue
4833 : * @max_active: new max_active value.
4834 : *
4835 : * Set max_active of @wq to @max_active.
4836 : *
4837 : * CONTEXT:
4838 : * Don't call from IRQ context.
4839 : */
4840 0 : void workqueue_set_max_active(struct workqueue_struct *wq, int max_active)
4841 : {
4842 : struct pool_workqueue *pwq;
4843 :
4844 : /* disallow meddling with max_active for ordered workqueues */
4845 0 : if (WARN_ON(wq->flags & __WQ_ORDERED_EXPLICIT))
4846 : return;
4847 :
4848 0 : max_active = wq_clamp_max_active(max_active, wq->flags, wq->name);
4849 :
4850 0 : mutex_lock(&wq->mutex);
4851 :
4852 0 : wq->flags &= ~__WQ_ORDERED;
4853 0 : wq->saved_max_active = max_active;
4854 :
4855 0 : for_each_pwq(pwq, wq)
4856 0 : pwq_adjust_max_active(pwq);
4857 :
4858 0 : mutex_unlock(&wq->mutex);
4859 : }
4860 : EXPORT_SYMBOL_GPL(workqueue_set_max_active);
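
/*
 * Usage sketch for workqueue_set_max_active(): widening the concurrency
 * limit of a non-ordered workqueue at runtime, e.g. in response to user
 * configuration. plugh_wq is assumed to have been created with
 * alloc_workqueue() elsewhere.
 */
static void plugh_set_parallelism(struct workqueue_struct *plugh_wq, int level)
{
        /* the value is clamped to the [1, WQ_MAX_ACTIVE] range internally */
        workqueue_set_max_active(plugh_wq, level);
}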
4861 :
4862 : /**
4863 : * current_work - retrieve %current task's work struct
4864 : *
4865 : * Determine if %current task is a workqueue worker and what it's working on.
4866 : * Useful to find out the context that the %current task is running in.
4867 : *
4868 : * Return: work struct if %current task is a workqueue worker, %NULL otherwise.
4869 : */
4870 0 : struct work_struct *current_work(void)
4871 : {
4872 0 : struct worker *worker = current_wq_worker();
4873 :
4874 0 : return worker ? worker->current_work : NULL;
4875 : }
4876 : EXPORT_SYMBOL(current_work);
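/*
 * Usage sketch (illustrative only): current_work() lets code that may run
 * both from a work item and from other contexts detect the former, e.g. to
 * avoid flushing the very work item it is executing inside of.
 * "example_work" is a hypothetical work_struct owned by the caller.
 */
static void example_sync(struct work_struct *example_work)
{
	/* flushing a work item from within itself would wait on itself */
	if (current_work() == example_work)
		return;

	flush_work(example_work);
}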
4877 :
4878 : /**
4879 : * current_is_workqueue_rescuer - is %current workqueue rescuer?
4880 : *
4881 : * Determine whether %current is a workqueue rescuer. Can be used from
4882 : * work functions to determine whether it's being run off the rescuer task.
4883 : *
4884 : * Return: %true if %current is a workqueue rescuer. %false otherwise.
4885 : */
4886 0 : bool current_is_workqueue_rescuer(void)
4887 : {
4888 0 : struct worker *worker = current_wq_worker();
4889 :
4890 0 : return worker && worker->rescue_wq;
4891 : }
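/*
 * Usage sketch (illustrative only): a work function on a WQ_MEM_RECLAIM
 * workqueue can check whether it was picked up by the rescuer and, if so,
 * avoid allocations that could themselves wait on memory reclaim.
 * "example_reclaim_fn" is a hypothetical work function.
 */
static void example_reclaim_fn(struct work_struct *work)
{
	gfp_t gfp = current_is_workqueue_rescuer() ? GFP_NOWAIT : GFP_KERNEL;
	void *buf = kmalloc(64, gfp);

	if (!buf)
		return;
	/* ... use buf ... */
	kfree(buf);
}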
4892 :
4893 : /**
4894 : * workqueue_congested - test whether a workqueue is congested
4895 : * @cpu: CPU in question
4896 : * @wq: target workqueue
4897 : *
4898 : * Test whether @wq's cpu workqueue for @cpu is congested. There is
4899 : * no synchronization around this function and the test result is
4900 : * unreliable and only useful as advisory hints or for debugging.
4901 : *
4902 : * If @cpu is WORK_CPU_UNBOUND, the test is performed on the local CPU.
4903 : * Note that both per-cpu and unbound workqueues may be associated with
4904 : * multiple pool_workqueues which have separate congested states. A
4905 : * workqueue being congested on one CPU doesn't mean the workqueue is also
4906 : * congested on other CPUs / NUMA nodes.
4907 : *
4908 : * Return:
4909 : * %true if congested, %false otherwise.
4910 : */
4911 0 : bool workqueue_congested(int cpu, struct workqueue_struct *wq)
4912 : {
4913 : struct pool_workqueue *pwq;
4914 : bool ret;
4915 :
4916 : rcu_read_lock();
4917 0 : preempt_disable();
4918 :
4919 : if (cpu == WORK_CPU_UNBOUND)
4920 : cpu = smp_processor_id();
4921 :
4922 0 : if (!(wq->flags & WQ_UNBOUND))
4923 0 : pwq = per_cpu_ptr(wq->cpu_pwqs, cpu);
4924 : else
4925 0 : pwq = unbound_pwq_by_node(wq, cpu_to_node(cpu));
4926 :
4927 0 : ret = !list_empty(&pwq->inactive_works);
4928 0 : preempt_enable();
4929 : rcu_read_unlock();
4930 :
4931 0 : return ret;
4932 : }
4933 : EXPORT_SYMBOL_GPL(workqueue_congested);
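/*
 * Usage sketch (illustrative only): since the result is an unsynchronized
 * advisory hint, a caller may only use it for best-effort decisions, e.g.
 * dropping optional background work when the local pwq looks backed up.
 * "example_wq" and "example_work" are hypothetical.
 */
static void example_maybe_queue(struct workqueue_struct *example_wq,
				struct work_struct *example_work)
{
	if (workqueue_congested(WORK_CPU_UNBOUND, example_wq))
		return;		/* best effort only - skip the optional work */

	queue_work(example_wq, example_work);
}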
4934 :
4935 : /**
4936 : * work_busy - test whether a work is currently pending or running
4937 : * @work: the work to be tested
4938 : *
4939 : * Test whether @work is currently pending or running. There is no
4940 : * synchronization around this function and the test result is
4941 : * unreliable and only useful as advisory hints or for debugging.
4942 : *
4943 : * Return:
4944 : * OR'd bitmask of WORK_BUSY_* bits.
4945 : */
4946 0 : unsigned int work_busy(struct work_struct *work)
4947 : {
4948 : struct worker_pool *pool;
4949 : unsigned long flags;
4950 0 : unsigned int ret = 0;
4951 :
4952 0 : if (work_pending(work))
4953 0 : ret |= WORK_BUSY_PENDING;
4954 :
4955 : rcu_read_lock();
4956 0 : pool = get_work_pool(work);
4957 0 : if (pool) {
4958 0 : raw_spin_lock_irqsave(&pool->lock, flags);
4959 0 : if (find_worker_executing_work(pool, work))
4960 0 : ret |= WORK_BUSY_RUNNING;
4961 0 : raw_spin_unlock_irqrestore(&pool->lock, flags);
4962 : }
4963 : rcu_read_unlock();
4964 :
4965 0 : return ret;
4966 : }
4967 : EXPORT_SYMBOL_GPL(work_busy);
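/*
 * Usage sketch (illustrative only): work_busy() is unsynchronized, so treat
 * the returned WORK_BUSY_* bits as a debugging hint. "example_work" is a
 * hypothetical work_struct.
 */
static void example_report(struct work_struct *example_work)
{
	unsigned int busy = work_busy(example_work);

	pr_debug("example work: %spending %srunning\n",
		 (busy & WORK_BUSY_PENDING) ? "" : "not ",
		 (busy & WORK_BUSY_RUNNING) ? "" : "not ");
}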
4968 :
4969 : /**
4970 : * set_worker_desc - set description for the current work item
4971 : * @fmt: printf-style format string
4972 : * @...: arguments for the format string
4973 : *
4974 : * This function can be called by a running work function to describe what
4975 : * the work item is about. If the worker task gets dumped, this
4976 : * information will be printed out together to help debugging. The
4977 : * description can be at most WORKER_DESC_LEN including the trailing '\0'.
4978 : */
4979 0 : void set_worker_desc(const char *fmt, ...)
4980 : {
4981 0 : struct worker *worker = current_wq_worker();
4982 : va_list args;
4983 :
4984 0 : if (worker) {
4985 0 : va_start(args, fmt);
4986 0 : vsnprintf(worker->desc, sizeof(worker->desc), fmt, args);
4987 0 : va_end(args);
4988 : }
4989 0 : }
4990 : EXPORT_SYMBOL_GPL(set_worker_desc);
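/*
 * Usage sketch (illustrative only): a work function can record what it is
 * operating on so that the description shows up in task dumps via
 * print_worker_info() below. The "example-flush" prefix and the inode
 * number are hypothetical.
 */
static void example_flush_fn(struct work_struct *work)
{
	unsigned long ino = 12345;	/* hypothetical object being processed */

	set_worker_desc("example-flush inode %lu", ino);
	/* ... do the actual work ... */
}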
4991 :
4992 : /**
4993 : * print_worker_info - print out worker information and description
4994 : * @log_lvl: the log level to use when printing
4995 : * @task: target task
4996 : *
4997 : * If @task is a worker and currently executing a work item, print out the
4998 : * name of the workqueue being serviced and worker description set with
4999 : * set_worker_desc() by the currently executing work item.
5000 : *
5001 : * This function can be safely called on any task as long as the
5002 : * task_struct itself is accessible. While safe, this function isn't
5003 : * synchronized and may print out mixed-up or garbage output of limited length.
5004 : */
5005 1 : void print_worker_info(const char *log_lvl, struct task_struct *task)
5006 : {
5007 1 : work_func_t *fn = NULL;
5008 1 : char name[WQ_NAME_LEN] = { };
5009 1 : char desc[WORKER_DESC_LEN] = { };
5010 1 : struct pool_workqueue *pwq = NULL;
5011 1 : struct workqueue_struct *wq = NULL;
5012 : struct worker *worker;
5013 :
5014 1 : if (!(task->flags & PF_WQ_WORKER))
5015 1 : return;
5016 :
5017 : /*
5018 : * This function is called without any synchronization and @task
5019 : * could be in any state. Be careful with dereferences.
5020 : */
5021 0 : worker = kthread_probe_data(task);
5022 :
5023 : /*
5024 : * Carefully copy the associated workqueue's workfn, name and desc.
5025 : * Keep the original last '\0' in case the original is garbage.
5026 : */
5027 0 : copy_from_kernel_nofault(&fn, &worker->current_func, sizeof(fn));
5028 0 : copy_from_kernel_nofault(&pwq, &worker->current_pwq, sizeof(pwq));
5029 0 : copy_from_kernel_nofault(&wq, &pwq->wq, sizeof(wq));
5030 0 : copy_from_kernel_nofault(name, wq->name, sizeof(name) - 1);
5031 0 : copy_from_kernel_nofault(desc, worker->desc, sizeof(desc) - 1);
5032 :
5033 0 : if (fn || name[0] || desc[0]) {
5034 0 : printk("%sWorkqueue: %s %ps", log_lvl, name, fn);
5035 0 : if (strcmp(name, desc))
5036 0 : pr_cont(" (%s)", desc);
5037 0 : pr_cont("\n");
5038 : }
5039 : }
5040 :
5041 0 : static void pr_cont_pool_info(struct worker_pool *pool)
5042 : {
5043 0 : pr_cont(" cpus=%*pbl", nr_cpumask_bits, pool->attrs->cpumask);
5044 0 : if (pool->node != NUMA_NO_NODE)
5045 0 : pr_cont(" node=%d", pool->node);
5046 0 : pr_cont(" flags=0x%x nice=%d", pool->flags, pool->attrs->nice);
5047 0 : }
5048 :
5049 : struct pr_cont_work_struct {
5050 : bool comma;
5051 : work_func_t func;
5052 : long ctr;
5053 : };
5054 :
5055 0 : static void pr_cont_work_flush(bool comma, work_func_t func, struct pr_cont_work_struct *pcwsp)
5056 : {
5057 0 : if (!pcwsp->ctr)
5058 : goto out_record;
5059 0 : if (func == pcwsp->func) {
5060 0 : pcwsp->ctr++;
5061 0 : return;
5062 : }
5063 0 : if (pcwsp->ctr == 1)
5064 0 : pr_cont("%s %ps", pcwsp->comma ? "," : "", pcwsp->func);
5065 : else
5066 0 : pr_cont("%s %ld*%ps", pcwsp->comma ? "," : "", pcwsp->ctr, pcwsp->func);
5067 0 : pcwsp->ctr = 0;
5068 : out_record:
5069 0 : if ((long)func == -1L)
5070 : return;
5071 0 : pcwsp->comma = comma;
5072 0 : pcwsp->func = func;
5073 0 : pcwsp->ctr = 1;
5074 : }
5075 :
5076 0 : static void pr_cont_work(bool comma, struct work_struct *work, struct pr_cont_work_struct *pcwsp)
5077 : {
5078 0 : if (work->func == wq_barrier_func) {
5079 : struct wq_barrier *barr;
5080 :
5081 0 : barr = container_of(work, struct wq_barrier, work);
5082 :
5083 0 : pr_cont_work_flush(comma, (work_func_t)-1, pcwsp);
5084 0 : pr_cont("%s BAR(%d)", comma ? "," : "",
5085 : task_pid_nr(barr->task));
5086 : } else {
5087 0 : if (!comma)
5088 0 : pr_cont_work_flush(comma, (work_func_t)-1, pcwsp);
5089 0 : pr_cont_work_flush(comma, work->func, pcwsp);
5090 : }
5091 0 : }
5092 :
5093 0 : static void show_pwq(struct pool_workqueue *pwq)
5094 : {
5095 0 : struct pr_cont_work_struct pcws = { .ctr = 0, };
5096 0 : struct worker_pool *pool = pwq->pool;
5097 : struct work_struct *work;
5098 : struct worker *worker;
5099 0 : bool has_in_flight = false, has_pending = false;
5100 : int bkt;
5101 :
5102 0 : pr_info(" pwq %d:", pool->id);
5103 0 : pr_cont_pool_info(pool);
5104 :
5105 0 : pr_cont(" active=%d/%d refcnt=%d%s\n",
5106 : pwq->nr_active, pwq->max_active, pwq->refcnt,
5107 : !list_empty(&pwq->mayday_node) ? " MAYDAY" : "");
5108 :
5109 0 : hash_for_each(pool->busy_hash, bkt, worker, hentry) {
5110 0 : if (worker->current_pwq == pwq) {
5111 : has_in_flight = true;
5112 : break;
5113 : }
5114 : }
5115 0 : if (has_in_flight) {
5116 0 : bool comma = false;
5117 :
5118 0 : pr_info(" in-flight:");
5119 0 : hash_for_each(pool->busy_hash, bkt, worker, hentry) {
5120 0 : if (worker->current_pwq != pwq)
5121 0 : continue;
5122 :
5123 0 : pr_cont("%s %d%s:%ps", comma ? "," : "",
5124 : task_pid_nr(worker->task),
5125 : worker->rescue_wq ? "(RESCUER)" : "",
5126 : worker->current_func);
5127 0 : list_for_each_entry(work, &worker->scheduled, entry)
5128 0 : pr_cont_work(false, work, &pcws);
5129 0 : pr_cont_work_flush(comma, (work_func_t)-1L, &pcws);
5130 0 : comma = true;
5131 : }
5132 0 : pr_cont("\n");
5133 : }
5134 :
5135 0 : list_for_each_entry(work, &pool->worklist, entry) {
5136 0 : if (get_work_pwq(work) == pwq) {
5137 : has_pending = true;
5138 : break;
5139 : }
5140 : }
5141 0 : if (has_pending) {
5142 0 : bool comma = false;
5143 :
5144 0 : pr_info(" pending:");
5145 0 : list_for_each_entry(work, &pool->worklist, entry) {
5146 0 : if (get_work_pwq(work) != pwq)
5147 0 : continue;
5148 :
5149 0 : pr_cont_work(comma, work, &pcws);
5150 0 : comma = !(*work_data_bits(work) & WORK_STRUCT_LINKED);
5151 : }
5152 0 : pr_cont_work_flush(comma, (work_func_t)-1L, &pcws);
5153 0 : pr_cont("\n");
5154 : }
5155 :
5156 0 : if (!list_empty(&pwq->inactive_works)) {
5157 0 : bool comma = false;
5158 :
5159 0 : pr_info(" inactive:");
5160 0 : list_for_each_entry(work, &pwq->inactive_works, entry) {
5161 0 : pr_cont_work(comma, work, &pcws);
5162 0 : comma = !(*work_data_bits(work) & WORK_STRUCT_LINKED);
5163 : }
5164 0 : pr_cont_work_flush(comma, (work_func_t)-1L, &pcws);
5165 0 : pr_cont("\n");
5166 : }
5167 0 : }
5168 :
5169 : /**
5170 : * show_one_workqueue - dump state of specified workqueue
5171 : * @wq: workqueue whose state will be printed
5172 : */
5173 0 : void show_one_workqueue(struct workqueue_struct *wq)
5174 : {
5175 : struct pool_workqueue *pwq;
5176 0 : bool idle = true;
5177 : unsigned long flags;
5178 :
5179 0 : for_each_pwq(pwq, wq) {
5180 0 : if (pwq->nr_active || !list_empty(&pwq->inactive_works)) {
5181 : idle = false;
5182 : break;
5183 : }
5184 : }
5185 0 : if (idle) /* Nothing to print for idle workqueue */
5186 : return;
5187 :
5188 0 : pr_info("workqueue %s: flags=0x%x\n", wq->name, wq->flags);
5189 :
5190 0 : for_each_pwq(pwq, wq) {
5191 0 : raw_spin_lock_irqsave(&pwq->pool->lock, flags);
5192 0 : if (pwq->nr_active || !list_empty(&pwq->inactive_works)) {
5193 : /*
5194 : * Defer printing to avoid deadlocks in console
5195 : * drivers that queue work while holding locks
5196 : * also taken in their write paths.
5197 : */
5198 0 : printk_deferred_enter();
5199 0 : show_pwq(pwq);
5200 0 : printk_deferred_exit();
5201 : }
5202 0 : raw_spin_unlock_irqrestore(&pwq->pool->lock, flags);
5203 : /*
5204 : * We could be printing a lot from atomic context, e.g.
5205 : * sysrq-t -> show_all_workqueues(). Avoid triggering
5206 : * hard lockup.
5207 : */
5208 : touch_nmi_watchdog();
5209 : }
5210 :
5211 : }
5212 :
5213 : /**
5214 : * show_one_worker_pool - dump state of specified worker pool
5215 : * @pool: worker pool whose state will be printed
5216 : */
5217 0 : static void show_one_worker_pool(struct worker_pool *pool)
5218 : {
5219 : struct worker *worker;
5220 0 : bool first = true;
5221 : unsigned long flags;
5222 0 : unsigned long hung = 0;
5223 :
5224 0 : raw_spin_lock_irqsave(&pool->lock, flags);
5225 0 : if (pool->nr_workers == pool->nr_idle)
5226 : goto next_pool;
5227 :
5228 : /* How long the first pending work is waiting for a worker. */
5229 0 : if (!list_empty(&pool->worklist))
5230 0 : hung = jiffies_to_msecs(jiffies - pool->watchdog_ts) / 1000;
5231 :
5232 : /*
5233 : * Defer printing to avoid deadlocks in console drivers that
5234 : * queue work while holding locks also taken in their write
5235 : * paths.
5236 : */
5237 0 : printk_deferred_enter();
5238 0 : pr_info("pool %d:", pool->id);
5239 0 : pr_cont_pool_info(pool);
5240 0 : pr_cont(" hung=%lus workers=%d", hung, pool->nr_workers);
5241 0 : if (pool->manager)
5242 0 : pr_cont(" manager: %d",
5243 : task_pid_nr(pool->manager->task));
5244 0 : list_for_each_entry(worker, &pool->idle_list, entry) {
5245 0 : pr_cont(" %s%d", first ? "idle: " : "",
5246 : task_pid_nr(worker->task));
5247 0 : first = false;
5248 : }
5249 0 : pr_cont("\n");
5250 0 : printk_deferred_exit();
5251 : next_pool:
5252 0 : raw_spin_unlock_irqrestore(&pool->lock, flags);
5253 : /*
5254 : * We could be printing a lot from atomic context, e.g.
5255 : * sysrq-t -> show_all_workqueues(). Avoid triggering
5256 : * hard lockup.
5257 : */
5258 : touch_nmi_watchdog();
5259 :
5260 0 : }
5261 :
5262 : /**
5263 : * show_all_workqueues - dump workqueue state
5264 : *
5265 : * Called from a sysrq handler and prints out all busy workqueues and pools.
5266 : */
5267 0 : void show_all_workqueues(void)
5268 : {
5269 : struct workqueue_struct *wq;
5270 : struct worker_pool *pool;
5271 : int pi;
5272 :
5273 : rcu_read_lock();
5274 :
5275 0 : pr_info("Showing busy workqueues and worker pools:\n");
5276 :
5277 0 : list_for_each_entry_rcu(wq, &workqueues, list)
5278 0 : show_one_workqueue(wq);
5279 :
5280 0 : for_each_pool(pool, pi)
5281 0 : show_one_worker_pool(pool);
5282 :
5283 : rcu_read_unlock();
5284 0 : }
5285 :
5286 : /**
5287 : * show_freezable_workqueues - dump freezable workqueue state
5288 : *
5289 : * Called from try_to_freeze_tasks() and prints out all freezable workqueues
5290 : * still busy.
5291 : */
5292 0 : void show_freezable_workqueues(void)
5293 : {
5294 : struct workqueue_struct *wq;
5295 :
5296 : rcu_read_lock();
5297 :
5298 0 : pr_info("Showing freezable workqueues that are still busy:\n");
5299 :
5300 0 : list_for_each_entry_rcu(wq, &workqueues, list) {
5301 0 : if (!(wq->flags & WQ_FREEZABLE))
5302 0 : continue;
5303 0 : show_one_workqueue(wq);
5304 : }
5305 :
5306 : rcu_read_unlock();
5307 0 : }
5308 :
5309 : /* used to show worker information through /proc/PID/{comm,stat,status} */
5310 0 : void wq_worker_comm(char *buf, size_t size, struct task_struct *task)
5311 : {
5312 : int off;
5313 :
5314 : /* always show the actual comm */
5315 0 : off = strscpy(buf, task->comm, size);
5316 0 : if (off < 0)
5317 : return;
5318 :
5319 : /* stabilize PF_WQ_WORKER and worker pool association */
5320 0 : mutex_lock(&wq_pool_attach_mutex);
5321 :
5322 0 : if (task->flags & PF_WQ_WORKER) {
5323 0 : struct worker *worker = kthread_data(task);
5324 0 : struct worker_pool *pool = worker->pool;
5325 :
5326 0 : if (pool) {
5327 0 : raw_spin_lock_irq(&pool->lock);
5328 : /*
5329 : * ->desc tracks information (wq name or
5330 : * set_worker_desc()) for the latest execution. If
5331 : * current, prepend '+', otherwise '-'.
5332 : */
5333 0 : if (worker->desc[0] != '\0') {
5334 0 : if (worker->current_work)
5335 0 : scnprintf(buf + off, size - off, "+%s",
5336 0 : worker->desc);
5337 : else
5338 0 : scnprintf(buf + off, size - off, "-%s",
5339 0 : worker->desc);
5340 : }
5341 0 : raw_spin_unlock_irq(&pool->lock);
5342 : }
5343 : }
5344 :
5345 0 : mutex_unlock(&wq_pool_attach_mutex);
5346 : }
5347 :
5348 : #ifdef CONFIG_SMP
5349 :
5350 : /*
5351 : * CPU hotplug.
5352 : *
5353 : * There are two challenges in supporting CPU hotplug. Firstly, there
5354 : * are a lot of assumptions on strong associations among work, pwq and
5355 : * pool which make migrating pending and scheduled works very
5356 : * difficult to implement without impacting hot paths. Secondly,
5357 : * worker pools serve a mix of short, long and very long running work items,
5358 : * making blocked draining impractical.
5359 : *
5360 : * This is solved by allowing the pools to be disassociated from the CPU,
5361 : * running as unbound ones, and allowing them to be reattached later if the
5362 : * CPU comes back online.
5363 : */
5364 :
5365 : static void unbind_workers(int cpu)
5366 : {
5367 : struct worker_pool *pool;
5368 : struct worker *worker;
5369 :
5370 : for_each_cpu_worker_pool(pool, cpu) {
5371 : mutex_lock(&wq_pool_attach_mutex);
5372 : raw_spin_lock_irq(&pool->lock);
5373 :
5374 : /*
5375 : * We've blocked all attach/detach operations. Make all workers
5376 : * unbound and set DISASSOCIATED. Before this, all workers
5377 : * must be on the cpu. After this, they may become diasporas.
5378 : * And the preemption disabled section in their sched callbacks
5379 : * are guaranteed to see WORKER_UNBOUND since the code here
5380 : * is on the same cpu.
5381 : */
5382 : for_each_pool_worker(worker, pool)
5383 : worker->flags |= WORKER_UNBOUND;
5384 :
5385 : pool->flags |= POOL_DISASSOCIATED;
5386 :
5387 : /*
5388 : * The handling of nr_running in sched callbacks is disabled
5389 : * now. Zap nr_running. After this, nr_running stays zero and
5390 : * need_more_worker() and keep_working() are always true as
5391 : * long as the worklist is not empty. This pool now behaves as
5392 : * an unbound (in terms of concurrency management) pool which
5393 : * is served by workers tied to the pool.
5394 : */
5395 : pool->nr_running = 0;
5396 :
5397 : /*
5398 : * With concurrency management just turned off, a busy
5399 : * worker blocking could lead to lengthy stalls. Kick off
5400 : * unbound chain execution of currently pending work items.
5401 : */
5402 : wake_up_worker(pool);
5403 :
5404 : raw_spin_unlock_irq(&pool->lock);
5405 :
5406 : for_each_pool_worker(worker, pool)
5407 : unbind_worker(worker);
5408 :
5409 : mutex_unlock(&wq_pool_attach_mutex);
5410 : }
5411 : }
5412 :
5413 : /**
5414 : * rebind_workers - rebind all workers of a pool to the associated CPU
5415 : * @pool: pool of interest
5416 : *
5417 : * @pool->cpu is coming online. Rebind all workers to the CPU.
5418 : */
5419 : static void rebind_workers(struct worker_pool *pool)
5420 : {
5421 : struct worker *worker;
5422 :
5423 : lockdep_assert_held(&wq_pool_attach_mutex);
5424 :
5425 : /*
5426 : * Restore CPU affinity of all workers. As all idle workers should
5427 : * be on the run-queue of the associated CPU before any local
5428 : * wake-ups for concurrency management happen, restore CPU affinity
5429 : * of all workers first and then clear UNBOUND. As we're called
5430 : * from CPU_ONLINE, the following shouldn't fail.
5431 : */
5432 : for_each_pool_worker(worker, pool) {
5433 : kthread_set_per_cpu(worker->task, pool->cpu);
5434 : WARN_ON_ONCE(set_cpus_allowed_ptr(worker->task,
5435 : pool->attrs->cpumask) < 0);
5436 : }
5437 :
5438 : raw_spin_lock_irq(&pool->lock);
5439 :
5440 : pool->flags &= ~POOL_DISASSOCIATED;
5441 :
5442 : for_each_pool_worker(worker, pool) {
5443 : unsigned int worker_flags = worker->flags;
5444 :
5445 : /*
5446 : * We want to clear UNBOUND but can't directly call
5447 : * worker_clr_flags() or adjust nr_running. Atomically
5448 : * replace UNBOUND with another NOT_RUNNING flag REBOUND.
5449 : * @worker will clear REBOUND using worker_clr_flags() when
5450 : * it initiates the next execution cycle thus restoring
5451 : * concurrency management. Note that when or whether
5452 : * @worker clears REBOUND doesn't affect correctness.
5453 : *
5454 : * WRITE_ONCE() is necessary because @worker->flags may be
5455 : * tested without holding any lock in
5456 : * wq_worker_running(). Without it, NOT_RUNNING test may
5457 : * fail incorrectly leading to premature concurrency
5458 : * management operations.
5459 : */
5460 : WARN_ON_ONCE(!(worker_flags & WORKER_UNBOUND));
5461 : worker_flags |= WORKER_REBOUND;
5462 : worker_flags &= ~WORKER_UNBOUND;
5463 : WRITE_ONCE(worker->flags, worker_flags);
5464 : }
5465 :
5466 : raw_spin_unlock_irq(&pool->lock);
5467 : }
5468 :
5469 : /**
5470 : * restore_unbound_workers_cpumask - restore cpumask of unbound workers
5471 : * @pool: unbound pool of interest
5472 : * @cpu: the CPU which is coming up
5473 : *
5474 : * An unbound pool may end up with a cpumask which doesn't have any online
5475 : * CPUs. When a worker of such a pool gets scheduled, the scheduler resets
5476 : * its cpus_allowed. If @cpu is in @pool's cpumask which didn't have any
5477 : * online CPU before, cpus_allowed of all its workers should be restored.
5478 : */
5479 : static void restore_unbound_workers_cpumask(struct worker_pool *pool, int cpu)
5480 : {
5481 : static cpumask_t cpumask;
5482 : struct worker *worker;
5483 :
5484 : lockdep_assert_held(&wq_pool_attach_mutex);
5485 :
5486 : /* is @cpu allowed for @pool? */
5487 : if (!cpumask_test_cpu(cpu, pool->attrs->cpumask))
5488 : return;
5489 :
5490 : cpumask_and(&cpumask, pool->attrs->cpumask, cpu_online_mask);
5491 :
5492 : /* as we're called from CPU_ONLINE, the following shouldn't fail */
5493 : for_each_pool_worker(worker, pool)
5494 : WARN_ON_ONCE(set_cpus_allowed_ptr(worker->task, &cpumask) < 0);
5495 : }
5496 :
5497 : int workqueue_prepare_cpu(unsigned int cpu)
5498 : {
5499 : struct worker_pool *pool;
5500 :
5501 : for_each_cpu_worker_pool(pool, cpu) {
5502 : if (pool->nr_workers)
5503 : continue;
5504 : if (!create_worker(pool))
5505 : return -ENOMEM;
5506 : }
5507 : return 0;
5508 : }
5509 :
5510 : int workqueue_online_cpu(unsigned int cpu)
5511 : {
5512 : struct worker_pool *pool;
5513 : struct workqueue_struct *wq;
5514 : int pi;
5515 :
5516 : mutex_lock(&wq_pool_mutex);
5517 :
5518 : for_each_pool(pool, pi) {
5519 : mutex_lock(&wq_pool_attach_mutex);
5520 :
5521 : if (pool->cpu == cpu)
5522 : rebind_workers(pool);
5523 : else if (pool->cpu < 0)
5524 : restore_unbound_workers_cpumask(pool, cpu);
5525 :
5526 : mutex_unlock(&wq_pool_attach_mutex);
5527 : }
5528 :
5529 : /* update NUMA affinity of unbound workqueues */
5530 : list_for_each_entry(wq, &workqueues, list)
5531 : wq_update_unbound_numa(wq, cpu, true);
5532 :
5533 : mutex_unlock(&wq_pool_mutex);
5534 : return 0;
5535 : }
5536 :
5537 : int workqueue_offline_cpu(unsigned int cpu)
5538 : {
5539 : struct workqueue_struct *wq;
5540 :
5541 : /* unbinding per-cpu workers should happen on the local CPU */
5542 : if (WARN_ON(cpu != smp_processor_id()))
5543 : return -1;
5544 :
5545 : unbind_workers(cpu);
5546 :
5547 : /* update NUMA affinity of unbound workqueues */
5548 : mutex_lock(&wq_pool_mutex);
5549 : list_for_each_entry(wq, &workqueues, list)
5550 : wq_update_unbound_numa(wq, cpu, false);
5551 : mutex_unlock(&wq_pool_mutex);
5552 :
5553 : return 0;
5554 : }
5555 :
5556 : struct work_for_cpu {
5557 : struct work_struct work;
5558 : long (*fn)(void *);
5559 : void *arg;
5560 : long ret;
5561 : };
5562 :
5563 : static void work_for_cpu_fn(struct work_struct *work)
5564 : {
5565 : struct work_for_cpu *wfc = container_of(work, struct work_for_cpu, work);
5566 :
5567 : wfc->ret = wfc->fn(wfc->arg);
5568 : }
5569 :
5570 : /**
5571 : * work_on_cpu - run a function in thread context on a particular cpu
5572 : * @cpu: the cpu to run on
5573 : * @fn: the function to run
5574 : * @arg: the function arg
5575 : *
5576 : * It is up to the caller to ensure that the cpu doesn't go offline.
5577 : * The caller must not hold any locks which would prevent @fn from completing.
5578 : *
5579 : * Return: The value @fn returns.
5580 : */
5581 : long work_on_cpu(int cpu, long (*fn)(void *), void *arg)
5582 : {
5583 : struct work_for_cpu wfc = { .fn = fn, .arg = arg };
5584 :
5585 : INIT_WORK_ONSTACK(&wfc.work, work_for_cpu_fn);
5586 : schedule_work_on(cpu, &wfc.work);
5587 : flush_work(&wfc.work);
5588 : destroy_work_on_stack(&wfc.work);
5589 : return wfc.ret;
5590 : }
5591 : EXPORT_SYMBOL_GPL(work_on_cpu);
5592 :
5593 : /**
5594 : * work_on_cpu_safe - run a function in thread context on a particular cpu
5595 : * @cpu: the cpu to run on
5596 : * @fn: the function to run
5597 : * @arg: the function argument
5598 : *
5599 : * Disables CPU hotplug and calls work_on_cpu(). The caller must not hold
5600 : * any locks which would prevent @fn from completing.
5601 : *
5602 : * Return: The value @fn returns.
5603 : */
5604 : long work_on_cpu_safe(int cpu, long (*fn)(void *), void *arg)
5605 : {
5606 : long ret = -ENODEV;
5607 :
5608 : cpus_read_lock();
5609 : if (cpu_online(cpu))
5610 : ret = work_on_cpu(cpu, fn, arg);
5611 : cpus_read_unlock();
5612 : return ret;
5613 : }
5614 : EXPORT_SYMBOL_GPL(work_on_cpu_safe);
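/*
 * Usage sketch (illustrative only) covering both wrappers above: run a
 * function on a specific CPU from process context. work_on_cpu_safe() pins
 * CPU hotplug, so the caller doesn't have to keep the target CPU online
 * itself; -ENODEV is returned if the CPU is already offline.
 * "example_fn" and "example_run_on" are hypothetical.
 */
static long example_fn(void *arg)
{
	/* runs in process context in a kworker bound to the requested CPU */
	return raw_smp_processor_id();
}

static long example_run_on(int cpu)
{
	return work_on_cpu_safe(cpu, example_fn, NULL);
}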
5615 : #endif /* CONFIG_SMP */
5616 :
5617 : #ifdef CONFIG_FREEZER
5618 :
5619 : /**
5620 : * freeze_workqueues_begin - begin freezing workqueues
5621 : *
5622 : * Start freezing workqueues. After this function returns, all freezable
5623 : * workqueues will queue new works to their inactive_works list instead of
5624 : * pool->worklist.
5625 : *
5626 : * CONTEXT:
5627 : * Grabs and releases wq_pool_mutex, wq->mutex and pool->lock's.
5628 : */
5629 0 : void freeze_workqueues_begin(void)
5630 : {
5631 : struct workqueue_struct *wq;
5632 : struct pool_workqueue *pwq;
5633 :
5634 0 : mutex_lock(&wq_pool_mutex);
5635 :
5636 0 : WARN_ON_ONCE(workqueue_freezing);
5637 0 : workqueue_freezing = true;
5638 :
5639 0 : list_for_each_entry(wq, &workqueues, list) {
5640 0 : mutex_lock(&wq->mutex);
5641 0 : for_each_pwq(pwq, wq)
5642 0 : pwq_adjust_max_active(pwq);
5643 0 : mutex_unlock(&wq->mutex);
5644 : }
5645 :
5646 0 : mutex_unlock(&wq_pool_mutex);
5647 0 : }
5648 :
5649 : /**
5650 : * freeze_workqueues_busy - are freezable workqueues still busy?
5651 : *
5652 : * Check whether freezing is complete. This function must be called
5653 : * between freeze_workqueues_begin() and thaw_workqueues().
5654 : *
5655 : * CONTEXT:
5656 : * Grabs and releases wq_pool_mutex.
5657 : *
5658 : * Return:
5659 : * %true if some freezable workqueues are still busy. %false if freezing
5660 : * is complete.
5661 : */
5662 0 : bool freeze_workqueues_busy(void)
5663 : {
5664 0 : bool busy = false;
5665 : struct workqueue_struct *wq;
5666 : struct pool_workqueue *pwq;
5667 :
5668 0 : mutex_lock(&wq_pool_mutex);
5669 :
5670 0 : WARN_ON_ONCE(!workqueue_freezing);
5671 :
5672 0 : list_for_each_entry(wq, &workqueues, list) {
5673 0 : if (!(wq->flags & WQ_FREEZABLE))
5674 0 : continue;
5675 : /*
5676 : * nr_active is monotonically decreasing. It's safe
5677 : * to peek without lock.
5678 : */
5679 : rcu_read_lock();
5680 0 : for_each_pwq(pwq, wq) {
5681 0 : WARN_ON_ONCE(pwq->nr_active < 0);
5682 0 : if (pwq->nr_active) {
5683 0 : busy = true;
5684 : rcu_read_unlock();
5685 : goto out_unlock;
5686 : }
5687 : }
5688 : rcu_read_unlock();
5689 : }
5690 : out_unlock:
5691 0 : mutex_unlock(&wq_pool_mutex);
5692 0 : return busy;
5693 : }
5694 :
5695 : /**
5696 : * thaw_workqueues - thaw workqueues
5697 : *
5698 : * Thaw workqueues. Normal queueing is restored and all collected
5699 : * frozen works are transferred to their respective pool worklists.
5700 : *
5701 : * CONTEXT:
5702 : * Grabs and releases wq_pool_mutex, wq->mutex and pool->lock's.
5703 : */
5704 0 : void thaw_workqueues(void)
5705 : {
5706 : struct workqueue_struct *wq;
5707 : struct pool_workqueue *pwq;
5708 :
5709 0 : mutex_lock(&wq_pool_mutex);
5710 :
5711 0 : if (!workqueue_freezing)
5712 : goto out_unlock;
5713 :
5714 0 : workqueue_freezing = false;
5715 :
5716 : /* restore max_active and repopulate worklist */
5717 0 : list_for_each_entry(wq, &workqueues, list) {
5718 0 : mutex_lock(&wq->mutex);
5719 0 : for_each_pwq(pwq, wq)
5720 0 : pwq_adjust_max_active(pwq);
5721 0 : mutex_unlock(&wq->mutex);
5722 : }
5723 :
5724 : out_unlock:
5725 0 : mutex_unlock(&wq_pool_mutex);
5726 0 : }
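/*
 * Usage sketch (illustrative only) of the freezer-side flow described above;
 * the real caller is the PM freezer (try_to_freeze_tasks()): begin freezing,
 * poll until no freezable workqueue is still busy, thaw on failure or resume.
 * The retry count and sleep interval are arbitrary assumptions.
 */
static int example_freeze_wqs(void)
{
	int retries = 100;

	freeze_workqueues_begin();
	while (freeze_workqueues_busy()) {
		if (!--retries) {
			thaw_workqueues();
			return -EBUSY;
		}
		msleep(10);
	}
	return 0;
}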
5727 : #endif /* CONFIG_FREEZER */
5728 :
5729 0 : static int workqueue_apply_unbound_cpumask(const cpumask_var_t unbound_cpumask)
5730 : {
5731 0 : LIST_HEAD(ctxs);
5732 0 : int ret = 0;
5733 : struct workqueue_struct *wq;
5734 : struct apply_wqattrs_ctx *ctx, *n;
5735 :
5736 : lockdep_assert_held(&wq_pool_mutex);
5737 :
5738 0 : list_for_each_entry(wq, &workqueues, list) {
5739 0 : if (!(wq->flags & WQ_UNBOUND))
5740 0 : continue;
5741 : /* creating multiple pwqs breaks ordering guarantee */
5742 0 : if (wq->flags & __WQ_ORDERED)
5743 0 : continue;
5744 :
5745 0 : ctx = apply_wqattrs_prepare(wq, wq->unbound_attrs, unbound_cpumask);
5746 0 : if (!ctx) {
5747 : ret = -ENOMEM;
5748 : break;
5749 : }
5750 :
5751 0 : list_add_tail(&ctx->list, &ctxs);
5752 : }
5753 :
5754 0 : list_for_each_entry_safe(ctx, n, &ctxs, list) {
5755 0 : if (!ret)
5756 0 : apply_wqattrs_commit(ctx);
5757 0 : apply_wqattrs_cleanup(ctx);
5758 : }
5759 :
5760 0 : if (!ret) {
5761 0 : mutex_lock(&wq_pool_attach_mutex);
5762 0 : cpumask_copy(wq_unbound_cpumask, unbound_cpumask);
5763 0 : mutex_unlock(&wq_pool_attach_mutex);
5764 : }
5765 0 : return ret;
5766 : }
5767 :
5768 : /**
5769 : * workqueue_set_unbound_cpumask - Set the low-level unbound cpumask
5770 : * @cpumask: the cpumask to set
5771 : *
5772 : * The low-level workqueues cpumask is a global cpumask that limits
5773 : * the affinity of all unbound workqueues. This function checks @cpumask
5774 : * and applies it to all unbound workqueues, updating all of their pwqs.
5775 : *
5776 : * Return: 0 - Success
5777 : * -EINVAL - Invalid @cpumask
5778 : * -ENOMEM - Failed to allocate memory for attrs or pwqs.
5779 : */
5780 0 : int workqueue_set_unbound_cpumask(cpumask_var_t cpumask)
5781 : {
5782 0 : int ret = -EINVAL;
5783 :
5784 : /*
5785 : * Not excluding isolated cpus on purpose.
5786 : * If the user wishes to include them, we allow that.
5787 : */
5788 0 : cpumask_and(cpumask, cpumask, cpu_possible_mask);
5789 0 : if (!cpumask_empty(cpumask)) {
5790 0 : apply_wqattrs_lock();
5791 0 : if (cpumask_equal(cpumask, wq_unbound_cpumask)) {
5792 : ret = 0;
5793 : goto out_unlock;
5794 : }
5795 :
5796 0 : ret = workqueue_apply_unbound_cpumask(cpumask);
5797 :
5798 : out_unlock:
5799 : apply_wqattrs_unlock();
5800 : }
5801 :
5802 0 : return ret;
5803 : }
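/*
 * Usage sketch (illustrative only; the sysfs "cpumask" store below is the
 * normal caller): hand in a writable cpumask_var_t, which is clamped to
 * cpu_possible_mask internally. The "0-3" CPU list is an arbitrary example.
 */
static int example_restrict_unbound(void)
{
	cpumask_var_t mask;
	int ret;

	if (!zalloc_cpumask_var(&mask, GFP_KERNEL))
		return -ENOMEM;

	ret = cpumask_parselist("0-3", mask);
	if (!ret)
		ret = workqueue_set_unbound_cpumask(mask);

	free_cpumask_var(mask);
	return ret;
}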
5804 :
5805 : #ifdef CONFIG_SYSFS
5806 : /*
5807 : * Workqueues with the WQ_SYSFS flag set are visible to userland via
5808 : * /sys/bus/workqueue/devices/WQ_NAME. All visible workqueues have the
5809 : * following attributes.
5810 : *
5811 : * per_cpu RO bool : whether the workqueue is per-cpu or unbound
5812 : * max_active RW int : maximum number of in-flight work items
5813 : *
5814 : * Unbound workqueues have the following extra attributes.
5815 : *
5816 : * pool_ids RO int : the associated pool IDs for each node
5817 : * nice RW int : nice value of the workers
5818 : * cpumask RW mask : bitmask of allowed CPUs for the workers
5819 : * numa RW bool : whether to enable NUMA affinity
5820 : */
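/*
 * Usage sketch (illustrative only): creating a workqueue with WQ_SYSFS
 * exposes it under /sys/bus/workqueue/devices/<name> with the attributes
 * listed above. "example_unbound" is an arbitrary name.
 */
static struct workqueue_struct *example_sysfs_wq;

static int example_create_sysfs_wq(void)
{
	example_sysfs_wq = alloc_workqueue("example_unbound",
					   WQ_UNBOUND | WQ_SYSFS, 0);
	return example_sysfs_wq ? 0 : -ENOMEM;
}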
5821 : struct wq_device {
5822 : struct workqueue_struct *wq;
5823 : struct device dev;
5824 : };
5825 :
5826 : static struct workqueue_struct *dev_to_wq(struct device *dev)
5827 : {
5828 0 : struct wq_device *wq_dev = container_of(dev, struct wq_device, dev);
5829 :
5830 0 : return wq_dev->wq;
5831 : }
5832 :
5833 0 : static ssize_t per_cpu_show(struct device *dev, struct device_attribute *attr,
5834 : char *buf)
5835 : {
5836 0 : struct workqueue_struct *wq = dev_to_wq(dev);
5837 :
5838 0 : return scnprintf(buf, PAGE_SIZE, "%d\n", (bool)!(wq->flags & WQ_UNBOUND));
5839 : }
5840 : static DEVICE_ATTR_RO(per_cpu);
5841 :
5842 0 : static ssize_t max_active_show(struct device *dev,
5843 : struct device_attribute *attr, char *buf)
5844 : {
5845 0 : struct workqueue_struct *wq = dev_to_wq(dev);
5846 :
5847 0 : return scnprintf(buf, PAGE_SIZE, "%d\n", wq->saved_max_active);
5848 : }
5849 :
5850 0 : static ssize_t max_active_store(struct device *dev,
5851 : struct device_attribute *attr, const char *buf,
5852 : size_t count)
5853 : {
5854 0 : struct workqueue_struct *wq = dev_to_wq(dev);
5855 : int val;
5856 :
5857 0 : if (sscanf(buf, "%d", &val) != 1 || val <= 0)
5858 : return -EINVAL;
5859 :
5860 0 : workqueue_set_max_active(wq, val);
5861 0 : return count;
5862 : }
5863 : static DEVICE_ATTR_RW(max_active);
5864 :
5865 : static struct attribute *wq_sysfs_attrs[] = {
5866 : &dev_attr_per_cpu.attr,
5867 : &dev_attr_max_active.attr,
5868 : NULL,
5869 : };
5870 : ATTRIBUTE_GROUPS(wq_sysfs);
5871 :
5872 0 : static ssize_t wq_pool_ids_show(struct device *dev,
5873 : struct device_attribute *attr, char *buf)
5874 : {
5875 0 : struct workqueue_struct *wq = dev_to_wq(dev);
5876 0 : const char *delim = "";
5877 0 : int node, written = 0;
5878 :
5879 : cpus_read_lock();
5880 : rcu_read_lock();
5881 0 : for_each_node(node) {
5882 0 : written += scnprintf(buf + written, PAGE_SIZE - written,
5883 : "%s%d:%d", delim, node,
5884 0 : unbound_pwq_by_node(wq, node)->pool->id);
5885 0 : delim = " ";
5886 : }
5887 0 : written += scnprintf(buf + written, PAGE_SIZE - written, "\n");
5888 : rcu_read_unlock();
5889 : cpus_read_unlock();
5890 :
5891 0 : return written;
5892 : }
5893 :
5894 0 : static ssize_t wq_nice_show(struct device *dev, struct device_attribute *attr,
5895 : char *buf)
5896 : {
5897 0 : struct workqueue_struct *wq = dev_to_wq(dev);
5898 : int written;
5899 :
5900 0 : mutex_lock(&wq->mutex);
5901 0 : written = scnprintf(buf, PAGE_SIZE, "%d\n", wq->unbound_attrs->nice);
5902 0 : mutex_unlock(&wq->mutex);
5903 :
5904 0 : return written;
5905 : }
5906 :
5907 : /* prepare workqueue_attrs for sysfs store operations */
5908 0 : static struct workqueue_attrs *wq_sysfs_prep_attrs(struct workqueue_struct *wq)
5909 : {
5910 : struct workqueue_attrs *attrs;
5911 :
5912 : lockdep_assert_held(&wq_pool_mutex);
5913 :
5914 0 : attrs = alloc_workqueue_attrs();
5915 0 : if (!attrs)
5916 : return NULL;
5917 :
5918 0 : copy_workqueue_attrs(attrs, wq->unbound_attrs);
5919 0 : return attrs;
5920 : }
5921 :
5922 0 : static ssize_t wq_nice_store(struct device *dev, struct device_attribute *attr,
5923 : const char *buf, size_t count)
5924 : {
5925 0 : struct workqueue_struct *wq = dev_to_wq(dev);
5926 : struct workqueue_attrs *attrs;
5927 0 : int ret = -ENOMEM;
5928 :
5929 : apply_wqattrs_lock();
5930 :
5931 0 : attrs = wq_sysfs_prep_attrs(wq);
5932 0 : if (!attrs)
5933 : goto out_unlock;
5934 :
5935 0 : if (sscanf(buf, "%d", &attrs->nice) == 1 &&
5936 0 : attrs->nice >= MIN_NICE && attrs->nice <= MAX_NICE)
5937 0 : ret = apply_workqueue_attrs_locked(wq, attrs);
5938 : else
5939 : ret = -EINVAL;
5940 :
5941 : out_unlock:
5942 0 : apply_wqattrs_unlock();
5943 0 : free_workqueue_attrs(attrs);
5944 0 : return ret ?: count;
5945 : }
5946 :
5947 0 : static ssize_t wq_cpumask_show(struct device *dev,
5948 : struct device_attribute *attr, char *buf)
5949 : {
5950 0 : struct workqueue_struct *wq = dev_to_wq(dev);
5951 : int written;
5952 :
5953 0 : mutex_lock(&wq->mutex);
5954 0 : written = scnprintf(buf, PAGE_SIZE, "%*pb\n",
5955 0 : cpumask_pr_args(wq->unbound_attrs->cpumask));
5956 0 : mutex_unlock(&wq->mutex);
5957 0 : return written;
5958 : }
5959 :
5960 0 : static ssize_t wq_cpumask_store(struct device *dev,
5961 : struct device_attribute *attr,
5962 : const char *buf, size_t count)
5963 : {
5964 0 : struct workqueue_struct *wq = dev_to_wq(dev);
5965 : struct workqueue_attrs *attrs;
5966 0 : int ret = -ENOMEM;
5967 :
5968 : apply_wqattrs_lock();
5969 :
5970 0 : attrs = wq_sysfs_prep_attrs(wq);
5971 0 : if (!attrs)
5972 : goto out_unlock;
5973 :
5974 0 : ret = cpumask_parse(buf, attrs->cpumask);
5975 0 : if (!ret)
5976 0 : ret = apply_workqueue_attrs_locked(wq, attrs);
5977 :
5978 : out_unlock:
5979 0 : apply_wqattrs_unlock();
5980 0 : free_workqueue_attrs(attrs);
5981 0 : return ret ?: count;
5982 : }
5983 :
5984 0 : static ssize_t wq_numa_show(struct device *dev, struct device_attribute *attr,
5985 : char *buf)
5986 : {
5987 0 : struct workqueue_struct *wq = dev_to_wq(dev);
5988 : int written;
5989 :
5990 0 : mutex_lock(&wq->mutex);
5991 0 : written = scnprintf(buf, PAGE_SIZE, "%d\n",
5992 0 : !wq->unbound_attrs->no_numa);
5993 0 : mutex_unlock(&wq->mutex);
5994 :
5995 0 : return written;
5996 : }
5997 :
5998 0 : static ssize_t wq_numa_store(struct device *dev, struct device_attribute *attr,
5999 : const char *buf, size_t count)
6000 : {
6001 0 : struct workqueue_struct *wq = dev_to_wq(dev);
6002 : struct workqueue_attrs *attrs;
6003 0 : int v, ret = -ENOMEM;
6004 :
6005 : apply_wqattrs_lock();
6006 :
6007 0 : attrs = wq_sysfs_prep_attrs(wq);
6008 0 : if (!attrs)
6009 : goto out_unlock;
6010 :
6011 0 : ret = -EINVAL;
6012 0 : if (sscanf(buf, "%d", &v) == 1) {
6013 0 : attrs->no_numa = !v;
6014 0 : ret = apply_workqueue_attrs_locked(wq, attrs);
6015 : }
6016 :
6017 : out_unlock:
6018 0 : apply_wqattrs_unlock();
6019 0 : free_workqueue_attrs(attrs);
6020 0 : return ret ?: count;
6021 : }
6022 :
6023 : static struct device_attribute wq_sysfs_unbound_attrs[] = {
6024 : __ATTR(pool_ids, 0444, wq_pool_ids_show, NULL),
6025 : __ATTR(nice, 0644, wq_nice_show, wq_nice_store),
6026 : __ATTR(cpumask, 0644, wq_cpumask_show, wq_cpumask_store),
6027 : __ATTR(numa, 0644, wq_numa_show, wq_numa_store),
6028 : __ATTR_NULL,
6029 : };
6030 :
6031 : static struct bus_type wq_subsys = {
6032 : .name = "workqueue",
6033 : .dev_groups = wq_sysfs_groups,
6034 : };
6035 :
6036 0 : static ssize_t wq_unbound_cpumask_show(struct device *dev,
6037 : struct device_attribute *attr, char *buf)
6038 : {
6039 : int written;
6040 :
6041 0 : mutex_lock(&wq_pool_mutex);
6042 0 : written = scnprintf(buf, PAGE_SIZE, "%*pb\n",
6043 : cpumask_pr_args(wq_unbound_cpumask));
6044 0 : mutex_unlock(&wq_pool_mutex);
6045 :
6046 0 : return written;
6047 : }
6048 :
6049 0 : static ssize_t wq_unbound_cpumask_store(struct device *dev,
6050 : struct device_attribute *attr, const char *buf, size_t count)
6051 : {
6052 : cpumask_var_t cpumask;
6053 : int ret;
6054 :
6055 0 : if (!zalloc_cpumask_var(&cpumask, GFP_KERNEL))
6056 : return -ENOMEM;
6057 :
6058 0 : ret = cpumask_parse(buf, cpumask);
6059 0 : if (!ret)
6060 0 : ret = workqueue_set_unbound_cpumask(cpumask);
6061 :
6062 0 : free_cpumask_var(cpumask);
6063 0 : return ret ? ret : count;
6064 : }
6065 :
6066 : static struct device_attribute wq_sysfs_cpumask_attr =
6067 : __ATTR(cpumask, 0644, wq_unbound_cpumask_show,
6068 : wq_unbound_cpumask_store);
6069 :
6070 1 : static int __init wq_sysfs_init(void)
6071 : {
6072 : struct device *dev_root;
6073 : int err;
6074 :
6075 1 : err = subsys_virtual_register(&wq_subsys, NULL);
6076 1 : if (err)
6077 : return err;
6078 :
6079 1 : dev_root = bus_get_dev_root(&wq_subsys);
6080 1 : if (dev_root) {
6081 1 : err = device_create_file(dev_root, &wq_sysfs_cpumask_attr);
6082 1 : put_device(dev_root);
6083 : }
6084 : return err;
6085 : }
6086 : core_initcall(wq_sysfs_init);
6087 :
6088 0 : static void wq_device_release(struct device *dev)
6089 : {
6090 0 : struct wq_device *wq_dev = container_of(dev, struct wq_device, dev);
6091 :
6092 0 : kfree(wq_dev);
6093 0 : }
6094 :
6095 : /**
6096 : * workqueue_sysfs_register - make a workqueue visible in sysfs
6097 : * @wq: the workqueue to register
6098 : *
6099 : * Expose @wq in sysfs under /sys/bus/workqueue/devices.
6100 : * alloc_workqueue*() automatically calls this function if WQ_SYSFS is set
6101 : * which is the preferred method.
6102 : *
6103 : * A workqueue user should use this function directly iff it wants to apply
6104 : * workqueue_attrs before making the workqueue visible in sysfs; otherwise,
6105 : * apply_workqueue_attrs() may race against userland updating the
6106 : * attributes.
6107 : *
6108 : * Return: 0 on success, -errno on failure.
6109 : */
6110 1 : int workqueue_sysfs_register(struct workqueue_struct *wq)
6111 : {
6112 : struct wq_device *wq_dev;
6113 : int ret;
6114 :
6115 : /*
6116 : * Adjusting max_active or creating new pwqs by applying
6117 : * attributes breaks ordering guarantee. Disallow exposing ordered
6118 : * workqueues.
6119 : */
6120 1 : if (WARN_ON(wq->flags & __WQ_ORDERED_EXPLICIT))
6121 : return -EINVAL;
6122 :
6123 1 : wq->wq_dev = wq_dev = kzalloc(sizeof(*wq_dev), GFP_KERNEL);
6124 1 : if (!wq_dev)
6125 : return -ENOMEM;
6126 :
6127 1 : wq_dev->wq = wq;
6128 1 : wq_dev->dev.bus = &wq_subsys;
6129 1 : wq_dev->dev.release = wq_device_release;
6130 1 : dev_set_name(&wq_dev->dev, "%s", wq->name);
6131 :
6132 : /*
6133 : * unbound_attrs are created separately. Suppress uevent until
6134 : * everything is ready.
6135 : */
6136 2 : dev_set_uevent_suppress(&wq_dev->dev, true);
6137 :
6138 1 : ret = device_register(&wq_dev->dev);
6139 1 : if (ret) {
6140 0 : put_device(&wq_dev->dev);
6141 0 : wq->wq_dev = NULL;
6142 0 : return ret;
6143 : }
6144 :
6145 1 : if (wq->flags & WQ_UNBOUND) {
6146 : struct device_attribute *attr;
6147 :
6148 4 : for (attr = wq_sysfs_unbound_attrs; attr->attr.name; attr++) {
6149 4 : ret = device_create_file(&wq_dev->dev, attr);
6150 4 : if (ret) {
6151 0 : device_unregister(&wq_dev->dev);
6152 0 : wq->wq_dev = NULL;
6153 0 : return ret;
6154 : }
6155 : }
6156 : }
6157 :
6158 2 : dev_set_uevent_suppress(&wq_dev->dev, false);
6159 1 : kobject_uevent(&wq_dev->dev.kobj, KOBJ_ADD);
6160 1 : return 0;
6161 : }
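/*
 * Usage sketch (illustrative only) for the "apply attrs before exposing"
 * case described above: create the workqueue without WQ_SYSFS, tune its
 * attributes, then register it in sysfs explicitly. The name, the nice
 * value and the trimmed error handling are assumptions of the sketch.
 */
static int example_expose_tuned_wq(void)
{
	struct workqueue_struct *wq;
	struct workqueue_attrs *attrs;
	int ret;

	wq = alloc_workqueue("example_tuned", WQ_UNBOUND, 0);
	if (!wq)
		return -ENOMEM;

	attrs = alloc_workqueue_attrs();
	if (!attrs)
		return -ENOMEM;
	attrs->nice = -10;
	ret = apply_workqueue_attrs(wq, attrs);
	free_workqueue_attrs(attrs);
	if (ret)
		return ret;

	return workqueue_sysfs_register(wq);
}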
6162 :
6163 : /**
6164 : * workqueue_sysfs_unregister - undo workqueue_sysfs_register()
6165 : * @wq: the workqueue to unregister
6166 : *
6167 : * If @wq is registered to sysfs by workqueue_sysfs_register(), unregister.
6168 : */
6169 : static void workqueue_sysfs_unregister(struct workqueue_struct *wq)
6170 : {
6171 0 : struct wq_device *wq_dev = wq->wq_dev;
6172 :
6173 0 : if (!wq->wq_dev)
6174 : return;
6175 :
6176 0 : wq->wq_dev = NULL;
6177 0 : device_unregister(&wq_dev->dev);
6178 : }
6179 : #else /* CONFIG_SYSFS */
6180 : static void workqueue_sysfs_unregister(struct workqueue_struct *wq) { }
6181 : #endif /* CONFIG_SYSFS */
6182 :
6183 : /*
6184 : * Workqueue watchdog.
6185 : *
6186 : * Stalls may be caused by various bugs - a missing WQ_MEM_RECLAIM, an illegal
6187 : * flush dependency, or a concurrency-managed work item which stays RUNNING
6188 : * indefinitely. Workqueue stalls can be very difficult to debug as the
6189 : * usual warning mechanisms don't trigger and internal workqueue state is
6190 : * largely opaque.
6191 : *
6192 : * Workqueue watchdog monitors all worker pools periodically and dumps
6193 : * state if some pools failed to make forward progress for a while, where
6194 : * forward progress is defined as the first item on ->worklist changing.
6195 : *
6196 : * This mechanism is controlled through the kernel parameter
6197 : * "workqueue.watchdog_thresh" which can be updated at runtime through the
6198 : * corresponding sysfs parameter file.
6199 : */
6200 : #ifdef CONFIG_WQ_WATCHDOG
6201 :
6202 : static unsigned long wq_watchdog_thresh = 30;
6203 : static struct timer_list wq_watchdog_timer;
6204 :
6205 : static unsigned long wq_watchdog_touched = INITIAL_JIFFIES;
6206 : static DEFINE_PER_CPU(unsigned long, wq_watchdog_touched_cpu) = INITIAL_JIFFIES;
6207 :
6208 : /*
6209 : * Show workers that might prevent the processing of pending work items.
6210 : * The only candidates are CPU-bound workers in the running state.
6211 : * Pending work items should be handled by another idle worker
6212 : * in all other situations.
6213 : */
6214 : static void show_cpu_pool_hog(struct worker_pool *pool)
6215 : {
6216 : struct worker *worker;
6217 : unsigned long flags;
6218 : int bkt;
6219 :
6220 : raw_spin_lock_irqsave(&pool->lock, flags);
6221 :
6222 : hash_for_each(pool->busy_hash, bkt, worker, hentry) {
6223 : if (task_is_running(worker->task)) {
6224 : /*
6225 : * Defer printing to avoid deadlocks in console
6226 : * drivers that queue work while holding locks
6227 : * also taken in their write paths.
6228 : */
6229 : printk_deferred_enter();
6230 :
6231 : pr_info("pool %d:\n", pool->id);
6232 : sched_show_task(worker->task);
6233 :
6234 : printk_deferred_exit();
6235 : }
6236 : }
6237 :
6238 : raw_spin_unlock_irqrestore(&pool->lock, flags);
6239 : }
6240 :
6241 : static void show_cpu_pools_hogs(void)
6242 : {
6243 : struct worker_pool *pool;
6244 : int pi;
6245 :
6246 : pr_info("Showing backtraces of running workers in stalled CPU-bound worker pools:\n");
6247 :
6248 : rcu_read_lock();
6249 :
6250 : for_each_pool(pool, pi) {
6251 : if (pool->cpu_stall)
6252 : show_cpu_pool_hog(pool);
6253 :
6254 : }
6255 :
6256 : rcu_read_unlock();
6257 : }
6258 :
6259 : static void wq_watchdog_reset_touched(void)
6260 : {
6261 : int cpu;
6262 :
6263 : wq_watchdog_touched = jiffies;
6264 : for_each_possible_cpu(cpu)
6265 : per_cpu(wq_watchdog_touched_cpu, cpu) = jiffies;
6266 : }
6267 :
6268 : static void wq_watchdog_timer_fn(struct timer_list *unused)
6269 : {
6270 : unsigned long thresh = READ_ONCE(wq_watchdog_thresh) * HZ;
6271 : bool lockup_detected = false;
6272 : bool cpu_pool_stall = false;
6273 : unsigned long now = jiffies;
6274 : struct worker_pool *pool;
6275 : int pi;
6276 :
6277 : if (!thresh)
6278 : return;
6279 :
6280 : rcu_read_lock();
6281 :
6282 : for_each_pool(pool, pi) {
6283 : unsigned long pool_ts, touched, ts;
6284 :
6285 : pool->cpu_stall = false;
6286 : if (list_empty(&pool->worklist))
6287 : continue;
6288 :
6289 : /*
6290 : * If a virtual machine is stopped by the host it can look to
6291 : * the watchdog like a stall.
6292 : */
6293 : kvm_check_and_clear_guest_paused();
6294 :
6295 : /* get the latest of pool and touched timestamps */
6296 : if (pool->cpu >= 0)
6297 : touched = READ_ONCE(per_cpu(wq_watchdog_touched_cpu, pool->cpu));
6298 : else
6299 : touched = READ_ONCE(wq_watchdog_touched);
6300 : pool_ts = READ_ONCE(pool->watchdog_ts);
6301 :
6302 : if (time_after(pool_ts, touched))
6303 : ts = pool_ts;
6304 : else
6305 : ts = touched;
6306 :
6307 : /* did we stall? */
6308 : if (time_after(now, ts + thresh)) {
6309 : lockup_detected = true;
6310 : if (pool->cpu >= 0) {
6311 : pool->cpu_stall = true;
6312 : cpu_pool_stall = true;
6313 : }
6314 : pr_emerg("BUG: workqueue lockup - pool");
6315 : pr_cont_pool_info(pool);
6316 : pr_cont(" stuck for %us!\n",
6317 : jiffies_to_msecs(now - pool_ts) / 1000);
6318 : }
6319 :
6320 :
6321 : }
6322 :
6323 : rcu_read_unlock();
6324 :
6325 : if (lockup_detected)
6326 : show_all_workqueues();
6327 :
6328 : if (cpu_pool_stall)
6329 : show_cpu_pools_hogs();
6330 :
6331 : wq_watchdog_reset_touched();
6332 : mod_timer(&wq_watchdog_timer, jiffies + thresh);
6333 : }
6334 :
6335 : notrace void wq_watchdog_touch(int cpu)
6336 : {
6337 : if (cpu >= 0)
6338 : per_cpu(wq_watchdog_touched_cpu, cpu) = jiffies;
6339 :
6340 : wq_watchdog_touched = jiffies;
6341 : }
6342 :
6343 : static void wq_watchdog_set_thresh(unsigned long thresh)
6344 : {
6345 : wq_watchdog_thresh = 0;
6346 : del_timer_sync(&wq_watchdog_timer);
6347 :
6348 : if (thresh) {
6349 : wq_watchdog_thresh = thresh;
6350 : wq_watchdog_reset_touched();
6351 : mod_timer(&wq_watchdog_timer, jiffies + thresh * HZ);
6352 : }
6353 : }
6354 :
6355 : static int wq_watchdog_param_set_thresh(const char *val,
6356 : const struct kernel_param *kp)
6357 : {
6358 : unsigned long thresh;
6359 : int ret;
6360 :
6361 : ret = kstrtoul(val, 0, &thresh);
6362 : if (ret)
6363 : return ret;
6364 :
6365 : if (system_wq)
6366 : wq_watchdog_set_thresh(thresh);
6367 : else
6368 : wq_watchdog_thresh = thresh;
6369 :
6370 : return 0;
6371 : }
6372 :
6373 : static const struct kernel_param_ops wq_watchdog_thresh_ops = {
6374 : .set = wq_watchdog_param_set_thresh,
6375 : .get = param_get_ulong,
6376 : };
6377 :
6378 : module_param_cb(watchdog_thresh, &wq_watchdog_thresh_ops, &wq_watchdog_thresh,
6379 : 0644);
6380 :
6381 : static void wq_watchdog_init(void)
6382 : {
6383 : timer_setup(&wq_watchdog_timer, wq_watchdog_timer_fn, TIMER_DEFERRABLE);
6384 : wq_watchdog_set_thresh(wq_watchdog_thresh);
6385 : }
6386 :
6387 : #else /* CONFIG_WQ_WATCHDOG */
6388 :
6389 : static inline void wq_watchdog_init(void) { }
6390 :
6391 : #endif /* CONFIG_WQ_WATCHDOG */
6392 :
6393 : static void __init wq_numa_init(void)
6394 : {
6395 : cpumask_var_t *tbl;
6396 : int node, cpu;
6397 :
6398 1 : if (num_possible_nodes() <= 1)
6399 : return;
6400 :
6401 : if (wq_disable_numa) {
6402 : pr_info("workqueue: NUMA affinity support disabled\n");
6403 : return;
6404 : }
6405 :
6406 : for_each_possible_cpu(cpu) {
6407 : if (WARN_ON(cpu_to_node(cpu) == NUMA_NO_NODE)) {
6408 : pr_warn("workqueue: NUMA node mapping not available for cpu%d, disabling NUMA support\n", cpu);
6409 : return;
6410 : }
6411 : }
6412 :
6413 : wq_update_unbound_numa_attrs_buf = alloc_workqueue_attrs();
6414 : BUG_ON(!wq_update_unbound_numa_attrs_buf);
6415 :
6416 : /*
6417 : * We want masks of possible CPUs of each node which isn't readily
6418 : * available. Build one from cpu_to_node() which should have been
6419 : * fully initialized by now.
6420 : */
6421 : tbl = kcalloc(nr_node_ids, sizeof(tbl[0]), GFP_KERNEL);
6422 : BUG_ON(!tbl);
6423 :
6424 : for_each_node(node)
6425 : BUG_ON(!zalloc_cpumask_var_node(&tbl[node], GFP_KERNEL,
6426 : node_online(node) ? node : NUMA_NO_NODE));
6427 :
6428 : for_each_possible_cpu(cpu) {
6429 : node = cpu_to_node(cpu);
6430 : cpumask_set_cpu(cpu, tbl[node]);
6431 : }
6432 :
6433 : wq_numa_possible_cpumask = tbl;
6434 : wq_numa_enabled = true;
6435 : }
6436 :
6437 : /**
6438 : * workqueue_init_early - early init for workqueue subsystem
6439 : *
6440 : * This is the first half of two-staged workqueue subsystem initialization
6441 : * and invoked as soon as the bare basics - memory allocation, cpumasks and
6442 : * idr are up. It sets up all the data structures and system workqueues
6443 : * and allows early boot code to create workqueues and queue/cancel work
6444 : * items. Actual work item execution starts only after kthreads can be
6445 : * created and scheduled right before early initcalls.
6446 : */
6447 1 : void __init workqueue_init_early(void)
6448 : {
6449 1 : int std_nice[NR_STD_WORKER_POOLS] = { 0, HIGHPRI_NICE_LEVEL };
6450 : int i, cpu;
6451 :
6452 : BUILD_BUG_ON(__alignof__(struct pool_workqueue) < __alignof__(long long));
6453 :
6454 1 : BUG_ON(!alloc_cpumask_var(&wq_unbound_cpumask, GFP_KERNEL));
6455 2 : cpumask_copy(wq_unbound_cpumask, housekeeping_cpumask(HK_TYPE_WQ));
6456 2 : cpumask_and(wq_unbound_cpumask, wq_unbound_cpumask, housekeeping_cpumask(HK_TYPE_DOMAIN));
6457 :
6458 1 : pwq_cache = KMEM_CACHE(pool_workqueue, SLAB_PANIC);
6459 :
6460 : /* initialize CPU pools */
6461 2 : for_each_possible_cpu(cpu) {
6462 : struct worker_pool *pool;
6463 :
6464 : i = 0;
6465 2 : for_each_cpu_worker_pool(pool, cpu) {
6466 2 : BUG_ON(init_worker_pool(pool));
6467 2 : pool->cpu = cpu;
6468 6 : cpumask_copy(pool->attrs->cpumask, cpumask_of(cpu));
6469 2 : pool->attrs->nice = std_nice[i++];
6470 2 : pool->node = cpu_to_node(cpu);
6471 :
6472 : /* alloc pool ID */
6473 2 : mutex_lock(&wq_pool_mutex);
6474 2 : BUG_ON(worker_pool_assign_id(pool));
6475 2 : mutex_unlock(&wq_pool_mutex);
6476 : }
6477 : }
6478 :
6479 : /* create default unbound and ordered wq attrs */
6480 2 : for (i = 0; i < NR_STD_WORKER_POOLS; i++) {
6481 : struct workqueue_attrs *attrs;
6482 :
6483 2 : BUG_ON(!(attrs = alloc_workqueue_attrs()));
6484 2 : attrs->nice = std_nice[i];
6485 2 : unbound_std_wq_attrs[i] = attrs;
6486 :
6487 : /*
6488 : * An ordered wq should have only one pwq as ordering is
6489 : * guaranteed by max_active which is enforced by pwqs.
6490 : * Turn off NUMA so that dfl_pwq is used for all nodes.
6491 : */
6492 2 : BUG_ON(!(attrs = alloc_workqueue_attrs()));
6493 2 : attrs->nice = std_nice[i];
6494 2 : attrs->no_numa = true;
6495 2 : ordered_wq_attrs[i] = attrs;
6496 : }
6497 :
6498 1 : system_wq = alloc_workqueue("events", 0, 0);
6499 1 : system_highpri_wq = alloc_workqueue("events_highpri", WQ_HIGHPRI, 0);
6500 1 : system_long_wq = alloc_workqueue("events_long", 0, 0);
6501 1 : system_unbound_wq = alloc_workqueue("events_unbound", WQ_UNBOUND,
6502 : WQ_UNBOUND_MAX_ACTIVE);
6503 1 : system_freezable_wq = alloc_workqueue("events_freezable",
6504 : WQ_FREEZABLE, 0);
6505 1 : system_power_efficient_wq = alloc_workqueue("events_power_efficient",
6506 : WQ_POWER_EFFICIENT, 0);
6507 1 : system_freezable_power_efficient_wq = alloc_workqueue("events_freezable_power_efficient",
6508 : WQ_FREEZABLE | WQ_POWER_EFFICIENT,
6509 : 0);
6510 1 : BUG_ON(!system_wq || !system_highpri_wq || !system_long_wq ||
6511 : !system_unbound_wq || !system_freezable_wq ||
6512 : !system_power_efficient_wq ||
6513 : !system_freezable_power_efficient_wq);
6514 1 : }
6515 :
6516 : /**
6517 : * workqueue_init - bring workqueue subsystem fully online
6518 : *
6519 : * This is the latter half of two-staged workqueue subsystem initialization
6520 : * and invoked as soon as kthreads can be created and scheduled.
6521 : * Workqueues have been created and work items queued on them, but there
6522 : * are no kworkers executing the work items yet. Populate the worker pools
6523 : * with the initial workers and enable future kworker creations.
6524 : */
6525 1 : void __init workqueue_init(void)
6526 : {
6527 : struct workqueue_struct *wq;
6528 : struct worker_pool *pool;
6529 : int cpu, bkt;
6530 :
6531 : /*
6532 : * It'd be simpler to initialize NUMA in workqueue_init_early() but
6533 : * CPU to node mapping may not be available that early on some
6534 : * archs such as power and arm64. As the per-cpu pools created
6535 : * previously could be missing their node hint and the unbound pools
6536 : * their NUMA affinity, fix them up.
6537 : *
6538 : * Also, while iterating workqueues, create rescuers if requested.
6539 : */
6540 : wq_numa_init();
6541 :
6542 1 : mutex_lock(&wq_pool_mutex);
6543 :
6544 2 : for_each_possible_cpu(cpu) {
6545 2 : for_each_cpu_worker_pool(pool, cpu) {
6546 2 : pool->node = cpu_to_node(cpu);
6547 : }
6548 : }
6549 :
6550 9 : list_for_each_entry(wq, &workqueues, list) {
6551 8 : wq_update_unbound_numa(wq, smp_processor_id(), true);
6552 8 : WARN(init_rescuer(wq),
6553 : "workqueue: failed to create early rescuer for %s",
6554 : wq->name);
6555 : }
6556 :
6557 1 : mutex_unlock(&wq_pool_mutex);
6558 :
6559 : /* create the initial workers */
6560 2 : for_each_online_cpu(cpu) {
6561 2 : for_each_cpu_worker_pool(pool, cpu) {
6562 2 : pool->flags &= ~POOL_DISASSOCIATED;
6563 2 : BUG_ON(!create_worker(pool));
6564 : }
6565 : }
6566 :
6567 65 : hash_for_each(unbound_pool_hash, bkt, pool, hash_node)
6568 1 : BUG_ON(!create_worker(pool));
6569 :
6570 1 : wq_online = true;
6571 : wq_watchdog_init();
6572 1 : }
6573 :
6574 : /*
6575 : * Despite the naming, this is a no-op function which is here only to avoid a
6576 : * link error. Since the compile-time warning may fail to catch it, we also
6577 : * need to emit a run-time warning from __flush_workqueue().
6578 : */
6579 0 : void __warn_flushing_systemwide_wq(void) { }
6580 : EXPORT_SYMBOL(__warn_flushing_systemwide_wq);
|