Line data Source code
1 : // SPDX-License-Identifier: GPL-2.0-only
2 : /*
3 : * Resizable, Scalable, Concurrent Hash Table
4 : *
5 : * Copyright (c) 2015 Herbert Xu <herbert@gondor.apana.org.au>
6 : * Copyright (c) 2014-2015 Thomas Graf <tgraf@suug.ch>
7 : * Copyright (c) 2008-2014 Patrick McHardy <kaber@trash.net>
8 : *
9 : * Code partially derived from nft_hash
10 : * Rewritten with rehash code from br_multicast plus single list
11 : * pointer as suggested by Josh Triplett
12 : */
13 :
14 : #include <linux/atomic.h>
15 : #include <linux/kernel.h>
16 : #include <linux/init.h>
17 : #include <linux/log2.h>
18 : #include <linux/sched.h>
19 : #include <linux/rculist.h>
20 : #include <linux/slab.h>
21 : #include <linux/vmalloc.h>
22 : #include <linux/mm.h>
23 : #include <linux/jhash.h>
24 : #include <linux/random.h>
25 : #include <linux/rhashtable.h>
26 : #include <linux/err.h>
27 : #include <linux/export.h>
28 :
29 : #define HASH_DEFAULT_SIZE 64UL
30 : #define HASH_MIN_SIZE 4U
31 :
32 : union nested_table {
33 : union nested_table __rcu *table;
34 : struct rhash_lock_head __rcu *bucket;
35 : };
36 :
37 : static u32 head_hashfn(struct rhashtable *ht,
38 : const struct bucket_table *tbl,
39 : const struct rhash_head *he)
40 : {
41 0 : return rht_head_hashfn(ht, tbl, he, ht->p);
42 : }
43 :
44 : #ifdef CONFIG_PROVE_LOCKING
45 : #define ASSERT_RHT_MUTEX(HT) BUG_ON(!lockdep_rht_mutex_is_held(HT))
46 :
47 : int lockdep_rht_mutex_is_held(struct rhashtable *ht)
48 : {
49 : return (debug_locks) ? lockdep_is_held(&ht->mutex) : 1;
50 : }
51 : EXPORT_SYMBOL_GPL(lockdep_rht_mutex_is_held);
52 :
53 : int lockdep_rht_bucket_is_held(const struct bucket_table *tbl, u32 hash)
54 : {
55 : if (!debug_locks)
56 : return 1;
57 : if (unlikely(tbl->nest))
58 : return 1;
59 : return bit_spin_is_locked(0, (unsigned long *)&tbl->buckets[hash]);
60 : }
61 : EXPORT_SYMBOL_GPL(lockdep_rht_bucket_is_held);
62 : #else
63 : #define ASSERT_RHT_MUTEX(HT)
64 : #endif
65 :
66 : static inline union nested_table *nested_table_top(
67 : const struct bucket_table *tbl)
68 : {
69 : /* The top-level bucket entry does not need RCU protection
70 : * because it's set at the same time as tbl->nest.
71 : */
72 0 : return (void *)rcu_dereference_protected(tbl->buckets[0], 1);
73 : }
74 :
75 0 : static void nested_table_free(union nested_table *ntbl, unsigned int size)
76 : {
77 0 : const unsigned int shift = PAGE_SHIFT - ilog2(sizeof(void *));
78 0 : const unsigned int len = 1 << shift;
79 : unsigned int i;
80 :
81 0 : ntbl = rcu_dereference_protected(ntbl->table, 1);
82 0 : if (!ntbl)
83 : return;
84 :
85 0 : if (size > len) {
86 0 : size >>= shift;
87 0 : for (i = 0; i < len; i++)
88 0 : nested_table_free(ntbl + i, size);
89 : }
90 :
91 0 : kfree(ntbl);
92 : }
93 :
94 0 : static void nested_bucket_table_free(const struct bucket_table *tbl)
95 : {
96 0 : unsigned int size = tbl->size >> tbl->nest;
97 0 : unsigned int len = 1 << tbl->nest;
98 : union nested_table *ntbl;
99 : unsigned int i;
100 :
101 0 : ntbl = nested_table_top(tbl);
102 :
103 0 : for (i = 0; i < len; i++)
104 0 : nested_table_free(ntbl + i, size);
105 :
106 0 : kfree(ntbl);
107 0 : }
108 :
109 0 : static void bucket_table_free(const struct bucket_table *tbl)
110 : {
111 0 : if (tbl->nest)
112 0 : nested_bucket_table_free(tbl);
113 :
114 0 : kvfree(tbl);
115 0 : }
116 :
117 0 : static void bucket_table_free_rcu(struct rcu_head *head)
118 : {
119 0 : bucket_table_free(container_of(head, struct bucket_table, rcu));
120 0 : }
121 :
122 0 : static union nested_table *nested_table_alloc(struct rhashtable *ht,
123 : union nested_table __rcu **prev,
124 : bool leaf)
125 : {
126 : union nested_table *ntbl;
127 : int i;
128 :
129 0 : ntbl = rcu_dereference(*prev);
130 0 : if (ntbl)
131 : return ntbl;
132 :
133 0 : ntbl = kzalloc(PAGE_SIZE, GFP_ATOMIC);
134 :
135 0 : if (ntbl && leaf) {
136 0 : for (i = 0; i < PAGE_SIZE / sizeof(ntbl[0]); i++)
137 0 : INIT_RHT_NULLS_HEAD(ntbl[i].bucket);
138 : }
139 :
140 0 : if (cmpxchg((union nested_table **)prev, NULL, ntbl) == NULL)
141 : return ntbl;
142 : /* Raced with another thread. */
143 0 : kfree(ntbl);
144 0 : return rcu_dereference(*prev);
145 : }
146 :
147 0 : static struct bucket_table *nested_bucket_table_alloc(struct rhashtable *ht,
148 : size_t nbuckets,
149 : gfp_t gfp)
150 : {
151 0 : const unsigned int shift = PAGE_SHIFT - ilog2(sizeof(void *));
152 : struct bucket_table *tbl;
153 : size_t size;
154 :
155 0 : if (nbuckets < (1 << (shift + 1)))
156 : return NULL;
157 :
158 0 : size = sizeof(*tbl) + sizeof(tbl->buckets[0]);
159 :
160 0 : tbl = kzalloc(size, gfp);
161 0 : if (!tbl)
162 : return NULL;
163 :
164 0 : if (!nested_table_alloc(ht, (union nested_table __rcu **)tbl->buckets,
165 : false)) {
166 0 : kfree(tbl);
167 0 : return NULL;
168 : }
169 :
170 0 : tbl->nest = (ilog2(nbuckets) - 1) % shift + 1;
171 :
172 0 : return tbl;
173 : }
174 :
175 0 : static struct bucket_table *bucket_table_alloc(struct rhashtable *ht,
176 : size_t nbuckets,
177 : gfp_t gfp)
178 : {
179 0 : struct bucket_table *tbl = NULL;
180 : size_t size;
181 : int i;
182 : static struct lock_class_key __key;
183 :
184 0 : tbl = kvzalloc(struct_size(tbl, buckets, nbuckets), gfp);
185 :
186 0 : size = nbuckets;
187 :
188 0 : if (tbl == NULL && (gfp & ~__GFP_NOFAIL) != GFP_KERNEL) {
189 0 : tbl = nested_bucket_table_alloc(ht, nbuckets, gfp);
190 0 : nbuckets = 0;
191 : }
192 :
193 0 : if (tbl == NULL)
194 : return NULL;
195 :
196 : lockdep_init_map(&tbl->dep_map, "rhashtable_bucket", &__key, 0);
197 :
198 0 : tbl->size = size;
199 :
200 0 : rcu_head_init(&tbl->rcu);
201 0 : INIT_LIST_HEAD(&tbl->walkers);
202 :
203 0 : tbl->hash_rnd = get_random_u32();
204 :
205 0 : for (i = 0; i < nbuckets; i++)
206 0 : INIT_RHT_NULLS_HEAD(tbl->buckets[i]);
207 :
208 : return tbl;
209 : }
210 :
211 : static struct bucket_table *rhashtable_last_table(struct rhashtable *ht,
212 : struct bucket_table *tbl)
213 : {
214 : struct bucket_table *new_tbl;
215 :
216 : do {
217 0 : new_tbl = tbl;
218 0 : tbl = rht_dereference_rcu(tbl->future_tbl, ht);
219 0 : } while (tbl);
220 :
221 : return new_tbl;
222 : }
223 :
224 0 : static int rhashtable_rehash_one(struct rhashtable *ht,
225 : struct rhash_lock_head __rcu **bkt,
226 : unsigned int old_hash)
227 : {
228 0 : struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht);
229 0 : struct bucket_table *new_tbl = rhashtable_last_table(ht, old_tbl);
230 0 : int err = -EAGAIN;
231 : struct rhash_head *head, *next, *entry;
232 0 : struct rhash_head __rcu **pprev = NULL;
233 : unsigned int new_hash;
234 : unsigned long flags;
235 :
236 0 : if (new_tbl->nest)
237 : goto out;
238 :
239 0 : err = -ENOENT;
240 :
241 0 : rht_for_each_from(entry, rht_ptr(bkt, old_tbl, old_hash),
242 : old_tbl, old_hash) {
243 0 : err = 0;
244 0 : next = rht_dereference_bucket(entry->next, old_tbl, old_hash);
245 :
246 0 : if (rht_is_a_nulls(next))
247 : break;
248 :
249 0 : pprev = &entry->next;
250 : }
251 :
252 0 : if (err)
253 : goto out;
254 :
255 0 : new_hash = head_hashfn(ht, new_tbl, entry);
256 :
257 0 : flags = rht_lock_nested(new_tbl, &new_tbl->buckets[new_hash],
258 : SINGLE_DEPTH_NESTING);
259 :
260 0 : head = rht_ptr(new_tbl->buckets + new_hash, new_tbl, new_hash);
261 :
262 0 : RCU_INIT_POINTER(entry->next, head);
263 :
264 0 : rht_assign_unlock(new_tbl, &new_tbl->buckets[new_hash], entry, flags);
265 :
266 0 : if (pprev)
267 0 : rcu_assign_pointer(*pprev, next);
268 : else
269 : /* Need to preserve the bit lock. */
270 : rht_assign_locked(bkt, next);
271 :
272 : out:
273 0 : return err;
274 : }
275 :
276 0 : static int rhashtable_rehash_chain(struct rhashtable *ht,
277 : unsigned int old_hash)
278 : {
279 0 : struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht);
280 0 : struct rhash_lock_head __rcu **bkt = rht_bucket_var(old_tbl, old_hash);
281 : unsigned long flags;
282 : int err;
283 :
284 0 : if (!bkt)
285 : return 0;
286 0 : flags = rht_lock(old_tbl, bkt);
287 :
288 0 : while (!(err = rhashtable_rehash_one(ht, bkt, old_hash)))
289 : ;
290 :
291 0 : if (err == -ENOENT)
292 0 : err = 0;
293 0 : rht_unlock(old_tbl, bkt, flags);
294 :
295 0 : return err;
296 : }
297 :
298 : static int rhashtable_rehash_attach(struct rhashtable *ht,
299 : struct bucket_table *old_tbl,
300 : struct bucket_table *new_tbl)
301 : {
302 : /* Make insertions go into the new, empty table right away. Deletions
303 : * and lookups will be attempted in both tables until we synchronize.
304 : * As cmpxchg() provides strong barriers, we do not need
305 : * rcu_assign_pointer().
306 : */
307 :
308 0 : if (cmpxchg((struct bucket_table **)&old_tbl->future_tbl, NULL,
309 : new_tbl) != NULL)
310 : return -EEXIST;
311 :
312 : return 0;
313 : }
314 :
315 0 : static int rhashtable_rehash_table(struct rhashtable *ht)
316 : {
317 0 : struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht);
318 : struct bucket_table *new_tbl;
319 : struct rhashtable_walker *walker;
320 : unsigned int old_hash;
321 : int err;
322 :
323 0 : new_tbl = rht_dereference(old_tbl->future_tbl, ht);
324 0 : if (!new_tbl)
325 : return 0;
326 :
327 0 : for (old_hash = 0; old_hash < old_tbl->size; old_hash++) {
328 0 : err = rhashtable_rehash_chain(ht, old_hash);
329 0 : if (err)
330 : return err;
331 0 : cond_resched();
332 : }
333 :
334 : /* Publish the new table pointer. */
335 0 : rcu_assign_pointer(ht->tbl, new_tbl);
336 :
337 0 : spin_lock(&ht->lock);
338 0 : list_for_each_entry(walker, &old_tbl->walkers, list)
339 0 : walker->tbl = NULL;
340 :
341 : /* Wait for readers. All new readers will see the new
342 : * table, and thus no references to the old table will
343 : * remain.
344 : * We do this inside the locked region so that
345 : * rhashtable_walk_stop() can use rcu_head_after_call_rcu()
346 : * to detect that the table is being freed and must not be re-linked.
347 : */
348 0 : call_rcu(&old_tbl->rcu, bucket_table_free_rcu);
349 0 : spin_unlock(&ht->lock);
350 :
351 0 : return rht_dereference(new_tbl->future_tbl, ht) ? -EAGAIN : 0;
352 : }
353 :
354 0 : static int rhashtable_rehash_alloc(struct rhashtable *ht,
355 : struct bucket_table *old_tbl,
356 : unsigned int size)
357 : {
358 : struct bucket_table *new_tbl;
359 : int err;
360 :
361 : ASSERT_RHT_MUTEX(ht);
362 :
363 0 : new_tbl = bucket_table_alloc(ht, size, GFP_KERNEL);
364 0 : if (new_tbl == NULL)
365 : return -ENOMEM;
366 :
367 0 : err = rhashtable_rehash_attach(ht, old_tbl, new_tbl);
368 0 : if (err)
369 0 : bucket_table_free(new_tbl);
370 :
371 : return err;
372 : }
373 :
374 : /**
375 : * rhashtable_shrink - Shrink hash table while allowing concurrent lookups
376 : * @ht: the hash table to shrink
377 : *
378 : * This function shrinks the hash table to fit, i.e., to the smallest
379 : * size that would not cause it to expand right away automatically.
380 : *
381 : * The caller must ensure that no concurrent resizing occurs by holding
382 : * ht->mutex.
383 : *
384 : * The caller must ensure that no concurrent table mutations take place.
385 : * It is however valid to have concurrent lookups if they are RCU protected.
386 : *
387 : * It is valid to have concurrent insertions and deletions protected by per
388 : * bucket locks or concurrent RCU protected lookups and traversals.
389 : */
390 0 : static int rhashtable_shrink(struct rhashtable *ht)
391 : {
392 0 : struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht);
393 0 : unsigned int nelems = atomic_read(&ht->nelems);
394 0 : unsigned int size = 0;
395 :
396 0 : if (nelems)
397 0 : size = roundup_pow_of_two(nelems * 3 / 2);
398 0 : if (size < ht->p.min_size)
399 0 : size = ht->p.min_size;
400 :
401 0 : if (old_tbl->size <= size)
402 : return 0;
403 :
404 0 : if (rht_dereference(old_tbl->future_tbl, ht))
405 : return -EEXIST;
406 :
407 0 : return rhashtable_rehash_alloc(ht, old_tbl, size);
408 : }
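/*
 * Worked example (illustrative, not taken from this file): the target
 * computed above is roughly 1.5x the element count rounded up to a power
 * of two, which stays safely below the 75% growth threshold so the new
 * table will not expand again right away.  With nelems == 100:
 *
 *	size = roundup_pow_of_two(100 * 3 / 2);	(150 rounds up to 256)
 *	size = max(size, ht->p.min_size);
 *
 * A table currently sized 1024 is shrunk to 256; one already at 256 or
 * below is left alone.
 */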
409 :
410 0 : static void rht_deferred_worker(struct work_struct *work)
411 : {
412 : struct rhashtable *ht;
413 : struct bucket_table *tbl;
414 0 : int err = 0;
415 :
416 0 : ht = container_of(work, struct rhashtable, run_work);
417 0 : mutex_lock(&ht->mutex);
418 :
419 0 : tbl = rht_dereference(ht->tbl, ht);
420 0 : tbl = rhashtable_last_table(ht, tbl);
421 :
422 0 : if (rht_grow_above_75(ht, tbl))
423 0 : err = rhashtable_rehash_alloc(ht, tbl, tbl->size * 2);
424 0 : else if (ht->p.automatic_shrinking && rht_shrink_below_30(ht, tbl))
425 0 : err = rhashtable_shrink(ht);
426 0 : else if (tbl->nest)
427 0 : err = rhashtable_rehash_alloc(ht, tbl, tbl->size);
428 :
429 0 : if (!err || err == -EEXIST) {
430 : int nerr;
431 :
432 0 : nerr = rhashtable_rehash_table(ht);
433 0 : err = err ?: nerr;
434 : }
435 :
436 0 : mutex_unlock(&ht->mutex);
437 :
438 0 : if (err)
439 0 : schedule_work(&ht->run_work);
440 0 : }
441 :
442 0 : static int rhashtable_insert_rehash(struct rhashtable *ht,
443 : struct bucket_table *tbl)
444 : {
445 : struct bucket_table *old_tbl;
446 : struct bucket_table *new_tbl;
447 : unsigned int size;
448 : int err;
449 :
450 0 : old_tbl = rht_dereference_rcu(ht->tbl, ht);
451 :
452 0 : size = tbl->size;
453 :
454 0 : err = -EBUSY;
455 :
456 0 : if (rht_grow_above_75(ht, tbl))
457 0 : size *= 2;
458 : /* Do not schedule more than one rehash */
459 0 : else if (old_tbl != tbl)
460 : goto fail;
461 :
462 0 : err = -ENOMEM;
463 :
464 0 : new_tbl = bucket_table_alloc(ht, size, GFP_ATOMIC | __GFP_NOWARN);
465 0 : if (new_tbl == NULL)
466 : goto fail;
467 :
468 0 : err = rhashtable_rehash_attach(ht, tbl, new_tbl);
469 0 : if (err) {
470 0 : bucket_table_free(new_tbl);
471 0 : if (err == -EEXIST)
472 0 : err = 0;
473 : } else
474 0 : schedule_work(&ht->run_work);
475 :
476 : return err;
477 :
478 : fail:
479 : /* Do not fail the insert if someone else did a rehash. */
480 0 : if (likely(rcu_access_pointer(tbl->future_tbl)))
481 : return 0;
482 :
483 : /* Schedule async rehash to retry allocation in process context. */
484 0 : if (err == -ENOMEM)
485 0 : schedule_work(&ht->run_work);
486 :
487 : return err;
488 : }
489 :
490 0 : static void *rhashtable_lookup_one(struct rhashtable *ht,
491 : struct rhash_lock_head __rcu **bkt,
492 : struct bucket_table *tbl, unsigned int hash,
493 : const void *key, struct rhash_head *obj)
494 : {
495 0 : struct rhashtable_compare_arg arg = {
496 : .ht = ht,
497 : .key = key,
498 : };
499 0 : struct rhash_head __rcu **pprev = NULL;
500 : struct rhash_head *head;
501 : int elasticity;
502 :
503 0 : elasticity = RHT_ELASTICITY;
504 0 : rht_for_each_from(head, rht_ptr(bkt, tbl, hash), tbl, hash) {
505 : struct rhlist_head *list;
506 : struct rhlist_head *plist;
507 :
508 0 : elasticity--;
509 0 : if (!key ||
510 0 : (ht->p.obj_cmpfn ?
511 0 : ht->p.obj_cmpfn(&arg, rht_obj(ht, head)) :
512 0 : rhashtable_compare(&arg, rht_obj(ht, head)))) {
513 0 : pprev = &head->next;
514 0 : continue;
515 : }
516 :
517 0 : if (!ht->rhlist)
518 0 : return rht_obj(ht, head);
519 :
520 0 : list = container_of(obj, struct rhlist_head, rhead);
521 0 : plist = container_of(head, struct rhlist_head, rhead);
522 :
523 0 : RCU_INIT_POINTER(list->next, plist);
524 0 : head = rht_dereference_bucket(head->next, tbl, hash);
525 0 : RCU_INIT_POINTER(list->rhead.next, head);
526 0 : if (pprev)
527 0 : rcu_assign_pointer(*pprev, obj);
528 : else
529 : /* Need to preserve the bit lock */
530 : rht_assign_locked(bkt, obj);
531 :
532 : return NULL;
533 : }
534 :
535 0 : if (elasticity <= 0)
536 : return ERR_PTR(-EAGAIN);
537 :
538 0 : return ERR_PTR(-ENOENT);
539 : }
540 :
541 0 : static struct bucket_table *rhashtable_insert_one(
542 : struct rhashtable *ht, struct rhash_lock_head __rcu **bkt,
543 : struct bucket_table *tbl, unsigned int hash, struct rhash_head *obj,
544 : void *data)
545 : {
546 : struct bucket_table *new_tbl;
547 : struct rhash_head *head;
548 :
549 0 : if (!IS_ERR_OR_NULL(data))
550 : return ERR_PTR(-EEXIST);
551 :
552 0 : if (PTR_ERR(data) != -EAGAIN && PTR_ERR(data) != -ENOENT)
553 : return ERR_CAST(data);
554 :
555 0 : new_tbl = rht_dereference_rcu(tbl->future_tbl, ht);
556 0 : if (new_tbl)
557 : return new_tbl;
558 :
559 0 : if (PTR_ERR(data) != -ENOENT)
560 : return ERR_CAST(data);
561 :
562 0 : if (unlikely(rht_grow_above_max(ht, tbl)))
563 : return ERR_PTR(-E2BIG);
564 :
565 0 : if (unlikely(rht_grow_above_100(ht, tbl)))
566 : return ERR_PTR(-EAGAIN);
567 :
568 0 : head = rht_ptr(bkt, tbl, hash);
569 :
570 0 : RCU_INIT_POINTER(obj->next, head);
571 0 : if (ht->rhlist) {
572 : struct rhlist_head *list;
573 :
574 0 : list = container_of(obj, struct rhlist_head, rhead);
575 0 : RCU_INIT_POINTER(list->next, NULL);
576 : }
577 :
578 : /* bkt is always the head of the list, so it holds
579 : * the lock, which we need to preserve
580 : */
581 0 : rht_assign_locked(bkt, obj);
582 :
583 0 : atomic_inc(&ht->nelems);
584 0 : if (rht_grow_above_75(ht, tbl))
585 0 : schedule_work(&ht->run_work);
586 :
587 : return NULL;
588 : }
589 :
590 0 : static void *rhashtable_try_insert(struct rhashtable *ht, const void *key,
591 : struct rhash_head *obj)
592 : {
593 : struct bucket_table *new_tbl;
594 : struct bucket_table *tbl;
595 : struct rhash_lock_head __rcu **bkt;
596 : unsigned long flags;
597 : unsigned int hash;
598 : void *data;
599 :
600 0 : new_tbl = rcu_dereference(ht->tbl);
601 :
602 : do {
603 0 : tbl = new_tbl;
604 0 : hash = rht_head_hashfn(ht, tbl, obj, ht->p);
605 0 : if (rcu_access_pointer(tbl->future_tbl))
606 : /* Failure is OK */
607 : bkt = rht_bucket_var(tbl, hash);
608 : else
609 : bkt = rht_bucket_insert(ht, tbl, hash);
610 0 : if (bkt == NULL) {
611 0 : new_tbl = rht_dereference_rcu(tbl->future_tbl, ht);
612 0 : data = ERR_PTR(-EAGAIN);
613 : } else {
614 0 : flags = rht_lock(tbl, bkt);
615 0 : data = rhashtable_lookup_one(ht, bkt, tbl,
616 : hash, key, obj);
617 0 : new_tbl = rhashtable_insert_one(ht, bkt, tbl,
618 : hash, obj, data);
619 0 : if (PTR_ERR(new_tbl) != -EEXIST)
620 0 : data = ERR_CAST(new_tbl);
621 :
622 0 : rht_unlock(tbl, bkt, flags);
623 : }
624 0 : } while (!IS_ERR_OR_NULL(new_tbl));
625 :
626 0 : if (PTR_ERR(data) == -EAGAIN)
627 0 : data = ERR_PTR(rhashtable_insert_rehash(ht, tbl) ?:
628 : -EAGAIN);
629 :
630 0 : return data;
631 : }
632 :
633 0 : void *rhashtable_insert_slow(struct rhashtable *ht, const void *key,
634 : struct rhash_head *obj)
635 : {
636 : void *data;
637 :
638 : do {
639 : rcu_read_lock();
640 0 : data = rhashtable_try_insert(ht, key, obj);
641 0 : rcu_read_unlock();
642 0 : } while (PTR_ERR(data) == -EAGAIN);
643 :
644 0 : return data;
645 : }
646 : EXPORT_SYMBOL_GPL(rhashtable_insert_slow);
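/*
 * rhashtable_insert_slow() is the out-of-line slow path; callers normally
 * use the inline fast paths from <linux/rhashtable.h>, which fall back to
 * it when a rehash is in progress or a bucket chain grows too long.  A
 * minimal caller-side sketch (assuming a struct test_obj keyed by an int
 * and a matching rhashtable_params "params", as in the rhashtable_init()
 * example further below; error handling abbreviated):
 *
 *	struct test_obj *obj = kzalloc(sizeof(*obj), GFP_KERNEL);
 *	int key = 42, err;
 *
 *	obj->key = key;
 *	err = rhashtable_insert_fast(&ht, &obj->node, params);
 *	if (err)
 *		kfree(obj);
 *
 *	obj = rhashtable_lookup_fast(&ht, &key, params);
 *	if (obj)
 *		err = rhashtable_remove_fast(&ht, &obj->node, params);
 */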
647 :
648 : /**
649 : * rhashtable_walk_enter - Initialise an iterator
650 : * @ht: Table to walk over
651 : * @iter: Hash table iterator
652 : *
653 : * This function prepares a hash table walk.
654 : *
655 : * Note that if you restart a walk after rhashtable_walk_stop you
656 : * may see the same object twice. Also, you may miss objects if
657 : * there are removals in between rhashtable_walk_stop and the next
658 : * call to rhashtable_walk_start.
659 : *
660 : * For a completely stable walk you should construct your own data
661 : * structure outside the hash table.
662 : *
663 : * This function may be called from any process context, including
664 : * non-preemptable context, but cannot be called from softirq or
665 : * hardirq context.
666 : *
667 : * You must call rhashtable_walk_exit after this function returns.
668 : */
669 0 : void rhashtable_walk_enter(struct rhashtable *ht, struct rhashtable_iter *iter)
670 : {
671 0 : iter->ht = ht;
672 0 : iter->p = NULL;
673 0 : iter->slot = 0;
674 0 : iter->skip = 0;
675 0 : iter->end_of_table = 0;
676 :
677 0 : spin_lock(&ht->lock);
678 0 : iter->walker.tbl =
679 0 : rcu_dereference_protected(ht->tbl, lockdep_is_held(&ht->lock));
680 0 : list_add(&iter->walker.list, &iter->walker.tbl->walkers);
681 0 : spin_unlock(&ht->lock);
682 0 : }
683 : EXPORT_SYMBOL_GPL(rhashtable_walk_enter);
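/*
 * Typical iterator lifecycle, sketched.  rhashtable_walk_start() is the
 * inline variant mentioned below that ignores resize notifications; the
 * only error rhashtable_walk_next() returns is -EAGAIN, meaning a resize
 * rewound the iterator, so continuing simply restarts the walk.  Between
 * start and stop the RCU read lock is held, so the loop body must not
 * sleep.  process() stands in for caller code:
 *
 *	struct rhashtable_iter iter;
 *	struct test_obj *obj;
 *
 *	rhashtable_walk_enter(&ht, &iter);
 *	rhashtable_walk_start(&iter);
 *
 *	while ((obj = rhashtable_walk_next(&iter)) != NULL) {
 *		if (IS_ERR(obj))
 *			continue;
 *		process(obj);
 *	}
 *
 *	rhashtable_walk_stop(&iter);
 *	rhashtable_walk_exit(&iter);
 */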
684 :
685 : /**
686 : * rhashtable_walk_exit - Free an iterator
687 : * @iter: Hash table iterator
688 : *
689 : * This function frees resources allocated by rhashtable_walk_enter.
690 : */
691 0 : void rhashtable_walk_exit(struct rhashtable_iter *iter)
692 : {
693 0 : spin_lock(&iter->ht->lock);
694 0 : if (iter->walker.tbl)
695 0 : list_del(&iter->walker.list);
696 0 : spin_unlock(&iter->ht->lock);
697 0 : }
698 : EXPORT_SYMBOL_GPL(rhashtable_walk_exit);
699 :
700 : /**
701 : * rhashtable_walk_start_check - Start a hash table walk
702 : * @iter: Hash table iterator
703 : *
704 : * Start a hash table walk at the current iterator position. Note that we take
705 : * the RCU lock in all cases including when we return an error. So you must
706 : * always call rhashtable_walk_stop to clean up.
707 : *
708 : * Returns zero if successful.
709 : *
710 : * Returns -EAGAIN if a resize event occurred. Note that the iterator
711 : * will rewind back to the beginning and you may use it immediately
712 : * by calling rhashtable_walk_next.
713 : *
714 : * rhashtable_walk_start is defined as an inline variant that returns
715 : * void. This is preferred in cases where the caller would ignore
716 : * resize events and always continue.
717 : */
718 0 : int rhashtable_walk_start_check(struct rhashtable_iter *iter)
719 : __acquires(RCU)
720 : {
721 0 : struct rhashtable *ht = iter->ht;
722 0 : bool rhlist = ht->rhlist;
723 :
724 : rcu_read_lock();
725 :
726 0 : spin_lock(&ht->lock);
727 0 : if (iter->walker.tbl)
728 0 : list_del(&iter->walker.list);
729 0 : spin_unlock(&ht->lock);
730 :
731 0 : if (iter->end_of_table)
732 : return 0;
733 0 : if (!iter->walker.tbl) {
734 0 : iter->walker.tbl = rht_dereference_rcu(ht->tbl, ht);
735 0 : iter->slot = 0;
736 0 : iter->skip = 0;
737 0 : return -EAGAIN;
738 : }
739 :
740 0 : if (iter->p && !rhlist) {
741 : /*
742 : * We need to validate that 'p' is still in the table, and
743 : * if so, update 'skip'
744 : */
745 : struct rhash_head *p;
746 0 : int skip = 0;
747 0 : rht_for_each_rcu(p, iter->walker.tbl, iter->slot) {
748 0 : skip++;
749 0 : if (p == iter->p) {
750 0 : iter->skip = skip;
751 0 : goto found;
752 : }
753 : }
754 0 : iter->p = NULL;
755 0 : } else if (iter->p && rhlist) {
756 : /* Need to validate that 'list' is still in the table, and
757 : * if so, update 'skip' and 'p'.
758 : */
759 : struct rhash_head *p;
760 : struct rhlist_head *list;
761 0 : int skip = 0;
762 0 : rht_for_each_rcu(p, iter->walker.tbl, iter->slot) {
763 0 : for (list = container_of(p, struct rhlist_head, rhead);
764 : list;
765 0 : list = rcu_dereference(list->next)) {
766 0 : skip++;
767 0 : if (list == iter->list) {
768 0 : iter->p = p;
769 0 : iter->skip = skip;
770 0 : goto found;
771 : }
772 : }
773 : }
774 0 : iter->p = NULL;
775 : }
776 : found:
777 : return 0;
778 : }
779 : EXPORT_SYMBOL_GPL(rhashtable_walk_start_check);
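/*
 * Checking variant, sketched (iter and err declared as in the lifecycle
 * sketch above): callers that want to react to a resize test the return
 * value instead of using the void rhashtable_walk_start():
 *
 *	rhashtable_walk_enter(&ht, &iter);
 *	err = rhashtable_walk_start_check(&iter);
 *	if (err == -EAGAIN) {
 *		(a resize occurred; the iterator has been rewound to the
 *		 beginning but may be used immediately)
 *	}
 *	...
 *	rhashtable_walk_stop(&iter);
 *	rhashtable_walk_exit(&iter);
 */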
780 :
781 : /**
782 : * __rhashtable_walk_find_next - Find the next element in a table (or the first
783 : * one in case of a new walk).
784 : *
785 : * @iter: Hash table iterator
786 : *
787 : * Returns the found object or NULL when the end of the table is reached.
788 : *
789 : * Returns -EAGAIN if a resize event occurred.
790 : */
791 0 : static void *__rhashtable_walk_find_next(struct rhashtable_iter *iter)
792 : {
793 0 : struct bucket_table *tbl = iter->walker.tbl;
794 0 : struct rhlist_head *list = iter->list;
795 0 : struct rhashtable *ht = iter->ht;
796 0 : struct rhash_head *p = iter->p;
797 0 : bool rhlist = ht->rhlist;
798 :
799 0 : if (!tbl)
800 : return NULL;
801 :
802 0 : for (; iter->slot < tbl->size; iter->slot++) {
803 0 : int skip = iter->skip;
804 :
805 0 : rht_for_each_rcu(p, tbl, iter->slot) {
806 0 : if (rhlist) {
807 : list = container_of(p, struct rhlist_head,
808 : rhead);
809 : do {
810 0 : if (!skip)
811 : goto next;
812 0 : skip--;
813 0 : list = rcu_dereference(list->next);
814 0 : } while (list);
815 :
816 0 : continue;
817 : }
818 0 : if (!skip)
819 : break;
820 0 : skip--;
821 : }
822 :
823 : next:
824 0 : if (!rht_is_a_nulls(p)) {
825 0 : iter->skip++;
826 0 : iter->p = p;
827 0 : iter->list = list;
828 0 : return rht_obj(ht, rhlist ? &list->rhead : p);
829 : }
830 :
831 0 : iter->skip = 0;
832 : }
833 :
834 0 : iter->p = NULL;
835 :
836 : /* Ensure we see any new tables. */
837 0 : smp_rmb();
838 :
839 0 : iter->walker.tbl = rht_dereference_rcu(tbl->future_tbl, ht);
840 0 : if (iter->walker.tbl) {
841 0 : iter->slot = 0;
842 0 : iter->skip = 0;
843 0 : return ERR_PTR(-EAGAIN);
844 : } else {
845 0 : iter->end_of_table = true;
846 : }
847 :
848 0 : return NULL;
849 : }
850 :
851 : /**
852 : * rhashtable_walk_next - Return the next object and advance the iterator
853 : * @iter: Hash table iterator
854 : *
855 : * Note that you must call rhashtable_walk_stop when you are finished
856 : * with the walk.
857 : *
858 : * Returns the next object or NULL when the end of the table is reached.
859 : *
860 : * Returns -EAGAIN if a resize event occurred. Note that the iterator
861 : * will rewind back to the beginning and you may continue to use it.
862 : */
863 0 : void *rhashtable_walk_next(struct rhashtable_iter *iter)
864 : {
865 0 : struct rhlist_head *list = iter->list;
866 0 : struct rhashtable *ht = iter->ht;
867 0 : struct rhash_head *p = iter->p;
868 0 : bool rhlist = ht->rhlist;
869 :
870 0 : if (p) {
871 0 : if (!rhlist || !(list = rcu_dereference(list->next))) {
872 0 : p = rcu_dereference(p->next);
873 0 : list = container_of(p, struct rhlist_head, rhead);
874 : }
875 0 : if (!rht_is_a_nulls(p)) {
876 0 : iter->skip++;
877 0 : iter->p = p;
878 0 : iter->list = list;
879 0 : return rht_obj(ht, rhlist ? &list->rhead : p);
880 : }
881 :
882 : /* At the end of this slot, switch to the next one and then find
883 : * the next entry from that point.
884 : */
885 0 : iter->skip = 0;
886 0 : iter->slot++;
887 : }
888 :
889 0 : return __rhashtable_walk_find_next(iter);
890 : }
891 : EXPORT_SYMBOL_GPL(rhashtable_walk_next);
892 :
893 : /**
894 : * rhashtable_walk_peek - Return the next object but don't advance the iterator
895 : * @iter: Hash table iterator
896 : *
897 : * Returns the next object or NULL when the end of the table is reached.
898 : *
899 : * Returns -EAGAIN if a resize event occurred. Note that the iterator
900 : * will rewind back to the beginning and you may continue to use it.
901 : */
902 0 : void *rhashtable_walk_peek(struct rhashtable_iter *iter)
903 : {
904 0 : struct rhlist_head *list = iter->list;
905 0 : struct rhashtable *ht = iter->ht;
906 0 : struct rhash_head *p = iter->p;
907 :
908 0 : if (p)
909 0 : return rht_obj(ht, ht->rhlist ? &list->rhead : p);
910 :
911 : /* No object found in current iter, find next one in the table. */
912 :
913 0 : if (iter->skip) {
914 : /* A nonzero skip value points to the next entry in the table
915 : * beyond the last one that was found. Decrement skip so
916 : * we find the current value. __rhashtable_walk_find_next
917 : * will restore the original value of skip assuming that
918 : * the table hasn't changed.
919 : */
920 0 : iter->skip--;
921 : }
922 :
923 0 : return __rhashtable_walk_find_next(iter);
924 : }
925 : EXPORT_SYMBOL_GPL(rhashtable_walk_peek);
926 :
927 : /**
928 : * rhashtable_walk_stop - Finish a hash table walk
929 : * @iter: Hash table iterator
930 : *
931 : * Finish a hash table walk. Does not reset the iterator to the start of the
932 : * hash table.
933 : */
934 0 : void rhashtable_walk_stop(struct rhashtable_iter *iter)
935 : __releases(RCU)
936 : {
937 : struct rhashtable *ht;
938 0 : struct bucket_table *tbl = iter->walker.tbl;
939 :
940 0 : if (!tbl)
941 : goto out;
942 :
943 0 : ht = iter->ht;
944 :
945 0 : spin_lock(&ht->lock);
946 0 : if (rcu_head_after_call_rcu(&tbl->rcu, bucket_table_free_rcu))
947 : /* This bucket table is being freed, don't re-link it. */
948 0 : iter->walker.tbl = NULL;
949 : else
950 0 : list_add(&iter->walker.list, &tbl->walkers);
951 0 : spin_unlock(&ht->lock);
952 :
953 : out:
954 : rcu_read_unlock();
955 0 : }
956 : EXPORT_SYMBOL_GPL(rhashtable_walk_stop);
957 :
958 0 : static size_t rounded_hashtable_size(const struct rhashtable_params *params)
959 : {
960 : size_t retsize;
961 :
962 0 : if (params->nelem_hint)
963 0 : retsize = max(roundup_pow_of_two(params->nelem_hint * 4 / 3),
964 : (unsigned long)params->min_size);
965 : else
966 0 : retsize = max(HASH_DEFAULT_SIZE,
967 : (unsigned long)params->min_size);
968 :
969 0 : return retsize;
970 : }
971 :
972 0 : static u32 rhashtable_jhash2(const void *key, u32 length, u32 seed)
973 : {
974 0 : return jhash2(key, length, seed);
975 : }
976 :
977 : /**
978 : * rhashtable_init - initialize a new hash table
979 : * @ht: hash table to be initialized
980 : * @params: configuration parameters
981 : *
982 : * Initializes a new hash table based on the provided configuration
983 : * parameters. A table can be configured either with a variable or
984 : * fixed length key:
985 : *
986 : * Configuration Example 1: Fixed length keys
987 : * struct test_obj {
988 : * int key;
989 : * void * my_member;
990 : * struct rhash_head node;
991 : * };
992 : *
993 : * struct rhashtable_params params = {
994 : * .head_offset = offsetof(struct test_obj, node),
995 : * .key_offset = offsetof(struct test_obj, key),
996 : * .key_len = sizeof(int),
997 : * .hashfn = jhash,
998 : * };
999 : *
1000 : * Configuration Example 2: Variable length keys
1001 : * struct test_obj {
1002 : * [...]
1003 : * struct rhash_head node;
1004 : * };
1005 : *
1006 : * u32 my_hash_fn(const void *data, u32 len, u32 seed)
1007 : * {
1008 : * struct test_obj *obj = data;
1009 : *
1010 : * return [... hash ...];
1011 : * }
1012 : *
1013 : * struct rhashtable_params params = {
1014 : * .head_offset = offsetof(struct test_obj, node),
1015 : * .hashfn = jhash,
1016 : * .obj_hashfn = my_hash_fn,
1017 : * };
1018 : */
1019 0 : int rhashtable_init(struct rhashtable *ht,
1020 : const struct rhashtable_params *params)
1021 : {
1022 : struct bucket_table *tbl;
1023 : size_t size;
1024 :
1025 0 : if ((!params->key_len && !params->obj_hashfn) ||
1026 0 : (params->obj_hashfn && !params->obj_cmpfn))
1027 : return -EINVAL;
1028 :
1029 0 : memset(ht, 0, sizeof(*ht));
1030 0 : mutex_init(&ht->mutex);
1031 0 : spin_lock_init(&ht->lock);
1032 0 : memcpy(&ht->p, params, sizeof(*params));
1033 :
1034 0 : if (params->min_size)
1035 0 : ht->p.min_size = roundup_pow_of_two(params->min_size);
1036 :
1037 : /* Cap total entries at 2^31 to avoid nelems overflow. */
1038 0 : ht->max_elems = 1u << 31;
1039 :
1040 0 : if (params->max_size) {
1041 0 : ht->p.max_size = rounddown_pow_of_two(params->max_size);
1042 0 : if (ht->p.max_size < ht->max_elems / 2)
1043 0 : ht->max_elems = ht->p.max_size * 2;
1044 : }
1045 :
1046 0 : ht->p.min_size = max_t(u16, ht->p.min_size, HASH_MIN_SIZE);
1047 :
1048 0 : size = rounded_hashtable_size(&ht->p);
1049 :
1050 0 : ht->key_len = ht->p.key_len;
1051 0 : if (!params->hashfn) {
1052 0 : ht->p.hashfn = jhash;
1053 :
1054 0 : if (!(ht->key_len & (sizeof(u32) - 1))) {
1055 0 : ht->key_len /= sizeof(u32);
1056 0 : ht->p.hashfn = rhashtable_jhash2;
1057 : }
1058 : }
1059 :
1060 : /*
1061 : * This is API initialization and thus we need to guarantee the
1062 : * initial rhashtable allocation. Upon failure, retry with the
1063 : * smallest possible size with __GFP_NOFAIL semantics.
1064 : */
1065 0 : tbl = bucket_table_alloc(ht, size, GFP_KERNEL);
1066 0 : if (unlikely(tbl == NULL)) {
1067 0 : size = max_t(u16, ht->p.min_size, HASH_MIN_SIZE);
1068 0 : tbl = bucket_table_alloc(ht, size, GFP_KERNEL | __GFP_NOFAIL);
1069 : }
1070 :
1071 0 : atomic_set(&ht->nelems, 0);
1072 :
1073 0 : RCU_INIT_POINTER(ht->tbl, tbl);
1074 :
1075 0 : INIT_WORK(&ht->run_work, rht_deferred_worker);
1076 :
1077 0 : return 0;
1078 : }
1079 : EXPORT_SYMBOL_GPL(rhashtable_init);
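/*
 * Putting the configuration examples above to use, sketched (names are
 * illustrative; "params" is one of the rhashtable_params shown above):
 *
 *	static struct rhashtable test_objs;
 *
 *	static int __init test_objs_setup(void)
 *	{
 *		return rhashtable_init(&test_objs, &params);
 *	}
 *
 *	static void __exit test_objs_teardown(void)
 *	{
 *		rhashtable_destroy(&test_objs);
 *	}
 *
 * rhashtable_init() returns -EINVAL for inconsistent parameters and 0 on
 * success; the initial bucket table allocation cannot fail thanks to the
 * __GFP_NOFAIL fallback above.
 */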
1080 :
1081 : /**
1082 : * rhltable_init - initialize a new hash list table
1083 : * @hlt: hash list table to be initialized
1084 : * @params: configuration parameters
1085 : *
1086 : * Initializes a new hash list table.
1087 : *
1088 : * See documentation for rhashtable_init.
1089 : */
1090 0 : int rhltable_init(struct rhltable *hlt, const struct rhashtable_params *params)
1091 : {
1092 : int err;
1093 :
1094 0 : err = rhashtable_init(&hlt->ht, params);
1095 0 : hlt->ht.rhlist = true;
1096 0 : return err;
1097 : }
1098 : EXPORT_SYMBOL_GPL(rhltable_init);
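/*
 * Sketch of a list-table user (duplicate keys allowed).  The object
 * embeds a struct rhlist_head instead of a struct rhash_head, with
 * params.head_offset pointing at it, and the companion helpers from
 * <linux/rhashtable.h> handle insertion and lookup.  Names are
 * illustrative:
 *
 *	struct test_obj {
 *		int key;
 *		struct rhlist_head list_node;
 *	};
 *
 *	struct rhlist_head *list, *pos;
 *
 *	err = rhltable_init(&hlt, &params);
 *	err = rhltable_insert(&hlt, &obj->list_node, params);
 *
 *	rcu_read_lock();
 *	list = rhltable_lookup(&hlt, &key, params);
 *	rhl_for_each_entry_rcu(obj, pos, list, list_node)
 *		process(obj);
 *	rcu_read_unlock();
 */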
1099 :
1100 0 : static void rhashtable_free_one(struct rhashtable *ht, struct rhash_head *obj,
1101 : void (*free_fn)(void *ptr, void *arg),
1102 : void *arg)
1103 : {
1104 : struct rhlist_head *list;
1105 :
1106 0 : if (!ht->rhlist) {
1107 0 : free_fn(rht_obj(ht, obj), arg);
1108 0 : return;
1109 : }
1110 :
1111 : list = container_of(obj, struct rhlist_head, rhead);
1112 : do {
1113 0 : obj = &list->rhead;
1114 0 : list = rht_dereference(list->next, ht);
1115 0 : free_fn(rht_obj(ht, obj), arg);
1116 0 : } while (list);
1117 : }
1118 :
1119 : /**
1120 : * rhashtable_free_and_destroy - free elements and destroy hash table
1121 : * @ht: the hash table to destroy
1122 : * @free_fn: callback to release resources of element
1123 : * @arg: pointer passed to free_fn
1124 : *
1125 : * Stops any pending async resize. If defined, invokes free_fn for each
1126 : * element to release its resources. Please note that RCU protected
1127 : * readers may still be accessing the elements. Releasing of resources
1128 : * must occur in a compatible manner. Then frees the bucket array.
1129 : *
1130 : * This function may sleep while waiting for an async resize to
1131 : * complete. The caller is responsible for ensuring that no further
1132 : * write operations occur in parallel.
1133 : */
1134 0 : void rhashtable_free_and_destroy(struct rhashtable *ht,
1135 : void (*free_fn)(void *ptr, void *arg),
1136 : void *arg)
1137 : {
1138 : struct bucket_table *tbl, *next_tbl;
1139 : unsigned int i;
1140 :
1141 0 : cancel_work_sync(&ht->run_work);
1142 :
1143 0 : mutex_lock(&ht->mutex);
1144 0 : tbl = rht_dereference(ht->tbl, ht);
1145 : restart:
1146 0 : if (free_fn) {
1147 0 : for (i = 0; i < tbl->size; i++) {
1148 : struct rhash_head *pos, *next;
1149 :
1150 0 : cond_resched();
1151 0 : for (pos = rht_ptr_exclusive(rht_bucket(tbl, i)),
1152 0 : next = !rht_is_a_nulls(pos) ?
1153 0 : rht_dereference(pos->next, ht) : NULL;
1154 0 : !rht_is_a_nulls(pos);
1155 0 : pos = next,
1156 0 : next = !rht_is_a_nulls(pos) ?
1157 0 : rht_dereference(pos->next, ht) : NULL)
1158 0 : rhashtable_free_one(ht, pos, free_fn, arg);
1159 : }
1160 : }
1161 :
1162 0 : next_tbl = rht_dereference(tbl->future_tbl, ht);
1163 0 : bucket_table_free(tbl);
1164 0 : if (next_tbl) {
1165 : tbl = next_tbl;
1166 : goto restart;
1167 : }
1168 0 : mutex_unlock(&ht->mutex);
1169 0 : }
1170 : EXPORT_SYMBOL_GPL(rhashtable_free_and_destroy);
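/*
 * Teardown sketch: free_fn is invoked once for each element still in the
 * table (with the opaque arg), so a table of kmalloc'ed objects can be
 * torn down as follows, assuming no RCU readers still hold references
 * (otherwise defer the free across an RCU grace period):
 *
 *	static void free_test_obj(void *ptr, void *arg)
 *	{
 *		kfree(ptr);
 *	}
 *
 *	rhashtable_free_and_destroy(&test_objs, free_test_obj, NULL);
 */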
1171 :
1172 0 : void rhashtable_destroy(struct rhashtable *ht)
1173 : {
1174 0 : return rhashtable_free_and_destroy(ht, NULL, NULL);
1175 : }
1176 : EXPORT_SYMBOL_GPL(rhashtable_destroy);
1177 :
1178 0 : struct rhash_lock_head __rcu **__rht_bucket_nested(
1179 : const struct bucket_table *tbl, unsigned int hash)
1180 : {
1181 0 : const unsigned int shift = PAGE_SHIFT - ilog2(sizeof(void *));
1182 0 : unsigned int index = hash & ((1 << tbl->nest) - 1);
1183 0 : unsigned int size = tbl->size >> tbl->nest;
1184 0 : unsigned int subhash = hash;
1185 : union nested_table *ntbl;
1186 :
1187 0 : ntbl = nested_table_top(tbl);
1188 0 : ntbl = rht_dereference_bucket_rcu(ntbl[index].table, tbl, hash);
1189 0 : subhash >>= tbl->nest;
1190 :
1191 0 : while (ntbl && size > (1 << shift)) {
1192 0 : index = subhash & ((1 << shift) - 1);
1193 0 : ntbl = rht_dereference_bucket_rcu(ntbl[index].table,
1194 : tbl, hash);
1195 0 : size >>= shift;
1196 0 : subhash >>= shift;
1197 : }
1198 :
1199 0 : if (!ntbl)
1200 : return NULL;
1201 :
1202 0 : return &ntbl[subhash].bucket;
1203 :
1204 : }
1205 : EXPORT_SYMBOL_GPL(__rht_bucket_nested);
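/*
 * Worked example of the nested addressing above, assuming 4 KiB pages and
 * 8-byte pointers (shift == PAGE_SHIFT - 3 == 9, i.e. 512 entries per
 * nested page).  A nominal table of 2^20 buckets gets
 *
 *	tbl->nest = (ilog2(1 << 20) - 1) % 9 + 1 = 2
 *
 * so the top level indexes on the low 2 bits of the hash and each further
 * level consumes 9 bits: 4 * 512 * 512 = 2^20 bucket slots in total, with
 * the individual pages allocated lazily by nested_table_alloc().
 */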
1206 :
1207 0 : struct rhash_lock_head __rcu **rht_bucket_nested(
1208 : const struct bucket_table *tbl, unsigned int hash)
1209 : {
1210 : static struct rhash_lock_head __rcu *rhnull;
1211 :
1212 0 : if (!rhnull)
1213 0 : INIT_RHT_NULLS_HEAD(rhnull);
1214 0 : return __rht_bucket_nested(tbl, hash) ?: &rhnull;
1215 : }
1216 : EXPORT_SYMBOL_GPL(rht_bucket_nested);
1217 :
1218 0 : struct rhash_lock_head __rcu **rht_bucket_nested_insert(
1219 : struct rhashtable *ht, struct bucket_table *tbl, unsigned int hash)
1220 : {
1221 0 : const unsigned int shift = PAGE_SHIFT - ilog2(sizeof(void *));
1222 0 : unsigned int index = hash & ((1 << tbl->nest) - 1);
1223 0 : unsigned int size = tbl->size >> tbl->nest;
1224 : union nested_table *ntbl;
1225 :
1226 0 : ntbl = nested_table_top(tbl);
1227 0 : hash >>= tbl->nest;
1228 0 : ntbl = nested_table_alloc(ht, &ntbl[index].table,
1229 : size <= (1 << shift));
1230 :
1231 0 : while (ntbl && size > (1 << shift)) {
1232 0 : index = hash & ((1 << shift) - 1);
1233 0 : size >>= shift;
1234 0 : hash >>= shift;
1235 0 : ntbl = nested_table_alloc(ht, &ntbl[index].table,
1236 : size <= (1 << shift));
1237 : }
1238 :
1239 0 : if (!ntbl)
1240 : return NULL;
1241 :
1242 0 : return &ntbl[hash].bucket;
1243 :
1244 : }
1245 : EXPORT_SYMBOL_GPL(rht_bucket_nested_insert);
|