LCOV - code coverage report
Current view: top level - lib - radix-tree.c (source / functions) Hit Total Coverage
Test: coverage.info Lines: 276 505 54.7 %
Date: 2023-04-06 08:38:28 Functions: 23 41 56.1 %

          Line data    Source code
       1             : // SPDX-License-Identifier: GPL-2.0-or-later
       2             : /*
       3             :  * Copyright (C) 2001 Momchil Velikov
       4             :  * Portions Copyright (C) 2001 Christoph Hellwig
       5             :  * Copyright (C) 2005 SGI, Christoph Lameter
       6             :  * Copyright (C) 2006 Nick Piggin
       7             :  * Copyright (C) 2012 Konstantin Khlebnikov
       8             :  * Copyright (C) 2016 Intel, Matthew Wilcox
       9             :  * Copyright (C) 2016 Intel, Ross Zwisler
      10             :  */
      11             : 
      12             : #include <linux/bitmap.h>
      13             : #include <linux/bitops.h>
      14             : #include <linux/bug.h>
      15             : #include <linux/cpu.h>
      16             : #include <linux/errno.h>
      17             : #include <linux/export.h>
      18             : #include <linux/idr.h>
      19             : #include <linux/init.h>
      20             : #include <linux/kernel.h>
      21             : #include <linux/kmemleak.h>
      22             : #include <linux/percpu.h>
      23             : #include <linux/preempt.h>                /* in_interrupt() */
      24             : #include <linux/radix-tree.h>
      25             : #include <linux/rcupdate.h>
      26             : #include <linux/slab.h>
      27             : #include <linux/string.h>
      28             : #include <linux/xarray.h>
      29             : 
      30             : /*
      31             :  * Radix tree node cache.
      32             :  */
      33             : struct kmem_cache *radix_tree_node_cachep;
      34             : 
      35             : /*
      36             :  * The radix tree is variable-height, so an insert operation not only has
      37             :  * to build the branch to its corresponding item, it also has to build the
      38             :  * branch to existing items if the size has to be increased (by
      39             :  * radix_tree_extend).
      40             :  *
      41             :  * The worst case is a zero height tree with just a single item at index 0,
      42             :  * and then inserting an item at index ULONG_MAX. This requires 2 new branches
      43             :  * of RADIX_TREE_MAX_PATH size to be created, with only the root node shared.
      44             :  * Hence:
      45             :  */
      46             : #define RADIX_TREE_PRELOAD_SIZE (RADIX_TREE_MAX_PATH * 2 - 1)
      47             : 
      48             : /*
      49             :  * The IDR does not have to be as high as the radix tree since it uses
      50             :  * signed integers, not unsigned longs.
      51             :  */
      52             : #define IDR_INDEX_BITS          (8 /* CHAR_BIT */ * sizeof(int) - 1)
      53             : #define IDR_MAX_PATH            (DIV_ROUND_UP(IDR_INDEX_BITS, \
      54             :                                                 RADIX_TREE_MAP_SHIFT))
      55             : #define IDR_PRELOAD_SIZE        (IDR_MAX_PATH * 2 - 1)
      56             : 
      57             : /*
      58             :  * Per-cpu pool of preloaded nodes
      59             :  */
      60             : DEFINE_PER_CPU(struct radix_tree_preload, radix_tree_preloads) = {
      61             :         .lock = INIT_LOCAL_LOCK(lock),
      62             : };
      63             : EXPORT_PER_CPU_SYMBOL_GPL(radix_tree_preloads);
      64             : 
      65             : static inline struct radix_tree_node *entry_to_node(void *ptr)
      66             : {
      67       41221 :         return (void *)((unsigned long)ptr & ~RADIX_TREE_INTERNAL_NODE);
      68             : }
      69             : 
      70             : static inline void *node_to_entry(void *ptr)
      71             : {
      72       12101 :         return (void *)((unsigned long)ptr | RADIX_TREE_INTERNAL_NODE);
      73             : }
      74             : 
      75             : #define RADIX_TREE_RETRY        XA_RETRY_ENTRY
      76             : 
      77             : static inline unsigned long
      78             : get_slot_offset(const struct radix_tree_node *parent, void __rcu **slot)
      79             : {
      80       11401 :         return parent ? slot - parent->slots : 0;
      81             : }
      82             : 
      83             : static unsigned int radix_tree_descend(const struct radix_tree_node *parent,
      84             :                         struct radix_tree_node **nodep, unsigned long index)
      85             : {
      86       26893 :         unsigned int offset = (index >> parent->shift) & RADIX_TREE_MAP_MASK;
      87       26893 :         void __rcu **entry = rcu_dereference_raw(parent->slots[offset]);
      88             : 
      89       26893 :         *nodep = (void *)entry;
      90             :         return offset;
      91             : }
      92             : 
      93             : static inline gfp_t root_gfp_mask(const struct radix_tree_root *root)
      94             : {
      95           0 :         return root->xa_flags & (__GFP_BITS_MASK & ~GFP_ZONEMASK);
      96             : }
      97             : 
      98             : static inline void tag_set(struct radix_tree_node *node, unsigned int tag,
      99             :                 int offset)
     100             : {
     101        2796 :         __set_bit(offset, node->tags[tag]);
     102             : }
     103             : 
     104             : static inline void tag_clear(struct radix_tree_node *node, unsigned int tag,
     105             :                 int offset)
     106             : {
     107       19500 :         __clear_bit(offset, node->tags[tag]);
     108             : }
     109             : 
     110       45546 : static inline int tag_get(const struct radix_tree_node *node, unsigned int tag,
     111             :                 int offset)
     112             : {
     113       91662 :         return test_bit(offset, node->tags[tag]);
     114             : }
     115             : 
     116             : static inline void root_tag_set(struct radix_tree_root *root, unsigned tag)
     117             : {
     118          34 :         root->xa_flags |= (__force gfp_t)(1 << (tag + ROOT_TAG_SHIFT));
     119             : }
     120             : 
     121             : static inline void root_tag_clear(struct radix_tree_root *root, unsigned tag)
     122             : {
     123           0 :         root->xa_flags &= (__force gfp_t)~(1 << (tag + ROOT_TAG_SHIFT));
     124             : }
     125             : 
     126             : static inline void root_tag_clear_all(struct radix_tree_root *root)
     127             : {
     128           0 :         root->xa_flags &= (__force gfp_t)((1 << ROOT_TAG_SHIFT) - 1);
     129             : }
     130             : 
     131             : static inline int root_tag_get(const struct radix_tree_root *root, unsigned tag)
     132             : {
     133       10942 :         return (__force int)root->xa_flags & (1 << (tag + ROOT_TAG_SHIFT));
     134             : }
     135             : 
     136             : static inline unsigned root_tags_get(const struct radix_tree_root *root)
     137             : {
     138           0 :         return (__force unsigned)root->xa_flags >> ROOT_TAG_SHIFT;
     139             : }
     140             : 
     141             : static inline bool is_idr(const struct radix_tree_root *root)
     142             : {
     143       12028 :         return !!(root->xa_flags & ROOT_IS_IDR);
     144             : }
     145             : 
     146             : /*
     147             :  * Returns 1 if any slot in the node has this tag set.
     148             :  * Otherwise returns 0.
     149             :  */
     150             : static inline int any_tag_set(const struct radix_tree_node *node,
     151             :                                                         unsigned int tag)
     152             : {
     153             :         unsigned idx;
     154         125 :         for (idx = 0; idx < RADIX_TREE_TAG_LONGS; idx++) {
     155        9750 :                 if (node->tags[tag][idx])
     156             :                         return 1;
     157             :         }
     158             :         return 0;
     159             : }
     160             : 
     161             : static inline void all_tag_set(struct radix_tree_node *node, unsigned int tag)
     162             : {
     163        1526 :         bitmap_fill(node->tags[tag], RADIX_TREE_MAP_SIZE);
     164             : }
     165             : 
     166             : /**
     167             :  * radix_tree_find_next_bit - find the next set bit in a memory region
     168             :  *
     169             :  * @node: where to begin the search
     170             :  * @tag: the tag index
     171             :  * @offset: the bitnumber to start searching at
     172             :  *
     173             :  * Unrollable variant of find_next_bit() for constant size arrays.
     174             :  * Tail bits starting from size to roundup(size, BITS_PER_LONG) must be zero.
     175             :  * Returns next bit offset, or size if nothing found.
     176             :  */
     177             : static __always_inline unsigned long
     178             : radix_tree_find_next_bit(struct radix_tree_node *node, unsigned int tag,
     179             :                          unsigned long offset)
     180             : {
     181         598 :         const unsigned long *addr = node->tags[tag];
     182             : 
     183         598 :         if (offset < RADIX_TREE_MAP_SIZE) {
     184             :                 unsigned long tmp;
     185             : 
     186         598 :                 addr += offset / BITS_PER_LONG;
     187         598 :                 tmp = *addr >> (offset % BITS_PER_LONG);
     188         598 :                 if (tmp)
     189        1196 :                         return __ffs(tmp) + offset;
     190           0 :                 offset = (offset + BITS_PER_LONG) & ~(BITS_PER_LONG - 1);
     191           0 :                 while (offset < RADIX_TREE_MAP_SIZE) {
     192           0 :                         tmp = *++addr;
     193           0 :                         if (tmp)
     194           0 :                                 return __ffs(tmp) + offset;
     195           0 :                         offset += BITS_PER_LONG;
     196             :                 }
     197             :         }
     198             :         return RADIX_TREE_MAP_SIZE;
     199             : }
     200             : 
     201             : static unsigned int iter_offset(const struct radix_tree_iter *iter)
     202             : {
     203        9625 :         return iter->index & RADIX_TREE_MAP_MASK;
     204             : }
     205             : 
     206             : /*
     207             :  * The maximum index which can be stored in a radix tree
     208             :  */
     209             : static inline unsigned long shift_maxindex(unsigned int shift)
     210             : {
     211       22230 :         return (RADIX_TREE_MAP_SIZE << shift) - 1;
     212             : }
     213             : 
     214             : static inline unsigned long node_maxindex(const struct radix_tree_node *node)
     215             : {
     216       43882 :         return shift_maxindex(node->shift);
     217             : }
     218             : 
     219             : static unsigned long next_index(unsigned long index,
     220             :                                 const struct radix_tree_node *node,
     221             :                                 unsigned long offset)
     222             : {
     223        1196 :         return (index & ~node_maxindex(node)) + (offset << node->shift);
     224             : }
     225             : 
     226             : /*
     227             :  * This assumes that the caller has performed appropriate preallocation, and
     228             :  * that the caller has pinned this thread of control to the current CPU.
     229             :  */
     230             : static struct radix_tree_node *
     231         763 : radix_tree_node_alloc(gfp_t gfp_mask, struct radix_tree_node *parent,
     232             :                         struct radix_tree_root *root,
     233             :                         unsigned int shift, unsigned int offset,
     234             :                         unsigned int count, unsigned int nr_values)
     235             : {
     236         763 :         struct radix_tree_node *ret = NULL;
     237             : 
     238             :         /*
     239             :          * Preload code isn't irq safe and it doesn't make sense to use
     240             :          * preloading during an interrupt anyway as all the allocations have
     241             :          * to be atomic. So just do normal allocation when in interrupt.
     242             :          */
     243        1508 :         if (!gfpflags_allow_blocking(gfp_mask) && !in_interrupt()) {
     244             :                 struct radix_tree_preload *rtp;
     245             : 
     246             :                 /*
     247             :                  * Even if the caller has preloaded, try to allocate from the
     248             :                  * cache first for the new node to get accounted to the memory
     249             :                  * cgroup.
     250             :                  */
     251         745 :                 ret = kmem_cache_alloc(radix_tree_node_cachep,
     252             :                                        gfp_mask | __GFP_NOWARN);
     253         745 :                 if (ret)
     254             :                         goto out;
     255             : 
     256             :                 /*
     257             :                  * Provided the caller has preloaded here, we will always
     258             :                  * succeed in getting a node here (and never reach
     259             :                  * kmem_cache_alloc)
     260             :                  */
     261           0 :                 rtp = this_cpu_ptr(&radix_tree_preloads);
     262           0 :                 if (rtp->nr) {
     263           0 :                         ret = rtp->nodes;
     264           0 :                         rtp->nodes = ret->parent;
     265           0 :                         rtp->nr--;
     266             :                 }
     267             :                 /*
     268             :                  * Update the allocation stack trace as this is more useful
     269             :                  * for debugging.
     270             :                  */
     271             :                 kmemleak_update_trace(ret);
     272             :                 goto out;
     273             :         }
     274          18 :         ret = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask);
     275             : out:
     276         763 :         BUG_ON(radix_tree_is_internal_node(ret));
     277         763 :         if (ret) {
     278         763 :                 ret->shift = shift;
     279         763 :                 ret->offset = offset;
     280         763 :                 ret->count = count;
     281         763 :                 ret->nr_values = nr_values;
     282         763 :                 ret->parent = parent;
     283         763 :                 ret->array = root;
     284             :         }
     285         763 :         return ret;
     286             : }
     287             : 
     288         624 : void radix_tree_node_rcu_free(struct rcu_head *head)
     289             : {
     290         624 :         struct radix_tree_node *node =
     291         624 :                         container_of(head, struct radix_tree_node, rcu_head);
     292             : 
     293             :         /*
     294             :          * Must only free zeroed nodes into the slab.  We can be left with
     295             :          * non-NULL entries by radix_tree_free_nodes, so clear the entries
     296             :          * and tags here.
     297             :          */
     298         624 :         memset(node->slots, 0, sizeof(node->slots));
     299         624 :         memset(node->tags, 0, sizeof(node->tags));
     300        1248 :         INIT_LIST_HEAD(&node->private_list);
     301             : 
     302         624 :         kmem_cache_free(radix_tree_node_cachep, node);
     303         624 : }
     304             : 
     305             : static inline void
     306             : radix_tree_node_free(struct radix_tree_node *node)
     307             : {
     308         626 :         call_rcu(&node->rcu_head, radix_tree_node_rcu_free);
     309             : }
     310             : 
     311             : /*
     312             :  * Load up this CPU's radix_tree_node buffer with sufficient objects to
     313             :  * ensure that the addition of a single element in the tree cannot fail.  On
     314             :  * success, return zero, with preemption disabled.  On error, return -ENOMEM
     315             :  * with preemption not disabled.
     316             :  *
     317             :  * To make use of this facility, the radix tree must be initialised without
     318             :  * __GFP_DIRECT_RECLAIM being passed to INIT_RADIX_TREE().
     319             :  */
     320        9009 : static __must_check int __radix_tree_preload(gfp_t gfp_mask, unsigned nr)
     321             : {
     322             :         struct radix_tree_preload *rtp;
     323             :         struct radix_tree_node *node;
     324        9009 :         int ret = -ENOMEM;
     325             : 
     326             :         /*
     327             :          * Nodes preloaded by one cgroup can be used by another cgroup, so
     328             :          * they should never be accounted to any particular memory cgroup.
     329             :          */
     330        9009 :         gfp_mask &= ~__GFP_ACCOUNT;
     331             : 
     332        9009 :         local_lock(&radix_tree_preloads.lock);
     333        9009 :         rtp = this_cpu_ptr(&radix_tree_preloads);
     334       18029 :         while (rtp->nr < nr) {
     335          11 :                 local_unlock(&radix_tree_preloads.lock);
     336          11 :                 node = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask);
     337          11 :                 if (node == NULL)
     338             :                         goto out;
     339          11 :                 local_lock(&radix_tree_preloads.lock);
     340          11 :                 rtp = this_cpu_ptr(&radix_tree_preloads);
     341          11 :                 if (rtp->nr < nr) {
     342          11 :                         node->parent = rtp->nodes;
     343          11 :                         rtp->nodes = node;
     344          11 :                         rtp->nr++;
     345             :                 } else {
     346           0 :                         kmem_cache_free(radix_tree_node_cachep, node);
     347             :                 }
     348             :         }
     349             :         ret = 0;
     350             : out:
     351        9009 :         return ret;
     352             : }
     353             : 
     354             : /*
     355             :  * Load up this CPU's radix_tree_node buffer with sufficient objects to
     356             :  * ensure that the addition of a single element in the tree cannot fail.  On
     357             :  * success, return zero, with preemption disabled.  On error, return -ENOMEM
     358             :  * with preemption not disabled.
     359             :  *
     360             :  * To make use of this facility, the radix tree must be initialised without
     361             :  * __GFP_DIRECT_RECLAIM being passed to INIT_RADIX_TREE().
     362             :  */
     363           0 : int radix_tree_preload(gfp_t gfp_mask)
     364             : {
     365             :         /* Warn on non-sensical use... */
     366           0 :         WARN_ON_ONCE(!gfpflags_allow_blocking(gfp_mask));
     367           0 :         return __radix_tree_preload(gfp_mask, RADIX_TREE_PRELOAD_SIZE);
     368             : }
     369             : EXPORT_SYMBOL(radix_tree_preload);
     370             : 
     371             : /*
     372             :  * The same as above function, except we don't guarantee preloading happens.
     373             :  * We do it, if we decide it helps. On success, return zero with preemption
     374             :  * disabled. On error, return -ENOMEM with preemption not disabled.
     375             :  */
     376           0 : int radix_tree_maybe_preload(gfp_t gfp_mask)
     377             : {
     378           0 :         if (gfpflags_allow_blocking(gfp_mask))
     379           0 :                 return __radix_tree_preload(gfp_mask, RADIX_TREE_PRELOAD_SIZE);
     380             :         /* Preloading doesn't help anything with this gfp mask, skip it */
     381           0 :         local_lock(&radix_tree_preloads.lock);
     382           0 :         return 0;
     383             : }
     384             : EXPORT_SYMBOL(radix_tree_maybe_preload);
     385             : 
     386             : static unsigned radix_tree_load_root(const struct radix_tree_root *root,
     387             :                 struct radix_tree_node **nodep, unsigned long *maxindex)
     388             : {
     389       11755 :         struct radix_tree_node *node = rcu_dereference_raw(root->xa_head);
     390             : 
     391       11755 :         *nodep = node;
     392             : 
     393       11755 :         if (likely(radix_tree_is_internal_node(node))) {
     394       11718 :                 node = entry_to_node(node);
     395       11718 :                 *maxindex = node_maxindex(node);
     396        9588 :                 return node->shift + RADIX_TREE_MAP_SHIFT;
     397             :         }
     398             : 
     399             :         *maxindex = 0;
     400             :         return 0;
     401             : }
     402             : 
     403             : /*
     404             :  *      Extend a radix tree so it can store key @index.
     405             :  */
     406         289 : static int radix_tree_extend(struct radix_tree_root *root, gfp_t gfp,
     407             :                                 unsigned long index, unsigned int shift)
     408             : {
     409             :         void *entry;
     410             :         unsigned int maxshift;
     411             :         int tag;
     412             : 
     413             :         /* Figure out what the shift should be.  */
     414         289 :         maxshift = shift;
     415         578 :         while (index > shift_maxindex(maxshift))
     416           0 :                 maxshift += RADIX_TREE_MAP_SHIFT;
     417             : 
     418         289 :         entry = rcu_dereference_raw(root->xa_head);
     419         293 :         if (!entry && (!is_idr(root) || root_tag_get(root, IDR_FREE)))
     420             :                 goto out;
     421             : 
     422             :         do {
     423         287 :                 struct radix_tree_node *node = radix_tree_node_alloc(gfp, NULL,
     424             :                                                         root, shift, 0, 1, 0);
     425         287 :                 if (!node)
     426             :                         return -ENOMEM;
     427             : 
     428         574 :                 if (is_idr(root)) {
     429         287 :                         all_tag_set(node, IDR_FREE);
     430         574 :                         if (!root_tag_get(root, IDR_FREE)) {
     431           0 :                                 tag_clear(node, IDR_FREE, 0);
     432           0 :                                 root_tag_set(root, IDR_FREE);
     433             :                         }
     434             :                 } else {
     435             :                         /* Propagate the aggregated tag info to the new child */
     436           0 :                         for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) {
     437           0 :                                 if (root_tag_get(root, tag))
     438           0 :                                         tag_set(node, tag, 0);
     439             :                         }
     440             :                 }
     441             : 
     442         287 :                 BUG_ON(shift > BITS_PER_LONG);
     443         287 :                 if (radix_tree_is_internal_node(entry)) {
     444         287 :                         entry_to_node(entry)->parent = node;
     445           0 :                 } else if (xa_is_value(entry)) {
     446             :                         /* Moving a value entry root->xa_head to a node */
     447           0 :                         node->nr_values = 1;
     448             :                 }
     449             :                 /*
     450             :                  * entry was already in the radix tree, so we do not need
     451             :                  * rcu_assign_pointer here
     452             :                  */
     453         287 :                 node->slots[0] = (void __rcu *)entry;
     454         287 :                 entry = node_to_entry(node);
     455         287 :                 rcu_assign_pointer(root->xa_head, entry);
     456         287 :                 shift += RADIX_TREE_MAP_SHIFT;
     457         287 :         } while (shift <= maxshift);
     458             : out:
     459         289 :         return maxshift + RADIX_TREE_MAP_SHIFT;
     460             : }
     461             : 
     462             : /**
     463             :  *      radix_tree_shrink    -    shrink radix tree to minimum height
     464             :  *      @root:          radix tree root
     465             :  */
     466        1753 : static inline bool radix_tree_shrink(struct radix_tree_root *root)
     467             : {
     468        1753 :         bool shrunk = false;
     469             : 
     470         285 :         for (;;) {
     471        2038 :                 struct radix_tree_node *node = rcu_dereference_raw(root->xa_head);
     472             :                 struct radix_tree_node *child;
     473             : 
     474        2038 :                 if (!radix_tree_is_internal_node(node))
     475             :                         break;
     476        2038 :                 node = entry_to_node(node);
     477             : 
     478             :                 /*
     479             :                  * The candidate node has more than one child, or its child
     480             :                  * is not at the leftmost slot, we cannot shrink.
     481             :                  */
     482        2038 :                 if (node->count != 1)
     483             :                         break;
     484         342 :                 child = rcu_dereference_raw(node->slots[0]);
     485         342 :                 if (!child)
     486             :                         break;
     487             : 
     488             :                 /*
     489             :                  * For an IDR, we must not shrink entry 0 into the root in
     490             :                  * case somebody calls idr_replace() with a pointer that
     491             :                  * appears to be an internal entry
     492             :                  */
     493         323 :                 if (!node->shift && is_idr(root))
     494             :                         break;
     495             : 
     496         285 :                 if (radix_tree_is_internal_node(child))
     497         285 :                         entry_to_node(child)->parent = NULL;
     498             : 
     499             :                 /*
     500             :                  * We don't need rcu_assign_pointer(), since we are simply
     501             :                  * moving the node from one part of the tree to another: if it
     502             :                  * was safe to dereference the old pointer to it
     503             :                  * (node->slots[0]), it will be safe to dereference the new
     504             :                  * one (root->xa_head) as far as dependent read barriers go.
     505             :                  */
     506         285 :                 root->xa_head = (void __rcu *)child;
     507         855 :                 if (is_idr(root) && !tag_get(node, IDR_FREE, 0))
     508           0 :                         root_tag_clear(root, IDR_FREE);
     509             : 
     510             :                 /*
     511             :                  * We have a dilemma here. The node's slot[0] must not be
     512             :                  * NULLed in case there are concurrent lookups expecting to
     513             :                  * find the item. However if this was a bottom-level node,
     514             :                  * then it may be subject to the slot pointer being visible
     515             :                  * to callers dereferencing it. If item corresponding to
     516             :                  * slot[0] is subsequently deleted, these callers would expect
     517             :                  * their slot to become empty sooner or later.
     518             :                  *
     519             :                  * For example, lockless pagecache will look up a slot, deref
     520             :                  * the page pointer, and if the page has 0 refcount it means it
     521             :                  * was concurrently deleted from pagecache so try the deref
     522             :                  * again. Fortunately there is already a requirement for logic
     523             :                  * to retry the entire slot lookup -- the indirect pointer
     524             :                  * problem (replacing direct root node with an indirect pointer
     525             :                  * also results in a stale slot). So tag the slot as indirect
     526             :                  * to force callers to retry.
     527             :                  */
     528         285 :                 node->count = 0;
     529         285 :                 if (!radix_tree_is_internal_node(child)) {
     530           0 :                         node->slots[0] = (void __rcu *)RADIX_TREE_RETRY;
     531             :                 }
     532             : 
     533         570 :                 WARN_ON_ONCE(!list_empty(&node->private_list));
     534         285 :                 radix_tree_node_free(node);
     535         285 :                 shrunk = true;
     536             :         }
     537             : 
     538        1753 :         return shrunk;
     539             : }
     540             : 
     541       11372 : static bool delete_node(struct radix_tree_root *root,
     542             :                         struct radix_tree_node *node)
     543             : {
     544       11372 :         bool deleted = false;
     545             : 
     546             :         do {
     547             :                 struct radix_tree_node *parent;
     548             : 
     549       11679 :                 if (node->count) {
     550       22676 :                         if (node_to_entry(node) ==
     551       11338 :                                         rcu_dereference_raw(root->xa_head))
     552        1753 :                                 deleted |= radix_tree_shrink(root);
     553             :                         return deleted;
     554             :                 }
     555             : 
     556         341 :                 parent = node->parent;
     557         341 :                 if (parent) {
     558         307 :                         parent->slots[node->offset] = NULL;
     559         307 :                         parent->count--;
     560             :                 } else {
     561             :                         /*
     562             :                          * Shouldn't the tags already have all been cleared
     563             :                          * by the caller?
     564             :                          */
     565          68 :                         if (!is_idr(root))
     566           0 :                                 root_tag_clear_all(root);
     567          34 :                         root->xa_head = NULL;
     568             :                 }
     569             : 
     570         682 :                 WARN_ON_ONCE(!list_empty(&node->private_list));
     571         341 :                 radix_tree_node_free(node);
     572         341 :                 deleted = true;
     573             : 
     574         341 :                 node = parent;
     575         341 :         } while (node);
     576             : 
     577             :         return deleted;
     578             : }
     579             : 
     580             : /**
     581             :  *      __radix_tree_create     -       create a slot in a radix tree
     582             :  *      @root:          radix tree root
     583             :  *      @index:         index key
     584             :  *      @nodep:         returns node
     585             :  *      @slotp:         returns slot
     586             :  *
     587             :  *      Create, if necessary, and return the node and slot for an item
     588             :  *      at position @index in the radix tree @root.
     589             :  *
     590             :  *      Until there is more than one item in the tree, no nodes are
     591             :  *      allocated and @root->xa_head is used as a direct slot instead of
     592             :  *      pointing to a node, in which case *@nodep will be NULL.
     593             :  *
     594             :  *      Returns -ENOMEM, or 0 for success.
     595             :  */
     596           0 : static int __radix_tree_create(struct radix_tree_root *root,
     597             :                 unsigned long index, struct radix_tree_node **nodep,
     598             :                 void __rcu ***slotp)
     599             : {
     600           0 :         struct radix_tree_node *node = NULL, *child;
     601           0 :         void __rcu **slot = (void __rcu **)&root->xa_head;
     602             :         unsigned long maxindex;
     603           0 :         unsigned int shift, offset = 0;
     604           0 :         unsigned long max = index;
     605           0 :         gfp_t gfp = root_gfp_mask(root);
     606             : 
     607           0 :         shift = radix_tree_load_root(root, &child, &maxindex);
     608             : 
     609             :         /* Make sure the tree is high enough.  */
     610           0 :         if (max > maxindex) {
     611           0 :                 int error = radix_tree_extend(root, gfp, max, shift);
     612           0 :                 if (error < 0)
     613             :                         return error;
     614           0 :                 shift = error;
     615           0 :                 child = rcu_dereference_raw(root->xa_head);
     616             :         }
     617             : 
     618           0 :         while (shift > 0) {
     619           0 :                 shift -= RADIX_TREE_MAP_SHIFT;
     620           0 :                 if (child == NULL) {
     621             :                         /* Have to add a child node.  */
     622           0 :                         child = radix_tree_node_alloc(gfp, node, root, shift,
     623             :                                                         offset, 0, 0);
     624           0 :                         if (!child)
     625             :                                 return -ENOMEM;
     626           0 :                         rcu_assign_pointer(*slot, node_to_entry(child));
     627           0 :                         if (node)
     628           0 :                                 node->count++;
     629           0 :                 } else if (!radix_tree_is_internal_node(child))
     630             :                         break;
     631             : 
     632             :                 /* Go a level down */
     633           0 :                 node = entry_to_node(child);
     634           0 :                 offset = radix_tree_descend(node, &child, index);
     635           0 :                 slot = &node->slots[offset];
     636             :         }
     637             : 
     638           0 :         if (nodep)
     639           0 :                 *nodep = node;
     640           0 :         if (slotp)
     641           0 :                 *slotp = slot;
     642             :         return 0;
     643             : }
     644             : 
     645             : /*
     646             :  * Free any nodes below this node.  The tree is presumed to not need
     647             :  * shrinking, and any user data in the tree is presumed to not need a
     648             :  * destructor called on it.  If we need to add a destructor, we can
     649             :  * add that functionality later.  Note that we may not clear tags or
     650             :  * slots from the tree as an RCU walker may still have a pointer into
     651             :  * this subtree.  We could replace the entries with RADIX_TREE_RETRY,
     652             :  * but we'll still have to clear those in rcu_free.
     653             :  */
     654           0 : static void radix_tree_free_nodes(struct radix_tree_node *node)
     655             : {
     656           0 :         unsigned offset = 0;
     657           0 :         struct radix_tree_node *child = entry_to_node(node);
     658             : 
     659             :         for (;;) {
     660           0 :                 void *entry = rcu_dereference_raw(child->slots[offset]);
     661           0 :                 if (xa_is_node(entry) && child->shift) {
     662           0 :                         child = entry_to_node(entry);
     663           0 :                         offset = 0;
     664           0 :                         continue;
     665             :                 }
     666           0 :                 offset++;
     667           0 :                 while (offset == RADIX_TREE_MAP_SIZE) {
     668           0 :                         struct radix_tree_node *old = child;
     669           0 :                         offset = child->offset + 1;
     670           0 :                         child = child->parent;
     671           0 :                         WARN_ON_ONCE(!list_empty(&old->private_list));
     672           0 :                         radix_tree_node_free(old);
     673           0 :                         if (old == entry_to_node(node))
     674           0 :                                 return;
     675             :                 }
     676             :         }
     677             : }
     678             : 
     679             : static inline int insert_entries(struct radix_tree_node *node,
     680             :                 void __rcu **slot, void *item)
     681             : {
     682           0 :         if (*slot)
     683             :                 return -EEXIST;
     684           0 :         rcu_assign_pointer(*slot, item);
     685           0 :         if (node) {
     686           0 :                 node->count++;
     687           0 :                 if (xa_is_value(item))
     688           0 :                         node->nr_values++;
     689             :         }
     690             :         return 1;
     691             : }
     692             : 
     693             : /**
     694             :  *      radix_tree_insert    -    insert into a radix tree
     695             :  *      @root:          radix tree root
     696             :  *      @index:         index key
     697             :  *      @item:          item to insert
     698             :  *
     699             :  *      Insert an item into the radix tree at position @index.
     700             :  */
     701           0 : int radix_tree_insert(struct radix_tree_root *root, unsigned long index,
     702             :                         void *item)
     703             : {
     704             :         struct radix_tree_node *node;
     705             :         void __rcu **slot;
     706             :         int error;
     707             : 
     708           0 :         BUG_ON(radix_tree_is_internal_node(item));
     709             : 
     710           0 :         error = __radix_tree_create(root, index, &node, &slot);
     711           0 :         if (error)
     712             :                 return error;
     713             : 
     714           0 :         error = insert_entries(node, slot, item);
     715           0 :         if (error < 0)
     716             :                 return error;
     717             : 
     718           0 :         if (node) {
     719           0 :                 unsigned offset = get_slot_offset(node, slot);
     720           0 :                 BUG_ON(tag_get(node, 0, offset));
     721           0 :                 BUG_ON(tag_get(node, 1, offset));
     722           0 :                 BUG_ON(tag_get(node, 2, offset));
     723             :         } else {
     724           0 :                 BUG_ON(root_tags_get(root));
     725             :         }
     726             : 
     727             :         return 0;
     728             : }
     729             : EXPORT_SYMBOL(radix_tree_insert);
     730             : 
     731             : /**
     732             :  *      __radix_tree_lookup     -       lookup an item in a radix tree
     733             :  *      @root:          radix tree root
     734             :  *      @index:         index key
     735             :  *      @nodep:         returns node
     736             :  *      @slotp:         returns slot
     737             :  *
     738             :  *      Lookup and return the item at position @index in the radix
     739             :  *      tree @root.
     740             :  *
     741             :  *      Until there is more than one item in the tree, no nodes are
     742             :  *      allocated and @root->xa_head is used as a direct slot instead of
     743             :  *      pointing to a node, in which case *@nodep will be NULL.
     744             :  */
     745        1780 : void *__radix_tree_lookup(const struct radix_tree_root *root,
     746             :                           unsigned long index, struct radix_tree_node **nodep,
     747             :                           void __rcu ***slotp)
     748             : {
     749             :         struct radix_tree_node *node, *parent;
     750             :         unsigned long maxindex;
     751             :         void __rcu **slot;
     752             : 
     753             :  restart:
     754        1780 :         parent = NULL;
     755        1780 :         slot = (void __rcu **)&root->xa_head;
     756        1780 :         radix_tree_load_root(root, &node, &maxindex);
     757        1780 :         if (index > maxindex)
     758             :                 return NULL;
     759             : 
     760        6436 :         while (radix_tree_is_internal_node(node)) {
     761             :                 unsigned offset;
     762             : 
     763        6436 :                 parent = entry_to_node(node);
     764        3218 :                 offset = radix_tree_descend(parent, &node, index);
     765        3218 :                 slot = parent->slots + offset;
     766        3218 :                 if (node == RADIX_TREE_RETRY)
     767             :                         goto restart;
     768        3218 :                 if (parent->shift == 0)
     769             :                         break;
     770             :         }
     771             : 
     772        1780 :         if (nodep)
     773        1747 :                 *nodep = parent;
     774        1780 :         if (slotp)
     775        1747 :                 *slotp = slot;
     776             :         return node;
     777             : }
     778             : 
     779             : /**
     780             :  *      radix_tree_lookup_slot    -    lookup a slot in a radix tree
     781             :  *      @root:          radix tree root
     782             :  *      @index:         index key
     783             :  *
     784             :  *      Returns:  the slot corresponding to the position @index in the
     785             :  *      radix tree @root. This is useful for update-if-exists operations.
     786             :  *
     787             :  *      This function can be called under rcu_read_lock iff the slot is not
     788             :  *      modified by radix_tree_replace_slot, otherwise it must be called
     789             :  *      exclusive from other writers. Any dereference of the slot must be done
     790             :  *      using radix_tree_deref_slot.
     791             :  */
     792           0 : void __rcu **radix_tree_lookup_slot(const struct radix_tree_root *root,
     793             :                                 unsigned long index)
     794             : {
     795             :         void __rcu **slot;
     796             : 
     797           0 :         if (!__radix_tree_lookup(root, index, NULL, &slot))
     798             :                 return NULL;
     799           0 :         return slot;
     800             : }
     801             : EXPORT_SYMBOL(radix_tree_lookup_slot);
     802             : 
     803             : /**
     804             :  *      radix_tree_lookup    -    perform lookup operation on a radix tree
     805             :  *      @root:          radix tree root
     806             :  *      @index:         index key
     807             :  *
     808             :  *      Lookup the item at the position @index in the radix tree @root.
     809             :  *
     810             :  *      This function can be called under rcu_read_lock, however the caller
     811             :  *      must manage lifetimes of leaf nodes (eg. RCU may also be used to free
     812             :  *      them safely). No RCU barriers are required to access or modify the
     813             :  *      returned item, however.
     814             :  */
     815          33 : void *radix_tree_lookup(const struct radix_tree_root *root, unsigned long index)
     816             : {
     817          33 :         return __radix_tree_lookup(root, index, NULL, NULL);
     818             : }
     819             : EXPORT_SYMBOL(radix_tree_lookup);
     820             : 
     821             : static void replace_slot(void __rcu **slot, void *item,
     822             :                 struct radix_tree_node *node, int count, int values)
     823             : {
     824       11372 :         if (node && (count || values)) {
     825       11022 :                 node->count += count;
     826       11022 :                 node->nr_values += values;
     827             :         }
     828             : 
     829       11372 :         rcu_assign_pointer(*slot, item);
     830             : }
     831             : 
     832             : static bool node_tag_get(const struct radix_tree_root *root,
     833             :                                 const struct radix_tree_node *node,
     834             :                                 unsigned int tag, unsigned int offset)
     835             : {
     836       10004 :         if (node)
     837       10004 :                 return tag_get(node, tag, offset);
     838           0 :         return root_tag_get(root, tag);
     839             : }
     840             : 
     841             : /*
     842             :  * IDR users want to be able to store NULL in the tree, so if the slot isn't
     843             :  * free, don't adjust the count, even if it's transitioning between NULL and
     844             :  * non-NULL.  For the IDA, we mark slots as being IDR_FREE while they still
     845             :  * have empty bits, but it only stores NULL in slots when they're being
     846             :  * deleted.
     847             :  */
     848        9975 : static int calculate_count(struct radix_tree_root *root,
     849             :                                 struct radix_tree_node *node, void __rcu **slot,
     850             :                                 void *item, void *old)
     851             : {
     852       19950 :         if (is_idr(root)) {
     853        9975 :                 unsigned offset = get_slot_offset(node, slot);
     854        9975 :                 bool free = node_tag_get(root, node, IDR_FREE, offset);
     855        9975 :                 if (!free)
     856             :                         return 0;
     857        9625 :                 if (!old)
     858             :                         return 1;
     859             :         }
     860           0 :         return !!item - !!old;
     861             : }
     862             : 
     863             : /**
     864             :  * __radix_tree_replace         - replace item in a slot
     865             :  * @root:               radix tree root
     866             :  * @node:               pointer to tree node
     867             :  * @slot:               pointer to slot in @node
     868             :  * @item:               new item to store in the slot.
     869             :  *
     870             :  * For use with __radix_tree_lookup().  Caller must hold tree write locked
     871             :  * across slot lookup and replacement.
     872             :  */
     873        9975 : void __radix_tree_replace(struct radix_tree_root *root,
     874             :                           struct radix_tree_node *node,
     875             :                           void __rcu **slot, void *item)
     876             : {
     877        9975 :         void *old = rcu_dereference_raw(*slot);
     878       19950 :         int values = !!xa_is_value(item) - !!xa_is_value(old);
     879        9975 :         int count = calculate_count(root, node, slot, item, old);
     880             : 
     881             :         /*
     882             :          * This function supports replacing value entries and
     883             :          * deleting entries, but that needs accounting against the
     884             :          * node unless the slot is root->xa_head.
     885             :          */
     886        9975 :         WARN_ON_ONCE(!node && (slot != (void __rcu **)&root->xa_head) &&
     887             :                         (count || values));
     888        9975 :         replace_slot(slot, item, node, count, values);
     889             : 
     890        9975 :         if (!node)
     891             :                 return;
     892             : 
     893        9975 :         delete_node(root, node);
     894             : }
     895             : 
     896             : /**
     897             :  * radix_tree_replace_slot      - replace item in a slot
     898             :  * @root:       radix tree root
     899             :  * @slot:       pointer to slot
     900             :  * @item:       new item to store in the slot.
     901             :  *
     902             :  * For use with radix_tree_lookup_slot() and
     903             :  * radix_tree_gang_lookup_tag_slot().  Caller must hold tree write locked
     904             :  * across slot lookup and replacement.
     905             :  *
     906             :  * NOTE: This cannot be used to switch between non-entries (empty slots),
     907             :  * regular entries, and value entries, as that requires accounting
     908             :  * inside the radix tree node. When switching from one type of entry or
     909             :  * deleting, use __radix_tree_lookup() and __radix_tree_replace() or
     910             :  * radix_tree_iter_replace().
     911             :  */
     912           0 : void radix_tree_replace_slot(struct radix_tree_root *root,
     913             :                              void __rcu **slot, void *item)
     914             : {
     915           0 :         __radix_tree_replace(root, NULL, slot, item);
     916           0 : }
     917             : EXPORT_SYMBOL(radix_tree_replace_slot);
     918             : 
     919             : /**
     920             :  * radix_tree_iter_replace - replace item in a slot
     921             :  * @root:       radix tree root
     922             :  * @iter:       iterator state
     923             :  * @slot:       pointer to slot
     924             :  * @item:       new item to store in the slot.
     925             :  *
     926             :  * For use with radix_tree_for_each_slot().
     927             :  * Caller must hold tree write locked.
     928             :  */
     929        9625 : void radix_tree_iter_replace(struct radix_tree_root *root,
     930             :                                 const struct radix_tree_iter *iter,
     931             :                                 void __rcu **slot, void *item)
     932             : {
     933        9625 :         __radix_tree_replace(root, iter->node, slot, item);
     934        9625 : }
     935             : 
     936        1397 : static void node_tag_set(struct radix_tree_root *root,
     937             :                                 struct radix_tree_node *node,
     938             :                                 unsigned int tag, unsigned int offset)
     939             : {
     940        4192 :         while (node) {
     941        2117 :                 if (tag_get(node, tag, offset))
     942             :                         return;
     943        2796 :                 tag_set(node, tag, offset);
     944        1398 :                 offset = node->offset;
     945        1398 :                 node = node->parent;
     946             :         }
     947             : 
     948        1356 :         if (!root_tag_get(root, tag))
     949           0 :                 root_tag_set(root, tag);
     950             : }
     951             : 
     952             : /**
     953             :  *      radix_tree_tag_set - set a tag on a radix tree node
     954             :  *      @root:          radix tree root
     955             :  *      @index:         index key
     956             :  *      @tag:           tag index
     957             :  *
     958             :  *      Set the search tag (which must be < RADIX_TREE_MAX_TAGS)
     959             :  *      corresponding to @index in the radix tree.  From
     960             :  *      the root all the way down to the leaf node.
     961             :  *
     962             :  *      Returns the address of the tagged item.  Setting a tag on a not-present
     963             :  *      item is a bug.
     964             :  */
     965           0 : void *radix_tree_tag_set(struct radix_tree_root *root,
     966             :                         unsigned long index, unsigned int tag)
     967             : {
     968             :         struct radix_tree_node *node, *parent;
     969             :         unsigned long maxindex;
     970             : 
     971           0 :         radix_tree_load_root(root, &node, &maxindex);
     972           0 :         BUG_ON(index > maxindex);
     973             : 
     974           0 :         while (radix_tree_is_internal_node(node)) {
     975             :                 unsigned offset;
     976             : 
     977           0 :                 parent = entry_to_node(node);
     978           0 :                 offset = radix_tree_descend(parent, &node, index);
     979           0 :                 BUG_ON(!node);
     980             : 
     981           0 :                 if (!tag_get(parent, tag, offset))
     982           0 :                         tag_set(parent, tag, offset);
     983             :         }
     984             : 
     985             :         /* set the root's tag bit */
     986           0 :         if (!root_tag_get(root, tag))
     987           0 :                 root_tag_set(root, tag);
     988             : 
     989           0 :         return node;
     990             : }
     991             : EXPORT_SYMBOL(radix_tree_tag_set);
     992             : 
     993        9625 : static void node_tag_clear(struct radix_tree_root *root,
     994             :                                 struct radix_tree_node *node,
     995             :                                 unsigned int tag, unsigned int offset)
     996             : {
     997       19375 :         while (node) {
     998        9750 :                 if (!tag_get(node, tag, offset))
     999             :                         return;
    1000        9750 :                 tag_clear(node, tag, offset);
    1001        9750 :                 if (any_tag_set(node, tag))
    1002             :                         return;
    1003             : 
    1004         125 :                 offset = node->offset;
    1005         125 :                 node = node->parent;
    1006             :         }
    1007             : 
    1008             :         /* clear the root's tag bit */
    1009           0 :         if (root_tag_get(root, tag))
    1010           0 :                 root_tag_clear(root, tag);
    1011             : }
    1012             : 
    1013             : /**
    1014             :  *      radix_tree_tag_clear - clear a tag on a radix tree node
    1015             :  *      @root:          radix tree root
    1016             :  *      @index:         index key
    1017             :  *      @tag:           tag index
    1018             :  *
    1019             :  *      Clear the search tag (which must be < RADIX_TREE_MAX_TAGS)
    1020             :  *      corresponding to @index in the radix tree.  If this causes
    1021             :  *      the leaf node to have no tags set then clear the tag in the
    1022             :  *      next-to-leaf node, etc.
    1023             :  *
    1024             :  *      Returns the address of the tagged item on success, else NULL.  ie:
    1025             :  *      has the same return value and semantics as radix_tree_lookup().
    1026             :  */
    1027           0 : void *radix_tree_tag_clear(struct radix_tree_root *root,
    1028             :                         unsigned long index, unsigned int tag)
    1029             : {
    1030             :         struct radix_tree_node *node, *parent;
    1031             :         unsigned long maxindex;
    1032           0 :         int offset = 0;
    1033             : 
    1034           0 :         radix_tree_load_root(root, &node, &maxindex);
    1035           0 :         if (index > maxindex)
    1036             :                 return NULL;
    1037             : 
    1038             :         parent = NULL;
    1039             : 
    1040           0 :         while (radix_tree_is_internal_node(node)) {
    1041           0 :                 parent = entry_to_node(node);
    1042           0 :                 offset = radix_tree_descend(parent, &node, index);
    1043             :         }
    1044             : 
    1045           0 :         if (node)
    1046           0 :                 node_tag_clear(root, parent, tag, offset);
    1047             : 
    1048             :         return node;
    1049             : }
    1050             : EXPORT_SYMBOL(radix_tree_tag_clear);
    1051             : 
    1052             : /**
    1053             :   * radix_tree_iter_tag_clear - clear a tag on the current iterator entry
    1054             :   * @root: radix tree root
    1055             :   * @iter: iterator state
    1056             :   * @tag: tag to clear
    1057             :   */
    1058        9625 : void radix_tree_iter_tag_clear(struct radix_tree_root *root,
    1059             :                         const struct radix_tree_iter *iter, unsigned int tag)
    1060             : {
    1061       19250 :         node_tag_clear(root, iter->node, tag, iter_offset(iter));
    1062        9625 : }
    1063             : 
    1064             : /**
    1065             :  * radix_tree_tag_get - get a tag on a radix tree node
    1066             :  * @root:               radix tree root
    1067             :  * @index:              index key
    1068             :  * @tag:                tag index (< RADIX_TREE_MAX_TAGS)
    1069             :  *
    1070             :  * Return values:
    1071             :  *
    1072             :  *  0: tag not present or not set
    1073             :  *  1: tag set
    1074             :  *
    1075             :  * Note that the return value of this function may not be relied on, even if
    1076             :  * the RCU lock is held, unless tag modification and node deletion are excluded
    1077             :  * from concurrency.
    1078             :  */
    1079         350 : int radix_tree_tag_get(const struct radix_tree_root *root,
    1080             :                         unsigned long index, unsigned int tag)
    1081             : {
    1082             :         struct radix_tree_node *node, *parent;
    1083             :         unsigned long maxindex;
    1084             : 
    1085         700 :         if (!root_tag_get(root, tag))
    1086             :                 return 0;
    1087             : 
    1088         350 :         radix_tree_load_root(root, &node, &maxindex);
    1089         350 :         if (index > maxindex)
    1090             :                 return 0;
    1091             : 
    1092        1270 :         while (radix_tree_is_internal_node(node)) {
    1093             :                 unsigned offset;
    1094             : 
    1095        1270 :                 parent = entry_to_node(node);
    1096         635 :                 offset = radix_tree_descend(parent, &node, index);
    1097             : 
    1098         635 :                 if (!tag_get(parent, tag, offset))
    1099             :                         return 0;
    1100         285 :                 if (node == RADIX_TREE_RETRY)
    1101             :                         break;
    1102             :         }
    1103             : 
    1104             :         return 1;
    1105             : }
    1106             : EXPORT_SYMBOL(radix_tree_tag_get);
    1107             : 
    1108             : /* Construct iter->tags bit-mask from node->tags[tag] array */
    1109             : static void set_iter_tags(struct radix_tree_iter *iter,
    1110             :                                 struct radix_tree_node *node, unsigned offset,
    1111             :                                 unsigned tag)
    1112             : {
    1113        9625 :         unsigned tag_long = offset / BITS_PER_LONG;
    1114        9625 :         unsigned tag_bit  = offset % BITS_PER_LONG;
    1115             : 
    1116        9625 :         if (!node) {
    1117           0 :                 iter->tags = 1;
    1118             :                 return;
    1119             :         }
    1120             : 
    1121        9625 :         iter->tags = node->tags[tag][tag_long] >> tag_bit;
    1122             : 
    1123             :         /* This never happens if RADIX_TREE_TAG_LONGS == 1 */
    1124             :         if (tag_long < RADIX_TREE_TAG_LONGS - 1) {
    1125             :                 /* Pick tags from next element */
    1126             :                 if (tag_bit)
    1127             :                         iter->tags |= node->tags[tag][tag_long + 1] <<
    1128             :                                                 (BITS_PER_LONG - tag_bit);
    1129             :                 /* Clip chunk size, here only BITS_PER_LONG tags */
    1130             :                 iter->next_index = __radix_tree_iter_add(iter, BITS_PER_LONG);
    1131             :         }
    1132             : }
    1133             : 
    1134           0 : void __rcu **radix_tree_iter_resume(void __rcu **slot,
    1135             :                                         struct radix_tree_iter *iter)
    1136             : {
    1137           0 :         slot++;
    1138           0 :         iter->index = __radix_tree_iter_add(iter, 1);
    1139           0 :         iter->next_index = iter->index;
    1140           0 :         iter->tags = 0;
    1141           0 :         return NULL;
    1142             : }
    1143             : EXPORT_SYMBOL(radix_tree_iter_resume);
    1144             : 
    1145             : /**
    1146             :  * radix_tree_next_chunk - find next chunk of slots for iteration
    1147             :  *
    1148             :  * @root:       radix tree root
    1149             :  * @iter:       iterator state
    1150             :  * @flags:      RADIX_TREE_ITER_* flags and tag index
    1151             :  * Returns:     pointer to chunk first slot, or NULL if iteration is over
    1152             :  */
    1153           0 : void __rcu **radix_tree_next_chunk(const struct radix_tree_root *root,
    1154             :                              struct radix_tree_iter *iter, unsigned flags)
    1155             : {
    1156           0 :         unsigned tag = flags & RADIX_TREE_ITER_TAG_MASK;
    1157             :         struct radix_tree_node *node, *child;
    1158             :         unsigned long index, offset, maxindex;
    1159             : 
    1160           0 :         if ((flags & RADIX_TREE_ITER_TAGGED) && !root_tag_get(root, tag))
    1161             :                 return NULL;
    1162             : 
    1163             :         /*
    1164             :          * Catch next_index overflow after ~0UL. iter->index never overflows
    1165             :          * during iterating; it can be zero only at the beginning.
    1166             :          * And we cannot overflow iter->next_index in a single step,
    1167             :          * because RADIX_TREE_MAP_SHIFT < BITS_PER_LONG.
    1168             :          *
    1169             :          * This condition also used by radix_tree_next_slot() to stop
    1170             :          * contiguous iterating, and forbid switching to the next chunk.
    1171             :          */
    1172           0 :         index = iter->next_index;
    1173           0 :         if (!index && iter->index)
    1174             :                 return NULL;
    1175             : 
    1176             :  restart:
    1177           0 :         radix_tree_load_root(root, &child, &maxindex);
    1178           0 :         if (index > maxindex)
    1179             :                 return NULL;
    1180           0 :         if (!child)
    1181             :                 return NULL;
    1182             : 
    1183           0 :         if (!radix_tree_is_internal_node(child)) {
    1184             :                 /* Single-slot tree */
    1185           0 :                 iter->index = index;
    1186           0 :                 iter->next_index = maxindex + 1;
    1187           0 :                 iter->tags = 1;
    1188           0 :                 iter->node = NULL;
    1189           0 :                 return (void __rcu **)&root->xa_head;
    1190             :         }
    1191             : 
    1192             :         do {
    1193           0 :                 node = entry_to_node(child);
    1194           0 :                 offset = radix_tree_descend(node, &child, index);
    1195             : 
    1196           0 :                 if ((flags & RADIX_TREE_ITER_TAGGED) ?
    1197           0 :                                 !tag_get(node, tag, offset) : !child) {
    1198             :                         /* Hole detected */
    1199           0 :                         if (flags & RADIX_TREE_ITER_CONTIG)
    1200             :                                 return NULL;
    1201             : 
    1202           0 :                         if (flags & RADIX_TREE_ITER_TAGGED)
    1203           0 :                                 offset = radix_tree_find_next_bit(node, tag,
    1204             :                                                 offset + 1);
    1205             :                         else
    1206           0 :                                 while (++offset < RADIX_TREE_MAP_SIZE) {
    1207           0 :                                         void *slot = rcu_dereference_raw(
    1208             :                                                         node->slots[offset]);
    1209           0 :                                         if (slot)
    1210             :                                                 break;
    1211             :                                 }
    1212           0 :                         index &= ~node_maxindex(node);
    1213           0 :                         index += offset << node->shift;
    1214             :                         /* Overflow after ~0UL */
    1215           0 :                         if (!index)
    1216             :                                 return NULL;
    1217           0 :                         if (offset == RADIX_TREE_MAP_SIZE)
    1218             :                                 goto restart;
    1219           0 :                         child = rcu_dereference_raw(node->slots[offset]);
    1220             :                 }
    1221             : 
    1222           0 :                 if (!child)
    1223             :                         goto restart;
    1224           0 :                 if (child == RADIX_TREE_RETRY)
    1225             :                         break;
    1226           0 :         } while (node->shift && radix_tree_is_internal_node(child));
    1227             : 
    1228             :         /* Update the iterator state */
    1229           0 :         iter->index = (index &~ node_maxindex(node)) | offset;
    1230           0 :         iter->next_index = (index | node_maxindex(node)) + 1;
    1231           0 :         iter->node = node;
    1232             : 
    1233           0 :         if (flags & RADIX_TREE_ITER_TAGGED)
    1234           0 :                 set_iter_tags(iter, node, offset, tag);
    1235             : 
    1236           0 :         return node->slots + offset;
    1237             : }
    1238             : EXPORT_SYMBOL(radix_tree_next_chunk);
    1239             : 
    1240             : /**
    1241             :  *      radix_tree_gang_lookup - perform multiple lookup on a radix tree
    1242             :  *      @root:          radix tree root
    1243             :  *      @results:       where the results of the lookup are placed
    1244             :  *      @first_index:   start the lookup from this key
    1245             :  *      @max_items:     place up to this many items at *results
    1246             :  *
    1247             :  *      Performs an index-ascending scan of the tree for present items.  Places
    1248             :  *      them at *@results and returns the number of items which were placed at
    1249             :  *      *@results.
    1250             :  *
    1251             :  *      The implementation is naive.
    1252             :  *
    1253             :  *      Like radix_tree_lookup, radix_tree_gang_lookup may be called under
    1254             :  *      rcu_read_lock. In this case, rather than the returned results being
    1255             :  *      an atomic snapshot of the tree at a single point in time, the
    1256             :  *      semantics of an RCU protected gang lookup are as though multiple
    1257             :  *      radix_tree_lookups have been issued in individual locks, and results
    1258             :  *      stored in 'results'.
    1259             :  */
    1260             : unsigned int
    1261           0 : radix_tree_gang_lookup(const struct radix_tree_root *root, void **results,
    1262             :                         unsigned long first_index, unsigned int max_items)
    1263             : {
    1264             :         struct radix_tree_iter iter;
    1265             :         void __rcu **slot;
    1266           0 :         unsigned int ret = 0;
    1267             : 
    1268           0 :         if (unlikely(!max_items))
    1269             :                 return 0;
    1270             : 
    1271           0 :         radix_tree_for_each_slot(slot, root, &iter, first_index) {
    1272           0 :                 results[ret] = rcu_dereference_raw(*slot);
    1273           0 :                 if (!results[ret])
    1274           0 :                         continue;
    1275           0 :                 if (radix_tree_is_internal_node(results[ret])) {
    1276           0 :                         slot = radix_tree_iter_retry(&iter);
    1277           0 :                         continue;
    1278             :                 }
    1279           0 :                 if (++ret == max_items)
    1280             :                         break;
    1281             :         }
    1282             : 
    1283             :         return ret;
    1284             : }
    1285             : EXPORT_SYMBOL(radix_tree_gang_lookup);
    1286             : 
    1287             : /**
    1288             :  *      radix_tree_gang_lookup_tag - perform multiple lookup on a radix tree
    1289             :  *                                   based on a tag
    1290             :  *      @root:          radix tree root
    1291             :  *      @results:       where the results of the lookup are placed
    1292             :  *      @first_index:   start the lookup from this key
    1293             :  *      @max_items:     place up to this many items at *results
    1294             :  *      @tag:           the tag index (< RADIX_TREE_MAX_TAGS)
    1295             :  *
    1296             :  *      Performs an index-ascending scan of the tree for present items which
    1297             :  *      have the tag indexed by @tag set.  Places the items at *@results and
    1298             :  *      returns the number of items which were placed at *@results.
    1299             :  */
    1300             : unsigned int
    1301           0 : radix_tree_gang_lookup_tag(const struct radix_tree_root *root, void **results,
    1302             :                 unsigned long first_index, unsigned int max_items,
    1303             :                 unsigned int tag)
    1304             : {
    1305             :         struct radix_tree_iter iter;
    1306             :         void __rcu **slot;
    1307           0 :         unsigned int ret = 0;
    1308             : 
    1309           0 :         if (unlikely(!max_items))
    1310             :                 return 0;
    1311             : 
    1312           0 :         radix_tree_for_each_tagged(slot, root, &iter, first_index, tag) {
    1313           0 :                 results[ret] = rcu_dereference_raw(*slot);
    1314           0 :                 if (!results[ret])
    1315           0 :                         continue;
    1316           0 :                 if (radix_tree_is_internal_node(results[ret])) {
    1317           0 :                         slot = radix_tree_iter_retry(&iter);
    1318           0 :                         continue;
    1319             :                 }
    1320           0 :                 if (++ret == max_items)
    1321             :                         break;
    1322             :         }
    1323             : 
    1324             :         return ret;
    1325             : }
    1326             : EXPORT_SYMBOL(radix_tree_gang_lookup_tag);
    1327             : 
    1328             : /**
    1329             :  *      radix_tree_gang_lookup_tag_slot - perform multiple slot lookup on a
    1330             :  *                                        radix tree based on a tag
    1331             :  *      @root:          radix tree root
    1332             :  *      @results:       where the results of the lookup are placed
    1333             :  *      @first_index:   start the lookup from this key
    1334             :  *      @max_items:     place up to this many items at *results
    1335             :  *      @tag:           the tag index (< RADIX_TREE_MAX_TAGS)
    1336             :  *
    1337             :  *      Performs an index-ascending scan of the tree for present items which
    1338             :  *      have the tag indexed by @tag set.  Places the slots at *@results and
    1339             :  *      returns the number of slots which were placed at *@results.
    1340             :  */
    1341             : unsigned int
    1342           0 : radix_tree_gang_lookup_tag_slot(const struct radix_tree_root *root,
    1343             :                 void __rcu ***results, unsigned long first_index,
    1344             :                 unsigned int max_items, unsigned int tag)
    1345             : {
    1346             :         struct radix_tree_iter iter;
    1347             :         void __rcu **slot;
    1348           0 :         unsigned int ret = 0;
    1349             : 
    1350           0 :         if (unlikely(!max_items))
    1351             :                 return 0;
    1352             : 
    1353           0 :         radix_tree_for_each_tagged(slot, root, &iter, first_index, tag) {
    1354           0 :                 results[ret] = slot;
    1355           0 :                 if (++ret == max_items)
    1356             :                         break;
    1357             :         }
    1358             : 
    1359             :         return ret;
    1360             : }
    1361             : EXPORT_SYMBOL(radix_tree_gang_lookup_tag_slot);
    1362             : 
    1363        1397 : static bool __radix_tree_delete(struct radix_tree_root *root,
    1364             :                                 struct radix_tree_node *node, void __rcu **slot)
    1365             : {
    1366        1397 :         void *old = rcu_dereference_raw(*slot);
    1367        1397 :         int values = xa_is_value(old) ? -1 : 0;
    1368        1397 :         unsigned offset = get_slot_offset(node, slot);
    1369             :         int tag;
    1370             : 
    1371        2794 :         if (is_idr(root))
    1372        1397 :                 node_tag_set(root, node, IDR_FREE, offset);
    1373             :         else
    1374           0 :                 for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
    1375           0 :                         node_tag_clear(root, node, tag, offset);
    1376             : 
    1377        1397 :         replace_slot(slot, NULL, node, -1, values);
    1378        1397 :         return node && delete_node(root, node);
    1379             : }
    1380             : 
    1381             : /**
    1382             :  * radix_tree_iter_delete - delete the entry at this iterator position
    1383             :  * @root: radix tree root
    1384             :  * @iter: iterator state
    1385             :  * @slot: pointer to slot
    1386             :  *
    1387             :  * Delete the entry at the position currently pointed to by the iterator.
    1388             :  * This may result in the current node being freed; if it is, the iterator
    1389             :  * is advanced so that it will not reference the freed memory.  This
    1390             :  * function may be called without any locking if there are no other threads
    1391             :  * which can access this tree.
    1392             :  */
    1393           0 : void radix_tree_iter_delete(struct radix_tree_root *root,
    1394             :                                 struct radix_tree_iter *iter, void __rcu **slot)
    1395             : {
    1396           0 :         if (__radix_tree_delete(root, iter->node, slot))
    1397           0 :                 iter->index = iter->next_index;
    1398           0 : }
    1399             : EXPORT_SYMBOL(radix_tree_iter_delete);
    1400             : 
    1401             : /**
    1402             :  * radix_tree_delete_item - delete an item from a radix tree
    1403             :  * @root: radix tree root
    1404             :  * @index: index key
    1405             :  * @item: expected item
    1406             :  *
    1407             :  * Remove @item at @index from the radix tree rooted at @root.
    1408             :  *
    1409             :  * Return: the deleted entry, or %NULL if it was not present
    1410             :  * or the entry at the given @index was not @item.
    1411             :  */
    1412        1397 : void *radix_tree_delete_item(struct radix_tree_root *root,
    1413             :                              unsigned long index, void *item)
    1414             : {
    1415        1397 :         struct radix_tree_node *node = NULL;
    1416        1397 :         void __rcu **slot = NULL;
    1417             :         void *entry;
    1418             : 
    1419        1397 :         entry = __radix_tree_lookup(root, index, &node, &slot);
    1420        1397 :         if (!slot)
    1421             :                 return NULL;
    1422        1484 :         if (!entry && (!is_idr(root) || node_tag_get(root, node, IDR_FREE,
    1423          58 :                                                 get_slot_offset(node, slot))))
    1424             :                 return NULL;
    1425             : 
    1426        1397 :         if (item && entry != item)
    1427             :                 return NULL;
    1428             : 
    1429        1397 :         __radix_tree_delete(root, node, slot);
    1430             : 
    1431        1397 :         return entry;
    1432             : }
    1433             : EXPORT_SYMBOL(radix_tree_delete_item);
    1434             : 
    1435             : /**
    1436             :  * radix_tree_delete - delete an entry from a radix tree
    1437             :  * @root: radix tree root
    1438             :  * @index: index key
    1439             :  *
    1440             :  * Remove the entry at @index from the radix tree rooted at @root.
    1441             :  *
    1442             :  * Return: The deleted entry, or %NULL if it was not present.
    1443             :  */
    1444           0 : void *radix_tree_delete(struct radix_tree_root *root, unsigned long index)
    1445             : {
    1446           0 :         return radix_tree_delete_item(root, index, NULL);
    1447             : }
    1448             : EXPORT_SYMBOL(radix_tree_delete);
    1449             : 
    1450             : /**
    1451             :  *      radix_tree_tagged - test whether any items in the tree are tagged
    1452             :  *      @root:          radix tree root
    1453             :  *      @tag:           tag to test
    1454             :  */
    1455           0 : int radix_tree_tagged(const struct radix_tree_root *root, unsigned int tag)
    1456             : {
    1457       19250 :         return root_tag_get(root, tag);
    1458             : }
    1459             : EXPORT_SYMBOL(radix_tree_tagged);
    1460             : 
    1461             : /**
    1462             :  * idr_preload - preload for idr_alloc()
    1463             :  * @gfp_mask: allocation mask to use for preloading
    1464             :  *
    1465             :  * Preallocate memory to use for the next call to idr_alloc().  This function
    1466             :  * returns with preemption disabled.  It will be enabled by idr_preload_end().
    1467             :  */
    1468        9009 : void idr_preload(gfp_t gfp_mask)
    1469             : {
    1470        9009 :         if (__radix_tree_preload(gfp_mask, IDR_PRELOAD_SIZE))
    1471           0 :                 local_lock(&radix_tree_preloads.lock);
    1472        9009 : }
    1473             : EXPORT_SYMBOL(idr_preload);
    1474             : 
    1475        9625 : void __rcu **idr_get_free(struct radix_tree_root *root,
    1476             :                               struct radix_tree_iter *iter, gfp_t gfp,
    1477             :                               unsigned long max)
    1478             : {
    1479        9625 :         struct radix_tree_node *node = NULL, *child;
    1480        9625 :         void __rcu **slot = (void __rcu **)&root->xa_head;
    1481        9625 :         unsigned long maxindex, start = iter->next_index;
    1482        9625 :         unsigned int shift, offset = 0;
    1483             : 
    1484             :  grow:
    1485        9625 :         shift = radix_tree_load_root(root, &child, &maxindex);
    1486        9625 :         if (!radix_tree_tagged(root, IDR_FREE))
    1487           0 :                 start = max(start, maxindex + 1);
    1488        9625 :         if (start > max)
    1489             :                 return ERR_PTR(-ENOSPC);
    1490             : 
    1491        9625 :         if (start > maxindex) {
    1492         289 :                 int error = radix_tree_extend(root, gfp, start, shift);
    1493         289 :                 if (error < 0)
    1494           0 :                         return ERR_PTR(error);
    1495         289 :                 shift = error;
    1496         289 :                 child = rcu_dereference_raw(root->xa_head);
    1497             :         }
    1498        9625 :         if (start == 0 && shift == 0)
    1499          35 :                 shift = RADIX_TREE_MAP_SHIFT;
    1500             : 
    1501       32665 :         while (shift) {
    1502       23040 :                 shift -= RADIX_TREE_MAP_SHIFT;
    1503       23040 :                 if (child == NULL) {
    1504             :                         /* Have to add a child node.  */
    1505         476 :                         child = radix_tree_node_alloc(gfp, node, root, shift,
    1506             :                                                         offset, 0, 0);
    1507         476 :                         if (!child)
    1508             :                                 return ERR_PTR(-ENOMEM);
    1509         952 :                         all_tag_set(child, IDR_FREE);
    1510         952 :                         rcu_assign_pointer(*slot, node_to_entry(child));
    1511         476 :                         if (node)
    1512         439 :                                 node->count++;
    1513       45128 :                 } else if (!radix_tree_is_internal_node(child))
    1514             :                         break;
    1515             : 
    1516       46080 :                 node = entry_to_node(child);
    1517       23040 :                 offset = radix_tree_descend(node, &child, start);
    1518       23040 :                 if (!tag_get(node, IDR_FREE, offset)) {
    1519        1196 :                         offset = radix_tree_find_next_bit(node, IDR_FREE,
    1520         598 :                                                         offset + 1);
    1521        1196 :                         start = next_index(start, node, offset);
    1522         598 :                         if (start > max || start == 0)
    1523             :                                 return ERR_PTR(-ENOSPC);
    1524         598 :                         while (offset == RADIX_TREE_MAP_SIZE) {
    1525           0 :                                 offset = node->offset + 1;
    1526           0 :                                 node = node->parent;
    1527           0 :                                 if (!node)
    1528             :                                         goto grow;
    1529           0 :                                 shift = node->shift;
    1530             :                         }
    1531         598 :                         child = rcu_dereference_raw(node->slots[offset]);
    1532             :                 }
    1533       23040 :                 slot = &node->slots[offset];
    1534             :         }
    1535             : 
    1536        9625 :         iter->index = start;
    1537        9625 :         if (node)
    1538       19250 :                 iter->next_index = 1 + min(max, (start | node_maxindex(node)));
    1539             :         else
    1540           0 :                 iter->next_index = 1;
    1541        9625 :         iter->node = node;
    1542        9625 :         set_iter_tags(iter, node, offset, IDR_FREE);
    1543             : 
    1544             :         return slot;
    1545             : }
    1546             : 
    1547             : /**
    1548             :  * idr_destroy - release all internal memory from an IDR
    1549             :  * @idr: idr handle
    1550             :  *
    1551             :  * After this function is called, the IDR is empty, and may be reused or
    1552             :  * the data structure containing it may be freed.
    1553             :  *
    1554             :  * A typical clean-up sequence for objects stored in an idr tree will use
    1555             :  * idr_for_each() to free all objects, if necessary, then idr_destroy() to
    1556             :  * free the memory used to keep track of those objects.
    1557             :  */
    1558          34 : void idr_destroy(struct idr *idr)
    1559             : {
    1560          34 :         struct radix_tree_node *node = rcu_dereference_raw(idr->idr_rt.xa_head);
    1561          34 :         if (radix_tree_is_internal_node(node))
    1562           0 :                 radix_tree_free_nodes(node);
    1563          34 :         idr->idr_rt.xa_head = NULL;
    1564          68 :         root_tag_set(&idr->idr_rt, IDR_FREE);
    1565          34 : }
    1566             : EXPORT_SYMBOL(idr_destroy);
    1567             : 
    1568             : static void
    1569         168 : radix_tree_node_ctor(void *arg)
    1570             : {
    1571         168 :         struct radix_tree_node *node = arg;
    1572             : 
    1573         168 :         memset(node, 0, sizeof(*node));
    1574         336 :         INIT_LIST_HEAD(&node->private_list);
    1575         168 : }
    1576             : 
    1577           0 : static int radix_tree_cpu_dead(unsigned int cpu)
    1578             : {
    1579             :         struct radix_tree_preload *rtp;
    1580             :         struct radix_tree_node *node;
    1581             : 
    1582             :         /* Free per-cpu pool of preloaded nodes */
    1583           0 :         rtp = &per_cpu(radix_tree_preloads, cpu);
    1584           0 :         while (rtp->nr) {
    1585           0 :                 node = rtp->nodes;
    1586           0 :                 rtp->nodes = node->parent;
    1587           0 :                 kmem_cache_free(radix_tree_node_cachep, node);
    1588           0 :                 rtp->nr--;
    1589             :         }
    1590           0 :         return 0;
    1591             : }
    1592             : 
    1593           1 : void __init radix_tree_init(void)
    1594             : {
    1595             :         int ret;
    1596             : 
    1597             :         BUILD_BUG_ON(RADIX_TREE_MAX_TAGS + __GFP_BITS_SHIFT > 32);
    1598             :         BUILD_BUG_ON(ROOT_IS_IDR & ~GFP_ZONEMASK);
    1599             :         BUILD_BUG_ON(XA_CHUNK_SIZE > 255);
    1600           1 :         radix_tree_node_cachep = kmem_cache_create("radix_tree_node",
    1601             :                         sizeof(struct radix_tree_node), 0,
    1602             :                         SLAB_PANIC | SLAB_RECLAIM_ACCOUNT,
    1603             :                         radix_tree_node_ctor);
    1604           1 :         ret = cpuhp_setup_state_nocalls(CPUHP_RADIX_DEAD, "lib/radix:dead",
    1605             :                                         NULL, radix_tree_cpu_dead);
    1606           1 :         WARN_ON(ret < 0);
    1607           1 : }

Generated by: LCOV version 1.14