LCOV - code coverage report
Current view:   top level - lib - group_cpus.c (source / functions)
Test:           coverage.info
Date:           2023-03-27 20:00:47
                                      Hit      Total    Coverage
Lines:                                  0          5       0.0 %
Functions:                              0          1       0.0 %

          Line data    Source code
       1             : // SPDX-License-Identifier: GPL-2.0
       2             : /*
       3             :  * Copyright (C) 2016 Thomas Gleixner.
       4             :  * Copyright (C) 2016-2017 Christoph Hellwig.
       5             :  */
       6             : #include <linux/kernel.h>
       7             : #include <linux/slab.h>
       8             : #include <linux/cpu.h>
       9             : #include <linux/sort.h>
      10             : #include <linux/group_cpus.h>
      11             : 
      12             : #ifdef CONFIG_SMP
      13             : 
      14             : static void grp_spread_init_one(struct cpumask *irqmsk, struct cpumask *nmsk,
      15             :                                 unsigned int cpus_per_grp)
      16             : {
      17             :         const struct cpumask *siblmsk;
      18             :         int cpu, sibl;
      19             : 
      20             :         for ( ; cpus_per_grp > 0; ) {
      21             :                 cpu = cpumask_first(nmsk);
      22             : 
      23             :                 /* Should not happen, but I'm too lazy to think about it */
      24             :                 if (cpu >= nr_cpu_ids)
      25             :                         return;
      26             : 
      27             :                 cpumask_clear_cpu(cpu, nmsk);
      28             :                 cpumask_set_cpu(cpu, irqmsk);
      29             :                 cpus_per_grp--;
      30             : 
      31             :                 /* If the cpu has siblings, use them first */
      32             :                 siblmsk = topology_sibling_cpumask(cpu);
      33             :                 for (sibl = -1; cpus_per_grp > 0; ) {
      34             :                         sibl = cpumask_next(sibl, siblmsk);
      35             :                         if (sibl >= nr_cpu_ids)
      36             :                                 break;
      37             :                         if (!cpumask_test_and_clear_cpu(sibl, nmsk))
      38             :                                 continue;
      39             :                         cpumask_set_cpu(sibl, irqmsk);
      40             :                         cpus_per_grp--;
      41             :                 }
      42             :         }
      43             : }
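
For illustration, here is a minimal userspace sketch of the sibling-first spreading that grp_spread_init_one() above performs. It assumes a made-up 2-way SMT topology: sibling_of() is an invented stand-in for topology_sibling_cpumask(), and plain boolean arrays replace struct cpumask.

    #include <stdbool.h>
    #include <stdio.h>

    #define NCPUS 8

    /* toy topology: CPU n and CPU (n ^ 1) are hyper-thread siblings */
    static int sibling_of(int cpu) { return cpu ^ 1; }

    /* move up to cpus_per_grp CPUs from nmsk into grpmsk, siblings first */
    static void spread_one_group(bool *nmsk, bool *grpmsk, int cpus_per_grp)
    {
            for (int cpu = 0; cpu < NCPUS && cpus_per_grp > 0; cpu++) {
                    int sibl = sibling_of(cpu);

                    if (!nmsk[cpu])
                            continue;
                    nmsk[cpu] = false;
                    grpmsk[cpu] = true;
                    cpus_per_grp--;

                    /* prefer the unused sibling of the CPU just taken */
                    if (cpus_per_grp > 0 && nmsk[sibl]) {
                            nmsk[sibl] = false;
                            grpmsk[sibl] = true;
                            cpus_per_grp--;
                    }
            }
    }

    int main(void)
    {
            bool nmsk[NCPUS], grp[NCPUS] = { false };

            for (int cpu = 0; cpu < NCPUS; cpu++)
                    nmsk[cpu] = true;

            spread_one_group(nmsk, grp, 4);
            for (int cpu = 0; cpu < NCPUS; cpu++)
                    if (grp[cpu])
                            printf("cpu%d ", cpu);      /* cpu0 cpu1 cpu2 cpu3 */
            printf("\n");
            return 0;
    }
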
      44             : 
      45             : static cpumask_var_t *alloc_node_to_cpumask(void)
      46             : {
      47             :         cpumask_var_t *masks;
      48             :         int node;
      49             : 
      50             :         masks = kcalloc(nr_node_ids, sizeof(cpumask_var_t), GFP_KERNEL);
      51             :         if (!masks)
      52             :                 return NULL;
      53             : 
      54             :         for (node = 0; node < nr_node_ids; node++) {
      55             :                 if (!zalloc_cpumask_var(&masks[node], GFP_KERNEL))
      56             :                         goto out_unwind;
      57             :         }
      58             : 
      59             :         return masks;
      60             : 
      61             : out_unwind:
      62             :         while (--node >= 0)
      63             :                 free_cpumask_var(masks[node]);
      64             :         kfree(masks);
      65             :         return NULL;
      66             : }
      67             : 
      68             : static void free_node_to_cpumask(cpumask_var_t *masks)
      69             : {
      70             :         int node;
      71             : 
      72             :         for (node = 0; node < nr_node_ids; node++)
      73             :                 free_cpumask_var(masks[node]);
      74             :         kfree(masks);
      75             : }
      76             : 
      77             : static void build_node_to_cpumask(cpumask_var_t *masks)
      78             : {
      79             :         int cpu;
      80             : 
      81             :         for_each_possible_cpu(cpu)
      82             :                 cpumask_set_cpu(cpu, masks[cpu_to_node(cpu)]);
      83             : }
      84             : 
      85             : static int get_nodes_in_cpumask(cpumask_var_t *node_to_cpumask,
      86             :                                 const struct cpumask *mask, nodemask_t *nodemsk)
      87             : {
      88             :         int n, nodes = 0;
      89             : 
      90             :         /* Calculate the number of nodes in the supplied affinity mask */
      91             :         for_each_node(n) {
      92             :                 if (cpumask_intersects(mask, node_to_cpumask[n])) {
      93             :                         node_set(n, *nodemsk);
      94             :                         nodes++;
      95             :                 }
      96             :         }
      97             :         return nodes;
      98             : }
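
As a rough analogue of get_nodes_in_cpumask() above, the snippet below counts which nodes of a made-up four-node layout intersect a CPU mask, using plain integer bitmasks in place of the cpumask/nodemask primitives; the node-to-CPU table and the mask value are invented for illustration.

    #include <stdio.h>

    #define NR_NODES 4

    int main(void)
    {
            /* toy node->CPU table: two CPUs per node, as bits of an 8-CPU mask */
            unsigned int node_to_cpumask[NR_NODES] = { 0x03, 0x0c, 0x30, 0xc0 };
            unsigned int mask = 0x0f;       /* affinity mask covering CPUs 0-3 */
            unsigned int nodemsk = 0;
            int n, nodes = 0;

            for (n = 0; n < NR_NODES; n++) {
                    if (mask & node_to_cpumask[n]) {        /* cpumask_intersects() */
                            nodemsk |= 1u << n;             /* node_set() */
                            nodes++;
                    }
            }
            printf("nodes=%d nodemsk=0x%x\n", nodes, nodemsk);  /* nodes=2 nodemsk=0x3 */
            return 0;
    }
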
      99             : 
     100             : struct node_groups {
     101             :         unsigned id;
     102             : 
     103             :         union {
     104             :                 unsigned ngroups;
     105             :                 unsigned ncpus;
     106             :         };
     107             : };
     108             : 
     109             : static int ncpus_cmp_func(const void *l, const void *r)
     110             : {
     111             :         const struct node_groups *ln = l;
     112             :         const struct node_groups *rn = r;
     113             : 
     114             :         return ln->ncpus - rn->ncpus;
     115             : }
     116             : 
     117             : /*
     118             :  * Allocate a group count for each node, so that for each node:
     119             :  *
     120             :  * 1) the allocated number is >= 1
     121             :  *
     122             :  * 2) the allocated number is <= the number of active CPUs in this node
     123             :  *
     124             :  * The total number of allocated groups may be less than @numgrps when
     125             :  * the total number of active CPUs is less than @numgrps.
     126             :  *
     127             :  * Active CPUs means the CPUs in '@cpu_mask AND @node_to_cpumask[]'
     128             :  * for each node.
     129             :  */
     130             : static void alloc_nodes_groups(unsigned int numgrps,
     131             :                                cpumask_var_t *node_to_cpumask,
     132             :                                const struct cpumask *cpu_mask,
     133             :                                const nodemask_t nodemsk,
     134             :                                struct cpumask *nmsk,
     135             :                                struct node_groups *node_groups)
     136             : {
     137             :         unsigned n, remaining_ncpus = 0;
     138             : 
     139             :         for (n = 0; n < nr_node_ids; n++) {
     140             :                 node_groups[n].id = n;
     141             :                 node_groups[n].ncpus = UINT_MAX;
     142             :         }
     143             : 
     144             :         for_each_node_mask(n, nodemsk) {
     145             :                 unsigned ncpus;
     146             : 
     147             :                 cpumask_and(nmsk, cpu_mask, node_to_cpumask[n]);
     148             :                 ncpus = cpumask_weight(nmsk);
     149             : 
     150             :                 if (!ncpus)
     151             :                         continue;
     152             :                 remaining_ncpus += ncpus;
     153             :                 node_groups[n].ncpus = ncpus;
     154             :         }
     155             : 
     156             :         numgrps = min_t(unsigned, remaining_ncpus, numgrps);
     157             : 
     158             :         sort(node_groups, nr_node_ids, sizeof(node_groups[0]),
     159             :              ncpus_cmp_func, NULL);
     160             : 
     161             :         /*
     162             :          * Allocate groups to each node according to the ratio of this
     163             :          * node's CPU count to the remaining unassigned CPU count. Here
     164             :          * 'numgrps' is larger than the number of active NUMA nodes.
     165             :          * Always start the allocation from the node with the fewest CPUs.
     166             :          *
     167             :          * This guarantees that every active node is allocated at least
     168             :          * one group, and the reasoning is simple: over-allocation only
     169             :          * happens when a node is rounded up to one group, so the other
     170             :          * nodes can still each get >= 1 group, since 'numgrps' is larger
     171             :          * than the number of NUMA nodes.
     172             :          *
     173             :          * The key invariant is that the number of groups allocated to
     174             :          * each node is <= the CPU count of that node:
     175             :          *
     176             :          * 1) suppose there are two nodes: A and B
     177             :          *      ncpu(X) is CPU count of node X
     178             :          *      grps(X) is the group count allocated to node X via this
     179             :          *      algorithm
     180             :          *
     181             :          *      ncpu(A) <= ncpu(B)
     182             :          *      ncpu(A) + ncpu(B) = N
     183             :          *      grps(A) + grps(B) = G
     184             :          *
     185             :          *      grps(A) = max(1, round_down(G * ncpu(A) / N))
     186             :          *      grps(B) = G - grps(A)
     187             :          *
     188             :          *      both N and G are integer, and 2 <= G <= N, suppose
     189             :          *      G = N - delta, and 0 <= delta <= N - 2
     190             :          *
     191             :          * 2) obviously grps(A) <= ncpu(A) because:
     192             :          *
     193             :          *      if grps(A) is 1, then grps(A) <= ncpu(A) given
     194             :          *      ncpu(A) >= 1
     195             :          *
     196             :          *      otherwise,
     197             :          *              grps(A) <= G * ncpu(A) / N <= ncpu(A), given G <= N
     198             :          *
     199             :          * 3) proof that grps(B) <= ncpu(B):
     200             :          *
     201             :          *      if round_down(G * ncpu(A) / N) == 0, grps(B) won't be
     202             :          *      over-allocated, so grps(B) <= ncpu(B),
     203             :          *
     204             :          *      otherwise:
     205             :          *
     206             :          *      grps(A) =
     207             :          *              round_down(G * ncpu(A) / N) =
     208             :          *              round_down((N - delta) * ncpu(A) / N) =
     209             :          *              round_down((N * ncpu(A) - delta * ncpu(A)) / N)  >=
     210             :          *              round_down((N * ncpu(A) - delta * N) / N)        =
     211             :          *              ncpu(A) - delta
     212             :          *
     213             :          *      then:
     214             :          *
     215             :          *      grps(A) - G >= ncpu(A) - delta - G
     216             :          *      =>
     217             :          *      G - grps(A) <= G + delta - ncpu(A)
     218             :          *      =>   (as grps(B) = G - grps(A) and G + delta = N)
     219             :          *      grps(B) <= N - ncpu(A)
     220             :          *      =>   (as N = ncpu(A) + ncpu(B))
     221             :          *      grps(B) <= ncpu(B)
     222             :          *
     223             :          * For three or more nodes, each step can be viewed as one node
     224             :          * versus one big combined node, which is exactly how this
     225             :          * algorithm proceeds: 'remaining_ncpus' & 'numgrps' are always
     226             :          * re-calculated, so finally grps(X) <= ncpu(X) for each node X.
     227             :          * (A worked numeric example follows this function.)
     228             :          */
     229             :         for (n = 0; n < nr_node_ids; n++) {
     230             :                 unsigned ngroups, ncpus;
     231             : 
     232             :                 if (node_groups[n].ncpus == UINT_MAX)
     233             :                         continue;
     234             : 
     235             :                 WARN_ON_ONCE(numgrps == 0);
     236             : 
     237             :                 ncpus = node_groups[n].ncpus;
     238             :                 ngroups = max_t(unsigned, 1,
     239             :                                  numgrps * ncpus / remaining_ncpus);
     240             :                 WARN_ON_ONCE(ngroups > ncpus);
     241             : 
     242             :                 node_groups[n].ngroups = ngroups;
     243             : 
     244             :                 remaining_ncpus -= ncpus;
     245             :                 numgrps -= ngroups;
     246             :         }
     247             : }
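
To make the ratio argument in the comment above concrete, here is a standalone worked example with an invented two-node topology: node A has 2 active CPUs, node B has 6, and numgrps = 4. Processing the smaller node first gives grps(A) = max(1, 4 * 2 / 8) = 1; the remaining 3 groups then go to node B, so grps(X) <= ncpu(X) holds for both nodes.

    #include <stdio.h>

    int main(void)
    {
            /* active CPU counts per node, already sorted ascending (A, B) */
            unsigned int ncpus[] = { 2, 6 };
            unsigned int remaining_ncpus = 8, numgrps = 4;

            for (int n = 0; n < 2; n++) {
                    unsigned int ngroups = numgrps * ncpus[n] / remaining_ncpus;

                    if (ngroups < 1)
                            ngroups = 1;    /* every active node gets >= 1 group */

                    printf("node %d: ncpus=%u -> ngroups=%u\n",
                           n, ncpus[n], ngroups);

                    remaining_ncpus -= ncpus[n];
                    numgrps -= ngroups;
            }
            /* prints: node 0: ncpus=2 -> ngroups=1
             *         node 1: ncpus=6 -> ngroups=3 */
            return 0;
    }

Handling the nodes in ascending ncpus order is what lets any rounding-up to one group be absorbed by the larger nodes processed later.
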
     248             : 
     249             : static int __group_cpus_evenly(unsigned int startgrp, unsigned int numgrps,
     250             :                                cpumask_var_t *node_to_cpumask,
     251             :                                const struct cpumask *cpu_mask,
     252             :                                struct cpumask *nmsk, struct cpumask *masks)
     253             : {
     254             :         unsigned int i, n, nodes, cpus_per_grp, extra_grps, done = 0;
     255             :         unsigned int last_grp = numgrps;
     256             :         unsigned int curgrp = startgrp;
     257             :         nodemask_t nodemsk = NODE_MASK_NONE;
     258             :         struct node_groups *node_groups;
     259             : 
     260             :         if (cpumask_empty(cpu_mask))
     261             :                 return 0;
     262             : 
     263             :         nodes = get_nodes_in_cpumask(node_to_cpumask, cpu_mask, &nodemsk);
     264             : 
     265             :         /*
     266             :          * If the number of nodes in the mask is greater than or equal to
     267             :          * the number of groups, we just spread the groups across the nodes.
     268             :          */
     269             :         if (numgrps <= nodes) {
     270             :                 for_each_node_mask(n, nodemsk) {
     271             :                         /* Ensure that only CPUs which are in both masks are set */
     272             :                         cpumask_and(nmsk, cpu_mask, node_to_cpumask[n]);
     273             :                         cpumask_or(&masks[curgrp], &masks[curgrp], nmsk);
     274             :                         if (++curgrp == last_grp)
     275             :                                 curgrp = 0;
     276             :                 }
     277             :                 return numgrps;
     278             :         }
     279             : 
     280             :         node_groups = kcalloc(nr_node_ids,
     281             :                                sizeof(struct node_groups),
     282             :                                GFP_KERNEL);
     283             :         if (!node_groups)
     284             :                 return -ENOMEM;
     285             : 
     286             :         /* allocate a group count for each node */
     287             :         alloc_nodes_groups(numgrps, node_to_cpumask, cpu_mask,
     288             :                            nodemsk, nmsk, node_groups);
     289             :         for (i = 0; i < nr_node_ids; i++) {
     290             :                 unsigned int ncpus, v;
     291             :                 struct node_groups *nv = &node_groups[i];
     292             : 
     293             :                 if (nv->ngroups == UINT_MAX)
     294             :                         continue;
     295             : 
     296             :                 /* Get the cpus on this node which are in the mask */
     297             :                 cpumask_and(nmsk, cpu_mask, node_to_cpumask[nv->id]);
     298             :                 ncpus = cpumask_weight(nmsk);
     299             :                 if (!ncpus)
     300             :                         continue;
     301             : 
     302             :                 WARN_ON_ONCE(nv->ngroups > ncpus);
     303             : 
     304             :                 /* Account for rounding errors */
     305             :                 extra_grps = ncpus - nv->ngroups * (ncpus / nv->ngroups);
     306             : 
     307             :                 /* Spread allocated groups on CPUs of the current node */
     308             :                 for (v = 0; v < nv->ngroups; v++, curgrp++) {
     309             :                         cpus_per_grp = ncpus / nv->ngroups;
     310             : 
     311             :                         /* Account for extra groups to compensate for rounding errors */
     312             :                         if (extra_grps) {
     313             :                                 cpus_per_grp++;
     314             :                                 --extra_grps;
     315             :                         }
     316             : 
     317             :                         /*
     318             :                          * wrapping has to be considered, given that
     319             :                          * 'startgrp' may point anywhere in the group array
     320             :                          */
     321             :                         if (curgrp >= last_grp)
     322             :                                 curgrp = 0;
     323             :                         grp_spread_init_one(&masks[curgrp], nmsk,
     324             :                                                 cpus_per_grp);
     325             :                 }
     326             :                 done += nv->ngroups;
     327             :         }
     328             :         kfree(node_groups);
     329             :         return done;
     330             : }
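
A quick standalone check of the extra_grps accounting above, with made-up counts: distributing 7 CPUs of one node over 3 groups gives ncpus / ngroups = 2 and extra_grps = 7 - 3 * 2 = 1, so the groups end up with 3, 2 and 2 CPUs.

    #include <stdio.h>

    int main(void)
    {
            unsigned int ncpus = 7, ngroups = 3;
            unsigned int extra_grps = ncpus - ngroups * (ncpus / ngroups);

            for (unsigned int v = 0; v < ngroups; v++) {
                    unsigned int cpus_per_grp = ncpus / ngroups;

                    /* hand the leftover CPUs to the first groups */
                    if (extra_grps) {
                            cpus_per_grp++;
                            --extra_grps;
                    }
                    printf("group %u gets %u CPUs\n", v, cpus_per_grp);
            }
            /* prints: 3, 2, 2 */
            return 0;
    }
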
     331             : 
     332             : /**
     333             :  * group_cpus_evenly - Group all CPUs evenly per NUMA/CPU locality
     334             :  * @numgrps: number of groups
     335             :  *
     336             :  * Return: cpumask array if successful, NULL otherwise. Each element of
     337             :  * the array holds the CPUs assigned to that group.
     338             :  *
     339             :  * Try to put CPUs that are close in terms of CPU and NUMA locality into
     340             :  * the same group, and run a two-stage grouping:
     341             :  *      1) spread present CPUs across the groups evenly first
     342             :  *      2) spread the remaining possible CPUs across the groups evenly
     343             :  *
     344             :  * The resulting grouping guarantees that all CPUs are covered and that
     345             :  * no CPU is assigned to more than one group.
     346             :  */
     347             : struct cpumask *group_cpus_evenly(unsigned int numgrps)
     348             : {
     349             :         unsigned int curgrp = 0, nr_present = 0, nr_others = 0;
     350             :         cpumask_var_t *node_to_cpumask;
     351             :         cpumask_var_t nmsk, npresmsk;
     352             :         int ret = -ENOMEM;
     353             :         struct cpumask *masks = NULL;
     354             : 
     355             :         if (!zalloc_cpumask_var(&nmsk, GFP_KERNEL))
     356             :                 return NULL;
     357             : 
     358             :         if (!zalloc_cpumask_var(&npresmsk, GFP_KERNEL))
     359             :                 goto fail_nmsk;
     360             : 
     361             :         node_to_cpumask = alloc_node_to_cpumask();
     362             :         if (!node_to_cpumask)
     363             :                 goto fail_npresmsk;
     364             : 
     365             :         masks = kcalloc(numgrps, sizeof(*masks), GFP_KERNEL);
     366             :         if (!masks)
     367             :                 goto fail_node_to_cpumask;
     368             : 
     369             :         /* Stabilize the cpumasks */
     370             :         cpus_read_lock();
     371             :         build_node_to_cpumask(node_to_cpumask);
     372             : 
     373             :         /* grouping present CPUs first */
     374             :         ret = __group_cpus_evenly(curgrp, numgrps, node_to_cpumask,
     375             :                                   cpu_present_mask, nmsk, masks);
     376             :         if (ret < 0)
     377             :                 goto fail_build_affinity;
     378             :         nr_present = ret;
     379             : 
     380             :         /*
     381             :          * Allocate non-present CPUs starting from the next group to be
     382             :          * handled. If the grouping of present CPUs has already exhausted
     383             :          * the group space, assign the non-present CPUs to the groups
     384             :          * that have already been handed out.
     385             :          */
     386             :         if (nr_present >= numgrps)
     387             :                 curgrp = 0;
     388             :         else
     389             :                 curgrp = nr_present;
     390             :         cpumask_andnot(npresmsk, cpu_possible_mask, cpu_present_mask);
     391             :         ret = __group_cpus_evenly(curgrp, numgrps, node_to_cpumask,
     392             :                                   npresmsk, nmsk, masks);
     393             :         if (ret >= 0)
     394             :                 nr_others = ret;
     395             : 
     396             :  fail_build_affinity:
     397             :         cpus_read_unlock();
     398             : 
     399             :         if (ret >= 0)
     400             :                 WARN_ON(nr_present + nr_others < numgrps);
     401             : 
     402             :  fail_node_to_cpumask:
     403             :         free_node_to_cpumask(node_to_cpumask);
     404             : 
     405             :  fail_npresmsk:
     406             :         free_cpumask_var(npresmsk);
     407             : 
     408             :  fail_nmsk:
     409             :         free_cpumask_var(nmsk);
     410             :         if (ret < 0) {
     411             :                 kfree(masks);
     412             :                 return NULL;
     413             :         }
     414             :         return masks;
     415             : }
     416             : #else /* CONFIG_SMP */
     417           0 : struct cpumask *group_cpus_evenly(unsigned int numgrps)
     418             : {
     419           0 :         struct cpumask *masks = kcalloc(numgrps, sizeof(*masks), GFP_KERNEL);
     420             : 
     421           0 :         if (!masks)
     422             :                 return NULL;
     423             : 
     424             :         /* assign all CPUs (i.e. CPU 0) to the first group only */
     425           0 :         cpumask_copy(&masks[0], cpu_possible_mask);
     426           0 :         return masks;
     427             : }
     428             : #endif /* CONFIG_SMP */
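
Below is a hedged sketch of how a caller might consume the array returned by group_cpus_evenly(); the function name my_setup_queue_affinity() and the per-group action are hypothetical, but the ownership contract (the caller frees the array with kfree()) follows the function above.

    #include <linux/cpumask.h>
    #include <linux/errno.h>
    #include <linux/group_cpus.h>
    #include <linux/printk.h>
    #include <linux/slab.h>

    /* hypothetical caller: one CPU group per queue */
    static int my_setup_queue_affinity(unsigned int nr_queues)
    {
            struct cpumask *masks = group_cpus_evenly(nr_queues);
            unsigned int i;

            if (!masks)
                    return -ENOMEM;

            for (i = 0; i < nr_queues; i++) {
                    /* e.g. steer queue i towards the CPUs in masks[i] */
                    pr_info("queue %u -> CPUs %*pbl\n",
                            i, cpumask_pr_args(&masks[i]));
            }

            kfree(masks);
            return 0;
    }
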

Generated by: LCOV version 1.14