LCOV - code coverage report
Current view: top level - include/linux - reciprocal_div.h (source / functions) Hit Total Coverage
Test: coverage.info Lines: 0 2 0.0 %
Date: 2023-04-06 08:38:28 Functions: 0 0 -

          Line data    Source code
       1             : /* SPDX-License-Identifier: GPL-2.0 */
       2             : #ifndef _LINUX_RECIPROCAL_DIV_H
       3             : #define _LINUX_RECIPROCAL_DIV_H
       4             : 
       5             : #include <linux/types.h>
       6             : 
       7             : /*
       8             :  * This algorithm is based on the paper "Division by Invariant
       9             :  * Integers Using Multiplication" by Torbjörn Granlund and Peter
      10             :  * L. Montgomery.
      11             :  *
      12             :  * The assembler implementation from Agner Fog, which this code is
      13             :  * based on, can be found here:
      14             :  * http://www.agner.org/optimize/asmlib.zip
      15             :  *
      16             :  * This optimization for A/B is helpful if the divisor B is mostly
      17             :  * runtime invariant. The reciprocal of B is calculated in the
      18             :  * slow-path with reciprocal_value(). The fast-path can then just use
      19             :  * a much faster multiplication operation with a variable dividend A
      20             :  * to calculate the division A/B.
      21             :  */
      22             : 
      23             : struct reciprocal_value { /* precomputed constants consumed by reciprocal_divide() */
      24             :         u32 m; /* multiplier: reciprocal_divide() keeps the high 32 bits of (u64)a * m */
      25             :         u8 sh1, sh2; /* right-shift counts; reciprocal_divide() applies sh1 first, then sh2 */
      26             : };
      27             : 
      28             : /* "reciprocal_value" and "reciprocal_divide" together implement the basic
      29             :  * version of the algorithm described in Figure 4.1 of the paper.
      30             :  */
      31             : struct reciprocal_value reciprocal_value(u32 d); /* slow path: precompute the reciprocal of divisor d */
      32             : 
      33             : static inline u32 reciprocal_divide(u32 a, struct reciprocal_value R) /* fast path: divide a using the precomputed R */
      34             : {
      35           0 :         u32 t = (u32)(((u64)a * R.m) >> 32); /* widen to 64 bits, keep the high 32 bits of a * R.m */
      36           0 :         return (t + ((a - t) >> R.sh1)) >> R.sh2; /* t <= a because R.m < 2^32, so a - t cannot wrap and the sum never exceeds a */
      37             : }
      38             : 
      39             : struct reciprocal_value_adv { /* result of reciprocal_value_adv(); consumed as shown in the usage pseudo code below */
      40             :         u32 m; /* multiplier: the pseudo code keeps the high 32 bits of n * m */
      41             :         u8 sh, exp; /* sh: shift applied after the multiply; exp: exponent used by the power-of-two shortcut */
      42             :         bool is_wide_m; /* selects the longer subtract/shift/add sequence; presumably m did not fit in 32 bits - TODO confirm */
      43             : };
      44             : 
      45             : /* "reciprocal_value_adv" implements the advanced version of the algorithm
      46             :  * described in Figure 4.2 of the paper except when "divisor > (1U << 31)" whose
      47             :  * ceil(log2(d)) result will be 32 which then requires u128 divide on host. The
      48             :  * exception case could be easily handled before calling "reciprocal_value_adv".
      49             :  *
      50             :  * The advanced version requires more complex calculation to get the reciprocal
      51             :  * multiplier and other control variables, but then could reduce the required
      52             :  * emulation operations.
      53             :  *
      54             :  * It makes no sense to use this advanced version for host divide emulation:
      55             :  * the extra complexity of calculating the multiplier etc. could completely
      56             :  * negate our saving on emulation operations.
      57             :  *
      58             :  * However, it makes sense to use it for JIT divide code generation, for which
      59             :  * we are willing to trade host performance for that of the JITed code. As shown
      60             :  * by the following pseudo code, the required emulation operations could go down
      61             :  * from 6 (the basic version) to 3 or 4.
      62             :  *
      63             :  * To use the result of "reciprocal_value_adv", suppose we want to calculate
      64             :  * n/d, the pseudo C code will be:
      65             :  *
      66             :  *   struct reciprocal_value_adv rvalue;
      67             :  *   u8 pre_shift, exp;
      68             :  *
      69             :  *   // handle exception case.
      70             :  *   if (d >= (1U << 31)) {
      71             :  *     result = n >= d;
      72             :  *     return;
      73             :  *   }
      74             :  *
      75             :  *   rvalue = reciprocal_value_adv(d, 32);
      76             :  *   exp = rvalue.exp;
      77             :  *   if (rvalue.is_wide_m && !(d & 1)) {
      78             :  *     // floor(log2(d & (2^32 -d)))
      79             :  *     pre_shift = fls(d & -d) - 1;
      80             :  *     rvalue = reciprocal_value_adv(d >> pre_shift, 32 - pre_shift);
      81             :  *   } else {
      82             :  *     pre_shift = 0;
      83             :  *   }
      84             :  *
      85             :  *   // code generation starts.
      86             :  *   if (d == 1U << exp) {
      87             :  *     result = n >> exp;
      88             :  *   } else if (rvalue.is_wide_m) {
      89             :  *     // pre_shift must be zero when reached here.
      90             :  *     t = ((u64)n * rvalue.m) >> 32;
      91             :  *     result = n - t;
      92             :  *     result >>= 1;
      93             :  *     result += t;
      94             :  *     result >>= rvalue.sh - 1;
      95             :  *   } else {
      96             :  *     // take n unshifted when pre_shift == 0 so that result is always defined.
      97             :  *     result = pre_shift ? n >> pre_shift : n;
      98             :  *     result = ((u64)result * rvalue.m) >> 32;
      99             :  *     result >>= rvalue.sh;
     100             :  *   }
     101             :  */
     102             : struct reciprocal_value_adv reciprocal_value_adv(u32 d, u8 prec); /* the usage pseudo code above passes prec = 32, or 32 - pre_shift once d has been pre-shifted */
     103             : 
     104             : #endif /* _LINUX_RECIPROCAL_DIV_H */

Generated by: LCOV version 1.14