Line data Source code
// SPDX-License-Identifier: GPL-2.0-only

/*
 * rcuref - A scalable reference count implementation for RCU managed objects
 *
 * rcuref is provided to replace open coded reference count implementations
 * based on atomic_t. It protects explicitly RCU managed objects which can
 * be visible even after the last reference has been dropped and the object
 * is heading towards destruction.
 *
 * A common usage pattern is:
 *
 *	get()
 *		rcu_read_lock();
 *		p = get_ptr();
 *		if (p && !atomic_inc_not_zero(&p->refcnt))
 *			p = NULL;
 *		rcu_read_unlock();
 *		return p;
 *
 *	put()
 *		if (!atomic_dec_return(&p->refcnt)) {
 *			remove_ptr(p);
 *			kfree_rcu(p, rcu);
 *		}
 *
 * atomic_inc_not_zero() is implemented with a try_cmpxchg() loop which has
 * O(N^2) behaviour under contention with N concurrent operations.
 *
 * rcuref uses atomic_add_negative_relaxed() for the fast path, which scales
 * better under contention.
 *
 * Why not refcount?
 * =================
 *
 * In principle it should be possible to make refcount use the rcuref
 * scheme, but the destruction race described below cannot be prevented
 * unless the protected object is RCU managed.
 *
 * Theory of operation
 * ===================
 *
 * rcuref uses an unsigned integer reference counter. As long as the
 * counter value is greater than or equal to RCUREF_ONEREF and not larger
 * than RCUREF_MAXREF the reference is alive:
 *
 * ONEREF   MAXREF     SATURATED            RELEASED   DEAD       NOREF
 * 0        0x7FFFFFFF 0x80000000 0xA0000000 0xBFFFFFFF 0xC0000000 0xE0000000 0xFFFFFFFF
 * <---valid --------> <-------saturation zone-------> <-----dead zone----->
 *
 * The get() and put() operations do unconditional increments and
 * decrements. The result is checked after the operation. This optimizes
 * for the fast path.
 *
 * If the reference count is saturated or dead, then the increments and
 * decrements are not harmful as the reference count still stays in the
 * respective zones and is always set back to SATURATED resp. DEAD. The
 * zones have room for 2^28 racing operations in each direction, which
 * makes it practically impossible to escape the zones.
 *
 * Once the last reference is dropped the reference count becomes
 * RCUREF_NOREF which forces rcuref_put() into the slowpath operation. The
 * slowpath then tries to set the reference count from RCUREF_NOREF to
 * RCUREF_DEAD via a cmpxchg(). This opens a small window where a
 * concurrent rcuref_get() can acquire the reference count and bring it
 * back to RCUREF_ONEREF or even drop the reference again and mark it DEAD.
 *
 * If the cmpxchg() succeeds then a concurrent rcuref_get() will result in
 * DEAD + 1, which is inside the dead zone. If that happens the reference
 * count is put back to DEAD.
 *
 * The actual race is possible due to the unconditional increment and
 * decrements in rcuref_get() and rcuref_put():
 *
 *	T1				T2
 *	get()				put()
 *					if (atomic_add_negative(-1, &ref->refcnt))
 *		succeeds->		atomic_cmpxchg(&ref->refcnt, NOREF, DEAD);
 *
 *	atomic_add_negative(1, &ref->refcnt);	<- Elevates refcount to DEAD + 1
 *
 * As the result of T1's add is negative, the get() goes into the slow path
 * and observes refcnt being in the dead zone which makes the operation fail.
84 : * 85 : * Possible critical states: 86 : * 87 : * Context Counter References Operation 88 : * T1 0 1 init() 89 : * T2 1 2 get() 90 : * T1 0 1 put() 91 : * T2 -1 0 put() tries to mark dead 92 : * T1 0 1 get() 93 : * T2 0 1 put() mark dead fails 94 : * T1 -1 0 put() tries to mark dead 95 : * T1 DEAD 0 put() mark dead succeeds 96 : * T2 DEAD+1 0 get() fails and puts it back to DEAD 97 : * 98 : * Of course there are more complex scenarios, but the above illustrates 99 : * the working principle. The rest is left to the imagination of the 100 : * reader. 101 : * 102 : * Deconstruction race 103 : * =================== 104 : * 105 : * The release operation must be protected by prohibiting a grace period in 106 : * order to prevent a possible use after free: 107 : * 108 : * T1 T2 109 : * put() get() 110 : * // ref->refcnt = ONEREF 111 : * if (!atomic_add_negative(-1, &ref->refcnt)) 112 : * return false; <- Not taken 113 : * 114 : * // ref->refcnt == NOREF 115 : * --> preemption 116 : * // Elevates ref->refcnt to ONEREF 117 : * if (!atomic_add_negative(1, &ref->refcnt)) 118 : * return true; <- taken 119 : * 120 : * if (put(&p->ref)) { <-- Succeeds 121 : * remove_pointer(p); 122 : * kfree_rcu(p, rcu); 123 : * } 124 : * 125 : * RCU grace period ends, object is freed 126 : * 127 : * atomic_cmpxchg(&ref->refcnt, NOREF, DEAD); <- UAF 128 : * 129 : * This is prevented by disabling preemption around the put() operation as 130 : * that's in most kernel configurations cheaper than a rcu_read_lock() / 131 : * rcu_read_unlock() pair and in many cases even a NOOP. In any case it 132 : * prevents the grace period which keeps the object alive until all put() 133 : * operations complete. 134 : * 135 : * Saturation protection 136 : * ===================== 137 : * 138 : * The reference count has a saturation limit RCUREF_MAXREF (INT_MAX). 
 * Once this is exceeded the reference count becomes stale by setting it
 * to RCUREF_SATURATED, which will cause a memory leak, but it prevents
 * wrap arounds which obviously cause worse problems than a memory
 * leak. When saturation is reached a warning is emitted.
 *
 * Race conditions
 * ===============
 *
 * All reference count increment/decrement operations are unconditional and
 * only verified after the fact. This optimizes for the good case and takes
 * the occasional race vs. a dead or already saturated refcount into
 * account. The saturation and dead zones are large enough to accommodate
 * for that.
 *
 * Memory ordering
 * ===============
 *
 * Memory ordering rules are slightly relaxed wrt regular atomic_t functions
 * and provide only what is strictly required for refcounts.
 *
 * The increments are fully relaxed; these will not provide ordering. The
 * rationale is that whatever is used to obtain the object to increase the
 * reference count on will provide the ordering. For locked data
 * structures, it's the lock acquire, for RCU/lockless data structures it's
 * the dependent load.
 *
 * rcuref_get() provides a control dependency ordering future stores which
 * ensures that the object is not modified when acquiring a reference
 * fails.
 *
 * rcuref_put() provides release order, i.e. all prior loads and stores
 * will be issued before. It also provides a control dependency ordering
 * against the subsequent destruction of the object.
 *
 * If rcuref_put() successfully dropped the last reference and marked the
 * object DEAD it also provides acquire ordering.
175 : */ 176 : 177 : #include <linux/export.h> 178 : #include <linux/rcuref.h> 179 : 180 : /** 181 : * rcuref_get_slowpath - Slowpath of rcuref_get() 182 : * @ref: Pointer to the reference count 183 : * 184 : * Invoked when the reference count is outside of the valid zone. 185 : * 186 : * Return: 187 : * False if the reference count was already marked dead 188 : * 189 : * True if the reference count is saturated, which prevents the 190 : * object from being deconstructed ever. 191 : */ 192 0 : bool rcuref_get_slowpath(rcuref_t *ref) 193 : { 194 0 : unsigned int cnt = atomic_read(&ref->refcnt); 195 : 196 : /* 197 : * If the reference count was already marked dead, undo the 198 : * increment so it stays in the middle of the dead zone and return 199 : * fail. 200 : */ 201 0 : if (cnt >= RCUREF_RELEASED) { 202 0 : atomic_set(&ref->refcnt, RCUREF_DEAD); 203 0 : return false; 204 : } 205 : 206 : /* 207 : * If it was saturated, warn and mark it so. In case the increment 208 : * was already on a saturated value restore the saturation 209 : * marker. This keeps it in the middle of the saturation zone and 210 : * prevents the reference count from overflowing. This leaks the 211 : * object memory, but prevents the obvious reference count overflow 212 : * damage. 213 : */ 214 0 : if (WARN_ONCE(cnt > RCUREF_MAXREF, "rcuref saturated - leaking memory")) 215 0 : atomic_set(&ref->refcnt, RCUREF_SATURATED); 216 : return true; 217 : } 218 : EXPORT_SYMBOL_GPL(rcuref_get_slowpath); 219 : 220 : /** 221 : * rcuref_put_slowpath - Slowpath of __rcuref_put() 222 : * @ref: Pointer to the reference count 223 : * 224 : * Invoked when the reference count is outside of the valid zone. 225 : * 226 : * Return: 227 : * True if this was the last reference with no future references 228 : * possible. This signals the caller that it can safely schedule the 229 : * object, which is protected by the reference counter, for 230 : * deconstruction. 
231 : * 232 : * False if there are still active references or the put() raced 233 : * with a concurrent get()/put() pair. Caller is not allowed to 234 : * deconstruct the protected object. 235 : */ 236 0 : bool rcuref_put_slowpath(rcuref_t *ref) 237 : { 238 0 : unsigned int cnt = atomic_read(&ref->refcnt); 239 : 240 : /* Did this drop the last reference? */ 241 0 : if (likely(cnt == RCUREF_NOREF)) { 242 : /* 243 : * Carefully try to set the reference count to RCUREF_DEAD. 244 : * 245 : * This can fail if a concurrent get() operation has 246 : * elevated it again or the corresponding put() even marked 247 : * it dead already. Both are valid situations and do not 248 : * require a retry. If this fails the caller is not 249 : * allowed to deconstruct the object. 250 : */ 251 0 : if (atomic_cmpxchg_release(&ref->refcnt, RCUREF_NOREF, RCUREF_DEAD) != RCUREF_NOREF) 252 : return false; 253 : 254 : /* 255 : * The caller can safely schedule the object for 256 : * deconstruction. Provide acquire ordering. 257 : */ 258 0 : smp_acquire__after_ctrl_dep(); 259 0 : return true; 260 : } 261 : 262 : /* 263 : * If the reference count was already in the dead zone, then this 264 : * put() operation is imbalanced. Warn, put the reference count back to 265 : * DEAD and tell the caller to not deconstruct the object. 266 : */ 267 0 : if (WARN_ONCE(cnt >= RCUREF_RELEASED, "rcuref - imbalanced put()")) { 268 0 : atomic_set(&ref->refcnt, RCUREF_DEAD); 269 0 : return false; 270 : } 271 : 272 : /* 273 : * This is a put() operation on a saturated refcount. Restore the 274 : * mean saturation value and tell the caller to not deconstruct the 275 : * object. 276 : */ 277 0 : if (cnt > RCUREF_MAXREF) 278 0 : atomic_set(&ref->refcnt, RCUREF_SATURATED); 279 : return false; 280 : } 281 : EXPORT_SYMBOL_GPL(rcuref_put_slowpath);