Line data Source code
1 : // SPDX-License-Identifier: GPL-2.0
2 : /*
3 : * Bad block management
4 : *
5 : * - Heavily based on MD badblocks code from Neil Brown
6 : *
7 : * Copyright (c) 2015, Intel Corporation.
8 : */
9 :
10 : #include <linux/badblocks.h>
11 : #include <linux/seqlock.h>
12 : #include <linux/device.h>
13 : #include <linux/kernel.h>
14 : #include <linux/module.h>
15 : #include <linux/stddef.h>
16 : #include <linux/types.h>
17 : #include <linux/slab.h>
18 :
19 : /**
20 : * badblocks_check() - check a given range for bad sectors
21 : * @bb: the badblocks structure that holds all badblock information
22 : * @s: sector (start) at which to check for badblocks
23 : * @sectors: number of sectors to check for badblocks
24 : * @first_bad: pointer to store location of the first badblock
25 : * @bad_sectors: pointer to store number of badblocks after @first_bad
26 : *
27 : * We can record which blocks on each device are 'bad' and so just
28 : * fail those blocks, or that stripe, rather than the whole device.
29 : * Entries in the bad-block table are 64bits wide. This comprises:
30 : * Length of bad-range, in sectors: 0-511 for lengths 1-512
31 : * Start of bad-range, sector offset, 54 bits (allows 8 exbibytes)
32 : * A 'shift' can be set so that larger blocks are tracked and
33 : * consequently larger devices can be covered.
34 : * 'Acknowledged' flag - 1 bit. - the most significant bit.
35 : *
36 : * Locking of the bad-block table uses a seqlock so badblocks_check
37 : * might need to retry if it is very unlucky.
38 : * We will sometimes want to check for bad blocks in a bi_end_io function,
39 : * so we use the write_seqlock_irq variant.
40 : *
41 : * When looking for a bad block we specify a range and want to
42 : * know if any block in the range is bad. So we binary-search
43 : * to the last range that starts at-or-before the given endpoint,
44 : * (or "before the sector after the target range")
45 : * then see if it ends after the given start.
46 : *
47 : * Return:
48 : * 0: there are no known bad blocks in the range
49 : * 1: there are known bad block which are all acknowledged
50 : * -1: there are bad blocks which have not yet been acknowledged in metadata.
51 : * plus the start/length of the first bad section we overlap.
52 : */
53 0 : int badblocks_check(struct badblocks *bb, sector_t s, int sectors,
54 : sector_t *first_bad, int *bad_sectors)
55 : {
56 : int hi;
57 : int lo;
58 0 : u64 *p = bb->page;
59 : int rv;
60 0 : sector_t target = s + sectors;
61 : unsigned seq;
62 :
63 0 : if (bb->shift > 0) {
64 : /* round the start down, and the end up */
65 0 : s >>= bb->shift;
66 0 : target += (1<<bb->shift) - 1;
67 0 : target >>= bb->shift;
68 : }
69 : /* 'target' is now the first block after the bad range */
70 :
71 : retry:
72 0 : seq = read_seqbegin(&bb->lock);
73 0 : lo = 0;
74 0 : rv = 0;
75 0 : hi = bb->count;
76 :
77 : /* Binary search between lo and hi for 'target'
78 : * i.e. for the last range that starts before 'target'
79 : */
80 : /* INVARIANT: ranges before 'lo' and at-or-after 'hi'
81 : * are known not to be the last range before target.
82 : * VARIANT: hi-lo is the number of possible
83 : * ranges, and decreases until it reaches 1
84 : */
85 0 : while (hi - lo > 1) {
86 0 : int mid = (lo + hi) / 2;
87 0 : sector_t a = BB_OFFSET(p[mid]);
88 :
89 0 : if (a < target)
90 : /* This could still be the one, earlier ranges
91 : * could not.
92 : */
93 : lo = mid;
94 : else
95 : /* This and later ranges are definitely out. */
96 : hi = mid;
97 : }
98 : /* 'lo' might be the last that started before target, but 'hi' isn't */
99 0 : if (hi > lo) {
100 : /* need to check all range that end after 's' to see if
101 : * any are unacknowledged.
102 : */
103 0 : while (lo >= 0 &&
104 0 : BB_OFFSET(p[lo]) + BB_LEN(p[lo]) > s) {
105 0 : if (BB_OFFSET(p[lo]) < target) {
106 : /* starts before the end, and finishes after
107 : * the start, so they must overlap
108 : */
109 0 : if (rv != -1 && BB_ACK(p[lo]))
110 : rv = 1;
111 : else
112 0 : rv = -1;
113 0 : *first_bad = BB_OFFSET(p[lo]);
114 0 : *bad_sectors = BB_LEN(p[lo]);
115 : }
116 0 : lo--;
117 : }
118 : }
119 :
120 0 : if (read_seqretry(&bb->lock, seq))
121 : goto retry;
122 :
123 0 : return rv;
124 : }
125 : EXPORT_SYMBOL_GPL(badblocks_check);
126 :
127 : static void badblocks_update_acked(struct badblocks *bb)
128 : {
129 0 : u64 *p = bb->page;
130 : int i;
131 0 : bool unacked = false;
132 :
133 0 : if (!bb->unacked_exist)
134 : return;
135 :
136 0 : for (i = 0; i < bb->count ; i++) {
137 0 : if (!BB_ACK(p[i])) {
138 : unacked = true;
139 : break;
140 : }
141 : }
142 :
143 0 : if (!unacked)
144 0 : bb->unacked_exist = 0;
145 : }
146 :
147 : /**
148 : * badblocks_set() - Add a range of bad blocks to the table.
149 : * @bb: the badblocks structure that holds all badblock information
150 : * @s: first sector to mark as bad
151 : * @sectors: number of sectors to mark as bad
152 : * @acknowledged: weather to mark the bad sectors as acknowledged
153 : *
154 : * This might extend the table, or might contract it if two adjacent ranges
155 : * can be merged. We binary-search to find the 'insertion' point, then
156 : * decide how best to handle it.
157 : *
158 : * Return:
159 : * 0: success
160 : * 1: failed to set badblocks (out of space)
161 : */
162 0 : int badblocks_set(struct badblocks *bb, sector_t s, int sectors,
163 : int acknowledged)
164 : {
165 : u64 *p;
166 : int lo, hi;
167 0 : int rv = 0;
168 : unsigned long flags;
169 :
170 0 : if (bb->shift < 0)
171 : /* badblocks are disabled */
172 : return 1;
173 :
174 0 : if (bb->shift) {
175 : /* round the start down, and the end up */
176 0 : sector_t next = s + sectors;
177 :
178 0 : s >>= bb->shift;
179 0 : next += (1<<bb->shift) - 1;
180 0 : next >>= bb->shift;
181 0 : sectors = next - s;
182 : }
183 :
184 0 : write_seqlock_irqsave(&bb->lock, flags);
185 :
186 0 : p = bb->page;
187 0 : lo = 0;
188 0 : hi = bb->count;
189 : /* Find the last range that starts at-or-before 's' */
190 0 : while (hi - lo > 1) {
191 0 : int mid = (lo + hi) / 2;
192 0 : sector_t a = BB_OFFSET(p[mid]);
193 :
194 0 : if (a <= s)
195 : lo = mid;
196 : else
197 : hi = mid;
198 : }
199 0 : if (hi > lo && BB_OFFSET(p[lo]) > s)
200 0 : hi = lo;
201 :
202 0 : if (hi > lo) {
203 : /* we found a range that might merge with the start
204 : * of our new range
205 : */
206 0 : sector_t a = BB_OFFSET(p[lo]);
207 0 : sector_t e = a + BB_LEN(p[lo]);
208 0 : int ack = BB_ACK(p[lo]);
209 :
210 0 : if (e >= s) {
211 : /* Yes, we can merge with a previous range */
212 0 : if (s == a && s + sectors >= e)
213 : /* new range covers old */
214 : ack = acknowledged;
215 : else
216 0 : ack = ack && acknowledged;
217 :
218 0 : if (e < s + sectors)
219 0 : e = s + sectors;
220 0 : if (e - a <= BB_MAX_LEN) {
221 0 : p[lo] = BB_MAKE(a, e-a, ack);
222 0 : s = e;
223 : } else {
224 : /* does not all fit in one range,
225 : * make p[lo] maximal
226 : */
227 0 : if (BB_LEN(p[lo]) != BB_MAX_LEN)
228 0 : p[lo] = BB_MAKE(a, BB_MAX_LEN, ack);
229 0 : s = a + BB_MAX_LEN;
230 : }
231 0 : sectors = e - s;
232 : }
233 : }
234 0 : if (sectors && hi < bb->count) {
235 : /* 'hi' points to the first range that starts after 's'.
236 : * Maybe we can merge with the start of that range
237 : */
238 0 : sector_t a = BB_OFFSET(p[hi]);
239 0 : sector_t e = a + BB_LEN(p[hi]);
240 0 : int ack = BB_ACK(p[hi]);
241 :
242 0 : if (a <= s + sectors) {
243 : /* merging is possible */
244 0 : if (e <= s + sectors) {
245 : /* full overlap */
246 : e = s + sectors;
247 : ack = acknowledged;
248 : } else
249 0 : ack = ack && acknowledged;
250 :
251 0 : a = s;
252 0 : if (e - a <= BB_MAX_LEN) {
253 0 : p[hi] = BB_MAKE(a, e-a, ack);
254 0 : s = e;
255 : } else {
256 0 : p[hi] = BB_MAKE(a, BB_MAX_LEN, ack);
257 0 : s = a + BB_MAX_LEN;
258 : }
259 0 : sectors = e - s;
260 0 : lo = hi;
261 0 : hi++;
262 : }
263 : }
264 0 : if (sectors == 0 && hi < bb->count) {
265 : /* we might be able to combine lo and hi */
266 : /* Note: 's' is at the end of 'lo' */
267 0 : sector_t a = BB_OFFSET(p[hi]);
268 0 : int lolen = BB_LEN(p[lo]);
269 0 : int hilen = BB_LEN(p[hi]);
270 0 : int newlen = lolen + hilen - (s - a);
271 :
272 0 : if (s >= a && newlen < BB_MAX_LEN) {
273 : /* yes, we can combine them */
274 0 : int ack = BB_ACK(p[lo]) && BB_ACK(p[hi]);
275 :
276 0 : p[lo] = BB_MAKE(BB_OFFSET(p[lo]), newlen, ack);
277 0 : memmove(p + hi, p + hi + 1,
278 : (bb->count - hi - 1) * 8);
279 0 : bb->count--;
280 : }
281 : }
282 0 : while (sectors) {
283 : /* didn't merge (it all).
284 : * Need to add a range just before 'hi'
285 : */
286 0 : if (bb->count >= MAX_BADBLOCKS) {
287 : /* No room for more */
288 : rv = 1;
289 : break;
290 : } else {
291 0 : int this_sectors = sectors;
292 :
293 0 : memmove(p + hi + 1, p + hi,
294 : (bb->count - hi) * 8);
295 0 : bb->count++;
296 :
297 0 : if (this_sectors > BB_MAX_LEN)
298 0 : this_sectors = BB_MAX_LEN;
299 0 : p[hi] = BB_MAKE(s, this_sectors, acknowledged);
300 0 : sectors -= this_sectors;
301 0 : s += this_sectors;
302 : }
303 : }
304 :
305 0 : bb->changed = 1;
306 0 : if (!acknowledged)
307 0 : bb->unacked_exist = 1;
308 : else
309 : badblocks_update_acked(bb);
310 0 : write_sequnlock_irqrestore(&bb->lock, flags);
311 :
312 0 : return rv;
313 : }
314 : EXPORT_SYMBOL_GPL(badblocks_set);
315 :
316 : /**
317 : * badblocks_clear() - Remove a range of bad blocks to the table.
318 : * @bb: the badblocks structure that holds all badblock information
319 : * @s: first sector to mark as bad
320 : * @sectors: number of sectors to mark as bad
321 : *
322 : * This may involve extending the table if we spilt a region,
323 : * but it must not fail. So if the table becomes full, we just
324 : * drop the remove request.
325 : *
326 : * Return:
327 : * 0: success
328 : * 1: failed to clear badblocks
329 : */
330 0 : int badblocks_clear(struct badblocks *bb, sector_t s, int sectors)
331 : {
332 : u64 *p;
333 : int lo, hi;
334 0 : sector_t target = s + sectors;
335 0 : int rv = 0;
336 :
337 0 : if (bb->shift > 0) {
338 : /* When clearing we round the start up and the end down.
339 : * This should not matter as the shift should align with
340 : * the block size and no rounding should ever be needed.
341 : * However it is better the think a block is bad when it
342 : * isn't than to think a block is not bad when it is.
343 : */
344 0 : s += (1<<bb->shift) - 1;
345 0 : s >>= bb->shift;
346 0 : target >>= bb->shift;
347 : }
348 :
349 0 : write_seqlock_irq(&bb->lock);
350 :
351 0 : p = bb->page;
352 0 : lo = 0;
353 0 : hi = bb->count;
354 : /* Find the last range that starts before 'target' */
355 0 : while (hi - lo > 1) {
356 0 : int mid = (lo + hi) / 2;
357 0 : sector_t a = BB_OFFSET(p[mid]);
358 :
359 0 : if (a < target)
360 : lo = mid;
361 : else
362 : hi = mid;
363 : }
364 0 : if (hi > lo) {
365 : /* p[lo] is the last range that could overlap the
366 : * current range. Earlier ranges could also overlap,
367 : * but only this one can overlap the end of the range.
368 : */
369 0 : if ((BB_OFFSET(p[lo]) + BB_LEN(p[lo]) > target) &&
370 : (BB_OFFSET(p[lo]) < target)) {
371 : /* Partial overlap, leave the tail of this range */
372 0 : int ack = BB_ACK(p[lo]);
373 0 : sector_t a = BB_OFFSET(p[lo]);
374 0 : sector_t end = a + BB_LEN(p[lo]);
375 :
376 0 : if (a < s) {
377 : /* we need to split this range */
378 0 : if (bb->count >= MAX_BADBLOCKS) {
379 : rv = -ENOSPC;
380 : goto out;
381 : }
382 0 : memmove(p+lo+1, p+lo, (bb->count - lo) * 8);
383 0 : bb->count++;
384 0 : p[lo] = BB_MAKE(a, s-a, ack);
385 0 : lo++;
386 : }
387 0 : p[lo] = BB_MAKE(target, end - target, ack);
388 : /* there is no longer an overlap */
389 0 : hi = lo;
390 0 : lo--;
391 : }
392 0 : while (lo >= 0 &&
393 0 : (BB_OFFSET(p[lo]) + BB_LEN(p[lo]) > s) &&
394 : (BB_OFFSET(p[lo]) < target)) {
395 : /* This range does overlap */
396 0 : if (BB_OFFSET(p[lo]) < s) {
397 : /* Keep the early parts of this range. */
398 0 : int ack = BB_ACK(p[lo]);
399 0 : sector_t start = BB_OFFSET(p[lo]);
400 :
401 0 : p[lo] = BB_MAKE(start, s - start, ack);
402 : /* now low doesn't overlap, so.. */
403 0 : break;
404 : }
405 0 : lo--;
406 : }
407 : /* 'lo' is strictly before, 'hi' is strictly after,
408 : * anything between needs to be discarded
409 : */
410 0 : if (hi - lo > 1) {
411 0 : memmove(p+lo+1, p+hi, (bb->count - hi) * 8);
412 0 : bb->count -= (hi - lo - 1);
413 : }
414 : }
415 :
416 0 : badblocks_update_acked(bb);
417 0 : bb->changed = 1;
418 : out:
419 0 : write_sequnlock_irq(&bb->lock);
420 0 : return rv;
421 : }
422 : EXPORT_SYMBOL_GPL(badblocks_clear);
423 :
424 : /**
425 : * ack_all_badblocks() - Acknowledge all bad blocks in a list.
426 : * @bb: the badblocks structure that holds all badblock information
427 : *
428 : * This only succeeds if ->changed is clear. It is used by
429 : * in-kernel metadata updates
430 : */
431 0 : void ack_all_badblocks(struct badblocks *bb)
432 : {
433 0 : if (bb->page == NULL || bb->changed)
434 : /* no point even trying */
435 : return;
436 0 : write_seqlock_irq(&bb->lock);
437 :
438 0 : if (bb->changed == 0 && bb->unacked_exist) {
439 0 : u64 *p = bb->page;
440 : int i;
441 :
442 0 : for (i = 0; i < bb->count ; i++) {
443 0 : if (!BB_ACK(p[i])) {
444 0 : sector_t start = BB_OFFSET(p[i]);
445 0 : int len = BB_LEN(p[i]);
446 :
447 0 : p[i] = BB_MAKE(start, len, 1);
448 : }
449 : }
450 0 : bb->unacked_exist = 0;
451 : }
452 0 : write_sequnlock_irq(&bb->lock);
453 : }
454 : EXPORT_SYMBOL_GPL(ack_all_badblocks);
455 :
456 : /**
457 : * badblocks_show() - sysfs access to bad-blocks list
458 : * @bb: the badblocks structure that holds all badblock information
459 : * @page: buffer received from sysfs
460 : * @unack: weather to show unacknowledged badblocks
461 : *
462 : * Return:
463 : * Length of returned data
464 : */
465 0 : ssize_t badblocks_show(struct badblocks *bb, char *page, int unack)
466 : {
467 : size_t len;
468 : int i;
469 0 : u64 *p = bb->page;
470 : unsigned seq;
471 :
472 0 : if (bb->shift < 0)
473 : return 0;
474 :
475 : retry:
476 0 : seq = read_seqbegin(&bb->lock);
477 :
478 0 : len = 0;
479 0 : i = 0;
480 :
481 0 : while (len < PAGE_SIZE && i < bb->count) {
482 0 : sector_t s = BB_OFFSET(p[i]);
483 0 : unsigned int length = BB_LEN(p[i]);
484 0 : int ack = BB_ACK(p[i]);
485 :
486 0 : i++;
487 :
488 0 : if (unack && ack)
489 0 : continue;
490 :
491 0 : len += snprintf(page+len, PAGE_SIZE-len, "%llu %u\n",
492 : (unsigned long long)s << bb->shift,
493 0 : length << bb->shift);
494 : }
495 0 : if (unack && len == 0)
496 0 : bb->unacked_exist = 0;
497 :
498 0 : if (read_seqretry(&bb->lock, seq))
499 : goto retry;
500 :
501 0 : return len;
502 : }
503 : EXPORT_SYMBOL_GPL(badblocks_show);
504 :
505 : /**
506 : * badblocks_store() - sysfs access to bad-blocks list
507 : * @bb: the badblocks structure that holds all badblock information
508 : * @page: buffer received from sysfs
509 : * @len: length of data received from sysfs
510 : * @unack: weather to show unacknowledged badblocks
511 : *
512 : * Return:
513 : * Length of the buffer processed or -ve error.
514 : */
515 0 : ssize_t badblocks_store(struct badblocks *bb, const char *page, size_t len,
516 : int unack)
517 : {
518 : unsigned long long sector;
519 : int length;
520 : char newline;
521 :
522 0 : switch (sscanf(page, "%llu %d%c", §or, &length, &newline)) {
523 : case 3:
524 0 : if (newline != '\n')
525 : return -EINVAL;
526 : fallthrough;
527 : case 2:
528 0 : if (length <= 0)
529 : return -EINVAL;
530 : break;
531 : default:
532 : return -EINVAL;
533 : }
534 :
535 0 : if (badblocks_set(bb, sector, length, !unack))
536 : return -ENOSPC;
537 : else
538 0 : return len;
539 : }
540 : EXPORT_SYMBOL_GPL(badblocks_store);
541 :
542 0 : static int __badblocks_init(struct device *dev, struct badblocks *bb,
543 : int enable)
544 : {
545 0 : bb->dev = dev;
546 0 : bb->count = 0;
547 0 : if (enable)
548 0 : bb->shift = 0;
549 : else
550 0 : bb->shift = -1;
551 0 : if (dev)
552 0 : bb->page = devm_kzalloc(dev, PAGE_SIZE, GFP_KERNEL);
553 : else
554 0 : bb->page = kzalloc(PAGE_SIZE, GFP_KERNEL);
555 0 : if (!bb->page) {
556 0 : bb->shift = -1;
557 0 : return -ENOMEM;
558 : }
559 0 : seqlock_init(&bb->lock);
560 :
561 0 : return 0;
562 : }
563 :
564 : /**
565 : * badblocks_init() - initialize the badblocks structure
566 : * @bb: the badblocks structure that holds all badblock information
567 : * @enable: weather to enable badblocks accounting
568 : *
569 : * Return:
570 : * 0: success
571 : * -ve errno: on error
572 : */
573 0 : int badblocks_init(struct badblocks *bb, int enable)
574 : {
575 0 : return __badblocks_init(NULL, bb, enable);
576 : }
577 : EXPORT_SYMBOL_GPL(badblocks_init);
578 :
579 0 : int devm_init_badblocks(struct device *dev, struct badblocks *bb)
580 : {
581 0 : if (!bb)
582 : return -EINVAL;
583 0 : return __badblocks_init(dev, bb, 1);
584 : }
585 : EXPORT_SYMBOL_GPL(devm_init_badblocks);
586 :
587 : /**
588 : * badblocks_exit() - free the badblocks structure
589 : * @bb: the badblocks structure that holds all badblock information
590 : */
591 0 : void badblocks_exit(struct badblocks *bb)
592 : {
593 0 : if (!bb)
594 : return;
595 0 : if (bb->dev)
596 0 : devm_kfree(bb->dev, bb->page);
597 : else
598 0 : kfree(bb->page);
599 0 : bb->page = NULL;
600 : }
601 : EXPORT_SYMBOL_GPL(badblocks_exit);
|