// SPDX-License-Identifier: GPL-2.0
/*
 *  linux/mm/page_io.c
 *
 *  Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
 *
 *  Swap reorganised 29.12.95,
 *  Asynchronous swapping added 30.12.95. Stephen Tweedie
 *  Removed race in async swapping. 14.4.1996. Bruno Haible
 *  Add swap of shared pages through the page cache. 20.2.1998. Stephen Tweedie
 *  Always use brw_page, life becomes simpler. 12 May 1998 Eric Biederman
 */

#include <linux/mm.h>
#include <linux/kernel_stat.h>
#include <linux/gfp.h>
#include <linux/pagemap.h>
#include <linux/swap.h>
#include <linux/bio.h>
#include <linux/swapops.h>
#include <linux/writeback.h>
#include <linux/frontswap.h>
#include <linux/blkdev.h>
#include <linux/psi.h>
#include <linux/uio.h>
#include <linux/sched/task.h>
#include <linux/delayacct.h>
#include "swap.h"

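/*
 * Completion handlers for swap bios.  The __-prefixed variants do the
 * actual page state updates and are shared between the synchronous path
 * (which embeds the bio on the stack and must not put it) and the
 * asynchronous path (which drops its reference with bio_put() when done).
 */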
static void __end_swap_bio_write(struct bio *bio)
{
        struct page *page = bio_first_page_all(bio);

        if (bio->bi_status) {
                SetPageError(page);
                /*
                 * We failed to write the page out to swap-space.
                 * Re-dirty the page in order to avoid it being reclaimed.
                 * Also print a dire warning that things will go BAD (tm)
                 * very quickly.
                 *
                 * Also clear PG_reclaim so that folio_rotate_reclaimable()
                 * does not move the page to the tail of the LRU.
                 */
                set_page_dirty(page);
                pr_alert_ratelimited("Write-error on swap-device (%u:%u:%llu)\n",
                                     MAJOR(bio_dev(bio)), MINOR(bio_dev(bio)),
                                     (unsigned long long)bio->bi_iter.bi_sector);
                ClearPageReclaim(page);
        }
        end_page_writeback(page);
}

static void end_swap_bio_write(struct bio *bio)
{
        __end_swap_bio_write(bio);
        bio_put(bio);
}

static void __end_swap_bio_read(struct bio *bio)
{
        struct page *page = bio_first_page_all(bio);

        if (bio->bi_status) {
                SetPageError(page);
                ClearPageUptodate(page);
                pr_alert_ratelimited("Read-error on swap-device (%u:%u:%llu)\n",
                                     MAJOR(bio_dev(bio)), MINOR(bio_dev(bio)),
                                     (unsigned long long)bio->bi_iter.bi_sector);
        } else {
                SetPageUptodate(page);
        }
        unlock_page(page);
}

static void end_swap_bio_read(struct bio *bio)
{
        __end_swap_bio_read(bio);
        bio_put(bio);
}

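/*
 * generic_swapfile_activate - map a swap file's blocks into swap extents
 *
 * Walks the file a page at a time, looking up each block with bmap().
 * A PAGE_SIZE-aligned run of physically contiguous blocks becomes one
 * swap extent; misaligned or discontiguous runs are skipped by
 * re-probing from the next block.  Returns the number of extents added,
 * or -EINVAL if the file has holes (bmap() found no block).
 */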
int generic_swapfile_activate(struct swap_info_struct *sis,
                                struct file *swap_file,
                                sector_t *span)
{
        struct address_space *mapping = swap_file->f_mapping;
        struct inode *inode = mapping->host;
        unsigned blocks_per_page;
        unsigned long page_no;
        unsigned blkbits;
        sector_t probe_block;
        sector_t last_block;
        sector_t lowest_block = -1;
        sector_t highest_block = 0;
        int nr_extents = 0;
        int ret;

        blkbits = inode->i_blkbits;
        blocks_per_page = PAGE_SIZE >> blkbits;

        /*
         * Map all the blocks into the extent tree.  This code doesn't try
         * to be very smart.
         */
        probe_block = 0;
        page_no = 0;
        last_block = i_size_read(inode) >> blkbits;
        while ((probe_block + blocks_per_page) <= last_block &&
                        page_no < sis->max) {
                unsigned block_in_page;
                sector_t first_block;

                cond_resched();

                first_block = probe_block;
                ret = bmap(inode, &first_block);
                if (ret || !first_block)
                        goto bad_bmap;

                /*
                 * It must be PAGE_SIZE aligned on-disk
                 */
                if (first_block & (blocks_per_page - 1)) {
                        probe_block++;
                        goto reprobe;
                }

                for (block_in_page = 1; block_in_page < blocks_per_page;
                                        block_in_page++) {
                        sector_t block;

                        block = probe_block + block_in_page;
                        ret = bmap(inode, &block);
                        if (ret || !block)
                                goto bad_bmap;

                        if (block != first_block + block_in_page) {
                                /* Discontiguity */
                                probe_block++;
                                goto reprobe;
                        }
                }

                first_block >>= (PAGE_SHIFT - blkbits);
                if (page_no) {  /* exclude the header page */
                        if (first_block < lowest_block)
                                lowest_block = first_block;
                        if (first_block > highest_block)
                                highest_block = first_block;
                }

                /*
                 * We found a PAGE_SIZE-length, PAGE_SIZE-aligned run of blocks
                 */
                ret = add_swap_extent(sis, page_no, 1, first_block);
                if (ret < 0)
                        goto out;
                nr_extents += ret;
                page_no++;
                probe_block += blocks_per_page;
reprobe:
                continue;
        }
        ret = nr_extents;
        *span = 1 + highest_block - lowest_block;
        if (page_no == 0)
                page_no = 1;    /* force Empty message */
        sis->max = page_no;
        sis->pages = page_no - 1;
        sis->highest_bit = page_no - 1;
out:
        return ret;
bad_bmap:
        pr_err("swapon: swapfile has holes\n");
        ret = -EINVAL;
        goto out;
}

/*
 * We may have stale swap cache pages in memory: notice
 * them here and get rid of the unnecessary final write.
 */
int swap_writepage(struct page *page, struct writeback_control *wbc)
{
        struct folio *folio = page_folio(page);
        int ret;

        if (folio_free_swap(folio)) {
                folio_unlock(folio);
                return 0;
        }
        /*
         * Arch code may have to preserve more data than just the page
         * contents, e.g. memory tags.
         */
        ret = arch_prepare_to_swap(&folio->page);
        if (ret) {
                folio_mark_dirty(folio);
                folio_unlock(folio);
                return ret;
        }
        if (frontswap_store(&folio->page) == 0) {
                folio_start_writeback(folio);
                folio_unlock(folio);
                folio_end_writeback(folio);
                return 0;
        }
        __swap_writepage(&folio->page, wbc);
        return 0;
}

static inline void count_swpout_vm_event(struct page *page)
{
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
        if (unlikely(PageTransHuge(page)))
                count_vm_event(THP_SWPOUT);
#endif
        count_vm_events(PSWPOUT, thp_nr_pages(page));
}

#if defined(CONFIG_MEMCG) && defined(CONFIG_BLK_CGROUP)
static void bio_associate_blkg_from_page(struct bio *bio, struct page *page)
{
        struct cgroup_subsys_state *css;
        struct mem_cgroup *memcg;

        memcg = page_memcg(page);
        if (!memcg)
                return;

        rcu_read_lock();
        css = cgroup_e_css(memcg->css.cgroup, &io_cgrp_subsys);
        bio_associate_blkg_from_css(bio, css);
        rcu_read_unlock();
}
#else
#define bio_associate_blkg_from_page(bio, page)         do { } while (0)
#endif /* CONFIG_MEMCG && CONFIG_BLK_CGROUP */

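/*
 * A swap_iocb collects up to SWAP_CLUSTER_MAX pages of swap I/O against
 * the same file into a single kiocb, so that swap-over-filesystem can
 * issue one ->swap_rw() call per batch instead of one per page.  A
 * pointer to the current batch is carried in the caller's plug (see
 * wbc->swap_plug and the *plug argument to swap_readpage()).
 */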
struct swap_iocb {
        struct kiocb            iocb;
        struct bio_vec          bvec[SWAP_CLUSTER_MAX];
        int                     pages;
        int                     len;
};
static mempool_t *sio_pool;

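/*
 * Lazily create the shared sio mempool.  Concurrent callers may each
 * build a pool, but only the one that wins the cmpxchg() keeps it;
 * the losers destroy their copy, so no lock is needed.
 */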
int sio_pool_init(void)
{
        if (!sio_pool) {
                mempool_t *pool = mempool_create_kmalloc_pool(
                        SWAP_CLUSTER_MAX, sizeof(struct swap_iocb));
                if (cmpxchg(&sio_pool, NULL, pool))
                        mempool_destroy(pool);
        }
        if (!sio_pool)
                return -ENOMEM;
        return 0;
}

static void sio_write_complete(struct kiocb *iocb, long ret)
{
        struct swap_iocb *sio = container_of(iocb, struct swap_iocb, iocb);
        struct page *page = sio->bvec[0].bv_page;
        int p;

        if (ret != sio->len) {
                /*
                 * In the case of swap-over-nfs, this can be a temporary
                 * failure if the system has limited memory for allocating
                 * transmit buffers.  Mark the pages dirty and clear
                 * PG_reclaim to avoid folio_rotate_reclaimable(), and
                 * rate-limit the messages.  Do not flag PageError as in
                 * the normal direct-to-bio case, because the failure may
                 * be transient.
                 */
                pr_err_ratelimited("Write error %ld on dio swapfile (%llu)\n",
                                   ret, page_file_offset(page));
                for (p = 0; p < sio->pages; p++) {
                        page = sio->bvec[p].bv_page;
                        set_page_dirty(page);
                        ClearPageReclaim(page);
                }
        } else {
                for (p = 0; p < sio->pages; p++)
                        count_swpout_vm_event(sio->bvec[p].bv_page);
        }

        for (p = 0; p < sio->pages; p++)
                end_page_writeback(sio->bvec[p].bv_page);

        mempool_free(sio, sio_pool);
}

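/*
 * Queue a page for write-out through the swap file's filesystem.  If the
 * page is contiguous (same file, adjacent offset) with the batch already
 * held in wbc->swap_plug, it is appended to that batch; otherwise the old
 * batch is flushed first.  A full batch, or the absence of a plug,
 * forces an immediate swap_write_unplug().
 */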
static void swap_writepage_fs(struct page *page, struct writeback_control *wbc)
{
        struct swap_iocb *sio = NULL;
        struct swap_info_struct *sis = page_swap_info(page);
        struct file *swap_file = sis->swap_file;
        loff_t pos = page_file_offset(page);

        set_page_writeback(page);
        unlock_page(page);
        if (wbc->swap_plug)
                sio = *wbc->swap_plug;
        if (sio) {
                if (sio->iocb.ki_filp != swap_file ||
                    sio->iocb.ki_pos + sio->len != pos) {
                        swap_write_unplug(sio);
                        sio = NULL;
                }
        }
        if (!sio) {
                sio = mempool_alloc(sio_pool, GFP_NOIO);
                init_sync_kiocb(&sio->iocb, swap_file);
                sio->iocb.ki_complete = sio_write_complete;
                sio->iocb.ki_pos = pos;
                sio->pages = 0;
                sio->len = 0;
        }
        bvec_set_page(&sio->bvec[sio->pages], page, thp_size(page), 0);
        sio->len += thp_size(page);
        sio->pages += 1;
        if (sio->pages == ARRAY_SIZE(sio->bvec) || !wbc->swap_plug) {
                swap_write_unplug(sio);
                sio = NULL;
        }
        if (wbc->swap_plug)
                *wbc->swap_plug = sio;
}

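/*
 * Write a page straight to the swap device and wait for completion.
 * The bio lives on the stack, so the completion work is done inline via
 * __end_swap_bio_write() rather than through a bi_end_io callback.
 */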
static void swap_writepage_bdev_sync(struct page *page,
                struct writeback_control *wbc, struct swap_info_struct *sis)
{
        struct bio_vec bv;
        struct bio bio;

        bio_init(&bio, sis->bdev, &bv, 1,
                 REQ_OP_WRITE | REQ_SWAP | wbc_to_write_flags(wbc));
        bio.bi_iter.bi_sector = swap_page_sector(page);
        bio_add_page(&bio, page, thp_size(page), 0);

        bio_associate_blkg_from_page(&bio, page);
        count_swpout_vm_event(page);

        set_page_writeback(page);
        unlock_page(page);

        submit_bio_wait(&bio);
        __end_swap_bio_write(&bio);
}

static void swap_writepage_bdev_async(struct page *page,
                struct writeback_control *wbc, struct swap_info_struct *sis)
{
        struct bio *bio;

        bio = bio_alloc(sis->bdev, 1,
                        REQ_OP_WRITE | REQ_SWAP | wbc_to_write_flags(wbc),
                        GFP_NOIO);
        bio->bi_iter.bi_sector = swap_page_sector(page);
        bio->bi_end_io = end_swap_bio_write;
        bio_add_page(bio, page, thp_size(page), 0);

        bio_associate_blkg_from_page(bio, page);
        count_swpout_vm_event(page);
        set_page_writeback(page);
        unlock_page(page);
        submit_bio(bio);
}

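/*
 * Dispatch a swap write to one of three back-ends: a filesystem that
 * implements ->swap_rw (SWP_FS_OPS), a synchronous block device
 * (SWP_SYNCHRONOUS_IO), or an ordinary block device via an async bio.
 */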
void __swap_writepage(struct page *page, struct writeback_control *wbc)
{
        struct swap_info_struct *sis = page_swap_info(page);

        VM_BUG_ON_PAGE(!PageSwapCache(page), page);
        /*
         * ->flags can be updated non-atomically (scan_swap_map_slots),
         * but that will never affect SWP_FS_OPS, so the data_race
         * is safe.
         */
        if (data_race(sis->flags & SWP_FS_OPS))
                swap_writepage_fs(page, wbc);
        else if (sis->flags & SWP_SYNCHRONOUS_IO)
                swap_writepage_bdev_sync(page, wbc, sis);
        else
                swap_writepage_bdev_async(page, wbc, sis);
}

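/*
 * Flush a batch of plugged swap writes by handing the collected bio_vecs
 * to the filesystem's ->swap_rw().  If the call completes synchronously
 * (anything other than -EIOCBQUEUED), run the completion handler here;
 * otherwise it will be invoked asynchronously via iocb.ki_complete.
 */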
void swap_write_unplug(struct swap_iocb *sio)
{
        struct iov_iter from;
        struct address_space *mapping = sio->iocb.ki_filp->f_mapping;
        int ret;

        iov_iter_bvec(&from, ITER_SOURCE, sio->bvec, sio->pages, sio->len);
        ret = mapping->a_ops->swap_rw(&sio->iocb, &from);
        if (ret != -EIOCBQUEUED)
                sio_write_complete(&sio->iocb, ret);
}

static void sio_read_complete(struct kiocb *iocb, long ret)
{
        struct swap_iocb *sio = container_of(iocb, struct swap_iocb, iocb);
        int p;

        if (ret == sio->len) {
                for (p = 0; p < sio->pages; p++) {
                        struct page *page = sio->bvec[p].bv_page;

                        SetPageUptodate(page);
                        unlock_page(page);
                }
                count_vm_events(PSWPIN, sio->pages);
        } else {
                for (p = 0; p < sio->pages; p++) {
                        struct page *page = sio->bvec[p].bv_page;

                        SetPageError(page);
                        ClearPageUptodate(page);
                        unlock_page(page);
                }
                pr_alert_ratelimited("Read-error on swap-device\n");
        }
        mempool_free(sio, sio_pool);
}

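/*
 * Queue a page for read-in through the swap file's filesystem, batching
 * contiguous pages into the caller's plug exactly as swap_writepage_fs()
 * does on the write side.
 */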
static void swap_readpage_fs(struct page *page,
                             struct swap_iocb **plug)
{
        struct swap_info_struct *sis = page_swap_info(page);
        struct swap_iocb *sio = NULL;
        loff_t pos = page_file_offset(page);

        if (plug)
                sio = *plug;
        if (sio) {
                if (sio->iocb.ki_filp != sis->swap_file ||
                    sio->iocb.ki_pos + sio->len != pos) {
                        swap_read_unplug(sio);
                        sio = NULL;
                }
        }
        if (!sio) {
                sio = mempool_alloc(sio_pool, GFP_KERNEL);
                init_sync_kiocb(&sio->iocb, sis->swap_file);
                sio->iocb.ki_pos = pos;
                sio->iocb.ki_complete = sio_read_complete;
                sio->pages = 0;
                sio->len = 0;
        }
        bvec_set_page(&sio->bvec[sio->pages], page, thp_size(page), 0);
        sio->len += thp_size(page);
        sio->pages += 1;
        if (sio->pages == ARRAY_SIZE(sio->bvec) || !plug) {
                swap_read_unplug(sio);
                sio = NULL;
        }
        if (plug)
                *plug = sio;
}

static void swap_readpage_bdev_sync(struct page *page,
                struct swap_info_struct *sis)
{
        struct bio_vec bv;
        struct bio bio;

        bio_init(&bio, sis->bdev, &bv, 1, REQ_OP_READ);
        bio.bi_iter.bi_sector = swap_page_sector(page);
        bio_add_page(&bio, page, thp_size(page), 0);
        /*
         * Keep this task valid during swap readpage because the oom killer may
         * attempt to access it in the page fault retry time check.
         */
        get_task_struct(current);
        count_vm_event(PSWPIN);
        submit_bio_wait(&bio);
        __end_swap_bio_read(&bio);
        put_task_struct(current);
}

static void swap_readpage_bdev_async(struct page *page,
                struct swap_info_struct *sis)
{
        struct bio *bio;

        bio = bio_alloc(sis->bdev, 1, REQ_OP_READ, GFP_KERNEL);
        bio->bi_iter.bi_sector = swap_page_sector(page);
        bio->bi_end_io = end_swap_bio_read;
        bio_add_page(bio, page, thp_size(page), 0);
        count_vm_event(PSWPIN);
        submit_bio(bio);
}

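/*
 * swap_readpage - read a page in from swap
 * @page:        locked page in the swap cache, not yet uptodate
 * @synchronous: wait for the I/O to complete before returning
 * @plug:        optional batch for the SWP_FS_OPS path, may be NULL
 *
 * Tries frontswap first, then falls back to the filesystem, synchronous
 * bdev, or async bdev path.  Submission time is accounted as a memory
 * stall for psi and delayacct when the page was part of the workingset.
 */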
void swap_readpage(struct page *page, bool synchronous, struct swap_iocb **plug)
{
        struct swap_info_struct *sis = page_swap_info(page);
        bool workingset = PageWorkingset(page);
        unsigned long pflags;
        bool in_thrashing;

        VM_BUG_ON_PAGE(!PageSwapCache(page) && !synchronous, page);
        VM_BUG_ON_PAGE(!PageLocked(page), page);
        VM_BUG_ON_PAGE(PageUptodate(page), page);

        /*
         * Count submission time as memory stall and delay. When the device
         * is congested, or the submitting cgroup IO-throttled, submission
         * can be a significant part of overall IO time.
         */
        if (workingset) {
                delayacct_thrashing_start(&in_thrashing);
                psi_memstall_enter(&pflags);
        }
        delayacct_swapin_start();

        if (frontswap_load(page) == 0) {
                SetPageUptodate(page);
                unlock_page(page);
        } else if (data_race(sis->flags & SWP_FS_OPS)) {
                swap_readpage_fs(page, plug);
        } else if (synchronous || (sis->flags & SWP_SYNCHRONOUS_IO)) {
                swap_readpage_bdev_sync(page, sis);
        } else {
                swap_readpage_bdev_async(page, sis);
        }

        if (workingset) {
                delayacct_thrashing_end(&in_thrashing);
                psi_memstall_leave(&pflags);
        }
        delayacct_swapin_end();
}

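/*
 * Read-side counterpart of swap_write_unplug(): issue the batched reads
 * via ->swap_rw() and complete them inline unless the call was queued.
 */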
void __swap_read_unplug(struct swap_iocb *sio)
{
        struct iov_iter from;
        struct address_space *mapping = sio->iocb.ki_filp->f_mapping;
        int ret;

        iov_iter_bvec(&from, ITER_DEST, sio->bvec, sio->pages, sio->len);
        ret = mapping->a_ops->swap_rw(&sio->iocb, &from);
        if (ret != -EIOCBQUEUED)
                sio_read_complete(&sio->iocb, ret);
}