LCOV - code coverage report
Current view: top level - fs - sync.c (source / functions)         Hit   Total   Coverage
Test:         coverage.info                          Lines:         11     119      9.2 %
Date:         2023-07-19 18:55:55                    Functions:      1      18      5.6 %

          Line data    Source code
       1             : // SPDX-License-Identifier: GPL-2.0
       2             : /*
       3             :  * High-level sync()-related operations
       4             :  */
       5             : 
       6             : #include <linux/blkdev.h>
       7             : #include <linux/kernel.h>
       8             : #include <linux/file.h>
       9             : #include <linux/fs.h>
      10             : #include <linux/slab.h>
      11             : #include <linux/export.h>
      12             : #include <linux/namei.h>
      13             : #include <linux/sched.h>
      14             : #include <linux/writeback.h>
      15             : #include <linux/syscalls.h>
      16             : #include <linux/linkage.h>
      17             : #include <linux/pagemap.h>
      18             : #include <linux/quotaops.h>
      19             : #include <linux/backing-dev.h>
      20             : #include "internal.h"
      21             : 
      22             : #define VALID_FLAGS (SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE| \
      23             :                         SYNC_FILE_RANGE_WAIT_AFTER)
      24             : 
      25             : /*
      26             :  * Write out and wait upon all dirty data associated with this
      27             :  * superblock: filesystem data as well as the underlying block
      28             :  * device.  The caller must hold sb->s_umount.
      29             :  */
      30          22 : int sync_filesystem(struct super_block *sb)
      31             : {
      32          22 :         int ret = 0;
      33             : 
      34             :         /*
      35             :          * We need to be protected against the filesystem going from
      36             :          * r/o to r/w or vice versa.
      37             :          */
      38          44 :         WARN_ON(!rwsem_is_locked(&sb->s_umount));
      39             : 
      40             :         /*
      41             :          * No point in syncing out anything if the filesystem is read-only.
      42             :          */
      43          44 :         if (sb_rdonly(sb))
      44             :                 return 0;
      45             : 
      46             :         /*
      47             :          * Do the filesystem syncing work.  For simple filesystems
      48             :          * writeback_inodes_sb(sb) just dirties buffers with inodes so we have
      49             :          * to submit I/O for these buffers via sync_blockdev_nowait().  This
      50             :          * also speeds up the second, waiting pass below, since in that pass
      51             :          * write_inode() methods call sync_dirty_buffer() and thus effectively
      52             :          * write one block at a time.
      53             :          */
      54          22 :         writeback_inodes_sb(sb, WB_REASON_SYNC);
      55          22 :         if (sb->s_op->sync_fs) {
      56           0 :                 ret = sb->s_op->sync_fs(sb, 0);
      57           0 :                 if (ret)
      58             :                         return ret;
      59             :         }
      60          22 :         ret = sync_blockdev_nowait(sb->s_bdev);
      61          22 :         if (ret)
      62             :                 return ret;
      63             : 
      64          22 :         sync_inodes_sb(sb);
      65          22 :         if (sb->s_op->sync_fs) {
      66           0 :                 ret = sb->s_op->sync_fs(sb, 1);
      67           0 :                 if (ret)
      68             :                         return ret;
      69             :         }
      70          22 :         return sync_blockdev(sb->s_bdev);
      71             : }
      72             : EXPORT_SYMBOL(sync_filesystem);
      73             : 
      74           0 : static void sync_inodes_one_sb(struct super_block *sb, void *arg)
      75             : {
      76           0 :         if (!sb_rdonly(sb))
      77           0 :                 sync_inodes_sb(sb);
      78           0 : }
      79             : 
      80           0 : static void sync_fs_one_sb(struct super_block *sb, void *arg)
      81             : {
      82           0 :         if (!sb_rdonly(sb) && !(sb->s_iflags & SB_I_SKIP_SYNC) &&
      83           0 :             sb->s_op->sync_fs)
      84           0 :                 sb->s_op->sync_fs(sb, *(int *)arg);
      85           0 : }
      86             : 
      87             : /*
      88             :  * Sync everything. We start by waking flusher threads so that most of
      89             :  * writeback runs on all devices in parallel. Then we sync all inodes reliably
      90             :  * which effectively also waits for all flusher threads to finish doing
      91             :  * writeback. At this point all data is on disk so metadata should be stable
      92             :  * and we tell filesystems to sync their metadata via ->sync_fs() calls.
      93             :  * Finally, we write out all block devices because some filesystems (e.g. ext2)
      94             :  * just write metadata (such as inodes or bitmaps) to the block device page
      95             :  * cache and do not sync it on their own in ->sync_fs().
      96             :  */
      97           0 : void ksys_sync(void)
      98             : {
      99           0 :         int nowait = 0, wait = 1;
     100             : 
     101           0 :         wakeup_flusher_threads(WB_REASON_SYNC);
     102           0 :         iterate_supers(sync_inodes_one_sb, NULL);
     103           0 :         iterate_supers(sync_fs_one_sb, &nowait);
     104           0 :         iterate_supers(sync_fs_one_sb, &wait);
     105           0 :         sync_bdevs(false);
     106           0 :         sync_bdevs(true);
     107           0 :         if (unlikely(laptop_mode))
     108           0 :                 laptop_sync_completion();
     109           0 : }
     110             : 
     111           0 : SYSCALL_DEFINE0(sync)
     112             : {
     113           0 :         ksys_sync();
     114           0 :         return 0;
     115             : }
     116             : 
     117           0 : static void do_sync_work(struct work_struct *work)
     118             : {
     119           0 :         int nowait = 0;
     120             : 
     121             :         /*
     122             :          * Sync twice to reduce the possibility we skipped some inodes / pages
     123             :          * because they were temporarily locked
     124             :          */
     125           0 :         iterate_supers(sync_inodes_one_sb, &nowait);
     126           0 :         iterate_supers(sync_fs_one_sb, &nowait);
     127           0 :         sync_bdevs(false);
     128           0 :         iterate_supers(sync_inodes_one_sb, &nowait);
     129           0 :         iterate_supers(sync_fs_one_sb, &nowait);
     130           0 :         sync_bdevs(false);
     131           0 :         printk("Emergency Sync complete\n");
     132           0 :         kfree(work);
     133           0 : }
     134             : 
     135           0 : void emergency_sync(void)
     136             : {
     137             :         struct work_struct *work;
     138             : 
     139           0 :         work = kmalloc(sizeof(*work), GFP_ATOMIC);
     140           0 :         if (work) {
     141           0 :                 INIT_WORK(work, do_sync_work);
     142             :                 schedule_work(work);
     143             :         }
     144           0 : }
     145             : 
     146             : /*
     147             :  * Sync the single superblock that contains the file referred to by fd.
     148             :  */
     149           0 : SYSCALL_DEFINE1(syncfs, int, fd)
     150             : {
     151           0 :         struct fd f = fdget(fd);
     152             :         struct super_block *sb;
     153             :         int ret, ret2;
     154             : 
     155           0 :         if (!f.file)
     156             :                 return -EBADF;
     157           0 :         sb = f.file->f_path.dentry->d_sb;
     158             : 
     159           0 :         down_read(&sb->s_umount);
     160           0 :         ret = sync_filesystem(sb);
     161           0 :         up_read(&sb->s_umount);
     162             : 
     163           0 :         ret2 = errseq_check_and_advance(&sb->s_wb_err, &f.file->f_sb_err);
     164             : 
     165           0 :         fdput(f);
     166           0 :         return ret ? ret : ret2;
     167             : }
     168             : 
     169             : /**
     170             :  * vfs_fsync_range - helper to sync a range of data & metadata to disk
     171             :  * @file:               file to sync
     172             :  * @start:              offset in bytes of the beginning of data range to sync
     173             :  * @end:                offset in bytes of the end of data range (inclusive)
     174             :  * @datasync:           perform only datasync
     175             :  *
     176             :  * Write back data in range @start..@end and metadata for @file to disk.  If
     177             :  * @datasync is set only metadata needed to access modified file data is
     178             :  * written.
     179             :  */
     180           0 : int vfs_fsync_range(struct file *file, loff_t start, loff_t end, int datasync)
     181             : {
     182           0 :         struct inode *inode = file->f_mapping->host;
     183             : 
     184           0 :         if (!file->f_op->fsync)
     185             :                 return -EINVAL;
     186           0 :         if (!datasync && (inode->i_state & I_DIRTY_TIME))
     187             :                 mark_inode_dirty_sync(inode);
     188           0 :         return file->f_op->fsync(file, start, end, datasync);
     189             : }
     190             : EXPORT_SYMBOL(vfs_fsync_range);
     191             : 
     192             : /**
     193             :  * vfs_fsync - perform a fsync or fdatasync on a file
     194             :  * @file:               file to sync
     195             :  * @datasync:           only perform a fdatasync operation
     196             :  *
     197             :  * Write back data and metadata for @file to disk.  If @datasync is
     198             :  * set only metadata needed to access modified file data is written.
     199             :  */
     200           0 : int vfs_fsync(struct file *file, int datasync)
     201             : {
     202           0 :         return vfs_fsync_range(file, 0, LLONG_MAX, datasync);
     203             : }
     204             : EXPORT_SYMBOL(vfs_fsync);
     205             : 
     206           0 : static int do_fsync(unsigned int fd, int datasync)
     207             : {
     208           0 :         struct fd f = fdget(fd);
     209           0 :         int ret = -EBADF;
     210             : 
     211           0 :         if (f.file) {
     212           0 :                 ret = vfs_fsync(f.file, datasync);
     213           0 :                 fdput(f);
     214             :         }
     215           0 :         return ret;
     216             : }
     217             : 
     218           0 : SYSCALL_DEFINE1(fsync, unsigned int, fd)
     219             : {
     220           0 :         return do_fsync(fd, 0);
     221             : }
     222             : 
     223           0 : SYSCALL_DEFINE1(fdatasync, unsigned int, fd)
     224             : {
     225           0 :         return do_fsync(fd, 1);
     226             : }
     227             : 
     228           0 : int sync_file_range(struct file *file, loff_t offset, loff_t nbytes,
     229             :                     unsigned int flags)
     230             : {
     231             :         int ret;
     232             :         struct address_space *mapping;
     233             :         loff_t endbyte;                 /* inclusive */
     234             :         umode_t i_mode;
     235             : 
     236           0 :         ret = -EINVAL;
     237           0 :         if (flags & ~VALID_FLAGS)
     238             :                 goto out;
     239             : 
     240           0 :         endbyte = offset + nbytes;
     241             : 
     242           0 :         if ((s64)offset < 0)
     243             :                 goto out;
     244           0 :         if ((s64)endbyte < 0)
     245             :                 goto out;
     246           0 :         if (endbyte < offset)
     247             :                 goto out;
     248             : 
     249             :         if (sizeof(pgoff_t) == 4) {
     250             :                 if (offset >= (0x100000000ULL << PAGE_SHIFT)) {
     251             :                         /*
     252             :                          * The range starts outside a 32 bit machine's
     253             :                          * pagecache addressing capabilities.  Let it "succeed"
     254             :                          */
     255             :                         ret = 0;
     256             :                         goto out;
     257             :                 }
     258             :                 if (endbyte >= (0x100000000ULL << PAGE_SHIFT)) {
     259             :                         /*
     260             :                          * Out to EOF
     261             :                          */
     262             :                         nbytes = 0;
     263             :                 }
     264             :         }
     265             : 
     266           0 :         if (nbytes == 0)
     267             :                 endbyte = LLONG_MAX;
     268             :         else
     269           0 :                 endbyte--;              /* inclusive */
     270             : 
     271           0 :         i_mode = file_inode(file)->i_mode;
     272           0 :         ret = -ESPIPE;
     273           0 :         if (!S_ISREG(i_mode) && !S_ISBLK(i_mode) && !S_ISDIR(i_mode) &&
     274             :                         !S_ISLNK(i_mode))
     275             :                 goto out;
     276             : 
     277           0 :         mapping = file->f_mapping;
     278           0 :         ret = 0;
     279           0 :         if (flags & SYNC_FILE_RANGE_WAIT_BEFORE) {
     280           0 :                 ret = file_fdatawait_range(file, offset, endbyte);
     281           0 :                 if (ret < 0)
     282             :                         goto out;
     283             :         }
     284             : 
     285           0 :         if (flags & SYNC_FILE_RANGE_WRITE) {
     286           0 :                 int sync_mode = WB_SYNC_NONE;
     287             : 
     288           0 :                 if ((flags & SYNC_FILE_RANGE_WRITE_AND_WAIT) ==
     289             :                              SYNC_FILE_RANGE_WRITE_AND_WAIT)
     290           0 :                         sync_mode = WB_SYNC_ALL;
     291             : 
     292           0 :                 ret = __filemap_fdatawrite_range(mapping, offset, endbyte,
     293             :                                                  sync_mode);
     294           0 :                 if (ret < 0)
     295             :                         goto out;
     296             :         }
     297             : 
     298           0 :         if (flags & SYNC_FILE_RANGE_WAIT_AFTER)
     299           0 :                 ret = file_fdatawait_range(file, offset, endbyte);
     300             : 
     301             : out:
     302           0 :         return ret;
     303             : }
     304             : 
     305             : /*
     306             :  * ksys_sync_file_range() permits finely controlled syncing over a segment of
     307             :  * a file in the range offset .. (offset+nbytes-1) inclusive.  If nbytes is
     308             :  * zero then ksys_sync_file_range() will operate from offset out to EOF.
     309             :  *
     310             :  * The flag bits are:
     311             :  *
     312             :  * SYNC_FILE_RANGE_WAIT_BEFORE: wait upon writeout of all pages in the range
     313             :  * before performing the write.
     314             :  *
     315             :  * SYNC_FILE_RANGE_WRITE: initiate writeout of all those dirty pages in the
     316             :  * range which are not presently under writeback. Note that this may block for
     317             :  * significant periods due to exhaustion of disk request structures.
     318             :  *
     319             :  * SYNC_FILE_RANGE_WAIT_AFTER: wait upon writeout of all pages in the range
     320             :  * after performing the write.
     321             :  *
     322             :  * Useful combinations of the flag bits are:
     323             :  *
     324             :  * SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE: ensures that all pages
     325             :  * in the range which were dirty on entry to ksys_sync_file_range() are placed
     326             :  * under writeout.  This is a start-write-for-data-integrity operation.
     327             :  *
     328             :  * SYNC_FILE_RANGE_WRITE: start writeout of all dirty pages in the range which
     329             :  * are not presently under writeout.  This is an asynchronous flush-to-disk
     330             :  * operation.  Not suitable for data integrity operations.
     331             :  *
     332             :  * SYNC_FILE_RANGE_WAIT_BEFORE (or SYNC_FILE_RANGE_WAIT_AFTER): wait for
     333             :  * completion of writeout of all pages in the range.  This will be used after an
     334             :  * earlier SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE operation to wait
     335             :  * for that operation to complete and to return the result.
     336             :  *
     337             :  * SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE|SYNC_FILE_RANGE_WAIT_AFTER
     338             :  * (a.k.a. SYNC_FILE_RANGE_WRITE_AND_WAIT):
     339             :  * a traditional sync() operation.  This is a write-for-data-integrity operation
     340             :  * which will ensure that all pages in the range which were dirty on entry to
     341             :  * ksys_sync_file_range() are written to disk.  It should be noted that disk
     342             :  * caches are not flushed by this call, so there are no guarantees here that the
     343             :  * data will be available on disk after a crash.
     344             :  *
     345             :  *
     346             :  * SYNC_FILE_RANGE_WAIT_BEFORE and SYNC_FILE_RANGE_WAIT_AFTER will detect any
     347             :  * I/O errors or ENOSPC conditions and will return those to the caller, after
     348             :  * clearing the EIO and ENOSPC flags in the address_space.
     349             :  *
     350             :  * It should be noted that none of these operations write out the file's
     351             :  * metadata.  So unless the application is strictly performing overwrites of
     352             :  * already-instantiated disk blocks, there are no guarantees here that the data
     353             :  * will be available after a crash.
     354             :  */
     355           0 : int ksys_sync_file_range(int fd, loff_t offset, loff_t nbytes,
     356             :                          unsigned int flags)
     357             : {
     358             :         int ret;
     359             :         struct fd f;
     360             : 
     361           0 :         ret = -EBADF;
     362           0 :         f = fdget(fd);
     363           0 :         if (f.file)
     364           0 :                 ret = sync_file_range(f.file, offset, nbytes, flags);
     365             : 
     366           0 :         fdput(f);
     367           0 :         return ret;
     368             : }
     369             : 
     370           0 : SYSCALL_DEFINE4(sync_file_range, int, fd, loff_t, offset, loff_t, nbytes,
     371             :                                 unsigned int, flags)
     372             : {
     373           0 :         return ksys_sync_file_range(fd, offset, nbytes, flags);
     374             : }
     375             : 
     376             : #if defined(CONFIG_COMPAT) && defined(__ARCH_WANT_COMPAT_SYNC_FILE_RANGE)
     377             : COMPAT_SYSCALL_DEFINE6(sync_file_range, int, fd, compat_arg_u64_dual(offset),
     378             :                        compat_arg_u64_dual(nbytes), unsigned int, flags)
     379             : {
     380             :         return ksys_sync_file_range(fd, compat_arg_u64_glue(offset),
     381             :                                     compat_arg_u64_glue(nbytes), flags);
     382             : }
     383             : #endif
     384             : 
     385             : /* It would be nice if people remembered that not all the world's an i386
     386             :    when they introduce new system calls */
     387           0 : SYSCALL_DEFINE4(sync_file_range2, int, fd, unsigned int, flags,
     388             :                                  loff_t, offset, loff_t, nbytes)
     389             : {
     390           0 :         return ksys_sync_file_range(fd, offset, nbytes, flags);
     391             : }

Generated by: LCOV version 1.14