// SPDX-License-Identifier: GPL-2.0
/*
 * mm/fadvise.c
 *
 * Copyright (C) 2002, Linus Torvalds
 *
 * 11Jan2003	Andrew Morton
 *		Initial version.
 */

#include <linux/kernel.h>
#include <linux/file.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/backing-dev.h>
#include <linux/pagevec.h>
#include <linux/fadvise.h>
#include <linux/writeback.h>
#include <linux/syscalls.h>
#include <linux/swap.h>

#include <asm/unistd.h>

#include "internal.h"

/*
 * POSIX_FADV_WILLNEED could set PG_Referenced, and POSIX_FADV_NOREUSE could
 * deactivate the pages and clear PG_Referenced.
 */

int generic_fadvise(struct file *file, loff_t offset, loff_t len, int advice)
{
	struct inode *inode;
	struct address_space *mapping;
	struct backing_dev_info *bdi;
	loff_t endbyte;			/* inclusive */
	pgoff_t start_index;
	pgoff_t end_index;
	unsigned long nrpages;

	inode = file_inode(file);
	if (S_ISFIFO(inode->i_mode))
		return -ESPIPE;

	mapping = file->f_mapping;
	if (!mapping || len < 0)
		return -EINVAL;

	bdi = inode_to_bdi(mapping->host);

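	/*
	 * Descriptive note (added for clarity): DAX files and files whose
	 * backing device does no readahead or writeback
	 * (noop_backing_dev_info) have no page-cache state for fadvise to
	 * act on, so known advice values are accepted but ignored.
	 */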
	if (IS_DAX(inode) || (bdi == &noop_backing_dev_info)) {
		switch (advice) {
		case POSIX_FADV_NORMAL:
		case POSIX_FADV_RANDOM:
		case POSIX_FADV_SEQUENTIAL:
		case POSIX_FADV_WILLNEED:
		case POSIX_FADV_NOREUSE:
		case POSIX_FADV_DONTNEED:
			/* no bad return value, but ignore advice */
			break;
		default:
			return -EINVAL;
		}
		return 0;
	}

	/*
	 * Careful about overflows. Len == 0 means "as much as possible". Use
	 * unsigned math because signed overflows are undefined and UBSan
	 * complains.
	 */
	endbyte = (u64)offset + (u64)len;
	if (!len || endbyte < len)
		endbyte = LLONG_MAX;
	else
		endbyte--;		/* inclusive */
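	/*
	 * Illustrative values (added for clarity): offset=0, len=0 yields
	 * endbyte=LLONG_MAX, i.e. "through end of file"; offset=4096,
	 * len=4096 yields endbyte=8191, the last byte of the range.
	 */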

	switch (advice) {
	case POSIX_FADV_NORMAL:
		file->f_ra.ra_pages = bdi->ra_pages;
		spin_lock(&file->f_lock);
		file->f_mode &= ~(FMODE_RANDOM | FMODE_NOREUSE);
		spin_unlock(&file->f_lock);
		break;
	case POSIX_FADV_RANDOM:
		spin_lock(&file->f_lock);
		file->f_mode |= FMODE_RANDOM;
		spin_unlock(&file->f_lock);
		break;
	case POSIX_FADV_SEQUENTIAL:
		file->f_ra.ra_pages = bdi->ra_pages * 2;
		spin_lock(&file->f_lock);
		file->f_mode &= ~FMODE_RANDOM;
		spin_unlock(&file->f_lock);
		break;
	case POSIX_FADV_WILLNEED:
		/* First and last PARTIAL page! */
		start_index = offset >> PAGE_SHIFT;
		end_index = endbyte >> PAGE_SHIFT;

		/* Careful about overflow on the "+1" */
		nrpages = end_index - start_index + 1;
		if (!nrpages)
			nrpages = ~0UL;

		force_page_cache_readahead(mapping, file, start_index, nrpages);
		break;
	case POSIX_FADV_NOREUSE:
		spin_lock(&file->f_lock);
		file->f_mode |= FMODE_NOREUSE;
		spin_unlock(&file->f_lock);
		break;
	case POSIX_FADV_DONTNEED:
		__filemap_fdatawrite_range(mapping, offset, endbyte,
					   WB_SYNC_NONE);

		/*
		 * First and last FULL page! Partial pages are deliberately
		 * preserved on the expectation that it is better to preserve
		 * needed memory than to discard unneeded memory.
		 */
		start_index = (offset + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
		end_index = (endbyte >> PAGE_SHIFT);
		/*
		 * The range passed to invalidate_mapping_pages() is inclusive,
		 * so the page at end_index would be discarded too; subtracting
		 * 1 from end_index makes us skip that last, partially covered
		 * page. But if endbyte is page aligned or is at the end of the
		 * file, we should not skip it - discarding the last page is
		 * safe enough.
		 */
		if ((endbyte & ~PAGE_MASK) != ~PAGE_MASK &&
				endbyte != inode->i_size - 1) {
			/* First page is tricky as 0 - 1 = -1, but pgoff_t
			 * is unsigned, so the end_index >= start_index
			 * check below would be true and we'll discard the whole
			 * file cache which is not what was asked.
			 */
			if (end_index == 0)
				break;

			end_index--;
		}
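		/*
		 * Worked example (added for clarity), assuming 4KiB pages:
		 * offset=100, endbyte=8191 gives start_index=1, end_index=1,
		 * so only the fully covered second page (bytes 4096-8191) is
		 * invalidated and the partially covered first page is kept.
		 * Had endbyte been mid-page (say 6000) and not at EOF,
		 * end_index would have been decremented so the partially
		 * covered last page is kept as well.
		 */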

		if (end_index >= start_index) {
			unsigned long nr_pagevec = 0;

			/*
			 * It's common to FADV_DONTNEED right after
			 * the read or write that instantiates the
			 * pages, in which case there will be some
			 * sitting on the local LRU cache. Try to
			 * avoid the expensive remote drain and the
			 * second cache tree walk below by flushing
			 * them out right away.
			 */
			lru_add_drain();

			invalidate_mapping_pagevec(mapping,
						start_index, end_index,
						&nr_pagevec);

			/*
			 * If fewer pages were invalidated than expected then
			 * it is possible that some of the pages were on
			 * a per-cpu pagevec for a remote CPU. Drain all
			 * pagevecs and try again.
			 */
			if (nr_pagevec) {
				lru_add_drain_all();
				invalidate_mapping_pages(mapping, start_index,
						end_index);
			}
		}
		break;
	default:
		return -EINVAL;
	}
	return 0;
}
EXPORT_SYMBOL(generic_fadvise);

int vfs_fadvise(struct file *file, loff_t offset, loff_t len, int advice)
{
	if (file->f_op->fadvise)
		return file->f_op->fadvise(file, offset, len, advice);

	return generic_fadvise(file, offset, len, advice);
}
EXPORT_SYMBOL(vfs_fadvise);
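/*
 * Hedged illustration (not part of this file): a filesystem can supply its
 * own ->fadvise hook in struct file_operations and fall back to the generic
 * helper for the common cases. The names below are hypothetical:
 *
 *	static int examplefs_fadvise(struct file *file, loff_t offset,
 *				     loff_t len, int advice)
 *	{
 *		// filesystem-specific handling could go here
 *		return generic_fadvise(file, offset, len, advice);
 *	}
 *
 * vfs_fadvise() above prefers such a hook when one is present and otherwise
 * calls generic_fadvise() directly.
 */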

#ifdef CONFIG_ADVISE_SYSCALLS

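/*
 * Descriptive note (added for clarity): the syscall entry points below
 * resolve the fd to a struct file, forward to vfs_fadvise(), and drop the
 * reference. fadvise64_64 takes loff_t offset/len directly; the compat
 * variant reassembles each 64-bit value from the register pair passed by
 * 32-bit ABIs via compat_arg_u64_dual()/compat_arg_u64_glue().
 */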
int ksys_fadvise64_64(int fd, loff_t offset, loff_t len, int advice)
{
	struct fd f = fdget(fd);
	int ret;

	if (!f.file)
		return -EBADF;

	ret = vfs_fadvise(f.file, offset, len, advice);

	fdput(f);
	return ret;
}

SYSCALL_DEFINE4(fadvise64_64, int, fd, loff_t, offset, loff_t, len, int, advice)
{
	return ksys_fadvise64_64(fd, offset, len, advice);
}

#ifdef __ARCH_WANT_SYS_FADVISE64

SYSCALL_DEFINE4(fadvise64, int, fd, loff_t, offset, size_t, len, int, advice)
{
	return ksys_fadvise64_64(fd, offset, len, advice);
}

#endif

#if defined(CONFIG_COMPAT) && defined(__ARCH_WANT_COMPAT_FADVISE64_64)

COMPAT_SYSCALL_DEFINE6(fadvise64_64, int, fd, compat_arg_u64_dual(offset),
		       compat_arg_u64_dual(len), int, advice)
{
	return ksys_fadvise64_64(fd, compat_arg_u64_glue(offset),
				 compat_arg_u64_glue(len), advice);
}

#endif
#endif
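/*
 * Hedged usage sketch (userspace, not part of this file): applications
 * normally reach these entry points through posix_fadvise(3), e.g.:
 *
 *	int fd = open("data.bin", O_RDONLY);
 *	// whole file (len == 0): double the readahead window
 *	posix_fadvise(fd, 0, 0, POSIX_FADV_SEQUENTIAL);
 *	// ... read the file ...
 *	// drop cached pages for the whole file once done
 *	posix_fadvise(fd, 0, 0, POSIX_FADV_DONTNEED);
 *
 * "data.bin" is an arbitrary example path; posix_fadvise() returns 0 on
 * success or a positive error number on failure.
 */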