Line data Source code
1 : // SPDX-License-Identifier: GPL-2.0-only
2 : /*
3 : * fs/libfs.c
4 : * Library for filesystems writers.
5 : */
6 :
7 : #include <linux/blkdev.h>
8 : #include <linux/export.h>
9 : #include <linux/pagemap.h>
10 : #include <linux/slab.h>
11 : #include <linux/cred.h>
12 : #include <linux/mount.h>
13 : #include <linux/vfs.h>
14 : #include <linux/quotaops.h>
15 : #include <linux/mutex.h>
16 : #include <linux/namei.h>
17 : #include <linux/exportfs.h>
18 : #include <linux/iversion.h>
19 : #include <linux/writeback.h>
20 : #include <linux/buffer_head.h> /* sync_mapping_buffers */
21 : #include <linux/fs_context.h>
22 : #include <linux/pseudo_fs.h>
23 : #include <linux/fsnotify.h>
24 : #include <linux/unicode.h>
25 : #include <linux/fscrypt.h>
26 :
27 : #include <linux/uaccess.h>
28 :
29 : #include "internal.h"
30 :
31 0 : int simple_getattr(struct mnt_idmap *idmap, const struct path *path,
32 : struct kstat *stat, u32 request_mask,
33 : unsigned int query_flags)
34 : {
35 0 : struct inode *inode = d_inode(path->dentry);
36 0 : generic_fillattr(&nop_mnt_idmap, inode, stat);
37 0 : stat->blocks = inode->i_mapping->nrpages << (PAGE_SHIFT - 9);
38 0 : return 0;
39 : }
40 : EXPORT_SYMBOL(simple_getattr);
41 :
42 0 : int simple_statfs(struct dentry *dentry, struct kstatfs *buf)
43 : {
44 0 : buf->f_type = dentry->d_sb->s_magic;
45 0 : buf->f_bsize = PAGE_SIZE;
46 0 : buf->f_namelen = NAME_MAX;
47 0 : return 0;
48 : }
49 : EXPORT_SYMBOL(simple_statfs);
50 :
/*
 * ->d_delete() callback that always answers "yes".
 *
 * Keeping negative dentries around for an in-memory filesystem only costs
 * memory and lookup time, so ask for them to be dropped immediately.
 * The @dentry argument is not inspected.
 */
int always_delete_dentry(const struct dentry *dentry)
{
	(void)dentry;

	return 1;
}
59 : EXPORT_SYMBOL(always_delete_dentry);
60 :
/*
 * Default dentry operations for simple filesystems: the only hook set is
 * ->d_delete, which points at always_delete_dentry().
 */
const struct dentry_operations simple_dentry_operations = {
	.d_delete = always_delete_dentry,
};
64 : EXPORT_SYMBOL(simple_dentry_operations);
65 :
66 : /*
67 : * Lookup the data. This is trivial - if the dentry didn't already
68 : * exist, we know it is negative. Set d_op to delete negative dentries.
69 : */
70 3 : struct dentry *simple_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
71 : {
72 3 : if (dentry->d_name.len > NAME_MAX)
73 : return ERR_PTR(-ENAMETOOLONG);
74 3 : if (!dentry->d_sb->s_d_op)
75 3 : d_set_d_op(dentry, &simple_dentry_operations);
76 3 : d_add(dentry, NULL);
77 3 : return NULL;
78 : }
79 : EXPORT_SYMBOL(simple_lookup);
80 :
81 0 : int dcache_dir_open(struct inode *inode, struct file *file)
82 : {
83 0 : file->private_data = d_alloc_cursor(file->f_path.dentry);
84 :
85 0 : return file->private_data ? 0 : -ENOMEM;
86 : }
87 : EXPORT_SYMBOL(dcache_dir_open);
88 :
89 0 : int dcache_dir_close(struct inode *inode, struct file *file)
90 : {
91 0 : dput(file->private_data);
92 0 : return 0;
93 : }
94 : EXPORT_SYMBOL(dcache_dir_close);
95 :
/* parent is locked at least shared */
/*
 * Returns an element of siblings' list.
 * We are looking for <count>th positive after <p>; if
 * found, dentry is grabbed and returned to caller.
 * If no such element exists, NULL is returned.
 *
 * @cursor: the caller's cursor dentry; its parent is the directory being
 *          scanned, and the cursor is re-parked in the list whenever the
 *          scan has to yield the CPU.
 * @p:      list position to start after (the scan begins at p->next).
 * @count:  how many positive entries to step over before stopping.
 * @last:   dentry reference to drop before returning (passed to dput()).
 */
static struct dentry *scan_positives(struct dentry *cursor,
					struct list_head *p,
					loff_t count,
					struct dentry *last)
{
	struct dentry *dentry = cursor->d_parent, *found = NULL;

	spin_lock(&dentry->d_lock);
	while ((p = p->next) != &dentry->d_subdirs) {
		struct dentry *d = list_entry(p, struct dentry, d_child);
		// we must at least skip cursors, to avoid livelocks
		if (d->d_flags & DCACHE_DENTRY_CURSOR)
			continue;
		if (simple_positive(d) && !--count) {
			/* re-check under the child's own lock before pinning it */
			spin_lock_nested(&d->d_lock, DENTRY_D_LOCK_NESTED);
			if (simple_positive(d))
				found = dget_dlock(d);
			spin_unlock(&d->d_lock);
			if (likely(found))
				break;
			/* the re-check failed: settle for the next positive one */
			count = 1;
		}
		if (need_resched()) {
			/*
			 * Park the cursor right after the current position and
			 * continue from it once the parent lock is retaken.
			 */
			list_move(&cursor->d_child, p);
			p = &cursor->d_child;
			spin_unlock(&dentry->d_lock);
			cond_resched();
			spin_lock(&dentry->d_lock);
		}
	}
	spin_unlock(&dentry->d_lock);
	/* release the caller's previous reference now that the parent lock is dropped */
	dput(last);
	return found;
}
137 :
138 0 : loff_t dcache_dir_lseek(struct file *file, loff_t offset, int whence)
139 : {
140 0 : struct dentry *dentry = file->f_path.dentry;
141 0 : switch (whence) {
142 : case 1:
143 0 : offset += file->f_pos;
144 : fallthrough;
145 : case 0:
146 0 : if (offset >= 0)
147 : break;
148 : fallthrough;
149 : default:
150 : return -EINVAL;
151 : }
152 0 : if (offset != file->f_pos) {
153 0 : struct dentry *cursor = file->private_data;
154 0 : struct dentry *to = NULL;
155 :
156 0 : inode_lock_shared(dentry->d_inode);
157 :
158 0 : if (offset > 2)
159 0 : to = scan_positives(cursor, &dentry->d_subdirs,
160 : offset - 2, NULL);
161 0 : spin_lock(&dentry->d_lock);
162 0 : if (to)
163 0 : list_move(&cursor->d_child, &to->d_child);
164 : else
165 0 : list_del_init(&cursor->d_child);
166 0 : spin_unlock(&dentry->d_lock);
167 0 : dput(to);
168 :
169 0 : file->f_pos = offset;
170 :
171 0 : inode_unlock_shared(dentry->d_inode);
172 : }
173 : return offset;
174 : }
175 : EXPORT_SYMBOL(dcache_dir_lseek);
176 :
177 : /* Relationship between i_mode and the DT_xxx types */
178 : static inline unsigned char dt_type(struct inode *inode)
179 : {
180 0 : return (inode->i_mode >> 12) & 15;
181 : }
182 :
183 : /*
184 : * Directory is locked and all positive dentries in it are safe, since
185 : * for ramfs-type trees they can't go away without unlink() or rmdir(),
186 : * both impossible due to the lock on directory.
187 : */
188 :
189 0 : int dcache_readdir(struct file *file, struct dir_context *ctx)
190 : {
191 0 : struct dentry *dentry = file->f_path.dentry;
192 0 : struct dentry *cursor = file->private_data;
193 0 : struct list_head *anchor = &dentry->d_subdirs;
194 0 : struct dentry *next = NULL;
195 : struct list_head *p;
196 :
197 0 : if (!dir_emit_dots(file, ctx))
198 : return 0;
199 :
200 0 : if (ctx->pos == 2)
201 : p = anchor;
202 0 : else if (!list_empty(&cursor->d_child))
203 : p = &cursor->d_child;
204 : else
205 : return 0;
206 :
207 0 : while ((next = scan_positives(cursor, p, 1, next)) != NULL) {
208 0 : if (!dir_emit(ctx, next->d_name.name, next->d_name.len,
209 0 : d_inode(next)->i_ino, dt_type(d_inode(next))))
210 : break;
211 0 : ctx->pos++;
212 0 : p = &next->d_child;
213 : }
214 0 : spin_lock(&dentry->d_lock);
215 0 : if (next)
216 0 : list_move_tail(&cursor->d_child, &next->d_child);
217 : else
218 0 : list_del_init(&cursor->d_child);
219 0 : spin_unlock(&dentry->d_lock);
220 0 : dput(next);
221 :
222 0 : return 0;
223 : }
224 : EXPORT_SYMBOL(dcache_readdir);
225 :
226 0 : ssize_t generic_read_dir(struct file *filp, char __user *buf, size_t siz, loff_t *ppos)
227 : {
228 0 : return -EISDIR;
229 : }
230 : EXPORT_SYMBOL(generic_read_dir);
231 :
/*
 * File operations for dcache-backed directories: the cursor is set up and
 * torn down by dcache_dir_open()/dcache_dir_close(), seeking and
 * iteration go through dcache_dir_lseek()/dcache_readdir(), read() is
 * refused by generic_read_dir() and fsync is a no-op.
 */
const struct file_operations simple_dir_operations = {
	.open		= dcache_dir_open,
	.release	= dcache_dir_close,
	.llseek		= dcache_dir_lseek,
	.read		= generic_read_dir,
	.iterate_shared	= dcache_readdir,
	.fsync		= noop_fsync,
};
240 : EXPORT_SYMBOL(simple_dir_operations);
241 :
/* Inode operations for simple directories: only ->lookup is provided. */
const struct inode_operations simple_dir_inode_operations = {
	.lookup		= simple_lookup,
};
245 : EXPORT_SYMBOL(simple_dir_inode_operations);
246 :
247 0 : static struct dentry *find_next_child(struct dentry *parent, struct dentry *prev)
248 : {
249 0 : struct dentry *child = NULL;
250 0 : struct list_head *p = prev ? &prev->d_child : &parent->d_subdirs;
251 :
252 0 : spin_lock(&parent->d_lock);
253 0 : while ((p = p->next) != &parent->d_subdirs) {
254 0 : struct dentry *d = container_of(p, struct dentry, d_child);
255 0 : if (simple_positive(d)) {
256 0 : spin_lock_nested(&d->d_lock, DENTRY_D_LOCK_NESTED);
257 0 : if (simple_positive(d))
258 : child = dget_dlock(d);
259 0 : spin_unlock(&d->d_lock);
260 0 : if (likely(child))
261 : break;
262 : }
263 : }
264 0 : spin_unlock(&parent->d_lock);
265 0 : dput(prev);
266 0 : return child;
267 : }
268 :
/*
 * Remove @dentry and everything below it, depth first, without recursion.
 *
 * @dentry:   root of the subtree to remove.
 * @callback: optional hook invoked on every removed dentry that was still
 *            positive, after the fsnotify event and before its reference
 *            is dropped; may be NULL.
 *
 * The walk descends into the first remaining positive child while one
 * exists; when a node has none left it is killed and the walk ascends to
 * its parent, continuing with the sibling that follows it.  The function
 * returns once @dentry itself has been processed.
 */
void simple_recursive_removal(struct dentry *dentry,
                              void (*callback)(struct dentry *))
{
	/* extra reference on the root; dropped right before returning */
	struct dentry *this = dget(dentry);
	while (true) {
		struct dentry *victim = NULL, *child;
		struct inode *inode = this->d_inode;

		inode_lock(inode);
		if (d_is_dir(this))
			inode->i_flags |= S_DEAD;
		while ((child = find_next_child(this, victim)) == NULL) {
			// kill and ascend
			// update metadata while it's still locked
			inode->i_ctime = current_time(inode);
			clear_nlink(inode);
			inode_unlock(inode);
			victim = this;
			this = this->d_parent;
			inode = this->d_inode;
			/* from here on 'inode' is the parent's inode, and it is locked */
			inode_lock(inode);
			if (simple_positive(victim)) {
				d_invalidate(victim);	// avoid lost mounts
				if (d_is_dir(victim))
					fsnotify_rmdir(inode, victim);
				else
					fsnotify_unlink(inode, victim);
				if (callback)
					callback(victim);
				dput(victim);		// unpin it
			}
			if (victim == dentry) {
				/* the root of the subtree is gone: fix up its parent and stop */
				inode->i_ctime = inode->i_mtime =
					current_time(inode);
				if (d_is_dir(dentry))
					drop_nlink(inode);
				inode_unlock(inode);
				dput(dentry);
				return;
			}
		}
		/* a positive child remains: descend into it */
		inode_unlock(inode);
		this = child;
	}
}
314 : EXPORT_SYMBOL(simple_recursive_removal);
315 :
/* Minimal super operations: only ->statfs is provided, via simple_statfs(). */
static const struct super_operations simple_super_operations = {
	.statfs		= simple_statfs,
};
319 :
320 24 : static int pseudo_fs_fill_super(struct super_block *s, struct fs_context *fc)
321 : {
322 24 : struct pseudo_fs_context *ctx = fc->fs_private;
323 : struct inode *root;
324 :
325 24 : s->s_maxbytes = MAX_LFS_FILESIZE;
326 24 : s->s_blocksize = PAGE_SIZE;
327 24 : s->s_blocksize_bits = PAGE_SHIFT;
328 24 : s->s_magic = ctx->magic;
329 24 : s->s_op = ctx->ops ?: &simple_super_operations;
330 24 : s->s_xattr = ctx->xattr;
331 24 : s->s_time_gran = 1;
332 24 : root = new_inode(s);
333 24 : if (!root)
334 : return -ENOMEM;
335 :
336 : /*
337 : * since this is the first inode, make it number 1. New inodes created
338 : * after this must take care not to collide with it (by passing
339 : * max_reserved of 1 to iunique).
340 : */
341 24 : root->i_ino = 1;
342 24 : root->i_mode = S_IFDIR | S_IRUSR | S_IWUSR;
343 24 : root->i_atime = root->i_mtime = root->i_ctime = current_time(root);
344 24 : s->s_root = d_make_root(root);
345 24 : if (!s->s_root)
346 : return -ENOMEM;
347 24 : s->s_d_op = ctx->dops;
348 24 : return 0;
349 : }
350 :
351 24 : static int pseudo_fs_get_tree(struct fs_context *fc)
352 : {
353 24 : return get_tree_nodev(fc, pseudo_fs_fill_super);
354 : }
355 :
356 24 : static void pseudo_fs_free(struct fs_context *fc)
357 : {
358 24 : kfree(fc->fs_private);
359 24 : }
360 :
/* fs_context operations installed by init_pseudo(). */
static const struct fs_context_operations pseudo_fs_context_ops = {
	.free		= pseudo_fs_free,
	.get_tree	= pseudo_fs_get_tree,
};
365 :
366 : /*
367 : * Common helper for pseudo-filesystems (sockfs, pipefs, bdev - stuff that
368 : * will never be mountable)
369 : */
370 24 : struct pseudo_fs_context *init_pseudo(struct fs_context *fc,
371 : unsigned long magic)
372 : {
373 : struct pseudo_fs_context *ctx;
374 :
375 24 : ctx = kzalloc(sizeof(struct pseudo_fs_context), GFP_KERNEL);
376 24 : if (likely(ctx)) {
377 24 : ctx->magic = magic;
378 24 : fc->fs_private = ctx;
379 24 : fc->ops = &pseudo_fs_context_ops;
380 24 : fc->sb_flags |= SB_NOUSER;
381 24 : fc->global = true;
382 : }
383 24 : return ctx;
384 : }
385 : EXPORT_SYMBOL(init_pseudo);
386 :
387 0 : int simple_open(struct inode *inode, struct file *file)
388 : {
389 0 : if (inode->i_private)
390 0 : file->private_data = inode->i_private;
391 0 : return 0;
392 : }
393 : EXPORT_SYMBOL(simple_open);
394 :
395 0 : int simple_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
396 : {
397 0 : struct inode *inode = d_inode(old_dentry);
398 :
399 0 : inode->i_ctime = dir->i_ctime = dir->i_mtime = current_time(inode);
400 0 : inc_nlink(inode);
401 0 : ihold(inode);
402 0 : dget(dentry);
403 0 : d_instantiate(dentry, inode);
404 0 : return 0;
405 : }
406 : EXPORT_SYMBOL(simple_link);
407 :
408 0 : int simple_empty(struct dentry *dentry)
409 : {
410 : struct dentry *child;
411 0 : int ret = 0;
412 :
413 0 : spin_lock(&dentry->d_lock);
414 0 : list_for_each_entry(child, &dentry->d_subdirs, d_child) {
415 0 : spin_lock_nested(&child->d_lock, DENTRY_D_LOCK_NESTED);
416 0 : if (simple_positive(child)) {
417 0 : spin_unlock(&child->d_lock);
418 : goto out;
419 : }
420 0 : spin_unlock(&child->d_lock);
421 : }
422 : ret = 1;
423 : out:
424 0 : spin_unlock(&dentry->d_lock);
425 0 : return ret;
426 : }
427 : EXPORT_SYMBOL(simple_empty);
428 :
429 0 : int simple_unlink(struct inode *dir, struct dentry *dentry)
430 : {
431 0 : struct inode *inode = d_inode(dentry);
432 :
433 0 : inode->i_ctime = dir->i_ctime = dir->i_mtime = current_time(inode);
434 0 : drop_nlink(inode);
435 0 : dput(dentry);
436 0 : return 0;
437 : }
438 : EXPORT_SYMBOL(simple_unlink);
439 :
440 0 : int simple_rmdir(struct inode *dir, struct dentry *dentry)
441 : {
442 0 : if (!simple_empty(dentry))
443 : return -ENOTEMPTY;
444 :
445 0 : drop_nlink(d_inode(dentry));
446 0 : simple_unlink(dir, dentry);
447 0 : drop_nlink(dir);
448 0 : return 0;
449 : }
450 : EXPORT_SYMBOL(simple_rmdir);
451 :
452 0 : int simple_rename_exchange(struct inode *old_dir, struct dentry *old_dentry,
453 : struct inode *new_dir, struct dentry *new_dentry)
454 : {
455 0 : bool old_is_dir = d_is_dir(old_dentry);
456 0 : bool new_is_dir = d_is_dir(new_dentry);
457 :
458 0 : if (old_dir != new_dir && old_is_dir != new_is_dir) {
459 0 : if (old_is_dir) {
460 0 : drop_nlink(old_dir);
461 0 : inc_nlink(new_dir);
462 : } else {
463 0 : drop_nlink(new_dir);
464 0 : inc_nlink(old_dir);
465 : }
466 : }
467 0 : old_dir->i_ctime = old_dir->i_mtime =
468 0 : new_dir->i_ctime = new_dir->i_mtime =
469 0 : d_inode(old_dentry)->i_ctime =
470 0 : d_inode(new_dentry)->i_ctime = current_time(old_dir);
471 :
472 0 : return 0;
473 : }
474 : EXPORT_SYMBOL_GPL(simple_rename_exchange);
475 :
476 0 : int simple_rename(struct mnt_idmap *idmap, struct inode *old_dir,
477 : struct dentry *old_dentry, struct inode *new_dir,
478 : struct dentry *new_dentry, unsigned int flags)
479 : {
480 0 : struct inode *inode = d_inode(old_dentry);
481 0 : int they_are_dirs = d_is_dir(old_dentry);
482 :
483 0 : if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE))
484 : return -EINVAL;
485 :
486 0 : if (flags & RENAME_EXCHANGE)
487 0 : return simple_rename_exchange(old_dir, old_dentry, new_dir, new_dentry);
488 :
489 0 : if (!simple_empty(new_dentry))
490 : return -ENOTEMPTY;
491 :
492 0 : if (d_really_is_positive(new_dentry)) {
493 0 : simple_unlink(new_dir, new_dentry);
494 0 : if (they_are_dirs) {
495 0 : drop_nlink(d_inode(new_dentry));
496 0 : drop_nlink(old_dir);
497 : }
498 0 : } else if (they_are_dirs) {
499 0 : drop_nlink(old_dir);
500 0 : inc_nlink(new_dir);
501 : }
502 :
503 0 : old_dir->i_ctime = old_dir->i_mtime = new_dir->i_ctime =
504 0 : new_dir->i_mtime = inode->i_ctime = current_time(old_dir);
505 :
506 0 : return 0;
507 : }
508 : EXPORT_SYMBOL(simple_rename);
509 :
510 : /**
511 : * simple_setattr - setattr for simple filesystem
512 : * @idmap: idmap of the target mount
513 : * @dentry: dentry
514 : * @iattr: iattr structure
515 : *
516 : * Returns 0 on success, -error on failure.
517 : *
518 : * simple_setattr is a simple ->setattr implementation without a proper
519 : * implementation of size changes.
520 : *
521 : * It can either be used for in-memory filesystems or special files
522 : * on simple regular filesystems. Anything that needs to change on-disk
523 : * or wire state on size changes needs its own setattr method.
524 : */
525 0 : int simple_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
526 : struct iattr *iattr)
527 : {
528 0 : struct inode *inode = d_inode(dentry);
529 : int error;
530 :
531 0 : error = setattr_prepare(idmap, dentry, iattr);
532 0 : if (error)
533 : return error;
534 :
535 0 : if (iattr->ia_valid & ATTR_SIZE)
536 0 : truncate_setsize(inode, iattr->ia_size);
537 0 : setattr_copy(idmap, inode, iattr);
538 0 : mark_inode_dirty(inode);
539 0 : return 0;
540 : }
541 : EXPORT_SYMBOL(simple_setattr);
542 :
543 0 : static int simple_read_folio(struct file *file, struct folio *folio)
544 : {
545 0 : folio_zero_range(folio, 0, folio_size(folio));
546 0 : flush_dcache_folio(folio);
547 0 : folio_mark_uptodate(folio);
548 0 : folio_unlock(folio);
549 0 : return 0;
550 : }
551 :
552 0 : int simple_write_begin(struct file *file, struct address_space *mapping,
553 : loff_t pos, unsigned len,
554 : struct page **pagep, void **fsdata)
555 : {
556 : struct page *page;
557 : pgoff_t index;
558 :
559 0 : index = pos >> PAGE_SHIFT;
560 :
561 0 : page = grab_cache_page_write_begin(mapping, index);
562 0 : if (!page)
563 : return -ENOMEM;
564 :
565 0 : *pagep = page;
566 :
567 0 : if (!PageUptodate(page) && (len != PAGE_SIZE)) {
568 0 : unsigned from = pos & (PAGE_SIZE - 1);
569 :
570 0 : zero_user_segments(page, 0, from, from + len, PAGE_SIZE);
571 : }
572 : return 0;
573 : }
574 : EXPORT_SYMBOL(simple_write_begin);
575 :
576 : /**
577 : * simple_write_end - .write_end helper for non-block-device FSes
578 : * @file: See .write_end of address_space_operations
579 : * @mapping: "
580 : * @pos: "
581 : * @len: "
582 : * @copied: "
583 : * @page: "
584 : * @fsdata: "
585 : *
586 : * simple_write_end does the minimum needed for updating a page after writing is
587 : * done. It has the same API signature as the .write_end of
588 : * address_space_operations vector. So it can just be set onto .write_end for
589 : * FSes that don't need any other processing. i_mutex is assumed to be held.
590 : * Block based filesystems should use generic_write_end().
591 : * NOTE: Even though i_size might get updated by this function, mark_inode_dirty
592 : * is not called, so a filesystem that actually does store data in .write_inode
593 : * should extend on what's done here with a call to mark_inode_dirty() in the
594 : * case that i_size has changed.
595 : *
596 : * Use *ONLY* with simple_read_folio()
597 : */
598 0 : static int simple_write_end(struct file *file, struct address_space *mapping,
599 : loff_t pos, unsigned len, unsigned copied,
600 : struct page *page, void *fsdata)
601 : {
602 0 : struct inode *inode = page->mapping->host;
603 0 : loff_t last_pos = pos + copied;
604 :
605 : /* zero the stale part of the page if we did a short copy */
606 0 : if (!PageUptodate(page)) {
607 0 : if (copied < len) {
608 0 : unsigned from = pos & (PAGE_SIZE - 1);
609 :
610 0 : zero_user(page, from + copied, len - copied);
611 : }
612 : SetPageUptodate(page);
613 : }
614 : /*
615 : * No need to use i_size_read() here, the i_size
616 : * cannot change under us because we hold the i_mutex.
617 : */
618 0 : if (last_pos > inode->i_size)
619 0 : i_size_write(inode, last_pos);
620 :
621 0 : set_page_dirty(page);
622 0 : unlock_page(page);
623 0 : put_page(page);
624 :
625 0 : return copied;
626 : }
627 :
/*
 * Provides ramfs-style behavior: data in the pagecache, but no writeback.
 *
 * Reads zero-fill the folio (simple_read_folio), writes go through
 * simple_write_begin()/simple_write_end(), and dirtying a folio is a
 * no-op.
 */
const struct address_space_operations ram_aops = {
	.read_folio	= simple_read_folio,
	.write_begin	= simple_write_begin,
	.write_end	= simple_write_end,
	.dirty_folio	= noop_dirty_folio,
};
637 : EXPORT_SYMBOL(ram_aops);
638 :
639 : /*
640 : * the inodes created here are not hashed. If you use iunique to generate
641 : * unique inode values later for this filesystem, then you must take care
642 : * to pass it an appropriate max_reserved value to avoid collisions.
643 : */
644 0 : int simple_fill_super(struct super_block *s, unsigned long magic,
645 : const struct tree_descr *files)
646 : {
647 : struct inode *inode;
648 : struct dentry *root;
649 : struct dentry *dentry;
650 : int i;
651 :
652 0 : s->s_blocksize = PAGE_SIZE;
653 0 : s->s_blocksize_bits = PAGE_SHIFT;
654 0 : s->s_magic = magic;
655 0 : s->s_op = &simple_super_operations;
656 0 : s->s_time_gran = 1;
657 :
658 0 : inode = new_inode(s);
659 0 : if (!inode)
660 : return -ENOMEM;
661 : /*
662 : * because the root inode is 1, the files array must not contain an
663 : * entry at index 1
664 : */
665 0 : inode->i_ino = 1;
666 0 : inode->i_mode = S_IFDIR | 0755;
667 0 : inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
668 0 : inode->i_op = &simple_dir_inode_operations;
669 0 : inode->i_fop = &simple_dir_operations;
670 0 : set_nlink(inode, 2);
671 0 : root = d_make_root(inode);
672 0 : if (!root)
673 : return -ENOMEM;
674 0 : for (i = 0; !files->name || files->name[0]; i++, files++) {
675 0 : if (!files->name)
676 0 : continue;
677 :
678 : /* warn if it tries to conflict with the root inode */
679 0 : if (unlikely(i == 1))
680 0 : printk(KERN_WARNING "%s: %s passed in a files array"
681 : "with an index of 1!\n", __func__,
682 : s->s_type->name);
683 :
684 0 : dentry = d_alloc_name(root, files->name);
685 0 : if (!dentry)
686 : goto out;
687 0 : inode = new_inode(s);
688 0 : if (!inode) {
689 0 : dput(dentry);
690 0 : goto out;
691 : }
692 0 : inode->i_mode = S_IFREG | files->mode;
693 0 : inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
694 0 : inode->i_fop = files->ops;
695 0 : inode->i_ino = i;
696 0 : d_add(dentry, inode);
697 : }
698 0 : s->s_root = root;
699 0 : return 0;
700 : out:
701 0 : d_genocide(root);
702 0 : shrink_dcache_parent(root);
703 0 : dput(root);
704 0 : return -ENOMEM;
705 : }
706 : EXPORT_SYMBOL(simple_fill_super);
707 :
708 : static DEFINE_SPINLOCK(pin_fs_lock);
709 :
710 18 : int simple_pin_fs(struct file_system_type *type, struct vfsmount **mount, int *count)
711 : {
712 18 : struct vfsmount *mnt = NULL;
713 18 : spin_lock(&pin_fs_lock);
714 18 : if (unlikely(!*mount)) {
715 18 : spin_unlock(&pin_fs_lock);
716 18 : mnt = vfs_kern_mount(type, SB_KERNMOUNT, type->name, NULL);
717 18 : if (IS_ERR(mnt))
718 0 : return PTR_ERR(mnt);
719 18 : spin_lock(&pin_fs_lock);
720 18 : if (!*mount)
721 18 : *mount = mnt;
722 : }
723 18 : mntget(*mount);
724 18 : ++*count;
725 18 : spin_unlock(&pin_fs_lock);
726 18 : mntput(mnt);
727 18 : return 0;
728 : }
729 : EXPORT_SYMBOL(simple_pin_fs);
730 :
731 17 : void simple_release_fs(struct vfsmount **mount, int *count)
732 : {
733 : struct vfsmount *mnt;
734 17 : spin_lock(&pin_fs_lock);
735 17 : mnt = *mount;
736 17 : if (!--*count)
737 17 : *mount = NULL;
738 17 : spin_unlock(&pin_fs_lock);
739 17 : mntput(mnt);
740 17 : }
741 : EXPORT_SYMBOL(simple_release_fs);
742 :
743 : /**
744 : * simple_read_from_buffer - copy data from the buffer to user space
745 : * @to: the user space buffer to read to
746 : * @count: the maximum number of bytes to read
747 : * @ppos: the current position in the buffer
748 : * @from: the buffer to read from
749 : * @available: the size of the buffer
750 : *
751 : * The simple_read_from_buffer() function reads up to @count bytes from the
752 : * buffer @from at offset @ppos into the user space address starting at @to.
753 : *
754 : * On success, the number of bytes read is returned and the offset @ppos is
755 : * advanced by this number, or negative value is returned on error.
756 : **/
757 0 : ssize_t simple_read_from_buffer(void __user *to, size_t count, loff_t *ppos,
758 : const void *from, size_t available)
759 : {
760 0 : loff_t pos = *ppos;
761 : size_t ret;
762 :
763 0 : if (pos < 0)
764 : return -EINVAL;
765 0 : if (pos >= available || !count)
766 : return 0;
767 0 : if (count > available - pos)
768 0 : count = available - pos;
769 0 : ret = copy_to_user(to, from + pos, count);
770 0 : if (ret == count)
771 : return -EFAULT;
772 0 : count -= ret;
773 0 : *ppos = pos + count;
774 0 : return count;
775 : }
776 : EXPORT_SYMBOL(simple_read_from_buffer);
777 :
778 : /**
779 : * simple_write_to_buffer - copy data from user space to the buffer
780 : * @to: the buffer to write to
781 : * @available: the size of the buffer
782 : * @ppos: the current position in the buffer
783 : * @from: the user space buffer to read from
784 : * @count: the maximum number of bytes to read
785 : *
786 : * The simple_write_to_buffer() function reads up to @count bytes from the user
787 : * space address starting at @from into the buffer @to at offset @ppos.
788 : *
789 : * On success, the number of bytes written is returned and the offset @ppos is
790 : * advanced by this number, or negative value is returned on error.
791 : **/
792 0 : ssize_t simple_write_to_buffer(void *to, size_t available, loff_t *ppos,
793 : const void __user *from, size_t count)
794 : {
795 0 : loff_t pos = *ppos;
796 : size_t res;
797 :
798 0 : if (pos < 0)
799 : return -EINVAL;
800 0 : if (pos >= available || !count)
801 : return 0;
802 0 : if (count > available - pos)
803 0 : count = available - pos;
804 0 : res = copy_from_user(to + pos, from, count);
805 0 : if (res == count)
806 : return -EFAULT;
807 0 : count -= res;
808 0 : *ppos = pos + count;
809 0 : return count;
810 : }
811 : EXPORT_SYMBOL(simple_write_to_buffer);
812 :
/**
 * memory_read_from_buffer - copy data from the buffer
 * @to: the kernel space buffer to read to
 * @count: the maximum number of bytes to read
 * @ppos: the current position in the buffer
 * @from: the buffer to read from
 * @available: the size of the buffer
 *
 * The memory_read_from_buffer() function reads up to @count bytes from the
 * buffer @from at offset @ppos into the kernel space address starting at @to.
 *
 * On success, the number of bytes read is returned and the offset @ppos is
 * advanced by this number, or negative value is returned on error
 * (-EINVAL for a negative position).  A position at or beyond @available
 * returns 0 and leaves @ppos unchanged.
 **/
ssize_t memory_read_from_buffer(void *to, size_t count, loff_t *ppos,
				const void *from, size_t available)
{
	const loff_t start = *ppos;

	if (start < 0)
		return -EINVAL;
	if (start >= available)
		return 0;

	/* clamp the request to what is left in the buffer */
	if (count > available - start)
		count = available - start;

	memcpy(to, from + start, count);
	*ppos = start + count;

	return count;
}
843 : EXPORT_SYMBOL(memory_read_from_buffer);
844 :
845 : /*
846 : * Transaction based IO.
847 : * The file expects a single write which triggers the transaction, and then
848 : * possibly a read which collects the result - which is stored in a
849 : * file-local buffer.
850 : */
851 :
852 0 : void simple_transaction_set(struct file *file, size_t n)
853 : {
854 0 : struct simple_transaction_argresp *ar = file->private_data;
855 :
856 0 : BUG_ON(n > SIMPLE_TRANSACTION_LIMIT);
857 :
858 : /*
859 : * The barrier ensures that ar->size will really remain zero until
860 : * ar->data is ready for reading.
861 : */
862 0 : smp_mb();
863 0 : ar->size = n;
864 0 : }
865 : EXPORT_SYMBOL(simple_transaction_set);
866 :
867 0 : char *simple_transaction_get(struct file *file, const char __user *buf, size_t size)
868 : {
869 : struct simple_transaction_argresp *ar;
870 : static DEFINE_SPINLOCK(simple_transaction_lock);
871 :
872 0 : if (size > SIMPLE_TRANSACTION_LIMIT - 1)
873 : return ERR_PTR(-EFBIG);
874 :
875 0 : ar = (struct simple_transaction_argresp *)get_zeroed_page(GFP_KERNEL);
876 0 : if (!ar)
877 : return ERR_PTR(-ENOMEM);
878 :
879 0 : spin_lock(&simple_transaction_lock);
880 :
881 : /* only one write allowed per open */
882 0 : if (file->private_data) {
883 0 : spin_unlock(&simple_transaction_lock);
884 0 : free_page((unsigned long)ar);
885 0 : return ERR_PTR(-EBUSY);
886 : }
887 :
888 0 : file->private_data = ar;
889 :
890 0 : spin_unlock(&simple_transaction_lock);
891 :
892 0 : if (copy_from_user(ar->data, buf, size))
893 : return ERR_PTR(-EFAULT);
894 :
895 0 : return ar->data;
896 : }
897 : EXPORT_SYMBOL(simple_transaction_get);
898 :
899 0 : ssize_t simple_transaction_read(struct file *file, char __user *buf, size_t size, loff_t *pos)
900 : {
901 0 : struct simple_transaction_argresp *ar = file->private_data;
902 :
903 0 : if (!ar)
904 : return 0;
905 0 : return simple_read_from_buffer(buf, size, pos, ar->data, ar->size);
906 : }
907 : EXPORT_SYMBOL(simple_transaction_read);
908 :
909 0 : int simple_transaction_release(struct inode *inode, struct file *file)
910 : {
911 0 : free_page((unsigned long)file->private_data);
912 0 : return 0;
913 : }
914 : EXPORT_SYMBOL(simple_transaction_release);
915 :
916 : /* Simple attribute files */
917 :
/*
 * Per-open state of a simple attribute file, allocated by
 * simple_attr_open() and freed by simple_attr_release().
 */
struct simple_attr {
	int (*get)(void *, u64 *);	/* reads the value; NULL makes reads fail with -EACCES */
	int (*set)(void *, u64);	/* stores a value; NULL makes writes fail with -EACCES */
	char get_buf[24];	/* enough to store a u64 and "\n\0" */
	char set_buf[24];
	void *data;		/* cookie passed to get/set; taken from inode->i_private at open */
	const char *fmt;	/* format for read operation */
	struct mutex mutex;	/* protects access to these buffers */
};
927 :
928 : /* simple_attr_open is called by an actual attribute open file operation
929 : * to set the attribute specific access operations. */
930 0 : int simple_attr_open(struct inode *inode, struct file *file,
931 : int (*get)(void *, u64 *), int (*set)(void *, u64),
932 : const char *fmt)
933 : {
934 : struct simple_attr *attr;
935 :
936 0 : attr = kzalloc(sizeof(*attr), GFP_KERNEL);
937 0 : if (!attr)
938 : return -ENOMEM;
939 :
940 0 : attr->get = get;
941 0 : attr->set = set;
942 0 : attr->data = inode->i_private;
943 0 : attr->fmt = fmt;
944 0 : mutex_init(&attr->mutex);
945 :
946 0 : file->private_data = attr;
947 :
948 0 : return nonseekable_open(inode, file);
949 : }
950 : EXPORT_SYMBOL_GPL(simple_attr_open);
951 :
952 0 : int simple_attr_release(struct inode *inode, struct file *file)
953 : {
954 0 : kfree(file->private_data);
955 0 : return 0;
956 : }
957 : EXPORT_SYMBOL_GPL(simple_attr_release); /* GPL-only? This? Really? */
958 :
959 : /* read from the buffer that is filled with the get function */
960 0 : ssize_t simple_attr_read(struct file *file, char __user *buf,
961 : size_t len, loff_t *ppos)
962 : {
963 : struct simple_attr *attr;
964 : size_t size;
965 : ssize_t ret;
966 :
967 0 : attr = file->private_data;
968 :
969 0 : if (!attr->get)
970 : return -EACCES;
971 :
972 0 : ret = mutex_lock_interruptible(&attr->mutex);
973 0 : if (ret)
974 : return ret;
975 :
976 0 : if (*ppos && attr->get_buf[0]) {
977 : /* continued read */
978 0 : size = strlen(attr->get_buf);
979 : } else {
980 : /* first read */
981 : u64 val;
982 0 : ret = attr->get(attr->data, &val);
983 0 : if (ret)
984 : goto out;
985 :
986 0 : size = scnprintf(attr->get_buf, sizeof(attr->get_buf),
987 : attr->fmt, (unsigned long long)val);
988 : }
989 :
990 0 : ret = simple_read_from_buffer(buf, len, ppos, attr->get_buf, size);
991 : out:
992 0 : mutex_unlock(&attr->mutex);
993 0 : return ret;
994 : }
995 : EXPORT_SYMBOL_GPL(simple_attr_read);
996 :
997 : /* interpret the buffer as a number to call the set function with */
998 0 : static ssize_t simple_attr_write_xsigned(struct file *file, const char __user *buf,
999 : size_t len, loff_t *ppos, bool is_signed)
1000 : {
1001 : struct simple_attr *attr;
1002 : unsigned long long val;
1003 : size_t size;
1004 : ssize_t ret;
1005 :
1006 0 : attr = file->private_data;
1007 0 : if (!attr->set)
1008 : return -EACCES;
1009 :
1010 0 : ret = mutex_lock_interruptible(&attr->mutex);
1011 0 : if (ret)
1012 : return ret;
1013 :
1014 0 : ret = -EFAULT;
1015 0 : size = min(sizeof(attr->set_buf) - 1, len);
1016 0 : if (copy_from_user(attr->set_buf, buf, size))
1017 : goto out;
1018 :
1019 0 : attr->set_buf[size] = '\0';
1020 0 : if (is_signed)
1021 0 : ret = kstrtoll(attr->set_buf, 0, &val);
1022 : else
1023 0 : ret = kstrtoull(attr->set_buf, 0, &val);
1024 0 : if (ret)
1025 : goto out;
1026 0 : ret = attr->set(attr->data, val);
1027 0 : if (ret == 0)
1028 0 : ret = len; /* on success, claim we got the whole input */
1029 : out:
1030 0 : mutex_unlock(&attr->mutex);
1031 0 : return ret;
1032 : }
1033 :
1034 0 : ssize_t simple_attr_write(struct file *file, const char __user *buf,
1035 : size_t len, loff_t *ppos)
1036 : {
1037 0 : return simple_attr_write_xsigned(file, buf, len, ppos, false);
1038 : }
1039 : EXPORT_SYMBOL_GPL(simple_attr_write);
1040 :
1041 0 : ssize_t simple_attr_write_signed(struct file *file, const char __user *buf,
1042 : size_t len, loff_t *ppos)
1043 : {
1044 0 : return simple_attr_write_xsigned(file, buf, len, ppos, true);
1045 : }
1046 : EXPORT_SYMBOL_GPL(simple_attr_write_signed);
1047 :
1048 : /**
1049 : * generic_fh_to_dentry - generic helper for the fh_to_dentry export operation
1050 : * @sb: filesystem to do the file handle conversion on
1051 : * @fid: file handle to convert
1052 : * @fh_len: length of the file handle in bytes
1053 : * @fh_type: type of file handle
1054 : * @get_inode: filesystem callback to retrieve inode
1055 : *
1056 : * This function decodes @fid as long as it has one of the well-known
1057 : * Linux filehandle types and calls @get_inode on it to retrieve the
1058 : * inode for the object specified in the file handle.
1059 : */
1060 0 : struct dentry *generic_fh_to_dentry(struct super_block *sb, struct fid *fid,
1061 : int fh_len, int fh_type, struct inode *(*get_inode)
1062 : (struct super_block *sb, u64 ino, u32 gen))
1063 : {
1064 0 : struct inode *inode = NULL;
1065 :
1066 0 : if (fh_len < 2)
1067 : return NULL;
1068 :
1069 0 : switch (fh_type) {
1070 : case FILEID_INO32_GEN:
1071 : case FILEID_INO32_GEN_PARENT:
1072 0 : inode = get_inode(sb, fid->i32.ino, fid->i32.gen);
1073 0 : break;
1074 : }
1075 :
1076 0 : return d_obtain_alias(inode);
1077 : }
1078 : EXPORT_SYMBOL_GPL(generic_fh_to_dentry);
1079 :
1080 : /**
1081 : * generic_fh_to_parent - generic helper for the fh_to_parent export operation
1082 : * @sb: filesystem to do the file handle conversion on
1083 : * @fid: file handle to convert
1084 : * @fh_len: length of the file handle in bytes
1085 : * @fh_type: type of file handle
1086 : * @get_inode: filesystem callback to retrieve inode
1087 : *
1088 : * This function decodes @fid as long as it has one of the well-known
1089 : * Linux filehandle types and calls @get_inode on it to retrieve the
1090 : * inode for the _parent_ object specified in the file handle if it
1091 : * is specified in the file handle, or NULL otherwise.
1092 : */
1093 0 : struct dentry *generic_fh_to_parent(struct super_block *sb, struct fid *fid,
1094 : int fh_len, int fh_type, struct inode *(*get_inode)
1095 : (struct super_block *sb, u64 ino, u32 gen))
1096 : {
1097 0 : struct inode *inode = NULL;
1098 :
1099 0 : if (fh_len <= 2)
1100 : return NULL;
1101 :
1102 0 : switch (fh_type) {
1103 : case FILEID_INO32_GEN_PARENT:
1104 0 : inode = get_inode(sb, fid->i32.parent_ino,
1105 : (fh_len > 3 ? fid->i32.parent_gen : 0));
1106 0 : break;
1107 : }
1108 :
1109 0 : return d_obtain_alias(inode);
1110 : }
1111 : EXPORT_SYMBOL_GPL(generic_fh_to_parent);
1112 :
1113 : /**
1114 : * __generic_file_fsync - generic fsync implementation for simple filesystems
1115 : *
1116 : * @file: file to synchronize
1117 : * @start: start offset in bytes
1118 : * @end: end offset in bytes (inclusive)
1119 : * @datasync: only synchronize essential metadata if true
1120 : *
1121 : * This is a generic implementation of the fsync method for simple
1122 : * filesystems which track all non-inode metadata in the buffers list
1123 : * hanging off the address_space structure.
1124 : */
1125 0 : int __generic_file_fsync(struct file *file, loff_t start, loff_t end,
1126 : int datasync)
1127 : {
1128 0 : struct inode *inode = file->f_mapping->host;
1129 : int err;
1130 : int ret;
1131 :
1132 0 : err = file_write_and_wait_range(file, start, end);
1133 0 : if (err)
1134 : return err;
1135 :
1136 0 : inode_lock(inode);
1137 0 : ret = sync_mapping_buffers(inode->i_mapping);
1138 0 : if (!(inode->i_state & I_DIRTY_ALL))
1139 : goto out;
1140 0 : if (datasync && !(inode->i_state & I_DIRTY_DATASYNC))
1141 : goto out;
1142 :
1143 0 : err = sync_inode_metadata(inode, 1);
1144 0 : if (ret == 0)
1145 0 : ret = err;
1146 :
1147 : out:
1148 0 : inode_unlock(inode);
1149 : /* check and advance again to catch errors after syncing out buffers */
1150 0 : err = file_check_and_advance_wb_err(file);
1151 0 : if (ret == 0)
1152 0 : ret = err;
1153 : return ret;
1154 : }
1155 : EXPORT_SYMBOL(__generic_file_fsync);
1156 :
1157 : /**
1158 : * generic_file_fsync - generic fsync implementation for simple filesystems
1159 : * with flush
1160 : * @file: file to synchronize
1161 : * @start: start offset in bytes
1162 : * @end: end offset in bytes (inclusive)
1163 : * @datasync: only synchronize essential metadata if true
1164 : *
1165 : */
1166 :
1167 0 : int generic_file_fsync(struct file *file, loff_t start, loff_t end,
1168 : int datasync)
1169 : {
1170 0 : struct inode *inode = file->f_mapping->host;
1171 : int err;
1172 :
1173 0 : err = __generic_file_fsync(file, start, end, datasync);
1174 0 : if (err)
1175 : return err;
1176 0 : return blkdev_issue_flush(inode->i_sb->s_bdev);
1177 : }
1178 : EXPORT_SYMBOL(generic_file_fsync);
1179 :
1180 : /**
1181 : * generic_check_addressable - Check addressability of file system
1182 : * @blocksize_bits: log of file system block size
1183 : * @num_blocks: number of blocks in file system
1184 : *
1185 : * Determine whether a file system with @num_blocks blocks (and a
1186 : * block size of 2**@blocksize_bits) is addressable by the sector_t
1187 : * and page cache of the system. Return 0 if so and -EFBIG otherwise.
1188 : */
1189 0 : int generic_check_addressable(unsigned blocksize_bits, u64 num_blocks)
1190 : {
1191 0 : u64 last_fs_block = num_blocks - 1;
1192 0 : u64 last_fs_page =
1193 0 : last_fs_block >> (PAGE_SHIFT - blocksize_bits);
1194 :
1195 0 : if (unlikely(num_blocks == 0))
1196 : return 0;
1197 :
1198 0 : if ((blocksize_bits < 9) || (blocksize_bits > PAGE_SHIFT))
1199 : return -EINVAL;
1200 :
1201 0 : if ((last_fs_block > (sector_t)(~0ULL) >> (blocksize_bits - 9)) ||
1202 : (last_fs_page > (pgoff_t)(~0ULL))) {
1203 : return -EFBIG;
1204 : }
1205 0 : return 0;
1206 : }
1207 : EXPORT_SYMBOL(generic_check_addressable);
1208 :
1209 : /*
1210 : * No-op implementation of ->fsync for in-memory filesystems.
1211 : */
1212 0 : int noop_fsync(struct file *file, loff_t start, loff_t end, int datasync)
1213 : {
1214 0 : return 0;
1215 : }
1216 : EXPORT_SYMBOL(noop_fsync);
1217 :
1218 0 : ssize_t noop_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
1219 : {
1220 : /*
1221 : * iomap based filesystems support direct I/O without need for
1222 : * this callback. However, it still needs to be set in
1223 : * inode->a_ops so that open/fcntl know that direct I/O is
1224 : * generally supported.
1225 : */
1226 0 : return -EINVAL;
1227 : }
1228 : EXPORT_SYMBOL_GPL(noop_direct_IO);
1229 :
/*
 * Wrapper that frees @p with kfree().  It exists because kfree itself is
 * not assignment-compatible with a void (*)(void *) function pointer.
 */
void kfree_link(void *p)
{
	void *link = p;

	kfree(link);
}
EXPORT_SYMBOL(kfree_link);
1236 :
1237 19 : struct inode *alloc_anon_inode(struct super_block *s)
1238 : {
1239 : static const struct address_space_operations anon_aops = {
1240 : .dirty_folio = noop_dirty_folio,
1241 : };
1242 19 : struct inode *inode = new_inode_pseudo(s);
1243 :
1244 19 : if (!inode)
1245 : return ERR_PTR(-ENOMEM);
1246 :
1247 19 : inode->i_ino = get_next_ino();
1248 19 : inode->i_mapping->a_ops = &anon_aops;
1249 :
1250 : /*
1251 : * Mark the inode dirty from the very beginning,
1252 : * that way it will never be moved to the dirty
1253 : * list because mark_inode_dirty() will think
1254 : * that it already _is_ on the dirty list.
1255 : */
1256 19 : inode->i_state = I_DIRTY;
1257 19 : inode->i_mode = S_IRUSR | S_IWUSR;
1258 19 : inode->i_uid = current_fsuid();
1259 19 : inode->i_gid = current_fsgid();
1260 19 : inode->i_flags |= S_PRIVATE;
1261 19 : inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
1262 19 : return inode;
1263 : }
1264 : EXPORT_SYMBOL(alloc_anon_inode);
1265 :
1266 : /**
1267 : * simple_nosetlease - generic helper for prohibiting leases
1268 : * @filp: file pointer
1269 : * @arg: type of lease to obtain
1270 : * @flp: new lease supplied for insertion
1271 : * @priv: private data for lm_setup operation
1272 : *
1273 : * Generic helper for filesystems that do not wish to allow leases to be set.
1274 : * All arguments are ignored and it just returns -EINVAL.
1275 : */
1276 : int
1277 0 : simple_nosetlease(struct file *filp, long arg, struct file_lock **flp,
1278 : void **priv)
1279 : {
1280 0 : return -EINVAL;
1281 : }
1282 : EXPORT_SYMBOL(simple_nosetlease);
1283 :
1284 : /**
1285 : * simple_get_link - generic helper to get the target of "fast" symlinks
1286 : * @dentry: not used here
1287 : * @inode: the symlink inode
1288 : * @done: not used here
1289 : *
1290 : * Generic helper for filesystems to use for symlink inodes where a pointer to
1291 : * the symlink target is stored in ->i_link. NOTE: this isn't normally called,
1292 : * since as an optimization the path lookup code uses any non-NULL ->i_link
1293 : * directly, without calling ->get_link(). But ->get_link() still must be set,
1294 : * to mark the inode_operations as being for a symlink.
1295 : *
1296 : * Return: the symlink target
1297 : */
1298 0 : const char *simple_get_link(struct dentry *dentry, struct inode *inode,
1299 : struct delayed_call *done)
1300 : {
1301 0 : return inode->i_link;
1302 : }
1303 : EXPORT_SYMBOL(simple_get_link);
1304 :
1305 : const struct inode_operations simple_symlink_inode_operations = {
1306 : .get_link = simple_get_link,
1307 : };
1308 : EXPORT_SYMBOL(simple_symlink_inode_operations);
1309 :
1310 : /*
1311 : * Operations for a permanently empty directory.
1312 : */
1313 0 : static struct dentry *empty_dir_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
1314 : {
1315 0 : return ERR_PTR(-ENOENT);
1316 : }
1317 :
1318 0 : static int empty_dir_getattr(struct mnt_idmap *idmap,
1319 : const struct path *path, struct kstat *stat,
1320 : u32 request_mask, unsigned int query_flags)
1321 : {
1322 0 : struct inode *inode = d_inode(path->dentry);
1323 0 : generic_fillattr(&nop_mnt_idmap, inode, stat);
1324 0 : return 0;
1325 : }
1326 :
1327 0 : static int empty_dir_setattr(struct mnt_idmap *idmap,
1328 : struct dentry *dentry, struct iattr *attr)
1329 : {
1330 0 : return -EPERM;
1331 : }
1332 :
1333 0 : static ssize_t empty_dir_listxattr(struct dentry *dentry, char *list, size_t size)
1334 : {
1335 0 : return -EOPNOTSUPP;
1336 : }
1337 :
1338 : static const struct inode_operations empty_dir_inode_operations = {
1339 : .lookup = empty_dir_lookup,
1340 : .permission = generic_permission,
1341 : .setattr = empty_dir_setattr,
1342 : .getattr = empty_dir_getattr,
1343 : .listxattr = empty_dir_listxattr,
1344 : };
1345 :
1346 0 : static loff_t empty_dir_llseek(struct file *file, loff_t offset, int whence)
1347 : {
1348 : /* An empty directory has two entries . and .. at offsets 0 and 1 */
1349 0 : return generic_file_llseek_size(file, offset, whence, 2, 2);
1350 : }
1351 :
/*
 * ->iterate_shared for the empty directory: emit "." and ".." only.
 * The result of dir_emit_dots() is deliberately ignored; always returns 0.
 */
static int empty_dir_readdir(struct file *file, struct dir_context *ctx)
{
	(void)dir_emit_dots(file, ctx);

	return 0;
}
1357 :
1358 : static const struct file_operations empty_dir_operations = {
1359 : .llseek = empty_dir_llseek,
1360 : .read = generic_read_dir,
1361 : .iterate_shared = empty_dir_readdir,
1362 : .fsync = noop_fsync,
1363 : };
1364 :
1365 :
1366 0 : void make_empty_dir_inode(struct inode *inode)
1367 : {
1368 0 : set_nlink(inode, 2);
1369 0 : inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO;
1370 0 : inode->i_uid = GLOBAL_ROOT_UID;
1371 0 : inode->i_gid = GLOBAL_ROOT_GID;
1372 0 : inode->i_rdev = 0;
1373 0 : inode->i_size = 0;
1374 0 : inode->i_blkbits = PAGE_SHIFT;
1375 0 : inode->i_blocks = 0;
1376 :
1377 0 : inode->i_op = &empty_dir_inode_operations;
1378 0 : inode->i_opflags &= ~IOP_XATTR;
1379 0 : inode->i_fop = &empty_dir_operations;
1380 0 : }
1381 :
1382 0 : bool is_empty_dir_inode(struct inode *inode)
1383 : {
1384 0 : return (inode->i_fop == &empty_dir_operations) &&
1385 0 : (inode->i_op == &empty_dir_inode_operations);
1386 : }
1387 :
1388 : #if IS_ENABLED(CONFIG_UNICODE)
1389 : /*
1390 : * Determine if the name of a dentry should be casefolded.
1391 : *
1392 : * Return: if names will need casefolding
1393 : */
1394 : static bool needs_casefold(const struct inode *dir)
1395 : {
1396 : return IS_CASEFOLDED(dir) && dir->i_sb->s_encoding;
1397 : }
1398 :
1399 : /**
1400 : * generic_ci_d_compare - generic d_compare implementation for casefolding filesystems
1401 : * @dentry: dentry whose name we are checking against
1402 : * @len: len of name of dentry
1403 : * @str: str pointer to name of dentry
1404 : * @name: Name to compare against
1405 : *
1406 : * Return: 0 if names match, 1 if mismatch, or -ERRNO
1407 : */
1408 : static int generic_ci_d_compare(const struct dentry *dentry, unsigned int len,
1409 : const char *str, const struct qstr *name)
1410 : {
1411 : const struct dentry *parent = READ_ONCE(dentry->d_parent);
1412 : const struct inode *dir = READ_ONCE(parent->d_inode);
1413 : const struct super_block *sb = dentry->d_sb;
1414 : const struct unicode_map *um = sb->s_encoding;
1415 : struct qstr qstr = QSTR_INIT(str, len);
1416 : char strbuf[DNAME_INLINE_LEN];
1417 : int ret;
1418 :
1419 : if (!dir || !needs_casefold(dir))
1420 : goto fallback;
1421 : /*
1422 : * If the dentry name is stored in-line, then it may be concurrently
1423 : * modified by a rename. If this happens, the VFS will eventually retry
1424 : * the lookup, so it doesn't matter what ->d_compare() returns.
1425 : * However, it's unsafe to call utf8_strncasecmp() with an unstable
1426 : * string. Therefore, we have to copy the name into a temporary buffer.
1427 : */
1428 : if (len <= DNAME_INLINE_LEN - 1) {
1429 : memcpy(strbuf, str, len);
1430 : strbuf[len] = 0;
1431 : qstr.name = strbuf;
1432 : /* prevent compiler from optimizing out the temporary buffer */
1433 : barrier();
1434 : }
1435 : ret = utf8_strncasecmp(um, name, &qstr);
1436 : if (ret >= 0)
1437 : return ret;
1438 :
1439 : if (sb_has_strict_encoding(sb))
1440 : return -EINVAL;
1441 : fallback:
1442 : if (len != name->len)
1443 : return 1;
1444 : return !!memcmp(str, name->name, len);
1445 : }
1446 :
1447 : /**
1448 : * generic_ci_d_hash - generic d_hash implementation for casefolding filesystems
1449 : * @dentry: dentry of the parent directory
1450 : * @str: qstr of name whose hash we should fill in
1451 : *
1452 : * Return: 0 if hash was successful or unchanged, and -EINVAL on error
1453 : */
1454 : static int generic_ci_d_hash(const struct dentry *dentry, struct qstr *str)
1455 : {
1456 : const struct inode *dir = READ_ONCE(dentry->d_inode);
1457 : struct super_block *sb = dentry->d_sb;
1458 : const struct unicode_map *um = sb->s_encoding;
1459 : int ret = 0;
1460 :
1461 : if (!dir || !needs_casefold(dir))
1462 : return 0;
1463 :
1464 : ret = utf8_casefold_hash(um, dentry, str);
1465 : if (ret < 0 && sb_has_strict_encoding(sb))
1466 : return -EINVAL;
1467 : return 0;
1468 : }
1469 :
1470 : static const struct dentry_operations generic_ci_dentry_ops = {
1471 : .d_hash = generic_ci_d_hash,
1472 : .d_compare = generic_ci_d_compare,
1473 : };
1474 : #endif
1475 :
1476 : #ifdef CONFIG_FS_ENCRYPTION
1477 : static const struct dentry_operations generic_encrypted_dentry_ops = {
1478 : .d_revalidate = fscrypt_d_revalidate,
1479 : };
1480 : #endif
1481 :
1482 : #if defined(CONFIG_FS_ENCRYPTION) && IS_ENABLED(CONFIG_UNICODE)
1483 : static const struct dentry_operations generic_encrypted_ci_dentry_ops = {
1484 : .d_hash = generic_ci_d_hash,
1485 : .d_compare = generic_ci_d_compare,
1486 : .d_revalidate = fscrypt_d_revalidate,
1487 : };
1488 : #endif
1489 :
1490 : /**
1491 : * generic_set_encrypted_ci_d_ops - helper for setting d_ops for given dentry
1492 : * @dentry: dentry to set ops on
1493 : *
1494 : * Casefolded directories need d_hash and d_compare set, so that the dentries
1495 : * contained in them are handled case-insensitively. Note that these operations
1496 : * are needed on the parent directory rather than on the dentries in it, and
1497 : * while the casefolding flag can be toggled on and off on an empty directory,
1498 : * dentry_operations can't be changed later. As a result, if the filesystem has
1499 : * casefolding support enabled at all, we have to give all dentries the
1500 : * casefolding operations even if their inode doesn't have the casefolding flag
1501 : * currently (and thus the casefolding ops would be no-ops for now).
1502 : *
1503 : * Encryption works differently in that the only dentry operation it needs is
1504 : * d_revalidate, which it only needs on dentries that have the no-key name flag.
1505 : * The no-key flag can't be set "later", so we don't have to worry about that.
1506 : *
1507 : * Finally, to maximize compatibility with overlayfs (which isn't compatible
1508 : * with certain dentry operations) and to avoid taking an unnecessary
1509 : * performance hit, we use custom dentry_operations for each possible
1510 : * combination rather than always installing all operations.
1511 : */
1512 0 : void generic_set_encrypted_ci_d_ops(struct dentry *dentry)
1513 : {
1514 : #ifdef CONFIG_FS_ENCRYPTION
1515 : bool needs_encrypt_ops = dentry->d_flags & DCACHE_NOKEY_NAME;
1516 : #endif
1517 : #if IS_ENABLED(CONFIG_UNICODE)
1518 : bool needs_ci_ops = dentry->d_sb->s_encoding;
1519 : #endif
1520 : #if defined(CONFIG_FS_ENCRYPTION) && IS_ENABLED(CONFIG_UNICODE)
1521 : if (needs_encrypt_ops && needs_ci_ops) {
1522 : d_set_d_op(dentry, &generic_encrypted_ci_dentry_ops);
1523 : return;
1524 : }
1525 : #endif
1526 : #ifdef CONFIG_FS_ENCRYPTION
1527 : if (needs_encrypt_ops) {
1528 : d_set_d_op(dentry, &generic_encrypted_dentry_ops);
1529 : return;
1530 : }
1531 : #endif
1532 : #if IS_ENABLED(CONFIG_UNICODE)
1533 : if (needs_ci_ops) {
1534 : d_set_d_op(dentry, &generic_ci_dentry_ops);
1535 : return;
1536 : }
1537 : #endif
1538 0 : }
1539 : EXPORT_SYMBOL(generic_set_encrypted_ci_d_ops);
1540 :
1541 : /**
1542 : * inode_maybe_inc_iversion - increments i_version
1543 : * @inode: inode with the i_version that should be updated
1544 : * @force: increment the counter even if it's not necessary?
1545 : *
1546 : * Every time the inode is modified, the i_version field must be seen to have
1547 : * changed by any observer.
1548 : *
1549 : * If "force" is set or the QUERIED flag is set, then ensure that we increment
1550 : * the value, and clear the queried flag.
1551 : *
1552 : * In the common case where neither is set, then we can return "false" without
1553 : * updating i_version.
1554 : *
1555 : * If this function returns false, and no other metadata has changed, then we
1556 : * can avoid logging the metadata.
1557 : */
1558 0 : bool inode_maybe_inc_iversion(struct inode *inode, bool force)
1559 : {
1560 : u64 cur, new;
1561 :
1562 : /*
1563 : * The i_version field is not strictly ordered with any other inode
1564 : * information, but the legacy inode_inc_iversion code used a spinlock
1565 : * to serialize increments.
1566 : *
1567 : * Here, we add full memory barriers to ensure that any de-facto
1568 : * ordering with other info is preserved.
1569 : *
1570 : * This barrier pairs with the barrier in inode_query_iversion()
1571 : */
1572 0 : smp_mb();
1573 0 : cur = inode_peek_iversion_raw(inode);
1574 : do {
1575 : /* If flag is clear then we needn't do anything */
1576 0 : if (!force && !(cur & I_VERSION_QUERIED))
1577 : return false;
1578 :
1579 : /* Since lowest bit is flag, add 2 to avoid it */
1580 0 : new = (cur & ~I_VERSION_QUERIED) + I_VERSION_INCREMENT;
1581 0 : } while (!atomic64_try_cmpxchg(&inode->i_version, &cur, new));
1582 : return true;
1583 : }
1584 : EXPORT_SYMBOL(inode_maybe_inc_iversion);
1585 :
1586 : /**
1587 : * inode_query_iversion - read i_version for later use
1588 : * @inode: inode from which i_version should be read
1589 : *
1590 : * Read the inode i_version counter. This should be used by callers that wish
1591 : * to store the returned i_version for later comparison. This will guarantee
1592 : * that a later query of the i_version will result in a different value if
1593 : * anything has changed.
1594 : *
1595 : * In this implementation, we fetch the current value, set the QUERIED flag and
1596 : * then try to swap it into place with a cmpxchg, if it wasn't already set. If
1597 : * that fails, we try again with the newly fetched value from the cmpxchg.
1598 : */
1599 0 : u64 inode_query_iversion(struct inode *inode)
1600 : {
1601 : u64 cur, new;
1602 :
1603 0 : cur = inode_peek_iversion_raw(inode);
1604 : do {
1605 : /* If flag is already set, then no need to swap */
1606 0 : if (cur & I_VERSION_QUERIED) {
1607 : /*
1608 : * This barrier (and the implicit barrier in the
1609 : * cmpxchg below) pairs with the barrier in
1610 : * inode_maybe_inc_iversion().
1611 : */
1612 0 : smp_mb();
1613 0 : break;
1614 : }
1615 :
1616 0 : new = cur | I_VERSION_QUERIED;
1617 0 : } while (!atomic64_try_cmpxchg(&inode->i_version, &cur, new));
1618 0 : return cur >> I_VERSION_QUERIED_SHIFT;
1619 : }
1620 : EXPORT_SYMBOL(inode_query_iversion);
|