LCOV - code coverage report
Current view: top level - mm - internal.h (source / functions)
Test: coverage.info            Date: 2023-03-27 20:00:47
Coverage:   Lines: 10 / 93 (10.8 %)    Functions: 0 / 9 (0.0 %)

          Line data    Source code
       1             : /* SPDX-License-Identifier: GPL-2.0-or-later */
       2             : /* internal.h: mm/ internal definitions
       3             :  *
       4             :  * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved.
       5             :  * Written by David Howells (dhowells@redhat.com)
       6             :  */
       7             : #ifndef __MM_INTERNAL_H
       8             : #define __MM_INTERNAL_H
       9             : 
      10             : #include <linux/fs.h>
      11             : #include <linux/mm.h>
      12             : #include <linux/pagemap.h>
      13             : #include <linux/rmap.h>
      14             : #include <linux/tracepoint-defs.h>
      15             : 
      16             : struct folio_batch;
      17             : 
      18             : /*
      19             :  * The set of flags that only affect watermark checking and reclaim
      20             :  * behaviour. This is used by the MM to obey the caller constraints
      21             :  * about IO, FS and watermark checking while ignoring placement
      22             :  * hints such as HIGHMEM usage.
      23             :  */
      24             : #define GFP_RECLAIM_MASK (__GFP_RECLAIM|__GFP_HIGH|__GFP_IO|__GFP_FS|\
      25             :                         __GFP_NOWARN|__GFP_RETRY_MAYFAIL|__GFP_NOFAIL|\
      26             :                         __GFP_NORETRY|__GFP_MEMALLOC|__GFP_NOMEMALLOC|\
      27             :                         __GFP_NOLOCKDEP)
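
As a hedged illustration of how such a mask is typically applied (a hypothetical helper, not part of this header): an internal allocation can inherit the caller's reclaim, IO/FS and watermark constraints while dropping its placement hints by ANDing the caller's gfp mask with GFP_RECLAIM_MASK.

/*
 * Hypothetical sketch (not from internal.h): keep only the caller's
 * reclaim/watermark-related gfp bits for a nested allocation.
 */
static void *example_internal_alloc(gfp_t caller_gfp, size_t size)
{
        return kmalloc(size, (caller_gfp & GFP_RECLAIM_MASK) | __GFP_ZERO);
}
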
      28             : 
      29             : /* The GFP flags allowed during early boot */
      30             : #define GFP_BOOT_MASK (__GFP_BITS_MASK & ~(__GFP_RECLAIM|__GFP_IO|__GFP_FS))
      31             : 
      32             : /* Control allocation cpuset and node placement constraints */
      33             : #define GFP_CONSTRAINT_MASK (__GFP_HARDWALL|__GFP_THISNODE)
      34             : 
      35             : /* Do not use these with a slab allocator */
      36             : #define GFP_SLAB_BUG_MASK (__GFP_DMA32|__GFP_HIGHMEM|~__GFP_BITS_MASK)
      37             : 
      38             : /*
       39             :  * Unlike WARN_ON_ONCE(), no warning is issued when __GFP_NOWARN
       40             :  * is specified in the gfp mask.
      41             :  */
      42             : #define WARN_ON_ONCE_GFP(cond, gfp)     ({                              \
      43             :         static bool __section(".data.once") __warned;                 \
      44             :         int __ret_warn_once = !!(cond);                                 \
      45             :                                                                         \
      46             :         if (unlikely(!(gfp & __GFP_NOWARN) && __ret_warn_once && !__warned)) { \
      47             :                 __warned = true;                                        \
      48             :                 WARN_ON(1);                                             \
      49             :         }                                                               \
      50             :         unlikely(__ret_warn_once);                                      \
      51             : })
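
A minimal usage sketch (hypothetical call site, not taken from this file): the one-shot warning fires on the condition unless the caller passed __GFP_NOWARN in its gfp mask.

/*
 * Hypothetical caller, for illustration only: warn once about an
 * oversized request unless the gfp mask carries __GFP_NOWARN.
 */
static struct page *example_alloc(gfp_t gfp, unsigned int order)
{
        if (WARN_ON_ONCE_GFP(order >= MAX_ORDER, gfp))
                return NULL;

        return alloc_pages(gfp, order);
}
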
      52             : 
      53             : void page_writeback_init(void);
      54             : 
      55             : /*
      56             :  * If a 16GB hugetlb folio were mapped by PTEs of all of its 4kB pages,
      57             :  * its nr_pages_mapped would be 0x400000: choose the COMPOUND_MAPPED bit
      58             :  * above that range, instead of 2*(PMD_SIZE/PAGE_SIZE).  Hugetlb currently
      59             :  * leaves nr_pages_mapped at 0, but avoid surprise if it participates later.
      60             :  */
      61             : #define COMPOUND_MAPPED         0x800000
      62             : #define FOLIO_PAGES_MAPPED      (COMPOUND_MAPPED - 1)
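
To spell out the arithmetic behind these constants (a standalone, illustrative check rather than kernel code): 16 GB of 4 kB pages is 2^34 / 2^12 = 2^22 = 0x400000 mappings, so COMPOUND_MAPPED (bit 23, 0x800000) sits strictly above any possible per-page count, and FOLIO_PAGES_MAPPED masks everything below it.

/* Standalone sanity check of the constants above (illustrative only). */
#include <assert.h>

int main(void)
{
        unsigned long long nr_4k_in_16g = (16ULL << 30) >> 12;  /* 2^34 / 2^12 */

        assert(nr_4k_in_16g == 0x400000);        /* value quoted in the comment */
        assert(0x800000 == 1UL << 23);           /* COMPOUND_MAPPED, above that range */
        assert((0x800000 - 1) == 0x7fffff);      /* FOLIO_PAGES_MAPPED mask */
        return 0;
}
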
      63             : 
      64             : /*
      65             :  * How many individual pages have an elevated _mapcount.  Excludes
      66             :  * the folio's entire_mapcount.
      67             :  */
      68             : static inline int folio_nr_pages_mapped(struct folio *folio)
      69             : {
      70           0 :         return atomic_read(&folio->_nr_pages_mapped) & FOLIO_PAGES_MAPPED;
      71             : }
      72             : 
      73             : static inline void *folio_raw_mapping(struct folio *folio)
      74             : {
      75           0 :         unsigned long mapping = (unsigned long)folio->mapping;
      76             : 
      77           0 :         return (void *)(mapping & ~PAGE_MAPPING_FLAGS);
      78             : }
      79             : 
      80             : void __acct_reclaim_writeback(pg_data_t *pgdat, struct folio *folio,
      81             :                                                 int nr_throttled);
      82             : static inline void acct_reclaim_writeback(struct folio *folio)
      83             : {
      84           0 :         pg_data_t *pgdat = folio_pgdat(folio);
      85           0 :         int nr_throttled = atomic_read(&pgdat->nr_writeback_throttled);
      86             : 
      87           0 :         if (nr_throttled)
      88           0 :                 __acct_reclaim_writeback(pgdat, folio, nr_throttled);
      89             : }
      90             : 
      91             : static inline void wake_throttle_isolated(pg_data_t *pgdat)
      92             : {
      93             :         wait_queue_head_t *wqh;
      94             : 
      95           0 :         wqh = &pgdat->reclaim_wait[VMSCAN_THROTTLE_ISOLATED];
      96           0 :         if (waitqueue_active(wqh))
      97           0 :                 wake_up(wqh);
      98             : }
      99             : 
     100             : vm_fault_t do_swap_page(struct vm_fault *vmf);
     101             : void folio_rotate_reclaimable(struct folio *folio);
     102             : bool __folio_end_writeback(struct folio *folio);
     103             : void deactivate_file_folio(struct folio *folio);
     104             : void folio_activate(struct folio *folio);
     105             : 
     106             : void free_pgtables(struct mmu_gather *tlb, struct maple_tree *mt,
     107             :                    struct vm_area_struct *start_vma, unsigned long floor,
     108             :                    unsigned long ceiling);
     109             : void pmd_install(struct mm_struct *mm, pmd_t *pmd, pgtable_t *pte);
     110             : 
     111             : struct zap_details;
     112             : void unmap_page_range(struct mmu_gather *tlb,
     113             :                              struct vm_area_struct *vma,
     114             :                              unsigned long addr, unsigned long end,
     115             :                              struct zap_details *details);
     116             : 
     117             : void page_cache_ra_order(struct readahead_control *, struct file_ra_state *,
     118             :                 unsigned int order);
     119             : void force_page_cache_ra(struct readahead_control *, unsigned long nr);
     120             : static inline void force_page_cache_readahead(struct address_space *mapping,
     121             :                 struct file *file, pgoff_t index, unsigned long nr_to_read)
     122             : {
     123           0 :         DEFINE_READAHEAD(ractl, file, &file->f_ra, mapping, index);
     124           0 :         force_page_cache_ra(&ractl, nr_to_read);
     125             : }
     126             : 
     127             : unsigned find_lock_entries(struct address_space *mapping, pgoff_t *start,
     128             :                 pgoff_t end, struct folio_batch *fbatch, pgoff_t *indices);
     129             : unsigned find_get_entries(struct address_space *mapping, pgoff_t *start,
     130             :                 pgoff_t end, struct folio_batch *fbatch, pgoff_t *indices);
     131             : void filemap_free_folio(struct address_space *mapping, struct folio *folio);
     132             : int truncate_inode_folio(struct address_space *mapping, struct folio *folio);
     133             : bool truncate_inode_partial_folio(struct folio *folio, loff_t start,
     134             :                 loff_t end);
     135             : long invalidate_inode_page(struct page *page);
     136             : unsigned long invalidate_mapping_pagevec(struct address_space *mapping,
     137             :                 pgoff_t start, pgoff_t end, unsigned long *nr_pagevec);
     138             : 
     139             : /**
     140             :  * folio_evictable - Test whether a folio is evictable.
     141             :  * @folio: The folio to test.
     142             :  *
     143             :  * Test whether @folio is evictable -- i.e., should be placed on
     144             :  * active/inactive lists vs unevictable list.
     145             :  *
     146             :  * Reasons folio might not be evictable:
     147             :  * 1. folio's mapping marked unevictable
     148             :  * 2. One of the pages in the folio is part of an mlocked VMA
     149             :  */
     150           0 : static inline bool folio_evictable(struct folio *folio)
     151             : {
     152             :         bool ret;
     153             : 
     154             :         /* Prevent address_space of inode and swap cache from being freed */
     155             :         rcu_read_lock();
     156           0 :         ret = !mapping_unevictable(folio_mapping(folio)) &&
     157           0 :                         !folio_test_mlocked(folio);
     158             :         rcu_read_unlock();
     159           0 :         return ret;
     160             : }
     161             : 
     162             : /*
     163             :  * Turn a non-refcounted page (->_refcount == 0) into refcounted with
     164             :  * a count of one.
     165             :  */
     166             : static inline void set_page_refcounted(struct page *page)
     167             : {
     168             :         VM_BUG_ON_PAGE(PageTail(page), page);
     169             :         VM_BUG_ON_PAGE(page_ref_count(page), page);
     170       45080 :         set_page_count(page, 1);
     171             : }
     172             : 
     173             : extern unsigned long highest_memmap_pfn;
     174             : 
     175             : /*
     176             :  * Maximum number of reclaim retries without progress before the OOM
      177             :  * killer is considered the only way forward.
     178             :  */
     179             : #define MAX_RECLAIM_RETRIES 16
     180             : 
     181             : /*
     182             :  * in mm/early_ioremap.c
     183             :  */
     184             : pgprot_t __init early_memremap_pgprot_adjust(resource_size_t phys_addr,
     185             :                                         unsigned long size, pgprot_t prot);
     186             : 
     187             : /*
     188             :  * in mm/vmscan.c:
     189             :  */
     190             : bool isolate_lru_page(struct page *page);
     191             : bool folio_isolate_lru(struct folio *folio);
     192             : void putback_lru_page(struct page *page);
     193             : void folio_putback_lru(struct folio *folio);
     194             : extern void reclaim_throttle(pg_data_t *pgdat, enum vmscan_throttle_state reason);
     195             : 
     196             : /*
     197             :  * in mm/rmap.c:
     198             :  */
     199             : pmd_t *mm_find_pmd(struct mm_struct *mm, unsigned long address);
     200             : 
     201             : /*
     202             :  * in mm/page_alloc.c
     203             :  */
     204             : 
     205             : /*
     206             :  * Structure for holding the mostly immutable allocation parameters passed
     207             :  * between functions involved in allocations, including the alloc_pages*
     208             :  * family of functions.
     209             :  *
     210             :  * nodemask, migratetype and highest_zoneidx are initialized only once in
     211             :  * __alloc_pages() and then never change.
     212             :  *
     213             :  * zonelist, preferred_zone and highest_zoneidx are set first in
     214             :  * __alloc_pages() for the fast path, and might be later changed
     215             :  * in __alloc_pages_slowpath(). All other functions pass the whole structure
     216             :  * by a const pointer.
     217             :  */
     218             : struct alloc_context {
     219             :         struct zonelist *zonelist;
     220             :         nodemask_t *nodemask;
     221             :         struct zoneref *preferred_zoneref;
     222             :         int migratetype;
     223             : 
     224             :         /*
      225             :          * highest_zoneidx represents the highest usable zone index of
      226             :          * the allocation request. Due to the nature of the zones,
      227             :          * memory in zones lower than highest_zoneidx will be
      228             :          * protected by lowmem_reserve[highest_zoneidx].
      229             :          *
      230             :          * highest_zoneidx is also used by reclaim/compaction to limit
      231             :          * the target zone, since zones higher than this index cannot
      232             :          * be used for this allocation request.
     233             :          */
     234             :         enum zone_type highest_zoneidx;
     235             :         bool spread_dirty_pages;
     236             : };
     237             : 
     238             : /*
     239             :  * This function returns the order of a free page in the buddy system. In
     240             :  * general, page_zone(page)->lock must be held by the caller to prevent the
     241             :  * page from being allocated in parallel and returning garbage as the order.
     242             :  * If a caller does not hold page_zone(page)->lock, it must guarantee that the
     243             :  * page cannot be allocated or merged in parallel. Alternatively, it must
     244             :  * handle invalid values gracefully, and use buddy_order_unsafe() below.
     245             :  */
     246             : static inline unsigned int buddy_order(struct page *page)
     247             : {
     248             :         /* PageBuddy() must be checked by the caller */
     249        1383 :         return page_private(page);
     250             : }
     251             : 
     252             : /*
     253             :  * Like buddy_order(), but for callers who cannot afford to hold the zone lock.
     254             :  * PageBuddy() should be checked first by the caller to minimize race window,
     255             :  * and invalid values must be handled gracefully.
     256             :  *
     257             :  * READ_ONCE is used so that if the caller assigns the result into a local
     258             :  * variable and e.g. tests it for valid range before using, the compiler cannot
     259             :  * decide to remove the variable and inline the page_private(page) multiple
     260             :  * times, potentially observing different values in the tests and the actual
     261             :  * use of the result.
     262             :  */
     263             : #define buddy_order_unsafe(page)        READ_ONCE(page_private(page))
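
A hedged sketch of the pattern the comment above describes (hypothetical caller, not from this file): snapshot the order once with buddy_order_unsafe(), validate the snapshot, and only then use that same value.

/*
 * Hypothetical lockless reader, for illustration only: READ_ONCE() in
 * buddy_order_unsafe() guarantees the tested and the used value are the
 * same snapshot, even though it may be garbage from a racing update.
 */
static unsigned long example_buddy_size(struct page *page)
{
        unsigned int order;

        if (!PageBuddy(page))           /* narrows, but does not close, the race */
                return 0;

        order = buddy_order_unsafe(page);
        if (order >= MAX_ORDER)         /* handle a garbage value gracefully */
                return 0;

        return 1UL << order;
}
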
     264             : 
     265             : /*
      266             :  * This function checks whether a page is free && is the buddy.
      267             :  * We can coalesce a page and its buddy if
     268             :  * (a) the buddy is not in a hole (check before calling!) &&
     269             :  * (b) the buddy is in the buddy system &&
     270             :  * (c) a page and its buddy have the same order &&
     271             :  * (d) a page and its buddy are in the same zone.
     272             :  *
     273             :  * For recording whether a page is in the buddy system, we set PageBuddy.
     274             :  * Setting, clearing, and testing PageBuddy is serialized by zone->lock.
     275             :  *
     276             :  * For recording page's order, we use page_private(page).
     277             :  */
     278             : static inline bool page_is_buddy(struct page *page, struct page *buddy,
     279             :                                  unsigned int order)
     280             : {
     281        5610 :         if (!page_is_guard(buddy) && !PageBuddy(buddy))
     282             :                 return false;
     283             : 
     284        2758 :         if (buddy_order(buddy) != order)
     285             :                 return false;
     286             : 
     287             :         /*
     288             :          * zone check is done late to avoid uselessly calculating
     289             :          * zone/node ids for pages that could never merge.
     290             :          */
     291        3948 :         if (page_zone_id(page) != page_zone_id(buddy))
     292             :                 return false;
     293             : 
     294             :         VM_BUG_ON_PAGE(page_count(buddy) != 0, buddy);
     295             : 
     296             :         return true;
     297             : }
     298             : 
     299             : /*
     300             :  * Locate the struct page for both the matching buddy in our
     301             :  * pair (buddy1) and the combined O(n+1) page they form (page).
     302             :  *
     303             :  * 1) Any buddy B1 will have an order O twin B2 which satisfies
     304             :  * the following equation:
     305             :  *     B2 = B1 ^ (1 << O)
     306             :  * For example, if the starting buddy (buddy2) is #8 its order
     307             :  * 1 buddy is #10:
     308             :  *     B2 = 8 ^ (1 << 1) = 8 ^ 2 = 10
     309             :  *
     310             :  * 2) Any buddy B will have an order O+1 parent P which
     311             :  * satisfies the following equation:
     312             :  *     P = B & ~(1 << O)
     313             :  *
     314             :  * Assumption: *_mem_map is contiguous at least up to MAX_ORDER
     315             :  */
     316             : static inline unsigned long
     317             : __find_buddy_pfn(unsigned long page_pfn, unsigned int order)
     318             : {
     319        2805 :         return page_pfn ^ (1 << order);
     320             : }
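
Working the comment's example through the two equations (standalone and illustrative): pfn 8 at order 1 has buddy 8 ^ 2 = 10, and masking off bit 1 of either pfn yields the order-2 parent at pfn 8.

/* Standalone walk-through of the buddy equations above (illustrative only). */
#include <assert.h>

int main(void)
{
        unsigned long b1 = 8, order = 1;
        unsigned long b2 = b1 ^ (1UL << order);          /* what __find_buddy_pfn() computes */
        unsigned long parent = b1 & ~(1UL << order);     /* order O+1 parent */

        assert(b2 == 10);        /* matches the example in the comment */
        assert(parent == 8);     /* pfns 8 and 10 merge into the order-2 page at pfn 8 */
        return 0;
}
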
     321             : 
     322             : /*
     323             :  * Find the buddy of @page and validate it.
     324             :  * @page: The input page
     325             :  * @pfn: The pfn of the page, it saves a call to page_to_pfn() when the
     326             :  *       function is used in the performance-critical __free_one_page().
     327             :  * @order: The order of the page
     328             :  * @buddy_pfn: The output pointer to the buddy pfn, it also saves a call to
     329             :  *             page_to_pfn().
     330             :  *
      331             :  * The found buddy can be a non-PageBuddy page, be outside @page's zone, or have
      332             :  * an order different from @page's, so it must be validated before use.
     333             :  *
     334             :  * Return: the found buddy page or NULL if not found.
     335             :  */
     336             : static inline struct page *find_buddy_page_pfn(struct page *page,
     337             :                         unsigned long pfn, unsigned int order, unsigned long *buddy_pfn)
     338             : {
     339        2805 :         unsigned long __buddy_pfn = __find_buddy_pfn(pfn, order);
     340             :         struct page *buddy;
     341             : 
     342        2805 :         buddy = page + (__buddy_pfn - pfn);
     343             :         if (buddy_pfn)
     344        1860 :                 *buddy_pfn = __buddy_pfn;
     345             : 
     346        2805 :         if (page_is_buddy(page, buddy, order))
     347             :                 return buddy;
     348             :         return NULL;
     349             : }
     350             : 
     351             : extern struct page *__pageblock_pfn_to_page(unsigned long start_pfn,
     352             :                                 unsigned long end_pfn, struct zone *zone);
     353             : 
     354             : static inline struct page *pageblock_pfn_to_page(unsigned long start_pfn,
     355             :                                 unsigned long end_pfn, struct zone *zone)
     356             : {
     357           0 :         if (zone->contiguous)
     358           0 :                 return pfn_to_page(start_pfn);
     359             : 
     360           0 :         return __pageblock_pfn_to_page(start_pfn, end_pfn, zone);
     361             : }
     362             : 
     363             : extern int __isolate_free_page(struct page *page, unsigned int order);
     364             : extern void __putback_isolated_page(struct page *page, unsigned int order,
     365             :                                     int mt);
     366             : extern void memblock_free_pages(struct page *page, unsigned long pfn,
     367             :                                         unsigned int order);
     368             : extern void __free_pages_core(struct page *page, unsigned int order);
     369             : extern void prep_compound_page(struct page *page, unsigned int order);
     370             : extern void post_alloc_hook(struct page *page, unsigned int order,
     371             :                                         gfp_t gfp_flags);
     372             : extern int user_min_free_kbytes;
     373             : 
     374             : extern void free_unref_page(struct page *page, unsigned int order);
     375             : extern void free_unref_page_list(struct list_head *list);
     376             : 
     377             : extern void zone_pcp_reset(struct zone *zone);
     378             : extern void zone_pcp_disable(struct zone *zone);
     379             : extern void zone_pcp_enable(struct zone *zone);
     380             : 
     381             : extern void *memmap_alloc(phys_addr_t size, phys_addr_t align,
     382             :                           phys_addr_t min_addr,
     383             :                           int nid, bool exact_nid);
     384             : 
     385             : int split_free_page(struct page *free_page,
     386             :                         unsigned int order, unsigned long split_pfn_offset);
     387             : 
     388             : /*
     389             :  * This will have no effect, other than possibly generating a warning, if the
     390             :  * caller passes in a non-large folio.
     391             :  */
     392             : static inline void folio_set_order(struct folio *folio, unsigned int order)
     393             : {
     394             :         if (WARN_ON_ONCE(!folio_test_large(folio)))
     395             :                 return;
     396             : 
     397             :         folio->_folio_order = order;
     398             : #ifdef CONFIG_64BIT
     399             :         /*
     400             :          * When hugetlb dissolves a folio, we need to clear the tail
     401             :          * page, rather than setting nr_pages to 1.
     402             :          */
     403             :         folio->_folio_nr_pages = order ? 1U << order : 0;
     404             : #endif
     405             : }
     406             : 
     407             : #if defined CONFIG_COMPACTION || defined CONFIG_CMA
     408             : 
     409             : /*
     410             :  * in mm/compaction.c
     411             :  */
     412             : /*
     413             :  * compact_control is used to track pages being migrated and the free pages
     414             :  * they are being migrated to during memory compaction. The free_pfn starts
     415             :  * at the end of a zone and migrate_pfn begins at the start. Movable pages
     416             :  * are moved to the end of a zone during a compaction run and the run
     417             :  * completes when free_pfn <= migrate_pfn
     418             :  */
     419             : struct compact_control {
     420             :         struct list_head freepages;     /* List of free pages to migrate to */
     421             :         struct list_head migratepages;  /* List of pages being migrated */
     422             :         unsigned int nr_freepages;      /* Number of isolated free pages */
     423             :         unsigned int nr_migratepages;   /* Number of pages to migrate */
     424             :         unsigned long free_pfn;         /* isolate_freepages search base */
     425             :         /*
     426             :          * Acts as an in/out parameter to page isolation for migration.
     427             :          * isolate_migratepages uses it as a search base.
     428             :          * isolate_migratepages_block will update the value to the next pfn
     429             :          * after the last isolated one.
     430             :          */
     431             :         unsigned long migrate_pfn;
     432             :         unsigned long fast_start_pfn;   /* a pfn to start linear scan from */
     433             :         struct zone *zone;
     434             :         unsigned long total_migrate_scanned;
     435             :         unsigned long total_free_scanned;
     436             :         unsigned short fast_search_fail;/* failures to use free list searches */
     437             :         short search_order;             /* order to start a fast search at */
     438             :         const gfp_t gfp_mask;           /* gfp mask of a direct compactor */
     439             :         int order;                      /* order a direct compactor needs */
     440             :         int migratetype;                /* migratetype of direct compactor */
     441             :         const unsigned int alloc_flags; /* alloc flags of a direct compactor */
     442             :         const int highest_zoneidx;      /* zone index of a direct compactor */
     443             :         enum migrate_mode mode;         /* Async or sync migration mode */
     444             :         bool ignore_skip_hint;          /* Scan blocks even if marked skip */
     445             :         bool no_set_skip_hint;          /* Don't mark blocks for skipping */
     446             :         bool ignore_block_suitable;     /* Scan blocks considered unsuitable */
     447             :         bool direct_compaction;         /* False from kcompactd or /proc/... */
     448             :         bool proactive_compaction;      /* kcompactd proactive compaction */
     449             :         bool whole_zone;                /* Whole zone should/has been scanned */
     450             :         bool contended;                 /* Signal lock contention */
     451             :         bool finish_pageblock;          /* Scan the remainder of a pageblock. Used
     452             :                                          * when there are potentially transient
     453             :                                          * isolation or migration failures to
     454             :                                          * ensure forward progress.
     455             :                                          */
     456             :         bool alloc_contig;              /* alloc_contig_range allocation */
     457             : };
     458             : 
     459             : /*
     460             :  * Used in direct compaction when a page should be taken from the freelists
     461             :  * immediately when one is created during the free path.
     462             :  */
     463             : struct capture_control {
     464             :         struct compact_control *cc;
     465             :         struct page *page;
     466             : };
     467             : 
     468             : unsigned long
     469             : isolate_freepages_range(struct compact_control *cc,
     470             :                         unsigned long start_pfn, unsigned long end_pfn);
     471             : int
     472             : isolate_migratepages_range(struct compact_control *cc,
     473             :                            unsigned long low_pfn, unsigned long end_pfn);
     474             : 
     475             : int __alloc_contig_migrate_range(struct compact_control *cc,
     476             :                                         unsigned long start, unsigned long end);
     477             : #endif
     478             : int find_suitable_fallback(struct free_area *area, unsigned int order,
     479             :                         int migratetype, bool only_stealable, bool *can_steal);
     480             : 
     481             : /*
      482             :  * These three helpers classify VMAs for virtual memory accounting.
     483             :  */
     484             : 
     485             : /*
     486             :  * Executable code area - executable, not writable, not stack
     487             :  */
     488             : static inline bool is_exec_mapping(vm_flags_t flags)
     489             : {
     490           0 :         return (flags & (VM_EXEC | VM_WRITE | VM_STACK)) == VM_EXEC;
     491             : }
     492             : 
     493             : /*
     494             :  * Stack area - automatically grows in one direction
     495             :  *
     496             :  * VM_GROWSUP / VM_GROWSDOWN VMAs are always private anonymous:
     497             :  * do_mmap() forbids all other combinations.
     498             :  */
     499             : static inline bool is_stack_mapping(vm_flags_t flags)
     500             : {
     501           0 :         return (flags & VM_STACK) == VM_STACK;
     502             : }
     503             : 
     504             : /*
     505             :  * Data area - private, writable, not stack
     506             :  */
     507             : static inline bool is_data_mapping(vm_flags_t flags)
     508             : {
     509           0 :         return (flags & (VM_WRITE | VM_SHARED | VM_STACK)) == VM_WRITE;
     510             : }
     511             : 
     512             : /* mm/util.c */
     513             : struct anon_vma *folio_anon_vma(struct folio *folio);
     514             : 
     515             : #ifdef CONFIG_MMU
     516             : void unmap_mapping_folio(struct folio *folio);
     517             : extern long populate_vma_page_range(struct vm_area_struct *vma,
     518             :                 unsigned long start, unsigned long end, int *locked);
     519             : extern long faultin_vma_page_range(struct vm_area_struct *vma,
     520             :                                    unsigned long start, unsigned long end,
     521             :                                    bool write, int *locked);
     522             : extern int mlock_future_check(struct mm_struct *mm, unsigned long flags,
     523             :                               unsigned long len);
     524             : /*
     525             :  * mlock_vma_folio() and munlock_vma_folio():
     526             :  * should be called with vma's mmap_lock held for read or write,
     527             :  * under page table lock for the pte/pmd being added or removed.
     528             :  *
     529             :  * mlock is usually called at the end of page_add_*_rmap(), munlock at
     530             :  * the end of page_remove_rmap(); but new anon folios are managed by
     531             :  * folio_add_lru_vma() calling mlock_new_folio().
     532             :  *
     533             :  * @compound is used to include pmd mappings of THPs, but filter out
     534             :  * pte mappings of THPs, which cannot be consistently counted: a pte
     535             :  * mapping of the THP head cannot be distinguished by the page alone.
     536             :  */
     537             : void mlock_folio(struct folio *folio);
     538           0 : static inline void mlock_vma_folio(struct folio *folio,
     539             :                         struct vm_area_struct *vma, bool compound)
     540             : {
     541             :         /*
     542             :          * The VM_SPECIAL check here serves two purposes.
     543             :          * 1) VM_IO check prevents migration from double-counting during mlock.
     544             :          * 2) Although mmap_region() and mlock_fixup() take care that VM_LOCKED
     545             :          *    is never left set on a VM_SPECIAL vma, there is an interval while
     546             :          *    file->f_op->mmap() is using vm_insert_page(s), when VM_LOCKED may
     547             :          *    still be set while VM_SPECIAL bits are added: so ignore it then.
     548             :          */
     549           0 :         if (unlikely((vma->vm_flags & (VM_LOCKED|VM_SPECIAL)) == VM_LOCKED) &&
     550           0 :             (compound || !folio_test_large(folio)))
     551           0 :                 mlock_folio(folio);
     552           0 : }
     553             : 
     554             : void munlock_folio(struct folio *folio);
     555           0 : static inline void munlock_vma_folio(struct folio *folio,
     556             :                         struct vm_area_struct *vma, bool compound)
     557             : {
     558           0 :         if (unlikely(vma->vm_flags & VM_LOCKED) &&
     559           0 :             (compound || !folio_test_large(folio)))
     560           0 :                 munlock_folio(folio);
     561           0 : }
     562             : 
     563             : void mlock_new_folio(struct folio *folio);
     564             : bool need_mlock_drain(int cpu);
     565             : void mlock_drain_local(void);
     566             : void mlock_drain_remote(int cpu);
     567             : 
     568             : extern pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma);
     569             : 
     570             : /*
     571             :  * Return the start of user virtual address at the specific offset within
     572             :  * a vma.
     573             :  */
     574             : static inline unsigned long
     575             : vma_pgoff_address(pgoff_t pgoff, unsigned long nr_pages,
     576             :                   struct vm_area_struct *vma)
     577             : {
     578             :         unsigned long address;
     579             : 
     580           0 :         if (pgoff >= vma->vm_pgoff) {
     581           0 :                 address = vma->vm_start +
     582           0 :                         ((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
     583             :                 /* Check for address beyond vma (or wrapped through 0?) */
     584           0 :                 if (address < vma->vm_start || address >= vma->vm_end)
     585           0 :                         address = -EFAULT;
     586           0 :         } else if (pgoff + nr_pages - 1 >= vma->vm_pgoff) {
     587             :                 /* Test above avoids possibility of wrap to 0 on 32-bit */
     588           0 :                 address = vma->vm_start;
     589             :         } else {
     590             :                 address = -EFAULT;
     591             :         }
     592             :         return address;
     593             : }
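
A worked example of the pgoff-to-address arithmetic (numbers chosen for illustration, not taken from the source): with vm_start = 0x7f0000000000 and vm_pgoff = 0x10, page offset 0x12 maps to vm_start + ((0x12 - 0x10) << PAGE_SHIFT), i.e. vm_start + 0x2000 with 4 kB pages; the helper then returns -EFAULT if that address falls outside [vm_start, vm_end).

/* Standalone check of the offset arithmetic (illustrative only, 4kB pages assumed). */
#include <assert.h>

int main(void)
{
        unsigned long long vm_start = 0x7f0000000000ULL;
        unsigned long long vm_pgoff = 0x10, pgoff = 0x12, page_shift = 12;
        unsigned long long addr = vm_start + ((pgoff - vm_pgoff) << page_shift);

        assert(addr == vm_start + 0x2000);
        return 0;
}
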
     594             : 
     595             : /*
     596             :  * Return the start of user virtual address of a page within a vma.
     597             :  * Returns -EFAULT if all of the page is outside the range of vma.
     598             :  * If page is a compound head, the entire compound page is considered.
     599             :  */
     600             : static inline unsigned long
     601           0 : vma_address(struct page *page, struct vm_area_struct *vma)
     602             : {
     603             :         VM_BUG_ON_PAGE(PageKsm(page), page);    /* KSM page->index unusable */
     604           0 :         return vma_pgoff_address(page_to_pgoff(page), compound_nr(page), vma);
     605             : }
     606             : 
     607             : /*
     608             :  * Then at what user virtual address will none of the range be found in vma?
     609             :  * Assumes that vma_address() already returned a good starting address.
     610             :  */
     611             : static inline unsigned long vma_address_end(struct page_vma_mapped_walk *pvmw)
     612             : {
     613           0 :         struct vm_area_struct *vma = pvmw->vma;
     614             :         pgoff_t pgoff;
     615             :         unsigned long address;
     616             : 
     617             :         /* Common case, plus ->pgoff is invalid for KSM */
     618           0 :         if (pvmw->nr_pages == 1)
     619           0 :                 return pvmw->address + PAGE_SIZE;
     620             : 
     621           0 :         pgoff = pvmw->pgoff + pvmw->nr_pages;
     622           0 :         address = vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
     623             :         /* Check for address beyond vma (or wrapped through 0?) */
     624           0 :         if (address < vma->vm_start || address > vma->vm_end)
     625           0 :                 address = vma->vm_end;
     626             :         return address;
     627             : }
     628             : 
     629           0 : static inline struct file *maybe_unlock_mmap_for_io(struct vm_fault *vmf,
     630             :                                                     struct file *fpin)
     631             : {
     632           0 :         int flags = vmf->flags;
     633             : 
     634           0 :         if (fpin)
     635             :                 return fpin;
     636             : 
     637             :         /*
     638             :          * FAULT_FLAG_RETRY_NOWAIT means we don't want to wait on page locks or
     639             :          * anything, so we only pin the file and drop the mmap_lock if only
     640             :          * FAULT_FLAG_ALLOW_RETRY is set, while this is the first attempt.
     641             :          */
     642           0 :         if (fault_flag_allow_retry_first(flags) &&
     643           0 :             !(flags & FAULT_FLAG_RETRY_NOWAIT)) {
     644           0 :                 fpin = get_file(vmf->vma->vm_file);
     645           0 :                 mmap_read_unlock(vmf->vma->vm_mm);
     646             :         }
     647             :         return fpin;
     648             : }
     649             : #else /* !CONFIG_MMU */
     650             : static inline void unmap_mapping_folio(struct folio *folio) { }
     651             : static inline void mlock_new_folio(struct folio *folio) { }
     652             : static inline bool need_mlock_drain(int cpu) { return false; }
     653             : static inline void mlock_drain_local(void) { }
     654             : static inline void mlock_drain_remote(int cpu) { }
     655             : static inline void vunmap_range_noflush(unsigned long start, unsigned long end)
     656             : {
     657             : }
     658             : #endif /* !CONFIG_MMU */
     659             : 
     660             : /* Memory initialisation debug and verification */
     661             : enum mminit_level {
     662             :         MMINIT_WARNING,
     663             :         MMINIT_VERIFY,
     664             :         MMINIT_TRACE
     665             : };
     666             : 
     667             : #ifdef CONFIG_DEBUG_MEMORY_INIT
     668             : 
     669             : extern int mminit_loglevel;
     670             : 
     671             : #define mminit_dprintk(level, prefix, fmt, arg...) \
     672             : do { \
     673             :         if (level < mminit_loglevel) { \
     674             :                 if (level <= MMINIT_WARNING) \
     675             :                         pr_warn("mminit::" prefix " " fmt, ##arg);  \
     676             :                 else \
     677             :                         printk(KERN_DEBUG "mminit::" prefix " " fmt, ##arg); \
     678             :         } \
     679             : } while (0)
     680             : 
     681             : extern void mminit_verify_pageflags_layout(void);
     682             : extern void mminit_verify_zonelist(void);
     683             : #else
     684             : 
     685             : static inline void mminit_dprintk(enum mminit_level level,
     686             :                                 const char *prefix, const char *fmt, ...)
     687             : {
     688             : }
     689             : 
     690             : static inline void mminit_verify_pageflags_layout(void)
     691             : {
     692             : }
     693             : 
     694             : static inline void mminit_verify_zonelist(void)
     695             : {
     696             : }
     697             : #endif /* CONFIG_DEBUG_MEMORY_INIT */
     698             : 
     699             : #define NODE_RECLAIM_NOSCAN     -2
     700             : #define NODE_RECLAIM_FULL       -1
     701             : #define NODE_RECLAIM_SOME       0
     702             : #define NODE_RECLAIM_SUCCESS    1
     703             : 
     704             : #ifdef CONFIG_NUMA
     705             : extern int node_reclaim(struct pglist_data *, gfp_t, unsigned int);
     706             : extern int find_next_best_node(int node, nodemask_t *used_node_mask);
     707             : #else
     708             : static inline int node_reclaim(struct pglist_data *pgdat, gfp_t mask,
     709             :                                 unsigned int order)
     710             : {
     711             :         return NODE_RECLAIM_NOSCAN;
     712             : }
     713             : static inline int find_next_best_node(int node, nodemask_t *used_node_mask)
     714             : {
     715             :         return NUMA_NO_NODE;
     716             : }
     717             : #endif
     718             : 
     719             : /*
     720             :  * mm/memory-failure.c
     721             :  */
     722             : extern int hwpoison_filter(struct page *p);
     723             : 
     724             : extern u32 hwpoison_filter_dev_major;
     725             : extern u32 hwpoison_filter_dev_minor;
     726             : extern u64 hwpoison_filter_flags_mask;
     727             : extern u64 hwpoison_filter_flags_value;
     728             : extern u64 hwpoison_filter_memcg;
     729             : extern u32 hwpoison_filter_enable;
     730             : 
     731             : extern unsigned long  __must_check vm_mmap_pgoff(struct file *, unsigned long,
     732             :         unsigned long, unsigned long,
     733             :         unsigned long, unsigned long);
     734             : 
     735             : extern void set_pageblock_order(void);
     736             : unsigned int reclaim_clean_pages_from_list(struct zone *zone,
     737             :                                             struct list_head *page_list);
     738             : /* The ALLOC_WMARK bits are used as an index to zone->watermark */
     739             : #define ALLOC_WMARK_MIN         WMARK_MIN
     740             : #define ALLOC_WMARK_LOW         WMARK_LOW
     741             : #define ALLOC_WMARK_HIGH        WMARK_HIGH
     742             : #define ALLOC_NO_WATERMARKS     0x04 /* don't check watermarks at all */
     743             : 
     744             : /* Mask to get the watermark bits */
     745             : #define ALLOC_WMARK_MASK        (ALLOC_NO_WATERMARKS-1)
     746             : 
     747             : /*
      748             :  * Only MMU archs have async oom victim reclaim - aka the oom_reaper - so we
      749             :  * cannot assume that reduced access to memory reserves is sufficient
      750             :  * on !MMU.
     751             :  */
     752             : #ifdef CONFIG_MMU
     753             : #define ALLOC_OOM               0x08
     754             : #else
     755             : #define ALLOC_OOM               ALLOC_NO_WATERMARKS
     756             : #endif
     757             : 
     758             : #define ALLOC_NON_BLOCK          0x10 /* Caller cannot block. Allow access
     759             :                                        * to 25% of the min watermark or
     760             :                                        * 62.5% if __GFP_HIGH is set.
     761             :                                        */
     762             : #define ALLOC_MIN_RESERVE        0x20 /* __GFP_HIGH set. Allow access to 50%
     763             :                                        * of the min watermark.
     764             :                                        */
     765             : #define ALLOC_CPUSET             0x40 /* check for correct cpuset */
     766             : #define ALLOC_CMA                0x80 /* allow allocations from CMA areas */
     767             : #ifdef CONFIG_ZONE_DMA32
     768             : #define ALLOC_NOFRAGMENT        0x100 /* avoid mixing pageblock types */
     769             : #else
     770             : #define ALLOC_NOFRAGMENT          0x0
     771             : #endif
     772             : #define ALLOC_HIGHATOMIC        0x200 /* Allows access to MIGRATE_HIGHATOMIC */
     773             : #define ALLOC_KSWAPD            0x800 /* allow waking of kswapd, __GFP_KSWAPD_RECLAIM set */
     774             : 
     775             : /* Flags that allow allocations below the min watermark. */
     776             : #define ALLOC_RESERVES (ALLOC_NON_BLOCK|ALLOC_MIN_RESERVE|ALLOC_HIGHATOMIC|ALLOC_OOM)
     777             : 
     778             : enum ttu_flags;
     779             : struct tlbflush_unmap_batch;
     780             : 
     781             : 
     782             : /*
     783             :  * only for MM internal work items which do not depend on
     784             :  * any allocations or locks which might depend on allocations
     785             :  */
     786             : extern struct workqueue_struct *mm_percpu_wq;
     787             : 
     788             : #ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
     789             : void try_to_unmap_flush(void);
     790             : void try_to_unmap_flush_dirty(void);
     791             : void flush_tlb_batched_pending(struct mm_struct *mm);
     792             : #else
     793             : static inline void try_to_unmap_flush(void)
     794             : {
     795             : }
     796             : static inline void try_to_unmap_flush_dirty(void)
     797             : {
     798             : }
     799             : static inline void flush_tlb_batched_pending(struct mm_struct *mm)
     800             : {
     801             : }
     802             : #endif /* CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH */
     803             : 
     804             : extern const struct trace_print_flags pageflag_names[];
     805             : extern const struct trace_print_flags vmaflag_names[];
     806             : extern const struct trace_print_flags gfpflag_names[];
     807             : 
     808             : static inline bool is_migrate_highatomic(enum migratetype migratetype)
     809             : {
     810             :         return migratetype == MIGRATE_HIGHATOMIC;
     811             : }
     812             : 
     813           0 : static inline bool is_migrate_highatomic_page(struct page *page)
     814             : {
     815           0 :         return get_pageblock_migratetype(page) == MIGRATE_HIGHATOMIC;
     816             : }
     817             : 
     818             : void setup_zone_pageset(struct zone *zone);
     819             : 
     820             : struct migration_target_control {
     821             :         int nid;                /* preferred node id */
     822             :         nodemask_t *nmask;
     823             :         gfp_t gfp_mask;
     824             : };
     825             : 
     826             : /*
     827             :  * mm/filemap.c
     828             :  */
     829             : size_t splice_folio_into_pipe(struct pipe_inode_info *pipe,
     830             :                               struct folio *folio, loff_t fpos, size_t size);
     831             : 
     832             : /*
     833             :  * mm/vmalloc.c
     834             :  */
     835             : #ifdef CONFIG_MMU
     836             : int vmap_pages_range_noflush(unsigned long addr, unsigned long end,
     837             :                 pgprot_t prot, struct page **pages, unsigned int page_shift);
     838             : #else
     839             : static inline
     840             : int vmap_pages_range_noflush(unsigned long addr, unsigned long end,
     841             :                 pgprot_t prot, struct page **pages, unsigned int page_shift)
     842             : {
     843             :         return -EINVAL;
     844             : }
     845             : #endif
     846             : 
     847             : int __vmap_pages_range_noflush(unsigned long addr, unsigned long end,
     848             :                                pgprot_t prot, struct page **pages,
     849             :                                unsigned int page_shift);
     850             : 
     851             : void vunmap_range_noflush(unsigned long start, unsigned long end);
     852             : 
     853             : void __vunmap_range_noflush(unsigned long start, unsigned long end);
     854             : 
     855             : int numa_migrate_prep(struct page *page, struct vm_area_struct *vma,
     856             :                       unsigned long addr, int page_nid, int *flags);
     857             : 
     858             : void free_zone_device_page(struct page *page);
     859             : int migrate_device_coherent_page(struct page *page);
     860             : 
     861             : /*
     862             :  * mm/gup.c
     863             :  */
     864             : struct folio *try_grab_folio(struct page *page, int refs, unsigned int flags);
     865             : int __must_check try_grab_page(struct page *page, unsigned int flags);
     866             : 
     867             : enum {
     868             :         /* mark page accessed */
     869             :         FOLL_TOUCH = 1 << 16,
     870             :         /* a retry, previous pass started an IO */
     871             :         FOLL_TRIED = 1 << 17,
     872             :         /* we are working on non-current tsk/mm */
     873             :         FOLL_REMOTE = 1 << 18,
     874             :         /* pages must be released via unpin_user_page */
     875             :         FOLL_PIN = 1 << 19,
     876             :         /* gup_fast: prevent fall-back to slow gup */
     877             :         FOLL_FAST_ONLY = 1 << 20,
     878             :         /* allow unlocking the mmap lock */
     879             :         FOLL_UNLOCKABLE = 1 << 21,
     880             : };
     881             : 
     882             : /*
      883             :  * Indicates whether, for a page that is write-protected in the page table,
      884             :  * GUP has to trigger unsharing via FAULT_FLAG_UNSHARE so that the
      885             :  * GUP pin will remain consistent with the pages mapped into the page tables
      886             :  * of the MM.
     887             :  *
     888             :  * Temporary unmapping of PageAnonExclusive() pages or clearing of
     889             :  * PageAnonExclusive() has to protect against concurrent GUP:
     890             :  * * Ordinary GUP: Using the PT lock
     891             :  * * GUP-fast and fork(): mm->write_protect_seq
     892             :  * * GUP-fast and KSM or temporary unmapping (swap, migration): see
     893             :  *    page_try_share_anon_rmap()
     894             :  *
     895             :  * Must be called with the (sub)page that's actually referenced via the
     896             :  * page table entry, which might not necessarily be the head page for a
     897             :  * PTE-mapped THP.
     898             :  *
     899             :  * If the vma is NULL, we're coming from the GUP-fast path and might have
      900             :  * to fall back to the slow path just to look up the vma.
     901             :  */
     902           0 : static inline bool gup_must_unshare(struct vm_area_struct *vma,
     903             :                                     unsigned int flags, struct page *page)
     904             : {
     905             :         /*
     906             :          * FOLL_WRITE is implicitly handled correctly as the page table entry
     907             :          * has to be writable -- and if it references (part of) an anonymous
     908             :          * folio, that part is required to be marked exclusive.
     909             :          */
     910           0 :         if ((flags & (FOLL_WRITE | FOLL_PIN)) != FOLL_PIN)
     911             :                 return false;
     912             :         /*
     913             :          * Note: PageAnon(page) is stable until the page is actually getting
     914             :          * freed.
     915             :          */
     916           0 :         if (!PageAnon(page)) {
     917             :                 /*
      918             :                  * We only care about R/O long-term pinning: R/O short-term
     919             :                  * pinning does not have the semantics to observe successive
     920             :                  * changes through the process page tables.
     921             :                  */
     922           0 :                 if (!(flags & FOLL_LONGTERM))
     923             :                         return false;
     924             : 
     925             :                 /* We really need the vma ... */
     926           0 :                 if (!vma)
     927             :                         return true;
     928             : 
     929             :                 /*
     930             :                  * ... because we only care about writable private ("COW")
     931             :                  * mappings where we have to break COW early.
     932             :                  */
     933           0 :                 return is_cow_mapping(vma->vm_flags);
     934             :         }
     935             : 
     936             :         /* Paired with a memory barrier in page_try_share_anon_rmap(). */
     937             :         if (IS_ENABLED(CONFIG_HAVE_FAST_GUP))
     938             :                 smp_rmb();
     939             : 
     940             :         /*
     941             :          * Note that PageKsm() pages cannot be exclusive, and consequently,
     942             :          * cannot get pinned.
     943             :          */
     944           0 :         return !PageAnonExclusive(page);
     945             : }
     946             : 
     947             : extern bool mirrored_kernelcore;
     948             : 
     949             : static inline bool vma_soft_dirty_enabled(struct vm_area_struct *vma)
     950             : {
     951             :         /*
     952             :          * NOTE: we must check this before VM_SOFTDIRTY on soft-dirty
      953             :          * enablements, because without soft-dirty compiled in,
      954             :          * VM_SOFTDIRTY is defined as 0x0, so !(vm_flags & VM_SOFTDIRTY)
      955             :          * would be constantly true.
     956             :          */
     957             :         if (!IS_ENABLED(CONFIG_MEM_SOFT_DIRTY))
     958             :                 return false;
     959             : 
     960             :         /*
      961             :          * Soft-dirty is kind of special: its tracking is enabled when the
      962             :          * VM_SOFTDIRTY vma flag is not set.
     963             :          */
     964             :         return !(vma->vm_flags & VM_SOFTDIRTY);
     965             : }
     966             : 
     967             : /*
     968             :  * VMA Iterator functions shared between nommu and mmap
     969             :  */
     970             : static inline int vma_iter_prealloc(struct vma_iterator *vmi)
     971             : {
     972           0 :         return mas_preallocate(&vmi->mas, GFP_KERNEL);
     973             : }
     974             : 
     975             : static inline void vma_iter_clear(struct vma_iterator *vmi,
     976             :                                   unsigned long start, unsigned long end)
     977             : {
     978           0 :         mas_set_range(&vmi->mas, start, end - 1);
     979           0 :         mas_store_prealloc(&vmi->mas, NULL);
     980             : }
     981             : 
     982             : static inline struct vm_area_struct *vma_iter_load(struct vma_iterator *vmi)
     983             : {
     984           0 :         return mas_walk(&vmi->mas);
     985             : }
     986             : 
     987             : /* Store a VMA with preallocated memory */
     988           0 : static inline void vma_iter_store(struct vma_iterator *vmi,
     989             :                                   struct vm_area_struct *vma)
     990             : {
     991             : 
     992             : #if defined(CONFIG_DEBUG_VM_MAPLE_TREE)
     993             :         if (WARN_ON(vmi->mas.node != MAS_START && vmi->mas.index > vma->vm_start)) {
     994             :                 printk("%lu > %lu\n", vmi->mas.index, vma->vm_start);
     995             :                 printk("store of vma %lu-%lu", vma->vm_start, vma->vm_end);
     996             :                 printk("into slot    %lu-%lu", vmi->mas.index, vmi->mas.last);
     997             :                 mt_dump(vmi->mas.tree);
     998             :         }
     999             :         if (WARN_ON(vmi->mas.node != MAS_START && vmi->mas.last <  vma->vm_start)) {
    1000             :                 printk("%lu < %lu\n", vmi->mas.last, vma->vm_start);
    1001             :                 printk("store of vma %lu-%lu", vma->vm_start, vma->vm_end);
    1002             :                 printk("into slot    %lu-%lu", vmi->mas.index, vmi->mas.last);
    1003             :                 mt_dump(vmi->mas.tree);
    1004             :         }
    1005             : #endif
    1006             : 
    1007           0 :         if (vmi->mas.node != MAS_START &&
    1008           0 :             ((vmi->mas.index > vma->vm_start) || (vmi->mas.last < vma->vm_start)))
    1009             :                 vma_iter_invalidate(vmi);
    1010             : 
    1011           0 :         vmi->mas.index = vma->vm_start;
    1012           0 :         vmi->mas.last = vma->vm_end - 1;
    1013           0 :         mas_store_prealloc(&vmi->mas, vma);
    1014           0 : }
    1015             : 
    1016           0 : static inline int vma_iter_store_gfp(struct vma_iterator *vmi,
    1017             :                         struct vm_area_struct *vma, gfp_t gfp)
    1018             : {
    1019           0 :         if (vmi->mas.node != MAS_START &&
    1020           0 :             ((vmi->mas.index > vma->vm_start) || (vmi->mas.last < vma->vm_start)))
    1021             :                 vma_iter_invalidate(vmi);
    1022             : 
    1023           0 :         vmi->mas.index = vma->vm_start;
    1024           0 :         vmi->mas.last = vma->vm_end - 1;
    1025           0 :         mas_store_gfp(&vmi->mas, vma, gfp);
    1026           0 :         if (unlikely(mas_is_err(&vmi->mas)))
    1027             :                 return -ENOMEM;
    1028             : 
    1029           0 :         return 0;
    1030             : }
    1031             : 
    1032             : /*
    1033             :  * VMA lock generalization
    1034             :  */
    1035             : struct vma_prepare {
    1036             :         struct vm_area_struct *vma;
    1037             :         struct vm_area_struct *adj_next;
    1038             :         struct file *file;
    1039             :         struct address_space *mapping;
    1040             :         struct anon_vma *anon_vma;
    1041             :         struct vm_area_struct *insert;
    1042             :         struct vm_area_struct *remove;
    1043             :         struct vm_area_struct *remove2;
    1044             : };
    1045             : #endif  /* __MM_INTERNAL_H */

Generated by: LCOV version 1.14