Line data Source code
1 : // SPDX-License-Identifier: GPL-2.0
2 : /*
3 : * linux/mm/mlock.c
4 : *
5 : * (C) Copyright 1995 Linus Torvalds
6 : * (C) Copyright 2002 Christoph Hellwig
7 : */
8 :
9 : #include <linux/capability.h>
10 : #include <linux/mman.h>
11 : #include <linux/mm.h>
12 : #include <linux/sched/user.h>
13 : #include <linux/swap.h>
14 : #include <linux/swapops.h>
15 : #include <linux/pagemap.h>
16 : #include <linux/pagevec.h>
17 : #include <linux/pagewalk.h>
18 : #include <linux/mempolicy.h>
19 : #include <linux/syscalls.h>
20 : #include <linux/sched.h>
21 : #include <linux/export.h>
22 : #include <linux/rmap.h>
23 : #include <linux/mmzone.h>
24 : #include <linux/hugetlb.h>
25 : #include <linux/memcontrol.h>
26 : #include <linux/mm_inline.h>
27 : #include <linux/secretmem.h>
28 :
29 : #include "internal.h"
30 :
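/*
 * Folios are not moved on or off the LRU one at a time here: mlock_folio(),
 * mlock_new_folio() and munlock_folio() stage folios in the per-CPU
 * folio_batch below, and mlock_folio_batch() later processes the whole
 * batch, relocking the lruvec only when a folio belongs to a different
 * lruvec than the previous one.  The local_lock serializes access to the
 * per-CPU batch (preemption is disabled on non-PREEMPT_RT kernels; it is a
 * real lock on PREEMPT_RT).
 */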
31 : struct mlock_fbatch {
32 : local_lock_t lock;
33 : struct folio_batch fbatch;
34 : };
35 :
36 : static DEFINE_PER_CPU(struct mlock_fbatch, mlock_fbatch) = {
37 : .lock = INIT_LOCAL_LOCK(lock),
38 : };
39 :
40 0 : bool can_do_mlock(void)
41 : {
42 0 : if (rlimit(RLIMIT_MEMLOCK) != 0)
43 : return true;
44 0 : if (capable(CAP_IPC_LOCK))
45 : return true;
46 0 : return false;
47 : }
48 : EXPORT_SYMBOL(can_do_mlock);
49 :
50 : /*
51 : * Mlocked folios are marked with the PG_mlocked flag for efficient testing
52 : * in vmscan and, possibly, the fault path; and to support semi-accurate
53 : * statistics.
54 : *
55 : * An mlocked folio [folio_test_mlocked(folio)] is unevictable. As such, it
56 : * will be ostensibly placed on the LRU "unevictable" list (actually no such
57 : * list exists), rather than the [in]active lists. PG_unevictable is set to
58 : * indicate the unevictable state.
59 : */
60 :
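/*
 * folio->mlock_count shares storage with the folio->lru list_head; that
 * space is free because an unevictable folio is not actually linked on any
 * LRU list.  The count approximates how many outstanding mlocks the folio
 * has while it sits on the "unevictable list".  If the folio has been
 * isolated (e.g. for migration) the count cannot be maintained; the
 * "assume that was the last mlock" path in __munlock_folio() then relies on
 * reclaim rediscovering the mlock through the rmap walk.
 */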
61 0 : static struct lruvec *__mlock_folio(struct folio *folio, struct lruvec *lruvec)
62 : {
63 : /* There is nothing more we can do while it's off LRU */
64 0 : if (!folio_test_clear_lru(folio))
65 : return lruvec;
66 :
67 0 : lruvec = folio_lruvec_relock_irq(folio, lruvec);
68 :
69 0 : if (unlikely(folio_evictable(folio))) {
70 : /*
71 : * This is a little surprising, but quite possible: PG_mlocked
72 : * must have got cleared already by another CPU. Could this
73 : * folio be unevictable? I'm not sure, but move it now if so.
74 : */
75 0 : if (folio_test_unevictable(folio)) {
76 0 : lruvec_del_folio(lruvec, folio);
77 0 : folio_clear_unevictable(folio);
78 0 : lruvec_add_folio(lruvec, folio);
79 :
80 0 : __count_vm_events(UNEVICTABLE_PGRESCUED,
81 : folio_nr_pages(folio));
82 : }
83 : goto out;
84 : }
85 :
86 0 : if (folio_test_unevictable(folio)) {
87 0 : if (folio_test_mlocked(folio))
88 0 : folio->mlock_count++;
89 : goto out;
90 : }
91 :
92 0 : lruvec_del_folio(lruvec, folio);
93 0 : folio_clear_active(folio);
94 0 : folio_set_unevictable(folio);
95 0 : folio->mlock_count = !!folio_test_mlocked(folio);
96 0 : lruvec_add_folio(lruvec, folio);
97 0 : __count_vm_events(UNEVICTABLE_PGCULLED, folio_nr_pages(folio));
98 : out:
99 0 : folio_set_lru(folio);
100 0 : return lruvec;
101 : }
102 :
103 0 : static struct lruvec *__mlock_new_folio(struct folio *folio, struct lruvec *lruvec)
104 : {
105 : VM_BUG_ON_FOLIO(folio_test_lru(folio), folio);
106 :
107 0 : lruvec = folio_lruvec_relock_irq(folio, lruvec);
108 :
109 : /* As above, this is a little surprising, but possible */
110 0 : if (unlikely(folio_evictable(folio)))
111 : goto out;
112 :
113 0 : folio_set_unevictable(folio);
114 0 : folio->mlock_count = !!folio_test_mlocked(folio);
115 0 : __count_vm_events(UNEVICTABLE_PGCULLED, folio_nr_pages(folio));
116 : out:
117 0 : lruvec_add_folio(lruvec, folio);
118 0 : folio_set_lru(folio);
119 0 : return lruvec;
120 : }
121 :
122 0 : static struct lruvec *__munlock_folio(struct folio *folio, struct lruvec *lruvec)
123 : {
124 0 : int nr_pages = folio_nr_pages(folio);
125 0 : bool isolated = false;
126 :
127 0 : if (!folio_test_clear_lru(folio))
128 : goto munlock;
129 :
130 0 : isolated = true;
131 0 : lruvec = folio_lruvec_relock_irq(folio, lruvec);
132 :
133 0 : if (folio_test_unevictable(folio)) {
134 : /* Then mlock_count is maintained, but might undercount */
135 0 : if (folio->mlock_count)
136 0 : folio->mlock_count--;
137 0 : if (folio->mlock_count)
138 : goto out;
139 : }
140 : /* else assume that was the last mlock: reclaim will fix it if not */
141 :
142 : munlock:
143 0 : if (folio_test_clear_mlocked(folio)) {
144 0 : __zone_stat_mod_folio(folio, NR_MLOCK, -nr_pages);
145 0 : if (isolated || !folio_test_unevictable(folio))
146 0 : __count_vm_events(UNEVICTABLE_PGMUNLOCKED, nr_pages);
147 : else
148 0 : __count_vm_events(UNEVICTABLE_PGSTRANDED, nr_pages);
149 : }
150 :
151 : /* folio_evictable() has to be checked *after* clearing Mlocked */
152 0 : if (isolated && folio_test_unevictable(folio) && folio_evictable(folio)) {
153 0 : lruvec_del_folio(lruvec, folio);
154 0 : folio_clear_unevictable(folio);
155 0 : lruvec_add_folio(lruvec, folio);
156 0 : __count_vm_events(UNEVICTABLE_PGRESCUED, nr_pages);
157 : }
158 : out:
159 0 : if (isolated)
160 : folio_set_lru(folio);
161 0 : return lruvec;
162 : }
163 :
164 : /*
165 : * Flags held in the low bits of a struct folio pointer on the mlock_fbatch.
166 : */
167 : #define LRU_FOLIO 0x1
168 : #define NEW_FOLIO 0x2
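/*
 * The low two bits of a struct folio pointer are always clear (folios are
 * aligned to well over four bytes), so the flags can be carried in the
 * pointer itself.  Illustrative example: a folio at 0xffff888012345600
 * queued via mlock_lru() is stored in the batch as 0xffff888012345601, and
 * mlock_folio_batch() masks the low bits off again before dispatching to
 * __mlock_folio(), __mlock_new_folio() or __munlock_folio().
 */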
169 : static inline struct folio *mlock_lru(struct folio *folio)
170 : {
171 0 : return (struct folio *)((unsigned long)folio + LRU_FOLIO);
172 : }
173 :
174 : static inline struct folio *mlock_new(struct folio *folio)
175 : {
176 0 : return (struct folio *)((unsigned long)folio + NEW_FOLIO);
177 : }
178 :
179 : /*
180 : * mlock_folio_batch() is derived from folio_batch_move_lru(): perhaps that can
181 : * make use of such folio pointer flags in future, but for now just keep it for
182 : * mlock. We could use three separate folio batches instead, but one feels
183 : * better (munlocking a full folio batch does not need to drain mlocking folio
184 : * batches first).
185 : */
186 0 : static void mlock_folio_batch(struct folio_batch *fbatch)
187 : {
188 0 : struct lruvec *lruvec = NULL;
189 : unsigned long mlock;
190 : struct folio *folio;
191 : int i;
192 :
193 0 : for (i = 0; i < folio_batch_count(fbatch); i++) {
194 0 : folio = fbatch->folios[i];
195 0 : mlock = (unsigned long)folio & (LRU_FOLIO | NEW_FOLIO);
196 0 : folio = (struct folio *)((unsigned long)folio - mlock);
197 0 : fbatch->folios[i] = folio;
198 :
199 0 : if (mlock & LRU_FOLIO)
200 0 : lruvec = __mlock_folio(folio, lruvec);
201 0 : else if (mlock & NEW_FOLIO)
202 0 : lruvec = __mlock_new_folio(folio, lruvec);
203 : else
204 0 : lruvec = __munlock_folio(folio, lruvec);
205 : }
206 :
207 0 : if (lruvec)
208 0 : unlock_page_lruvec_irq(lruvec);
209 0 : folios_put(fbatch->folios, folio_batch_count(fbatch));
210 0 : folio_batch_reinit(fbatch);
211 0 : }
212 :
213 0 : void mlock_drain_local(void)
214 : {
215 : struct folio_batch *fbatch;
216 :
217 0 : local_lock(&mlock_fbatch.lock);
218 0 : fbatch = this_cpu_ptr(&mlock_fbatch.fbatch);
219 0 : if (folio_batch_count(fbatch))
220 0 : mlock_folio_batch(fbatch);
221 0 : local_unlock(&mlock_fbatch.lock);
222 0 : }
223 :
224 0 : void mlock_drain_remote(int cpu)
225 : {
226 : struct folio_batch *fbatch;
227 :
228 0 : WARN_ON_ONCE(cpu_online(cpu));
229 0 : fbatch = &per_cpu(mlock_fbatch.fbatch, cpu);
230 0 : if (folio_batch_count(fbatch))
231 0 : mlock_folio_batch(fbatch);
232 0 : }
233 :
234 0 : bool need_mlock_drain(int cpu)
235 : {
236 0 : return folio_batch_count(&per_cpu(mlock_fbatch.fbatch, cpu));
237 : }
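/*
 * mlock_drain_local() flushes the current CPU's batch and is called from
 * lru_add_drain(); mlock_drain_remote() flushes the batch of a CPU that has
 * already gone offline (hence the WARN_ON_ONCE above), and
 * need_mlock_drain() tells the LRU draining code whether a remote CPU still
 * has queued folios.
 */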
238 :
239 : /**
240 : * mlock_folio - mlock a folio already on (or temporarily off) LRU
241 : * @folio: folio to be mlocked.
242 : */
243 0 : void mlock_folio(struct folio *folio)
244 : {
245 : struct folio_batch *fbatch;
246 :
247 0 : local_lock(&mlock_fbatch.lock);
248 0 : fbatch = this_cpu_ptr(&mlock_fbatch.fbatch);
249 :
250 0 : if (!folio_test_set_mlocked(folio)) {
251 0 : int nr_pages = folio_nr_pages(folio);
252 :
253 0 : zone_stat_mod_folio(folio, NR_MLOCK, nr_pages);
254 0 : __count_vm_events(UNEVICTABLE_PGMLOCKED, nr_pages);
255 : }
256 :
257 0 : folio_get(folio);
258 0 : if (!folio_batch_add(fbatch, mlock_lru(folio)) ||
259 0 : folio_test_large(folio) || lru_cache_disabled())
260 0 : mlock_folio_batch(fbatch);
261 0 : local_unlock(&mlock_fbatch.lock);
262 0 : }
263 :
264 : /**
265 : * mlock_new_folio - mlock a newly allocated folio not yet on LRU
266 : * @folio: folio to be mlocked, either normal or a THP head.
267 : */
268 0 : void mlock_new_folio(struct folio *folio)
269 : {
270 : struct folio_batch *fbatch;
271 0 : int nr_pages = folio_nr_pages(folio);
272 :
273 0 : local_lock(&mlock_fbatch.lock);
274 0 : fbatch = this_cpu_ptr(&mlock_fbatch.fbatch);
275 0 : folio_set_mlocked(folio);
276 :
277 0 : zone_stat_mod_folio(folio, NR_MLOCK, nr_pages);
278 0 : __count_vm_events(UNEVICTABLE_PGMLOCKED, nr_pages);
279 :
280 0 : folio_get(folio);
281 0 : if (!folio_batch_add(fbatch, mlock_new(folio)) ||
282 0 : folio_test_large(folio) || lru_cache_disabled())
283 0 : mlock_folio_batch(fbatch);
284 0 : local_unlock(&mlock_fbatch.lock);
285 0 : }
286 :
287 : /**
288 : * munlock_folio - munlock a folio
289 : * @folio: folio to be munlocked, either normal or a THP head.
290 : */
291 0 : void munlock_folio(struct folio *folio)
292 : {
293 : struct folio_batch *fbatch;
294 :
295 0 : local_lock(&mlock_fbatch.lock);
296 0 : fbatch = this_cpu_ptr(&mlock_fbatch.fbatch);
297 : /*
298 : * folio_test_clear_mlocked(folio) must be left to __munlock_folio(),
299 : * which will check whether the folio is multiply mlocked.
300 : */
301 0 : folio_get(folio);
302 0 : if (!folio_batch_add(fbatch, folio) ||
303 0 : folio_test_large(folio) || lru_cache_disabled())
304 0 : mlock_folio_batch(fbatch);
305 0 : local_unlock(&mlock_fbatch.lock);
306 0 : }
307 :
308 0 : static int mlock_pte_range(pmd_t *pmd, unsigned long addr,
309 : unsigned long end, struct mm_walk *walk)
310 :
311 : {
312 0 : struct vm_area_struct *vma = walk->vma;
313 : spinlock_t *ptl;
314 : pte_t *start_pte, *pte;
315 : struct folio *folio;
316 :
317 0 : ptl = pmd_trans_huge_lock(pmd, vma);
318 : if (ptl) {
319 : if (!pmd_present(*pmd))
320 : goto out;
321 : if (is_huge_zero_pmd(*pmd))
322 : goto out;
323 : folio = page_folio(pmd_page(*pmd));
324 : if (vma->vm_flags & VM_LOCKED)
325 : mlock_folio(folio);
326 : else
327 : munlock_folio(folio);
328 : goto out;
329 : }
330 :
331 0 : start_pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
332 0 : for (pte = start_pte; addr != end; pte++, addr += PAGE_SIZE) {
333 0 : if (!pte_present(*pte))
334 0 : continue;
335 0 : folio = vm_normal_folio(vma, addr, *pte);
336 0 : if (!folio || folio_is_zone_device(folio))
337 0 : continue;
338 0 : if (folio_test_large(folio))
339 0 : continue;
340 0 : if (vma->vm_flags & VM_LOCKED)
341 0 : mlock_folio(folio);
342 : else
343 0 : munlock_folio(folio);
344 : }
345 : pte_unmap(start_pte);
346 : out:
347 0 : spin_unlock(ptl);
348 0 : cond_resched();
349 0 : return 0;
350 : }
351 :
352 : /*
353 : * mlock_vma_pages_range() - mlock any pages already in the range,
354 : * or munlock all pages in the range.
355 : * @vma - vma containing range to be mlock()ed or munlock()ed
356 : * @start - start address in @vma of the range
357 : * @end - end of range in @vma
358 : * @newflags - the new set of flags for @vma.
359 : *
360 : * Called for mlock(), mlock2() and mlockall(), to set @vma VM_LOCKED;
361 : * called for munlock() and munlockall(), to clear VM_LOCKED from @vma.
362 : */
363 0 : static void mlock_vma_pages_range(struct vm_area_struct *vma,
364 : unsigned long start, unsigned long end, vm_flags_t newflags)
365 : {
366 : static const struct mm_walk_ops mlock_walk_ops = {
367 : .pmd_entry = mlock_pte_range,
368 : };
369 :
370 : /*
371 : * There is a slight chance that concurrent page migration,
372 : * or page reclaim finding a page of this now-VM_LOCKED vma,
373 : * will call mlock_vma_folio() and raise page's mlock_count:
374 : * double counting, leaving the page unevictable indefinitely.
375 : * Communicate this danger to mlock_vma_folio() with VM_IO,
376 : * which is a VM_SPECIAL flag not allowed on VM_LOCKED vmas.
377 : * mmap_lock is held in write mode here, so this weird
378 : * combination should not be visible to other mmap_lock users;
379 : * but WRITE_ONCE so rmap walkers must see VM_IO if VM_LOCKED.
380 : */
381 0 : if (newflags & VM_LOCKED)
382 0 : newflags |= VM_IO;
383 0 : vm_flags_reset_once(vma, newflags);
384 :
385 0 : lru_add_drain();
386 0 : walk_page_range(vma->vm_mm, start, end, &mlock_walk_ops, NULL);
387 0 : lru_add_drain();
388 :
389 0 : if (newflags & VM_IO) {
390 0 : newflags &= ~VM_IO;
391 : vm_flags_reset_once(vma, newflags);
392 : }
393 0 : }
394 :
395 : /*
396 : * mlock_fixup - handle mlock[all]/munlock[all] requests.
397 : *
398 : * Filters out "special" vmas -- VM_LOCKED never gets set for these, and
399 : * munlock is a no-op. However, for some special vmas, we go ahead and
400 : * populate the ptes.
401 : *
402 : * For vmas that pass the filters, merge/split as appropriate.
403 : */
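/*
 * For example, mlocking just the middle of an existing vma first tries
 * vma_merge() with the new flags and, failing that, split_vma()s the vma at
 * @start and @end so that only the middle piece ends up with VM_LOCKED set.
 */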
404 0 : static int mlock_fixup(struct vma_iterator *vmi, struct vm_area_struct *vma,
405 : struct vm_area_struct **prev, unsigned long start,
406 : unsigned long end, vm_flags_t newflags)
407 : {
408 0 : struct mm_struct *mm = vma->vm_mm;
409 : pgoff_t pgoff;
410 : int nr_pages;
411 0 : int ret = 0;
412 0 : vm_flags_t oldflags = vma->vm_flags;
413 :
414 0 : if (newflags == oldflags || (oldflags & VM_SPECIAL) ||
415 0 : is_vm_hugetlb_page(vma) || vma == get_gate_vma(current->mm) ||
416 0 : vma_is_dax(vma) || vma_is_secretmem(vma))
417 : /* don't set VM_LOCKED or VM_LOCKONFAULT and don't count */
418 : goto out;
419 :
420 0 : pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
421 0 : *prev = vma_merge(vmi, mm, *prev, start, end, newflags,
422 : vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma),
423 : vma->vm_userfaultfd_ctx, anon_vma_name(vma));
424 0 : if (*prev) {
425 : vma = *prev;
426 : goto success;
427 : }
428 :
429 0 : if (start != vma->vm_start) {
430 0 : ret = split_vma(vmi, vma, start, 1);
431 0 : if (ret)
432 : goto out;
433 : }
434 :
435 0 : if (end != vma->vm_end) {
436 0 : ret = split_vma(vmi, vma, end, 0);
437 0 : if (ret)
438 : goto out;
439 : }
440 :
441 : success:
442 : /*
443 : * Keep track of amount of locked VM.
444 : */
445 0 : nr_pages = (end - start) >> PAGE_SHIFT;
446 0 : if (!(newflags & VM_LOCKED))
447 0 : nr_pages = -nr_pages;
448 0 : else if (oldflags & VM_LOCKED)
449 0 : nr_pages = 0;
450 0 : mm->locked_vm += nr_pages;
451 :
452 : /*
453 : * vm_flags is protected by the mmap_lock held in write mode.
454 : * It's okay if try_to_unmap_one unmaps a page just after we
455 : * set VM_LOCKED, populate_vma_page_range will bring it back.
456 : */
457 :
458 0 : if ((newflags & VM_LOCKED) && (oldflags & VM_LOCKED)) {
459 : /* No work to do, and mlocking twice would be wrong */
460 : vm_flags_reset(vma, newflags);
461 : } else {
462 0 : mlock_vma_pages_range(vma, start, end, newflags);
463 : }
464 : out:
465 0 : *prev = vma;
466 0 : return ret;
467 : }
468 :
469 0 : static int apply_vma_lock_flags(unsigned long start, size_t len,
470 : vm_flags_t flags)
471 : {
472 : unsigned long nstart, end, tmp;
473 : struct vm_area_struct *vma, *prev;
474 : int error;
475 0 : VMA_ITERATOR(vmi, current->mm, start);
476 :
477 : VM_BUG_ON(offset_in_page(start));
478 : VM_BUG_ON(len != PAGE_ALIGN(len));
479 0 : end = start + len;
480 0 : if (end < start)
481 : return -EINVAL;
482 0 : if (end == start)
483 : return 0;
484 0 : vma = vma_iter_load(&vmi);
485 0 : if (!vma)
486 : return -ENOMEM;
487 :
488 0 : prev = vma_prev(&vmi);
489 0 : if (start > vma->vm_start)
490 0 : prev = vma;
491 :
492 0 : nstart = start;
493 0 : tmp = vma->vm_start;
494 0 : for_each_vma_range(vmi, vma, end) {
495 : vm_flags_t newflags;
496 :
497 0 : if (vma->vm_start != tmp)
498 : return -ENOMEM;
499 :
500 0 : newflags = vma->vm_flags & ~VM_LOCKED_MASK;
501 0 : newflags |= flags;
502 : /* Here we know that vma->vm_start <= nstart < vma->vm_end. */
503 0 : tmp = vma->vm_end;
504 0 : if (tmp > end)
505 0 : tmp = end;
506 0 : error = mlock_fixup(&vmi, vma, &prev, nstart, tmp, newflags);
507 0 : if (error)
508 : break;
509 : nstart = tmp;
510 : }
511 :
512 0 : if (vma_iter_end(&vmi) < end)
513 : return -ENOMEM;
514 :
515 0 : return error;
516 : }
517 :
518 : /*
519 : * Go through the vma areas and sum the size of the mlocked
520 : * vma pages.
521 : * Note that the deferred memory locking case (mlock2(,,MLOCK_ONFAULT))
522 : * is also counted.
523 : * Return value: the number of previously mlocked pages.
524 : */
525 0 : static unsigned long count_mm_mlocked_page_nr(struct mm_struct *mm,
526 : unsigned long start, size_t len)
527 : {
528 : struct vm_area_struct *vma;
529 0 : unsigned long count = 0;
530 : unsigned long end;
531 0 : VMA_ITERATOR(vmi, mm, start);
532 :
533 : /* Don't overflow past ULONG_MAX */
534 0 : if (unlikely(ULONG_MAX - len < start))
535 : end = ULONG_MAX;
536 : else
537 0 : end = start + len;
538 :
539 0 : for_each_vma_range(vmi, vma, end) {
540 0 : if (vma->vm_flags & VM_LOCKED) {
541 0 : if (start > vma->vm_start)
542 0 : count -= (start - vma->vm_start);
543 0 : if (end < vma->vm_end) {
544 0 : count += end - vma->vm_start;
545 0 : break;
546 : }
547 0 : count += vma->vm_end - vma->vm_start;
548 : }
549 : }
550 :
551 0 : return count >> PAGE_SHIFT;
552 : }
553 :
554 : /*
555 : * convert get_user_pages() return value to posix mlock() error
556 : */
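/*
 * POSIX specifies ENOMEM when part of the range is not mapped (which
 * get_user_pages() reports as -EFAULT) and EAGAIN when the memory simply
 * could not be locked (which it reports as -ENOMEM), hence the swap below.
 */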
557 : static int __mlock_posix_error_return(long retval)
558 : {
559 0 : if (retval == -EFAULT)
560 : retval = -ENOMEM;
561 0 : else if (retval == -ENOMEM)
562 0 : retval = -EAGAIN;
563 0 : return retval;
564 : }
565 :
566 0 : static __must_check int do_mlock(unsigned long start, size_t len, vm_flags_t flags)
567 : {
568 : unsigned long locked;
569 : unsigned long lock_limit;
570 0 : int error = -ENOMEM;
571 :
572 0 : start = untagged_addr(start);
573 :
574 0 : if (!can_do_mlock())
575 : return -EPERM;
576 :
577 0 : len = PAGE_ALIGN(len + (offset_in_page(start)));
578 0 : start &= PAGE_MASK;
579 :
580 0 : lock_limit = rlimit(RLIMIT_MEMLOCK);
581 0 : lock_limit >>= PAGE_SHIFT;
582 0 : locked = len >> PAGE_SHIFT;
583 :
584 0 : if (mmap_write_lock_killable(current->mm))
585 : return -EINTR;
586 :
587 0 : locked += current->mm->locked_vm;
588 0 : if ((locked > lock_limit) && (!capable(CAP_IPC_LOCK))) {
589 : /*
590 : * It is possible that the regions requested intersect with
591 : * previously mlocked areas; those pages are already accounted in
592 : * "mm->locked_vm" and should not be counted again towards the new
593 : * mlock increment, so check and adjust the locked count if necessary.
594 : */
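/*
 * Illustrative example: if the request covers 16 pages and 6 of them
 * already lie in VM_LOCKED vmas, count_mm_mlocked_page_nr() returns 6,
 * so only the 10 genuinely new pages are charged on top of mm->locked_vm
 * for the RLIMIT_MEMLOCK check below.
 */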
595 0 : locked -= count_mm_mlocked_page_nr(current->mm,
596 : start, len);
597 : }
598 :
599 : /* check against resource limits */
600 0 : if ((locked <= lock_limit) || capable(CAP_IPC_LOCK))
601 0 : error = apply_vma_lock_flags(start, len, flags);
602 :
603 0 : mmap_write_unlock(current->mm);
604 0 : if (error)
605 : return error;
606 :
607 0 : error = __mm_populate(start, len, 0);
608 0 : if (error)
609 0 : return __mlock_posix_error_return(error);
610 : return 0;
611 : }
612 :
613 0 : SYSCALL_DEFINE2(mlock, unsigned long, start, size_t, len)
614 : {
615 0 : return do_mlock(start, len, VM_LOCKED);
616 : }
617 :
618 0 : SYSCALL_DEFINE3(mlock2, unsigned long, start, size_t, len, int, flags)
619 : {
620 0 : vm_flags_t vm_flags = VM_LOCKED;
621 :
622 0 : if (flags & ~MLOCK_ONFAULT)
623 : return -EINVAL;
624 :
625 0 : if (flags & MLOCK_ONFAULT)
626 0 : vm_flags |= VM_LOCKONFAULT;
627 :
628 0 : return do_mlock(start, len, vm_flags);
629 : }
630 :
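/*
 * Illustrative userspace view of the two system calls above (illustrative,
 * not kernel code): mlock() populates and locks the range immediately,
 * giving the vma VM_LOCKED, while mlock2() with MLOCK_ONFAULT sets
 * VM_LOCKED | VM_LOCKONFAULT so pages become locked only as they are
 * faulted in:
 *
 *	if (mlock(buf, len))
 *		perror("mlock");
 *	if (mlock2(buf, len, MLOCK_ONFAULT))
 *		perror("mlock2");
 */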
631 0 : SYSCALL_DEFINE2(munlock, unsigned long, start, size_t, len)
632 : {
633 : int ret;
634 :
635 0 : start = untagged_addr(start);
636 :
637 0 : len = PAGE_ALIGN(len + (offset_in_page(start)));
638 0 : start &= PAGE_MASK;
639 :
640 0 : if (mmap_write_lock_killable(current->mm))
641 : return -EINTR;
642 0 : ret = apply_vma_lock_flags(start, len, 0);
643 0 : mmap_write_unlock(current->mm);
644 :
645 0 : return ret;
646 : }
647 :
648 : /*
649 : * Take the MCL_* flags passed into mlockall (or 0 if called from munlockall)
650 : * and translate into the appropriate modifications to mm->def_flags and/or the
651 : * flags for all current VMAs.
652 : *
653 : * There are a couple of subtleties with this. If mlockall() is called multiple
654 : * times with different flags, the values do not necessarily stack. If mlockall
655 : * is called once including the MCL_FUTURE flag and then a second time without
656 : * it, VM_LOCKED and VM_LOCKONFAULT will be cleared from mm->def_flags.
657 : */
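/*
 * For example, mlockall(MCL_CURRENT | MCL_FUTURE | MCL_ONFAULT) sets
 * VM_LOCKED | VM_LOCKONFAULT in mm->def_flags and adds both bits to every
 * existing vma; a later plain mlockall(MCL_CURRENT) clears def_flags again
 * (MCL_FUTURE was not repeated) and strips VM_LOCKONFAULT from the vmas,
 * leaving them just VM_LOCKED.
 */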
658 0 : static int apply_mlockall_flags(int flags)
659 : {
660 0 : VMA_ITERATOR(vmi, current->mm, 0);
661 0 : struct vm_area_struct *vma, *prev = NULL;
662 0 : vm_flags_t to_add = 0;
663 :
664 0 : current->mm->def_flags &= ~VM_LOCKED_MASK;
665 0 : if (flags & MCL_FUTURE) {
666 0 : current->mm->def_flags |= VM_LOCKED;
667 :
668 0 : if (flags & MCL_ONFAULT)
669 0 : current->mm->def_flags |= VM_LOCKONFAULT;
670 :
671 0 : if (!(flags & MCL_CURRENT))
672 : goto out;
673 : }
674 :
675 0 : if (flags & MCL_CURRENT) {
676 0 : to_add |= VM_LOCKED;
677 0 : if (flags & MCL_ONFAULT)
678 0 : to_add |= VM_LOCKONFAULT;
679 : }
680 :
681 0 : for_each_vma(vmi, vma) {
682 : vm_flags_t newflags;
683 :
684 0 : newflags = vma->vm_flags & ~VM_LOCKED_MASK;
685 0 : newflags |= to_add;
686 :
687 : /* Ignore errors */
688 0 : mlock_fixup(&vmi, vma, &prev, vma->vm_start, vma->vm_end,
689 : newflags);
690 0 : cond_resched();
691 : }
692 : out:
693 0 : return 0;
694 : }
695 :
696 0 : SYSCALL_DEFINE1(mlockall, int, flags)
697 : {
698 : unsigned long lock_limit;
699 : int ret;
700 :
701 0 : if (!flags || (flags & ~(MCL_CURRENT | MCL_FUTURE | MCL_ONFAULT)) ||
702 : flags == MCL_ONFAULT)
703 : return -EINVAL;
704 :
705 0 : if (!can_do_mlock())
706 : return -EPERM;
707 :
708 0 : lock_limit = rlimit(RLIMIT_MEMLOCK);
709 0 : lock_limit >>= PAGE_SHIFT;
710 :
711 0 : if (mmap_write_lock_killable(current->mm))
712 : return -EINTR;
713 :
714 0 : ret = -ENOMEM;
715 0 : if (!(flags & MCL_CURRENT) || (current->mm->total_vm <= lock_limit) ||
716 0 : capable(CAP_IPC_LOCK))
717 0 : ret = apply_mlockall_flags(flags);
718 0 : mmap_write_unlock(current->mm);
719 0 : if (!ret && (flags & MCL_CURRENT))
720 0 : mm_populate(0, TASK_SIZE);
721 :
722 0 : return ret;
723 : }
724 :
725 0 : SYSCALL_DEFINE0(munlockall)
726 : {
727 : int ret;
728 :
729 0 : if (mmap_write_lock_killable(current->mm))
730 : return -EINTR;
731 0 : ret = apply_mlockall_flags(0);
732 0 : mmap_write_unlock(current->mm);
733 0 : return ret;
734 : }
735 :
736 : /*
737 : * Objects with different lifetime than processes (SHM_LOCK and SHM_HUGETLB
738 : * shm segments) get accounted against the user's ucounts instead.
739 : */
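/*
 * user_shm_lock() charges the locked size against the caller's
 * UCOUNT_RLIMIT_MEMLOCK ucount under shmlock_user_lock and, on success,
 * takes a reference on the ucounts that user_shm_unlock() drops again when
 * the charge is removed.
 */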
740 : static DEFINE_SPINLOCK(shmlock_user_lock);
741 :
742 0 : int user_shm_lock(size_t size, struct ucounts *ucounts)
743 : {
744 : unsigned long lock_limit, locked;
745 : long memlock;
746 0 : int allowed = 0;
747 :
748 0 : locked = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
749 0 : lock_limit = rlimit(RLIMIT_MEMLOCK);
750 0 : if (lock_limit != RLIM_INFINITY)
751 0 : lock_limit >>= PAGE_SHIFT;
752 0 : spin_lock(&shmlock_user_lock);
753 0 : memlock = inc_rlimit_ucounts(ucounts, UCOUNT_RLIMIT_MEMLOCK, locked);
754 :
755 0 : if ((memlock == LONG_MAX || memlock > lock_limit) && !capable(CAP_IPC_LOCK)) {
756 0 : dec_rlimit_ucounts(ucounts, UCOUNT_RLIMIT_MEMLOCK, locked);
757 0 : goto out;
758 : }
759 0 : if (!get_ucounts(ucounts)) {
760 0 : dec_rlimit_ucounts(ucounts, UCOUNT_RLIMIT_MEMLOCK, locked);
761 0 : allowed = 0;
762 0 : goto out;
763 : }
764 : allowed = 1;
765 : out:
766 0 : spin_unlock(&shmlock_user_lock);
767 0 : return allowed;
768 : }
769 :
770 0 : void user_shm_unlock(size_t size, struct ucounts *ucounts)
771 : {
772 0 : spin_lock(&shmlock_user_lock);
773 0 : dec_rlimit_ucounts(ucounts, UCOUNT_RLIMIT_MEMLOCK, (size + PAGE_SIZE - 1) >> PAGE_SHIFT);
774 0 : spin_unlock(&shmlock_user_lock);
775 0 : put_ucounts(ucounts);
776 0 : }
|