LCOV - coverage.info

LCOV - code coverage report

Current view:	top level - mm - mlock.c (source / functions)		Hit	Total	Coverage
Test:	coverage.info	Lines:	0	321	0.0 %
Date:	2023-08-24 13:40:31	Functions:	0	27	0.0 %

          Line data    Source code

       1             : // SPDX-License-Identifier: GPL-2.0
       2             : /*
       3             :  *      linux/mm/mlock.c
       4             :  *
       5             :  *  (C) Copyright 1995 Linus Torvalds
       6             :  *  (C) Copyright 2002 Christoph Hellwig
       7             :  */
       8             : 
       9             : #include <linux/capability.h>
      10             : #include <linux/mman.h>
      11             : #include <linux/mm.h>
      12             : #include <linux/sched/user.h>
      13             : #include <linux/swap.h>
      14             : #include <linux/swapops.h>
      15             : #include <linux/pagemap.h>
      16             : #include <linux/pagevec.h>
      17             : #include <linux/pagewalk.h>
      18             : #include <linux/mempolicy.h>
      19             : #include <linux/syscalls.h>
      20             : #include <linux/sched.h>
      21             : #include <linux/export.h>
      22             : #include <linux/rmap.h>
      23             : #include <linux/mmzone.h>
      24             : #include <linux/hugetlb.h>
      25             : #include <linux/memcontrol.h>
      26             : #include <linux/mm_inline.h>
      27             : #include <linux/secretmem.h>
      28             : 
      29             : #include "internal.h"
      30             : 
      31             : struct mlock_fbatch {
      32             :         local_lock_t lock;
      33             :         struct folio_batch fbatch;
      34             : };
      35             : 
      36             : static DEFINE_PER_CPU(struct mlock_fbatch, mlock_fbatch) = {
      37             :         .lock = INIT_LOCAL_LOCK(lock),
      38             : };
      39             : 
      40           0 : bool can_do_mlock(void)
      41             : {
      42           0 :         if (rlimit(RLIMIT_MEMLOCK) != 0)
      43             :                 return true;
      44           0 :         if (capable(CAP_IPC_LOCK))
      45             :                 return true;
      46           0 :         return false;
      47             : }
      48             : EXPORT_SYMBOL(can_do_mlock);
      49             : 
      50             : /*
      51             :  * Mlocked folios are marked with the PG_mlocked flag for efficient testing
      52             :  * in vmscan and, possibly, the fault path; and to support semi-accurate
      53             :  * statistics.
      54             :  *
      55             :  * An mlocked folio [folio_test_mlocked(folio)] is unevictable.  As such, it
      56             :  * will be ostensibly placed on the LRU "unevictable" list (actually no such
      57             :  * list exists), rather than the [in]active lists. PG_unevictable is set to
      58             :  * indicate the unevictable state.
      59             :  */
      60             : 
      61           0 : static struct lruvec *__mlock_folio(struct folio *folio, struct lruvec *lruvec)
      62             : {
      63             :         /* There is nothing more we can do while it's off LRU */
      64           0 :         if (!folio_test_clear_lru(folio))
      65             :                 return lruvec;
      66             : 
      67           0 :         lruvec = folio_lruvec_relock_irq(folio, lruvec);
      68             : 
      69           0 :         if (unlikely(folio_evictable(folio))) {
      70             :                 /*
      71             :                  * This is a little surprising, but quite possible: PG_mlocked
      72             :                  * must have got cleared already by another CPU.  Could this
      73             :                  * folio be unevictable?  I'm not sure, but move it now if so.
      74             :                  */
      75           0 :                 if (folio_test_unevictable(folio)) {
      76           0 :                         lruvec_del_folio(lruvec, folio);
      77           0 :                         folio_clear_unevictable(folio);
      78           0 :                         lruvec_add_folio(lruvec, folio);
      79             : 
      80           0 :                         __count_vm_events(UNEVICTABLE_PGRESCUED,
      81             :                                           folio_nr_pages(folio));
      82             :                 }
      83             :                 goto out;
      84             :         }
      85             : 
      86           0 :         if (folio_test_unevictable(folio)) {
      87           0 :                 if (folio_test_mlocked(folio))
      88           0 :                         folio->mlock_count++;
      89             :                 goto out;
      90             :         }
      91             : 
      92           0 :         lruvec_del_folio(lruvec, folio);
      93           0 :         folio_clear_active(folio);
      94           0 :         folio_set_unevictable(folio);
      95           0 :         folio->mlock_count = !!folio_test_mlocked(folio);
      96           0 :         lruvec_add_folio(lruvec, folio);
      97           0 :         __count_vm_events(UNEVICTABLE_PGCULLED, folio_nr_pages(folio));
      98             : out:
      99           0 :         folio_set_lru(folio);
     100           0 :         return lruvec;
     101             : }
     102             : 
     103           0 : static struct lruvec *__mlock_new_folio(struct folio *folio, struct lruvec *lruvec)
     104             : {
     105             :         VM_BUG_ON_FOLIO(folio_test_lru(folio), folio);
     106             : 
     107           0 :         lruvec = folio_lruvec_relock_irq(folio, lruvec);
     108             : 
     109             :         /* As above, this is a little surprising, but possible */
     110           0 :         if (unlikely(folio_evictable(folio)))
     111             :                 goto out;
     112             : 
     113           0 :         folio_set_unevictable(folio);
     114           0 :         folio->mlock_count = !!folio_test_mlocked(folio);
     115           0 :         __count_vm_events(UNEVICTABLE_PGCULLED, folio_nr_pages(folio));
     116             : out:
     117           0 :         lruvec_add_folio(lruvec, folio);
     118           0 :         folio_set_lru(folio);
     119           0 :         return lruvec;
     120             : }
     121             : 
     122           0 : static struct lruvec *__munlock_folio(struct folio *folio, struct lruvec *lruvec)
     123             : {
     124           0 :         int nr_pages = folio_nr_pages(folio);
     125           0 :         bool isolated = false;
     126             : 
     127           0 :         if (!folio_test_clear_lru(folio))
     128             :                 goto munlock;
     129             : 
     130           0 :         isolated = true;
     131           0 :         lruvec = folio_lruvec_relock_irq(folio, lruvec);
     132             : 
     133           0 :         if (folio_test_unevictable(folio)) {
     134             :                 /* Then mlock_count is maintained, but might undercount */
     135           0 :                 if (folio->mlock_count)
     136           0 :                         folio->mlock_count--;
     137           0 :                 if (folio->mlock_count)
     138             :                         goto out;
     139             :         }
     140             :         /* else assume that was the last mlock: reclaim will fix it if not */
     141             : 
     142             : munlock:
     143           0 :         if (folio_test_clear_mlocked(folio)) {
     144           0 :                 __zone_stat_mod_folio(folio, NR_MLOCK, -nr_pages);
     145           0 :                 if (isolated || !folio_test_unevictable(folio))
     146           0 :                         __count_vm_events(UNEVICTABLE_PGMUNLOCKED, nr_pages);
     147             :                 else
     148           0 :                         __count_vm_events(UNEVICTABLE_PGSTRANDED, nr_pages);
     149             :         }
     150             : 
     151             :         /* folio_evictable() has to be checked *after* clearing Mlocked */
     152           0 :         if (isolated && folio_test_unevictable(folio) && folio_evictable(folio)) {
     153           0 :                 lruvec_del_folio(lruvec, folio);
     154           0 :                 folio_clear_unevictable(folio);
     155           0 :                 lruvec_add_folio(lruvec, folio);
     156           0 :                 __count_vm_events(UNEVICTABLE_PGRESCUED, nr_pages);
     157             :         }
     158             : out:
     159           0 :         if (isolated)
     160             :                 folio_set_lru(folio);
     161           0 :         return lruvec;
     162             : }
     163             : 
     164             : /*
     165             :  * Flags held in the low bits of a struct folio pointer on the mlock_fbatch.
     166             :  */
     167             : #define LRU_FOLIO 0x1
     168             : #define NEW_FOLIO 0x2
     169             : static inline struct folio *mlock_lru(struct folio *folio)
     170             : {
     171           0 :         return (struct folio *)((unsigned long)folio + LRU_FOLIO);
     172             : }
     173             : 
     174             : static inline struct folio *mlock_new(struct folio *folio)
     175             : {
     176           0 :         return (struct folio *)((unsigned long)folio + NEW_FOLIO);
     177             : }
     178             : 
     179             : /*
     180             :  * mlock_folio_batch() is derived from folio_batch_move_lru(): perhaps that can
     181             :  * make use of such folio pointer flags in future, but for now just keep it for
     182             :  * mlock.  We could use three separate folio batches instead, but one feels
     183             :  * better (munlocking a full folio batch does not need to drain mlocking folio
     184             :  * batches first).
     185             :  */
     186           0 : static void mlock_folio_batch(struct folio_batch *fbatch)
     187             : {
     188           0 :         struct lruvec *lruvec = NULL;
     189             :         unsigned long mlock;
     190             :         struct folio *folio;
     191             :         int i;
     192             : 
     193           0 :         for (i = 0; i < folio_batch_count(fbatch); i++) {
     194           0 :                 folio = fbatch->folios[i];
     195           0 :                 mlock = (unsigned long)folio & (LRU_FOLIO | NEW_FOLIO);
     196           0 :                 folio = (struct folio *)((unsigned long)folio - mlock);
     197           0 :                 fbatch->folios[i] = folio;
     198             : 
     199           0 :                 if (mlock & LRU_FOLIO)
     200           0 :                         lruvec = __mlock_folio(folio, lruvec);
     201           0 :                 else if (mlock & NEW_FOLIO)
     202           0 :                         lruvec = __mlock_new_folio(folio, lruvec);
     203             :                 else
     204           0 :                         lruvec = __munlock_folio(folio, lruvec);
     205             :         }
     206             : 
     207           0 :         if (lruvec)
     208           0 :                 unlock_page_lruvec_irq(lruvec);
     209           0 :         folios_put(fbatch->folios, folio_batch_count(fbatch));
     210           0 :         folio_batch_reinit(fbatch);
     211           0 : }
     212             : 
     213           0 : void mlock_drain_local(void)
     214             : {
     215             :         struct folio_batch *fbatch;
     216             : 
     217           0 :         local_lock(&mlock_fbatch.lock);
     218           0 :         fbatch = this_cpu_ptr(&mlock_fbatch.fbatch);
     219           0 :         if (folio_batch_count(fbatch))
     220           0 :                 mlock_folio_batch(fbatch);
     221           0 :         local_unlock(&mlock_fbatch.lock);
     222           0 : }
     223             : 
     224           0 : void mlock_drain_remote(int cpu)
     225             : {
     226             :         struct folio_batch *fbatch;
     227             : 
     228           0 :         WARN_ON_ONCE(cpu_online(cpu));
     229           0 :         fbatch = &per_cpu(mlock_fbatch.fbatch, cpu);
     230           0 :         if (folio_batch_count(fbatch))
     231           0 :                 mlock_folio_batch(fbatch);
     232           0 : }
     233             : 
     234           0 : bool need_mlock_drain(int cpu)
     235             : {
     236           0 :         return folio_batch_count(&per_cpu(mlock_fbatch.fbatch, cpu));
     237             : }
     238             : 
     239             : /**
     240             :  * mlock_folio - mlock a folio already on (or temporarily off) LRU
     241             :  * @folio: folio to be mlocked.
     242             :  */
     243           0 : void mlock_folio(struct folio *folio)
     244             : {
     245             :         struct folio_batch *fbatch;
     246             : 
     247           0 :         local_lock(&mlock_fbatch.lock);
     248           0 :         fbatch = this_cpu_ptr(&mlock_fbatch.fbatch);
     249             : 
     250           0 :         if (!folio_test_set_mlocked(folio)) {
     251           0 :                 int nr_pages = folio_nr_pages(folio);
     252             : 
     253           0 :                 zone_stat_mod_folio(folio, NR_MLOCK, nr_pages);
     254           0 :                 __count_vm_events(UNEVICTABLE_PGMLOCKED, nr_pages);
     255             :         }
     256             : 
     257           0 :         folio_get(folio);
     258           0 :         if (!folio_batch_add(fbatch, mlock_lru(folio)) ||
     259           0 :             folio_test_large(folio) || lru_cache_disabled())
     260           0 :                 mlock_folio_batch(fbatch);
     261           0 :         local_unlock(&mlock_fbatch.lock);
     262           0 : }
     263             : 
     264             : /**
     265             :  * mlock_new_folio - mlock a newly allocated folio not yet on LRU
     266             :  * @folio: folio to be mlocked, either normal or a THP head.
     267             :  */
     268           0 : void mlock_new_folio(struct folio *folio)
     269             : {
     270             :         struct folio_batch *fbatch;
     271           0 :         int nr_pages = folio_nr_pages(folio);
     272             : 
     273           0 :         local_lock(&mlock_fbatch.lock);
     274           0 :         fbatch = this_cpu_ptr(&mlock_fbatch.fbatch);
     275           0 :         folio_set_mlocked(folio);
     276             : 
     277           0 :         zone_stat_mod_folio(folio, NR_MLOCK, nr_pages);
     278           0 :         __count_vm_events(UNEVICTABLE_PGMLOCKED, nr_pages);
     279             : 
     280           0 :         folio_get(folio);
     281           0 :         if (!folio_batch_add(fbatch, mlock_new(folio)) ||
     282           0 :             folio_test_large(folio) || lru_cache_disabled())
     283           0 :                 mlock_folio_batch(fbatch);
     284           0 :         local_unlock(&mlock_fbatch.lock);
     285           0 : }
     286             : 
     287             : /**
     288             :  * munlock_folio - munlock a folio
     289             :  * @folio: folio to be munlocked, either normal or a THP head.
     290             :  */
     291           0 : void munlock_folio(struct folio *folio)
     292             : {
     293             :         struct folio_batch *fbatch;
     294             : 
     295           0 :         local_lock(&mlock_fbatch.lock);
     296           0 :         fbatch = this_cpu_ptr(&mlock_fbatch.fbatch);
     297             :         /*
     298             :          * folio_test_clear_mlocked(folio) must be left to __munlock_folio(),
     299             :          * which will check whether the folio is multiply mlocked.
     300             :          */
     301           0 :         folio_get(folio);
     302           0 :         if (!folio_batch_add(fbatch, folio) ||
     303           0 :             folio_test_large(folio) || lru_cache_disabled())
     304           0 :                 mlock_folio_batch(fbatch);
     305           0 :         local_unlock(&mlock_fbatch.lock);
     306           0 : }
     307             : 
     308           0 : static int mlock_pte_range(pmd_t *pmd, unsigned long addr,
     309             :                            unsigned long end, struct mm_walk *walk)
     310             : 
     311             : {
     312           0 :         struct vm_area_struct *vma = walk->vma;
     313             :         spinlock_t *ptl;
     314             :         pte_t *start_pte, *pte;
     315             :         pte_t ptent;
     316             :         struct folio *folio;
     317             : 
     318           0 :         ptl = pmd_trans_huge_lock(pmd, vma);
     319             :         if (ptl) {
     320             :                 if (!pmd_present(*pmd))
     321             :                         goto out;
     322             :                 if (is_huge_zero_pmd(*pmd))
     323             :                         goto out;
     324             :                 folio = page_folio(pmd_page(*pmd));
     325             :                 if (vma->vm_flags & VM_LOCKED)
     326             :                         mlock_folio(folio);
     327             :                 else
     328             :                         munlock_folio(folio);
     329             :                 goto out;
     330             :         }
     331             : 
     332           0 :         start_pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
     333           0 :         if (!start_pte) {
     334           0 :                 walk->action = ACTION_AGAIN;
     335           0 :                 return 0;
     336             :         }
     337           0 :         for (pte = start_pte; addr != end; pte++, addr += PAGE_SIZE) {
     338           0 :                 ptent = ptep_get(pte);
     339           0 :                 if (!pte_present(ptent))
     340           0 :                         continue;
     341           0 :                 folio = vm_normal_folio(vma, addr, ptent);
     342           0 :                 if (!folio || folio_is_zone_device(folio))
     343           0 :                         continue;
     344           0 :                 if (folio_test_large(folio))
     345           0 :                         continue;
     346           0 :                 if (vma->vm_flags & VM_LOCKED)
     347           0 :                         mlock_folio(folio);
     348             :                 else
     349           0 :                         munlock_folio(folio);
     350             :         }
     351             :         pte_unmap(start_pte);
     352             : out:
     353           0 :         spin_unlock(ptl);
     354           0 :         cond_resched();
     355           0 :         return 0;
     356             : }
     357             : 
     358             : /*
     359             :  * mlock_vma_pages_range() - mlock any pages already in the range,
     360             :  *                           or munlock all pages in the range.
     361             :  * @vma - vma containing range to be mlock()ed or munlock()ed
     362             :  * @start - start address in @vma of the range
     363             :  * @end - end of range in @vma
     364             :  * @newflags - the new set of flags for @vma.
     365             :  *
     366             :  * Called for mlock(), mlock2() and mlockall(), to set @vma VM_LOCKED;
     367             :  * called for munlock() and munlockall(), to clear VM_LOCKED from @vma.
     368             :  */
     369           0 : static void mlock_vma_pages_range(struct vm_area_struct *vma,
     370             :         unsigned long start, unsigned long end, vm_flags_t newflags)
     371             : {
     372             :         static const struct mm_walk_ops mlock_walk_ops = {
     373             :                 .pmd_entry = mlock_pte_range,
     374             :         };
     375             : 
     376             :         /*
     377             :          * There is a slight chance that concurrent page migration,
     378             :          * or page reclaim finding a page of this now-VM_LOCKED vma,
     379             :          * will call mlock_vma_folio() and raise page's mlock_count:
     380             :          * double counting, leaving the page unevictable indefinitely.
     381             :          * Communicate this danger to mlock_vma_folio() with VM_IO,
     382             :          * which is a VM_SPECIAL flag not allowed on VM_LOCKED vmas.
     383             :          * mmap_lock is held in write mode here, so this weird
     384             :          * combination should not be visible to other mmap_lock users;
     385             :          * but WRITE_ONCE so rmap walkers must see VM_IO if VM_LOCKED.
     386             :          */
     387           0 :         if (newflags & VM_LOCKED)
     388           0 :                 newflags |= VM_IO;
     389           0 :         vm_flags_reset_once(vma, newflags);
     390             : 
     391           0 :         lru_add_drain();
     392           0 :         walk_page_range(vma->vm_mm, start, end, &mlock_walk_ops, NULL);
     393           0 :         lru_add_drain();
     394             : 
     395           0 :         if (newflags & VM_IO) {
     396           0 :                 newflags &= ~VM_IO;
     397             :                 vm_flags_reset_once(vma, newflags);
     398             :         }
     399           0 : }
     400             : 
     401             : /*
     402             :  * mlock_fixup  - handle mlock[all]/munlock[all] requests.
     403             :  *
     404             :  * Filters out "special" vmas -- VM_LOCKED never gets set for these, and
     405             :  * munlock is a no-op.  However, for some special vmas, we go ahead and
     406             :  * populate the ptes.
     407             :  *
     408             :  * For vmas that pass the filters, merge/split as appropriate.
     409             :  */
     410           0 : static int mlock_fixup(struct vma_iterator *vmi, struct vm_area_struct *vma,
     411             :                struct vm_area_struct **prev, unsigned long start,
     412             :                unsigned long end, vm_flags_t newflags)
     413             : {
     414           0 :         struct mm_struct *mm = vma->vm_mm;
     415             :         pgoff_t pgoff;
     416             :         int nr_pages;
     417           0 :         int ret = 0;
     418           0 :         vm_flags_t oldflags = vma->vm_flags;
     419             : 
     420           0 :         if (newflags == oldflags || (oldflags & VM_SPECIAL) ||
     421           0 :             is_vm_hugetlb_page(vma) || vma == get_gate_vma(current->mm) ||
     422           0 :             vma_is_dax(vma) || vma_is_secretmem(vma))
     423             :                 /* don't set VM_LOCKED or VM_LOCKONFAULT and don't count */
     424             :                 goto out;
     425             : 
     426           0 :         pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
     427           0 :         *prev = vma_merge(vmi, mm, *prev, start, end, newflags,
     428             :                         vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma),
     429             :                         vma->vm_userfaultfd_ctx, anon_vma_name(vma));
     430           0 :         if (*prev) {
     431             :                 vma = *prev;
     432             :                 goto success;
     433             :         }
     434             : 
     435           0 :         if (start != vma->vm_start) {
     436           0 :                 ret = split_vma(vmi, vma, start, 1);
     437           0 :                 if (ret)
     438             :                         goto out;
     439             :         }
     440             : 
     441           0 :         if (end != vma->vm_end) {
     442           0 :                 ret = split_vma(vmi, vma, end, 0);
     443           0 :                 if (ret)
     444             :                         goto out;
     445             :         }
     446             : 
     447             : success:
     448             :         /*
     449             :          * Keep track of amount of locked VM.
     450             :          */
     451           0 :         nr_pages = (end - start) >> PAGE_SHIFT;
     452           0 :         if (!(newflags & VM_LOCKED))
     453           0 :                 nr_pages = -nr_pages;
     454           0 :         else if (oldflags & VM_LOCKED)
     455           0 :                 nr_pages = 0;
     456           0 :         mm->locked_vm += nr_pages;
     457             : 
     458             :         /*
     459             :          * vm_flags is protected by the mmap_lock held in write mode.
     460             :          * It's okay if try_to_unmap_one unmaps a page just after we
     461             :          * set VM_LOCKED, populate_vma_page_range will bring it back.
     462             :          */
     463             : 
     464           0 :         if ((newflags & VM_LOCKED) && (oldflags & VM_LOCKED)) {
     465             :                 /* No work to do, and mlocking twice would be wrong */
     466             :                 vm_flags_reset(vma, newflags);
     467             :         } else {
     468           0 :                 mlock_vma_pages_range(vma, start, end, newflags);
     469             :         }
     470             : out:
     471           0 :         *prev = vma;
     472           0 :         return ret;
     473             : }
     474             : 
     475           0 : static int apply_vma_lock_flags(unsigned long start, size_t len,
     476             :                                 vm_flags_t flags)
     477             : {
     478             :         unsigned long nstart, end, tmp;
     479             :         struct vm_area_struct *vma, *prev;
     480             :         int error;
     481           0 :         VMA_ITERATOR(vmi, current->mm, start);
     482             : 
     483             :         VM_BUG_ON(offset_in_page(start));
     484             :         VM_BUG_ON(len != PAGE_ALIGN(len));
     485           0 :         end = start + len;
     486           0 :         if (end < start)
     487             :                 return -EINVAL;
     488           0 :         if (end == start)
     489             :                 return 0;
     490           0 :         vma = vma_iter_load(&vmi);
     491           0 :         if (!vma)
     492             :                 return -ENOMEM;
     493             : 
     494           0 :         prev = vma_prev(&vmi);
     495           0 :         if (start > vma->vm_start)
     496           0 :                 prev = vma;
     497             : 
     498           0 :         nstart = start;
     499           0 :         tmp = vma->vm_start;
     500           0 :         for_each_vma_range(vmi, vma, end) {
     501             :                 vm_flags_t newflags;
     502             : 
     503           0 :                 if (vma->vm_start != tmp)
     504             :                         return -ENOMEM;
     505             : 
     506           0 :                 newflags = vma->vm_flags & ~VM_LOCKED_MASK;
     507           0 :                 newflags |= flags;
     508             :                 /* Here we know that  vma->vm_start <= nstart < vma->vm_end. */
     509           0 :                 tmp = vma->vm_end;
     510           0 :                 if (tmp > end)
     511           0 :                         tmp = end;
     512           0 :                 error = mlock_fixup(&vmi, vma, &prev, nstart, tmp, newflags);
     513           0 :                 if (error)
     514             :                         break;
     515             :                 nstart = tmp;
     516             :         }
     517             : 
     518           0 :         if (vma_iter_end(&vmi) < end)
     519             :                 return -ENOMEM;
     520             : 
     521           0 :         return error;
     522             : }
     523             : 
     524             : /*
     525             :  * Go through vma areas and sum size of mlocked
     526             :  * vma pages, as return value.
     527             :  * Note deferred memory locking case(mlock2(,,MLOCK_ONFAULT)
     528             :  * is also counted.
     529             :  * Return value: previously mlocked page counts
     530             :  */
     531           0 : static unsigned long count_mm_mlocked_page_nr(struct mm_struct *mm,
     532             :                 unsigned long start, size_t len)
     533             : {
     534             :         struct vm_area_struct *vma;
     535           0 :         unsigned long count = 0;
     536             :         unsigned long end;
     537           0 :         VMA_ITERATOR(vmi, mm, start);
     538             : 
     539             :         /* Don't overflow past ULONG_MAX */
     540           0 :         if (unlikely(ULONG_MAX - len < start))
     541             :                 end = ULONG_MAX;
     542             :         else
     543           0 :                 end = start + len;
     544             : 
     545           0 :         for_each_vma_range(vmi, vma, end) {
     546           0 :                 if (vma->vm_flags & VM_LOCKED) {
     547           0 :                         if (start > vma->vm_start)
     548           0 :                                 count -= (start - vma->vm_start);
     549           0 :                         if (end < vma->vm_end) {
     550           0 :                                 count += end - vma->vm_start;
     551           0 :                                 break;
     552             :                         }
     553           0 :                         count += vma->vm_end - vma->vm_start;
     554             :                 }
     555             :         }
     556             : 
     557           0 :         return count >> PAGE_SHIFT;
     558             : }
     559             : 
     560             : /*
     561             :  * convert get_user_pages() return value to posix mlock() error
     562             :  */
     563             : static int __mlock_posix_error_return(long retval)
     564             : {
     565           0 :         if (retval == -EFAULT)
     566             :                 retval = -ENOMEM;
     567           0 :         else if (retval == -ENOMEM)
     568           0 :                 retval = -EAGAIN;
     569           0 :         return retval;
     570             : }
     571             : 
     572           0 : static __must_check int do_mlock(unsigned long start, size_t len, vm_flags_t flags)
     573             : {       /*
                     :          * Shared implementation behind the mlock() and mlock2() syscalls
                     :          * defined below.  Page-aligns the range, checks it against
                     :          * RLIMIT_MEMLOCK (skipped when capable(CAP_IPC_LOCK) succeeds),
                     :          * applies @flags through apply_vma_lock_flags() under the mmap
                     :          * write lock, then calls __mm_populate() on the range.
                     :          *
                     :          * Returns 0 on success, -EPERM if can_do_mlock() refuses, -EINTR
                     :          * if mmap_write_lock_killable() fails, -ENOMEM if the limit check
                     :          * fails, the error from apply_vma_lock_flags(), or the
                     :          * __mm_populate() error translated by
                     :          * __mlock_posix_error_return().
                     :          */
     574             :         unsigned long locked;
     575             :         unsigned long lock_limit;
     576           0 :         int error = -ENOMEM;    /* returned as-is when the limit check below fails */
     577             : 
     578           0 :         start = untagged_addr(start);   /* presumably strips address tag bits - confirm against untagged_addr() */
     579             : 
     580           0 :         if (!can_do_mlock())
     581             :                 return -EPERM;
     582             : 
     583           0 :         len = PAGE_ALIGN(len + (offset_in_page(start)));        /* cover the partial first page, whole pages only */
     584           0 :         start &= PAGE_MASK;     /* round start down to its page boundary */
     585             : 
     586           0 :         lock_limit = rlimit(RLIMIT_MEMLOCK);
     587           0 :         lock_limit >>= PAGE_SHIFT;      /* limit in pages */
     588           0 :         locked = len >> PAGE_SHIFT;     /* request in pages */
     589             : 
     590           0 :         if (mmap_write_lock_killable(current->mm))
     591             :                 return -EINTR;
     592             : 
     593           0 :         locked += current->mm->locked_vm;       /* request plus what the mm already has locked */
     594           0 :         if ((locked > lock_limit) && (!capable(CAP_IPC_LOCK))) {
     595             :                 /*
     596             :                  * It is possible that the requested region intersects with
     597             :                  * previously mlocked areas. That part is already included in
     598             :                  * "mm->locked_vm" and must not be counted again as new mlock
     599             :                  * increment, so subtract it from the locked count here.
     600             :                  */
     601           0 :                 locked -= count_mm_mlocked_page_nr(current->mm,
     602             :                                 start, len);
     603             :         }
     604             : 
     605             :         /* check against resource limits */
     606           0 :         if ((locked <= lock_limit) || capable(CAP_IPC_LOCK))
     607           0 :                 error = apply_vma_lock_flags(start, len, flags);
     608             : 
     609           0 :         mmap_write_unlock(current->mm);
     610           0 :         if (error)
     611             :                 return error;
     612             : 
     613           0 :         error = __mm_populate(start, len, 0);   /* called after the mmap write lock has been dropped */
     614           0 :         if (error)
     615           0 :                 return __mlock_posix_error_return(error);
     616             :         return 0;
     617             : }
     618             : 
     619           0 : SYSCALL_DEFINE2(mlock, unsigned long, start, size_t, len)
     620             : {       /* mlock entry point: do_mlock() on the range with VM_LOCKED only. */
     621           0 :         return do_mlock(start, len, VM_LOCKED);
     622             : }
     623             : 
     624           0 : SYSCALL_DEFINE3(mlock2, unsigned long, start, size_t, len, int, flags)
     625             : {       /* mlock2 entry point: like mlock, with MLOCK_ONFAULT as the only accepted flag. */
     626           0 :         vm_flags_t vm_flags = VM_LOCKED;
     627             : 
     628           0 :         if (flags & ~MLOCK_ONFAULT)     /* any bit other than MLOCK_ONFAULT is rejected */
     629             :                 return -EINVAL;
     630             : 
     631           0 :         if (flags & MLOCK_ONFAULT)
     632           0 :                 vm_flags |= VM_LOCKONFAULT;     /* handed to do_mlock() together with VM_LOCKED */
     633             : 
     634           0 :         return do_mlock(start, len, vm_flags);
     635             : }
     636             : 
     637           0 : SYSCALL_DEFINE2(munlock, unsigned long, start, size_t, len)
     638             : {       /* munlock entry point: returns apply_vma_lock_flags()'s result, or -EINTR if the mmap lock is not taken. */
     639             :         int ret;
     640             : 
     641           0 :         start = untagged_addr(start);   /* presumably strips address tag bits - confirm against untagged_addr() */
     642             : 
     643           0 :         len = PAGE_ALIGN(len + (offset_in_page(start)));        /* same page alignment as do_mlock() */
     644           0 :         start &= PAGE_MASK;
     645             : 
     646           0 :         if (mmap_write_lock_killable(current->mm))
     647             :                 return -EINTR;
     648           0 :         ret = apply_vma_lock_flags(start, len, 0);      /* flags == 0: no lock bits requested for the range */
     649           0 :         mmap_write_unlock(current->mm);
     650             : 
     651           0 :         return ret;
     652             : }
     653             : 
     654             : /*
     655             :  * Take the MCL_* flags passed into mlockall (or 0 if called from munlockall)
     656             :  * and translate into the appropriate modifications to mm->def_flags and/or the
     657             :  * flags for all current VMAs.
     658             :  *
     659             :  * There are a couple of subtleties with this.  If mlockall() is called multiple
     660             :  * times with different flags, the values do not necessarily stack.  If mlockall
     661             :  * is called once including the MCL_FUTURE flag and then a second time without
     662             :  * it, VM_LOCKED and VM_LOCKONFAULT will be cleared from mm->def_flags.
                     :  *
                     :  * Always returns 0: the result of mlock_fixup() on each VMA is discarded.
                     :  * Both callers in this file hold the mmap write lock around the call.
     663             :  */
     664           0 : static int apply_mlockall_flags(int flags)
     665             : {
     666           0 :         VMA_ITERATOR(vmi, current->mm, 0);
     667           0 :         struct vm_area_struct *vma, *prev = NULL;
     668           0 :         vm_flags_t to_add = 0;  /* stays 0 for munlockall, so the loop below only clears lock bits */
     669             : 
     670           0 :         current->mm->def_flags &= ~VM_LOCKED_MASK;      /* start from a clean slate on every call */
     671           0 :         if (flags & MCL_FUTURE) {
     672           0 :                 current->mm->def_flags |= VM_LOCKED;
     673             : 
     674           0 :                 if (flags & MCL_ONFAULT)
     675           0 :                         current->mm->def_flags |= VM_LOCKONFAULT;
     676             : 
     677           0 :                 if (!(flags & MCL_CURRENT))     /* MCL_FUTURE alone: existing VMAs are left untouched */
     678             :                         goto out;
     679             :         }
     680             : 
     681           0 :         if (flags & MCL_CURRENT) {
     682           0 :                 to_add |= VM_LOCKED;
     683           0 :                 if (flags & MCL_ONFAULT)
     684           0 :                         to_add |= VM_LOCKONFAULT;
     685             :         }
     686             : 
     687           0 :         for_each_vma(vmi, vma) {
     688             :                 vm_flags_t newflags;
     689             : 
     690           0 :                 newflags = vma->vm_flags & ~VM_LOCKED_MASK;     /* drop the VMA's current lock bits ... */
     691           0 :                 newflags |= to_add;     /* ... and replace them with the requested ones */
     692             : 
     693             :                 /* Ignore errors */
     694           0 :                 mlock_fixup(&vmi, vma, &prev, vma->vm_start, vma->vm_end,
     695             :                             newflags);
     696           0 :                 cond_resched();
     697             :         }
     698             : out:
     699           0 :         return 0;
     700             : }
     701             : 
     702           0 : SYSCALL_DEFINE1(mlockall, int, flags)
     703             : {       /*
                     :          * mlockall entry point.  Returns -EINVAL for bad flags, -EPERM if
                     :          * can_do_mlock() refuses, -EINTR if the mmap write lock is not
                     :          * taken, -ENOMEM if the limit check fails, otherwise the result
                     :          * of apply_mlockall_flags().
                     :          */
     704             :         unsigned long lock_limit;
     705             :         int ret;
     706             : 
     707           0 :         if (!flags || (flags & ~(MCL_CURRENT | MCL_FUTURE | MCL_ONFAULT)) ||    /* no flags, or unknown bits */
     708             :             flags == MCL_ONFAULT)       /* MCL_ONFAULT on its own is rejected too */
     709             :                 return -EINVAL;
     710             : 
     711           0 :         if (!can_do_mlock())
     712             :                 return -EPERM;
     713             : 
     714           0 :         lock_limit = rlimit(RLIMIT_MEMLOCK);
     715           0 :         lock_limit >>= PAGE_SHIFT;      /* limit in pages, compared with mm->total_vm below */
     716             : 
     717           0 :         if (mmap_write_lock_killable(current->mm))
     718             :                 return -EINTR;
     719             : 
     720           0 :         ret = -ENOMEM;
     721           0 :         if (!(flags & MCL_CURRENT) || (current->mm->total_vm <= lock_limit) ||  /* limit only matters with MCL_CURRENT */
     722           0 :             capable(CAP_IPC_LOCK))
     723           0 :                 ret = apply_mlockall_flags(flags);
     724           0 :         mmap_write_unlock(current->mm);
     725           0 :         if (!ret && (flags & MCL_CURRENT))
     726           0 :                 mm_populate(0, TASK_SIZE);      /* after dropping the lock; no result is consulted here */
     727             : 
     728           0 :         return ret;
     729             : }
     730             : 
     731           0 : SYSCALL_DEFINE0(munlockall)
     732             : {       /* munlockall entry point: apply_mlockall_flags(0) under the mmap write lock, or -EINTR. */
     733             :         int ret;
     734             : 
     735           0 :         if (mmap_write_lock_killable(current->mm))
     736             :                 return -EINTR;
     737           0 :         ret = apply_mlockall_flags(0);  /* 0 = no MCL_* flags: clears lock bits from def_flags and every VMA */
     738           0 :         mmap_write_unlock(current->mm);
     739           0 :         return ret;
     740             : }
     741             : 
     742             : /*
     743             :  * Objects with different lifetime than processes (SHM_LOCK and SHM_HUGETLB
     744             :  * shm segments) get accounted against the user_struct instead.
                     :  *
                     :  * shmlock_user_lock is held around the UCOUNT_RLIMIT_MEMLOCK updates made by
                     :  * user_shm_lock() and user_shm_unlock() below.
     745             :  */
     746             : static DEFINE_SPINLOCK(shmlock_user_lock);
     747             : 
     748           0 : int user_shm_lock(size_t size, struct ucounts *ucounts)
     749             : {       /*
                     :          * Charge @size bytes, rounded up to whole pages, to @ucounts'
                     :          * UCOUNT_RLIMIT_MEMLOCK counter and take a reference on @ucounts.
                     :          * Returns 1 on success.  Returns 0, with the charge undone, if the
                     :          * limit check fails or get_ucounts() fails.
                     :          */
     750             :         unsigned long lock_limit, locked;
     751             :         long memlock;
     752           0 :         int allowed = 0;
     753             : 
     754           0 :         locked = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;  /* size in pages, rounded up */
     755           0 :         lock_limit = rlimit(RLIMIT_MEMLOCK);
     756           0 :         if (lock_limit != RLIM_INFINITY)        /* RLIM_INFINITY is kept unshifted */
     757           0 :                 lock_limit >>= PAGE_SHIFT;
     758           0 :         spin_lock(&shmlock_user_lock);
     759           0 :         memlock = inc_rlimit_ucounts(ucounts, UCOUNT_RLIMIT_MEMLOCK, locked);
     760             : 
     761           0 :         if ((memlock == LONG_MAX || memlock > lock_limit) && !capable(CAP_IPC_LOCK)) { /* LONG_MAX presumably flags counter overflow - confirm */
     762           0 :                 dec_rlimit_ucounts(ucounts, UCOUNT_RLIMIT_MEMLOCK, locked);     /* undo the charge taken above */
     763           0 :                 goto out;
     764             :         }
     765           0 :         if (!get_ucounts(ucounts)) {
     766           0 :                 dec_rlimit_ucounts(ucounts, UCOUNT_RLIMIT_MEMLOCK, locked);     /* undo the charge taken above */
     767           0 :                 allowed = 0;    /* redundant: allowed is still 0 from its initialiser */
     768           0 :                 goto out;
     769             :         }
     770             :         allowed = 1;
     771             : out:
     772           0 :         spin_unlock(&shmlock_user_lock);
     773           0 :         return allowed;
     774             : }
     775             : 
     776           0 : void user_shm_unlock(size_t size, struct ucounts *ucounts)
     777             : {       /* Reverse of user_shm_lock(): uncharge @size (rounded up to pages) and drop the @ucounts reference. */
     778           0 :         spin_lock(&shmlock_user_lock);
     779           0 :         dec_rlimit_ucounts(ucounts, UCOUNT_RLIMIT_MEMLOCK, (size + PAGE_SIZE - 1) >> PAGE_SHIFT);      /* same page count as user_shm_lock() charges */
     780           0 :         spin_unlock(&shmlock_user_lock);
     781           0 :         put_ucounts(ucounts);   /* pairs with get_ucounts() in user_shm_lock(); done after unlocking */
     782           0 : }

Generated by: LCOV version 1.14