// SPDX-License-Identifier: GPL-2.0-only
#include <crypto/hash.h>
#include <linux/export.h>
#include <linux/bvec.h>
#include <linux/fault-inject-usercopy.h>
#include <linux/uio.h>
#include <linux/pagemap.h>
#include <linux/highmem.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/splice.h>
#include <linux/compat.h>
#include <net/checksum.h>
#include <linux/scatterlist.h>
#include <linux/instrumented.h>

/* covers ubuf and kbuf alike */
#define iterate_buf(i, n, base, len, off, __p, STEP) {          \
        size_t __maybe_unused off = 0;                          \
        len = n;                                                \
        base = __p + i->iov_offset;                             \
        len -= (STEP);                                          \
        i->iov_offset += len;                                   \
        n = len;                                                \
}

/* covers iovec and kvec alike */
#define iterate_iovec(i, n, base, len, off, __p, STEP) {        \
        size_t off = 0;                                         \
        size_t skip = i->iov_offset;                            \
        do {                                                    \
                len = min(n, __p->iov_len - skip);              \
                if (likely(len)) {                              \
                        base = __p->iov_base + skip;            \
                        len -= (STEP);                          \
                        off += len;                             \
                        skip += len;                            \
                        n -= len;                               \
                        if (skip < __p->iov_len)                \
                                break;                          \
                }                                               \
                __p++;                                          \
                skip = 0;                                       \
        } while (n);                                            \
        i->iov_offset = skip;                                   \
        n = off;                                                \
}

#define iterate_bvec(i, n, base, len, off, p, STEP) {           \
        size_t off = 0;                                         \
        unsigned skip = i->iov_offset;                          \
        while (n) {                                             \
                unsigned offset = p->bv_offset + skip;          \
                unsigned left;                                  \
                void *kaddr = kmap_local_page(p->bv_page +      \
                                        offset / PAGE_SIZE);    \
                base = kaddr + offset % PAGE_SIZE;              \
                len = min(min(n, (size_t)(p->bv_len - skip)),   \
                     (size_t)(PAGE_SIZE - offset % PAGE_SIZE)); \
                left = (STEP);                                  \
                kunmap_local(kaddr);                            \
                len -= left;                                    \
                off += len;                                     \
                skip += len;                                    \
                if (skip == p->bv_len) {                        \
                        skip = 0;                               \
                        p++;                                    \
                }                                               \
                n -= len;                                       \
                if (left)                                       \
                        break;                                  \
        }                                                       \
        i->iov_offset = skip;                                   \
        n = off;                                                \
}

#define iterate_xarray(i, n, base, len, __off, STEP) {          \
        __label__ __out;                                        \
        size_t __off = 0;                                       \
        struct folio *folio;                                    \
        loff_t start = i->xarray_start + i->iov_offset;         \
        pgoff_t index = start / PAGE_SIZE;                      \
        XA_STATE(xas, i->xarray, index);                        \
                                                                \
        len = PAGE_SIZE - offset_in_page(start);                \
        rcu_read_lock();                                        \
        xas_for_each(&xas, folio, ULONG_MAX) {                  \
                unsigned left;                                  \
                size_t offset;                                  \
                if (xas_retry(&xas, folio))                     \
                        continue;                               \
                if (WARN_ON(xa_is_value(folio)))                \
                        break;                                  \
                if (WARN_ON(folio_test_hugetlb(folio)))         \
                        break;                                  \
                offset = offset_in_folio(folio, start + __off); \
                while (offset < folio_size(folio)) {            \
                        base = kmap_local_folio(folio, offset); \
                        len = min(n, len);                      \
                        left = (STEP);                          \
                        kunmap_local(base);                     \
                        len -= left;                            \
                        __off += len;                           \
                        n -= len;                               \
                        if (left || n == 0)                     \
                                goto __out;                     \
                        offset += len;                          \
                        len = PAGE_SIZE;                        \
                }                                               \
        }                                                       \
__out:                                                          \
        rcu_read_unlock();                                      \
        i->iov_offset += __off;                                 \
        n = __off;                                              \
}

#define __iterate_and_advance(i, n, base, len, off, I, K) {     \
        if (unlikely(i->count < n))                             \
                n = i->count;                                   \
        if (likely(n)) {                                        \
                if (likely(iter_is_ubuf(i))) {                  \
                        void __user *base;                      \
                        size_t len;                             \
                        iterate_buf(i, n, base, len, off,       \
                                                i->ubuf, (I))   \
                } else if (likely(iter_is_iovec(i))) {          \
                        const struct iovec *iov = iter_iov(i);  \
                        void __user *base;                      \
                        size_t len;                             \
                        iterate_iovec(i, n, base, len, off,     \
                                                iov, (I))       \
                        i->nr_segs -= iov - iter_iov(i);        \
                        i->__iov = iov;                         \
                } else if (iov_iter_is_bvec(i)) {               \
                        const struct bio_vec *bvec = i->bvec;   \
                        void *base;                             \
                        size_t len;                             \
                        iterate_bvec(i, n, base, len, off,      \
                                                bvec, (K))      \
                        i->nr_segs -= bvec - i->bvec;           \
                        i->bvec = bvec;                         \
                } else if (iov_iter_is_kvec(i)) {               \
                        const struct kvec *kvec = i->kvec;      \
                        void *base;                             \
                        size_t len;                             \
                        iterate_iovec(i, n, base, len, off,     \
                                                kvec, (K))      \
                        i->nr_segs -= kvec - i->kvec;           \
                        i->kvec = kvec;                         \
                } else if (iov_iter_is_xarray(i)) {             \
                        void *base;                             \
                        size_t len;                             \
                        iterate_xarray(i, n, base, len, off,    \
                                                        (K))    \
                }                                               \
                i->count -= n;                                  \
        }                                                       \
}
#define iterate_and_advance(i, n, base, len, off, I, K) \
        __iterate_and_advance(i, n, base, len, off, I, ((void)(K),0))

static int copyout(void __user *to, const void *from, size_t n)
{
        if (should_fail_usercopy())
                return n;
        if (access_ok(to, n)) {
                instrument_copy_to_user(to, from, n);
                n = raw_copy_to_user(to, from, n);
        }
        return n;
}

static int copyout_nofault(void __user *to, const void *from, size_t n)
{
        long res;

        if (should_fail_usercopy())
                return n;

        res = copy_to_user_nofault(to, from, n);

        return res < 0 ? n : res;
}

static int copyin(void *to, const void __user *from, size_t n)
{
        size_t res = n;

        if (should_fail_usercopy())
                return n;
        if (access_ok(from, n)) {
                instrument_copy_from_user_before(to, from, n);
                res = raw_copy_from_user(to, from, n);
                instrument_copy_from_user_after(to, from, n, res);
        }
        return res;
}

/*
 * fault_in_iov_iter_readable - fault in iov iterator for reading
 * @i: iterator
 * @size: maximum length
 *
 * Fault in one or more iovecs of the given iov_iter, to a maximum length of
 * @size.  For each iovec, fault in each page that constitutes the iovec.
 *
 * Returns the number of bytes not faulted in (like copy_to_user() and
 * copy_from_user()).
 *
 * Always returns 0 for non-userspace iterators.
 */
size_t fault_in_iov_iter_readable(const struct iov_iter *i, size_t size)
{
        if (iter_is_ubuf(i)) {
                size_t n = min(size, iov_iter_count(i));
                n -= fault_in_readable(i->ubuf + i->iov_offset, n);
                return size - n;
        } else if (iter_is_iovec(i)) {
                size_t count = min(size, iov_iter_count(i));
                const struct iovec *p;
                size_t skip;

                size -= count;
                for (p = iter_iov(i), skip = i->iov_offset; count; p++, skip = 0) {
                        size_t len = min(count, p->iov_len - skip);
                        size_t ret;

                        if (unlikely(!len))
                                continue;
                        ret = fault_in_readable(p->iov_base + skip, len);
                        count -= len - ret;
                        if (ret)
                                break;
                }
                return count + size;
        }
        return 0;
}
EXPORT_SYMBOL(fault_in_iov_iter_readable);
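
/*
 * Example (editor's illustrative sketch, not upstream code): the usual
 * caller is a buffered-write retry loop, which copies with page faults
 * disabled and, on a short copy, faults the user pages in outside of
 * the atomic context and retries.  Names other than the iov_iter API
 * (my_perform_write, my_try_copy_atomic) are hypothetical.
 *
 *	ssize_t my_perform_write(struct my_ctx *ctx, struct iov_iter *i)
 *	{
 *		size_t bytes = min_t(size_t, PAGE_SIZE, iov_iter_count(i));
 *
 *		do {
 *			size_t copied = my_try_copy_atomic(ctx, i, bytes);
 *			if (copied)
 *				return copied;
 *			// Nothing copied: fault the pages in and retry,
 *			// unless nothing at all could be faulted in.
 *		} while (fault_in_iov_iter_readable(i, bytes) != bytes);
 *		return -EFAULT;
 *	}
 */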

/*
 * fault_in_iov_iter_writeable - fault in iov iterator for writing
 * @i: iterator
 * @size: maximum length
 *
 * Faults in the iterator using get_user_pages(), i.e., without triggering
 * hardware page faults.  This is primarily useful when we already know that
 * some or all of the pages in @i aren't in memory.
 *
 * Returns the number of bytes not faulted in, like copy_to_user() and
 * copy_from_user().
 *
 * Always returns 0 for non-userspace iterators.
 */
size_t fault_in_iov_iter_writeable(const struct iov_iter *i, size_t size)
{
        if (iter_is_ubuf(i)) {
                size_t n = min(size, iov_iter_count(i));
                n -= fault_in_safe_writeable(i->ubuf + i->iov_offset, n);
                return size - n;
        } else if (iter_is_iovec(i)) {
                size_t count = min(size, iov_iter_count(i));
                const struct iovec *p;
                size_t skip;

                size -= count;
                for (p = iter_iov(i), skip = i->iov_offset; count; p++, skip = 0) {
                        size_t len = min(count, p->iov_len - skip);
                        size_t ret;

                        if (unlikely(!len))
                                continue;
                        ret = fault_in_safe_writeable(p->iov_base + skip, len);
                        count -= len - ret;
                        if (ret)
                                break;
                }
                return count + size;
        }
        return 0;
}
EXPORT_SYMBOL(fault_in_iov_iter_writeable);

void iov_iter_init(struct iov_iter *i, unsigned int direction,
                        const struct iovec *iov, unsigned long nr_segs,
                        size_t count)
{
        WARN_ON(direction & ~(READ | WRITE));
        *i = (struct iov_iter) {
                .iter_type = ITER_IOVEC,
                .copy_mc = false,
                .nofault = false,
                .user_backed = true,
                .data_source = direction,
                .__iov = iov,
                .nr_segs = nr_segs,
                .iov_offset = 0,
                .count = count
        };
}
EXPORT_SYMBOL(iov_iter_init);
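
/*
 * Example (editor's illustrative sketch): initialising an ITER_SOURCE
 * iovec iterator over two user buffers, as a writev()-style caller
 * would after import_iovec() has validated the array.  ubuf0/ubuf1 are
 * hypothetical user pointers.
 *
 *	struct iovec iov[2] = {
 *		{ .iov_base = ubuf0, .iov_len = 128 },
 *		{ .iov_base = ubuf1, .iov_len = 512 },
 *	};
 *	struct iov_iter iter;
 *
 *	iov_iter_init(&iter, ITER_SOURCE, iov, 2, 128 + 512);
 *	// iter now describes 640 bytes of user memory to read from.
 */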

static __wsum csum_and_memcpy(void *to, const void *from, size_t len,
                              __wsum sum, size_t off)
{
        __wsum next = csum_partial_copy_nocheck(from, to, len);
        return csum_block_add(sum, next, off);
}

size_t _copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
{
        if (WARN_ON_ONCE(i->data_source))
                return 0;
        if (user_backed_iter(i))
                might_fault();
        iterate_and_advance(i, bytes, base, len, off,
                copyout(base, addr + off, len),
                memcpy(base, addr + off, len)
        )

        return bytes;
}
EXPORT_SYMBOL(_copy_to_iter);
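
/*
 * Example (editor's illustrative sketch): _copy_to_iter() returns the
 * number of bytes actually copied, which may be short if a user page
 * faults.  Callers typically report the short transfer when anything
 * was copied.  dev_buf/dev_buf_len are hypothetical driver state.
 *
 *	static ssize_t my_read_iter(struct kiocb *iocb, struct iov_iter *to)
 *	{
 *		size_t n = min_t(size_t, dev_buf_len, iov_iter_count(to));
 *		size_t copied = _copy_to_iter(dev_buf, n, to);
 *
 *		return copied ? copied : -EFAULT;
 *	}
 */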

#ifdef CONFIG_ARCH_HAS_COPY_MC
static int copyout_mc(void __user *to, const void *from, size_t n)
{
        if (access_ok(to, n)) {
                instrument_copy_to_user(to, from, n);
                n = copy_mc_to_user((__force void *) to, from, n);
        }
        return n;
}

/**
 * _copy_mc_to_iter - copy to iter with source memory error exception handling
 * @addr: source kernel address
 * @bytes: total transfer length
 * @i: destination iterator
 *
 * The pmem driver deploys this for the dax operation
 * (dax_copy_to_iter()) for dax reads (bypass page-cache and the
 * block-layer). Upon #MC, read(2) aborts and returns EIO or the bytes
 * successfully copied.
 *
 * The main differences between this and the typical _copy_to_iter() are:
 *
 * * Typical tail/residue handling after a fault retries the copy
 *   byte-by-byte until the fault happens again. Re-triggering machine
 *   checks is potentially fatal so the implementation uses source
 *   alignment and poison alignment assumptions to avoid re-triggering
 *   hardware exceptions.
 *
 * * ITER_KVEC and ITER_BVEC can return short copies.  Compare to
 *   copy_to_iter() where only ITER_IOVEC attempts might return a short copy.
 *
 * Return: number of bytes copied (may be %0)
 */
size_t _copy_mc_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
{
        if (WARN_ON_ONCE(i->data_source))
                return 0;
        if (user_backed_iter(i))
                might_fault();
        __iterate_and_advance(i, bytes, base, len, off,
                copyout_mc(base, addr + off, len),
                copy_mc_to_kernel(base, addr + off, len)
        )

        return bytes;
}
EXPORT_SYMBOL_GPL(_copy_mc_to_iter);
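
/*
 * Example (editor's illustrative sketch): per the kernel-doc above, a
 * caller treats a short return as a poisoned source and fails the
 * remainder with -EIO rather than retrying into the poison:
 *
 *	size_t copied = _copy_mc_to_iter(kaddr, len, iter);
 *
 *	if (copied != len)
 *		return copied ? copied : -EIO;	// machine check mid-buffer
 */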
#endif /* CONFIG_ARCH_HAS_COPY_MC */

static void *memcpy_from_iter(struct iov_iter *i, void *to, const void *from,
                                 size_t size)
{
        if (iov_iter_is_copy_mc(i))
                return (void *)copy_mc_to_kernel(to, from, size);
        return memcpy(to, from, size);
}

size_t _copy_from_iter(void *addr, size_t bytes, struct iov_iter *i)
{
        if (WARN_ON_ONCE(!i->data_source))
                return 0;

        if (user_backed_iter(i))
                might_fault();
        iterate_and_advance(i, bytes, base, len, off,
                copyin(addr + off, base, len),
                memcpy_from_iter(i, addr + off, base, len)
        )

        return bytes;
}
EXPORT_SYMBOL(_copy_from_iter);

size_t _copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i)
{
        if (WARN_ON_ONCE(!i->data_source))
                return 0;

        iterate_and_advance(i, bytes, base, len, off,
                __copy_from_user_inatomic_nocache(addr + off, base, len),
                memcpy(addr + off, base, len)
        )

        return bytes;
}
EXPORT_SYMBOL(_copy_from_iter_nocache);

#ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE
/**
 * _copy_from_iter_flushcache - write destination through cpu cache
 * @addr: destination kernel address
 * @bytes: total transfer length
 * @i: source iterator
 *
 * The pmem driver arranges for filesystem-dax to use this facility via
 * dax_copy_from_iter() for ensuring that writes to persistent memory
 * are flushed through the CPU cache. It is differentiated from
 * _copy_from_iter_nocache() in that it guarantees all data is flushed for
 * all iterator types. _copy_from_iter_nocache() only attempts to
 * bypass the cache for the ITER_IOVEC case, and on some archs may use
 * instructions that strand dirty-data in the cache.
 *
 * Return: number of bytes copied (may be %0)
 */
size_t _copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i)
{
        if (WARN_ON_ONCE(!i->data_source))
                return 0;

        iterate_and_advance(i, bytes, base, len, off,
                __copy_from_user_flushcache(addr + off, base, len),
                memcpy_flushcache(addr + off, base, len)
        )

        return bytes;
}
EXPORT_SYMBOL_GPL(_copy_from_iter_flushcache);
#endif
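
/*
 * Example (editor's illustrative sketch): a pmem-style driver's
 * dax_operations ->copy_from_iter() hook would forward here, so that
 * data is durable in persistent memory once the copy returns.  The
 * shape mirrors what the pmem driver does; the names are schematic.
 *
 *	static size_t my_dax_copy_from_iter(struct dax_device *dax_dev,
 *			pgoff_t pgoff, void *addr, size_t bytes,
 *			struct iov_iter *i)
 *	{
 *		return _copy_from_iter_flushcache(addr, bytes, i);
 *	}
 */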

static inline bool page_copy_sane(struct page *page, size_t offset, size_t n)
{
        struct page *head;
        size_t v = n + offset;

        /*
         * The general case needs to access the page order in order
         * to compute the page size.
         * However, we mostly deal with order-0 pages and thus can
         * avoid a possible cache line miss for requests that fit all
         * page orders.
         */
        if (n <= v && v <= PAGE_SIZE)
                return true;

        head = compound_head(page);
        v += (page - head) << PAGE_SHIFT;

        if (WARN_ON(n > v || v > page_size(head)))
                return false;
        return true;
}

size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
                         struct iov_iter *i)
{
        size_t res = 0;
        if (!page_copy_sane(page, offset, bytes))
                return 0;
        if (WARN_ON_ONCE(i->data_source))
                return 0;
        page += offset / PAGE_SIZE; // first subpage
        offset %= PAGE_SIZE;
        while (1) {
                void *kaddr = kmap_local_page(page);
                size_t n = min(bytes, (size_t)PAGE_SIZE - offset);
                n = _copy_to_iter(kaddr + offset, n, i);
                kunmap_local(kaddr);
                res += n;
                bytes -= n;
                if (!bytes || !n)
                        break;
                offset += n;
                if (offset == PAGE_SIZE) {
                        page++;
                        offset = 0;
                }
        }
        return res;
}
EXPORT_SYMBOL(copy_page_to_iter);
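
/*
 * Example (editor's illustrative sketch): a read_iter-style loop that
 * feeds cached pages to the iterator.  my_get_cached_page() is
 * hypothetical; pos/done are the caller's file position and completion
 * count.  A short return from copy_page_to_iter() ends the read.
 *
 *	while (iov_iter_count(to)) {
 *		struct page *page = my_get_cached_page(pos);
 *		size_t off = offset_in_page(pos);
 *		size_t n = min_t(size_t, PAGE_SIZE - off, iov_iter_count(to));
 *		size_t copied = copy_page_to_iter(page, off, n, to);
 *
 *		pos += copied;
 *		done += copied;
 *		if (copied < n)
 *			break;		// fault or iterator exhausted
 *	}
 */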

size_t copy_page_to_iter_nofault(struct page *page, unsigned offset, size_t bytes,
                                 struct iov_iter *i)
{
        size_t res = 0;

        if (!page_copy_sane(page, offset, bytes))
                return 0;
        if (WARN_ON_ONCE(i->data_source))
                return 0;
        page += offset / PAGE_SIZE; // first subpage
        offset %= PAGE_SIZE;
        while (1) {
                void *kaddr = kmap_local_page(page);
                size_t n = min(bytes, (size_t)PAGE_SIZE - offset);

                iterate_and_advance(i, n, base, len, off,
                        copyout_nofault(base, kaddr + offset + off, len),
                        memcpy(base, kaddr + offset + off, len)
                )
                kunmap_local(kaddr);
                res += n;
                bytes -= n;
                if (!bytes || !n)
                        break;
                offset += n;
                if (offset == PAGE_SIZE) {
                        page++;
                        offset = 0;
                }
        }
        return res;
}
EXPORT_SYMBOL(copy_page_to_iter_nofault);

size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes,
                         struct iov_iter *i)
{
        size_t res = 0;
        if (!page_copy_sane(page, offset, bytes))
                return 0;
        page += offset / PAGE_SIZE; // first subpage
        offset %= PAGE_SIZE;
        while (1) {
                void *kaddr = kmap_local_page(page);
                size_t n = min(bytes, (size_t)PAGE_SIZE - offset);
                n = _copy_from_iter(kaddr + offset, n, i);
                kunmap_local(kaddr);
                res += n;
                bytes -= n;
                if (!bytes || !n)
                        break;
                offset += n;
                if (offset == PAGE_SIZE) {
                        page++;
                        offset = 0;
                }
        }
        return res;
}
EXPORT_SYMBOL(copy_page_from_iter);

size_t iov_iter_zero(size_t bytes, struct iov_iter *i)
{
        iterate_and_advance(i, bytes, base, len, count,
                clear_user(base, len),
                memset(base, 0, len)
        )

        return bytes;
}
EXPORT_SYMBOL(iov_iter_zero);
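
/*
 * Example (editor's illustrative sketch): iov_iter_zero() is the usual
 * way to satisfy reads of file holes or of a zero device:
 *
 *	static ssize_t my_zero_read_iter(struct kiocb *iocb,
 *					 struct iov_iter *to)
 *	{
 *		return iov_iter_zero(iov_iter_count(to), to);
 *	}
 *
 * (drivers/char/mem.c implements /dev/zero reads in essentially this
 * way, chunked with cond_resched() between iterations.)
 */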

size_t copy_page_from_iter_atomic(struct page *page, unsigned offset, size_t bytes,
                                  struct iov_iter *i)
{
        char *kaddr = kmap_atomic(page), *p = kaddr + offset;
        if (!page_copy_sane(page, offset, bytes)) {
                kunmap_atomic(kaddr);
                return 0;
        }
        if (WARN_ON_ONCE(!i->data_source)) {
                kunmap_atomic(kaddr);
                return 0;
        }
        iterate_and_advance(i, bytes, base, len, off,
                copyin(p + off, base, len),
                memcpy_from_iter(i, p + off, base, len)
        )
        kunmap_atomic(kaddr);
        return bytes;
}
EXPORT_SYMBOL(copy_page_from_iter_atomic);

static void iov_iter_bvec_advance(struct iov_iter *i, size_t size)
{
        const struct bio_vec *bvec, *end;

        if (!i->count)
                return;
        i->count -= size;

        size += i->iov_offset;

        for (bvec = i->bvec, end = bvec + i->nr_segs; bvec < end; bvec++) {
                if (likely(size < bvec->bv_len))
                        break;
                size -= bvec->bv_len;
        }
        i->iov_offset = size;
        i->nr_segs -= bvec - i->bvec;
        i->bvec = bvec;
}

static void iov_iter_iovec_advance(struct iov_iter *i, size_t size)
{
        const struct iovec *iov, *end;

        if (!i->count)
                return;
        i->count -= size;

        size += i->iov_offset; // from beginning of current segment
        for (iov = iter_iov(i), end = iov + i->nr_segs; iov < end; iov++) {
                if (likely(size < iov->iov_len))
                        break;
                size -= iov->iov_len;
        }
        i->iov_offset = size;
        i->nr_segs -= iov - iter_iov(i);
        i->__iov = iov;
}

void iov_iter_advance(struct iov_iter *i, size_t size)
{
        if (unlikely(i->count < size))
                size = i->count;
        if (likely(iter_is_ubuf(i)) || unlikely(iov_iter_is_xarray(i))) {
                i->iov_offset += size;
                i->count -= size;
        } else if (likely(iter_is_iovec(i) || iov_iter_is_kvec(i))) {
                /* iovec and kvec have identical layouts */
                iov_iter_iovec_advance(i, size);
        } else if (iov_iter_is_bvec(i)) {
                iov_iter_bvec_advance(i, size);
        } else if (iov_iter_is_discard(i)) {
                i->count -= size;
        }
}
EXPORT_SYMBOL(iov_iter_advance);

void iov_iter_revert(struct iov_iter *i, size_t unroll)
{
        if (!unroll)
                return;
        if (WARN_ON(unroll > MAX_RW_COUNT))
                return;
        i->count += unroll;
        if (unlikely(iov_iter_is_discard(i)))
                return;
        if (unroll <= i->iov_offset) {
                i->iov_offset -= unroll;
                return;
        }
        unroll -= i->iov_offset;
        if (iov_iter_is_xarray(i) || iter_is_ubuf(i)) {
                BUG(); /* We should never go beyond the start of the specified
                        * range since we might then be straying into pages that
                        * aren't pinned.
                        */
        } else if (iov_iter_is_bvec(i)) {
                const struct bio_vec *bvec = i->bvec;
                while (1) {
                        size_t n = (--bvec)->bv_len;
                        i->nr_segs++;
                        if (unroll <= n) {
                                i->bvec = bvec;
                                i->iov_offset = n - unroll;
                                return;
                        }
                        unroll -= n;
                }
        } else { /* same logic for iovec and kvec */
                const struct iovec *iov = iter_iov(i);
                while (1) {
                        size_t n = (--iov)->iov_len;
                        i->nr_segs++;
                        if (unroll <= n) {
                                i->__iov = iov;
                                i->iov_offset = n - unroll;
                                return;
                        }
                        unroll -= n;
                }
        }
}
EXPORT_SYMBOL(iov_iter_revert);
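
/*
 * Example (editor's illustrative sketch): advance consumes from the
 * front of the iterator; revert gives back what a failed operation
 * consumed.  A common pattern around a write path that advances the
 * iterator as it copies (my_do_write is hypothetical):
 *
 *	size_t before = iov_iter_count(i);
 *	ssize_t written = my_do_write(i);
 *
 *	if (written < 0)
 *		iov_iter_revert(i, before - iov_iter_count(i));
 */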

/*
 * Return the count of just the current iov_iter segment.
 */
size_t iov_iter_single_seg_count(const struct iov_iter *i)
{
        if (i->nr_segs > 1) {
                if (likely(iter_is_iovec(i) || iov_iter_is_kvec(i)))
                        return min(i->count, iter_iov(i)->iov_len - i->iov_offset);
                if (iov_iter_is_bvec(i))
                        return min(i->count, i->bvec->bv_len - i->iov_offset);
        }
        return i->count;
}
EXPORT_SYMBOL(iov_iter_single_seg_count);

void iov_iter_kvec(struct iov_iter *i, unsigned int direction,
                        const struct kvec *kvec, unsigned long nr_segs,
                        size_t count)
{
        WARN_ON(direction & ~(READ | WRITE));
        *i = (struct iov_iter){
                .iter_type = ITER_KVEC,
                .copy_mc = false,
                .data_source = direction,
                .kvec = kvec,
                .nr_segs = nr_segs,
                .iov_offset = 0,
                .count = count
        };
}
EXPORT_SYMBOL(iov_iter_kvec);
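
/*
 * Example (editor's illustrative sketch): a kvec iterator lets kernel
 * buffers flow through the same paths as user memory, e.g. receiving
 * into a kernel buffer over a socket:
 *
 *	struct kvec kv = { .iov_base = kbuf, .iov_len = len };
 *	struct msghdr msg = { };
 *
 *	iov_iter_kvec(&msg.msg_iter, ITER_DEST, &kv, 1, len);
 *	// sock_recvmsg(sock, &msg, 0) now fills kbuf.
 */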

void iov_iter_bvec(struct iov_iter *i, unsigned int direction,
                        const struct bio_vec *bvec, unsigned long nr_segs,
                        size_t count)
{
        WARN_ON(direction & ~(READ | WRITE));
        *i = (struct iov_iter){
                .iter_type = ITER_BVEC,
                .copy_mc = false,
                .data_source = direction,
                .bvec = bvec,
                .nr_segs = nr_segs,
                .iov_offset = 0,
                .count = count
        };
}
EXPORT_SYMBOL(iov_iter_bvec);
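
/*
 * Example (editor's illustrative sketch): a bvec iterator describes
 * pinned pages rather than virtual addresses, so copies through it
 * never fault; block and network code use this to hand pages to
 * ->read_iter()/->write_iter():
 *
 *	struct bio_vec bv;
 *	struct iov_iter iter;
 *
 *	bvec_set_page(&bv, page, len, offset);
 *	iov_iter_bvec(&iter, ITER_SOURCE, &bv, 1, len);
 */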

/**
 * iov_iter_xarray - Initialise an I/O iterator to use the pages in an xarray
 * @i: The iterator to initialise.
 * @direction: The direction of the transfer.
 * @xarray: The xarray to access.
 * @start: The start file position.
 * @count: The size of the I/O buffer in bytes.
 *
 * Set up an I/O iterator to either draw data out of the pages attached to an
 * inode or to inject data into those pages.  The caller *must* prevent the
 * pages from evaporating, either by taking a ref on them or by locking them.
 */
void iov_iter_xarray(struct iov_iter *i, unsigned int direction,
                     struct xarray *xarray, loff_t start, size_t count)
{
        BUG_ON(direction & ~1);
        *i = (struct iov_iter) {
                .iter_type = ITER_XARRAY,
                .copy_mc = false,
                .data_source = direction,
                .xarray = xarray,
                .xarray_start = start,
                .count = count,
                .iov_offset = 0
        };
}
EXPORT_SYMBOL(iov_iter_xarray);

/**
 * iov_iter_discard - Initialise an I/O iterator that discards data
 * @i: The iterator to initialise.
 * @direction: The direction of the transfer.
 * @count: The size of the I/O buffer in bytes.
 *
 * Set up an I/O iterator that just discards everything that's written to it.
 * It's only available as a READ iterator.
 */
void iov_iter_discard(struct iov_iter *i, unsigned int direction, size_t count)
{
        BUG_ON(direction != READ);
        *i = (struct iov_iter){
                .iter_type = ITER_DISCARD,
                .copy_mc = false,
                .data_source = false,
                .count = count,
                .iov_offset = 0
        };
}
EXPORT_SYMBOL(iov_iter_discard);
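
/*
 * Example (editor's illustrative sketch): a discard iterator drains
 * data that must be consumed but is not wanted, e.g. skipping "skip"
 * bytes of a stream through its normal read path:
 *
 *	struct iov_iter iter;
 *
 *	iov_iter_discard(&iter, ITER_DEST, skip);
 *	// Passing &iter to the read path consumes "skip" bytes into
 *	// nowhere; every copy into the iterator trivially succeeds.
 */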

static bool iov_iter_aligned_iovec(const struct iov_iter *i, unsigned addr_mask,
                                   unsigned len_mask)
{
        size_t size = i->count;
        size_t skip = i->iov_offset;
        unsigned k;

        for (k = 0; k < i->nr_segs; k++, skip = 0) {
                const struct iovec *iov = iter_iov(i) + k;
                size_t len = iov->iov_len - skip;

                if (len > size)
                        len = size;
                if (len & len_mask)
                        return false;
                if ((unsigned long)(iov->iov_base + skip) & addr_mask)
                        return false;

                size -= len;
                if (!size)
                        break;
        }
        return true;
}

static bool iov_iter_aligned_bvec(const struct iov_iter *i, unsigned addr_mask,
                                  unsigned len_mask)
{
        size_t size = i->count;
        unsigned skip = i->iov_offset;
        unsigned k;

        for (k = 0; k < i->nr_segs; k++, skip = 0) {
                size_t len = i->bvec[k].bv_len - skip;

                if (len > size)
                        len = size;
                if (len & len_mask)
                        return false;
                if ((unsigned long)(i->bvec[k].bv_offset + skip) & addr_mask)
                        return false;

                size -= len;
                if (!size)
                        break;
        }
        return true;
}

/**
 * iov_iter_is_aligned() - Check if the addresses and lengths of each segment
 *      are aligned to the parameters.
 *
 * @i: &struct iov_iter to check
 * @addr_mask: bit mask to check against the iov element's addresses
 * @len_mask: bit mask to check against the iov element's lengths
 *
 * Return: false if any addresses or lengths intersect with the provided masks
 */
bool iov_iter_is_aligned(const struct iov_iter *i, unsigned addr_mask,
                         unsigned len_mask)
{
        if (likely(iter_is_ubuf(i))) {
                if (i->count & len_mask)
                        return false;
                if ((unsigned long)(i->ubuf + i->iov_offset) & addr_mask)
                        return false;
                return true;
        }

        if (likely(iter_is_iovec(i) || iov_iter_is_kvec(i)))
                return iov_iter_aligned_iovec(i, addr_mask, len_mask);

        if (iov_iter_is_bvec(i))
                return iov_iter_aligned_bvec(i, addr_mask, len_mask);

        if (iov_iter_is_xarray(i)) {
                if (i->count & len_mask)
                        return false;
                if ((i->xarray_start + i->iov_offset) & addr_mask)
                        return false;
        }

        return true;
}
EXPORT_SYMBOL_GPL(iov_iter_is_aligned);
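
/*
 * Example (editor's illustrative sketch): direct-I/O code uses this to
 * reject buffers the device cannot DMA to/from.  With a logical block
 * size of "lbs", the masks select the low bits that must be zero:
 *
 *	if (!iov_iter_is_aligned(iter, bdev_dma_alignment(bdev), lbs - 1))
 *		return -EINVAL;
 */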
     880             : 
     881           0 : static unsigned long iov_iter_alignment_iovec(const struct iov_iter *i)
     882             : {
     883           0 :         unsigned long res = 0;
     884           0 :         size_t size = i->count;
     885           0 :         size_t skip = i->iov_offset;
     886             :         unsigned k;
     887             : 
     888           0 :         for (k = 0; k < i->nr_segs; k++, skip = 0) {
     889           0 :                 const struct iovec *iov = iter_iov(i) + k;
     890           0 :                 size_t len = iov->iov_len - skip;
     891           0 :                 if (len) {
     892           0 :                         res |= (unsigned long)iov->iov_base + skip;
     893           0 :                         if (len > size)
     894           0 :                                 len = size;
     895           0 :                         res |= len;
     896           0 :                         size -= len;
     897           0 :                         if (!size)
     898             :                                 break;
     899             :                 }
     900             :         }
     901           0 :         return res;
     902             : }
     903             : 
     904           0 : static unsigned long iov_iter_alignment_bvec(const struct iov_iter *i)
     905             : {
     906           0 :         unsigned res = 0;
     907           0 :         size_t size = i->count;
     908           0 :         unsigned skip = i->iov_offset;
     909             :         unsigned k;
     910             : 
     911           0 :         for (k = 0; k < i->nr_segs; k++, skip = 0) {
     912           0 :                 size_t len = i->bvec[k].bv_len - skip;
     913           0 :                 res |= (unsigned long)i->bvec[k].bv_offset + skip;
     914           0 :                 if (len > size)
     915           0 :                         len = size;
     916           0 :                 res |= len;
     917           0 :                 size -= len;
     918           0 :                 if (!size)
     919             :                         break;
     920             :         }
     921           0 :         return res;
     922             : }
     923             : 
     924           0 : unsigned long iov_iter_alignment(const struct iov_iter *i)
     925             : {
     926           0 :         if (likely(iter_is_ubuf(i))) {
     927           0 :                 size_t size = i->count;
     928           0 :                 if (size)
     929           0 :                         return ((unsigned long)i->ubuf + i->iov_offset) | size;
     930             :                 return 0;
     931             :         }
     932             : 
     933             :         /* iovec and kvec have identical layouts */
     934           0 :         if (likely(iter_is_iovec(i) || iov_iter_is_kvec(i)))
     935           0 :                 return iov_iter_alignment_iovec(i);
     936             : 
     937           0 :         if (iov_iter_is_bvec(i))
     938           0 :                 return iov_iter_alignment_bvec(i);
     939             : 
     940           0 :         if (iov_iter_is_xarray(i))
     941           0 :                 return (i->xarray_start + i->iov_offset) | i->count;
     942             : 
     943             :         return 0;
     944             : }
     945             : EXPORT_SYMBOL(iov_iter_alignment);
     946             : 
     947           0 : unsigned long iov_iter_gap_alignment(const struct iov_iter *i)
     948             : {
     949           0 :         unsigned long res = 0;
     950           0 :         unsigned long v = 0;
     951           0 :         size_t size = i->count;
     952             :         unsigned k;
     953             : 
     954           0 :         if (iter_is_ubuf(i))
     955             :                 return 0;
     956             : 
     957           0 :         if (WARN_ON(!iter_is_iovec(i)))
     958             :                 return ~0U;
     959             : 
     960           0 :         for (k = 0; k < i->nr_segs; k++) {
     961           0 :                 const struct iovec *iov = iter_iov(i) + k;
     962           0 :                 if (iov->iov_len) {
     963           0 :                         unsigned long base = (unsigned long)iov->iov_base;
     964           0 :                         if (v) // if not the first one
     965           0 :                                 res |= base | v; // this start | previous end
     966           0 :                         v = base + iov->iov_len;
     967           0 :                         if (size <= iov->iov_len)
     968             :                                 break;
     969           0 :                         size -= iov->iov_len;
     970             :                 }
     971             :         }
     972             :         return res;
     973             : }
     974             : EXPORT_SYMBOL(iov_iter_gap_alignment);
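
Unlike iov_iter_alignment(), this ORs each segment's start address with the
previous segment's end, so the result exposes the boundaries *between*
segments rather than the segments themselves. An illustrative sketch of the
kind of check a block-layer mapping path might perform; "boundary_mask" is an
assumed driver-supplied virtual-boundary limit:

    /* Hypothetical helper: reject vectors whose inter-segment gaps
     * would straddle the device's virtual boundary. */
    static bool iter_gaps_ok(const struct iov_iter *iter,
                             unsigned long boundary_mask)
    {
            return !(iov_iter_gap_alignment(iter) & boundary_mask);
    }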
     975             : 
     976           0 : static int want_pages_array(struct page ***res, size_t size,
     977             :                             size_t start, unsigned int maxpages)
     978             : {
     979           0 :         unsigned int count = DIV_ROUND_UP(size + start, PAGE_SIZE);
     980             : 
     981           0 :         if (count > maxpages)
     982           0 :                 count = maxpages;
     983           0 :         WARN_ON(!count);        // caller should've prevented that
     984           0 :         if (!*res) {
     985           0 :                 *res = kvmalloc_array(count, sizeof(struct page *), GFP_KERNEL);
     986           0 :                 if (!*res)
     987             :                         return 0;
     988             :         }
     989           0 :         return count;
     990             : }
     991             : 
     992           0 : static ssize_t iter_xarray_populate_pages(struct page **pages, struct xarray *xa,
     993             :                                           pgoff_t index, unsigned int nr_pages)
     994             : {
     995           0 :         XA_STATE(xas, xa, index);
     996             :         struct page *page;
     997           0 :         unsigned int ret = 0;
     998             : 
     999             :         rcu_read_lock();
    1000           0 :         for (page = xas_load(&xas); page; page = xas_next(&xas)) {
    1001           0 :                 if (xas_retry(&xas, page))
    1002           0 :                         continue;
    1003             : 
    1004             :                 /* Has the page moved or been split? */
    1005           0 :                 if (unlikely(page != xas_reload(&xas))) {
    1006           0 :                         xas_reset(&xas);
    1007           0 :                         continue;
    1008             :                 }
    1009             : 
    1010           0 :                 pages[ret] = find_subpage(page, xas.xa_index);
    1011           0 :                 get_page(pages[ret]);
    1012           0 :                 if (++ret == nr_pages)
    1013             :                         break;
    1014             :         }
    1015             :         rcu_read_unlock();
    1016           0 :         return ret;
    1017             : }
    1018             : 
    1019           0 : static ssize_t iter_xarray_get_pages(struct iov_iter *i,
    1020             :                                      struct page ***pages, size_t maxsize,
    1021             :                                      unsigned maxpages, size_t *_start_offset)
    1022             : {
    1023             :         unsigned nr, offset, count;
    1024             :         pgoff_t index;
    1025             :         loff_t pos;
    1026             : 
    1027           0 :         pos = i->xarray_start + i->iov_offset;
    1028           0 :         index = pos >> PAGE_SHIFT;
    1029           0 :         offset = pos & ~PAGE_MASK;
    1030           0 :         *_start_offset = offset;
    1031             : 
    1032           0 :         count = want_pages_array(pages, maxsize, offset, maxpages);
    1033           0 :         if (!count)
    1034             :                 return -ENOMEM;
    1035           0 :         nr = iter_xarray_populate_pages(*pages, i->xarray, index, count);
    1036           0 :         if (nr == 0)
    1037             :                 return 0;
    1038             : 
    1039           0 :         maxsize = min_t(size_t, nr * PAGE_SIZE - offset, maxsize);
    1040           0 :         i->iov_offset += maxsize;
    1041           0 :         i->count -= maxsize;
    1042           0 :         return maxsize;
    1043             : }
    1044             : 
     1045             : /* must be called on a non-empty ITER_UBUF or ITER_IOVEC iterator */
    1046           0 : static unsigned long first_iovec_segment(const struct iov_iter *i, size_t *size)
    1047             : {
    1048             :         size_t skip;
    1049             :         long k;
    1050             : 
    1051           0 :         if (iter_is_ubuf(i))
    1052           0 :                 return (unsigned long)i->ubuf + i->iov_offset;
    1053             : 
    1054           0 :         for (k = 0, skip = i->iov_offset; k < i->nr_segs; k++, skip = 0) {
    1055           0 :                 const struct iovec *iov = iter_iov(i) + k;
    1056           0 :                 size_t len = iov->iov_len - skip;
    1057             : 
    1058           0 :                 if (unlikely(!len))
    1059           0 :                         continue;
    1060           0 :                 if (*size > len)
    1061           0 :                         *size = len;
    1062           0 :                 return (unsigned long)iov->iov_base + skip;
    1063             :         }
    1064           0 :         BUG(); // if it had been empty, we wouldn't get called
    1065             : }
    1066             : 
     1067             : /* must be called on a non-empty ITER_BVEC iterator */
    1068             : static struct page *first_bvec_segment(const struct iov_iter *i,
    1069             :                                        size_t *size, size_t *start)
    1070             : {
    1071             :         struct page *page;
    1072           0 :         size_t skip = i->iov_offset, len;
    1073             : 
    1074           0 :         len = i->bvec->bv_len - skip;
    1075           0 :         if (*size > len)
    1076           0 :                 *size = len;
    1077           0 :         skip += i->bvec->bv_offset;
    1078           0 :         page = i->bvec->bv_page + skip / PAGE_SIZE;
    1079           0 :         *start = skip % PAGE_SIZE;
    1080             :         return page;
    1081             : }
    1082             : 
    1083           0 : static ssize_t __iov_iter_get_pages_alloc(struct iov_iter *i,
    1084             :                    struct page ***pages, size_t maxsize,
    1085             :                    unsigned int maxpages, size_t *start)
    1086             : {
    1087           0 :         unsigned int n, gup_flags = 0;
    1088             : 
    1089           0 :         if (maxsize > i->count)
    1090           0 :                 maxsize = i->count;
    1091           0 :         if (!maxsize)
    1092             :                 return 0;
    1093           0 :         if (maxsize > MAX_RW_COUNT)
    1094           0 :                 maxsize = MAX_RW_COUNT;
    1095             : 
    1096           0 :         if (likely(user_backed_iter(i))) {
    1097             :                 unsigned long addr;
    1098             :                 int res;
    1099             : 
    1100           0 :                 if (iov_iter_rw(i) != WRITE)
    1101           0 :                         gup_flags |= FOLL_WRITE;
    1102           0 :                 if (i->nofault)
    1103           0 :                         gup_flags |= FOLL_NOFAULT;
    1104             : 
    1105           0 :                 addr = first_iovec_segment(i, &maxsize);
    1106           0 :                 *start = addr % PAGE_SIZE;
    1107           0 :                 addr &= PAGE_MASK;
    1108           0 :                 n = want_pages_array(pages, maxsize, *start, maxpages);
    1109           0 :                 if (!n)
    1110             :                         return -ENOMEM;
    1111           0 :                 res = get_user_pages_fast(addr, n, gup_flags, *pages);
    1112           0 :                 if (unlikely(res <= 0))
    1113           0 :                         return res;
    1114           0 :                 maxsize = min_t(size_t, maxsize, res * PAGE_SIZE - *start);
    1115           0 :                 iov_iter_advance(i, maxsize);
    1116           0 :                 return maxsize;
    1117             :         }
    1118           0 :         if (iov_iter_is_bvec(i)) {
    1119             :                 struct page **p;
    1120             :                 struct page *page;
    1121             : 
    1122           0 :                 page = first_bvec_segment(i, &maxsize, start);
    1123           0 :                 n = want_pages_array(pages, maxsize, *start, maxpages);
    1124           0 :                 if (!n)
    1125             :                         return -ENOMEM;
    1126           0 :                 p = *pages;
    1127           0 :                 for (int k = 0; k < n; k++)
    1128           0 :                         get_page(p[k] = page + k);
    1129           0 :                 maxsize = min_t(size_t, maxsize, n * PAGE_SIZE - *start);
    1130           0 :                 i->count -= maxsize;
    1131           0 :                 i->iov_offset += maxsize;
    1132           0 :                 if (i->iov_offset == i->bvec->bv_len) {
    1133           0 :                         i->iov_offset = 0;
    1134           0 :                         i->bvec++;
    1135           0 :                         i->nr_segs--;
    1136             :                 }
    1137           0 :                 return maxsize;
    1138             :         }
    1139           0 :         if (iov_iter_is_xarray(i))
    1140           0 :                 return iter_xarray_get_pages(i, pages, maxsize, maxpages, start);
    1141             :         return -EFAULT;
    1142             : }
    1143             : 
    1144           0 : ssize_t iov_iter_get_pages2(struct iov_iter *i, struct page **pages,
    1145             :                 size_t maxsize, unsigned maxpages, size_t *start)
    1146             : {
    1147           0 :         if (!maxpages)
    1148             :                 return 0;
    1149           0 :         BUG_ON(!pages);
    1150             : 
    1151           0 :         return __iov_iter_get_pages_alloc(i, &pages, maxsize, maxpages, start);
    1152             : }
    1153             : EXPORT_SYMBOL(iov_iter_get_pages2);
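
A usage sketch with a caller-supplied page array: each call takes references
on up to ARRAY_SIZE(pages) pages, reports the byte span they cover plus the
offset into the first page, and advances the iterator. consume_iter() is
hypothetical and error handling is pared down:

    static ssize_t consume_iter(struct iov_iter *iter)
    {
            struct page *pages[16];
            size_t start, total = 0;
            ssize_t got;

            while (iov_iter_count(iter)) {
                    got = iov_iter_get_pages2(iter, pages, SIZE_MAX,
                                              ARRAY_SIZE(pages), &start);
                    if (got <= 0)
                            return got ? got : total;
                    /* pages[0] + start covers "got" bytes; use them here */
                    for (int k = 0; k < DIV_ROUND_UP(start + got, PAGE_SIZE); k++)
                            put_page(pages[k]);     /* drop the refs we took */
                    total += got;
            }
            return total;
    }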
    1154             : 
    1155           0 : ssize_t iov_iter_get_pages_alloc2(struct iov_iter *i,
    1156             :                 struct page ***pages, size_t maxsize, size_t *start)
    1157             : {
    1158             :         ssize_t len;
    1159             : 
    1160           0 :         *pages = NULL;
    1161             : 
    1162           0 :         len = __iov_iter_get_pages_alloc(i, pages, maxsize, ~0U, start);
    1163           0 :         if (len <= 0) {
    1164           0 :                 kvfree(*pages);
    1165           0 :                 *pages = NULL;
    1166             :         }
    1167           0 :         return len;
    1168             : }
    1169             : EXPORT_SYMBOL(iov_iter_get_pages_alloc2);
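
The allocating variant hands back a kvmalloc()'ed array, so on success the
caller owns both the page references and the array itself. A short sketch,
assuming "iter" is a populated iterator in the caller:

    struct page **pages = NULL;
    size_t start;
    ssize_t got = iov_iter_get_pages_alloc2(iter, &pages, SIZE_MAX, &start);

    if (got > 0) {
            /* ... use the pages ... */
            for (int k = 0; k < DIV_ROUND_UP(start + got, PAGE_SIZE); k++)
                    put_page(pages[k]);
            kvfree(pages);          /* on failure this was already freed */
    }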
    1170             : 
    1171           0 : size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum,
    1172             :                                struct iov_iter *i)
    1173             : {
    1174             :         __wsum sum, next;
    1175           0 :         sum = *csum;
    1176           0 :         if (WARN_ON_ONCE(!i->data_source))
    1177             :                 return 0;
    1178             : 
    1179           0 :         iterate_and_advance(i, bytes, base, len, off, ({
    1180             :                 next = csum_and_copy_from_user(base, addr + off, len);
    1181             :                 sum = csum_block_add(sum, next, off);
    1182             :                 next ? 0 : len;
    1183             :         }), ({
    1184             :                 sum = csum_and_memcpy(addr + off, base, len, sum, off);
    1185             :         })
    1186             :         )
    1187           0 :         *csum = sum;
    1188           0 :         return bytes;
    1189             : }
    1190             : EXPORT_SYMBOL(csum_and_copy_from_iter);
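
A hedged sketch of the copy-and-checksum pattern: the caller seeds a running
__wsum, and a short return signals that the user copy faulted. "kbuf", "len"
and "iter" are assumed to exist in the caller:

    __wsum csum = 0;        /* assumed seed; real callers pick their own */
    size_t n = csum_and_copy_from_iter(kbuf, len, &csum, iter);

    if (n != len)
            return -EFAULT; /* short copy: treat it as a fault */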
    1191             : 
    1192           0 : size_t csum_and_copy_to_iter(const void *addr, size_t bytes, void *_csstate,
    1193             :                              struct iov_iter *i)
    1194             : {
    1195           0 :         struct csum_state *csstate = _csstate;
    1196             :         __wsum sum, next;
    1197             : 
    1198           0 :         if (WARN_ON_ONCE(i->data_source))
    1199             :                 return 0;
    1200           0 :         if (unlikely(iov_iter_is_discard(i))) {
     1201             :                 // can't use csum_and_memcpy() for that one - data is not copied
    1202           0 :                 csstate->csum = csum_block_add(csstate->csum,
    1203             :                                                csum_partial(addr, bytes, 0),
    1204           0 :                                                csstate->off);
    1205           0 :                 csstate->off += bytes;
    1206           0 :                 return bytes;
    1207             :         }
    1208             : 
    1209           0 :         sum = csum_shift(csstate->csum, csstate->off);
    1210           0 :         iterate_and_advance(i, bytes, base, len, off, ({
    1211             :                 next = csum_and_copy_to_user(addr + off, base, len);
    1212             :                 sum = csum_block_add(sum, next, off);
    1213             :                 next ? 0 : len;
    1214             :         }), ({
    1215             :                 sum = csum_and_memcpy(base, addr + off, len, sum, off);
    1216             :         })
    1217             :         )
    1218           0 :         csstate->csum = csum_shift(sum, csstate->off);
    1219           0 :         csstate->off += bytes;
    1220           0 :         return bytes;
    1221             : }
    1222             : EXPORT_SYMBOL(csum_and_copy_to_iter);
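
The _csstate cookie is a struct csum_state (declared in <linux/uio.h>)
carrying the running checksum plus the byte offset, so the odd/even folding
in csum_block_add() stays correct across calls. A sketch, again assuming
"kbuf", "len" and "iter" in the caller:

    struct csum_state csstate = { .csum = 0, .off = 0 };
    size_t n = csum_and_copy_to_iter(kbuf, len, &csstate, iter);

    /* csstate.csum now covers the copied bytes; csstate.off advanced by n */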
    1223             : 
    1224           0 : size_t hash_and_copy_to_iter(const void *addr, size_t bytes, void *hashp,
    1225             :                 struct iov_iter *i)
    1226             : {
    1227             : #ifdef CONFIG_CRYPTO_HASH
    1228             :         struct ahash_request *hash = hashp;
    1229             :         struct scatterlist sg;
    1230             :         size_t copied;
    1231             : 
    1232             :         copied = copy_to_iter(addr, bytes, i);
    1233             :         sg_init_one(&sg, addr, copied);
    1234             :         ahash_request_set_crypt(hash, &sg, NULL, copied);
    1235             :         crypto_ahash_update(hash);
    1236             :         return copied;
    1237             : #else
    1238           0 :         return 0;
    1239             : #endif
    1240             : }
    1241             : EXPORT_SYMBOL(hash_and_copy_to_iter);
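
The caller is expected to hand in an already-initialised ahash request; the
helper then folds whatever was copied into the running hash. A hedged sketch
with setup and finalisation (allocation-failure checks and async completion
are deliberately elided; "kbuf", "len" and "iter" come from the caller):

    struct crypto_ahash *tfm = crypto_alloc_ahash("sha256", 0, 0);
    struct ahash_request *req = ahash_request_alloc(tfm, GFP_KERNEL);
    u8 digest[SHA256_DIGEST_SIZE];

    crypto_ahash_init(req);
    hash_and_copy_to_iter(kbuf, len, req, iter);    /* copy + hash in one pass */
    ahash_request_set_crypt(req, NULL, digest, 0);
    crypto_ahash_final(req);                        /* digest[] holds the result */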
    1242             : 
    1243           0 : static int iov_npages(const struct iov_iter *i, int maxpages)
    1244             : {
    1245           0 :         size_t skip = i->iov_offset, size = i->count;
    1246             :         const struct iovec *p;
    1247           0 :         int npages = 0;
    1248             : 
    1249           0 :         for (p = iter_iov(i); size; skip = 0, p++) {
    1250           0 :                 unsigned offs = offset_in_page(p->iov_base + skip);
    1251           0 :                 size_t len = min(p->iov_len - skip, size);
    1252             : 
    1253           0 :                 if (len) {
    1254           0 :                         size -= len;
    1255           0 :                         npages += DIV_ROUND_UP(offs + len, PAGE_SIZE);
    1256           0 :                         if (unlikely(npages > maxpages))
    1257             :                                 return maxpages;
    1258             :                 }
    1259             :         }
    1260             :         return npages;
    1261             : }
    1262             : 
    1263             : static int bvec_npages(const struct iov_iter *i, int maxpages)
    1264             : {
    1265           0 :         size_t skip = i->iov_offset, size = i->count;
    1266             :         const struct bio_vec *p;
    1267           0 :         int npages = 0;
    1268             : 
    1269           0 :         for (p = i->bvec; size; skip = 0, p++) {
    1270           0 :                 unsigned offs = (p->bv_offset + skip) % PAGE_SIZE;
    1271           0 :                 size_t len = min(p->bv_len - skip, size);
    1272             : 
    1273           0 :                 size -= len;
    1274           0 :                 npages += DIV_ROUND_UP(offs + len, PAGE_SIZE);
    1275           0 :                 if (unlikely(npages > maxpages))
    1276             :                         return maxpages;
    1277             :         }
    1278             :         return npages;
    1279             : }
    1280             : 
    1281           0 : int iov_iter_npages(const struct iov_iter *i, int maxpages)
    1282             : {
    1283           0 :         if (unlikely(!i->count))
    1284             :                 return 0;
    1285           0 :         if (likely(iter_is_ubuf(i))) {
    1286           0 :                 unsigned offs = offset_in_page(i->ubuf + i->iov_offset);
    1287           0 :                 int npages = DIV_ROUND_UP(offs + i->count, PAGE_SIZE);
    1288           0 :                 return min(npages, maxpages);
    1289             :         }
    1290             :         /* iovec and kvec have identical layouts */
    1291           0 :         if (likely(iter_is_iovec(i) || iov_iter_is_kvec(i)))
    1292           0 :                 return iov_npages(i, maxpages);
    1293           0 :         if (iov_iter_is_bvec(i))
    1294             :                 return bvec_npages(i, maxpages);
    1295           0 :         if (iov_iter_is_xarray(i)) {
    1296           0 :                 unsigned offset = (i->xarray_start + i->iov_offset) % PAGE_SIZE;
    1297           0 :                 int npages = DIV_ROUND_UP(offset + i->count, PAGE_SIZE);
    1298           0 :                 return min(npages, maxpages);
    1299             :         }
    1300             :         return 0;
    1301             : }
    1302             : EXPORT_SYMBOL(iov_iter_npages);
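
One plausible sizing pattern: ask how many pages the next chunk of the
iterator can touch, capped, and allocate accordingly. BIO_MAX_VECS is used
here only as a familiar cap, not a requirement:

    int nr = iov_iter_npages(iter, BIO_MAX_VECS);
    struct page **pages = kvmalloc_array(nr, sizeof(struct page *), GFP_KERNEL);

    if (!pages)
            return -ENOMEM;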
    1303             : 
    1304           0 : const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags)
    1305             : {
    1306           0 :         *new = *old;
    1307           0 :         if (iov_iter_is_bvec(new))
    1308           0 :                 return new->bvec = kmemdup(new->bvec,
    1309           0 :                                     new->nr_segs * sizeof(struct bio_vec),
    1310             :                                     flags);
    1311           0 :         else if (iov_iter_is_kvec(new) || iter_is_iovec(new))
    1312             :                 /* iovec and kvec have identical layout */
    1313           0 :                 return new->__iov = kmemdup(new->__iov,
    1314           0 :                                    new->nr_segs * sizeof(struct iovec),
    1315             :                                    flags);
    1316             :         return NULL;
    1317             : }
    1318             : EXPORT_SYMBOL(dup_iter);
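
dup_iter() copies the iterator and duplicates its segment array, which is
what lets async consumers outlive the caller's on-stack iovec. A sketch;
note the returned pointer is what must eventually be kfree()d:

    /* assuming "iter" is iovec/kvec/bvec-backed; other types dup to NULL */
    struct iov_iter copy;
    const void *segs = dup_iter(&copy, iter, GFP_KERNEL);

    if (!segs)
            return -ENOMEM;
    /* ... queue "copy" for async work ... */
    kfree(segs);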
    1319             : 
    1320           0 : static __noclone int copy_compat_iovec_from_user(struct iovec *iov,
    1321             :                 const struct iovec __user *uvec, unsigned long nr_segs)
    1322             : {
    1323           0 :         const struct compat_iovec __user *uiov =
    1324             :                 (const struct compat_iovec __user *)uvec;
    1325           0 :         int ret = -EFAULT, i;
    1326             : 
    1327           0 :         if (!user_access_begin(uiov, nr_segs * sizeof(*uiov)))
    1328             :                 return -EFAULT;
    1329             : 
    1330           0 :         for (i = 0; i < nr_segs; i++) {
    1331             :                 compat_uptr_t buf;
    1332             :                 compat_ssize_t len;
    1333             : 
    1334           0 :                 unsafe_get_user(len, &uiov[i].iov_len, uaccess_end);
    1335           0 :                 unsafe_get_user(buf, &uiov[i].iov_base, uaccess_end);
    1336             : 
    1337             :                 /* check for compat_size_t not fitting in compat_ssize_t .. */
    1338           0 :                 if (len < 0) {
    1339             :                         ret = -EINVAL;
    1340             :                         goto uaccess_end;
    1341             :                 }
    1342           0 :                 iov[i].iov_base = compat_ptr(buf);
    1343           0 :                 iov[i].iov_len = len;
    1344             :         }
    1345             : 
    1346             :         ret = 0;
    1347             : uaccess_end:
    1348             :         user_access_end();
    1349             :         return ret;
    1350             : }
    1351             : 
    1352           0 : static __noclone int copy_iovec_from_user(struct iovec *iov,
    1353             :                 const struct iovec __user *uiov, unsigned long nr_segs)
    1354             : {
    1355           0 :         int ret = -EFAULT;
    1356             : 
    1357           0 :         if (!user_access_begin(uiov, nr_segs * sizeof(*uiov)))
    1358             :                 return -EFAULT;
    1359             : 
    1360             :         do {
    1361             :                 void __user *buf;
    1362             :                 ssize_t len;
    1363             : 
    1364           0 :                 unsafe_get_user(len, &uiov->iov_len, uaccess_end);
    1365           0 :                 unsafe_get_user(buf, &uiov->iov_base, uaccess_end);
    1366             : 
    1367             :                 /* check for size_t not fitting in ssize_t .. */
    1368           0 :                 if (unlikely(len < 0)) {
    1369             :                         ret = -EINVAL;
    1370             :                         goto uaccess_end;
    1371             :                 }
    1372           0 :                 iov->iov_base = buf;
    1373           0 :                 iov->iov_len = len;
    1374             : 
    1375           0 :                 uiov++; iov++;
    1376           0 :         } while (--nr_segs);
    1377             : 
    1378             :         ret = 0;
    1379             : uaccess_end:
    1380             :         user_access_end();
    1381             :         return ret;
    1382             : }
    1383             : 
    1384           0 : struct iovec *iovec_from_user(const struct iovec __user *uvec,
    1385             :                 unsigned long nr_segs, unsigned long fast_segs,
    1386             :                 struct iovec *fast_iov, bool compat)
    1387             : {
    1388           0 :         struct iovec *iov = fast_iov;
    1389             :         int ret;
    1390             : 
    1391             :         /*
    1392             :          * SuS says "The readv() function *may* fail if the iovcnt argument was
     1393             :          * less than or equal to 0, or greater than {IOV_MAX}."  Linux has
    1394             :          * traditionally returned zero for zero segments, so...
    1395             :          */
    1396           0 :         if (nr_segs == 0)
    1397             :                 return iov;
    1398           0 :         if (nr_segs > UIO_MAXIOV)
    1399             :                 return ERR_PTR(-EINVAL);
    1400           0 :         if (nr_segs > fast_segs) {
    1401           0 :                 iov = kmalloc_array(nr_segs, sizeof(struct iovec), GFP_KERNEL);
    1402           0 :                 if (!iov)
    1403             :                         return ERR_PTR(-ENOMEM);
    1404             :         }
    1405             : 
    1406           0 :         if (unlikely(compat))
    1407           0 :                 ret = copy_compat_iovec_from_user(iov, uvec, nr_segs);
    1408             :         else
    1409           0 :                 ret = copy_iovec_from_user(iov, uvec, nr_segs);
    1410           0 :         if (ret) {
    1411           0 :                 if (iov != fast_iov)
    1412           0 :                         kfree(iov);
    1413           0 :                 return ERR_PTR(ret);
    1414             :         }
    1415             : 
    1416             :         return iov;
    1417             : }
    1418             : 
    1419             : /*
    1420             :  * Single segment iovec supplied by the user, import it as ITER_UBUF.
    1421             :  */
    1422           0 : static ssize_t __import_iovec_ubuf(int type, const struct iovec __user *uvec,
    1423             :                                    struct iovec **iovp, struct iov_iter *i,
    1424             :                                    bool compat)
    1425             : {
    1426           0 :         struct iovec *iov = *iovp;
    1427             :         ssize_t ret;
    1428             : 
    1429           0 :         if (compat)
    1430           0 :                 ret = copy_compat_iovec_from_user(iov, uvec, 1);
    1431             :         else
    1432           0 :                 ret = copy_iovec_from_user(iov, uvec, 1);
    1433           0 :         if (unlikely(ret))
    1434             :                 return ret;
    1435             : 
    1436           0 :         ret = import_ubuf(type, iov->iov_base, iov->iov_len, i);
    1437           0 :         if (unlikely(ret))
    1438             :                 return ret;
    1439           0 :         *iovp = NULL;
    1440           0 :         return i->count;
    1441             : }
    1442             : 
    1443           0 : ssize_t __import_iovec(int type, const struct iovec __user *uvec,
    1444             :                  unsigned nr_segs, unsigned fast_segs, struct iovec **iovp,
    1445             :                  struct iov_iter *i, bool compat)
    1446             : {
    1447           0 :         ssize_t total_len = 0;
    1448             :         unsigned long seg;
    1449             :         struct iovec *iov;
    1450             : 
    1451           0 :         if (nr_segs == 1)
    1452           0 :                 return __import_iovec_ubuf(type, uvec, iovp, i, compat);
    1453             : 
    1454           0 :         iov = iovec_from_user(uvec, nr_segs, fast_segs, *iovp, compat);
    1455           0 :         if (IS_ERR(iov)) {
    1456           0 :                 *iovp = NULL;
    1457           0 :                 return PTR_ERR(iov);
    1458             :         }
    1459             : 
    1460             :         /*
    1461             :          * According to the Single Unix Specification we should return EINVAL if
    1462             :          * an element length is < 0 when cast to ssize_t or if the total length
    1463             :          * would overflow the ssize_t return value of the system call.
    1464             :          *
    1465             :          * Linux caps all read/write calls to MAX_RW_COUNT, and avoids the
    1466             :          * overflow case.
    1467             :          */
    1468           0 :         for (seg = 0; seg < nr_segs; seg++) {
    1469           0 :                 ssize_t len = (ssize_t)iov[seg].iov_len;
    1470             : 
    1471           0 :                 if (!access_ok(iov[seg].iov_base, len)) {
    1472           0 :                         if (iov != *iovp)
    1473           0 :                                 kfree(iov);
    1474           0 :                         *iovp = NULL;
    1475           0 :                         return -EFAULT;
    1476             :                 }
    1477             : 
    1478           0 :                 if (len > MAX_RW_COUNT - total_len) {
    1479           0 :                         len = MAX_RW_COUNT - total_len;
    1480           0 :                         iov[seg].iov_len = len;
    1481             :                 }
    1482           0 :                 total_len += len;
    1483             :         }
    1484             : 
    1485           0 :         iov_iter_init(i, type, iov, nr_segs, total_len);
    1486           0 :         if (iov == *iovp)
    1487           0 :                 *iovp = NULL;
    1488             :         else
    1489           0 :                 *iovp = iov;
    1490             :         return total_len;
    1491             : }
    1492             : 
    1493             : /**
    1494             :  * import_iovec() - Copy an array of &struct iovec from userspace
    1495             :  *     into the kernel, check that it is valid, and initialize a new
    1496             :  *     &struct iov_iter iterator to access it.
    1497             :  *
    1498             :  * @type: One of %READ or %WRITE.
    1499             :  * @uvec: Pointer to the userspace array.
    1500             :  * @nr_segs: Number of elements in userspace array.
     1501             :  * @fast_segs: Number of elements in *@iovp.
    1502             :  * @iovp: (input and output parameter) Pointer to pointer to (usually small
    1503             :  *     on-stack) kernel array.
    1504             :  * @i: Pointer to iterator that will be initialized on success.
    1505             :  *
     1506             :  * If the array pointed to by *@iovp is large enough to hold all @nr_segs,
     1507             :  * then this function places %NULL in *@iovp on return. Otherwise, a new
     1508             :  * array will be allocated and the result placed in *@iovp. This means that
     1509             :  * the caller may call kfree() on *@iovp regardless of whether the small
     1510             :  * on-stack array was used or not (and regardless of whether this function
     1511             :  * returns an error or not).
    1512             :  *
    1513             :  * Return: Negative error code on error, bytes imported on success
    1514             :  */
    1515           0 : ssize_t import_iovec(int type, const struct iovec __user *uvec,
    1516             :                  unsigned nr_segs, unsigned fast_segs,
    1517             :                  struct iovec **iovp, struct iov_iter *i)
    1518             : {
    1519           0 :         return __import_iovec(type, uvec, nr_segs, fast_segs, iovp, i,
    1520             :                               in_compat_syscall());
    1521             : }
    1522             : EXPORT_SYMBOL(import_iovec);
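
A sketch of the canonical readv()-style call sequence ("uvec" and "nr_segs"
come from the syscall). UIO_FASTIOV keeps small vectors on the stack, and
kfree(iov) is safe either way because *iovp is set to NULL when the stack
array was used:

    struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
    struct iov_iter iter;
    ssize_t ret;

    ret = import_iovec(READ, uvec, nr_segs, ARRAY_SIZE(iovstack), &iov, &iter);
    if (ret < 0)
            return ret;
    /* ... perform the I/O against &iter ... */
    kfree(iov);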
    1523             : 
    1524           0 : int import_single_range(int rw, void __user *buf, size_t len,
    1525             :                  struct iovec *iov, struct iov_iter *i)
    1526             : {
    1527           0 :         if (len > MAX_RW_COUNT)
    1528           0 :                 len = MAX_RW_COUNT;
    1529           0 :         if (unlikely(!access_ok(buf, len)))
    1530             :                 return -EFAULT;
    1531             : 
    1532           0 :         iov_iter_ubuf(i, rw, buf, len);
    1533           0 :         return 0;
    1534             : }
    1535             : EXPORT_SYMBOL(import_single_range);
    1536             : 
    1537           0 : int import_ubuf(int rw, void __user *buf, size_t len, struct iov_iter *i)
    1538             : {
    1539           0 :         if (len > MAX_RW_COUNT)
    1540           0 :                 len = MAX_RW_COUNT;
    1541           0 :         if (unlikely(!access_ok(buf, len)))
    1542             :                 return -EFAULT;
    1543             : 
    1544           0 :         iov_iter_ubuf(i, rw, buf, len);
    1545           0 :         return 0;
    1546             : }
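
Note that import_single_range() and import_ubuf() are identical at this
point: both clamp to MAX_RW_COUNT, verify the range with access_ok(), and
build an ITER_UBUF. A sketch of the single-buffer import used by plain
read()/write() paths ("ubuf" and "len" come from the caller):

    struct iov_iter iter;
    int ret = import_ubuf(READ, ubuf, len, &iter);

    if (ret)
            return ret;     /* -EFAULT: the user range failed access_ok() */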
    1547             : 
    1548             : /**
    1549             :  * iov_iter_restore() - Restore a &struct iov_iter to the same state as when
    1550             :  *     iov_iter_save_state() was called.
    1551             :  *
    1552             :  * @i: &struct iov_iter to restore
    1553             :  * @state: state to restore from
    1554             :  *
     1555             :  * Used after iov_iter_save_state() to restore @i, if operations may
    1556             :  * have advanced it.
    1557             :  *
     1558             :  * Note: only works on ITER_IOVEC, ITER_BVEC, ITER_KVEC and ITER_UBUF
    1559             :  */
    1560           0 : void iov_iter_restore(struct iov_iter *i, struct iov_iter_state *state)
    1561             : {
     1562           0 :         if (WARN_ON_ONCE(!iov_iter_is_bvec(i) && !iter_is_iovec(i) &&
     1563           0 :                          !iter_is_ubuf(i) && !iov_iter_is_kvec(i)))
    1564             :                 return;
    1565           0 :         i->iov_offset = state->iov_offset;
    1566           0 :         i->count = state->count;
    1567           0 :         if (iter_is_ubuf(i))
    1568             :                 return;
    1569             :         /*
    1570             :          * For the *vec iters, nr_segs + iov is constant - if we increment
    1571             :          * the vec, then we also decrement the nr_segs count. Hence we don't
     1572             :          * need to track both of these, just one is enough and we can derive
     1573             :          * the other from that. ITER_KVEC and ITER_IOVEC are the same struct
     1574             :          * size, so we can just increment the iov pointer as they are unioned.
    1575             :          * ITER_BVEC _may_ be the same size on some archs, but on others it is
    1576             :          * not. Be safe and handle it separately.
    1577             :          */
    1578             :         BUILD_BUG_ON(sizeof(struct iovec) != sizeof(struct kvec));
    1579           0 :         if (iov_iter_is_bvec(i))
    1580           0 :                 i->bvec -= state->nr_segs - i->nr_segs;
    1581             :         else
    1582           0 :                 i->__iov -= state->nr_segs - i->nr_segs;
    1583           0 :         i->nr_segs = state->nr_segs;
    1584             : }
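
A sketch of the save/restore pattern around a copy that may have to be
retried, e.g. a nonblocking attempt returning -EAGAIN; do_attempt() is
hypothetical:

    struct iov_iter_state state;
    ssize_t ret;

    iov_iter_save_state(iter, &state);
    ret = do_attempt(iter);                  /* may advance the iterator */
    if (ret == -EAGAIN)
            iov_iter_restore(iter, &state);  /* rewind before retrying */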
    1585             : 
    1586             : /*
    1587             :  * Extract a list of contiguous pages from an ITER_XARRAY iterator.  This does not
    1588             :  * get references on the pages, nor does it get a pin on them.
    1589             :  */
    1590           0 : static ssize_t iov_iter_extract_xarray_pages(struct iov_iter *i,
    1591             :                                              struct page ***pages, size_t maxsize,
    1592             :                                              unsigned int maxpages,
    1593             :                                              iov_iter_extraction_t extraction_flags,
    1594             :                                              size_t *offset0)
    1595             : {
    1596             :         struct page *page, **p;
    1597           0 :         unsigned int nr = 0, offset;
    1598           0 :         loff_t pos = i->xarray_start + i->iov_offset;
    1599           0 :         pgoff_t index = pos >> PAGE_SHIFT;
    1600           0 :         XA_STATE(xas, i->xarray, index);
    1601             : 
    1602           0 :         offset = pos & ~PAGE_MASK;
    1603           0 :         *offset0 = offset;
    1604             : 
    1605           0 :         maxpages = want_pages_array(pages, maxsize, offset, maxpages);
    1606           0 :         if (!maxpages)
    1607             :                 return -ENOMEM;
    1608           0 :         p = *pages;
    1609             : 
    1610             :         rcu_read_lock();
    1611           0 :         for (page = xas_load(&xas); page; page = xas_next(&xas)) {
    1612           0 :                 if (xas_retry(&xas, page))
    1613           0 :                         continue;
    1614             : 
    1615             :                 /* Has the page moved or been split? */
    1616           0 :                 if (unlikely(page != xas_reload(&xas))) {
    1617           0 :                         xas_reset(&xas);
    1618           0 :                         continue;
    1619             :                 }
    1620             : 
    1621           0 :                 p[nr++] = find_subpage(page, xas.xa_index);
    1622           0 :                 if (nr == maxpages)
    1623             :                         break;
    1624             :         }
    1625             :         rcu_read_unlock();
    1626             : 
    1627           0 :         maxsize = min_t(size_t, nr * PAGE_SIZE - offset, maxsize);
    1628           0 :         iov_iter_advance(i, maxsize);
    1629           0 :         return maxsize;
    1630             : }
    1631             : 
    1632             : /*
    1633             :  * Extract a list of contiguous pages from an ITER_BVEC iterator.  This does
    1634             :  * not get references on the pages, nor does it get a pin on them.
    1635             :  */
    1636           0 : static ssize_t iov_iter_extract_bvec_pages(struct iov_iter *i,
    1637             :                                            struct page ***pages, size_t maxsize,
    1638             :                                            unsigned int maxpages,
    1639             :                                            iov_iter_extraction_t extraction_flags,
    1640             :                                            size_t *offset0)
    1641             : {
    1642             :         struct page **p, *page;
    1643           0 :         size_t skip = i->iov_offset, offset;
    1644             :         int k;
    1645             : 
    1646             :         for (;;) {
    1647           0 :                 if (i->nr_segs == 0)
    1648             :                         return 0;
    1649           0 :                 maxsize = min(maxsize, i->bvec->bv_len - skip);
    1650           0 :                 if (maxsize)
    1651             :                         break;
    1652           0 :                 i->iov_offset = 0;
    1653           0 :                 i->nr_segs--;
    1654           0 :                 i->bvec++;
    1655           0 :                 skip = 0;
    1656             :         }
    1657             : 
    1658           0 :         skip += i->bvec->bv_offset;
    1659           0 :         page = i->bvec->bv_page + skip / PAGE_SIZE;
    1660           0 :         offset = skip % PAGE_SIZE;
    1661           0 :         *offset0 = offset;
    1662             : 
    1663           0 :         maxpages = want_pages_array(pages, maxsize, offset, maxpages);
    1664           0 :         if (!maxpages)
    1665             :                 return -ENOMEM;
    1666           0 :         p = *pages;
    1667           0 :         for (k = 0; k < maxpages; k++)
    1668           0 :                 p[k] = page + k;
    1669             : 
    1670           0 :         maxsize = min_t(size_t, maxsize, maxpages * PAGE_SIZE - offset);
    1671           0 :         iov_iter_advance(i, maxsize);
    1672           0 :         return maxsize;
    1673             : }
    1674             : 
    1675             : /*
    1676             :  * Extract a list of virtually contiguous pages from an ITER_KVEC iterator.
    1677             :  * This does not get references on the pages, nor does it get a pin on them.
    1678             :  */
    1679           0 : static ssize_t iov_iter_extract_kvec_pages(struct iov_iter *i,
    1680             :                                            struct page ***pages, size_t maxsize,
    1681             :                                            unsigned int maxpages,
    1682             :                                            iov_iter_extraction_t extraction_flags,
    1683             :                                            size_t *offset0)
    1684             : {
    1685             :         struct page **p, *page;
    1686             :         const void *kaddr;
    1687           0 :         size_t skip = i->iov_offset, offset, len;
    1688             :         int k;
    1689             : 
    1690             :         for (;;) {
    1691           0 :                 if (i->nr_segs == 0)
    1692             :                         return 0;
    1693           0 :                 maxsize = min(maxsize, i->kvec->iov_len - skip);
    1694           0 :                 if (maxsize)
    1695             :                         break;
    1696           0 :                 i->iov_offset = 0;
    1697           0 :                 i->nr_segs--;
    1698           0 :                 i->kvec++;
    1699           0 :                 skip = 0;
    1700             :         }
    1701             : 
    1702           0 :         kaddr = i->kvec->iov_base + skip;
    1703           0 :         offset = (unsigned long)kaddr & ~PAGE_MASK;
    1704           0 :         *offset0 = offset;
    1705             : 
    1706           0 :         maxpages = want_pages_array(pages, maxsize, offset, maxpages);
    1707           0 :         if (!maxpages)
    1708             :                 return -ENOMEM;
    1709           0 :         p = *pages;
    1710             : 
    1711           0 :         kaddr -= offset;
    1712           0 :         len = offset + maxsize;
    1713           0 :         for (k = 0; k < maxpages; k++) {
    1714           0 :                 size_t seg = min_t(size_t, len, PAGE_SIZE);
    1715             : 
    1716           0 :                 if (is_vmalloc_or_module_addr(kaddr))
    1717           0 :                         page = vmalloc_to_page(kaddr);
    1718             :                 else
    1719           0 :                         page = virt_to_page(kaddr);
    1720             : 
    1721           0 :                 p[k] = page;
    1722           0 :                 len -= seg;
    1723           0 :                 kaddr += PAGE_SIZE;
    1724             :         }
    1725             : 
    1726           0 :         maxsize = min_t(size_t, maxsize, maxpages * PAGE_SIZE - offset);
    1727           0 :         iov_iter_advance(i, maxsize);
    1728           0 :         return maxsize;
    1729             : }
    1730             : 
    1731             : /*
    1732             :  * Extract a list of contiguous pages from a user iterator and get a pin on
    1733             :  * each of them.  This should only be used if the iterator is user-backed
     1734             :  * (ITER_IOVEC/ITER_UBUF).
    1735             :  *
    1736             :  * It does not get refs on the pages, but the pages must be unpinned by the
    1737             :  * caller once the transfer is complete.
    1738             :  *
    1739             :  * This is safe to be used where background IO/DMA *is* going to be modifying
     1740             :  * the buffer; using a pin rather than a ref forces fork() to give the
    1741             :  * child a copy of the page.
    1742             :  */
    1743           0 : static ssize_t iov_iter_extract_user_pages(struct iov_iter *i,
    1744             :                                            struct page ***pages,
    1745             :                                            size_t maxsize,
    1746             :                                            unsigned int maxpages,
    1747             :                                            iov_iter_extraction_t extraction_flags,
    1748             :                                            size_t *offset0)
    1749             : {
    1750             :         unsigned long addr;
    1751           0 :         unsigned int gup_flags = 0;
    1752             :         size_t offset;
    1753             :         int res;
    1754             : 
    1755           0 :         if (i->data_source == ITER_DEST)
    1756           0 :                 gup_flags |= FOLL_WRITE;
    1757           0 :         if (extraction_flags & ITER_ALLOW_P2PDMA)
    1758           0 :                 gup_flags |= FOLL_PCI_P2PDMA;
    1759           0 :         if (i->nofault)
    1760           0 :                 gup_flags |= FOLL_NOFAULT;
    1761             : 
    1762           0 :         addr = first_iovec_segment(i, &maxsize);
    1763           0 :         *offset0 = offset = addr % PAGE_SIZE;
    1764           0 :         addr &= PAGE_MASK;
    1765           0 :         maxpages = want_pages_array(pages, maxsize, offset, maxpages);
    1766           0 :         if (!maxpages)
    1767             :                 return -ENOMEM;
    1768           0 :         res = pin_user_pages_fast(addr, maxpages, gup_flags, *pages);
    1769           0 :         if (unlikely(res <= 0))
    1770           0 :                 return res;
    1771           0 :         maxsize = min_t(size_t, maxsize, res * PAGE_SIZE - offset);
    1772           0 :         iov_iter_advance(i, maxsize);
    1773           0 :         return maxsize;
    1774             : }
    1775             : 
    1776             : /**
    1777             :  * iov_iter_extract_pages - Extract a list of contiguous pages from an iterator
    1778             :  * @i: The iterator to extract from
    1779             :  * @pages: Where to return the list of pages
    1780             :  * @maxsize: The maximum amount of iterator to extract
    1781             :  * @maxpages: The maximum size of the list of pages
    1782             :  * @extraction_flags: Flags to qualify request
    1783             :  * @offset0: Where to return the starting offset into (*@pages)[0]
    1784             :  *
    1785             :  * Extract a list of contiguous pages from the current point of the iterator,
    1786             :  * advancing the iterator.  The maximum number of pages and the maximum amount
    1787             :  * of page contents can be set.
    1788             :  *
    1789             :  * If *@pages is NULL, a page list will be allocated to the required size and
    1790             :  * *@pages will be set to its base.  If *@pages is not NULL, it will be assumed
    1791             :  * that the caller allocated a page list at least @maxpages in size and this
    1792             :  * will be filled in.
    1793             :  *
    1794             :  * @extraction_flags can have ITER_ALLOW_P2PDMA set to request peer-to-peer DMA
    1795             :  * be allowed on the pages extracted.
    1796             :  *
    1797             :  * The iov_iter_extract_will_pin() function can be used to query how cleanup
    1798             :  * should be performed.
    1799             :  *
    1800             :  * Extra refs or pins on the pages may be obtained as follows:
    1801             :  *
    1802             :  *  (*) If the iterator is user-backed (ITER_IOVEC/ITER_UBUF), pins will be
    1803             :  *      added to the pages, but refs will not be taken.
    1804             :  *      iov_iter_extract_will_pin() will return true.
    1805             :  *
    1806             :  *  (*) If the iterator is ITER_KVEC, ITER_BVEC or ITER_XARRAY, the pages are
    1807             :  *      merely listed; no extra refs or pins are obtained.
     1808             :  *      iov_iter_extract_will_pin() will return false.
    1809             :  *
    1810             :  * Note also:
    1811             :  *
    1812             :  *  (*) Use with ITER_DISCARD is not supported as that has no content.
    1813             :  *
    1814             :  * On success, the function sets *@pages to the new pagelist, if allocated, and
    1815             :  * sets *offset0 to the offset into the first page.
    1816             :  *
    1817             :  * It may also return -ENOMEM and -EFAULT.
    1818             :  */
    1819           0 : ssize_t iov_iter_extract_pages(struct iov_iter *i,
    1820             :                                struct page ***pages,
    1821             :                                size_t maxsize,
    1822             :                                unsigned int maxpages,
    1823             :                                iov_iter_extraction_t extraction_flags,
    1824             :                                size_t *offset0)
    1825             : {
    1826           0 :         maxsize = min_t(size_t, min_t(size_t, maxsize, i->count), MAX_RW_COUNT);
    1827           0 :         if (!maxsize)
    1828             :                 return 0;
    1829             : 
    1830           0 :         if (likely(user_backed_iter(i)))
    1831           0 :                 return iov_iter_extract_user_pages(i, pages, maxsize,
    1832             :                                                    maxpages, extraction_flags,
    1833             :                                                    offset0);
    1834           0 :         if (iov_iter_is_kvec(i))
    1835           0 :                 return iov_iter_extract_kvec_pages(i, pages, maxsize,
    1836             :                                                    maxpages, extraction_flags,
    1837             :                                                    offset0);
    1838           0 :         if (iov_iter_is_bvec(i))
    1839           0 :                 return iov_iter_extract_bvec_pages(i, pages, maxsize,
    1840             :                                                    maxpages, extraction_flags,
    1841             :                                                    offset0);
    1842           0 :         if (iov_iter_is_xarray(i))
    1843           0 :                 return iov_iter_extract_xarray_pages(i, pages, maxsize,
    1844             :                                                      maxpages, extraction_flags,
    1845             :                                                      offset0);
    1846             :         return -EFAULT;
    1847             : }
    1848             : EXPORT_SYMBOL_GPL(iov_iter_extract_pages);
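
A hedged sketch of extraction plus the matching cleanup ("iter" and "len"
are assumed, and the maxpages cap of 8 is arbitrary); only user-backed
iterators leave pins behind, which is exactly what
iov_iter_extract_will_pin() reports:

    struct page **pages = NULL;
    size_t off;
    ssize_t got = iov_iter_extract_pages(iter, &pages, len, 8, 0, &off);

    if (got > 0) {
            /* ... run the transfer ... */
            if (iov_iter_extract_will_pin(iter))
                    for (int k = 0; k < DIV_ROUND_UP(off + got, PAGE_SIZE); k++)
                            unpin_user_page(pages[k]);
            kvfree(pages);
    }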

Generated by: LCOV version 1.14