LCOV - code coverage report
Current view:  top level - drivers/gpu/drm - drm_cache.c (source / functions)
Test:          coverage.info
Date:          2023-07-19 18:55:55
                    Hit   Total   Coverage
Lines:                2      40      5.0 %
Functions:            1       7     14.3 %

          Line data    Source code
       1             : /**************************************************************************
       2             :  *
       3             :  * Copyright (c) 2006-2007 Tungsten Graphics, Inc., Cedar Park, TX., USA
       4             :  * All Rights Reserved.
       5             :  *
       6             :  * Permission is hereby granted, free of charge, to any person obtaining a
       7             :  * copy of this software and associated documentation files (the
       8             :  * "Software"), to deal in the Software without restriction, including
       9             :  * without limitation the rights to use, copy, modify, merge, publish,
      10             :  * distribute, sub license, and/or sell copies of the Software, and to
      11             :  * permit persons to whom the Software is furnished to do so, subject to
      12             :  * the following conditions:
      13             :  *
      14             :  * The above copyright notice and this permission notice (including the
      15             :  * next paragraph) shall be included in all copies or substantial portions
      16             :  * of the Software.
      17             :  *
      18             :  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
      19             :  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
      20             :  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
      21             :  * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
      22             :  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
      23             :  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
      24             :  * USE OR OTHER DEALINGS IN THE SOFTWARE.
      25             :  *
      26             :  **************************************************************************/
      27             : /*
      28             :  * Authors: Thomas Hellström <thomas-at-tungstengraphics-dot-com>
      29             :  */
      30             : #include <linux/cc_platform.h>
      31             : #include <linux/export.h>
      32             : #include <linux/highmem.h>
      33             : #include <linux/ioport.h>
      34             : #include <linux/iosys-map.h>
      35             : #include <xen/xen.h>
      36             : 
      37             : #include <drm/drm_cache.h>
      38             : 
      39             : /* A small bounce buffer that fits on the stack. */
      40             : #define MEMCPY_BOUNCE_SIZE 128
      41             : 
      42             : #if defined(CONFIG_X86)
      43             : #include <asm/smp.h>
      44             : 
      45             : /*
      46             :  * clflushopt is an unordered instruction which needs fencing with mfence or
      47             :  * sfence to avoid ordering issues.  For drm_clflush_page this fencing happens
      48             :  * in the caller.
      49             :  */
      50             : static void
      51             : drm_clflush_page(struct page *page)
      52             : {
      53             :         uint8_t *page_virtual;
      54             :         unsigned int i;
      55             :         const int size = boot_cpu_data.x86_clflush_size;
      56             : 
      57             :         if (unlikely(page == NULL))
      58             :                 return;
      59             : 
      60             :         page_virtual = kmap_atomic(page);
      61             :         for (i = 0; i < PAGE_SIZE; i += size)
      62             :                 clflushopt(page_virtual + i);
      63             :         kunmap_atomic(page_virtual);
      64             : }
      65             : 
      66             : static void drm_cache_flush_clflush(struct page *pages[],
      67             :                                     unsigned long num_pages)
      68             : {
      69             :         unsigned long i;
      70             : 
      71             :         mb(); /*Full memory barrier used before so that CLFLUSH is ordered*/
      72             :         for (i = 0; i < num_pages; i++)
      73             :                 drm_clflush_page(*pages++);
      74             :         mb(); /*Also used after CLFLUSH so that all cache is flushed*/
      75             : }
      76             : #endif
      77             : 
      78             : /**
      79             :  * drm_clflush_pages - Flush dcache lines of a set of pages.
      80             :  * @pages: List of pages to be flushed.
      81             :  * @num_pages: Number of pages in the array.
      82             :  *
      83             :  * Flush every data cache line entry that points to an address belonging
      84             :  * to a page in the array.
      85             :  */
      86             : void
      87           0 : drm_clflush_pages(struct page *pages[], unsigned long num_pages)
      88             : {
      89             : 
      90             : #if defined(CONFIG_X86)
      91             :         if (static_cpu_has(X86_FEATURE_CLFLUSH)) {
      92             :                 drm_cache_flush_clflush(pages, num_pages);
      93             :                 return;
      94             :         }
      95             : 
      96             :         if (wbinvd_on_all_cpus())
      97             :                 pr_err("Timed out waiting for cache flush\n");
      98             : 
      99             : #elif defined(__powerpc__)
     100             :         unsigned long i;
     101             : 
     102             :         for (i = 0; i < num_pages; i++) {
     103             :                 struct page *page = pages[i];
     104             :                 void *page_virtual;
     105             : 
     106             :                 if (unlikely(page == NULL))
     107             :                         continue;
     108             : 
     109             :                 page_virtual = kmap_atomic(page);
     110             :                 flush_dcache_range((unsigned long)page_virtual,
     111             :                                    (unsigned long)page_virtual + PAGE_SIZE);
     112             :                 kunmap_atomic(page_virtual);
     113             :         }
     114             : #else
     115           0 :         WARN_ONCE(1, "Architecture has no drm_cache.c support\n");
     116             : #endif
     117           0 : }
     118             : EXPORT_SYMBOL(drm_clflush_pages);
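
drm_clflush_pages() operates on a plain array of struct page pointers, which makes it easy to call right after the CPU has written data that a non-cache-coherent device will read. A minimal sketch, assuming a hypothetical helper that stages data in a freshly allocated page (the helper name and flow are illustrative, not part of this file):

#include <linux/gfp.h>
#include <linux/highmem.h>
#include <linux/minmax.h>
#include <linux/string.h>
#include <drm/drm_cache.h>

/* Hypothetical example: copy CPU-written data into a new page and flush
 * its cache lines so a device that bypasses the CPU caches sees it. */
static struct page *example_stage_and_flush(const void *data, size_t len)
{
        struct page *page = alloc_page(GFP_KERNEL);
        void *vaddr;

        if (!page)
                return NULL;

        vaddr = kmap_local_page(page);
        memcpy(vaddr, data, min_t(size_t, len, PAGE_SIZE));
        kunmap_local(vaddr);

        /* A one-entry array here; real callers pass their full page list. */
        drm_clflush_pages(&page, 1);
        return page;
}
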
     119             : 
     120             : /**
      121             :  * drm_clflush_sg - Flush dcache lines pointing to a scatter-gather table.
     122             :  * @st: struct sg_table.
     123             :  *
     124             :  * Flush every data cache line entry that points to an address in the
     125             :  * sg.
     126             :  */
     127             : void
     128           0 : drm_clflush_sg(struct sg_table *st)
     129             : {
     130             : #if defined(CONFIG_X86)
     131             :         if (static_cpu_has(X86_FEATURE_CLFLUSH)) {
     132             :                 struct sg_page_iter sg_iter;
     133             : 
     134             :                 mb(); /*CLFLUSH is ordered only by using memory barriers*/
     135             :                 for_each_sgtable_page(st, &sg_iter, 0)
     136             :                         drm_clflush_page(sg_page_iter_page(&sg_iter));
      137             :                 mb(); /*Make sure that every cache line entry is flushed*/
     138             : 
     139             :                 return;
     140             :         }
     141             : 
     142             :         if (wbinvd_on_all_cpus())
     143             :                 pr_err("Timed out waiting for cache flush\n");
     144             : #else
     145           0 :         WARN_ONCE(1, "Architecture has no drm_cache.c support\n");
     146             : #endif
     147           0 : }
     148             : EXPORT_SYMBOL(drm_clflush_sg);
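
Buffer objects commonly track their backing pages in a struct sg_table, so drm_clflush_sg() is the natural flush entry point after CPU writes to such an object. A minimal sketch, assuming a single page is wrapped in a temporary table (real drivers flush the sg_table of the whole backing store):

#include <linux/gfp.h>
#include <linux/mm.h>
#include <linux/scatterlist.h>
#include <drm/drm_cache.h>

/* Hypothetical example: wrap one page in an sg_table and flush every
 * cache line that maps it. */
static int example_flush_one_page(struct page *page)
{
        struct sg_table sgt;
        int ret;

        ret = sg_alloc_table(&sgt, 1, GFP_KERNEL);
        if (ret)
                return ret;

        sg_set_page(sgt.sgl, page, PAGE_SIZE, 0);
        drm_clflush_sg(&sgt);
        sg_free_table(&sgt);
        return 0;
}
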
     149             : 
     150             : /**
     151             :  * drm_clflush_virt_range - Flush dcache lines of a region
     152             :  * @addr: Initial kernel memory address.
     153             :  * @length: Region size.
     154             :  *
     155             :  * Flush every data cache line entry that points to an address in the
     156             :  * region requested.
     157             :  */
     158             : void
     159           0 : drm_clflush_virt_range(void *addr, unsigned long length)
     160             : {
     161             : #if defined(CONFIG_X86)
     162             :         if (static_cpu_has(X86_FEATURE_CLFLUSH)) {
     163             :                 const int size = boot_cpu_data.x86_clflush_size;
     164             :                 void *end = addr + length;
     165             : 
     166             :                 addr = (void *)(((unsigned long)addr) & -size);
     167             :                 mb(); /*CLFLUSH is only ordered with a full memory barrier*/
     168             :                 for (; addr < end; addr += size)
     169             :                         clflushopt(addr);
     170             :                 clflushopt(end - 1); /* force serialisation */
     171             :                 mb(); /*Ensure that every data cache line entry is flushed*/
     172             :                 return;
     173             :         }
     174             : 
     175             :         if (wbinvd_on_all_cpus())
     176             :                 pr_err("Timed out waiting for cache flush\n");
     177             : #else
     178           0 :         WARN_ONCE(1, "Architecture has no drm_cache.c support\n");
     179             : #endif
     180           0 : }
     181             : EXPORT_SYMBOL(drm_clflush_virt_range);
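
drm_clflush_virt_range() is the byte-range variant: on the clflush path it rounds the start address down to a cache-line boundary itself, so callers can pass exactly the region they touched. A minimal sketch, assuming a driver writes a small blob into a kernel mapping of device-visible memory (names are illustrative):

#include <linux/string.h>
#include <drm/drm_cache.h>

/* Hypothetical example: publish a CPU-written blob by flushing exactly
 * the bytes that were modified in the mapping. */
static void example_publish_blob(void *map, size_t offset,
                                 const void *blob, size_t len)
{
        memcpy(map + offset, blob, len);
        drm_clflush_virt_range(map + offset, len);
}
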
     182             : 
     183           0 : bool drm_need_swiotlb(int dma_bits)
     184             : {
     185             :         struct resource *tmp;
     186           0 :         resource_size_t max_iomem = 0;
     187             : 
     188             :         /*
     189             :          * Xen paravirtual hosts require swiotlb regardless of requested dma
     190             :          * transfer size.
     191             :          *
     192             :          * NOTE: Really, what it requires is use of the dma_alloc_coherent
     193             :          *       allocator used in ttm_dma_populate() instead of
     194             :          *       ttm_populate_and_map_pages(), which bounce buffers so much in
     195             :          *       Xen it leads to swiotlb buffer exhaustion.
     196             :          */
     197             :         if (xen_pv_domain())
     198             :                 return true;
     199             : 
     200             :         /*
     201             :          * Enforce dma_alloc_coherent when memory encryption is active as well
     202             :          * for the same reasons as for Xen paravirtual hosts.
     203             :          */
     204           0 :         if (cc_platform_has(CC_ATTR_MEM_ENCRYPT))
     205             :                 return true;
     206             : 
     207           0 :         for (tmp = iomem_resource.child; tmp; tmp = tmp->sibling)
      208           0 :                 max_iomem = max(max_iomem, tmp->end);
     209             : 
     210           0 :         return max_iomem > ((u64)1 << dma_bits);
     211             : }
     212             : EXPORT_SYMBOL(drm_need_swiotlb);
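
A driver typically calls drm_need_swiotlb() once at probe time, passing the DMA address width its device supports, and uses the result to decide whether transfers may be bounced through swiotlb (because the highest I/O memory address in the system exceeds what the device can reach, or because Xen PV or memory encryption forces coherent allocations). A minimal sketch under that assumption:

#include <drm/drm_cache.h>

/* Hypothetical example: a device with 40-bit DMA addressing asks whether
 * its transfers may end up going through swiotlb bounce buffers. */
static bool example_dma_may_bounce(void)
{
        return drm_need_swiotlb(40);
}
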
     213             : 
     214           0 : static void memcpy_fallback(struct iosys_map *dst,
     215             :                             const struct iosys_map *src,
     216             :                             unsigned long len)
     217             : {
     218           0 :         if (!dst->is_iomem && !src->is_iomem) {
     219           0 :                 memcpy(dst->vaddr, src->vaddr, len);
     220           0 :         } else if (!src->is_iomem) {
     221           0 :                 iosys_map_memcpy_to(dst, 0, src->vaddr, len);
     222           0 :         } else if (!dst->is_iomem) {
     223           0 :                 memcpy_fromio(dst->vaddr, src->vaddr_iomem, len);
     224             :         } else {
     225             :                 /*
     226             :                  * Bounce size is not performance tuned, but using a
     227             :                  * bounce buffer like this is significantly faster than
     228             :                  * resorting to ioreadxx() + iowritexx().
     229             :                  */
     230             :                 char bounce[MEMCPY_BOUNCE_SIZE];
     231           0 :                 void __iomem *_src = src->vaddr_iomem;
     232           0 :                 void __iomem *_dst = dst->vaddr_iomem;
     233             : 
     234           0 :                 while (len >= MEMCPY_BOUNCE_SIZE) {
     235           0 :                         memcpy_fromio(bounce, _src, MEMCPY_BOUNCE_SIZE);
     236           0 :                         memcpy_toio(_dst, bounce, MEMCPY_BOUNCE_SIZE);
     237           0 :                         _src += MEMCPY_BOUNCE_SIZE;
     238           0 :                         _dst += MEMCPY_BOUNCE_SIZE;
     239           0 :                         len -= MEMCPY_BOUNCE_SIZE;
     240             :                 }
     241           0 :                 if (len) {
      242           0 :                         memcpy_fromio(bounce, _src, len);
      243           0 :                         memcpy_toio(_dst, bounce, len);
     244             :                 }
     245             :         }
     246           0 : }
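
memcpy_fallback() picks its strategy from the is_iomem flags of the two iosys_map arguments: a plain memcpy() when neither side is I/O memory, the directional io helpers when only one side is, and the on-stack bounce buffer only for the iomem-to-iomem case. A minimal sketch of how a caller would describe the two sides (the buffer names are illustrative):

#include <linux/iosys-map.h>

/* Hypothetical example: tag a system-memory destination and an MMIO/VRAM
 * source so the copy helpers can choose the right access primitives. */
static void example_describe_maps(void *sysmem, void __iomem *vram,
                                  struct iosys_map *dst, struct iosys_map *src)
{
        iosys_map_set_vaddr(dst, sysmem);        /* CPU-cached kernel memory */
        iosys_map_set_vaddr_iomem(src, vram);    /* memory-mapped I/O */
}
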
     247             : 
     248             : #ifdef CONFIG_X86
     249             : 
     250             : static DEFINE_STATIC_KEY_FALSE(has_movntdqa);
     251             : 
     252             : static void __memcpy_ntdqa(void *dst, const void *src, unsigned long len)
     253             : {
     254             :         kernel_fpu_begin();
     255             : 
     256             :         while (len >= 4) {
     257             :                 asm("movntdqa      (%0), %%xmm0\n"
     258             :                     "movntdqa 16(%0), %%xmm1\n"
     259             :                     "movntdqa 32(%0), %%xmm2\n"
     260             :                     "movntdqa 48(%0), %%xmm3\n"
     261             :                     "movaps %%xmm0,   (%1)\n"
     262             :                     "movaps %%xmm1, 16(%1)\n"
     263             :                     "movaps %%xmm2, 32(%1)\n"
     264             :                     "movaps %%xmm3, 48(%1)\n"
     265             :                     :: "r" (src), "r" (dst) : "memory");
     266             :                 src += 64;
     267             :                 dst += 64;
     268             :                 len -= 4;
     269             :         }
     270             :         while (len--) {
     271             :                 asm("movntdqa (%0), %%xmm0\n"
     272             :                     "movaps %%xmm0, (%1)\n"
     273             :                     :: "r" (src), "r" (dst) : "memory");
     274             :                 src += 16;
     275             :                 dst += 16;
     276             :         }
     277             : 
     278             :         kernel_fpu_end();
     279             : }
     280             : 
     281             : /*
     282             :  * __drm_memcpy_from_wc copies @len bytes from @src to @dst using
     283             :  * non-temporal instructions where available. Note that all arguments
     284             :  * (@src, @dst) must be aligned to 16 bytes and @len must be a multiple
     285             :  * of 16.
     286             :  */
     287             : static void __drm_memcpy_from_wc(void *dst, const void *src, unsigned long len)
     288             : {
     289             :         if (unlikely(((unsigned long)dst | (unsigned long)src | len) & 15))
     290             :                 memcpy(dst, src, len);
     291             :         else if (likely(len))
     292             :                 __memcpy_ntdqa(dst, src, len >> 4);
     293             : }
     294             : 
     295             : /**
     296             :  * drm_memcpy_from_wc - Perform the fastest available memcpy from a source
     297             :  * that may be WC.
     298             :  * @dst: The destination pointer
     299             :  * @src: The source pointer
      300             :  * @len: The size of the area to transfer in bytes
     301             :  *
      302             :  * Tries an arch-optimized memcpy that reads out of the WC region using
      303             :  * non-temporal loads, and falls back to a normal memcpy if none is available.
     304             :  */
     305             : void drm_memcpy_from_wc(struct iosys_map *dst,
     306             :                         const struct iosys_map *src,
     307             :                         unsigned long len)
     308             : {
     309             :         if (WARN_ON(in_interrupt())) {
     310             :                 memcpy_fallback(dst, src, len);
     311             :                 return;
     312             :         }
     313             : 
     314             :         if (static_branch_likely(&has_movntdqa)) {
     315             :                 __drm_memcpy_from_wc(dst->is_iomem ?
     316             :                                      (void __force *)dst->vaddr_iomem :
     317             :                                      dst->vaddr,
     318             :                                      src->is_iomem ?
     319             :                                      (void const __force *)src->vaddr_iomem :
     320             :                                      src->vaddr,
     321             :                                      len);
     322             :                 return;
     323             :         }
     324             : 
     325             :         memcpy_fallback(dst, src, len);
     326             : }
     327             : EXPORT_SYMBOL(drm_memcpy_from_wc);
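
A typical use is reading data back from a write-combined VRAM mapping into system memory: both sides are wrapped in an iosys_map, and drm_memcpy_from_wc() takes the movntdqa path when the static key is enabled, otherwise it drops into memcpy_fallback(). A minimal sketch (buffer names are illustrative):

#include <linux/iosys-map.h>
#include <drm/drm_cache.h>

/* Hypothetical example: copy 'len' bytes out of a WC VRAM mapping into a
 * system-memory buffer using the fastest path available on this CPU. */
static void example_vram_readback(void *dst_sysmem, void __iomem *src_vram,
                                  unsigned long len)
{
        struct iosys_map dst = IOSYS_MAP_INIT_VADDR(dst_sysmem);
        struct iosys_map src;

        iosys_map_set_vaddr_iomem(&src, src_vram);
        drm_memcpy_from_wc(&dst, &src, len);
}
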
     328             : 
     329             : /*
     330             :  * drm_memcpy_init_early - One time initialization of the WC memcpy code
     331             :  */
     332             : void drm_memcpy_init_early(void)
     333             : {
     334             :         /*
     335             :          * Some hypervisors (e.g. KVM) don't support VEX-prefix instructions
     336             :          * emulation. So don't enable movntdqa in hypervisor guest.
     337             :          */
     338             :         if (static_cpu_has(X86_FEATURE_XMM4_1) &&
     339             :             !boot_cpu_has(X86_FEATURE_HYPERVISOR))
     340             :                 static_branch_enable(&has_movntdqa);
     341             : }
     342             : #else
     343           0 : void drm_memcpy_from_wc(struct iosys_map *dst,
     344             :                         const struct iosys_map *src,
     345             :                         unsigned long len)
     346             : {
     347           0 :         WARN_ON(in_interrupt());
     348             : 
     349           0 :         memcpy_fallback(dst, src, len);
     350           0 : }
     351             : EXPORT_SYMBOL(drm_memcpy_from_wc);
     352             : 
     353           1 : void drm_memcpy_init_early(void)
     354             : {
     355           1 : }
     356             : #endif /* CONFIG_X86 */
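
The movntdqa fast path is gated on a static branch that drm_memcpy_init_early() resolves exactly once, so every later drm_memcpy_from_wc() call only pays for a patched jump. A minimal sketch of how such a one-time hook is wired up, assuming it is invoked from the DRM core's module init (the init function shown is illustrative, not the actual call site):

#include <linux/init.h>
#include <linux/module.h>
#include <drm/drm_cache.h>

/* Hypothetical example: resolve the has_movntdqa static key during core
 * initialization so later drm_memcpy_from_wc() calls take the right path. */
static int __init example_core_init(void)
{
        drm_memcpy_init_early();
        /* remaining one-time core setup would follow here */
        return 0;
}
module_init(example_core_init);
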

Generated by: LCOV version 1.14